# Deploying Edge AI on an MCU: Weather Prediction on the RA6E2

Using the past 24 h of weather data (temperature, humidity, and barometric pressure), a microcontroller predicts the average weather for the coming 24 h.
## Porting TinyMaix to Your Microcontroller

TinyMaix is an ultra-lightweight neural network inference library for microcontrollers; it lets you run small deep learning models on virtually any MCU.

GitHub: https://github.com/sipeed/TinyMaix

To port TinyMaix, first clone or download the repository to your PC. Running TinyMaix on an MCU only needs the files in the `include` and `src` directories, so copy those two directories into your project.

Taking a Renesas e2 studio project as the example, create a `tinymaix` folder under `src` and place them there:
```
D:\RA_WORKSPACE\WEATHERSTATION\SRC
└─ tinymaix
   ├─ include
   └─ src
```

Then edit the PORT CONFIG section of `tm_port.h` as needed; with that, the port is complete. The sketch below shows the kind of options this section contains.
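The PORT CONFIG block mainly selects the target architecture, optimization level, and model data type, and maps memory allocation and debug printing onto your platform. The snippet below is only an orientation sketch with the values I would expect for a plain Cortex-M, int8 build; the authoritative macro names and defaults are those in the upstream `tm_port.h`, so adjust against your copy of the header:

```c
/* tm_port.h -- PORT CONFIG (illustrative sketch, not a drop-in file) */
#define TM_ARCH        TM_ARCH_CPU     /* portable C kernels, no SIMD */
#define TM_OPT_LEVEL   TM_OPT0         /* smallest code size */
#define TM_MDL_TYPE    TM_MDL_INT8     /* matches the int8 .tmdl exported later */

#define tm_malloc(x)   malloc(x)       /* heap hooks for model buffers */
#define tm_free(x)     free(x)

#define TM_PRINTF(...) printf(__VA_ARGS__)   /* routed to the UART printf */
```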
## Model Training

### Dataset Preparation

To make the predictions as accurate as possible, the model should be trained on local weather data. Below is the raw data I used, split across two tables: hourly observations and a daily weather label.
| 日期 (timestamp) | 气温 (temperature, °C) | 气压 (pressure, hPa) | 湿度 (humidity, %) |
|---|---|---|---|
| 2023-01-01 00:00 | -9.99 | 1024.56 | 48.4 |
| 2023-01-01 01:00 | -10.59 | 1024.74 | 50.97 |
| 2023-01-01 02:00 | -11.64 | 1025.2 | 55.42 |
| 日期 (date) | 天气代码 (weather) |
|---|---|
| 2023-01-01 | 晴 (sunny) |
| 2023-01-02 | 阴 (overcast) |
| 2023-01-03 | 雪 (snow) |
The weather codes can be defined as: {"晴" (sunny): 0, "多云" (cloudy): 1, "阴" (overcast): 2, "雨" (rain): 3, "雪" (snow): 4}.

Use Python to split the dataset into a training table and a validation table. In each row, the first 72 columns are the 24 h of weather features (24 hours × 3 features), and the last column is the weather code of the following day, which serves as the label:
```python
import pandas as pd
import numpy as np

RAW_H = '逐小时天气_2324.csv'   # hourly weather
RAW_D = '逐日天气_2324.csv'     # daily weather
TRAIN_CSV = 'train_72x5.csv'
VAL_CSV = 'val_72x5.csv'

h = pd.read_csv(RAW_H, parse_dates=['日期'])
d = pd.read_csv(RAW_D, parse_dates=['日期'])

wcode = {'晴': 0, '多云': 1, '阴': 2, '雨': 3, '雪': 4}
d['code'] = d['天气代码'].map(wcode)

samples = []
for day in d['日期']:
    # The 24 hourly rows of this day form the features
    hrs = h[h['日期'].dt.date == day.date()]
    if len(hrs) < 24:
        continue
    # The next day's weather code is the label
    next_day = day + pd.Timedelta(days=1)
    label_row = d[d['日期'] == next_day]
    if label_row.empty:
        continue
    y = label_row.iloc[0]['code']
    x = hrs.iloc[:24][['气温', '气压', '湿度']].values.flatten()
    samples.append(np.concatenate([x, [y]]))

df = pd.DataFrame(np.array(samples))
# The first 72 columns are features, the last column is the label
train = df[df.index % 10 < 8]   # simple 8:2 split
val = df[df.index % 10 >= 8]
train.to_csv(TRAIN_CSV, index=False, header=False)
val.to_csv(VAL_CSV, index=False, header=False)
print('train:', len(train), 'val:', len(val))
```
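Weather classes are usually imbalanced (snow or rain days tend to be much rarer than sunny or cloudy ones), so it is worth a quick look at the label distribution of the generated training set before training. A minimal sketch of my own, assuming the `train_72x5.csv` produced above:

```python
import pandas as pd

# Column 72 (the 73rd column) holds the weather code label 0..4
labels = pd.read_csv('train_72x5.csv', header=None)[72]
print(labels.value_counts().sort_index())  # samples per class: 0=sunny ... 4=snow
```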
### Training the Model

```python
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load(csv):
    arr = pd.read_csv(csv, header=None).values.astype('float32')
    return arr[:, :72], arr[:, 72].astype('int32')

x_train, y_train = load('train_72x5.csv')
x_val, y_val = load('val_72x5.csv')

# Standardize features with the training-set statistics
mean = x_train.mean(axis=0)
std = x_train.std(axis=0) + 1e-5

def standardize(x):
    return (x - mean) / std

# Small fully connected classifier: 72 inputs -> 5 weather classes
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(72,)),
    tf.keras.layers.Dense(128, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(5, activation='softmax')
])

opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

es = tf.keras.callbacks.EarlyStopping(patience=50,
                                      restore_best_weights=True)
history = model.fit(standardize(x_train), y_train,
                    validation_data=(standardize(x_val), y_val),
                    epochs=1000,
                    batch_size=64,
                    callbacks=[es],
                    verbose=1)

# Full-integer (int8) quantization for TinyMaix
def representative_dataset():
    for x in tf.data.Dataset.from_tensor_slices(x_train).batch(1).take(300):
        yield [tf.cast(standardize(x), tf.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_model = converter.convert()
open('weather_int8.tflite', 'wb').write(tflite_model)
print('tflite saved, size:', len(tflite_model))

# Save the normalization statistics for the export step below
np.save('mean.npy', mean)
np.save('std.npy', std)

plt.figure(figsize=(6, 4))
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.title('Loss curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
```
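Before moving to the MCU, it is worth confirming that int8 quantization has not noticeably hurt accuracy. The following is a minimal sketch of my own (not part of the original workflow), assuming the `weather_int8.tflite`, `mean.npy`, `std.npy`, and `val_72x5.csv` files produced above; it quantizes the validation inputs the same way the firmware will and runs them through the TFLite interpreter:

```python
import numpy as np
import pandas as pd
import tensorflow as tf

# Validation data, standardized with the training-set statistics
mean, std = np.load('mean.npy'), np.load('std.npy')
val = pd.read_csv('val_72x5.csv', header=None).values.astype('float32')
x_val = (val[:, :72] - mean) / std
y_val = val[:, 72].astype('int32')

interp = tf.lite.Interpreter(model_path='weather_int8.tflite')
interp.allocate_tensors()
inp = interp.get_input_details()[0]
out = interp.get_output_details()[0]
scale, zp = inp['quantization']          # input scale and zero point

correct = 0
for x, y in zip(x_val, y_val):
    # Same quantization the MCU performs: q = round(x / scale) + zp, clipped to int8
    q = np.clip(np.round(x / scale) + zp, -128, 127).astype(np.int8)
    interp.set_tensor(inp['index'], q.reshape(1, 72))
    interp.invoke()
    pred = int(np.argmax(interp.get_tensor(out['index'])[0]))
    correct += int(pred == y)

print('int8 validation accuracy:', correct / len(y_val))
```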
### Exporting Quantization Parameters

Use a Python script to export the quantization parameters `mean`, `std`, `scale`, and `zp` as C constants:
```python
import numpy as np
import tensorflow as tf

mean = np.load('mean.npy')
std = np.load('std.npy')

model = tf.lite.Interpreter(model_path="weather_int8.tflite")
model.allocate_tensors()

def to_c_arr(name, vec):
    # Print a C array definition, three values (one hour of features) per line
    print(f'static const float {name}[72] = {{')
    print('    ', end='')
    for i, v in enumerate(vec):
        print(f'{v:.6f}f', end='')
        if i != len(vec) - 1:
            print(',', end='')
        if (i + 1) % 3 == 0:
            print('\n    ', end='')
        else:
            print(' ', end='')
    print('\n};')

# Input scale and zero point of the quantized model
input_details = model.get_input_details()[0]
scale = input_details['quantization_parameters']['scales'][0]
zp = input_details['quantization_parameters']['zero_points'][0]

to_c_arr('mean', mean)
to_c_arr('std', std)
print(f'static const float scale = {scale:.6f}f;')
print(f'static const int zp = {zp};')
```

Running it prints:
```c
static const float mean[72] = {
    6.828718f, 1009.098938f, 68.643417f,
    // ...
};
static const float std[72] = {
    12.587724f, 9.960853f, 19.852306f,
    // ...
};
static const float scale = 0.024126f;
static const int zp = 5;
```
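On the MCU these constants turn each raw feature x into the int8 value the model expects: standardize first, then quantize with the input tensor's scale and zero point, i.e. `q = clamp(round(((x - mean[i]) / std[i]) / scale) + zp, -128, 127)`. With `scale = 0.024126` and `zp = 5`, a feature that standardizes to 1.0 becomes `round(1.0 / 0.024126) + 5 = 46`. The same formula appears in the C quantization loop further below.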
### Exporting the TinyMaix Model

Exporting a TinyMaix model uses the `tflite2tmdl` tool in TinyMaix's `tools` directory. Copy the `weather_int8.tflite` produced by training into the TinyMaix root directory and run:

```
python .\tools\tflite2tmdl.py .\weather_int8.tflite .\weather.tmdl int8 1 72 5
```

This generates the following two files:

```
weather.h
weather.tmdl
```

## Deploying the Model
First copy `weather.h` into the project; I placed it under `tinymaix\model`:
```
C:\USERS\HP\DESKTOP\DEV\RA_WORKSPACE\WEATHERSTATION\SRC
└─tinymaix
   ├─include
   ├─model
   │      weather.h
   └─src
```

Then paste the `mean`, `std`, `scale`, and `zp` constants into `hal_entry.c`, and we can write the inference code:
```c
static tm_mat_t outs[1];
static tm_err_t res;
static tm_mat_t in_mat;
static int8_t input[72] = {0};
static int8_t w_maxi = -1;

static void parse_output(tm_mat_t* outs)
{
    tm_mat_t out = outs[0];
    char w_tags[5][10] = {"Sunny", "Cloudy", "Overcast", "Rainy", "Snowy"};
    float* data = out.dataf;
    float maxp = 0;
    int maxi = -1;
    for(int i = 0; i < 5; i++){
        printf("%d: %.6f\n", i, data[i]);
        if(data[i] > maxp) {
            maxi = i;
            maxp = data[i];
        }
    }
    w_maxi = maxi;
    printf("### Predict output is: %s, prob %.6f\r\n", w_tags[maxi], maxp);
    return;
}

void tinyML_run(void)
{
    TM_DBGT_INIT();
    tm_mdl_t mdl;

    // Load the model
    tm_stat((tm_mdlbin_t*)mdl_data);
    res = tm_load(&mdl, mdl_data, NULL, NULL, &in_mat);
    if(res != TM_OK) {
        printf("tm model load err %d\r\n", res);
    }

    // Standardize and quantize the 24 x 3 raw features to int8
    for(int i = 0; i < 72; i++){
        float v = (weather[i/3][i%3] - mean[i]) / std[i];
        int q = (int)round(v / scale) + zp;
        if(q > 127) q = 127;
        if(q < -128) q = -128;
        input[i] = (int8_t)q;
    }

    // Feed the input data
    in_mat.data = (mtype_t*)input;

    // Run inference
    TM_DBGT_START();
    res = tm_run(&mdl, &in_mat, outs);
    TM_DBGT("tm_run");

    // Print the result
    if(res == TM_OK) parse_output(outs);
    else printf("tm run error: %d\n", res);

    // Unload the model
    tm_unload(&mdl);
}
```

Call `tinyML_run()` from `hal_entry()` and you are done.
### Complete hal_entry.c
```c
#include <stdio.h>
#include <math.h>
#include "hal_data.h"
#include "uart/bsp_uart.h"
#include "tinymaix.h"
#include "tinymaix/model/weather.h"

static tm_mat_t outs[1];
static tm_err_t res;
static tm_mat_t in_mat;
static int8_t input[72] = {0};

FSP_CPP_HEADER
void R_BSP_WarmStart(bsp_warm_start_event_t event);
FSP_CPP_FOOTER

static const float mean[72] = {
    6.828718f, 1009.098938f, 68.643417f,
    6.503504f, 1008.989441f, 69.627861f,
    // ...
};
static const float std[72] = {
    12.587724f, 9.960853f, 19.852306f,
    12.558513f, 10.011790f, 19.922518f,
    // ...
};
static const float scale = 0.024126f;
static const int zp = 5;

float weather[24][3] = {
    {8.66f, 1018.67f, 90.3f},
    {8.61f, 1018.47f, 89.99f},
    // ...
};

static const char w_tags[5][10] = {"Sunny", "Cloudy", "Overcast", "Rainy", "Snowy"};
static int8_t w_maxi = -1;

/*******************************************************************************************************************//**
 * Parse and print the model output
 *
 * @param[in] outs  model output
 **********************************************************************************************************************/
static void parse_output(tm_mat_t* outs)
{
    tm_mat_t out = outs[0];
    float* data = out.dataf;
    float maxp = 0;
    int maxi = -1;
    for(int i = 0; i < 5; i++){
        printf("%d: %.6f\n", i, data[i]);
        if(data[i] > maxp) {
            maxi = i;
            maxp = data[i];
        }
    }
    w_maxi = maxi;
    printf("### Predict output is: %s, prob %.6f\r\n", w_tags[maxi], maxp);
    return;
}

/*******************************************************************************************************************//**
 * Run TinyML inference
 **********************************************************************************************************************/
void tinyML_run(void)
{
    TM_DBGT_INIT();
    tm_mdl_t mdl;
    tm_stat((tm_mdlbin_t*)mdl_data);
    res = tm_load(&mdl, mdl_data, NULL, NULL, &in_mat);
    if(res != TM_OK) {
        printf("tm model load err %d\r\n", res);
    }
    for(int i = 0; i < 72; i++){
        float v = (weather[i/3][i%3] - mean[i]) / std[i];
        int q = (int)round(v / scale) + zp;
        if(q > 127) q = 127;
        if(q < -128) q = -128;
        input[i] = (int8_t)q;
    }
    in_mat.data = (mtype_t*)input;
    TM_DBGT_START();
    res = tm_run(&mdl, &in_mat, outs);
    TM_DBGT("tm_run");
    if(res == TM_OK) parse_output(outs);
    else printf("tm run error: %d\n", res);
    tm_unload(&mdl);
}

/*******************************************************************************************************************//**
 * main() is generated by the RA Configuration editor and is used to generate threads if an RTOS is used. This function
 * is called by main() when no RTOS is used.
 **********************************************************************************************************************/
void hal_entry(void)
{
    UART0_Init();
    R_IOPORT_Open(&g_ioport_ctrl, g_ioport.p_cfg);

    tinyML_run();

    while(1) {
        R_IOPORT_PinWrite(&g_ioport_ctrl, BSP_IO_PORT_01_PIN_13, BSP_IO_LEVEL_HIGH);
        R_BSP_SoftwareDelay(1, BSP_DELAY_UNITS_SECONDS);
        R_IOPORT_PinWrite(&g_ioport_ctrl, BSP_IO_PORT_01_PIN_13, BSP_IO_LEVEL_LOW);
        R_BSP_SoftwareDelay(1, BSP_DELAY_UNITS_SECONDS);
    }

#if BSP_TZ_SECURE_BUILD
    /* Enter non-secure code */
    R_BSP_NonSecureEnter();
#endif
}

/*******************************************************************************************************************//**
 * This function is called at various points during the startup process. This implementation uses the event that is
 * called right before main() to set up the pins.
 *
 * @param[in] event Where at in the start up process the code is currently at
 **********************************************************************************************************************/
void R_BSP_WarmStart(bsp_warm_start_event_t event)
{
    if (BSP_WARM_START_RESET == event)
    {
#if BSP_FEATURE_FLASH_LP_VERSION != 0
        /* Enable reading from data flash. */
        R_FACI_LP->DFLCTL = 1U;

        /* Would normally have to wait tDSTOP(6us) for data flash recovery. Placing the enable here, before clock and
         * C runtime initialization, should negate the need for a delay since the initialization will typically take more than 6us. */
#endif
    }

    if (BSP_WARM_START_POST_C == event)
    {
        /* C runtime environment and system clocks are setup. */

        /* Configure pins. */
        R_IOPORT_Open (&IOPORT_CFG_CTRL, &IOPORT_CFG_NAME);

#if BSP_CFG_SDRAM_ENABLED
        /* Setup SDRAM and initialize it. Must configure pins first. */
        R_BSP_SdramInit(true);
#endif
    }
}

#if BSP_TZ_SECURE_BUILD
FSP_CPP_HEADER
BSP_CMSE_NONSECURE_ENTRY void template_nonsecure_callable ();

/* Trustzone Secure Projects require at least one nonsecure callable function in order to build (Remove this if it is not required to build). */
BSP_CMSE_NONSECURE_ENTRY void template_nonsecure_callable ()
{
}
FSP_CPP_FOOTER
#endif
```

After flashing the firmware, the serial port prints:

```
================================ model stat ================================
mdl_type=0 (int8))
out_deq=1
input_cnt=1, output_cnt=1, layer_cnt=5
input 1dims: (1, 1, 72)
output 1dims: (1, 1, 5)
main buf size 200; sub buf size 0
//Note: PARAM is layer param size, include align padding
Idx Layer outshape inoft outoft PARAM MEMOUT OPS
--- Input 1, 1, 72 - 0 0 72 0
###L71: body oft = 64
###L72: type=2, is_out=0, size=10304, in_oft=0, out_oft=72, in_dims=[1,1,1,72], out_dims=[1,1,1,128], in_s=0.024, in_zp=5, out_s=0.015, out_zp=-128
###L96: FC: ws_oft=64, w_oft=576, b_oft=9792
000 FC 1, 1,128 0 72 10240 128 9216
###L71: body oft = 10368
###L72: type=2, is_out=0, size=8768, in_oft=72, out_oft=0, in_dims=[1,1,1,128], out_dims=[1,1,1,64], in_s=0.015, in_zp=-128, out_s=0.013, out_zp=-128
###L96: FC: ws_oft=64, w_oft=320, b_oft=8512
001 FC 1, 1, 64 72 0 8704 64 8192
###L71: body oft = 19136
###L72: type=2, is_out=0, size=4672, in_oft=0, out_oft=136, in_dims=[1,1,1,64], out_dims=[1,1,1,64], in_s=0.013, in_zp=-128, out_s=0.012, out_zp=-128
###L96: FC: ws_oft=64, w_oft=320, b_oft=4416
002 FC 1, 1, 64 0 136 4608 64 4096
###L71: body oft = 23808
###L72: type=2, is_out=0, size=432, in_oft=136, out_oft=0, in_dims=[1,1,1,64], out_dims=[1,1,1,5], in_s=0.012, in_zp=-128, out_s=0.034, out_zp=-6
###L96: FC: ws_oft=64, w_oft=88, b_oft=408
003 FC 1, 1, 5 136 0 368 5 320
###L71: body oft = 24240
###L72: type=3, is_out=1, size=48, in_oft=0, out_oft=168, in_dims=[1,1,1,5], out_dims=[1,1,1,5], in_s=0.034, in_zp=-6, out_s=0.004, out_zp=-128
004 Softmax 1, 1, 5 0 168 0 5 30
Total param ~23.4 KB, OPS ~0.02 MOPS, buffer 0.2 KB
===tm_run use 0.000 ms
0: 0.152344
1: 0.843750
2: 0.003906
3: 0.003906
4: 0.003906
### Predict output is: Cloudy, prob 0.843750
```