# Deploying Edge AI on an MCU: Weather Prediction on the RA6E2

Using the past 24 h of weather data (temperature, humidity, and barometric pressure), a microcontroller predicts the average weather for the coming 24 h.
## Porting TinyMaix to Your Microcontroller

TinyMaix is an ultra-lightweight neural network inference library for microcontrollers; it lets you run small deep learning models on virtually any MCU.

GitHub: https://github.com/sipeed/TinyMaix

To port TinyMaix, first clone or download the repository to your PC. Running TinyMaix on an MCU only needs the files in the `include` and `src` directories, so copy those two directories into your project.

Taking a Renesas e2 studio project as the example, create a `tinymaix` folder under `src` and place them there:
```
D:\RA_WORKSPACE\WEATHERSTATION\SRC
└─ tinymaix
   ├─ include
   └─ src
```

Then edit the PORT CONFIG section of `tm_port.h` as needed; with that, the port is complete. The sketch below shows the kind of options this section contains.
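The PORT CONFIG block mainly selects the target architecture, optimization level, and model data type, and maps memory allocation and debug printing onto your platform. The snippet below is only an orientation sketch with the values I would expect for a plain Cortex-M, int8 build; the authoritative macro names and defaults are those in the upstream `tm_port.h`, so adjust against your copy of the header:

```c
/* tm_port.h -- PORT CONFIG (illustrative sketch, not a drop-in file) */
#define TM_ARCH        TM_ARCH_CPU     /* portable C kernels, no SIMD */
#define TM_OPT_LEVEL   TM_OPT0         /* smallest code size */
#define TM_MDL_TYPE    TM_MDL_INT8     /* matches the int8 .tmdl exported later */

#define tm_malloc(x)   malloc(x)       /* heap hooks for model buffers */
#define tm_free(x)     free(x)

#define TM_PRINTF(...) printf(__VA_ARGS__)   /* routed to the UART printf */
```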
## Model Training

### Dataset Preparation

To make the predictions as accurate as possible, the model should be trained on local weather data. Below is the raw data I used, split across two tables: hourly observations and a daily weather label.
| 日期 (timestamp) | 气温 (temperature, °C) | 气压 (pressure, hPa) | 湿度 (humidity, %) |
|---|---|---|---|
| 2023-01-01 00:00 | -9.99 | 1024.56 | 48.4 |
| 2023-01-01 01:00 | -10.59 | 1024.74 | 50.97 |
| 2023-01-01 02:00 | -11.64 | 1025.2 | 55.42 |
| 日期 (date) | 天气代码 (weather) |
|---|---|
| 2023-01-01 | 晴 (sunny) |
| 2023-01-02 | 阴 (overcast) |
| 2023-01-03 | 雪 (snow) |
The weather codes can be defined as: {"晴" (sunny): 0, "多云" (cloudy): 1, "阴" (overcast): 2, "雨" (rain): 3, "雪" (snow): 4}.

Use Python to split the dataset into a training table and a validation table. In each row, the first 72 columns are the 24 h of weather features (24 hours × 3 features), and the last column is the weather code of the following day, which serves as the label:
```python
import pandas as pd
import numpy as np

RAW_H = '逐小时天气_2324.csv'   # hourly weather
RAW_D = '逐日天气_2324.csv'     # daily weather
TRAIN_CSV = 'train_72x5.csv'
VAL_CSV = 'val_72x5.csv'

h = pd.read_csv(RAW_H, parse_dates=['日期'])
d = pd.read_csv(RAW_D, parse_dates=['日期'])

wcode = {'晴': 0, '多云': 1, '阴': 2, '雨': 3, '雪': 4}
d['code'] = d['天气代码'].map(wcode)

samples = []
for day in d['日期']:
    # The 24 hourly rows of this day form the features
    hrs = h[h['日期'].dt.date == day.date()]
    if len(hrs) < 24:
        continue
    # The next day's weather code is the label
    next_day = day + pd.Timedelta(days=1)
    label_row = d[d['日期'] == next_day]
    if label_row.empty:
        continue
    y = label_row.iloc[0]['code']
    x = hrs.iloc[:24][['气温', '气压', '湿度']].values.flatten()
    samples.append(np.concatenate([x, [y]]))

df = pd.DataFrame(np.array(samples))
# The first 72 columns are features, the last column is the label
train = df[df.index % 10 < 8]   # simple 8:2 split
val = df[df.index % 10 >= 8]
train.to_csv(TRAIN_CSV, index=False, header=False)
val.to_csv(VAL_CSV, index=False, header=False)
print('train:', len(train), 'val:', len(val))
```
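Weather classes are usually imbalanced (snow or rain days tend to be much rarer than sunny or cloudy ones), so it is worth a quick look at the label distribution of the generated training set before training. A minimal sketch of my own, assuming the `train_72x5.csv` produced above:

```python
import pandas as pd

# Column 72 (the 73rd column) holds the weather code label 0..4
labels = pd.read_csv('train_72x5.csv', header=None)[72]
print(labels.value_counts().sort_index())  # samples per class: 0=sunny ... 4=snow
```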
### Training the Model

```python
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def load(csv):
    arr = pd.read_csv(csv, header=None).values.astype('float32')
    return arr[:, :72], arr[:, 72].astype('int32')

x_train, y_train = load('train_72x5.csv')
x_val, y_val = load('val_72x5.csv')

# Standardize features with the training-set statistics
mean = x_train.mean(axis=0)
std = x_train.std(axis=0) + 1e-5

def standardize(x):
    return (x - mean) / std

# Small fully connected classifier: 72 inputs -> 5 weather classes
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(72,)),
    tf.keras.layers.Dense(128, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation=tf.nn.relu6),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(5, activation='softmax')
])

opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=opt,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

es = tf.keras.callbacks.EarlyStopping(patience=50,
                                      restore_best_weights=True)
history = model.fit(standardize(x_train), y_train,
                    validation_data=(standardize(x_val), y_val),
                    epochs=1000,
                    batch_size=64,
                    callbacks=[es],
                    verbose=1)

# Full-integer (int8) quantization for TinyMaix
def representative_dataset():
    for x in tf.data.Dataset.from_tensor_slices(x_train).batch(1).take(300):
        yield [tf.cast(standardize(x), tf.float32)]

converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
tflite_model = converter.convert()
open('weather_int8.tflite', 'wb').write(tflite_model)
print('tflite saved, size:', len(tflite_model))

# Save the normalization statistics for the export step below
np.save('mean.npy', mean)
np.save('std.npy', std)

plt.figure(figsize=(6, 4))
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.title('Loss curve')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
```
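Before moving to the MCU, it is worth confirming that int8 quantization has not noticeably hurt accuracy. The following is a minimal sketch of my own (not part of the original workflow), assuming the `weather_int8.tflite`, `mean.npy`, `std.npy`, and `val_72x5.csv` files produced above; it quantizes the validation inputs the same way the firmware will and runs them through the TFLite interpreter:

```python
import numpy as np
import pandas as pd
import tensorflow as tf

# Validation data, standardized with the training-set statistics
mean, std = np.load('mean.npy'), np.load('std.npy')
val = pd.read_csv('val_72x5.csv', header=None).values.astype('float32')
x_val = (val[:, :72] - mean) / std
y_val = val[:, 72].astype('int32')

interp = tf.lite.Interpreter(model_path='weather_int8.tflite')
interp.allocate_tensors()
inp = interp.get_input_details()[0]
out = interp.get_output_details()[0]
scale, zp = inp['quantization']          # input scale and zero point

correct = 0
for x, y in zip(x_val, y_val):
    # Same quantization the MCU performs: q = round(x / scale) + zp, clipped to int8
    q = np.clip(np.round(x / scale) + zp, -128, 127).astype(np.int8)
    interp.set_tensor(inp['index'], q.reshape(1, 72))
    interp.invoke()
    pred = int(np.argmax(interp.get_tensor(out['index'])[0]))
    correct += int(pred == y)

print('int8 validation accuracy:', correct / len(y_val))
```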
### Exporting Quantization Parameters

Use a Python script to export the quantization parameters `mean`, `std`, `scale`, and `zp` as C constants:
```python
import numpy as np
import tensorflow as tf

mean = np.load('mean.npy')
std = np.load('std.npy')

model = tf.lite.Interpreter(model_path="weather_int8.tflite")
model.allocate_tensors()

def to_c_arr(name, vec):
    # Print a C array definition, three values (one hour of features) per line
    print(f'static const float {name}[72] = {{')
    print('    ', end='')
    for i, v in enumerate(vec):
        print(f'{v:.6f}f', end='')
        if i != len(vec) - 1:
            print(',', end='')
        if (i + 1) % 3 == 0:
            print('\n    ', end='')
        else:
            print(' ', end='')
    print('\n};')

# Input scale and zero point of the quantized model
input_details = model.get_input_details()[0]
scale = input_details['quantization_parameters']['scales'][0]
zp = input_details['quantization_parameters']['zero_points'][0]

to_c_arr('mean', mean)
to_c_arr('std', std)
print(f'static const float scale = {scale:.6f}f;')
print(f'static const int zp = {zp};')
```

Running it prints:
```c
static const float mean[72] = {
    6.828718f, 1009.098938f, 68.643417f,
    // ...
};
static const float std[72] = {
    12.587724f, 9.960853f, 19.852306f,
    // ...
};
static const float scale = 0.024126f;
static const int zp = 5;
```
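On the MCU these constants turn each raw feature x into the int8 value the model expects: standardize first, then quantize with the input tensor's scale and zero point, i.e. `q = clamp(round(((x - mean[i]) / std[i]) / scale) + zp, -128, 127)`. With `scale = 0.024126` and `zp = 5`, a feature that standardizes to 1.0 becomes `round(1.0 / 0.024126) + 5 = 46`. The same formula appears in the C quantization loop further below.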
### Exporting the TinyMaix Model

Exporting a TinyMaix model uses the `tflite2tmdl` tool in TinyMaix's `tools` directory. Copy the `weather_int8.tflite` produced by training into the TinyMaix root directory and run:

```
python .\tools\tflite2tmdl.py .\weather_int8.tflite .\weather.tmdl int8 1 72 5
```

This generates the following two files:

```
weather.h
weather.tmdl
```

## Deploying the Model
First copy `weather.h` into the project; I placed it under `tinymaix\model`:
```
C:\USERS\HP\DESKTOP\DEV\RA_WORKSPACE\WEATHERSTATION\SRC
└─tinymaix
   ├─include
   ├─model
   │      weather.h
   └─src
```

Then paste the `mean`, `std`, `scale`, and `zp` constants into `hal_entry.c`, and we can write the inference code:
```c
static tm_mat_t outs[1];
static tm_err_t res;
static tm_mat_t in_mat;
static int8_t input[72] = {0};
static int8_t w_maxi = -1;

static void parse_output(tm_mat_t* outs)
{
    tm_mat_t out = outs[0];
    char w_tags[5][10] = {"Sunny", "Cloudy", "Overcast", "Rainy", "Snowy"};
    float* data = out.dataf;
    float maxp = 0;
    int maxi = -1;
    for(int i = 0; i < 5; i++){
        printf("%d: %.6f\n", i, data[i]);
        if(data[i] > maxp) {
            maxi = i;
            maxp = data[i];
        }
    }
    w_maxi = maxi;
    printf("### Predict output is: %s, prob %.6f\r\n", w_tags[maxi], maxp);
    return;
}

void tinyML_run(void)
{
    TM_DBGT_INIT();
    tm_mdl_t mdl;

    // Load the model
    tm_stat((tm_mdlbin_t*)mdl_data);
    res = tm_load(&mdl, mdl_data, NULL, NULL, &in_mat);
    if(res != TM_OK) {
        printf("tm model load err %d\r\n", res);
    }

    // Standardize and quantize the 24 x 3 raw features to int8
    for(int i = 0; i < 72; i++){
        float v = (weather[i/3][i%3] - mean[i]) / std[i];
        int q = (int)round(v / scale) + zp;
        if(q > 127) q = 127;
        if(q < -128) q = -128;
        input[i] = (int8_t)q;
    }

    // Feed the input data
    in_mat.data = (mtype_t*)input;

    // Run inference
    TM_DBGT_START();
    res = tm_run(&mdl, &in_mat, outs);
    TM_DBGT("tm_run");

    // Print the result
    if(res == TM_OK) parse_output(outs);
    else printf("tm run error: %d\n", res);

    // Unload the model
    tm_unload(&mdl);
}
```

Call `tinyML_run()` from `hal_entry()` and you are done.
### Complete hal_entry.c
```c
#include <stdio.h>
#include <math.h>
#include "hal_data.h"
#include "uart/bsp_uart.h"
#include "tinymaix.h"
#include "tinymaix/model/weather.h"

static tm_mat_t outs[1];
static tm_err_t res;
static tm_mat_t in_mat;
static int8_t input[72] = {0};

FSP_CPP_HEADER
void R_BSP_WarmStart(bsp_warm_start_event_t event);
FSP_CPP_FOOTER

static const float mean[72] = {
    6.828718f, 1009.098938f, 68.643417f,
    6.503504f, 1008.989441f, 69.627861f,
    // ...
};
static const float std[72] = {
    12.587724f, 9.960853f, 19.852306f,
    12.558513f, 10.011790f, 19.922518f,
    // ...
};
static const float scale = 0.024126f;
static const int zp = 5;

float weather[24][3] = {
    {8.66f, 1018.67f, 90.3f},
    {8.61f, 1018.47f, 89.99f},
    // ...
};

static const char w_tags[5][10] = {"Sunny", "Cloudy", "Overcast", "Rainy", "Snowy"};
static int8_t w_maxi = -1;

/*******************************************************************************************************************//**
 * Parse and print the model output
 *
 * @param[in] outs  model output
 **********************************************************************************************************************/
static void parse_output(tm_mat_t* outs)
{
    tm_mat_t out = outs[0];
    float* data = out.dataf;
    float maxp = 0;
    int maxi = -1;
    for(int i = 0; i < 5; i++){
        printf("%d: %.6f\n", i, data[i]);
        if(data[i] > maxp) {
            maxi = i;
            maxp = data[i];
        }
    }
    w_maxi = maxi;
    printf("### Predict output is: %s, prob %.6f\r\n", w_tags[maxi], maxp);
    return;
}

/*******************************************************************************************************************//**
 * Run TinyML inference
 **********************************************************************************************************************/
void tinyML_run(void)
{
    TM_DBGT_INIT();
    tm_mdl_t mdl;
    tm_stat((tm_mdlbin_t*)mdl_data);
    res = tm_load(&mdl, mdl_data, NULL, NULL, &in_mat);
    if(res != TM_OK) {
        printf("tm model load err %d\r\n", res);
    }
    for(int i = 0; i < 72; i++){
        float v = (weather[i/3][i%3] - mean[i]) / std[i];
        int q = (int)round(v / scale) + zp;
        if(q > 127) q = 127;
        if(q < -128) q = -128;
        input[i] = (int8_t)q;
    }
    in_mat.data = (mtype_t*)input;
    TM_DBGT_START();
    res = tm_run(&mdl, &in_mat, outs);
    TM_DBGT("tm_run");
    if(res == TM_OK) parse_output(outs);
    else printf("tm run error: %d\n", res);
    tm_unload(&mdl);
}

/*******************************************************************************************************************//**
 * main() is generated by the RA Configuration editor and is used to generate threads if an RTOS is used. This function
 * is called by main() when no RTOS is used.
 **********************************************************************************************************************/
void hal_entry(void)
{
    UART0_Init();
    R_IOPORT_Open(&g_ioport_ctrl, g_ioport.p_cfg);

    tinyML_run();

    while(1) {
        R_IOPORT_PinWrite(&g_ioport_ctrl, BSP_IO_PORT_01_PIN_13, BSP_IO_LEVEL_HIGH);
        R_BSP_SoftwareDelay(1, BSP_DELAY_UNITS_SECONDS);
        R_IOPORT_PinWrite(&g_ioport_ctrl, BSP_IO_PORT_01_PIN_13, BSP_IO_LEVEL_LOW);
        R_BSP_SoftwareDelay(1, BSP_DELAY_UNITS_SECONDS);
    }

#if BSP_TZ_SECURE_BUILD
    /* Enter non-secure code */
    R_BSP_NonSecureEnter();
#endif
}

/*******************************************************************************************************************//**
 * This function is called at various points during the startup process. This implementation uses the event that is
 * called right before main() to set up the pins.
 *
 * @param[in] event Where at in the start up process the code is currently at
 **********************************************************************************************************************/
void R_BSP_WarmStart(bsp_warm_start_event_t event)
{
    if (BSP_WARM_START_RESET == event)
    {
#if BSP_FEATURE_FLASH_LP_VERSION != 0
        /* Enable reading from data flash. */
        R_FACI_LP->DFLCTL = 1U;

        /* Would normally have to wait tDSTOP(6us) for data flash recovery. Placing the enable here, before clock and
         * C runtime initialization, should negate the need for a delay since the initialization will typically take more than 6us. */
#endif
    }

    if (BSP_WARM_START_POST_C == event)
    {
        /* C runtime environment and system clocks are setup. */

        /* Configure pins. */
        R_IOPORT_Open (&IOPORT_CFG_CTRL, &IOPORT_CFG_NAME);

#if BSP_CFG_SDRAM_ENABLED
        /* Setup SDRAM and initialize it. Must configure pins first. */
        R_BSP_SdramInit(true);
#endif
    }
}

#if BSP_TZ_SECURE_BUILD
FSP_CPP_HEADER
BSP_CMSE_NONSECURE_ENTRY void template_nonsecure_callable ();

/* Trustzone Secure Projects require at least one nonsecure callable function in order to build (Remove this if it is not required to build). */
BSP_CMSE_NONSECURE_ENTRY void template_nonsecure_callable ()
{
}
FSP_CPP_FOOTER
#endif
```

After flashing the firmware, the serial port prints:

```
================================ model stat ================================
mdl_type=0 (int8))
out_deq=1
input_cnt=1, output_cnt=1, layer_cnt=5
input 1dims: (1, 1, 72)
output 1dims: (1, 1, 5)
main buf size 200; sub buf size 0
//Note: PARAM is layer param size, include align padding
Idx Layer outshape inoft outoft PARAM MEMOUT OPS
--- Input 1, 1, 72 - 0 0 72 0
###L71: body oft = 64
###L72: type=2, is_out=0, size=10304, in_oft=0, out_oft=72, in_dims=[1,1,1,72], out_dims=[1,1,1,128], in_s=0.024, in_zp=5, out_s=0.015, out_zp=-128
###L96: FC: ws_oft=64, w_oft=576, b_oft=9792
000 FC 1, 1,128 0 72 10240 128 9216
###L71: body oft = 10368
###L72: type=2, is_out=0, size=8768, in_oft=72, out_oft=0, in_dims=[1,1,1,128], out_dims=[1,1,1,64], in_s=0.015, in_zp=-128, out_s=0.013, out_zp=-128
###L96: FC: ws_oft=64, w_oft=320, b_oft=8512
001 FC 1, 1, 64 72 0 8704 64 8192
###L71: body oft = 19136
###L72: type=2, is_out=0, size=4672, in_oft=0, out_oft=136, in_dims=[1,1,1,64], out_dims=[1,1,1,64], in_s=0.013, in_zp=-128, out_s=0.012, out_zp=-128
###L96: FC: ws_oft=64, w_oft=320, b_oft=4416
002 FC 1, 1, 64 0 136 4608 64 4096
###L71: body oft = 23808
###L72: type=2, is_out=0, size=432, in_oft=136, out_oft=0, in_dims=[1,1,1,64], out_dims=[1,1,1,5], in_s=0.012, in_zp=-128, out_s=0.034, out_zp=-6
###L96: FC: ws_oft=64, w_oft=88, b_oft=408
003 FC 1, 1, 5 136 0 368 5 320
###L71: body oft = 24240
###L72: type=3, is_out=1, size=48, in_oft=0, out_oft=168, in_dims=[1,1,1,5], out_dims=[1,1,1,5], in_s=0.034, in_zp=-6, out_s=0.004, out_zp=-128
004 Softmax 1, 1, 5 0 168 0 5 30
Total param ~23.4 KB, OPS ~0.02 MOPS, buffer 0.2 KB
===tm_run use 0.000 ms
0: 0.152344
1: 0.843750
2: 0.003906
3: 0.003906
4: 0.003906
### Predict output is: Cloudy, prob 0.843750
```