2208 - An Analysis of How TVM Optimizes a Frontend Model

This post uses ResNet-50 as an example to analyze the logic TVM follows when optimizing a frontend network model. (To be updated.)

0. Test Code

```python
# Other packages
import onnx  # model frontend
from PIL import Image  # image handling
import timeit  # timing
import numpy as np  # preprocessing of the model input
from scipy.special import softmax  # postprocessing of the model output

# TVM packages
import tvm  # the core package
from tvm.contrib.download import download_testdata  # downloads test data
import tvm.relay as relay  # Relay, used to compile the model
from tvm.contrib import graph_executor  # runs the compiled graph
# import tvm.auto_scheduler as auto_scheduler  # auto-scheduler, not used here
from tvm.autotvm.tuner import XGBTuner  # the default XGBoost-based tuner
from tvm import autotvm  # autotvm, used during tuning



# Helper that measures model run time
def model_time_test(module):
    timing_number = 10
    timing_repeat = 10
    result = (
        np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
        * 1000
        / timing_number
    )
    result = {
        "mean": np.mean(result),
        "median": np.median(result),
        "std": np.std(result),
    }
    return result
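# (Added note; an assumption, not from the original post.) Recent TVM releases
# also provide a built-in benchmark helper on GraphModule that serves the same
# purpose as this hand-rolled timer, roughly:
#   print(module.benchmark(dev, number=10, repeat=10))
# The explicit timeit version is kept here since it works on any TVM version.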



# 1. Download the model and a sample input, then preprocess them
# 1.1 Download the ONNX model and load it
model_url = "".join(
    [
        "https://github.com/onnx/models/raw/",
        "b9a54e89508f101a1611cd64f4ef56b9cb62c7cf/vision/classification/resnet/model/",
        "resnet50-v2-7.onnx",
    ]
)
model_path = download_testdata(model_url, "resnet50-v2-7.onnx", module="onnx")
onnx_model = onnx.load(model_path)
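# (Added sketch, not in the original.) The input name that section 2.2 below
# hard-codes as "data" can also be read programmatically from the ONNX graph,
# instead of using a GUI tool like Netron:
print([inp.name for inp in onnx_model.graph.input])  # expected to include 'data'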

# 1.2 Download a test image and convert it into the model's input format
img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
## Resize it to 224x224
resized_image = Image.open(img_path).resize((224, 224))
img_data = np.asarray(resized_image).astype("float32")
## Our input image is in HWC layout while ONNX expects CHW input, so convert the array
img_data = np.transpose(img_data, (2, 0, 1))
## Normalize according to the ImageNet input specification
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
imagenet_stddev = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
norm_img_data = (img_data / 255 - imagenet_mean) / imagenet_stddev
## Add the batch dimension, as we are expecting 4-dimensional input: NCHW
img_data = np.expand_dims(norm_img_data, axis=0)
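# (Added check, not in the original.) Sanity-check the preprocessing result:
# the rest of the script assumes a single NCHW image of size 224x224.
assert img_data.shape == (1, 3, 224, 224), img_data.shape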

# 2. Compile the model with Relay (producing a graph module); equivalent to `tvmc compile`
# 2.1 Set the backend target
target = "llvm -mcpu=broadwell"  # adjust -mcpu to match the host CPU
# 2.2 The model's input name and shape; inspect them with a tool such as Netron
input_name = "data"
shape_dict = {input_name: img_data.shape}
# 2.3 Pick the matching frontend and compile with Relay
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
# 2.4 Create the graph executor for the compiled module
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
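# (Added sketch; the .so file name is illustrative, not from the original post.)
# The compiled artifact can also be exported as a shared library and reloaded
# later, separating compilation from deployment:
lib.export_library("resnet50-v2-7-llvm.so")
loaded_lib = tvm.runtime.load_module("resnet50-v2-7-llvm.so")
loaded_module = graph_executor.GraphModule(loaded_lib["default"](dev))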

# 3. Run the unoptimized model for inference on the TVM runtime; equivalent to `tvmc run`
# 3.1 Set the model input
dtype = "float32"
## img_data is the image preprocessed in section 1.2
module.set_input(input_name, img_data)
# 3.2 Run the model
module.run()
# 3.3 Fetch the model output
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()
# 3.4 Time the unoptimized model
unoptimized = model_time_test(module)
# 3.5 Postprocess the ResNet output into a more readable form
## Download a list of labels
labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]
## Convert the raw scores to probabilities and print the top-5 classes
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))


# 4. Tune the model with TVM; equivalent to `tvmc tune`
# 4.1 Create the runner
## Runner parameters
number = 10
repeat = 1
min_repeat_ms = 0  # since we're tuning on a CPU, this can be set to 0
timeout = 10  # in seconds
## Create the runner
runner = autotvm.LocalRunner(
    number=number,
    repeat=repeat,
    timeout=timeout,
    min_repeat_ms=min_repeat_ms,
    enable_cpu_cache_flush=True,
)
# 4.2 Set the tuning options
tuning_option = {
    "tuner": "xgb",
    "trials": 10,  # kept small so the demo finishes quickly; real tuning runs use far more trials
    "early_stopping": 100,  # larger than trials here, so early stopping never triggers
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner
    ),
    "tuning_records": "resnet-50-v2-autotuning.json",
}
# 4.3 Extract tuning tasks from the Relay module (not directly from the ONNX model)
tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)
# 4.4 Tune the extracted tasks one by one
for i, task in enumerate(tasks):
    prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
    tuner_obj = XGBTuner(task, loss_type="rank")
    tuner_obj.tune(
        n_trial=min(tuning_option["trials"], len(task.config_space)),
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[
            autotvm.callback.progress_bar(tuning_option["trials"], prefix=prefix),
            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
        ],
    )
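# (Added sketch; the output file name is hypothetical.) The tuning log contains
# one line per measured configuration; autotvm can distill it down to the single
# best entry per workload, which is all that apply_history_best in step 5 needs:
autotvm.record.pick_best(tuning_option["tuning_records"], "resnet-50-v2-autotuning.best.json")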

# 5. Recompile the model with the best tuning records applied; equivalent to `tvmc compile --tuning-records`
with autotvm.apply_history_best(tuning_option["tuning_records"]):
    with tvm.transform.PassContext(opt_level=3, config={}):
        lib = relay.build(mod, target=target, params=params)
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))


# 6. Run sample inference with the optimized model on the TVM runtime
dtype = "float32"
module.set_input(input_name, img_data)
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
## Time the optimized model
optimized = model_time_test(module)


# 7. Print the optimized vs. unoptimized timing comparison
print("optimized: %s" % (optimized))
print("unoptimized: %s" % (unoptimized))
```
