2208 - An Analysis of How TVM Optimizes a Frontend Model

This post uses ResNet-50 as an example to analyze the logic TVM follows when optimizing a frontend network model. (To be updated.)

0. Test Code

```python
# Other packages
import onnx  # model frontend
from PIL import Image  # image handling
import timeit  # timing
import numpy as np  # preprocessing of the model input
from scipy.special import softmax  # postprocessing of the model output

# TVM packages
import tvm  # the core package
from tvm.contrib.download import download_testdata  # downloads test data
import tvm.relay as relay  # Relay, used to compile the model
from tvm.contrib import graph_executor  # runs the compiled graph
# import tvm.auto_scheduler as auto_scheduler  # auto-scheduler, not used here
from tvm.autotvm.tuner import XGBTuner  # the default XGBoost-based tuner
from tvm import autotvm  # autotvm, used during tuning



# Helper that measures model run time
def model_time_test(module):
    timing_number = 10
    timing_repeat = 10
    result = (
        np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
        * 1000
        / timing_number
    )
    result = {
        "mean": np.mean(result),
        "median": np.median(result),
        "std": np.std(result),
    }
    return result
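# (Added note; an assumption, not from the original post.) Recent TVM releases
# also provide a built-in benchmark helper on GraphModule that serves the same
# purpose as this hand-rolled timer, roughly:
#   print(module.benchmark(dev, number=10, repeat=10))
# The explicit timeit version is kept here since it works on any TVM version.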



# 1. Download the model and a sample input, then preprocess them
# 1.1 Download the ONNX model and load it
model_url = "".join(
    [
        "https://github.com/onnx/models/raw/",
        "b9a54e89508f101a1611cd64f4ef56b9cb62c7cf/vision/classification/resnet/model/",
        "resnet50-v2-7.onnx",
    ]
)
model_path = download_testdata(model_url, "resnet50-v2-7.onnx", module="onnx")
onnx_model = onnx.load(model_path)
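# (Added sketch, not in the original.) The input name that section 2.2 below
# hard-codes as "data" can also be read programmatically from the ONNX graph,
# instead of using a GUI tool like Netron:
print([inp.name for inp in onnx_model.graph.input])  # expected to include 'data'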

# 1.2 Download a test image and convert it into the model's input format
img_url = "https://s3.amazonaws.com/model-server/inputs/kitten.jpg"
img_path = download_testdata(img_url, "imagenet_cat.png", module="data")
## Resize it to 224x224
resized_image = Image.open(img_path).resize((224, 224))
img_data = np.asarray(resized_image).astype("float32")
## Our input image is in HWC layout while ONNX expects CHW input, so convert the array
img_data = np.transpose(img_data, (2, 0, 1))
## Normalize according to the ImageNet input specification
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
imagenet_stddev = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
norm_img_data = (img_data / 255 - imagenet_mean) / imagenet_stddev
## Add the batch dimension, as we are expecting 4-dimensional input: NCHW
img_data = np.expand_dims(norm_img_data, axis=0)
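# (Added check, not in the original.) Sanity-check the preprocessing result:
# the rest of the script assumes a single NCHW image of size 224x224.
assert img_data.shape == (1, 3, 224, 224), img_data.shape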

# 2. Compile the model with Relay (producing a graph module); equivalent to `tvmc compile`
# 2.1 Set the backend target
target = "llvm -mcpu=broadwell"  # adjust -mcpu to match the host CPU
# 2.2 The model's input name and shape; inspect them with a tool such as Netron
input_name = "data"
shape_dict = {input_name: img_data.shape}
# 2.3 Pick the matching frontend and compile with Relay
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)
# 2.4 Create the graph executor for the compiled module
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
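# (Added sketch; the .so file name is illustrative, not from the original post.)
# The compiled artifact can also be exported as a shared library and reloaded
# later, separating compilation from deployment:
lib.export_library("resnet50-v2-7-llvm.so")
loaded_lib = tvm.runtime.load_module("resnet50-v2-7-llvm.so")
loaded_module = graph_executor.GraphModule(loaded_lib["default"](dev))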

# 3. Run the unoptimized model for inference on the TVM runtime; equivalent to `tvmc run`
# 3.1 Set the model input
dtype = "float32"
## img_data is the image preprocessed in section 1.2
module.set_input(input_name, img_data)
# 3.2 Run the model
module.run()
# 3.3 Fetch the model output
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()
# 3.4 Time the unoptimized model
unoptimized = model_time_test(module)
# 3.5 Postprocess the ResNet output into a more readable form
## Download a list of labels
labels_url = "https://s3.amazonaws.com/onnx-model-zoo/synset.txt"
labels_path = download_testdata(labels_url, "synset.txt", module="data")
with open(labels_path, "r") as f:
    labels = [l.rstrip() for l in f]
## Convert the raw scores to probabilities and print the top-5 classes
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))


# 4. Tune the model with TVM; equivalent to `tvmc tune`
# 4.1 Create the runner
## Runner parameters
number = 10
repeat = 1
min_repeat_ms = 0  # since we're tuning on a CPU, this can be set to 0
timeout = 10  # in seconds
## Create the runner
runner = autotvm.LocalRunner(
    number=number,
    repeat=repeat,
    timeout=timeout,
    min_repeat_ms=min_repeat_ms,
    enable_cpu_cache_flush=True,
)
# 4.2 Set the tuning options
tuning_option = {
    "tuner": "xgb",
    "trials": 10,  # kept small so the demo finishes quickly; real tuning runs use far more trials
    "early_stopping": 100,  # larger than trials here, so early stopping never triggers
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner
    ),
    "tuning_records": "resnet-50-v2-autotuning.json",
}
# 4.3 Extract tuning tasks from the Relay module (not directly from the ONNX model)
tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)
# 4.4 Tune the extracted tasks one by one
for i, task in enumerate(tasks):
    prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))
    tuner_obj = XGBTuner(task, loss_type="rank")
    tuner_obj.tune(
        n_trial=min(tuning_option["trials"], len(task.config_space)),
        early_stopping=tuning_option["early_stopping"],
        measure_option=tuning_option["measure_option"],
        callbacks=[
            autotvm.callback.progress_bar(tuning_option["trials"], prefix=prefix),
            autotvm.callback.log_to_file(tuning_option["tuning_records"]),
        ],
    )
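# (Added sketch; the output file name is hypothetical.) The tuning log contains
# one line per measured configuration; autotvm can distill it down to the single
# best entry per workload, which is all that apply_history_best in step 5 needs:
autotvm.record.pick_best(tuning_option["tuning_records"], "resnet-50-v2-autotuning.best.json")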

# 5. Recompile the model with the best tuning records applied; equivalent to `tvmc compile --tuning-records`
with autotvm.apply_history_best(tuning_option["tuning_records"]):
    with tvm.transform.PassContext(opt_level=3, config={}):
        lib = relay.build(mod, target=target, params=params)
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))


# 6. Run sample inference with the optimized model on the TVM runtime
dtype = "float32"
module.set_input(input_name, img_data)
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()
scores = softmax(tvm_output)
scores = np.squeeze(scores)
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    print("class='%s' with probability=%f" % (labels[rank], scores[rank]))
## Time the optimized model
optimized = model_time_test(module)


# 7. Print the optimized vs. unoptimized timing comparison
print("optimized: %s" % (optimized))
print("unoptimized: %s" % (unoptimized))
```
