index.html · 124 lines (95 loc) · 33.7 KB
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=2"><meta name="theme-color" content="#222"><meta http-equiv="X-UA-COMPATIBLE" content="IE=edge,chrome=1"><meta name="renderer" content="webkit"><link rel="icon" type="image/ico" sizes="32x32" href="/assets/favicon.ico"><link rel="apple-touch-icon" sizes="180x180" href="/assets/apple-touch-icon.png"><link rel="alternate" href="/rss.xml" title="Пусть этот камень будет более крепким, чем человек" type="application/rss+xml"><link rel="alternate" href="/atom.xml" title="Пусть этот камень будет более крепким, чем человек" type="application/atom+xml"><link rel="alternate" type="application/json" title="Пусть этот камень будет более крепким, чем человек" href="https://forcheetah.github.io/feed.json"><link rel="preconnect" href="https://lf9-cdn-tos.bytecdntp.com"><link rel="preconnect" href="https://at.alicdn.com"><link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Mulish:400,400italic,700,700italic%7CFredericka%20the%20Great:400,400italic,700,700italic%7CNoto%20Serif%20JP:400,400italic,700,700italic%7CNoto%20Serif%20SC:400,400italic,700,700italic%7CInconsolata:400,400italic,700,700italic&display=swap&subset=latin,latin-ext" media="none" onload="this.media='all'"><link rel="stylesheet" href="/css/app.css?v=0.4.2"><link rel="modulepreload" href="/js/chunk-FJ7AJ5BW.js"><link rel="modulepreload" href="/js/chunk-MQTNP6EI.js"><link rel="modulepreload" href="/js/chunk-QAWHJ5B3.js"><link rel="modulepreload" href="/js/index.esm-SU253EAQ.js"><link rel="modulepreload" href="/js/post-SZ2V6ERD.js"><link rel="modulepreload" href="/js/quicklink-GO25OZIT.js"><link rel="modulepreload" href="/js/siteInit.js"><link rel="preload" href="https://forcheetah.github.io/assets/danger.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/girl.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/lunbo4.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/lunbo12.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/lunbo11.webp" as="image" fetchpriority="high"><link rel="preload" href="https://forcheetah.github.io/assets/harrypotter.webp" as="image" fetchpriority="high"><meta name="keywords" content="AI, CNN, Inference"><meta name="description" content="有自己的博客很帅,但是我很懒,要命!!!"><link rel="canonical" href="https://forcheetah.github.io/"><title>暮冬Z羡慕的博客 = Пусть этот камень будет более крепким, чем человек = 神经网络推理、加速、AI编译。 我必须立刻开始挣扎!</title><meta name="generator" content="Hexo 7.0.0"></head><body itemscope="" itemtype="http://schema.org/WebPage"><div id="loading"><div class="cat"><div class="body"></div><div class="head"><div class="face"></div></div><div class="foot"><div class="tummy-end"></div><div class="bottom"></div><div class="legs left"></div><div class="legs right"></div></div><div class="paw"><div class="hands left"></div><div class="hands right"></div></div></div></div><div id="container"><header id="header" itemscope="" itemtype="http://schema.org/WPHeader"><div class="inner"><div id="brand"><div class="pjax"><a class="logo" href="/" rel="start"><p class="artboard">暮冬Z羡慕的博客</p><h1 class="title" itemprop="name headline">Пусть этот камень будет более крепким, чем человек</h1></a><p class="meta" itemprop="description">= 神经网络推理、加速、AI编译。 我必须立刻开始挣扎! 
=</p></div></div><nav id="nav"><div class="inner"><div class="toggle"><div class="lines" aria-label="Toggle navigation bar"><span class="line"></span><span class="line"></span><span class="line"></span></div></div><ul class="menu"><li class="item title"><a href="/" rel="start">暮冬Z羡慕的博客</a></li></ul><ul class="right" id="rightNav"><li class="item theme"><i class="ic i-sun"></i></li><li class="item search"><i class="ic i-search"></i></li></ul></div></nav></div><div class="pjax" id="imgs"><ul><li class="item" style="background-image: url('https://forcheetah.github.io/assets/danger.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/girl.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo4.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo12.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/lunbo11.webp');"></li><li class="item" style="background-image: url('https://forcheetah.github.io/assets/harrypotter.webp');"></li></ul></div></header><div id="waves"><svg class="waves" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" viewBox="0 24 150 28" preserveAspectRatio="none" shape-rendering="auto"><defs><path id="gentle-wave" d="M-160 44c30 0 58-18 88-18s 58 18 88 18 58-18 88-18 58 18 88 18 v44h-352z"></path></defs><g class="parallax"><use xlink:href="#gentle-wave" x="48" y="0"></use><use xlink:href="#gentle-wave" x="48" y="3"></use><use xlink:href="#gentle-wave" x="48" y="5"></use><use xlink:href="#gentle-wave" x="48" y="7"></use></g></svg></div><main><div class="inner"><div class="pjax" id="main"><div class="index wrap"><h2 class="divider">Sticky Posts</h2><div class="segments sticky"><article class="item"><div class="cover"><a href="/2024/05/24/category/" itemprop="url" title="博客汇总目录"><img loading="eager" decoding="async" src="https://forcheetah.github.io/assets/lunbo6.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-05-24 21:25:53"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-05-24T21:25:53+08:00">2024-05-24</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>2.3k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>2 mins.</span></span></div><h3><a href="/2024/05/24/category/" itemprop="url" title="博客汇总目录">博客汇总目录</a></h3><div class="excerpt"># 暮冬 Z 羡慕 的博客 文章汇总
# 卷积加速算法
【Im2Col】卷积加速算法【1】 NCHW
【Im2Col】卷积加速算法【2】NHWC
【im2col】昇腾卷积加速算法
【Winograd】卷积加速算法原理及实现
【gemm】Gemm 计算加速
【gemm】内存对齐
[【conv 加速】NVDLA 卷积加速算法]
# AI 推理引擎
【推理引擎】常见推理引擎
【推理引擎】NCNN 和 Tengine 量化推理逻辑对比
【量化】连续卷积层首尾量化的可行性
[【推理引擎】Tengine 框架结构介绍]
# AI 编译器
【TVM】</div><div class="meta footer"><span><a href="/categories/%E7%9B%AE%E5%BD%95/" itemprop="url" title="目录"><i class="ic i-flag"></i>目录</a></span></div><a href="/2024/05/24/category/" class="btn" itemprop="url" title="博客汇总目录">more...</a></div></article></div><h2 class="divider">Post List</h2><div class="segments posts"><article class="item"><div class="cover"><a href="/2025/01/03/quanti01/" itemprop="url" title="【量化】连续卷积层首尾量化的可行性"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo3.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2025-01-03 21:12:09"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2025-01-03T21:12:09+08:00">2025-01-03</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>9.4k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>9 mins.</span></span></div><h3><a href="/2025/01/03/quanti01/" itemprop="url" title="【量化】连续卷积层首尾量化的可行性">【量化】连续卷积层首尾量化的可行性</a></h3><div class="excerpt"># 前言
本篇文章探讨一下连续卷积层首尾量化的可行性。连续卷积指的是神经网络模型中接连出现若干个卷积 layer,中间没有 pooling、FC 等其他 layer;首尾量化指的是只在首端量化,仅在尾端反量化,中间不添加量化反量化操作。探索连续卷积层首尾量化的可行性,目的是试图降低每一层都进行繁杂的量化反量化运算的时间消耗。在上一篇文章【推理引擎】NCNN 和 Tengine 量化推理逻辑对比 中提到了这个想法,本篇文章将通过公式演算和代码仿真的方式探索其可行性。
作为初学者,错误在所难免,还望不吝赐教。
# 连续卷积层首尾量化
借用上一篇文章【推理引擎】NCNN 和 Tengine 量化推理</div><div class="meta footer"><span><a href="/categories/%E9%87%8F%E5%8C%96/" itemprop="url" title="量化"><i class="ic i-flag"></i>量化</a></span></div><a href="/2025/01/03/quanti01/" class="btn" itemprop="url" title="【量化】连续卷积层首尾量化的可行性">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/12/24/conv6/" itemprop="url" title="【Gemm】内存对齐"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo5.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-12-24 20:44:31"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-12-24T20:44:31+08:00">2024-12-24</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>8.5k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>8 mins.</span></span></div><h3><a href="/2024/12/24/conv6/" itemprop="url" title="【Gemm】内存对齐">【Gemm】内存对齐</a></h3><div class="excerpt"># 前言
本篇文章梳理内存对齐的相关概念和知识,并借上一篇文章《【Gemm】Gemm 计算加速》 来简单探讨一下内存排布对计算加速的重要作用。
本篇文章参考和引用了 《nihui 大佬的 ncnn 工程》,《ncnn 初探二:图解 ncnn::Mat 的内存排布》,关于 mat 中 data 的内存排列问题,《Linux 内核:内存管理 —— 内存对齐》等,并询问过大语言模型通义千问、文心一言等。如有不清楚的地方可以点击链接查阅。
作为初学者,错误在所难免,还望不吝赐教。
# 处理器的对齐访问
对齐访问的定义:对齐访问是指处理器在访问内存时,要求访问的起始地址是某个特定数值(通常是 2 的幂)</div><div class="meta footer"><span><a href="/categories/%E5%8D%B7%E7%A7%AF%E5%8A%A0%E9%80%9F/" itemprop="url" title="卷积加速"><i class="ic i-flag"></i>卷积加速</a></span></div><a href="/2024/12/24/conv6/" class="btn" itemprop="url" title="【Gemm】内存对齐">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/12/20/conv5/" itemprop="url" title="【gemm】Gemm计算加速"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/harrypotter.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-12-20 22:06:41"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-12-20T22:06:41+08:00">2024-12-20</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>23k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>21 mins.</span></span></div><h3><a href="/2024/12/20/conv5/" itemprop="url" title="【gemm】Gemm计算加速">【gemm】Gemm计算加速</a></h3><div class="excerpt"># 前言
本篇文章简单介绍矩阵乘的加速方法,以学习算子加速需要注意的方面。想要学习更多内容可以参考《OpenBLAS gemm 从零入门》,《BLISlab: A Sandbox for Optimizing GEMM》,道阻且长_再探矩阵乘法优化,《How To Optimize GEMM》等项目或文章。
作为初学者,错误在所难免,还望不吝赐教。
# 1. 基准算法
矩阵乘运算的基准算法,未经过任何优化。矩阵以行主序进行排布, 针对 X86 平台。矩阵 C= A * B,A 矩阵为 (M,K), B 矩阵为 (K,N)。
#include <stdio.h>#defi</div><div class="meta footer"><span><a href="/categories/%E5%8D%B7%E7%A7%AF%E5%8A%A0%E9%80%9F/" itemprop="url" title="卷积加速"><i class="ic i-flag"></i>卷积加速</a></span></div><a href="/2024/12/20/conv5/" class="btn" itemprop="url" title="【gemm】Gemm计算加速">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/10/31/tvm05/" itemprop="url" title="【TVM】通过代码学习编译流程【5】FuseOps"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo1.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-10-31 21:11:09"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-10-31T21:11:09+08:00">2024-10-31</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>14k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>13 mins.</span></span></div><h3><a href="/2024/10/31/tvm05/" itemprop="url" title="【TVM】通过代码学习编译流程【5】FuseOps">【TVM】通过代码学习编译流程【5】FuseOps</a></h3><div class="excerpt"># 前言
本系列文章将从代码和流程图入手,详细介绍 TVM AI 编译器的编译流程。本文章为第五篇,对应的 TVM 版本为当前最新版本 1.7。
网络上有不少 TVM 工程的教程资源,如果本博客也是其他教程的简单重复的话,则网络的角落里又多了一份纯粹的空间占用者。所以我在想,本文章有什么特点才值得一看呢?我觉得有两个优点: 1、本文从代码出发,不会泛泛而谈,能够从细节了解 TVM;2、自认为结构流程图画的不错,能够从整体上把握 TVM 的脉络。所以,也许值得一看呢。
本篇文章介绍 TVM FuseOps 算子融合 Pass。文章 《【TVM】通过代码学习编译流程【4】BuildRelay》 已</div><div class="meta footer"><span><a href="/categories/tvm/" itemprop="url" title="tvm"><i class="ic i-flag"></i>tvm</a></span></div><a href="/2024/10/31/tvm05/" class="btn" itemprop="url" title="【TVM】通过代码学习编译流程【5】FuseOps">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/10/25/tvm06/" itemprop="url" title="【TVM】通过代码学习编译流程【6】CodeGen"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo8.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-10-25 22:11:43"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-10-25T22:11:43+08:00">2024-10-25</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>17k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>15 mins.</span></span></div><h3><a href="/2024/10/25/tvm06/" itemprop="url" title="【TVM】通过代码学习编译流程【6】CodeGen">【TVM】通过代码学习编译流程【6】CodeGen</a></h3><div class="excerpt"># 前言
本系列文章将从代码和流程图入手,详细介绍 TVM AI 编译器的编译流程。本文章为第六篇,对应的 TVM 版本为当前最新版本 1.7。
网络上有不少 TVM 工程的教程资源,如果本博客也是其他教程的简单重复的话,则网络的角落里又多了一份纯粹的空间占用者。所以我在想,本文章有什么特点才值得一看呢?我觉得有两个优点: 1、本文从代码出发,不会泛泛而谈,能够从细节了解 TVM;2、自认为结构流程图画的不错,能够从整体上把握 TVM 的脉络。所以,也许值得一看呢。
本篇文章介绍 TVM CodeGen 函数。文章 《【TVM】通过代码学习编译流程【4】BuildRelay》 已经介绍了 Bu</div><div class="meta footer"><span><a href="/categories/tvm/" itemprop="url" title="tvm"><i class="ic i-flag"></i>tvm</a></span></div><a href="/2024/10/25/tvm06/" class="btn" itemprop="url" title="【TVM】通过代码学习编译流程【6】CodeGen">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/10/22/tvm3.5/" itemprop="url" title="【TVM】通过代码学习类【3.5】Pass"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo3.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-10-22 19:48:24"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-10-22T19:48:24+08:00">2024-10-22</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>9.2k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>8 mins.</span></span></div><h3><a href="/2024/10/22/tvm3.5/" itemprop="url" title="【TVM】通过代码学习类【3.5】Pass">【TVM】通过代码学习类【3.5】Pass</a></h3><div class="excerpt"># 前言
文章 《【TVM】通过代码学习编译流程》系列 主要介绍 TVM 在模型编译过程的流程,有时候感觉缺少了对类及其属性和方法的介绍。所以决定在系列文章的中间插入一些 “类的结构及其属性方法” 的介绍。
本篇文章主要介绍 Pass 及其相关类。
作为初学者,错误在所难免,还望不吝赐教。
# Pass
可以再回顾一下在《【TVM】通过代码学习编译流程【4】》中讲到的本体、桥梁、指针的关系。
先看一看 Pass 的基类, 位于 include/tvm/ir/transform.h 。 Pass 本体 PassNode 。内容很少,主要就是 Pass 的执行函</div><div class="meta footer"><span><a href="/categories/tvm/" itemprop="url" title="tvm"><i class="ic i-flag"></i>tvm</a></span></div><a href="/2024/10/22/tvm3.5/" class="btn" itemprop="url" title="【TVM】通过代码学习类【3.5】Pass">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/10/21/tvm04/" itemprop="url" title="【TVM】通过代码学习编译流程【4】BuildRelay"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo5.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-10-21 22:03:00"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-10-21T22:03:00+08:00">2024-10-21</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>16k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>15 mins.</span></span></div><h3><a href="/2024/10/21/tvm04/" itemprop="url" title="【TVM】通过代码学习编译流程【4】BuildRelay">【TVM】通过代码学习编译流程【4】BuildRelay</a></h3><div class="excerpt"># 前言
本系列文章将从代码和流程图入手,详细介绍 TVM AI 编译器的编译流程。本文章为第四篇,对应的 TVM 版本为当前最新版本 1.7。
网络上有不少 TVM 工程的教程资源,如果本博客也是其他教程的简单重复的话,则网络的角落里又多了一份纯粹的空间占用者。所以我在想,本文章有什么特点才值得一看呢?我觉得有两个优点: 1、本文从代码出发,不会泛泛而谈,能够从细节了解 TVM;2、自认为结构流程图画的不错,能够从整体上把握 TVM 的脉络。所以,也许值得一看呢。
本篇文章介绍 TVM BuildRelay 函数。文章 《【TVM】通过代码学习编译流程【3】模型编译》 已经介绍了 Relay</div><div class="meta footer"><span><a href="/categories/tvm/" itemprop="url" title="tvm"><i class="ic i-flag"></i>tvm</a></span></div><a href="/2024/10/21/tvm04/" class="btn" itemprop="url" title="【TVM】通过代码学习编译流程【4】BuildRelay">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/10/18/aicompile01/" itemprop="url" title="【AI编译】Tiling操作能优化什么时间"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo4.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-10-18 21:44:56"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-10-18T21:44:56+08:00">2024-10-18</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>3.3k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>3 mins.</span></span></div><h3><a href="/2024/10/18/aicompile01/" itemprop="url" title="【AI编译】Tiling操作能优化什么时间">【AI编译】Tiling操作能优化什么时间</a></h3><div class="excerpt"># 前言
本篇讲解 Tiling 操作为什么能够优化神经网络推理。
也可以参考 《Ascend C 算子优化实用技巧 04——Tiling 优化》
作为初学者,错误在所难免,还望不吝赐教。
# 什么是 tiling
无法完整的容纳算子的输入与输出,需要每次搬运一部分输入进行计算然后搬出,再搬运下一部分输入进行计算,直到得到完整的最终结果,这个数据切分、分块计算的过程称之为 Tiling,切分数据的算法称为 Tiling 算法或者 Tiling 策略。
# tile 算子和 tiling 的区别
我们先问一问语言大模型两者的区别:
# 神经网络推理中的 Tile 算子
在神经网络中,会发现 ti</div><div class="meta footer"><span><a href="/categories/compile/" itemprop="url" title="compile"><i class="ic i-flag"></i>compile</a></span></div><a href="/2024/10/18/aicompile01/" class="btn" itemprop="url" title="【AI编译】Tiling操作能优化什么时间">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/10/17/tvm03/" itemprop="url" title="【TVM】通过代码学习编译流程【3】模型编译"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/gamersky.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-10-17 20:03:36"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-10-17T20:03:36+08:00">2024-10-17</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>24k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>22 mins.</span></span></div><h3><a href="/2024/10/17/tvm03/" itemprop="url" title="【TVM】通过代码学习编译流程【3】模型编译">【TVM】通过代码学习编译流程【3】模型编译</a></h3><div class="excerpt"># 前言
本系列文章将从代码和流程图入手,详细介绍 TVM AI 编译器的编译流程。本文章为第三篇,对应的 TVM 版本为当前最新版本 1.7。
网络上有不少 TVM 工程的教程资源,如果本博客也是其他教程的简单重复的话,则网络的角落里又多了一份纯粹的空间占用者。所以我在想,本文章有什么特点才值得一看呢?我觉得有两个优点: 1、本文从代码出发,不会泛泛而谈,能够从细节了解 TVM;2、自认为结构流程图画的不错,能够从整体上把握 TVM 的脉络。所以,也许值得一看呢。
本篇文章介绍 TVM 对模型的编译流程。文章 《【TVM】通过代码学习编译流程【2】模型转换》 已经介绍了 onnx 模型转换为</div><div class="meta footer"><span><a href="/categories/tvm/" itemprop="url" title="tvm"><i class="ic i-flag"></i>tvm</a></span></div><a href="/2024/10/17/tvm03/" class="btn" itemprop="url" title="【TVM】通过代码学习编译流程【3】模型编译">more...</a></div></article><article class="item"><div class="cover"><a href="/2024/10/13/tvm02/" itemprop="url" title="【TVM】通过代码学习编译流程【2】模型转换"><img loading="lazy" decoding="async" data-src="https://forcheetah.github.io/assets/lunbo13.webp" alt="article cover"></a></div><div class="info"><div class="meta"><span class="item" title="Created: 2024-10-13 20:29:30"><span class="icon"><i class="ic i-calendar"></i></span><time itemprop="dateCreated datePublished" datetime="2024-10-13T20:29:30+08:00">2024-10-13</time></span><span class="item" title="Symbols count in article"><span class="icon"><i class="ic i-pen"></i></span><span>20k</span><span class="text">words</span></span><span class="item" title="Reading time"><span class="icon"><i class="ic i-clock"></i></span><span>18 mins.</span></span></div><h3><a href="/2024/10/13/tvm02/" itemprop="url" title="【TVM】通过代码学习编译流程【2】模型转换">【TVM】通过代码学习编译流程【2】模型转换</a></h3><div class="excerpt"># 前言
本系列文章将从代码和流程图入手,详细介绍 TVM AI 编译器的编译流程。本文章为第二篇,对应的 TVM 版本为当前最新版本 1.7。
网络上有不少 TVM 工程的教程资源,如果本博客也是其他教程的简单重复的话,则网络的角落里又多了一份纯粹的空间占用者。所以我在想,本文章有什么特点才值得一看呢?我觉得有两个优点: 1、本文从代码出发,不会泛泛而谈,能够从细节了解 TVM;2、自认为结构流程图画的不错,能够从整体上把握 TVM 的脉络。所以,也许值得一看呢。
本篇文章以 onnx 为例,介绍主流模型转换为 TVM 高级中间表示 Relay IR 的过程。
作为初学者,错误在所难免,还望</div><div class="meta footer"><span><a href="/categories/tvm/" itemprop="url" title="tvm"><i class="ic i-flag"></i>tvm</a></span></div><a href="/2024/10/13/tvm02/" class="btn" itemprop="url" title="【TVM】通过代码学习编译流程【2】模型转换">more...</a></div></article></div></div><nav class="pagination"><div class="inner"><span class="page-number current">1</span><a class="page-number" href="/page/2/">2</a><a class="page-number" href="/page/3/">3</a><a class="extend next" rel="next" href="/page/2/"><i class="ic i-angle-right" aria-label="Next page"></i></a></div></nav></div><div id="sidebar"><div class="inner"><div class="panels"><div class="inner"><div class="contents panel pjax" data-title="Contents"></div><div class="related panel pjax" data-title="Related"></div><div class="overview panel" data-title="Overview"><div class="author" itemprop="author" itemscope="itemscope" itemtype="http://schema.org/Person"><img class="image" loading="lazy" decoding="async" itemprop="image" alt="XianMu" src="/assets/avatar.webp"><p class="name" itemprop="name">XianMu</p><div class="description" itemprop="description">有自己的博客很帅,但是我很懒,要命!!!</div></div><nav class="state"><div class="item posts"><a href="/archives/"><span class="count">29</span><span class="name">posts</span></a></div><div class="item categories"><a href="/categories/"><span class="count">13</span><span class="name">categories</span></a></div><div class="item tags"><a href="/tags/"><span class="count">20</span><span class="name">tags</span></a></div></nav><div class="social"><a target="_blank" rel="noopener" href="https://github.com/ForCheetah" class="item github" title="https://github.com/ForCheetah"><i class="ic i-github"></i></a><a target="_blank" rel="noopener" href="https://www.zhihu.com/people/guai-dao-ji-de-3-50" class="item zhihu" title="https://www.zhihu.com/people/guai-dao-ji-de-3-50"><i class="ic i-zhihu"></i></a><a href="/huasen.w@foxmail.com" class="item email" title="huasen.w@foxmail.com"><i class="ic i-envelope"></i></a></div><div class="menu"><li class="item"><a href="/" rel="section"><i class="ic i-home"></i>Home</a></li></div></div></div></div><ul id="quick"><li class="prev pjax"></li><li class="up"><i class="ic i-arrow-up"></i></li><li class="down"><i class="ic i-arrow-down"></i></li><li class="next pjax"><a href="/page/2/" rel="next" title="Next Post"><i class="ic i-chevron-right"></i></a></li><li class="percent"></li></ul></div></div><div class="dimmer"></div></div></main><footer id="footer"><div class="inner"><div class="widgets"><div class="rpost pjax"><h2>Random Posts</h2><ul><li class="item"><div class="breadcrumb"><a href="/categories/%E5%8D%B7%E7%A7%AF%E5%8A%A0%E9%80%9F/" title="In卷积加速">卷积加速</a></div><span><a href="/2024/05/23/conv2/">【Im2Col】卷积加速算法【2】NHWC</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E5%8D%B7%E7%A7%AF%E5%8A%A0%E9%80%9F/" title="In卷积加速">卷积加速</a></div><span><a href="/2024/12/20/conv5/">【gemm】Gemm计算加速</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/tvm/" title="Intvm">tvm</a></div><span><a href="/2024/06/10/deployTVM/">【TVM】C++部署运行TVM</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E5%8D%B7%E7%A7%AF%E5%8A%A0%E9%80%9F/" title="In卷积加速">卷积加速</a></div><span><a href="/2024/07/07/conv4/">【Winograd】卷积加速算法原理及实现</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E6%8E%A8%E7%90%86%E5%BC%95%E6%93%8E/" title="In推理引擎">推理引擎</a></div><span><a 
href="/2024/05/30/engine1/">【推理引擎】NCNN和Tengine量化推理逻辑对比</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E8%AE%A1%E7%AE%97%E6%9C%BA%E7%A7%91%E5%AD%A6/" title="In计算机科学">计算机科学</a><i class="ic i-angle-right"></i><a href="/categories/%E8%AE%A1%E7%AE%97%E6%9C%BA%E7%A7%91%E5%AD%A6/Linux/" title="InLinux">Linux</a></div><span><a href="/2024/05/12/test/">foo</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/tvm/" title="Intvm">tvm</a></div><span><a href="/2024/10/10/tvm01/">【TVM】通过代码学习编译流程【1】必要知识</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/%E9%97%AE%E9%A2%98%E8%A7%A3%E5%86%B3/" title="In问题解决">问题解决</a></div><span><a href="/2024/05/15/openBlas/">openBlas库的安装与简单使用</a></span></li><li class="item"><div class="breadcrumb"><a href="/categories/tvm/" title="Intvm">tvm</a></div><span><a href="/2024/10/13/tvm02/">【TVM】通过代码学习编译流程【2】模型转换</a></span></li><li class="item"><div class="breadcrumb"></div><span><a href="/2024/05/12/hello-world/">Hello World</a></span></li></ul></div><div class="rpost pjax"><h2>Recent Comments</h2></div></div><div class="status"><div class="copyright">© 2010 -<span itemprop="copyrightYear">2025</span><span class="with-love"><i class="ic i-sakura rotate"></i></span><span class="author" itemprop="copyrightHolder">XianMu @ 暮冬Z羡慕的博客</span></div><div class="count"><span class="post-meta-item-icon"><i class="ic i-chart-area"></i></span><span title="Symbols count total">212k words</span><span class="post-meta-divider"> | </span><span class="post-meta-item-icon"><i class="ic i-coffee"></i></span><span title="Reading time total">3:13</span></div><div class="powered-by">Powered by <a target="_blank" rel="noopener" href="https://hexo.io/">Hexo</a> & Theme.<a target="_blank" rel="noopener" href="https://github.com/theme-shoka-x/hexo-theme-shokaX/">ShokaX</a></div></div></div></footer></div><script data-config="" type="text/javascript">var LOCAL = {
  ispost: false,
  path: ``,
  favicon: {
    show: `(●´3\`●) Here we go again.`,
    hide: `(´Д\`) It's a disaster!`
  },
  search: {
    placeholder: "Search for Posts",
    empty: "We didn't find any results for the search: ${query}",
    stats: "${hits} results found in ${time} ms"
  },
  copy_tex: false,
  katex: false,
  mermaid: false,
  audio: undefined,
  fancybox: true,
  nocopy: false,
  outime: true,
  template: `<div class="note warning"><p><span class="label warning">Article Timeliness Alert</span><br>This is an article published {{publish}} days ago and last updated {{updated}} days ago. Some information may have changed, so please be careful to screen it.</p></div>`,
  quiz: {
    choice: `Multiple Choice`,
    multiple: `Multiple Answer`,
    true_false: `True/False`,
    essay: `Questions`,
    gap_fill: `Gap Filling`,
    mistake: `Wrong Answer`
  },
  ignores: [
    (uri) => uri.includes('#'),
    (uri) => new RegExp(LOCAL.path + '$').test(uri),
    []
  ]
};
</script><script src="https://lf9-cdn-tos.bytecdntp.com/cdn/expire-6-M/pace/1.2.4/pace.min.js" async=""></script><script src="/js/siteInit.js?v=0.4.2" type="module" fetchpriority="high" defer=""></script></body></html>