在LLVM中可视化代码结构

1. 摘要
2. 调用图、控制和数据流

2.1 调用图
2.2 CFG(基本块)
2.3 CFG & DFG(指令)
2.4 指令
2.5 Zeller’s Ex. 7.1 的 CFG & DFG

3. 依赖分析Pass

3.1 内存依赖分析
3.2 依赖分析

1. 摘要

本文概述了LLVM创建人类可读的代码结构可视化和相关依赖分析passes的功能。
程序代码示例(vector_sum)

void vector_sum(int length, float * in1, float * in2, float * out){
	for(int i = 0; i < length; i++)
		out[i] = in1[i] + in2[i];
}
#include<malloc.h>
#include<stdio.h>
#include<stdlib.h>
int main(int argc, char* argv[]){
	if(argc != 2) return 1;
	int length = atoi(argv[1]);
	float* in1 = (float*)malloc(length * sizeof(float));
	float* in2 = (float*)malloc(length * sizeof(float));
	float* out = (float*)malloc(length * sizeof(float));
	for(int i = 0; i < length; i++){
		in1[i] = drand48();
		in2[i] = drand48();
	}
	vector_sum(length, in1, in2, out);
	printf("%f\n", out[length−1]);
	return 0;
}

这个函数的LLVM IR (vector_sum)

$ clang −emit − llvm example.c −O1 −S −o − | less
define void @vector_sum ( i32 %length , float * nocapture readonly %in1 , float * nocapture readonly %in2 , float * nocapture %out ) #0 {
entry :
	%cmp9 = icmp sgt i32 %length , 0
	br i1 %cmp9 , label %for.body , label %for.end
	
for.body :														; preds = %entry, %for.body
	%indvars.iv = phi i64 [ %indvars.iv.next , %for.body ] , [ 0 , %entry ]
	%arrayidx = getelementptr inbounds float * %in1 , i64 %indvars.iv
	%0 = load float * %arrayidx , align 4 , ! tbaa ! 1
	%arrayidx2 = getelementptr inbounds float * %in2 , i64 %indvars.iv
	%1 = load float * %arrayidx2 , align 4 , ! tbaa ! 1
	%add = fadd float %0 , %1
	%arrayidx4 = getelementptr inbounds float * %out , i64 %indvars.iv
	store float %add , float * %arrayidx4 , align 4 , ! tbaa ! 1
	%indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
	%lftr.wideiv = trunc i64 %indvars.iv.next to i32
	%exitcond = icmp eq i32 %lftr.wideiv , %length
	br i1 %exitcond , label %for.end , label %for.body
	
for.end :														; preds = %for.body, %entry
	ret void
}

2. 调用图、控制和数据流

在LLVM中，从命令行可以得到两种形式的图：

调用图
CFG(基本块)
CFG & DFG(指令)

2.1 调用图

在LLVM中可视化代码结构

2.2 CFG(基本块)

在LLVM中可视化代码结构

2.3 CFG & DFG(指令)

在LLVM中可视化代码结构

2.4 指令

调用图 —— 内置LLVM pass + dot

$ clang −emit − llvm −S −c example . c −o − | opt −O1 | opt −dot − callgraph −o / dev / null && dot −Tpdf callgraph . dot −o callgraph . pdf

CFG(基本块) —— 内置LLVM pass + dot

$ clang −emit − llvm −S −c example . c −o − | opt −O1 | opt −dot − cfg −o / dev / null && dot −Tpdf cfg . vector_sum . dot −o cfg . vector_sum . pdf

CFG & DFG(指令) —— 由Paul Sokolovsky编写的llvmpy脚本 + dot

$ . / graph − llvm − ir . / example . ll && dot −Tpdf vector_sum . dot −o vector_sum . pdf

2.5 Zeller’s Ex. 7.1 的 CFG & DFG

/* fibo.c -- Fibonacci C program to be debugged */
#include<stdio.h>
int fib(int n){
	int f, f0 = 1, f1 = 1;
	while(n > 1){
		n = n − 1;
		f = f0 + f1;
		f0 = f1;
		f1 = f;
	}
	return f;
}
int main(){
	int n = 9;
	while(n > 0){
		printf("fib(%d)=%dN", n, fib(n));
		n = n − 1;
	}
	return 0;
}

注意，未初始化的int f变成了undef →
在LLVM中可视化代码结构

3. 依赖分析Pass

关注依赖关系的两个LLVM passes：

内存依赖分析
依赖分析

两者都利用别名分析(Alias Analysis)来降低 O(n^2) 的强度。

3.1 内存依赖分析

功能齐全
分析内存引用之间的依赖关系：
- Clobber —— clobbers内存的指令，例如一个may-aliased store
- Def —— 指令定义/生成所需的内存位置
- NonLocal —— 在当前基本块之外(需要检查前一个块)
- NonFuncLocal —— 在当前函数外部
无可视化图形输出
打印输出不是很清晰(如下)

define void @vector_sum ( i32 %length , float * nocapture readonly %in1 , float * nocapture readonly %in2 , float * nocapture %out ) #0 {
entry :
	%cmp9 = icmp sgt i32 %length , 0
	br i1 %cmp9 , label %for.body , label %for.end
	
for.body :									; preds = %entry, %for.body
	%indvars.iv = phi i64 [ %indvars.iv.next , %for.body ] , [ 0 , %entry ]
	%arrayidx = getelementptr inbounds float * %in1 , i64 %indvars.iv
	%0 = load float * %arrayidx , align 4 , ! tbaa ! 1
	%arrayidx2 = getelementptr inbounds float * %in2 , i64 %indvars.iv
	%1 = load float * %arrayidx2 , align 4 , ! tbaa ! 1
	%add = fadd float %0 , %1
	%arrayidx4 = getelementptr inbounds float * %out , i64 %indvars.iv
	store float %add , float * %arrayidx4 , align 4 , ! tbaa ! 1
	%indvars.iv.next = add nuw nsw i64 %indvars.iv , 1
	%lftr.wideiv = trunc i64 %indvars.iv.next to i32
	%exitcond = icmp eq i32 %lftr.wideiv , %length
	br i1 %exitcond , label %for.end , label %for.body

for.end :									; preds = %for.body, %entry
	ret void
}

内存依赖分析打印输出：

$ clang −emit − llvm example.c −O1 −S −o − | opt −memdep −print−memdeps −o / dev / null
Function vector_sum memory dependencies :

		Unknown in block %for.body
		Unknown in block %entry
	%0 = load float * %arrayidx , align 4 , ! tbaa ! 1
	
		Unknown in block %for.body
		Unknown in block %entry
	%1 = load float * %arrayidx2 , align 4 , ! tbaa ! 1

		Def from :	%1 = load float * %arrayidx2 , align 4 , ! tbaa ! 1
	store float %add , float * %arrayidx4 , align 4 , ! tbaa ! 1

为什么store只依赖于两次load中的一次?

3.2 依赖分析

进行中的工作
在Goff, Kennedy, Tseng – PLDI 1991的“Practical Dependence Testing”之后，由Preston Briggs实现
全面依赖分析：
- Output —— 写后写
- Flow (true) —— 写后读
- Anti - 读后写
- …
无可视化图形输出

在LLVM中可视化代码结构

在LLVM中可视化代码结构

1. 摘要

2. 调用图、控制和数据流

2.1 调用图

2.2 CFG(基本块)

2.3 CFG & DFG(指令)

2.4 指令

2.5 Zeller’s Ex. 7.1 的 CFG & DFG

3. 依赖分析Pass

3.1 内存依赖分析

3.2 依赖分析

相关推荐