Skip to content

Commit 8d565c9

Browse files
committed
doc init
1 parent b2cb296 commit 8d565c9

File tree

2 files changed

+332
-0
lines changed

2 files changed

+332
-0
lines changed

doc/main.tex

Lines changed: 332 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
\documentclass{article}
2+
\usepackage[utf8]{inputenc}
3+
\usepackage{algorithm}
4+
\usepackage{algorithmic}
5+
\usepackage{amsmath}
6+
\usepackage{bm}
7+
\usepackage{listings}
8+
\usepackage[usenames,dvipsnames]{xcolor}
9+
\title{smtGen}
10+
\author{qx51 }
11+
\date{October 2019}
12+
\renewcommand{\algorithmicrequire}{ \textbf{Input:}} %Use Input in the format of Algorithm
13+
\renewcommand{\algorithmicensure}{ \textbf{Output:}} %UseOutput in the format of Algorithm
14+
15+
\definecolor{mygreen}{rgb}{0,0.6,0}
16+
\definecolor{mygray}{rgb}{0.5,0.5,0.5}
17+
\definecolor{mymauve}{rgb}{0.58,0,0.82}
18+
\lstset{ %
19+
backgroundcolor=\color{white}, % choose the background color
20+
basicstyle=\footnotesize\ttfamily, % size of fonts used for the code
21+
columns=fullflexible,
22+
tabsize=4,
23+
breaklines=true, % automatic line breaking only at whitespace
24+
captionpos=b, % sets the caption-position to bottom
25+
commentstyle=\color{mygreen}, % comment style
26+
escapeinside={\%*}{*)}, % if you want to add LaTeX within your code
27+
keywordstyle=\color{blue}, % keyword style
28+
stringstyle=\color{mymauve}\ttfamily, % string literal style
29+
frame=single,
30+
rulesepcolor=\color{red!20!green!20!blue!20},
31+
% identifierstyle=\color{red},
32+
language=c++,
33+
}
34+
35+
\begin{document}
36+
37+
\section{Algorithm for program SMT}
38+
\subsection{Definition}
39+
40+
Some definitions are made in order to make the algorithm easier understood.
41+
\begin{enumerate}
42+
\item $F_{i}$ is SMT formula for the basic block $i$.
43+
\item $r_{i\_j\_k}$ represents the register value, where $i$ is the register ID, $j$ is the basic block ID, $k$ is the version ID (i.e., the number of update times in this block). $k$ starts from $0$ and each update in this basic block will increase $k$ by 1. For example, $r_{0\_0\_1}$ is the value that represents $r_1$ in basic block 0 (i.e., the starting basic block), and $r_1$ has been updated once in basic block 0. $j$ ensures that each basic block has its own register version, and $k$ ensures that different values of the same register can be differed in the same basic block.
44+
\item $f_{pl}$ is the formula of program logic for the current basic block.
45+
\item $IV = \{iv_0, iv_1, ... \}$ is the initial value set for all basic blocks. Each item $iv_i = \langle iv_{i\_0}, iv_{i\_1}, ... \rangle = \langle r_{0\_\bm{i}\_k_0}, r_{1\_\bm{i}\_k_1}, ...\rangle$ represents the initial values of all registers \textbf{\textit{generated}} by basic block $i$.
46+
47+
\item $f_{iv}$ is the formula representing the logic that initial values of register from the last basic block $b_1$ are fed to the register values in current basic block $b_2$, that is,
48+
\begin{equation}
49+
\begin{aligned}
50+
f_{iv} &= \bigwedge_{i} r_{i\_b_1\_k_i} == r_{i\_b_2\_0} \\
51+
&= \bigwedge_{i} iv_{b_1\_i} == r_{i\_b_2\_0}
52+
\end{aligned}
53+
\end{equation}
54+
\item $C = \{c_{b_{i_1}\rightarrow b_{j_1}}, c_{b_{i_2}\rightarrow b_{j_2}}, ...\}$ is the initial path condition formulas set for all edges. Each item $c_{b_i\rightarrow b_j}$ is a path condition formula generated by the basic block $b_i$ for the basic block $b_j$.
55+
The post path condition $C_b$ for the basic block $b$ is
56+
\begin{equation}
57+
C_b = \bigcup_i \{c_{parents}\wedge c_{instEnd\_case_i}\}
58+
\end{equation}
59+
where $c_{parents} = \bigvee_{p} c_{p\rightarrow b}$
60+
\end{enumerate}
61+
62+
\subsection{Target SMT formula}
63+
SMT formula for the program (do not take pre-condition or post-condition into consideration here) is
64+
65+
\begin{equation}
66+
\begin{aligned}
67+
F &= \bigwedge_i F_{i} \\
68+
&= \bigwedge_i \bigwedge_j c_{j\rightarrow i}\rightarrow f_{iv\_i} \wedge f_{pl\_i}
69+
\end{aligned}
70+
\end{equation}
71+
72+
\begin{algorithm}
73+
\begin{algorithmic}[1]
74+
\caption{SMT\_Prog}
75+
\label{alg:smt_prog}
76+
% \REQUIRE $instLst$, $instLstLen$, $inputRegLst$
77+
\REQUIRE $instLst$, $instLstLen$
78+
\ENSURE $F$, $outputRegLst$
79+
\STATE // Sort basic blocks by Topological\_Sorting
80+
\STATE // $B = \langle b_0, b_1, ...\rangle$ is set of basic blocks, $b_0$ is the start basic block
81+
\STATE // $InB = \langle inb_0, inb_1, ...\rangle$ is set of incoming edges for each basic block
82+
\STATE // $OutB = \langle outb_0, outb_1, ...\rangle$ is set of outgoing edges for each basic block
83+
\STATE // $InB$ and $OutB$ are from $cfg$
84+
\STATE $cfg \Leftarrow Gen\_Cfg(instLst, instLstLen)$
85+
\STATE $B \Leftarrow Topological\_Sorting(cfg)$
86+
\STATE
87+
\STATE // Process Basic Block 0
88+
\STATE $f_{pl} \Leftarrow Gen\_Block\_Prog\_Logic(0,instLst[start:end])$
89+
% \STATE $f_{iv} \Leftarrow \bigwedge_{i=1}^{i=NumOfReg} inputRegLst_i == r_{i\_0\_0}$
90+
% \STATE $F_0 \Leftarrow true \rightarrow (f_{iv} \wedge f_{pl})$
91+
\STATE $F_0 \Leftarrow f_{pl}$
92+
\STATE $iv_0 \Leftarrow Get\_Cur\_Reg\_Val(0)$
93+
\STATE $IV \Leftarrow \{iv_0\}$
94+
\STATE $C \Leftarrow Gen\_Path\_Cond\_For\_Next\_Blocks(0, instLst[end], inb_0, outb_0)$
95+
96+
\STATE
97+
\STATE // Process Basic Block $1 \rightarrow (len-1)$
98+
\FOR{$i=1$ to $len(B)-1$}
99+
\STATE $f_{pl} \Leftarrow Gen\_Block\_Prog\_Logic(i,instLst[start:end])$
100+
\STATE $F_i \Leftarrow true$
101+
\FOR{each last block $b$ in $inb_{\bm{i}}$}
102+
\STATE $f_{iv} \Leftarrow \bigwedge_{j=1}^{j=NumOfReg} iv_{b_\_j} == r_{j_\_i_\_0}$
103+
\STATE $F_i \Leftarrow F_i \wedge (c_{b \rightarrow i} \rightarrow (f_{iv} \wedge f_{pl}))$
104+
\ENDFOR
105+
\STATE $iv_i \Leftarrow Get\_Cur\_Reg\_Val(i)$
106+
\STATE $IV \Leftarrow IV \cup \{iv_i\}$
107+
\STATE $C_{out\_i} \Leftarrow Gen\_Path\_Cond\_For\_Next\_Blocks(i, instLst[end], inb_i, outb_i)$
108+
\STATE $C \Leftarrow C \cup C_{out_\_i}$
109+
\ENDFOR
110+
\STATE
111+
\FOR{$i=0$ to $len(B)-1$}
112+
\STATE $F \Leftarrow F \wedge F_i$
113+
\ENDFOR
114+
\STATE
115+
\STATE initialize $outputRegLst$
116+
\FOR{$i=0$ to $NumOfReg-1$}
117+
\STATE $outputRegLst_i \Leftarrow iv_{(instLstLen-1)\_i}$
118+
\ENDFOR
119+
\RETURN $outputRegLst_i$
120+
\end{algorithmic}
121+
\end{algorithm}
122+
123+
\begin{algorithm}
124+
\begin{algorithmic}[1]
125+
\caption{Gen\_Block\_Prog\_Logic}
126+
\REQUIRE Basic Block ID $b$, $instLst$
127+
\ENSURE $f_{pl}$
128+
\STATE $f_{pl} \Leftarrow true$
129+
\FOR{$i=1$ to $len(instLst)$}
130+
\IF{type of instruction is not $JMP$ or $END$}
131+
\STATE $f_{pl} \Leftarrow f_{pl} \wedge f_{instruction_\_i}$
132+
\ENDIF
133+
\ENDFOR
134+
\RETURN $f_{pl}$
135+
\end{algorithmic}
136+
\end{algorithm}
137+
138+
\begin{algorithm}
139+
\begin{algorithmic}[1]
140+
\caption{Get\_Cur\_Reg\_Val}
141+
\REQUIRE Basic Block ID $b$, $R_b$
142+
\ENSURE $iv_b$
143+
\STATE $iv_{b} \Leftarrow\emptyset$
144+
\FOR{$i=0$ to $(NumberOfRegisters-1)$}
145+
\STATE //$R$ stores the current values of all register for the basic blocks
146+
\STATE $iv_{b} \Leftarrow iv_{b} \cup \{R_{b\_i}\}$
147+
\ENDFOR
148+
\RETURN $iv_{b}$
149+
\end{algorithmic}
150+
\end{algorithm}
151+
152+
\begin{algorithm}
153+
\begin{algorithmic}[1]
154+
\caption{Gen\_Path\_Cond\_For\_Next\_Blocks}
155+
\REQUIRE Basic Block ID $b$, $instEnd$, $inBlocks$, $outBlocks$
156+
\ENSURE $C_{out}$
157+
\STATE $c_{in} \Leftarrow true$
158+
\FOR{each last block $b_{i}$ in $inBlocks$}
159+
\STATE $c_{in} \Leftarrow c_{in} \vee c_{b_i \rightarrow b}$
160+
\ENDFOR
161+
\STATE $C_{out} \Leftarrow \emptyset$
162+
\STATE $c_{instEnd} \Leftarrow Parse Instruction(instEnd)$
163+
\FOR{each last block $c$ in $c_{instEnd}$}
164+
\STATE $C_{out} \Leftarrow C_{out} \cup \{c_{in} \wedge c\}$
165+
\ENDFOR
166+
\RETURN $C_{out}$
167+
\end{algorithmic}
168+
\end{algorithm}
169+
170+
\subsection{Example}
171+
\begin{enumerate}
172+
\item Instructions:
173+
\begin{lstlisting}
174+
inst(MOVXC, 2, 15), // 0 mov r2, 15
175+
inst(JMPGT, 0, 2, 2), // 1 if r0 <= r2 no jmp else jmp to 4
176+
inst(ADDXY, 0, 1), // 2 add r0, r1
177+
inst(RETX, 2), // 3 ret r2
178+
inst(ADDXY, 2, 1), // 4 add r2, r1
179+
inst(RETX, 0), // 5 else ret r0
180+
\end{lstlisting}
181+
\item Instructions $\rightarrow$ CFG \\
182+
CFG:\\
183+
nodes: 0[0:1] 1[2:3] 2[4:5] \\
184+
edges: 0 $\rightarrow$ 1 0 $\rightarrow$ 2
185+
\item List by topological Sorting \\
186+
$B = \langle 0, 1, 2 \rangle$ \\
187+
$InB = \langle \langle\rangle, \langle 0 \rangle, \langle 0 \rangle \rangle$ \\
188+
$OutB = \langle \langle 1, 2 \rangle, \langle\rangle, \langle \rangle \rangle$
189+
\item Process Node 0 \\
190+
$f_{pl} = (r_{2\_0\_1} == 15)$\\
191+
$iv_0 = \langle r_{0\_0\_0},r_{1\_0\_0},r_{2\_0\_1}\rangle$ \\
192+
$IV = \{iv_0\}$\\
193+
$F_0 = (r_{2\_0\_1} == 15)$ \\
194+
$C_{out\_0} = \{ c_{0\rightarrow 1}=(r_0 > r_2), c_{0\rightarrow 2}=\neg(r_0 > r_2)\}$ \\
195+
$C = C_{out\_0}$
196+
\item Process Node 1 \\
197+
$f_{pl} = (r_{0\_1\_1} == r_{0\_1\_0} + r_{1\_1\_0})$\\
198+
$iv_1 = \langle r_{0\_1\_1},r_{1\_1\_0},r_{2\_1\_0}\rangle$\\
199+
$IV = \{iv_0, iv_1\}$\\
200+
$f_{iv} = \bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_1\_0})$\\
201+
$F_1 = c_{0\rightarrow 1} \rightarrow f_{iv} \wedge f_{pl} = (r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_1\_0}) \wedge (r_{0\_1\_1} == r_{0\_1\_0} + r_{1\_1\_0}))$ \\
202+
$C_{out\_1} = \emptyset$ \\
203+
$C = C_{out\_0}$
204+
\item Process Node 2 \\
205+
$f_{pl} = (r_{2\_2\_1} == r_{2\_2\_0} + r_{1\_2\_0})$\\
206+
$iv_1 = \langle r_{0\_2\_0},r_{1\_2\_0},r_{2\_2\_1}\rangle$\\
207+
$IV = \{iv_0, iv_1, iv_2\}$\\
208+
$f_{iv} = \bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_2\_0})$\\
209+
$F_2 = c_{0\rightarrow 2} \rightarrow f_{iv} \wedge f_{pl} = \neg(r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_2\_0})\wedge (r_{2\_2\_1} == r_{2\_2\_0} + r_{1\_2\_0}))$ \\
210+
$C_{out\_2} = \emptyset$ \\
211+
$C = C_{out\_0}$
212+
\item Generate $F$ \\
213+
$F = F_0 \wedge F_1 \wedge F_2$ \\
214+
where \\
215+
$F_0 = true\rightarrow (true \wedge (r_{2\_0\_1} == 15))$ \\
216+
$F_1 = (r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_1\_0}) \wedge (r_{0\_1\_1} == r_{0\_1\_0} + r_{1\_1\_0}))$ \\
217+
$F_2 = \neg(r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_2\_0})\wedge (r_{2\_2\_1} == r_{2\_2\_0} + r_{1\_2\_0}))$
218+
\end{enumerate}
219+
220+
\section{Algorithm for equivalence check}
221+
\subsection{Definition}
222+
\begin{enumerate}
223+
\item Since there are two programs, a new dimension, program ID, should be added into register value $r_{i\_j\_k}$ to ensure that each program has it own register value version. Then $r_{i\_j\_k}$ extends to $\bm{r_{i\_j\_k\_p}}$, where $p$ is the program ID.
224+
\item Equivalence check formula
225+
\begin{equation}
226+
F = F_{pre_1} \wedge F_{pre_2} \wedge F_1 \wedge F_2 \rightarrow F_{post}
227+
\end{equation}
228+
where \\
229+
$F_{pre_i}$, $F_i$, $F_{post}$ are the pre-conditon, program logic, post-condition FOL formulas of program $i$, that is,
230+
\begin{equation}
231+
F_{pre_i} = (r_{0\_*\_*\_i})\wedge (\bigwedge_{j = 1}^{j = NumOfReg-1} r_{j\_*\_*\_i} == 0)
232+
\end{equation}
233+
234+
\begin{equation}
235+
\begin{aligned}
236+
F_{i} &= (\bigwedge_{j=0}^{j=NumOfInstLst-1} f_{inst\_j}) \wedge (F_{output}) \\
237+
&= (\bigwedge_{j=0}^{j=NumOfInstLst-1} f_{inst\_j}) \wedge (\bigwedge_{r_k = reg[ret_k]} (c \rightarrow r_k == output_i))
238+
\end{aligned}
239+
\end{equation}
240+
241+
\begin{equation}
242+
F_{post} = (output_1 == output_2)
243+
\end{equation}
244+
\end{enumerate}
245+
246+
\subsection{Algorithm}
247+
\begin{algorithm}
248+
\begin{algorithmic}[1]
249+
\caption{Equivalence\_Check}
250+
\REQUIRE $input$, $instLst_1$, $len_1$, $instLst_2$, $len_2$
251+
\ENSURE $isEqual$
252+
\STATE // pre-condition formula: $input_1 == input_2$
253+
\STATE $F_{pre_1} \Leftarrow true$
254+
\STATE $F_{pre_2} \Leftarrow true$
255+
\FOR{each value $\bm{rv}$ of register $\bm{i}$ in $input$}
256+
\STATE $F_{pre_1} \Leftarrow F_{pre_1} \wedge (rv == r_{i\_0\_0\_1})$
257+
\STATE $F_{pre_2} \Leftarrow F_{pre_2} \wedge (rv == r_{i\_0\_0\_2})$
258+
\ENDFOR{}
259+
\STATE // program logic formula: $F_1$, $F_2$
260+
\STATE $F_1, output_1 \Leftarrow SMT\_Prog(instLst_1, len_1)$
261+
\STATE $F_2, output_2 \Leftarrow SMT\_Prog(instLst_2, len_2)$
262+
\STATE // post-condition formula: $output_1 == output_2$
263+
\STATE $F_{post} \Leftarrow true$
264+
\FOR{$i=0$ to $len(output_1)$}
265+
\STATE $F_{post} \Leftarrow F_{post} \wedge (output_{1\_i} == output_{2\_i})$
266+
\ENDFOR
267+
\STATE $F \Leftarrow F_{pre_1} \wedge F_{pre_2} \wedge F_1 \wedge F_2 \rightarrow F_{post} $
268+
\STATE $isEqual \Leftarrow Is\_Unsat(\neg F)$
269+
\RETURN $isEqual$
270+
\end{algorithmic}
271+
\end{algorithm}
272+
273+
\section{Test set}
274+
\subsection{Program equivalence}
275+
\begin{enumerate}
276+
\item no branch
277+
\item branch: use more arithmetic and jmp instructions
278+
\subitem with RET,
279+
\subitem without RET
280+
\item illegal input
281+
\end{enumerate}
282+
\subsection{Specification check}
283+
284+
\begin{enumerate}
285+
\item Instruction specification: the range of register ID,...; \\done by candidate program generator?\\
286+
Now it is done by candidate program generator.
287+
\item Invalid instruction reach: Jmp instruction check; has done\\
288+
End instruction check $\rightarrow$ Is the program is always ended with \textbf{RETs}? \\
289+
For now, only with registers, if no RET, return $r_0$
290+
\item No loop
291+
\end{enumerate}
292+
293+
\section{Data structure}
294+
\begin{lstlisting}[caption={}]
295+
class node {
296+
private:
297+
public:
298+
unsigned int _start = 0; // start intruction ID
299+
unsigned int _end = 0; // end instruction ID
300+
};
301+
302+
class graph {
303+
private:
304+
vector<node> nodes;
305+
vector<vector<unsigned int> > nodesIn;
306+
vector<vector<unsigned int> > nodesOut;
307+
public:
308+
};
309+
310+
class progSmt {
311+
private:
312+
// f[i] is program logic FOL formula F of basic block i
313+
vector<expr> f;
314+
// postRegVal[i] is post register values of basic block i,
315+
// which are initial values for NEXT basic blocks
316+
vector<vector<expr> > postRegVal;
317+
// pathCon[i] stores pre path conditions of basic block i
318+
// There is a corresponding relationship between pathCon and g.nodesIn
319+
// more specifically, pathCon[i][j] stores the pre path condition from basic block g.nodesIn[i][j] to i
320+
vector<vector<expr> > pathCon;
321+
// program FOL formula
322+
expr smt = stringToExpr("true");
323+
// program output FOL formula
324+
expr smtOutput = stringToExpr("true");
325+
// return the SMT for the given program
326+
expr smtProg(inst* program, int length, smtVar* sv);
327+
// return SMT for the given instruction
328+
expr smtInst(smtVar* sv, inst* in);
329+
public:
330+
};
331+
\end{lstlisting}
332+
\end{document}

doc/smt_gen.pdf

189 KB
Binary file not shown.

0 commit comments

Comments
 (0)