|
| 1 | +\documentclass{article} |
| 2 | +\usepackage[utf8]{inputenc} |
| 3 | +\usepackage{algorithm} |
| 4 | +\usepackage{algorithmic} |
| 5 | +\usepackage{amsmath} |
| 6 | +\usepackage{bm} |
| 7 | +\usepackage{listings} |
| 8 | +\usepackage[usenames,dvipsnames]{xcolor} |
| 9 | +\title{smtGen} |
| 10 | +\author{qx51 } |
| 11 | +\date{October 2019} |
| 12 | +\renewcommand{\algorithmicrequire}{ \textbf{Input:}} %Use Input in the format of Algorithm |
| 13 | +\renewcommand{\algorithmicensure}{ \textbf{Output:}} %UseOutput in the format of Algorithm |
| 14 | + |
| 15 | +\definecolor{mygreen}{rgb}{0,0.6,0} |
| 16 | +\definecolor{mygray}{rgb}{0.5,0.5,0.5} |
| 17 | +\definecolor{mymauve}{rgb}{0.58,0,0.82} |
| 18 | +\lstset{ % |
| 19 | +backgroundcolor=\color{white}, % choose the background color |
| 20 | +basicstyle=\footnotesize\ttfamily, % size of fonts used for the code |
| 21 | +columns=fullflexible, |
| 22 | +tabsize=4, |
| 23 | +breaklines=true, % automatic line breaking only at whitespace |
| 24 | +captionpos=b, % sets the caption-position to bottom |
| 25 | +commentstyle=\color{mygreen}, % comment style |
| 26 | +escapeinside={\%*}{*)}, % if you want to add LaTeX within your code |
| 27 | +keywordstyle=\color{blue}, % keyword style |
| 28 | +stringstyle=\color{mymauve}\ttfamily, % string literal style |
| 29 | +frame=single, |
| 30 | +rulesepcolor=\color{red!20!green!20!blue!20}, |
| 31 | +% identifierstyle=\color{red}, |
| 32 | +language=c++, |
| 33 | +} |
| 34 | + |
| 35 | +\begin{document} |
| 36 | + |
| 37 | +\section{Algorithm for program SMT} |
| 38 | +\subsection{Definition} |
| 39 | + |
| 40 | +Some definitions are made in order to make the algorithm easier understood. |
| 41 | +\begin{enumerate} |
| 42 | +\item $F_{i}$ is SMT formula for the basic block $i$. |
| 43 | +\item $r_{i\_j\_k}$ represents the register value, where $i$ is the register ID, $j$ is the basic block ID, $k$ is the version ID (i.e., the number of update times in this block). $k$ starts from $0$ and each update in this basic block will increase $k$ by 1. For example, $r_{0\_0\_1}$ is the value that represents $r_1$ in basic block 0 (i.e., the starting basic block), and $r_1$ has been updated once in basic block 0. $j$ ensures that each basic block has its own register version, and $k$ ensures that different values of the same register can be differed in the same basic block. |
| 44 | +\item $f_{pl}$ is the formula of program logic for the current basic block. |
| 45 | +\item $IV = \{iv_0, iv_1, ... \}$ is the initial value set for all basic blocks. Each item $iv_i = \langle iv_{i\_0}, iv_{i\_1}, ... \rangle = \langle r_{0\_\bm{i}\_k_0}, r_{1\_\bm{i}\_k_1}, ...\rangle$ represents the initial values of all registers \textbf{\textit{generated}} by basic block $i$. |
| 46 | + |
| 47 | +\item $f_{iv}$ is the formula representing the logic that initial values of register from the last basic block $b_1$ are fed to the register values in current basic block $b_2$, that is, |
| 48 | +\begin{equation} |
| 49 | +\begin{aligned} |
| 50 | +f_{iv} &= \bigwedge_{i} r_{i\_b_1\_k_i} == r_{i\_b_2\_0} \\ |
| 51 | +&= \bigwedge_{i} iv_{b_1\_i} == r_{i\_b_2\_0} |
| 52 | +\end{aligned} |
| 53 | +\end{equation} |
| 54 | +\item $C = \{c_{b_{i_1}\rightarrow b_{j_1}}, c_{b_{i_2}\rightarrow b_{j_2}}, ...\}$ is the initial path condition formulas set for all edges. Each item $c_{b_i\rightarrow b_j}$ is a path condition formula generated by the basic block $b_i$ for the basic block $b_j$. |
| 55 | +The post path condition $C_b$ for the basic block $b$ is |
| 56 | +\begin{equation} |
| 57 | +C_b = \bigcup_i \{c_{parents}\wedge c_{instEnd\_case_i}\} |
| 58 | +\end{equation} |
| 59 | +where $c_{parents} = \bigvee_{p} c_{p\rightarrow b}$ |
| 60 | +\end{enumerate} |
| 61 | + |
| 62 | +\subsection{Target SMT formula} |
| 63 | +SMT formula for the program (do not take pre-condition or post-condition into consideration here) is |
| 64 | + |
| 65 | +\begin{equation} |
| 66 | +\begin{aligned} |
| 67 | +F &= \bigwedge_i F_{i} \\ |
| 68 | + &= \bigwedge_i \bigwedge_j c_{j\rightarrow i}\rightarrow f_{iv\_i} \wedge f_{pl\_i} |
| 69 | +\end{aligned} |
| 70 | +\end{equation} |
| 71 | + |
| 72 | +\begin{algorithm} |
| 73 | +\begin{algorithmic}[1] |
| 74 | +\caption{SMT\_Prog} |
| 75 | +\label{alg:smt_prog} |
| 76 | +% \REQUIRE $instLst$, $instLstLen$, $inputRegLst$ |
| 77 | +\REQUIRE $instLst$, $instLstLen$ |
| 78 | +\ENSURE $F$, $outputRegLst$ |
| 79 | +\STATE // Sort basic blocks by Topological\_Sorting |
| 80 | +\STATE // $B = \langle b_0, b_1, ...\rangle$ is set of basic blocks, $b_0$ is the start basic block |
| 81 | +\STATE // $InB = \langle inb_0, inb_1, ...\rangle$ is set of incoming edges for each basic block |
| 82 | +\STATE // $OutB = \langle outb_0, outb_1, ...\rangle$ is set of outgoing edges for each basic block |
| 83 | +\STATE // $InB$ and $OutB$ are from $cfg$ |
| 84 | +\STATE $cfg \Leftarrow Gen\_Cfg(instLst, instLstLen)$ |
| 85 | +\STATE $B \Leftarrow Topological\_Sorting(cfg)$ |
| 86 | +\STATE |
| 87 | +\STATE // Process Basic Block 0 |
| 88 | +\STATE $f_{pl} \Leftarrow Gen\_Block\_Prog\_Logic(0,instLst[start:end])$ |
| 89 | +% \STATE $f_{iv} \Leftarrow \bigwedge_{i=1}^{i=NumOfReg} inputRegLst_i == r_{i\_0\_0}$ |
| 90 | +% \STATE $F_0 \Leftarrow true \rightarrow (f_{iv} \wedge f_{pl})$ |
| 91 | +\STATE $F_0 \Leftarrow f_{pl}$ |
| 92 | +\STATE $iv_0 \Leftarrow Get\_Cur\_Reg\_Val(0)$ |
| 93 | +\STATE $IV \Leftarrow \{iv_0\}$ |
| 94 | +\STATE $C \Leftarrow Gen\_Path\_Cond\_For\_Next\_Blocks(0, instLst[end], inb_0, outb_0)$ |
| 95 | + |
| 96 | +\STATE |
| 97 | +\STATE // Process Basic Block $1 \rightarrow (len-1)$ |
| 98 | +\FOR{$i=1$ to $len(B)-1$} |
| 99 | +\STATE $f_{pl} \Leftarrow Gen\_Block\_Prog\_Logic(i,instLst[start:end])$ |
| 100 | +\STATE $F_i \Leftarrow true$ |
| 101 | +\FOR{each last block $b$ in $inb_{\bm{i}}$} |
| 102 | +\STATE $f_{iv} \Leftarrow \bigwedge_{j=1}^{j=NumOfReg} iv_{b_\_j} == r_{j_\_i_\_0}$ |
| 103 | +\STATE $F_i \Leftarrow F_i \wedge (c_{b \rightarrow i} \rightarrow (f_{iv} \wedge f_{pl}))$ |
| 104 | +\ENDFOR |
| 105 | +\STATE $iv_i \Leftarrow Get\_Cur\_Reg\_Val(i)$ |
| 106 | +\STATE $IV \Leftarrow IV \cup \{iv_i\}$ |
| 107 | +\STATE $C_{out\_i} \Leftarrow Gen\_Path\_Cond\_For\_Next\_Blocks(i, instLst[end], inb_i, outb_i)$ |
| 108 | +\STATE $C \Leftarrow C \cup C_{out_\_i}$ |
| 109 | +\ENDFOR |
| 110 | +\STATE |
| 111 | +\FOR{$i=0$ to $len(B)-1$} |
| 112 | +\STATE $F \Leftarrow F \wedge F_i$ |
| 113 | +\ENDFOR |
| 114 | +\STATE |
| 115 | +\STATE initialize $outputRegLst$ |
| 116 | +\FOR{$i=0$ to $NumOfReg-1$} |
| 117 | +\STATE $outputRegLst_i \Leftarrow iv_{(instLstLen-1)\_i}$ |
| 118 | +\ENDFOR |
| 119 | +\RETURN $outputRegLst_i$ |
| 120 | +\end{algorithmic} |
| 121 | +\end{algorithm} |
| 122 | + |
| 123 | +\begin{algorithm} |
| 124 | +\begin{algorithmic}[1] |
| 125 | +\caption{Gen\_Block\_Prog\_Logic} |
| 126 | +\REQUIRE Basic Block ID $b$, $instLst$ |
| 127 | +\ENSURE $f_{pl}$ |
| 128 | +\STATE $f_{pl} \Leftarrow true$ |
| 129 | +\FOR{$i=1$ to $len(instLst)$} |
| 130 | +\IF{type of instruction is not $JMP$ or $END$} |
| 131 | +\STATE $f_{pl} \Leftarrow f_{pl} \wedge f_{instruction_\_i}$ |
| 132 | +\ENDIF |
| 133 | +\ENDFOR |
| 134 | +\RETURN $f_{pl}$ |
| 135 | +\end{algorithmic} |
| 136 | +\end{algorithm} |
| 137 | + |
| 138 | +\begin{algorithm} |
| 139 | +\begin{algorithmic}[1] |
| 140 | +\caption{Get\_Cur\_Reg\_Val} |
| 141 | +\REQUIRE Basic Block ID $b$, $R_b$ |
| 142 | +\ENSURE $iv_b$ |
| 143 | +\STATE $iv_{b} \Leftarrow\emptyset$ |
| 144 | +\FOR{$i=0$ to $(NumberOfRegisters-1)$} |
| 145 | +\STATE //$R$ stores the current values of all register for the basic blocks |
| 146 | +\STATE $iv_{b} \Leftarrow iv_{b} \cup \{R_{b\_i}\}$ |
| 147 | +\ENDFOR |
| 148 | +\RETURN $iv_{b}$ |
| 149 | +\end{algorithmic} |
| 150 | +\end{algorithm} |
| 151 | + |
| 152 | +\begin{algorithm} |
| 153 | +\begin{algorithmic}[1] |
| 154 | +\caption{Gen\_Path\_Cond\_For\_Next\_Blocks} |
| 155 | +\REQUIRE Basic Block ID $b$, $instEnd$, $inBlocks$, $outBlocks$ |
| 156 | +\ENSURE $C_{out}$ |
| 157 | +\STATE $c_{in} \Leftarrow true$ |
| 158 | +\FOR{each last block $b_{i}$ in $inBlocks$} |
| 159 | +\STATE $c_{in} \Leftarrow c_{in} \vee c_{b_i \rightarrow b}$ |
| 160 | +\ENDFOR |
| 161 | +\STATE $C_{out} \Leftarrow \emptyset$ |
| 162 | +\STATE $c_{instEnd} \Leftarrow Parse Instruction(instEnd)$ |
| 163 | +\FOR{each last block $c$ in $c_{instEnd}$} |
| 164 | +\STATE $C_{out} \Leftarrow C_{out} \cup \{c_{in} \wedge c\}$ |
| 165 | +\ENDFOR |
| 166 | +\RETURN $C_{out}$ |
| 167 | +\end{algorithmic} |
| 168 | +\end{algorithm} |
| 169 | + |
| 170 | +\subsection{Example} |
| 171 | +\begin{enumerate} |
| 172 | +\item Instructions: |
| 173 | +\begin{lstlisting} |
| 174 | +inst(MOVXC, 2, 15), // 0 mov r2, 15 |
| 175 | +inst(JMPGT, 0, 2, 2), // 1 if r0 <= r2 no jmp else jmp to 4 |
| 176 | +inst(ADDXY, 0, 1), // 2 add r0, r1 |
| 177 | +inst(RETX, 2), // 3 ret r2 |
| 178 | +inst(ADDXY, 2, 1), // 4 add r2, r1 |
| 179 | +inst(RETX, 0), // 5 else ret r0 |
| 180 | +\end{lstlisting} |
| 181 | +\item Instructions $\rightarrow$ CFG \\ |
| 182 | +CFG:\\ |
| 183 | +nodes: 0[0:1] 1[2:3] 2[4:5] \\ |
| 184 | +edges: 0 $\rightarrow$ 1 0 $\rightarrow$ 2 |
| 185 | +\item List by topological Sorting \\ |
| 186 | +$B = \langle 0, 1, 2 \rangle$ \\ |
| 187 | +$InB = \langle \langle\rangle, \langle 0 \rangle, \langle 0 \rangle \rangle$ \\ |
| 188 | +$OutB = \langle \langle 1, 2 \rangle, \langle\rangle, \langle \rangle \rangle$ |
| 189 | +\item Process Node 0 \\ |
| 190 | +$f_{pl} = (r_{2\_0\_1} == 15)$\\ |
| 191 | +$iv_0 = \langle r_{0\_0\_0},r_{1\_0\_0},r_{2\_0\_1}\rangle$ \\ |
| 192 | +$IV = \{iv_0\}$\\ |
| 193 | +$F_0 = (r_{2\_0\_1} == 15)$ \\ |
| 194 | +$C_{out\_0} = \{ c_{0\rightarrow 1}=(r_0 > r_2), c_{0\rightarrow 2}=\neg(r_0 > r_2)\}$ \\ |
| 195 | +$C = C_{out\_0}$ |
| 196 | +\item Process Node 1 \\ |
| 197 | +$f_{pl} = (r_{0\_1\_1} == r_{0\_1\_0} + r_{1\_1\_0})$\\ |
| 198 | +$iv_1 = \langle r_{0\_1\_1},r_{1\_1\_0},r_{2\_1\_0}\rangle$\\ |
| 199 | +$IV = \{iv_0, iv_1\}$\\ |
| 200 | +$f_{iv} = \bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_1\_0})$\\ |
| 201 | +$F_1 = c_{0\rightarrow 1} \rightarrow f_{iv} \wedge f_{pl} = (r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_1\_0}) \wedge (r_{0\_1\_1} == r_{0\_1\_0} + r_{1\_1\_0}))$ \\ |
| 202 | +$C_{out\_1} = \emptyset$ \\ |
| 203 | +$C = C_{out\_0}$ |
| 204 | +\item Process Node 2 \\ |
| 205 | +$f_{pl} = (r_{2\_2\_1} == r_{2\_2\_0} + r_{1\_2\_0})$\\ |
| 206 | +$iv_1 = \langle r_{0\_2\_0},r_{1\_2\_0},r_{2\_2\_1}\rangle$\\ |
| 207 | +$IV = \{iv_0, iv_1, iv_2\}$\\ |
| 208 | +$f_{iv} = \bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_2\_0})$\\ |
| 209 | +$F_2 = c_{0\rightarrow 2} \rightarrow f_{iv} \wedge f_{pl} = \neg(r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_2\_0})\wedge (r_{2\_2\_1} == r_{2\_2\_0} + r_{1\_2\_0}))$ \\ |
| 210 | +$C_{out\_2} = \emptyset$ \\ |
| 211 | +$C = C_{out\_0}$ |
| 212 | +\item Generate $F$ \\ |
| 213 | +$F = F_0 \wedge F_1 \wedge F_2$ \\ |
| 214 | +where \\ |
| 215 | +$F_0 = true\rightarrow (true \wedge (r_{2\_0\_1} == 15))$ \\ |
| 216 | +$F_1 = (r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_1\_0}) \wedge (r_{0\_1\_1} == r_{0\_1\_0} + r_{1\_1\_0}))$ \\ |
| 217 | +$F_2 = \neg(r_0 > r_2)\rightarrow (\bigwedge_{i=1}^{i=3} (iv_{0\_i} == r_{i\_2\_0})\wedge (r_{2\_2\_1} == r_{2\_2\_0} + r_{1\_2\_0}))$ |
| 218 | +\end{enumerate} |
| 219 | + |
| 220 | +\section{Algorithm for equivalence check} |
| 221 | +\subsection{Definition} |
| 222 | +\begin{enumerate} |
| 223 | +\item Since there are two programs, a new dimension, program ID, should be added into register value $r_{i\_j\_k}$ to ensure that each program has it own register value version. Then $r_{i\_j\_k}$ extends to $\bm{r_{i\_j\_k\_p}}$, where $p$ is the program ID. |
| 224 | +\item Equivalence check formula |
| 225 | +\begin{equation} |
| 226 | +F = F_{pre_1} \wedge F_{pre_2} \wedge F_1 \wedge F_2 \rightarrow F_{post} |
| 227 | +\end{equation} |
| 228 | +where \\ |
| 229 | +$F_{pre_i}$, $F_i$, $F_{post}$ are the pre-conditon, program logic, post-condition FOL formulas of program $i$, that is, |
| 230 | +\begin{equation} |
| 231 | +F_{pre_i} = (r_{0\_*\_*\_i})\wedge (\bigwedge_{j = 1}^{j = NumOfReg-1} r_{j\_*\_*\_i} == 0) |
| 232 | +\end{equation} |
| 233 | + |
| 234 | +\begin{equation} |
| 235 | +\begin{aligned} |
| 236 | +F_{i} &= (\bigwedge_{j=0}^{j=NumOfInstLst-1} f_{inst\_j}) \wedge (F_{output}) \\ |
| 237 | +&= (\bigwedge_{j=0}^{j=NumOfInstLst-1} f_{inst\_j}) \wedge (\bigwedge_{r_k = reg[ret_k]} (c \rightarrow r_k == output_i)) |
| 238 | +\end{aligned} |
| 239 | +\end{equation} |
| 240 | + |
| 241 | +\begin{equation} |
| 242 | +F_{post} = (output_1 == output_2) |
| 243 | +\end{equation} |
| 244 | +\end{enumerate} |
| 245 | + |
| 246 | +\subsection{Algorithm} |
| 247 | +\begin{algorithm} |
| 248 | +\begin{algorithmic}[1] |
| 249 | +\caption{Equivalence\_Check} |
| 250 | +\REQUIRE $input$, $instLst_1$, $len_1$, $instLst_2$, $len_2$ |
| 251 | +\ENSURE $isEqual$ |
| 252 | +\STATE // pre-condition formula: $input_1 == input_2$ |
| 253 | +\STATE $F_{pre_1} \Leftarrow true$ |
| 254 | +\STATE $F_{pre_2} \Leftarrow true$ |
| 255 | +\FOR{each value $\bm{rv}$ of register $\bm{i}$ in $input$} |
| 256 | +\STATE $F_{pre_1} \Leftarrow F_{pre_1} \wedge (rv == r_{i\_0\_0\_1})$ |
| 257 | +\STATE $F_{pre_2} \Leftarrow F_{pre_2} \wedge (rv == r_{i\_0\_0\_2})$ |
| 258 | +\ENDFOR{} |
| 259 | +\STATE // program logic formula: $F_1$, $F_2$ |
| 260 | +\STATE $F_1, output_1 \Leftarrow SMT\_Prog(instLst_1, len_1)$ |
| 261 | +\STATE $F_2, output_2 \Leftarrow SMT\_Prog(instLst_2, len_2)$ |
| 262 | +\STATE // post-condition formula: $output_1 == output_2$ |
| 263 | +\STATE $F_{post} \Leftarrow true$ |
| 264 | +\FOR{$i=0$ to $len(output_1)$} |
| 265 | +\STATE $F_{post} \Leftarrow F_{post} \wedge (output_{1\_i} == output_{2\_i})$ |
| 266 | +\ENDFOR |
| 267 | +\STATE $F \Leftarrow F_{pre_1} \wedge F_{pre_2} \wedge F_1 \wedge F_2 \rightarrow F_{post} $ |
| 268 | +\STATE $isEqual \Leftarrow Is\_Unsat(\neg F)$ |
| 269 | +\RETURN $isEqual$ |
| 270 | +\end{algorithmic} |
| 271 | +\end{algorithm} |
| 272 | + |
| 273 | +\section{Test set} |
| 274 | +\subsection{Program equivalence} |
| 275 | +\begin{enumerate} |
| 276 | +\item no branch |
| 277 | +\item branch: use more arithmetic and jmp instructions |
| 278 | +\subitem with RET, |
| 279 | +\subitem without RET |
| 280 | +\item illegal input |
| 281 | +\end{enumerate} |
| 282 | +\subsection{Specification check} |
| 283 | + |
| 284 | +\begin{enumerate} |
| 285 | +\item Instruction specification: the range of register ID,...; \\done by candidate program generator?\\ |
| 286 | +Now it is done by candidate program generator. |
| 287 | +\item Invalid instruction reach: Jmp instruction check; has done\\ |
| 288 | +End instruction check $\rightarrow$ Is the program is always ended with \textbf{RETs}? \\ |
| 289 | +For now, only with registers, if no RET, return $r_0$ |
| 290 | +\item No loop |
| 291 | +\end{enumerate} |
| 292 | + |
| 293 | +\section{Data structure} |
| 294 | +\begin{lstlisting}[caption={}] |
| 295 | +class node { |
| 296 | +private: |
| 297 | +public: |
| 298 | + unsigned int _start = 0; // start intruction ID |
| 299 | + unsigned int _end = 0; // end instruction ID |
| 300 | +}; |
| 301 | + |
| 302 | +class graph { |
| 303 | +private: |
| 304 | + vector<node> nodes; |
| 305 | + vector<vector<unsigned int> > nodesIn; |
| 306 | + vector<vector<unsigned int> > nodesOut; |
| 307 | +public: |
| 308 | +}; |
| 309 | + |
| 310 | +class progSmt { |
| 311 | +private: |
| 312 | + // f[i] is program logic FOL formula F of basic block i |
| 313 | + vector<expr> f; |
| 314 | + // postRegVal[i] is post register values of basic block i, |
| 315 | + // which are initial values for NEXT basic blocks |
| 316 | + vector<vector<expr> > postRegVal; |
| 317 | + // pathCon[i] stores pre path conditions of basic block i |
| 318 | + // There is a corresponding relationship between pathCon and g.nodesIn |
| 319 | + // more specifically, pathCon[i][j] stores the pre path condition from basic block g.nodesIn[i][j] to i |
| 320 | + vector<vector<expr> > pathCon; |
| 321 | + // program FOL formula |
| 322 | + expr smt = stringToExpr("true"); |
| 323 | + // program output FOL formula |
| 324 | + expr smtOutput = stringToExpr("true"); |
| 325 | + // return the SMT for the given program |
| 326 | + expr smtProg(inst* program, int length, smtVar* sv); |
| 327 | + // return SMT for the given instruction |
| 328 | + expr smtInst(smtVar* sv, inst* in); |
| 329 | +public: |
| 330 | +}; |
| 331 | +\end{lstlisting} |
| 332 | +\end{document} |
0 commit comments