cprover
goto-programs/module.md
Go to the documentation of this file.
1 \ingroup module_hidden
2 \defgroup module_goto-programs Goto Conversion & Instrumentation
3 
4 \author Kareem Khazem
5 
6 \section goto-conversion Goto Conversion
7 
8 In the \ref goto-programs directory.
9 
10 **Key classes:**
11 * goto_programt
12 * goto_functionst
13 * \ref goto_program_templatet::instructiont
14 
15 \dot
16 digraph G {
17  node [shape=box];
18  rankdir="LR";
19  1 [shape=none, label=""];
20  2 [label="goto conversion"];
21  3 [shape=none, label=""];
22  1 -> 2 [label="Symbol table"];
23  2 -> 3 [label="goto-programs, goto-functions, symbol table"];
24 }
25 \enddot
26 
27 At this stage, CBMC constructs a goto-program from a symbol table. It
28 does not use the parse tree or the source file at all for this step. This
29 may seem surprising, because the symbols are stored in a hash table and
30 therefore have no intrinsic order; nevertheless, every \ref symbolt is
31 associated with a \ref source_locationt, allowing CBMC to figure out the
32 lexical order.
33 
34 The structure of what is informally called a goto-program follows. The
35 entire target program is converted to a single \ref goto_functionst
36 object. The goto functions contains a set of \ref goto_programt objects;
37 each of these correspond to a "function" or "method" in the target
38 program. Each goto_program contains a list of
39 \ref goto_program_templatet::instructiont "instructions"; each
40 instruction contains an associated statement---these are subtypes of
41 \ref codet. Each instruction also contains a "target", which will be
42 empty for now.
43 
44 \dot
45 digraph G{
46  graph [nojustify=true];
47  node [shape=box];
48  compound=true;
49 
50  subgraph cluster_src {
51  1 [shape="none", label="source files"];
52  2 [label="file1.c\n-----------\nint main(){
53  int x = 5;
54  if(x > 7){
55  x = 9; } }
56 
57 void foo(){}"];
58  1 -> 2 [color=white];
59 
60  100 [label="file2.c\n--------\nchar bar(){ }"];
61  2 -> 100 [color=white];
62  }
63 
64  1 -> 3 [label="corresponds to", lhead=cluster_goto,
65  ltail=cluster_src];
66 
67  subgraph cluster_goto {
68  3 [label="a\ngoto_functionst", URL="\ref goto_functionst", shape=none];
69  4 [label="function_map\n(a map from irep_idt\nto goto_programt)"];
70  }
71  4 -> 5 [lhead=cluster_funmap, label="value:"];
72 
73  subgraph cluster_funmap {
74  9 [label="bar\n(an irep_idt)", URL="\ref irep_idt"];
75  10 [label="a goto_programt", URL="\ref goto_programt"];
76  9->10 [label="maps to"];
77 
78  5 [label="main\n(an irep_idt)", URL="\ref irep_idt"];
79 
80  7 [label="foo\n(an irep_idt)", URL="\ref irep_idt"];
81  8 [label="a goto_programt", URL="\ref goto_programt"];
82  7->8 [label="maps to"];
83 
84  subgraph cluster_goto_program {
85  11 [shape=none, label="a\ngoto_programt", URL="\ref goto_programt"];
86  12 [label="instructions\n(a list of instructiont)"];
87  }
88  5 -> 11 [lhead=cluster_goto_program, label="maps to:"];
89 
90  }
91 
92  12 -> target1 [lhead=cluster_instructions];
93 
94  subgraph cluster_instructions {
95  subgraph cluster_ins1{
96  code1 [label="code", URL="\ref codet"];
97  target1 [label="target"];
98  }
99 
100  target1 -> target2 [color=white,lhead=cluster_ins2,
101  ltail=cluster_ins1];
102 
103  subgraph cluster_ins2{
104  code2 [label="code", URL="\ref codet"];
105  target2 [label="target"];
106  }
107 
108  target2 -> target3 [color=white,lhead=cluster_ins3,
109  ltail=cluster_ins2];
110 
111  subgraph cluster_ins3{
112  code3 [label="code", URL="\ref codet"];
113  target3 [label="target"];
114  }
115 
116  target3 -> target4 [color=white,lhead=cluster_ins4,
117  ltail=cluster_ins3];
118 
119  subgraph cluster_ins4{
120  code4 [label="code", URL="\ref codet"];
121  target4 [label="target"];
122  }
123 
124  }
125 
126  subgraph cluster_decl {
127  decl1 [label="type:\ncode_declt", URL="\ref code_declt",
128  shape=none];
129  subgraph cluster_decl_in{
130  cluster_decl_in_1 [label="symbol()", shape=none];
131  cluster_decl_in_2 [label="x"];
132  cluster_decl_in_1 -> cluster_decl_in_2 [color=white];
133  }
134  decl1 -> cluster_decl_in_1 [lhead="cluster_decl_in", color=white];
135  }
136  code1 -> decl1 [lhead=cluster_decl];
137 
138  subgraph cluster_assign1 {
139  assign1 [label="type:\ncode_assignt", URL="\ref code_assignt",
140  shape=none];
141  subgraph cluster_assign1_in{
142  cluster_assign1_in_1 [label="lhs()", shape=none];
143  cluster_assign1_in_2 [label="x"];
144  cluster_assign1_in_1 -> cluster_assign1_in_2 [color=white];
145 
146  cluster_assign1_in_3 [label="rhs()", shape=none];
147  cluster_assign1_in_4 [label="5"];
148  cluster_assign1_in_3 -> cluster_assign1_in_4 [color=white];
149  }
150  assign1 -> cluster_assign1_in_1 [lhead="cluster_assign1_in", color=white];
151  }
152  code2 -> assign1 [lhead=cluster_assign1];
153 
154  subgraph cluster_if {
155  if [label="type:\ncode_ifthenelset", URL="\ref code_ifthenelset",
156  shape=none];
157  if_body [label="..."];
158  if -> if_body [color=white];
159  }
160  code3 -> if [lhead=cluster_if];
161 
162  subgraph cluster_assign2 {
163  assign2 [label="type:\ncode_assignt", URL="\ref code_assignt",
164  shape=none];
165  subgraph cluster_assign2_in{
166  cluster_assign2_in_1 [label="lhs()", shape=none];
167  cluster_assign2_in_2 [label="x"];
168  cluster_assign2_in_1 -> cluster_assign2_in_2 [color=white];
169 
170  cluster_assign2_in_3 [label="rhs()", shape=none];
171  cluster_assign2_in_4 [label="9"];
172  cluster_assign2_in_3 -> cluster_assign2_in_4 [color=white];
173  }
174  assign2 -> cluster_assign2_in_1 [lhead="cluster_assign2_in", color=white];
175  }
176  code4 -> assign2 [lhead=cluster_assign2];
177 
178 }
179 \enddot
180 
181 This is not the final form of the goto-functions, since the lists of
182 instructions will be 'normalized' in the next step (Instrumentation),
183 which removes some instructions and adds targets to others.
184 
185 Note that goto_programt and goto_functionst are each template
186 instantiations; they are currently the *only* specialization of
187 goto_program_templatet and goto_functions_templatet, respectively. This
188 means that the generated Doxygen documentation can be somewhat obtuse
189 about the actual types of things, and is unable to generate links to the
190 correct classes. Note that the
191 \ref goto_program_templatet::instructiont::code "code" member of a
192 goto_programt's instruction has type \ref codet (its type in the
193 goto_program_templatet documentation is given as "codeT", as this is the
194 name of the template's type parameter); similarly, the type of a guard
195 of an instruction is \ref guardt.
196 
197 ---
198 \section instrumentation Instrumentation
199 
200 In the \ref goto-programs directory.
201 
202 **Key classes:**
203 * goto_programt
204 * goto_functionst
205 * \ref goto_program_templatet::instructiont
206 
207 \dot
208 digraph G {
209  node [shape=box];
210  rankdir="LR";
211  1 [shape=none, label=""];
212  2 [label="goto conversion"];
213  3 [shape=none, label=""];
214  1 -> 2 [label="goto-programs, goto-functions, symbol table"];
215  2 -> 3 [label="transformed goto-programs"];
216 }
217 \enddot
218 
219 This stage applies several transformations to the goto-programs from the
220 previous stage:
221 
222 * The diagram in the previous stage showed a goto_programt with four
223  instructions, but real programs usually yield instruction lists that
224  are littered with \ref code_skipt "skip" statements. The
225  instrumentation stage removes the majority of these.
226 
227 * Function pointers are removed. They are turned into switch statements
228  (but see the next point; switch statements are further transformed).
229 
230 * Compound blocks are eliminated. There are several subclasses of
231  \ref codet that count as 'compound blocks;' therefore, later stages in
232  the CBMC pipeline that switch over the codet subtype of a particular
233  instruction should not need to consider these types. In particular:
234 
235  * code_ifthenelset is turned into GOTOs. In particular, the bodies of
236  the conditionals are flattened into lists of instructions, inline
237  with the rest of the instruction in the goto_programt. The guard of
238  the conditional is placed onto the
239  \ref goto_program_templatet::instructiont::guard "guard" member of
240  an instruction whose code member is of type \ref code_gotot. The
241  \ref goto_program_templatet::instructiont::targets "targets" member
242  of that instruction points to the appropriate branch of the
243  conditional. (Note that although instructions have a list of
244  targets, in practice an instruction should only ever have at most
245  one target; you should check this invariant with an assertion if you
246  rely on it).
247 
248  The order of instructions in a list of instructions---as well as the
249  targets of GOTOs---are both displayed as arrows when viewing a
250  goto-program as a Graphviz DOT file with `goto-instrument --dot`.
251  The semantics of a goto-program is: the next instruction is the next
252  instruction in the list, unless the current instruction has a
253  target; in that case, check the guard of the instruction, and jump
254  to the target if the guard evaluates to true.
255 
256  * switch statements, for and while loops, and try-catches are also
257  transformed into lists of instructions guarded by GOTOs.
258 
259  * \ref code_blockt "code blocks" are transformed into lists of
260  instructions.
261 
262 * \ref code_returnt "return statements" are transformed into
263  (unconditional) GOTOs whose target is the \ref END_FUNCTION
264  instruction. Each goto_programt should have precisely one such
265  instruction. Note the presence of \ref code_deadt, which has a
266  \ref code_deadt::symbol() "symbol()" member. Deads mark symbols that
267  have just gone out of scope; typically, a GOTO that jumps to an
268  END_FUNCTION instruction is preceded by a series of deads. Deads also
269  follow sequences of instructions that were part of the body of a
270  block (loop, conditional etc.) if there were symbols declared in that
271  block.
272 
273 This stage concludes the *analysis-independent* program transformations.