cprover
format_strings.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module: Format String Parser
4 
5 Author: CM Wintersteiger
6 
7 \*******************************************************************/
8 
11 
12 #include "format_strings.h"
13 
14 #include <util/exception_utils.h>
15 #include <util/std_types.h>
16 #include <util/std_expr.h>
17 
18 #include <util/c_types.h>
19 
20 #include <cctype>
21 
23  std::string::const_iterator &it,
24  format_tokent &curtok)
25 {
26  while(*it=='#' || *it=='0' ||
27  *it=='-' || *it==' ' || *it=='+')
28  {
29  switch(*it)
30  {
31  case '#':
32  curtok.flags.push_back(format_tokent::flag_typet::ALTERNATE); break;
33  case '0':
34  curtok.flags.push_back(format_tokent::flag_typet::ZERO_PAD); break;
35  case '-':
36  curtok.flags.push_back(format_tokent::flag_typet::LEFT_ADJUST); break;
37  case ' ':
38  curtok.flags.push_back(format_tokent::flag_typet::SIGNED_SPACE); break;
39  case '+':
40  curtok.flags.push_back(format_tokent::flag_typet::SIGN); break;
41  default:
43  std::string("unsupported format specifier flag: `") + *it + "'");
44  }
45  it++;
46  }
47 }
48 
50  std::string::const_iterator &it,
51  format_tokent &curtok)
52 {
53  if(*it=='*')
54  {
56  it++;
57  }
58 
59  std::string tmp;
60  for( ; isdigit(*it); it++) tmp+=*it;
61  curtok.field_width=string2integer(tmp);
62 }
63 
65  std::string::const_iterator &it,
66  format_tokent &curtok)
67 {
68  if(*it=='.')
69  {
70  it++;
71 
72  if(*it=='*')
73  {
75  it++;
76  }
77  else
78  {
79  std::string tmp;
80  for( ; isdigit(*it); it++) tmp+=*it;
81  curtok.precision=string2integer(tmp);
82  }
83  }
84 }
85 
87  std::string::const_iterator &it,
88  format_tokent &curtok)
89 {
90  if(*it=='h')
91  {
92  it++;
93  if(*it=='h')
94  it++;
96  }
97  else if(*it=='l')
98  {
99  it++;
100  if(*it=='l')
101  it++;
103  }
104  else if(*it=='L')
105  {
106  it++;
108  }
109  else if(*it=='j')
110  {
111  it++;
113  }
114  else if(*it=='t')
115  {
116  it++;
118  }
119 }
120 
122  const std::string &arg_string,
123  std::string::const_iterator &it,
124  format_tokent &curtok)
125 {
126  switch(*it)
127  {
128  case 'd':
129  case 'i':
132  break;
133  case 'o':
136  break;
137  case 'u':
140  break;
141  case 'x':
142  case 'X':
145  break;
146  case 'e':
147  case 'E': curtok.type=format_tokent::token_typet::FLOAT; break;
148  case 'f':
149  case 'F': curtok.type=format_tokent::token_typet::FLOAT; break;
150  case 'g':
151  case 'G': curtok.type=format_tokent::token_typet::FLOAT; break;
152  case 'a':
153  case 'A': curtok.type=format_tokent::token_typet::FLOAT; break;
154  case 'c': curtok.type=format_tokent::token_typet::CHAR; break;
155  case 's': curtok.type=format_tokent::token_typet::STRING; break;
156  case 'p': curtok.type=format_tokent::token_typet::POINTER; break;
157  case '%':
159  curtok.value="%";
160  break;
161  case '[': // pattern matching in, e.g., fscanf.
162  {
163  std::string tmp;
164  it++;
165  if(*it=='^') // if it's there, it must be first
166  {
167  tmp+='^'; it++;
168  if(*it==']') // if it's there, it must be here
169  {
170  tmp+=']'; it++;
171  }
172  }
173 
174  for( ; it!=arg_string.end() && *it!=']'; it++)
175  tmp+=*it;
176 
177  break;
178  }
179 
180  default:
182  std::string("unsupported format conversion specifier: `") + *it + "'");
183  }
184  it++;
185 }
186 
187 format_token_listt parse_format_string(const std::string &arg_string)
188 {
189  format_token_listt token_list;
190 
191  std::string::const_iterator it=arg_string.begin();
192 
193  while(it!=arg_string.end())
194  {
195  if(*it=='%')
196  {
197  token_list.push_back(format_tokent());
198  format_tokent &curtok=token_list.back();
199  it++;
200 
201  parse_flags(it, curtok);
202  parse_field_width(it, curtok);
203  parse_precision(it, curtok);
204  parse_length_modifier(it, curtok);
205  parse_conversion_specifier(arg_string, it, curtok);
206  }
207  else
208  {
209  if(token_list.empty() ||
210  token_list.back().type!=format_tokent::token_typet::TEXT)
211  token_list.push_back(format_tokent(format_tokent::token_typet::TEXT));
212 
213  std::string tmp;
214  for( ; it!=arg_string.end() && *it!='%'; it++)
215  tmp+=*it;
216 
217  INVARIANT(
218  !token_list.empty() &&
219  token_list.back().type == format_tokent::token_typet::TEXT,
220  "must already have a TEXT token at the back of the token list");
221 
222  token_list.back().value=tmp;
223  }
224  }
225 
226  return token_list;
227 }
228 
230 {
231  switch(token.type)
232  {
234  switch(token.length_modifier)
235  {
238  return signed_char_type();
239  else
240  return unsigned_char_type();
241 
244  return signed_short_int_type();
245  else
246  return unsigned_short_int_type();
247 
250  return signed_long_int_type();
251  else
252  return unsigned_long_int_type();
253 
256  return signed_long_long_int_type();
257  else
259 
260  default:
262  return signed_int_type();
263  else
264  return unsigned_int_type();
265  }
266 
268  switch(token.length_modifier)
269  {
272  default: return float_type();
273  }
274 
276  switch(token.length_modifier)
277  {
279  default: return char_type();
280  }
281 
283  return pointer_type(void_type());
284 
286  switch(token.length_modifier)
287  {
289  return array_typet(wchar_t_type(), nil_exprt());
290  default: return array_typet(char_type(), nil_exprt());
291  }
292 
293  default:
294  return nil_typet();
295  }
296 }
The type of an expression, extends irept.
Definition: type.h:27
typet void_type()
Definition: c_types.cpp:253
pointer_typet pointer_type(const typet &subtype)
Definition: c_types.cpp:243
const mp_integer string2integer(const std::string &n, unsigned base)
Definition: mp_arith.cpp:57
format_token_listt parse_format_string(const std::string &arg_string)
unsignedbv_typet unsigned_int_type()
Definition: c_types.cpp:44
irep_idt value
Thrown when we encounter an instruction, parameters to an instruction etc.
Format String Parser.
mp_integer precision
#define INVARIANT(CONDITION, REASON)
This macro uses the wrapper function 'invariant_violated_string'.
Definition: invariant.h:400
typet get_type(const format_tokent &token)
length_modifierst length_modifier
The NIL expression.
Definition: std_expr.h:4461
API to expression classes.
void parse_precision(std::string::const_iterator &it, format_tokent &curtok)
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:80
void parse_length_modifier(std::string::const_iterator &it, format_tokent &curtok)
token_typet type
signedbv_typet signed_short_int_type()
Definition: c_types.cpp:37
floatbv_typet long_double_type()
Definition: c_types.cpp:201
floatbv_typet float_type()
Definition: c_types.cpp:185
bitvector_typet wchar_t_type()
Definition: c_types.cpp:149
Pre-defined types.
std::list< flag_typet > flags
unsignedbv_typet unsigned_short_int_type()
Definition: c_types.cpp:51
mp_integer field_width
floatbv_typet double_type()
Definition: c_types.cpp:193
The NIL type, i.e., an invalid type, no value.
Definition: std_types.h:39
void parse_flags(std::string::const_iterator &it, format_tokent &curtok)
std::list< format_tokent > format_token_listt
unsignedbv_typet unsigned_long_long_int_type()
Definition: c_types.cpp:101
Arrays with given size.
Definition: std_types.h:1000
signedbv_typet signed_int_type()
Definition: c_types.cpp:30
representationt representation
unsignedbv_typet unsigned_char_type()
Definition: c_types.cpp:135
unsignedbv_typet unsigned_long_int_type()
Definition: c_types.cpp:94
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:87
void parse_field_width(std::string::const_iterator &it, format_tokent &curtok)
signedbv_typet signed_char_type()
Definition: c_types.cpp:142
bitvector_typet char_type()
Definition: c_types.cpp:114
void parse_conversion_specifier(const std::string &arg_string, std::string::const_iterator &it, format_tokent &curtok)