cprover
c_preprocess.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "c_preprocess.h"
10 
11 #include <util/c_types.h>
12 #include <util/config.h>
13 #include <util/run.h>
14 #include <util/suffix.h>
15 #include <util/tempfile.h>
16 #include <util/unicode.h>
17 
18 #include <fstream>
19 
21 static std::string shell_quote(const std::string &src)
22 {
23  #ifdef _WIN32
24  // first check if quoting is needed at all
25 
26  if(src.find(' ')==std::string::npos &&
27  src.find('"')==std::string::npos &&
28  src.find('&')==std::string::npos &&
29  src.find('|')==std::string::npos &&
30  src.find('(')==std::string::npos &&
31  src.find(')')==std::string::npos &&
32  src.find('<')==std::string::npos &&
33  src.find('>')==std::string::npos &&
34  src.find('^')==std::string::npos)
35  {
36  // seems fine -- return as is
37  return src;
38  }
39 
40  std::string result;
41 
42  result+='"';
43 
44  for(const char ch : src)
45  {
46  if(ch=='"')
47  result+='"'; // quotes are doubled
48  result+=ch;
49  }
50 
51  result+='"';
52 
53  return result;
54 
55  #else
56 
57  // first check if quoting is needed at all
58 
59  if(src.find(' ')==std::string::npos &&
60  src.find('"')==std::string::npos &&
61  src.find('*')==std::string::npos &&
62  src.find('$')==std::string::npos &&
63  src.find('\\')==std::string::npos &&
64  src.find('?')==std::string::npos &&
65  src.find('&')==std::string::npos &&
66  src.find('|')==std::string::npos &&
67  src.find('>')==std::string::npos &&
68  src.find('<')==std::string::npos &&
69  src.find('^')==std::string::npos &&
70  src.find('\'')==std::string::npos)
71  {
72  // seems fine -- return as is
73  return src;
74  }
75 
76  std::string result;
77 
78  // the single quotes catch everything but themselves!
79  result+='\'';
80 
81  for(const char ch : src)
82  {
83  if(ch=='\'')
84  result+="'\\''";
85  result+=ch;
86  }
87 
88  result+='\'';
89 
90  return result;
91  #endif
92 }
93 
94 static void error_parse_line(
95  const std::string &line,
96  bool warning_only,
97  messaget &message)
98 {
99  std::string error_msg=line;
100  source_locationt saved_error_location;
101 
102  if(has_prefix(line, "file "))
103  {
104  const char *tptr=line.c_str();
105  int state=0;
106  std::string file, line_no, column, _error_msg, function;
107 
108  tptr+=5;
109 
110  char previous=0;
111 
112  while(*tptr!=0)
113  {
114  if(has_prefix(tptr, " line ") && state != 4)
115  {
116  state=1;
117  tptr+=6;
118  continue;
119  }
120  else if(has_prefix(tptr, " column ") && state != 4)
121  {
122  state=2;
123  tptr+=8;
124  continue;
125  }
126  else if(has_prefix(tptr, " function ") && state != 4)
127  {
128  state=3;
129  tptr+=10;
130  continue;
131  }
132  else if(*tptr==':' && state!=4)
133  {
134  if(tptr[1]==' ' && previous!=':')
135  {
136  state=4;
137  tptr++;
138  while(*tptr==' ') tptr++;
139  continue;
140  }
141  }
142 
143  if(state==0) // file
144  file+=*tptr;
145  else if(state==1) // line number
146  line_no+=*tptr;
147  else if(state==2) // column
148  column+=*tptr;
149  else if(state==3) // function
150  function+=*tptr;
151  else if(state==4) // error message
152  _error_msg+=*tptr;
153 
154  previous=*tptr;
155 
156  tptr++;
157  }
158 
159  if(state==4)
160  {
161  saved_error_location.set_file(file);
162  saved_error_location.set_function(function);
163  saved_error_location.set_line(line_no);
164  saved_error_location.set_column(column);
165  error_msg=_error_msg;
166  }
167  }
168  else if(has_prefix(line, "In file included from "))
169  {
170  }
171  else
172  {
173  const char *tptr=line.c_str();
174  int state=0;
175  std::string file, line_no;
176 
177  while(*tptr!=0)
178  {
179  if(state==0)
180  {
181  if(*tptr==':')
182  state++;
183  else
184  file+=*tptr;
185  }
186  else if(state==1)
187  {
188  if(*tptr==':')
189  state++;
190  else if(isdigit(*tptr))
191  line_no+=*tptr;
192  else
193  state=3;
194  }
195 
196  tptr++;
197  }
198 
199  if(state==2)
200  {
201  saved_error_location.set_file(file);
202  saved_error_location.set_function("");
203  saved_error_location.set_line(line_no);
204  saved_error_location.set_column("");
205  }
206  }
207 
209  warning_only ? message.warning() : message.error();
210  m.source_location=saved_error_location;
211  m << error_msg << messaget::eom;
212 }
213 
214 static void error_parse(
215  std::istream &errors,
216  bool warning_only,
217  messaget &message)
218 {
219  std::string line;
220 
221  while(std::getline(errors, line))
222  error_parse_line(line, warning_only, message);
223 }
224 
227  std::istream &instream,
228  std::ostream &outstream,
229  message_handlert &message_handler)
230 {
231  temporary_filet tmp_file("tmp.stdin", ".c");
232 
233  std::ofstream tmp(tmp_file());
234 
235  if(!tmp)
236  {
237  messaget message(message_handler);
238  message.error() << "failed to open temporary file" << messaget::eom;
239  return true; // error
240  }
241 
242  tmp << instream.rdbuf(); // copy
243 
244  tmp.close(); // flush
245 
246  bool result=c_preprocess(tmp_file(), outstream, message_handler);
247 
248  return result;
249 }
250 
252 static bool is_dot_i_file(const std::string &path)
253 {
254  return has_suffix(path, ".i") || has_suffix(path, ".ii");
255 }
256 
259  const std::string &, std::ostream &, message_handlert &);
260 bool c_preprocess_arm(
261  const std::string &, std::ostream &, message_handlert &);
263  const std::string &,
264  std::ostream &,
267 bool c_preprocess_none(
268  const std::string &, std::ostream &, message_handlert &);
270  const std::string &, std::ostream &, message_handlert &);
271 
273  const std::string &path,
274  std::ostream &outstream,
275  message_handlert &message_handler)
276 {
277  switch(config.ansi_c.preprocessor)
278  {
280  return c_preprocess_codewarrior(path, outstream, message_handler);
281 
283  return
285  path, outstream, message_handler, config.ansi_c.preprocessor);
286 
288  return
290  path, outstream, message_handler, config.ansi_c.preprocessor);
291 
293  return c_preprocess_visual_studio(path, outstream, message_handler);
294 
296  return c_preprocess_arm(path, outstream, message_handler);
297 
299  return c_preprocess_none(path, outstream, message_handler);
300  }
301 
302  // not reached
303  return true;
304 }
305 
308  const std::string &file,
309  std::ostream &outstream,
310  message_handlert &message_handler)
311 {
312  // check extension
313  if(is_dot_i_file(file))
314  return c_preprocess_none(file, outstream, message_handler);
315 
316  messaget message(message_handler);
317 
318  // use Visual Studio's CL
319 
320  temporary_filet stderr_file("tmp.stderr", "");
321  temporary_filet command_file_name("tmp.cl-cmd", "");
322 
323  {
324  std::ofstream command_file(command_file_name());
325 
326  // This marks the command file as UTF-8, which Visual Studio
327  // understands.
328  command_file << char(0xef) << char(0xbb) << char(0xbf);
329 
330  command_file << "/nologo" << '\n';
331  command_file << "/E" << '\n';
332 
333  // This option will make CL produce utf-8 output, as
334  // opposed to 8-bit with some code page.
335  // It only works on Visual Studio 2015 or newer.
336  command_file << "/source-charset:utf-8" << '\n';
337 
338  command_file << "/D__CPROVER__" << "\n";
339  command_file << "/D__WORDSIZE=" << config.ansi_c.pointer_width << "\n";
340 
342  {
343  command_file << "\"/D__PTRDIFF_TYPE__=long long int\"" << "\n";
344  // yes, both _WIN32 and _WIN64 get defined
345  command_file << "/D_WIN64" << "\n";
346  }
347  else if(config.ansi_c.int_width == 16 && config.ansi_c.pointer_width == 32)
348  {
349  // 16-bit LP32 is an artificial architecture we simulate when using --16
352  "Pointer difference expected to be long int typed");
353  command_file << "/D__PTRDIFF_TYPE__=long" << '\n';
354  }
355  else
356  {
359  "Pointer difference expected to be int typed");
360  command_file << "/D__PTRDIFF_TYPE__=int" << "\n";
361  }
362 
364  command_file << "/J" << "\n"; // This causes _CHAR_UNSIGNED to be defined
365 
366  for(const auto &define : config.ansi_c.defines)
367  command_file << "/D" << shell_quote(define) << "\n";
368 
369  for(const auto &include_path : config.ansi_c.include_paths)
370  command_file << "/I" << shell_quote(include_path) << "\n";
371 
372  for(const auto &include_file : config.ansi_c.include_files)
373  command_file << "/FI" << shell_quote(include_file) << "\n";
374 
375  // Finally, the file to be preprocessed
376  // (this is already in UTF-8).
377  command_file << shell_quote(file) << "\n";
378  }
379 
380  temporary_filet tmpi("tmp.cl", "");
381 
382  std::string command = "CL @\"" + command_file_name() + "\"";
383  command += " 2> \"" + stderr_file() + "\"";
384 
385  // _popen isn't very reliable on WIN32
386  // that's why we use run()
387  int result =
388  run("cl", {"cl", "@" + command_file_name()}, "", outstream, stderr_file());
389 
390  // errors/warnings
391  std::ifstream stderr_stream(stderr_file());
392  error_parse(stderr_stream, result==0, message);
393 
394  if(result!=0)
395  {
396  message.error() << "CL Preprocessing failed" << messaget::eom;
397  return true;
398  }
399 
400  return false;
401 }
402 
405  std::istream &instream,
406  std::ostream &outstream)
407 {
408  // CodeWarrior prepends some header to the file,
409  // marked with '#' signs.
410  // We skip over it.
411  //
412  // CodeWarrior has an ugly way of marking lines, e.g.:
413  //
414  // /* #line 1 "__ppc_eabi_init.cpp" /* stack depth 0 */
415  //
416  // We remove the initial '/* ' prefix
417 
418  std::string line;
419 
420  while(instream)
421  {
422  std::getline(instream, line);
423 
424  if(line.size()>=2 &&
425  line[0]=='#' && (line[1]=='#' || line[1]==' ' || line[1]=='\t'))
426  {
427  // skip the line!
428  }
429  else if(line.size()>=3 &&
430  line[0]=='/' && line[1]=='*' && line[2]==' ')
431  {
432  outstream << line.c_str()+3 << "\n"; // strip the '/* '
433  }
434  else
435  outstream << line << "\n";
436  }
437 }
438 
441  const std::string &file,
442  std::ostream &outstream,
443  message_handlert &message_handler)
444 {
445  // check extension
446  if(is_dot_i_file(file))
447  return c_preprocess_none(file, outstream, message_handler);
448 
449  // preprocessing
450  messaget message(message_handler);
451 
452  temporary_filet stderr_file("tmp.stderr", "");
453 
454  std::vector<std::string> command = {
455  "mwcceppc", "-E", "-P", "-D__CPROVER__", "-ppopt", "line", "-ppopt full"};
456 
457  for(const auto &define : config.ansi_c.defines)
458  command.push_back(" -D" + define);
459 
460  for(const auto &include_path : config.ansi_c.include_paths)
461  command.push_back(" -I" + include_path);
462 
463  for(const auto &include_file : config.ansi_c.include_files)
464  {
465  command.push_back(" -include");
466  command.push_back(include_file);
467  }
468 
469  for(const auto &opt : config.ansi_c.preprocessor_options)
470  command.push_back(opt);
471 
472  temporary_filet tmpi("tmp.cl", "");
473  command.push_back(file);
474  command.push_back("-o");
475  command.push_back(tmpi());
476 
477  int result = run(command[0], command, "", "", stderr_file());
478 
479  std::ifstream stream_i(tmpi());
480 
481  if(stream_i)
482  {
483  postprocess_codewarrior(stream_i, outstream);
484 
485  stream_i.close();
486  }
487  else
488  {
489  message.error() << "Preprocessing failed (fopen failed)"
490  << messaget::eom;
491  return true;
492  }
493 
494  // errors/warnings
495  std::ifstream stderr_stream(stderr_file());
496  error_parse(stderr_stream, result==0, message);
497 
498  if(result!=0)
499  {
500  message.error() << "Preprocessing failed" << messaget::eom;
501  return true;
502  }
503 
504  return false;
505 }
506 
509  const std::string &file,
510  std::ostream &outstream,
511  message_handlert &message_handler,
512  configt::ansi_ct::preprocessort preprocessor)
513 {
514  // check extension
515  if(is_dot_i_file(file))
516  return c_preprocess_none(file, outstream, message_handler);
517 
518  // preprocessing
519  messaget message(message_handler);
520 
521  temporary_filet stderr_file("tmp.stderr", "");
522 
523  std::vector<std::string> argv;
524 
526  argv.push_back("clang");
527  else
528  argv.push_back("gcc");
529 
530  argv.push_back("-E");
531  argv.push_back("-D__CPROVER__");
532 
533  const irep_idt &arch = config.ansi_c.arch;
534 
535  if(config.ansi_c.pointer_width == 16)
536  {
537  if(arch == "i386" || arch == "x86_64" || arch == "x32")
538  argv.push_back("-m16");
539  else if(has_prefix(id2string(arch), "mips"))
540  argv.push_back("-mips16");
541  }
542  else if(config.ansi_c.pointer_width == 32)
543  {
544  if(arch == "i386" || arch == "x86_64")
545  argv.push_back("-m32");
546  else if(arch == "x32")
547  argv.push_back("-mx32");
548  else if(has_prefix(id2string(arch), "mips"))
549  argv.push_back("-mabi=32");
550  else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
551  argv.push_back("-m32");
552  else if(arch == "s390" || arch == "s390x")
553  argv.push_back("-m31"); // yes, 31, not 32!
554  else if(arch == "sparc" || arch == "sparc64")
555  argv.push_back("-m32");
556  }
557  else if(config.ansi_c.pointer_width == 64)
558  {
559  if(arch == "i386" || arch == "x86_64" || arch == "x32")
560  argv.push_back("-m64");
561  else if(has_prefix(id2string(arch), "mips"))
562  argv.push_back("-mabi=64");
563  else if(arch == "powerpc" || arch == "ppc64" || arch == "ppc64le")
564  argv.push_back("-m64");
565  else if(arch == "s390" || arch == "s390x")
566  argv.push_back("-m64");
567  else if(arch == "sparc" || arch == "sparc64")
568  argv.push_back("-m64");
569  }
570 
571  // The width of wchar_t depends on the OS!
573  argv.push_back("-fshort-wchar");
574 
576  argv.push_back("-funsigned-char");
577 
579  argv.push_back("-nostdinc");
580 
581  // Set the standard
582  if(has_suffix(file, ".cpp") || has_suffix(file, ".CPP") ||
583  #ifndef _WIN32
584  has_suffix(file, ".C") ||
585  #endif
586  has_suffix(file, ".c++") || has_suffix(file, ".C++") ||
587  has_suffix(file, ".cp") || has_suffix(file, ".CP"))
588  {
589  switch(config.cpp.cpp_standard)
590  {
592  argv.push_back("-std=gnu++98");
593  break;
594 
596  argv.push_back("-std=gnu++03");
597  break;
598 
600  argv.push_back("-std=gnu++11");
601  break;
602 
604  argv.push_back("-std=gnu++14");
605  break;
606  }
607  }
608  else
609  {
610  switch(config.ansi_c.c_standard)
611  {
613  argv.push_back("-std=gnu++89");
614  break;
615 
617  argv.push_back("-std=gnu99");
618  break;
619 
621  argv.push_back("-std=gnu11");
622  break;
623  }
624  }
625 
626  for(const auto &define : config.ansi_c.defines)
627  argv.push_back("-D" + define);
628 
629  for(const auto &include_path : config.ansi_c.include_paths)
630  argv.push_back("-I" + include_path);
631 
632  for(const auto &include_file : config.ansi_c.include_files)
633  {
634  argv.push_back("-include");
635  argv.push_back(include_file);
636  }
637 
638  for(const auto &opt : config.ansi_c.preprocessor_options)
639  argv.push_back(opt);
640 
641  int result;
642 
643  #if 0
644  // the following forces the mode
645  switch(config.ansi_c.mode)
646  {
647  case configt::ansi_ct::flavourt::GCC_C: command+=" -x c"; break;
648  case configt::ansi_ct::flavourt::GCC_CPP: command+=" -x c++"; break;
649  default:
650  {
651  }
652  }
653  #endif
654 
655  // the file that is to be preprocessed
656  argv.push_back(file);
657 
658  // execute clang or gcc
659  result = run(argv[0], argv, "", outstream, stderr_file());
660 
661  // errors/warnings
662  std::ifstream stderr_stream(stderr_file());
663  error_parse(stderr_stream, result==0, message);
664 
665  if(result!=0)
666  {
667  message.error() << "GCC preprocessing failed" << messaget::eom;
668  return true;
669  }
670 
671  return false;
672 }
673 
676  const std::string &file,
677  std::ostream &outstream,
678  message_handlert &message_handler)
679 {
680  // check extension
681  if(is_dot_i_file(file))
682  return c_preprocess_none(file, outstream, message_handler);
683 
684  // preprocessing using armcc
685  messaget message(message_handler);
686 
687  temporary_filet stderr_file("tmp.stderr", "");
688 
689  std::vector<std::string> argv;
690 
691  argv.push_back("armcc");
692  argv.push_back("-E");
693  argv.push_back("-D__CPROVER__");
694 
696  argv.push_back("--bigend");
697  else
698  argv.push_back("--littleend");
699 
701  argv.push_back("--unsigned_chars");
702  else
703  argv.push_back("--signed_chars");
704 
705  // Set the standard
706  switch(config.ansi_c.c_standard)
707  {
709  argv.push_back("--c90");
710  break;
711 
714  argv.push_back("--c99");
715  break;
716  }
717 
718  for(const auto &define : config.ansi_c.defines)
719  argv.push_back("-D" + define);
720 
721  for(const auto &include_path : config.ansi_c.include_paths)
722  argv.push_back("-I" + include_path);
723 
724  // the file that is to be preprocessed
725  argv.push_back(file);
726 
727  int result;
728 
729  // execute armcc
730  result = run(argv[0], argv, "", outstream, stderr_file());
731 
732  // errors/warnings
733  std::ifstream stderr_stream(stderr_file());
734  error_parse(stderr_stream, result==0, message);
735 
736  if(result!=0)
737  {
738  message.error() << "ARMCC preprocessing failed" << messaget::eom;
739  return true;
740  }
741 
742  return false;
743 }
744 
747  const std::string &file,
748  std::ostream &outstream,
749  message_handlert &message_handler)
750 {
751  #ifdef _MSC_VER
752  std::ifstream infile(widen(file));
753  #else
754  std::ifstream infile(file);
755  #endif
756 
757  if(!infile)
758  {
759  messaget message(message_handler);
760  message.error() << "failed to open `" << file << "'" << messaget::eom;
761  return true;
762  }
763 
765  {
766  // special treatment for "/* #line"
767  postprocess_codewarrior(infile, outstream);
768  }
769  else
770  {
771  char ch;
772 
773  while(infile.read(&ch, 1))
774  outstream << ch;
775  }
776 
777  return false;
778 }
779 
781 const char c_test_program[]=
782  "#include <stdlib.h>\n"
783  "\n"
784  "int main() { }\n";
785 
786 bool test_c_preprocessor(message_handlert &message_handler)
787 {
788  std::ostringstream out;
789  std::istringstream in(c_test_program);
790 
791  return c_preprocess(in, out, message_handler);
792 }
bool c_preprocess_visual_studio(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
struct configt::ansi_ct ansi_c
void set_function(const irep_idt &function)
bool c_preprocess_codewarrior(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
const std::string & id2string(const irep_idt &d)
Definition: irep.h:44
std::wstring widen(const char *s)
Definition: unicode.cpp:52
static std::string shell_quote(const std::string &src)
quote a string for bash and CMD
endiannesst endianness
Definition: config.h:76
std::list< std::string > defines
Definition: config.h:121
bool c_preprocess(std::istream &instream, std::ostream &outstream, message_handlert &message_handler)
ANSI-C preprocessing.
configt config
Definition: config.cpp:24
signedbv_typet pointer_diff_type()
Definition: c_types.cpp:228
mstreamt & warning() const
Definition: message.h:391
preprocessort preprocessor
Definition: config.h:119
std::list< std::string > include_files
Definition: config.h:125
bool c_preprocess_gcc_clang(const std::string &, std::ostream &, message_handlert &, configt::ansi_ct::preprocessort)
ANSI-C preprocessing.
flavourt mode
Definition: config.h:115
static bool is_dot_i_file(const std::string &path)
ANSI-C preprocessing.
source_locationt source_location
Definition: message.h:236
void set_file(const irep_idt &file)
bool c_preprocess_arm(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
mstreamt & error() const
Definition: message.h:386
void set_line(const irep_idt &line)
irep_idt arch
Definition: config.h:84
enum configt::cppt::cpp_standardt cpp_standard
void set_column(const irep_idt &column)
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:80
int run(const std::string &what, const std::vector< std::string > &argv)
Definition: run.cpp:47
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
std::size_t int_width
Definition: config.h:30
Class that provides messages with a built-in verbosity &#39;level&#39;.
Definition: message.h:144
dstringt has one field, an unsigned integer no which is an index into a static table of strings...
Definition: dstring.h:35
std::size_t pointer_width
Definition: config.h:36
bool char_is_unsigned
Definition: config.h:43
void postprocess_codewarrior(std::istream &instream, std::ostream &outstream)
post-processing specifically for CodeWarrior
enum configt::ansi_ct::c_standardt c_standard
std::size_t wchar_t_width
Definition: config.h:40
static eomt eom
Definition: message.h:284
bool test_c_preprocessor(message_handlert &message_handler)
struct configt::cppt cpp
const char c_test_program[]
tests ANSI-C preprocessing
static void error_parse(std::istream &errors, bool warning_only, messaget &message)
static void error_parse_line(const std::string &line, bool warning_only, messaget &message)
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
signedbv_typet signed_int_type()
Definition: c_types.cpp:30
#define DATA_INVARIANT(CONDITION, REASON)
This condition should be used to document that assumptions that are made on goto_functions, goto_programs, exprts, etc.
Definition: invariant.h:485
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:87
std::list< std::string > preprocessor_options
Definition: config.h:123
std::size_t short_int_width
Definition: config.h:34
bool c_preprocess_none(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
std::list< std::string > include_paths
Definition: config.h:124
Definition: kdev_t.h:19