cprover
c_preprocess.cpp
Go to the documentation of this file.
1 /*******************************************************************\
2 
3 Module:
4 
5 Author: Daniel Kroening, kroening@kroening.com
6 
7 \*******************************************************************/
8 
9 #include "c_preprocess.h"
10 
11 #include <util/c_types.h>
12 #include <util/config.h>
13 #include <util/suffix.h>
14 #include <util/tempfile.h>
15 #include <util/unicode.h>
16 
17 #include <fstream>
18 
20 static std::string shell_quote(const std::string &src)
21 {
22  #ifdef _WIN32
23  // first check if quoting is needed at all
24 
25  if(src.find(' ')==std::string::npos &&
26  src.find('"')==std::string::npos &&
27  src.find('&')==std::string::npos &&
28  src.find('|')==std::string::npos &&
29  src.find('(')==std::string::npos &&
30  src.find(')')==std::string::npos &&
31  src.find('<')==std::string::npos &&
32  src.find('>')==std::string::npos &&
33  src.find('^')==std::string::npos)
34  {
35  // seems fine -- return as is
36  return src;
37  }
38 
39  std::string result;
40 
41  result+='"';
42 
43  for(const char ch : src)
44  {
45  if(ch=='"')
46  result+='"'; // quotes are doubled
47  result+=ch;
48  }
49 
50  result+='"';
51 
52  return result;
53 
54  #else
55 
56  // first check if quoting is needed at all
57 
58  if(src.find(' ')==std::string::npos &&
59  src.find('"')==std::string::npos &&
60  src.find('*')==std::string::npos &&
61  src.find('$')==std::string::npos &&
62  src.find('\\')==std::string::npos &&
63  src.find('?')==std::string::npos &&
64  src.find('&')==std::string::npos &&
65  src.find('|')==std::string::npos &&
66  src.find('>')==std::string::npos &&
67  src.find('<')==std::string::npos &&
68  src.find('^')==std::string::npos &&
69  src.find('\'')==std::string::npos)
70  {
71  // seems fine -- return as is
72  return src;
73  }
74 
75  std::string result;
76 
77  // the single quotes catch everything but themselves!
78  result+='\'';
79 
80  for(const char ch : src)
81  {
82  if(ch=='\'')
83  result+="'\\''";
84  result+=ch;
85  }
86 
87  result+='\'';
88 
89  return result;
90  #endif
91 }
92 
93 static void error_parse_line(
94  const std::string &line,
95  bool warning_only,
96  messaget &message)
97 {
98  std::string error_msg=line;
99  source_locationt saved_error_location;
100 
101  if(has_prefix(line, "file "))
102  {
103  const char *tptr=line.c_str();
104  int state=0;
105  std::string file, line_no, column, _error_msg, function;
106 
107  tptr+=5;
108 
109  char previous=0;
110 
111  while(*tptr!=0)
112  {
113  if(has_prefix(tptr, " line ") && state != 4)
114  {
115  state=1;
116  tptr+=6;
117  continue;
118  }
119  else if(has_prefix(tptr, " column ") && state != 4)
120  {
121  state=2;
122  tptr+=8;
123  continue;
124  }
125  else if(has_prefix(tptr, " function ") && state != 4)
126  {
127  state=3;
128  tptr+=10;
129  continue;
130  }
131  else if(*tptr==':' && state!=4)
132  {
133  if(tptr[1]==' ' && previous!=':')
134  {
135  state=4;
136  tptr++;
137  while(*tptr==' ') tptr++;
138  continue;
139  }
140  }
141 
142  if(state==0) // file
143  file+=*tptr;
144  else if(state==1) // line number
145  line_no+=*tptr;
146  else if(state==2) // column
147  column+=*tptr;
148  else if(state==3) // function
149  function+=*tptr;
150  else if(state==4) // error message
151  _error_msg+=*tptr;
152 
153  previous=*tptr;
154 
155  tptr++;
156  }
157 
158  if(state==4)
159  {
160  saved_error_location.set_file(file);
161  saved_error_location.set_function(function);
162  saved_error_location.set_line(line_no);
163  saved_error_location.set_column(column);
164  error_msg=_error_msg;
165  }
166  }
167  else if(has_prefix(line, "In file included from "))
168  {
169  }
170  else
171  {
172  const char *tptr=line.c_str();
173  int state=0;
174  std::string file, line_no;
175 
176  while(*tptr!=0)
177  {
178  if(state==0)
179  {
180  if(*tptr==':')
181  state++;
182  else
183  file+=*tptr;
184  }
185  else if(state==1)
186  {
187  if(*tptr==':')
188  state++;
189  else if(isdigit(*tptr))
190  line_no+=*tptr;
191  else
192  state=3;
193  }
194 
195  tptr++;
196  }
197 
198  if(state==2)
199  {
200  saved_error_location.set_file(file);
201  saved_error_location.set_function("");
202  saved_error_location.set_line(line_no);
203  saved_error_location.set_column("");
204  }
205  }
206 
208  warning_only ? message.warning() : message.error();
209  m.source_location=saved_error_location;
210  m << error_msg << messaget::eom;
211 }
212 
213 static void error_parse(
214  std::istream &errors,
215  bool warning_only,
216  messaget &message)
217 {
218  std::string line;
219 
220  while(std::getline(errors, line))
221  error_parse_line(line, warning_only, message);
222 }
223 
226  std::istream &instream,
227  std::ostream &outstream,
229 {
230  temporary_filet tmp_file("tmp.stdin", ".c");
231 
232  std::ofstream tmp(tmp_file());
233 
234  if(!tmp)
235  {
236  messaget message(message_handler);
237  message.error() << "failed to open temporary file" << messaget::eom;
238  return true; // error
239  }
240 
241  tmp << instream.rdbuf(); // copy
242 
243  tmp.close(); // flush
244 
245  bool result=c_preprocess(tmp_file(), outstream, message_handler);
246 
247  return result;
248 }
249 
251 static bool is_dot_i_file(const std::string &path)
252 {
253  return has_suffix(path, ".i") || has_suffix(path, ".ii");
254 }
255 
258  const std::string &, std::ostream &, message_handlert &);
259 bool c_preprocess_arm(
260  const std::string &, std::ostream &, message_handlert &);
262  const std::string &,
263  std::ostream &,
266 bool c_preprocess_none(
267  const std::string &, std::ostream &, message_handlert &);
269  const std::string &, std::ostream &, message_handlert &);
270 
272  const std::string &path,
273  std::ostream &outstream,
275 {
276  switch(config.ansi_c.preprocessor)
277  {
279  return c_preprocess_codewarrior(path, outstream, message_handler);
280 
282  return
284  path, outstream, message_handler, config.ansi_c.preprocessor);
285 
287  return
289  path, outstream, message_handler, config.ansi_c.preprocessor);
290 
292  return c_preprocess_visual_studio(path, outstream, message_handler);
293 
295  return c_preprocess_arm(path, outstream, message_handler);
296 
298  return c_preprocess_none(path, outstream, message_handler);
299  }
300 
301  // not reached
302  return true;
303 }
304 
307  const std::string &file,
308  std::ostream &outstream,
310 {
311  // check extension
312  if(is_dot_i_file(file))
313  return c_preprocess_none(file, outstream, message_handler);
314 
315  messaget message(message_handler);
316 
317  // use Visual Studio's CL
318 
319  temporary_filet stderr_file("tmp.stderr", "");
320  temporary_filet command_file_name("tmp.cl-cmd", "");
321 
322  {
323  std::ofstream command_file(command_file_name());
324 
325  // This marks the command file as UTF-8, which Visual Studio
326  // understands.
327  command_file << char(0xef) << char(0xbb) << char(0xbf);
328 
329  command_file << "/nologo" << '\n';
330  command_file << "/E" << '\n';
331 
332  // This option will make CL produce utf-8 output, as
333  // opposed to 8-bit with some code page.
334  // It only works on Visual Studio 2015 or newer.
335  command_file << "/source-charset:utf-8" << '\n';
336 
337  command_file << "/D__CPROVER__" << "\n";
338  command_file << "/D__WORDSIZE=" << config.ansi_c.pointer_width << "\n";
339 
341  {
342  command_file << "\"/D__PTRDIFF_TYPE__=long long int\"" << "\n";
343  // yes, both _WIN32 and _WIN64 get defined
344  command_file << "/D_WIN64" << "\n";
345  }
346  else if(config.ansi_c.int_width == 16 && config.ansi_c.pointer_width == 32)
347  {
348  // 16-bit LP32 is an artificial architecture we simulate when using --16
351  "Pointer difference expected to be long int typed");
352  command_file << "/D__PTRDIFF_TYPE__=long" << '\n';
353  }
354  else
355  {
358  "Pointer difference expected to be int typed");
359  command_file << "/D__PTRDIFF_TYPE__=int" << "\n";
360  }
361 
363  command_file << "/J" << "\n"; // This causes _CHAR_UNSIGNED to be defined
364 
365  for(const auto &define : config.ansi_c.defines)
366  command_file << "/D" << shell_quote(define) << "\n";
367 
368  for(const auto &include_path : config.ansi_c.include_paths)
369  command_file << "/I" << shell_quote(include_path) << "\n";
370 
371  for(const auto &include_file : config.ansi_c.include_files)
372  command_file << "/FI" << shell_quote(include_file) << "\n";
373 
374  // Finally, the file to be preprocessed
375  // (this is already in UTF-8).
376  command_file << shell_quote(file) << "\n";
377  }
378 
379  temporary_filet tmpi("tmp.cl", "");
380 
381  std::string command = "CL @\"" + command_file_name() + "\"";
382  command += " > \"" + tmpi() + "\"";
383  command += " 2> \"" + stderr_file() + "\"";
384 
385  // _popen isn't very reliable on WIN32
386  // that's why we use system()
387  int result=system(command.c_str());
388 
389  std::ifstream instream(tmpi());
390 
391  if(!instream)
392  {
393  message.error() << "CL Preprocessing failed (open failed)"
394  << messaget::eom;
395  return true;
396  }
397 
398  outstream << instream.rdbuf(); // copy
399 
400  instream.close();
401 
402  // errors/warnings
403  std::ifstream stderr_stream(stderr_file());
404  error_parse(stderr_stream, result==0, message);
405 
406  if(result!=0)
407  {
408  message.error() << "CL Preprocessing failed" << messaget::eom;
409  return true;
410  }
411 
412  return false;
413 }
414 
417  std::istream &instream,
418  std::ostream &outstream)
419 {
420  // CodeWarrior prepends some header to the file,
421  // marked with '#' signs.
422  // We skip over it.
423  //
424  // CodeWarrior has an ugly way of marking lines, e.g.:
425  //
426  // /* #line 1 "__ppc_eabi_init.cpp" /* stack depth 0 */
427  //
428  // We remove the initial '/* ' prefix
429 
430  std::string line;
431 
432  while(instream)
433  {
434  std::getline(instream, line);
435 
436  if(line.size()>=2 &&
437  line[0]=='#' && (line[1]=='#' || line[1]==' ' || line[1]=='\t'))
438  {
439  // skip the line!
440  }
441  else if(line.size()>=3 &&
442  line[0]=='/' && line[1]=='*' && line[2]==' ')
443  {
444  outstream << line.c_str()+3 << "\n"; // strip the '/* '
445  }
446  else
447  outstream << line << "\n";
448  }
449 }
450 
453  const std::string &file,
454  std::ostream &outstream,
456 {
457  // check extension
458  if(is_dot_i_file(file))
459  return c_preprocess_none(file, outstream, message_handler);
460 
461  // preprocessing
462  messaget message(message_handler);
463 
464  temporary_filet stderr_file("tmp.stderr", "");
465 
466  std::string command;
467 
468  command="mwcceppc -E -P -D__CPROVER__ -ppopt line -ppopt full";
469 
470  for(const auto &define : config.ansi_c.defines)
471  command+=" -D"+shell_quote(define);
472 
473  for(const auto &include_path : config.ansi_c.include_paths)
474  command+=" -I"+shell_quote(include_path);
475 
476  for(const auto &include_file : config.ansi_c.include_files)
477  command+=" -include "+shell_quote(include_file);
478 
479  for(const auto &opt : config.ansi_c.preprocessor_options)
480  command+=" "+opt;
481 
482  int result;
483 
484  temporary_filet tmpi("tmp.cl", "");
485  command+=" \""+file+"\"";
486  command += " -o \"" + tmpi() + "\"";
487  command += " 2> \"" + stderr_file() + "\"";
488 
489  result=system(command.c_str());
490 
491  std::ifstream stream_i(tmpi());
492 
493  if(stream_i)
494  {
495  postprocess_codewarrior(stream_i, outstream);
496 
497  stream_i.close();
498  }
499  else
500  {
501  message.error() << "Preprocessing failed (fopen failed)"
502  << messaget::eom;
503  return true;
504  }
505 
506  // errors/warnings
507  std::ifstream stderr_stream(stderr_file());
508  error_parse(stderr_stream, result==0, message);
509 
510  if(result!=0)
511  {
512  message.error() << "Preprocessing failed" << messaget::eom;
513  return true;
514  }
515 
516  return false;
517 }
518 
521  const std::string &file,
522  std::ostream &outstream,
524  configt::ansi_ct::preprocessort preprocessor)
525 {
526  // check extension
527  if(is_dot_i_file(file))
528  return c_preprocess_none(file, outstream, message_handler);
529 
530  // preprocessing
531  messaget message(message_handler);
532 
533  temporary_filet stderr_file("tmp.stderr", "");
534 
535  std::string command;
536 
538  command="clang";
539  else
540  command="gcc";
541 
542  command += " -E -D__CPROVER__";
543 
544  if(config.ansi_c.pointer_width == 16)
545  command += " -m16";
546  else if(config.ansi_c.pointer_width == 32)
547  command += " -m32";
548  else if(config.ansi_c.pointer_width == 64)
549  command += " -m64";
550 
551  // The width of wchar_t depends on the OS!
553  command += " -fshort-wchar";
554 
556  command += " -funsigned-char";
557 
559  command += " -nostdinc";
560 
561  // Set the standard
562  if(has_suffix(file, ".cpp") || has_suffix(file, ".CPP") ||
563 #ifndef _WIN32
564  has_suffix(file, ".C") ||
565 #endif
566  has_suffix(file, ".c++") || has_suffix(file, ".C++") ||
567  has_suffix(file, ".cp") || has_suffix(file, ".CP"))
568  {
569  switch(config.cpp.cpp_standard)
570  {
572  command += " -std=gnu++98";
573  break;
574 
576  command += " -std=gnu++03";
577  break;
578 
580  command += " -std=gnu++11";
581  break;
582 
584  command += " -std=gnu++14";
585  break;
586  }
587  }
588  else
589  {
590  switch(config.ansi_c.c_standard)
591  {
593  command += " -std=gnu++89";
594  break;
595 
597  command += " -std=gnu99";
598  break;
599 
601  command += " -std=gnu11";
602  break;
603  }
604  }
605 
606  for(const auto &define : config.ansi_c.defines)
607  command+=" -D"+shell_quote(define);
608 
609  for(const auto &include_path : config.ansi_c.include_paths)
610  command+=" -I"+shell_quote(include_path);
611 
612  for(const auto &include_file : config.ansi_c.include_files)
613  command+=" -include "+shell_quote(include_file);
614 
615  for(const auto &opt : config.ansi_c.preprocessor_options)
616  command+=" "+opt;
617 
618  int result;
619 
620  #if 0
621  // the following forces the mode
622  switch(config.ansi_c.mode)
623  {
624  case configt::ansi_ct::flavourt::GCC_C: command+=" -x c"; break;
625  case configt::ansi_ct::flavourt::GCC_CPP: command+=" -x c++"; break;
626  default:
627  {
628  }
629  }
630  #endif
631 
632  #ifdef _WIN32
633  temporary_filet tmpi("tmp.gcc", "");
634  command+=" \""+file+"\"";
635  command += " -o \"" + tmpi() + "\"";
636  command += " 2> \"" + stderr_file() + "\"";
637 
638  // _popen isn't very reliable on WIN32
639  // that's why we use system() and a temporary file
640  result=system(command.c_str());
641 
642  std::ifstream instream(tmpi());
643 
644  // errors/warnings
645  std::ifstream stderr_stream(stderr_file());
646  error_parse(stderr_stream, result==0, message);
647 
648  if(instream)
649  {
650  outstream << instream.rdbuf();
651  instream.close();
652  }
653  else
654  {
655  message.error() << "GCC preprocessing failed (open failed)"
656  << messaget::eom;
657  result=1;
658  }
659  #else
660  command+=" \""+file+"\"";
661  command += " 2> \"" + stderr_file() + "\"";
662 
663  FILE *stream=popen(command.c_str(), "r");
664 
665  if(stream!=nullptr)
666  {
667  int ch;
668  while((ch=fgetc(stream))!=EOF)
669  outstream << (unsigned char)ch;
670 
671  result=pclose(stream);
672  }
673  else
674  {
675  message.error() << "GCC preprocessing failed (popen failed)"
676  << messaget::eom;
677  result=1;
678  }
679 
680  // errors/warnings
681  std::ifstream stderr_stream(stderr_file());
682  error_parse(stderr_stream, result==0, message);
683 
684  #endif
685 
686  if(result!=0)
687  {
688  message.error() << "GCC preprocessing failed" << messaget::eom;
689  return true;
690  }
691 
692  return false;
693 }
694 
697  const std::string &file,
698  std::ostream &outstream,
700 {
701  // check extension
702  if(is_dot_i_file(file))
703  return c_preprocess_none(file, outstream, message_handler);
704 
705  // preprocessing using armcc
706  messaget message(message_handler);
707 
708  temporary_filet stderr_file("tmp.stderr", "");
709 
710  std::string command;
711 
712  command="armcc -E -D__CPROVER__";
713 
715  command += " --bigend";
716  else
717  command += " --littleend";
718 
720  command += " --unsigned_chars";
721  else
722  command += " --signed_chars";
723 
724  // Set the standard
725  switch(config.ansi_c.c_standard)
726  {
728  command += " --c90";
729  break;
730 
733  command += " --c99";
734  break;
735  }
736 
737  for(const auto &define : config.ansi_c.defines)
738  command+=" "+shell_quote("-D"+define);
739 
740  for(const auto &include_path : config.ansi_c.include_paths)
741  command+=" "+shell_quote("-I"+include_path);
742 
743  int result;
744 
745  #ifdef _WIN32
746  temporary_filet tmpi("tmp.cl", "");
747  command+=" \""+file+"\"";
748  command += " > \"" + tmpi() + "\"";
749  command += " 2> \"" + stderr_file() + "\"";
750 
751  // _popen isn't very reliable on WIN32
752  // that's why we use system() and a temporary file
753  result=system(command.c_str());
754 
755  std::ifstream instream(tmpi());
756 
757  if(!instream)
758  {
759  outstream << instream.rdbuf(); // copy
760  instream.close();
761  }
762  else
763  {
764  message.error() << "ARMCC preprocessing failed (fopen failed)"
765  << messaget::eom;
766  return true;
767  }
768  #else
769  command+=" \""+file+"\"";
770  command += " 2> \"" + stderr_file() + "\"";
771 
772  FILE *stream=popen(command.c_str(), "r");
773 
774  if(stream!=nullptr)
775  {
776  int ch;
777  while((ch=fgetc(stream))!=EOF)
778  outstream << (unsigned char)ch;
779 
780  result=pclose(stream);
781  }
782  else
783  {
784  message.error() << "ARMCC preprocessing failed (popen failed)"
785  << messaget::eom;
786  return true;
787  }
788  #endif
789 
790  // errors/warnings
791  std::ifstream stderr_stream(stderr_file());
792  error_parse(stderr_stream, result==0, message);
793 
794  if(result!=0)
795  {
796  message.error() << "ARMCC preprocessing failed" << messaget::eom;
797  return true;
798  }
799 
800  return false;
801 }
802 
805  const std::string &file,
806  std::ostream &outstream,
808 {
809  #ifdef _MSC_VER
810  std::ifstream infile(widen(file));
811  #else
812  std::ifstream infile(file);
813  #endif
814 
815  if(!infile)
816  {
817  messaget message(message_handler);
818  message.error() << "failed to open `" << file << "'" << messaget::eom;
819  return true;
820  }
821 
823  {
824  // special treatment for "/* #line"
825  postprocess_codewarrior(infile, outstream);
826  }
827  else
828  {
829  char ch;
830 
831  while(infile.read(&ch, 1))
832  outstream << ch;
833  }
834 
835  return false;
836 }
837 
839 const char c_test_program[]=
840  "#include <stdlib.h>\n"
841  "\n"
842  "int main() { }\n";
843 
845 {
846  std::ostringstream out;
847  std::istringstream in(c_test_program);
848 
849  return c_preprocess(in, out, message_handler);
850 }
bool c_preprocess_visual_studio(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
struct configt::ansi_ct ansi_c
void set_function(const irep_idt &function)
bool c_preprocess_codewarrior(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
std::wstring widen(const char *s)
Definition: unicode.cpp:56
static std::string shell_quote(const std::string &src)
quote a string for bash and CMD
endiannesst endianness
Definition: config.h:76
std::list< std::string > defines
Definition: config.h:112
bool c_preprocess(std::istream &instream, std::ostream &outstream, message_handlert &message_handler)
ANSI-C preprocessing.
static mstreamt & eom(mstreamt &m)
Definition: message.h:272
configt config
Definition: config.cpp:23
signedbv_typet pointer_diff_type()
Definition: c_types.cpp:228
mstreamt & warning() const
Definition: message.h:307
preprocessort preprocessor
Definition: config.h:110
std::list< std::string > include_files
Definition: config.h:116
bool c_preprocess_gcc_clang(const std::string &, std::ostream &, message_handlert &, configt::ansi_ct::preprocessort)
ANSI-C preprocessing.
flavourt mode
Definition: config.h:106
static bool is_dot_i_file(const std::string &path)
ANSI-C preprocessing.
source_locationt source_location
Definition: message.h:214
void set_file(const irep_idt &file)
bool c_preprocess_arm(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
mstreamt & error() const
Definition: message.h:302
void set_line(const irep_idt &line)
enum configt::cppt::cpp_standardt cpp_standard
void set_column(const irep_idt &column)
signedbv_typet signed_long_int_type()
Definition: c_types.cpp:80
bool has_prefix(const std::string &s, const std::string &prefix)
Definition: converter.cpp:13
std::size_t int_width
Definition: config.h:30
std::size_t pointer_width
Definition: config.h:36
bool char_is_unsigned
Definition: config.h:43
void postprocess_codewarrior(std::istream &instream, std::ostream &outstream)
post-processing specifically for CodeWarrior
enum configt::ansi_ct::c_standardt c_standard
std::size_t wchar_t_width
Definition: config.h:40
bool test_c_preprocessor(message_handlert &message_handler)
struct configt::cppt cpp
const char c_test_program[]
tests ANSI-C preprocessing
static void error_parse(std::istream &errors, bool warning_only, messaget &message)
static void error_parse_line(const std::string &line, bool warning_only, messaget &message)
bool has_suffix(const std::string &s, const std::string &suffix)
Definition: suffix.h:15
goto_programt coverage_criteriont message_handlert & message_handler
Definition: cover.cpp:66
signedbv_typet signed_int_type()
Definition: c_types.cpp:30
#define DATA_INVARIANT(CONDITION, REASON)
Definition: invariant.h:257
signedbv_typet signed_long_long_int_type()
Definition: c_types.cpp:87
std::list< std::string > preprocessor_options
Definition: config.h:114
std::size_t short_int_width
Definition: config.h:34
bool c_preprocess_none(const std::string &, std::ostream &, message_handlert &)
ANSI-C preprocessing.
std::list< std::string > include_paths
Definition: config.h:115
Definition: kdev_t.h:19