diff --git a/tcc.h b/tcc.h index 331d6e4..cfe7fb4 100644 --- a/tcc.h +++ b/tcc.h @@ -875,6 +875,7 @@ struct TCCState { #define TOK_SHR 0xcd /* unsigned shift right */ #define TOK_NOSUBST 0xcf /* means following token has already been pp'd */ #define TOK_GNUCOMMA 0xd0 /* ,## preprocessing token */ +#define TOK_AUTOSP 0xd1 /* space that is not in the input */ #define TOK_SHL 0x01 /* shift left */ #define TOK_SAR 0x02 /* signed shift right */ @@ -997,12 +998,18 @@ enum tcc_token { #define TOK_UIDENT TOK_DEFINE -/* space exlcuding newline */ +/* space excluding newline */ static inline int is_space(int ch) { return ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f' || ch == '\r'; } +/* space including newline */ +static inline int is_whitespace(int ch) +{ + return ch == ' ' || ch == '\t' || ch == '\v' || ch == '\f' || ch == '\r' || ch == '\n' || ch == TOK_AUTOSP; +} + static inline int isid(int c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; @@ -1146,8 +1153,9 @@ ST_DATA TokenSym **table_ident; token. line feed is also returned at eof */ #define PARSE_FLAG_ASM_FILE 0x0008 /* we processing an asm file: '#' can be used for line comment, etc. 
*/ -#define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E) */ -#define PARSE_FLAG_ACCEPT_STRAYS 0x0020 /* next() returns '\\' token */ +#define PARSE_FLAG_SPACES 0x0010 /* next() returns space tokens (for -E, # stringification) */ +#define PARSE_FLAG_ACCEPT_STRAYS 0x0020 /* next() returns '\\' token (for -E, # stringification) */ +#define PARSE_FLAG_AUTOSPACE 0x0040 /* next() returns automatic space tokens (for -E) */ ST_FUNC TokenSym *tok_alloc(const char *str, int len); ST_FUNC char *get_tok_str(int v, CValue *cv); diff --git a/tccpp.c b/tccpp.c index 95c3f23..865a1e2 100644 --- a/tccpp.c +++ b/tccpp.c @@ -44,6 +44,7 @@ static int *macro_ptr_allocated; static const int *unget_saved_macro_ptr; static int unget_saved_buffer[TOK_MAX_SIZE + 1]; static int unget_buffer_enabled; +static int unget_buffer_preprocess[TOK_MAX_SIZE + 1]; static TokenSym *hash_ident[TOK_HASH_SIZE]; static char token_buf[STRING_MAX_SIZE + 1]; /* true if isid(c) || isnum(c) */ @@ -95,7 +96,8 @@ static void macro_subst( TokenString *tok_str, Sym **nested_list, const int *macro_str, - struct macro_level **can_read_stream + struct macro_level **can_read_stream, + int *spc ); ST_FUNC void skip(int c) @@ -342,6 +344,11 @@ ST_FUNC char *get_tok_str(int v, CValue *cv) case TOK_GT: v = '>'; goto addv; + case TOK_AUTOSP: + v = ' '; + goto addv; + case TOK_GNUCOMMA: + return strcpy(p, ",##"); case TOK_DOTS: return strcpy(p, "..."); case TOK_A_SHL: @@ -961,6 +968,35 @@ static void tok_str_add2(TokenString *s, int t, CValue *cv) s->len = len; } +static void tok_str_add3(TokenString *s, int t, CValue *cv, int *spc) +{ + if (is_whitespace(t)) { + int len, *str; + + len = s->len; + str = s->str; + + if (*spc == 1) { + int p = str[len-1]; + + if (p == TOK_AUTOSP || + (p == ' ' && t != TOK_AUTOSP)) { + str[len-1] = t; + } else if (p != TOK_LINEFEED || t != TOK_LINEFEED) { + return; + } + } else if (*spc) { + return; + } else { + *spc = 1; + } + } else { + *spc = 0; + } + + tok_str_add2(s, t, 
cv); +} + /* add the current parse token in token string 's' */ ST_FUNC void tok_str_add_tok(TokenString *s) { @@ -1292,29 +1328,24 @@ ST_FUNC void parse_define(void) t = MACRO_FUNC; } tok_str_new(&str); - spc = 2; + spc = TOK_LINEFEED; /* EOF testing necessary for '-D' handling */ ptok = 0; macro_list_start = 1; saved_parse_flags = parse_flags; parse_flags |= PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED; while (tok != TOK_LINEFEED && tok != TOK_EOF) { - if (macro_list_start && spc == 2 && tok == TOK_TWOSHARPS) + if (spc == TOK_LINEFEED && tok == TOK_TWOSHARPS) tcc_error("'##' invalid at start of macro"); ptok = tok; /* remove spaces around ## and after '#' */ - if (TOK_TWOSHARPS == tok) { - if (1 == spc) - --str.len; - spc = 2; - } else if ('#' == tok) { + if (TOK_TWOSHARPS == tok && spc == 1) + --str.len; + tok_str_add3(&str, tok, &tokc, &spc); + /* remove spaces around ## and after '#' */ + if (TOK_TWOSHARPS == tok || '#' == tok) { spc = 2; - } else if (check_space(tok, &spc)) { - goto skip; } - tok_str_add2(&str, tok, &tokc); - macro_list_start = 0; - skip: next_nomacro_spc(); } parse_flags = saved_parse_flags; @@ -1893,6 +1924,7 @@ static void parse_escape_string(CString *outstr, const uint8_t *buf, int is_long break; default: invalid_escape: + cstr_ccat(outstr, '\\'); if (c >= '!' 
&& c <= '~') tcc_warning("unknown escape sequence: \'\\%c\'", c); else @@ -2712,6 +2744,12 @@ static void next_nomacro_spc(void) } } } else { + if (unget_buffer_preprocess[0]) { + const int *p = unget_buffer_preprocess; + TOK_GET(&tok, &p, &tokc); + unget_buffer_preprocess[0] = 0; + return; + } next_nomacro1(); } } @@ -2725,7 +2763,7 @@ ST_FUNC void next_nomacro(void) /* substitute arguments in replacement lists in macro_str by the values in args (field d) and return allocated string */ -static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args) +static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args, struct macro_level **can_read_stream) { int last_tok, t, spc; const int *st; @@ -2749,12 +2787,35 @@ static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args) s = sym_find2(args, t); if (s) { cstr_new(&cstr); + cstr_new(&cstr2); st = s->d; spc = 0; - while (*st) { + while (1) { TOK_GET(&t, &st, &cval); - if (t != TOK_PLCHLDR && !check_space(t, &spc)) + if (t == TOK_STR || t == 0) { + int bs = 0; + while(cstr2.size && ((char *)cstr2.data)[cstr2.size-1] == '\\') { + bs++; + cstr2.size--; + } + cstr_ccat(&cstr2, '\0'); + parse_escape_string(&cstr, cstr2.data, 0); + if (cstr.size) + cstr.size--; + while(bs--) + cstr_ccat(&cstr, '\\'); + cstr_free(&cstr2); + if (t == 0) + break; + cstr_new(&cstr2); cstr_cat(&cstr, get_tok_str(t, &cval)); + } else { + if (t == TOK_AUTOSP) + /* nothing */; + else if (t != TOK_PLCHLDR && !check_space(t, &spc)) { + cstr_cat(&cstr2, get_tok_str(t, &cval)); + } + } } cstr.size -= spc; cstr_ccat(&cstr, '\0'); @@ -2764,9 +2825,7 @@ static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args) /* add string */ cstr_new(&cstr2); /* emulate GCC behaviour and parse escapes in the token string */ - parse_escape_string(&cstr2, cstr.data, 0); - cstr_free(&cstr); - cval.cstr = &cstr2; + cval.cstr = &cstr; tok_str_add2(&str, TOK_STR, &cval); cstr_free(cval.cstr); } else { 
@@ -2778,18 +2837,6 @@ static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args) st = s->d; /* if '##' is present before or after, no arg substitution */ if (*macro_str == TOK_TWOSHARPS || last_tok == TOK_TWOSHARPS) { - /* special case for var arg macros : ## eats the - ',' if empty VA_ARGS variable. */ - /* XXX: test of the ',' is not 100% - reliable. should fix it to avoid security - problems */ - if (gnu_ext && s->type.t && - last_tok == TOK_TWOSHARPS && - str.len >= 2 && str.str[str.len - 2] == ',') { - str.len -= 2; - tok_str_add(&str, TOK_GNUCOMMA); - } - for(;;) { int t1; TOK_GET(&t1, &st, &cval); @@ -2799,8 +2846,10 @@ static int *macro_arg_subst(Sym **nested_list, const int *macro_str, Sym *args) } } else { /* NOTE: the stream cannot be read when macro - substituing an argument */ - macro_subst(&str, nested_list, st, NULL); + substituting an argument, and the underlying + file mustn't be read either.*/ + spc = 0; + macro_subst(&str, nested_list, st, NULL, &spc); } } else { tok_str_add(&str, t); @@ -2822,22 +2871,73 @@ static char const ab_month_name[12][4] = "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" }; + +/* ideally, this should be merged with next() */ +static void next_inmacro(Sym **nested_list, struct macro_level **can_read_stream) +{ + Sym *s; + +redo: + if (macro_ptr && *macro_ptr) + next(); + else { + next_nomacro_spc(); + + if (tok == 0) { + if (can_read_stream == NULL) { + /* macro_ptr still points at the 0 */ + return; + } + /* end of macro or end of unget buffer */ + if (unget_buffer_enabled) { + macro_ptr = unget_saved_macro_ptr; + unget_buffer_enabled = 0; + goto redo; + } else { + /* end of macro string: free it */ + tok_str_free(macro_ptr_allocated); + macro_ptr_allocated = NULL; + macro_ptr = NULL; + } + if (can_read_stream && *can_read_stream) { + struct macro_level *ml = *can_read_stream; + while (ml && !macro_ptr) { + macro_ptr = ml->p; + ml->p = NULL; + *can_read_stream = ml->prev; + ml = *can_read_stream; + } + s = 
*nested_list; + while (s && s->v == -1) + s = s->prev; + if (s) + s->v = -1; + if (macro_ptr) + goto redo; + } + goto redo; + } else if (tok == TOK_NOSUBST) { + goto redo; + } + } +} + /* do macro substitution of current token with macro 's' and add - result to (tok_str,tok_len). 'nested_list' is the list of all - macros we got inside to avoid recursing. Return non zero if no - substitution needs to be done */ + result to tok_str. 'nested_list' is the list of all macros we got + inside to avoid recursing. */ static int macro_subst_tok(TokenString *tok_str, - Sym **nested_list, Sym *s, struct macro_level **can_read_stream) + Sym **nested_list, Sym *s, struct macro_level **can_read_stream, + int *spc) { Sym *args, *sa, *sa1; - int mstr_allocated, parlevel, *mstr, t, t1, spc; - const int *p; + int mstr_allocated, parlevel, *mstr, t1, mtok, saved_parse_flags, i; TokenString str; char *cstrval; CValue cval; CString cstr; char buf[32]; - + TokenString ws_str; /* whitespace between name and arguments */ + /* if symbol is a macro, prepare substitution */ /* special macros */ if (tok == TOK___LINE__) { @@ -2872,86 +2972,38 @@ static int macro_subst_tok(TokenString *tok_str, tok_str_add2(tok_str, t1, &cval); cstr_free(&cstr); } else { - int mtok = tok; - int saved_parse_flags = parse_flags; + mtok = tok; + saved_parse_flags = parse_flags; parse_flags |= PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_SPACES | PARSE_FLAG_LINEFEED; + parse_flags &= ~PARSE_FLAG_TOK_NUM; mstr = s->d; mstr_allocated = 0; if (s->type.t == MACRO_FUNC) { - TokenString ws_str; /* whitespace between macro name and - * argument list */ + int spc; + tok_str_new(&ws_str); spc = 0; - /* NOTE: we do not use next_nomacro to avoid eating the - next token. 
XXX: find better solution */ - redo: - if (macro_ptr) { - p = macro_ptr; - while (is_space(t = *p) || TOK_LINEFEED == t) { - if (saved_parse_flags & PARSE_FLAG_SPACES) - tok_str_add(&ws_str, t); - ++p; - } - if (t == 0 && can_read_stream) { - /* end of macro stream: we must look at the token - after in the file */ - struct macro_level *ml = *can_read_stream; - macro_ptr = NULL; - if (ml) - { - macro_ptr = ml->p; - ml->p = NULL; - *can_read_stream = ml -> prev; - } - /* also, end of scope for nested defined symbol */ - (*nested_list)->v = -1; - goto redo; - } - } else { - ch = tcc_peekc_slow(file); - while (is_space(ch) || ch == '\n' || ch == '/') - { - if (ch == '/') - { - int c; - uint8_t *p = file->buf_ptr; - PEEKC(c, p); - if (c == '*') { - p = parse_comment(p); - file->buf_ptr = p - 1; - } else if (c == '/') { - p = parse_line_comment(p); - file->buf_ptr = p - 1; - } else - break; - ch = ' '; - } + while (1) { + next_inmacro(nested_list, can_read_stream); + if (is_whitespace(tok)) { if (saved_parse_flags & PARSE_FLAG_SPACES) - tok_str_add(&ws_str, ch); - cinp(); - } - t = ch; - } - if (t != '(') { - /* not a macro substitution after all, restore the - * macro token plus all whitespace we've read. - * whitespace is intentionally not merged to preserve - * newlines. 
*/ - int i; - tok_str_add(tok_str, mtok); - for(i=0; inext; /* NOTE: empty args are allowed, except if no args */ for(;;) { @@ -2964,20 +3016,23 @@ static int macro_subst_tok(TokenString *tok_str, tok_str_new(&str); parlevel = spc = 0; /* NOTE: non zero sa->t indicates VA_ARGS */ - while ((parlevel > 0 || - (tok != ')' && - (tok != ',' || sa->type.t)))) { - if (tok == TOK_EOF || tok == 0) + while (parlevel > 0 || + (tok != ')' && + (tok != ',' || sa->type.t))) { + if (tok == TOK_EOF || tok == 0) { break; - if (tok == '(') - parlevel++; - else if (tok == ')') - parlevel--; - if (tok == TOK_LINEFEED) - tok = ' '; - if (!check_space(tok, &spc)) - tok_str_add2(&str, tok, &tokc); - next_nomacro_spc(); + } else { + if (tok == '(') { + parlevel++; + } else if (tok == ')') { + parlevel--; + } else if (tok == TOK_LINEFEED) { + tok = ' '; + } + + tok_str_add3(&str, tok, &tokc, &spc); + next_inmacro(nested_list, can_read_stream); + } } if (parlevel) expect(")"); @@ -3006,7 +3061,7 @@ static int macro_subst_tok(TokenString *tok_str, } /* now subst each arg */ - mstr = macro_arg_subst(nested_list, mstr, args); + mstr = macro_arg_subst(nested_list, mstr, args, can_read_stream); /* free memory */ sa = args; while (sa) { @@ -3016,10 +3071,16 @@ static int macro_subst_tok(TokenString *tok_str, sa = sa1; } mstr_allocated = 1; + + tok_str_free(ws_str.str); } sym_push2(nested_list, s->v, 0, 0); parse_flags = saved_parse_flags; - macro_subst(tok_str, nested_list, mstr, can_read_stream); + macro_subst(tok_str, nested_list, mstr, can_read_stream, spc); + if (!*spc && (parse_flags & PARSE_FLAG_AUTOSPACE)) { + tok_str_add(tok_str, TOK_AUTOSP); + *spc = 1; + } /* pop nested defined symbol */ sa1 = *nested_list; *nested_list = sa1->prev; @@ -3028,6 +3089,17 @@ static int macro_subst_tok(TokenString *tok_str, tok_str_free(mstr); } return 0; + +abort: + /* it looked like we were going to process a macro, but the + * argument list was incomplete; so we fake un-getting all the + * tokens we've 
already consumed. */ + tok_str_add(tok_str, mtok); + for(i=0; i= 0) @@ -3069,8 +3147,12 @@ static inline int *macro_twosharps(const int *macro_str) /* given 'a##b', skip '##' */ t = *++ptr; /* given 'a##b', remove nosubsts preceding 'b' */ - while (t == TOK_NOSUBST) + while (t == TOK_NOSUBST || t == TOK_AUTOSP) t = *++ptr; + if (tok == ',') { + tok = TOK_GNUCOMMA; + break; + } if (t && t != TOK_TWOSHARPS) { CValue cval; TOK_GET(&t, &ptr, &cval); @@ -3079,13 +3161,15 @@ static inline int *macro_twosharps(const int *macro_str) if (tok != TOK_PLCHLDR) cstr_cat(&cstr, get_tok_str(tok, &tokc)); n = cstr.size; - if (t != TOK_PLCHLDR || tok == TOK_PLCHLDR) + if (t != TOK_PLCHLDR) cstr_cat(&cstr, get_tok_str(t, &cval)); cstr_ccat(&cstr, '\0'); tcc_open_bf(tcc_state, ":paste:", cstr.size); memcpy(file->buffer, cstr.data, cstr.size); for (;;) { + if (0 == *file->buf_ptr) + break; next_nomacro1(); if (0 == *file->buf_ptr) break; @@ -3096,14 +3180,31 @@ static inline int *macro_twosharps(const int *macro_str) tcc_close(); cstr_free(&cstr); } + if (tok == TOK_TWOSHARPS) { + /* two sharps twosharped together tokenize to two + * sharp tokens, not a twosharp token. */ + /* That's fun to say, but is it actually true? GCC + * stringifies #define a # ## # ## # to "## #" (and a + * warning), while we produce "###" (no warning) */ + tok_str_add(&macro_str1, '#'); + tok = '#'; + } + did_substitute = 1; } if (tok != TOK_NOSUBST) { tok_str_add2(&macro_str1, tok, &tokc); - tok = ' '; start_of_nosubsts = -1; - } - tok_str_add2(&macro_str1, tok, &tokc); + if (did_substitute && (parse_flags & PARSE_FLAG_AUTOSPACE)) + n_autosp++; + if (n_autosp) { + tok_str_add(&macro_str1, TOK_AUTOSP); + n_autosp = 0; + } + } else + tok_str_add2(&macro_str1, tok, &tokc); } + if (n_autosp) + tok_str_add(&macro_str1, TOK_AUTOSP); tok_str_add(&macro_str1, 0); return macro_str1.str; } @@ -3112,32 +3213,35 @@ static inline int *macro_twosharps(const int *macro_str) /* do macro substitution of macro_str and add result to (tok_str,tok_len). 
'nested_list' is the list of all macros we got inside to avoid recursing. */ -static void macro_subst(TokenString *tok_str, Sym **nested_list, - const int *macro_str, struct macro_level ** can_read_stream) +static void macro_subst(TokenString *tok_str, Sym **nested_list, + const int *macro_str, struct macro_level ** can_read_stream, + int *spc) { Sym *s; int *macro_str1; const int *ptr; - int t, spc; + int t; CValue cval; struct macro_level ml; int force_blank; int gnucomma_index = -1; - + int ret; + /* first scan for '##' operator handling */ ptr = macro_str; macro_str1 = macro_twosharps(ptr); if (macro_str1) ptr = macro_str1; - spc = 0; force_blank = 0; while (1) { + int old_len; /* NOTE: ptr == NULL can only happen if tokens are read from file stream due to a macro function call */ if (ptr == NULL) break; + old_len = tok_str->len; TOK_GET(&t, &ptr, &cval); if (t == 0) break; @@ -3155,11 +3259,12 @@ static void macro_subst(TokenString *tok_str, Sym **nested_list, tcc_error("two GNU commas in the same macro"); gnucomma_index = tok_str->len; tok_str_add(tok_str, ','); - TOK_GET(&t, &ptr, &cval); + continue; } s = define_find(t); if (s != NULL) { - int old_len = tok_str->len; + int old_len2 = tok_str->len; + /* if nested substitution, do nothing */ if (sym_find2(*nested_list, t)) { /* and mark it as TOK_NOSUBST, so it doesn't get subst'd again */ @@ -3171,25 +3276,44 @@ static void macro_subst(TokenString *tok_str, Sym **nested_list, ml.prev = *can_read_stream, *can_read_stream = &ml; macro_ptr = (int *)ptr; tok = t; - macro_subst_tok(tok_str, nested_list, s, can_read_stream); - spc = tok_str->len && is_space(tok_str->str[tok_str->len-1]); - ptr = (int *)macro_ptr; - macro_ptr = ml.p; - if (can_read_stream && *can_read_stream == &ml) - *can_read_stream = ml.prev; - if (parse_flags & PARSE_FLAG_SPACES) + + ret = macro_subst_tok(tok_str, nested_list, s, can_read_stream, spc); + + if (ret != 0) { + TokenString new_macro_str; + tok_str_new(&new_macro_str); + macro_ptr = 
ml.p; + if (can_read_stream) + *can_read_stream = ml.prev; + + tok_str->len = old_len2 + 1; + if (!*ptr) { + int *q; + int i,n; + q = unget_buffer_preprocess; + *q++ = tok; + n = tok_ext_size(tok) - 1; + for(i=0;ilen) tok_str_add(tok_str, TOK_PLCHLDR); } else { no_subst: if (force_blank) { - tok_str_add(tok_str, ' '); - spc = 1; + tok_str_add3(tok_str, TOK_AUTOSP, NULL, spc); force_blank = 0; } - if (!check_space(t, &spc)) - tok_str_add2(tok_str, t, &cval); + tok_str_add3(tok_str, t, &cval, spc); } if (gnucomma_index != -1) { if (tok_str->len >= gnucomma_index+2) { @@ -3198,7 +3322,7 @@ static void macro_subst(TokenString *tok_str, Sym **nested_list, gnucomma_index = -1; } } - if (tok_str->len && tok_str->str[tok_str->len-1] == TOK_PLCHLDR) + if (tok_str->len == old_len + 1 && tok_str->str[old_len] == TOK_PLCHLDR) tok_str->len--; } if (macro_str1) @@ -3224,24 +3348,37 @@ ST_FUNC void next(void) (parse_flags & PARSE_FLAG_PREPROCESS)) { s = define_find(tok); if (s) { + int spc = 0; + int ret; /* we have a macro: we try to substitute */ tok_str_new(&str); nested_list = NULL; ml = NULL; - if (macro_subst_tok(&str, &nested_list, s, &ml) == 0) { - /* substitution done, NOTE: maybe empty */ + ret = macro_subst_tok(&str, &nested_list, s, &ml, &spc); + /* substitution done, NOTE: maybe empty */ + if (ret == 0) { tok_str_add(&str, 0); macro_ptr = str.str; macro_ptr_allocated = str.str; goto redo; } else { + int old_len = str.len; + int *q; + int i,n; + + q = unget_buffer_preprocess; + *q++ = tok; + n = tok_ext_size(tok) - 1; + for(i=0;i 1) { - macro_ptr = str.str + 1; - macro_ptr_allocated = str.str; - } - tok = str.str[0]; + macro_ptr = str.str + 1; + macro_ptr_allocated = str.str; + tok = str.str[0]; /* it's an identifier, so tokc is ignored */ + goto handle_ppnum; } + goto redo; } } } else { @@ -3262,7 +3399,7 @@ ST_FUNC void next(void) goto redo; } } - +handle_ppnum: /* convert preprocessor tokens into C tokens */ if (tok == TOK_PPNUM && (parse_flags & 
PARSE_FLAG_TOK_NUM)) { @@ -3367,16 +3504,17 @@ static void line_macro_output(BufferedFile *f, const char *s, TCCState *s1) ST_FUNC int tcc_preprocess(TCCState *s1) { BufferedFile *file_ref, **iptr, **iptr_new; - int token_seen, d; + int token_seen, d, auto_ws, is_sp; const char *s; preprocess_init(s1); ch = file->buf_ptr[0]; tok_flags = TOK_FLAG_BOL | TOK_FLAG_BOF; parse_flags &= PARSE_FLAG_ASM_FILE; - parse_flags |= PARSE_FLAG_PREPROCESS | PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES | PARSE_FLAG_ACCEPT_STRAYS; + parse_flags |= PARSE_FLAG_PREPROCESS | PARSE_FLAG_LINEFEED | PARSE_FLAG_SPACES | PARSE_FLAG_ACCEPT_STRAYS | PARSE_FLAG_AUTOSPACE; token_seen = 0; file->line_ref = 0; + auto_ws = 0; file_ref = NULL; iptr = s1->include_stack_ptr; @@ -3393,6 +3531,10 @@ ST_FUNC int tcc_preprocess(TCCState *s1) continue; file->line_ref++; token_seen = 0; + } else if (tok == TOK_AUTOSP) { + if (auto_ws != -1) + auto_ws = 1; + continue; } else if (!token_seen) { d = file->line_num - file->line_ref; if (file != file_ref || d >= 8) { @@ -3416,7 +3558,11 @@ print_line: if (!token_seen) continue; } + is_sp = is_whitespace(tok); + if (!is_sp && auto_ws == 1) + fputs(" ", s1->ppfp); fputs(get_tok_str(tok, &tokc), s1->ppfp); + auto_ws = is_sp ? -1 : 0; } return 0; } diff --git a/tests/tests2/80_macros.c b/tests/tests2/80_macros.c new file mode 100644 index 0000000..aa3b972 --- /dev/null +++ b/tests/tests2/80_macros.c @@ -0,0 +1,75 @@ +#include <stdio.h> + +#define A(a,b...) g(a,##b,##b) +#define B(x...) 
x +#define C \ + +#define D(x,y) x ## y +#define E(x,y,z) x ## y ## z +#define F(x) x +#define G C +#define H() F +#define J \n +#define K \4 +#define L F (x +#define M ) +#define N F ((x +#define O )) +#define P 0x1e +#define Q P+1 +#define R ( +#define STRINGIFY2(x) #x +#define STRINGIFY(x) STRINGIFY2(x) + +int main(void) +{ + printf("%s\n", STRINGIFY()); // should produce the empty string + printf("%s\n", STRINGIFY(C)); // should produce the empty string + printf("%s\n", STRINGIFY( + A(a,) + A(a,b) + A(a,b,c) + )); // should produce g(a ) g(a,b,b)g(a,b,c,b,c) + printf("%s\n", STRINGIFY(B())); // should produce the empty string + printf("%s\n", STRINGIFY(B(C))); // should produce the empty string + printf("%s\n", STRINGIFY(D(,))); // should produce the empty string + printf("%s\n", STRINGIFY(E(,,))); // should produce the empty string + printf("%s\n", STRINGIFY(E(1,,))); // should produce 1 + printf("%s\n", STRINGIFY(E(,2,))); // should produce 2 + printf("%s\n", STRINGIFY(E(,,3))); // should produce 3 + printf("%s\n", STRINGIFY(E(1,2,3))); // should produce 123 + + // should produce g(a ) g(a )g(a )g(a )g(a ) + printf("%s\n", STRINGIFY(A(a,F()) A(a,C) A(a,G) A(a,) A(a))); + printf("%s\n", STRINGIFY(H()x)); + + // should produce F x, not Fx + printf("%s\n", STRINGIFY(H() +x)); + + printf("%s\n", STRINGIFY(I x)); // should produce I x + printf("%s\n", STRINGIFY(I +x)); // should produce I x + + printf("%s\n", STRINGIFY()); // the empty string + + printf("%s\n", STRINGIFY(\n)); // a newline + printf("%s\n", STRINGIFY(J)); // a newline + printf("%s\n", STRINGIFY(K)); // character \004 +#if 0 + printf("%s\n", STRINGIFY(L )); // "F (x" + printf("%s\n", STRINGIFY(L M)); // "x" + + printf("%s\n", STRINGIFY(N )); // "F ((x" + printf("%s\n", STRINGIFY(N O)); // "(x)" +#endif + + printf("%d\n", P+1); // 31, not a parse error + printf("%d\n", Q); // 31, not a parse error + printf("%s\n", STRINGIFY(Q)); // "0x1e+1", not "0x1e +1" + + printf("%s\n", STRINGIFY(/* comment */)); 
// " " + printf("%s\n", STRINGIFY(F R x M)); // "F ( x )", not "F( x )" + + return 0; +} diff --git a/tests/tests2/80_macros.expect b/tests/tests2/80_macros.expect new file mode 100644 index 0000000..9edb2de --- /dev/null +++ b/tests/tests2/80_macros.expect @@ -0,0 +1,27 @@ + + +g(a) g(a,b,b) g(a,b,c,b,c) + + + + +1 +2 +3 +123 +g(a) g(a) g(a) g(a) g(a) +Fx +F x +I x +I x + + + + + + +31 +31 +0x1e+1 + +F ( x ) \ No newline at end of file diff --git a/tests/tests2/Makefile b/tests/tests2/Makefile index a441674..97c22dc 100644 --- a/tests/tests2/Makefile +++ b/tests/tests2/Makefile @@ -98,7 +98,8 @@ TESTS = \ 76_dollars_in_identifiers.test \ 77_push_pop_macro.test \ 78_vla_label.test \ - 79_vla_continue.test + 79_vla_continue.test \ + 80_macros.test # 34_array_assignment.test -- array assignment is not in C standard diff --git a/testscript.pl b/testscript.pl new file mode 100644 index 0000000..994d8f9 --- /dev/null +++ b/testscript.pl @@ -0,0 +1,21 @@ +%waivers = ( + "cpp-tests/bug14.c" => 1, # we handle GNU commas, GCC doesn't. + "cpp-tests/bug28.c" => 1, # we handle GNU commas, GCC doesn't. + "cpp-tests/bug6.c" => 1, # we handle GNU commas, GCC doesn't. +); + +for my $file (glob "cpp-tests/*.c") { + my $tcc = `./tcc -E $file|grep -v '^#'`; + my $gcc = `gcc -E $file|grep -v '^#'`; + $tcc =~ s/\\0*([0-9])/\\\1/g; + $tcc =~ s/[ \n\t]*//msg; + $gcc =~ s/\\0*([0-9])/\\\1/g; + $gcc =~ s/[ \n\t]*//msg; + + if ($tcc ne $gcc and + !$waivers{$file}) { + print STDERR $tcc . "\n"; + print STDERR $gcc . "\n"; + warn $file; + } +}