diff --git a/parser/c2_tokenizer.c2 b/parser/c2_tokenizer.c2
index 1d26b3d8..fbad85d0 100644
--- a/parser/c2_tokenizer.c2
+++ b/parser/c2_tokenizer.c2
@@ -694,8 +694,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch
     vsnprintf(t.error_msg, sizeof(t.error_msg), format, args);
     va_end(args);
 
-    // XXX: error position should be passed separately from token start
-    result.loc = t.loc_start + (SrcLoc)(p - t.input_start);
+    SrcLoc err_loc = t.loc_start + (SrcLoc)(p - t.input_start);
     // read the rest of the pp-number token
     for (;;) {
         if ((*p == 'e' || *p == 'E' || *p == 'p' || *p == 'P') && (p[1] == '+' || p[1] == '-')) {
@@ -712,7 +711,7 @@ fn void Tokenizer.num_error(Tokenizer* t, Token* result, const char* p, const ch
     }
     t.cur = p;
     result.len = (u16)((p - t.input_start) - (result.loc - t.loc_start));
-    if (t.on_warning) t.on_warning(t.fn_arg, result.loc);
+    if (t.on_warning) t.on_warning(t.fn_arg, err_loc);
 }
 
 fn void Tokenizer.lex_identifier(Tokenizer* t, Token* result) {
diff --git a/tools/c2cat.c2 b/tools/c2cat.c2
index adddb2e5..6f733a8c 100644
--- a/tools/c2cat.c2
+++ b/tools/c2cat.c2
@@ -13,39 +13,28 @@
  * limitations under the License.
  */
 
-module c2cat_main;
+module c2cat;
 
 import c2_tokenizer;
 import color local;
 import file_utils;
 import keywords;
+import src_loc local;
 import string_buffer;
 import string_list;
 import string_pool;
 import number_radix;
 import token local;
-import ctype;
 
 import stdio local;
 import stdlib local;
 import string local;
 
-Color col_keyword = Byellow;
-Color col_type = Green;
-Color col_feature = Blue;
-Color col_attr = Blue;
-Color col_identifier = Normal;
-Color col_integer = Magenta;
-Color col_float = Magenta;
-Color col_charconst = Magenta;
-Color col_string = Magenta;
-Color col_comment = Bcyan;
-Color col_invalid = Bred;
-Color col_error = Bred;
-Color col_normal = Normal;
-
 fn void usage(const char* me) {
-    printf("Usage: %s file.c2 ...\n", me);
+    printf("Usage: %s [options] file.c2 ...\n"
+           "    --color     force colorized output\n"
+           "    --nocolor   disable colorized output\n"
+           , me);
     exit(1);
 }
 
@@ -55,7 +44,108 @@ type C2cat struct {
     c2_tokenizer.Tokenizer* tokenizer;
     const char* input;
     u32 offset;
-    u32 in_attributes; // 0 no, 1 seen @, 2 (, ) -> 0
+    u32 length;
+    u8 in_attributes; // 0 no, 1 seen @, 2 (, ) -> 0
+    bool has_error;
+    Style* token_style;
+    Color* style_color;
+}
+
+type Style enum u8 {
+    Normal,
+    Identifier,
+    Integer,
+    Float,
+    Charconst,
+    String,
+    Operator,
+    Type,
+    Keyword,
+    Function,
+    Attr,
+    Feature,
+    Invalid,
+    Comment,
+    Warning,
+    Error,
+}
+
+Style[elemsof(Kind)] token_style;
+Color[elemsof(Style)] style_color = {
+    [Style.Normal] = Normal,
+    [Style.Identifier] = None,
+    [Style.Integer] = Magenta,
+    [Style.Float] = Magenta,
+    [Style.Charconst] = Magenta,
+    [Style.String] = Magenta,
+    [Style.Operator] = None,
+    [Style.Type] = Green,
+    [Style.Keyword] = Byellow,
+    [Style.Function] = White,
+    [Style.Attr] = Blue,
+    [Style.Feature] = Blue,
+    [Style.Invalid] = Bred,
+    [Style.Comment] = Bcyan,
+    [Style.Warning] = Bred,
+    [Style.Error] = Bred,
+}
+
+fn bool init_colors() {
+    // TODO: make token_style global, initialized using designated ranges
+    for (Kind k = Kind.min; k <= Kind.max; k++) {
+        Style style = Normal;
+        switch (k) {
+        case None:
+            style = Normal;
+            break;
+        case Identifier:
+            style = Identifier;
+            break;
+        case IntegerLiteral:
+            style = Integer;
+            break;
+        case FloatLiteral:
+            style = Float;
+            break;
+        case CharLiteral:
+            style = Charconst;
+            break;
+        case StringLiteral:
+            style = String;
+            break;
+        case LParen ... GreaterGreaterEqual:
+            style = Operator;
+            break;
+        case KW_bool ... KW_void:
+            style = Type;
+            break;
+        case KW_as ... KW_while:
+            if (k.isQualifier()) style = Type;
+            else style = Keyword;
+            break;
+        case Feat_if ... Feat_warning:
+            style = Feature;
+            break;
+        case Invalid:
+            style = Invalid;
+            break;
+        case LineComment:
+        case BlockComment:
+            style = Comment;
+            break;
+        case Eof:
+            style = Normal;
+            break;
+        case Warning:
+            style = Warning;
+            break;
+        case Error:
+            style = Error;
+            break;
+        }
+        token_style[k] = style;
+    }
+    return color.useColor();
 }
 
 const char*[] attr_names = {
@@ -80,8 +170,9 @@ const char*[] attr_names = {
     "auto_func",
 }
 
-fn bool is_attribute(const char* str) {
-    for (u32 i=0; i<elemsof(attr_names); i++) {
+fn bool C2cat.is_attribute(const C2cat* ctx, u32 name_idx) {
+    const char* str = ctx.pool.idx2str(name_idx);
+    for (u32 i = 0; i < elemsof(attr_names); i++) {
         if (strcmp(str, attr_names[i]) == 0) return true;
     }
     return false;
-    if (tok.kind >= Kind.LParen && tok.kind < Kind.KW_bool) {
-        const char* str = tok.kind.str();
-        out.add(str);
-        ctx.offset = tok.loc + (u32)strlen(str);
-        return;
-    }
-    if (tok.kind.isBuiltinTypeOrVoid()) {
-        const char* str = tok.kind.str();
-        out.color(col_type);
-        out.add(str);
-        out.color(col_normal);
-        ctx.offset = tok.loc + (u32)strlen(str);
-        return;
-    }
-    if (tok.kind.isQualifier()) {
-        const char* str = tok.kind.str();
-        out.color(col_type);
-        out.add(str);
-        out.color(col_normal);
-        ctx.offset = tok.loc + (u32)strlen(str);
-        return;
-    }
-    if (tok.kind.isKeyword()) {
-        const char* str = tok.kind.str();
-        out.color(col_keyword);
-        out.add(str);
-        out.color(col_normal);
-        ctx.offset = tok.loc + (u32)strlen(str);
-        return;
+    if (pos < ctx.offset) {
+        // token starts before end of previous token, this is an error
+        // TODO: output an error message to stderr?
+        out.add1('\n');
+        out.color(ctx.style_color[Style.Error]);
+        out.print("error: offset=%d pos=%d", ctx.offset, pos);
+        out.color(ctx.style_color[Style.Normal]);
+        out.add1('\n');
+        ctx.offset = pos;
     }
-    if (tok.kind >= Kind.Feat_if && tok.kind <= Kind.Feat_endif) {
-        const char* str = tok.kind.str();
-        out.color(col_feature);
-        out.add(str);
-        out.color(col_normal);
-        ctx.offset = tok.loc + (u32)strlen(str);
-        return;
+    if (pos > ctx.offset) {
+        // copy stuff from file to out (from end of last token to start of current)
+        // TODO: check for whitespace only
+        out.add2(ctx.input + ctx.offset, pos - ctx.offset);
+        ctx.offset = pos;
     }
-    switch (tok.kind) {
-    case Identifier:
-        const char* str = ctx.pool.idx2str(tok.name_idx);
-        if (ctx.in_attributes && is_attribute(str)) {
-            out.color(col_attr);
-            out.add(str);
-            out.color(col_normal);
-        } else {
-            out.color(col_identifier);
-            out.add(str);
-            out.color(col_normal);
-        }
-        ctx.offset = tok.loc + (u32)strlen(str);
-        return;
-    case IntegerLiteral:
-        out.color(col_integer);
-        char[64] tmp;
-        i32 len;
-        switch (tok.getRadix()) {
-        case Hex:
-            len = sprintf(tmp, "0x%x", tok.int_value);
-            break;
-        default:
-            len = sprintf(tmp, "%d", tok.int_value);
-            break;
+    Style s = ctx.token_style[tok.kind];
+    if (tok.kind == Kind.Identifier) {
+        if (ctx.in_attributes && ctx.is_attribute(tok.name_idx)) {
+            s = Style.Attr;
+        } else
+        if (ctx.input[ctx.offset + tok_len] == '(') {
+            s = Style.Function;
         }
-        out.add(tmp);
-        ctx.offset = tok.loc + len;
-        break;
-    case FloatLiteral:
-        out.color(col_float);
-        char[64] tmp;
-        i32 len;
-        switch (tok.getRadix()) {
-        case Hex:
-            len = sprintf(tmp, "%a", tok.float_value);
-            break;
-        default:
-            len = sprintf(tmp, "%#.16g", tok.float_value);
-            break;
-        }
-        out.add(tmp);
-        if (tok.suffix_F) out.add1('F');
-        ctx.offset = tok.loc + len;
-        break;
-    case CharLiteral:
-        out.color(col_charconst);
-        char[64] tmp;
-        i32 len = 0;
-        switch (tok.getRadix()) {
-        case Hex:
-            len = sprintf(tmp, "'\\x%02x'", tok.char_value);
-            break;
-        case Octal:
-            len = sprintf(tmp, "'\\%o'", tok.char_value);
-            break;
-        default:
-            if (ctype.isprint(tok.char_value)) {
-                len = sprintf(tmp, "'%c'", tok.char_value);
-            } else {
-                tmp[0] = 0;
-                // TODO print nicely (eg \n etc)
-            }
-            break;
-        }
-        out.add(tmp);
-        ctx.offset = tok.loc + len;
-        break;
-    case StringLiteral:
-        out.color(col_string);
-        out.add1('"');
-        u32 len = out.encodeBytes(ctx.pool.idx2str(tok.text_idx), tok.text_len, '"');
-        out.add1('"');
-        ctx.offset = tok.loc + len + 2;
-        break;
-    case LineComment:
-        out.color(col_comment);
-        const char* str = ctx.pool.idx2str(tok.text_idx);
-        out.print("//%s", str);
-        ctx.offset = tok.loc + (u32)strlen(str) + 2;
-        break;
-    case BlockComment:
-        out.color(col_comment);
-        const char* str = ctx.pool.idx2str(tok.text_idx);
-        out.print("/*%s*/", str);
-        ctx.offset = tok.loc + (u32)strlen(str) + 4;
-        break;
-    case Invalid:
-        out.color(col_invalid);
-        out.print("%s", tok.invalid);
-        ctx.offset = tok.loc + (u32)strlen(tok.invalid);
-        break;
-    case Error:
-        out.add1('\n');
-        out.color(col_error);
-        out.print("error: %s", ctx.tokenizer.error_msg);
-        out.color(col_normal);
-        out.add1('\n');
-        break;
-    default:
-        out.color(col_error);
-        out.print("token %s\n", tok.kind.str());
-        ctx.offset = tok.loc + 1;
-        break;
     }
-    out.color(col_normal);
+    if (s && ctx.style_color[s]) out.color(ctx.style_color[s]);
+    out.add2(ctx.input + ctx.offset, tok_len);
+    if (s && ctx.style_color[s]) out.color(ctx.style_color[Style.Normal]);
+
+    ctx.offset += tok_len;
+}
+
+fn void C2cat.on_tokenizer_error(void* arg, SrcLoc loc) {
+    C2cat* ctx = arg;
+    ctx.has_error = true;
 }
 
-public fn i32 c2cat(const char* filename, bool use_color)
+fn i32 c2cat_file(const char* filename, bool use_color)
 {
     file_utils.File file.init("", filename);
     if (!file.load()) {
@@ -274,51 +243,62 @@ public fn i32 c2cat(const char* filename, bool use_color)
         return -1;
     }
 
-    C2cat ctx = { }
-    ctx.pool = string_pool.create(16*1024, 1024);
-    ctx.out = string_buffer.create(16*1024, use_color, 2);
-    ctx.offset = 0;
-    ctx.input = file.data();
-    ctx.in_attributes = 0;
+    string_pool.Pool* pool = string_pool.create(16*1024, 1024);
+    string_buffer.Buf* out = string_buffer.create(16*1024, use_color, 2);
+    string_buffer.Buf* buf = string_buffer.create(1024, false, 0);
+    const char* input = file.data();
     u32 file_size = file.data_size();
+    keywords.Info kwinfo.init(pool);
+    string_list.List features.init(pool);
 
-    string_list.List features;
-    features.init(ctx.pool);
-    string_buffer.Buf* buf = string_buffer.create(1024, false, 0);
-    keywords.Info kwinfo;
-    kwinfo.init(ctx.pool);
-    c2_tokenizer.Tokenizer tokenizer;
-    tokenizer.init(ctx.pool, buf, ctx.input, 0, &kwinfo, &features, nil, nil, nil, true);
+    C2cat ctx = {
+        .pool = pool,
+        .out = out,
+        .input = input,
+        .offset = 0,
+        .length = file_size,
+        .in_attributes = 0,
+        .token_style = token_style,
+        .style_color = style_color,
+    }
+
+    c2_tokenizer.Tokenizer tokenizer.init(pool, buf, input, 1, &kwinfo, &features,
+        C2cat.on_tokenizer_error, C2cat.on_tokenizer_error, &ctx, true);
     ctx.tokenizer = &tokenizer;
 
-    Token tok;
-    tok.init();
+    Token tok.init();
     while (!tok.done) {
         tokenizer.lex(&tok);
-        //printf("%4d %s\n", tok.loc, tok.kind.str());
-
+        if (ctx.has_error) {
+            tok.kind = Error;
+            ctx.has_error = false;
+        }
         ctx.update_state(&tok);
-
         ctx.print_token(&tok);
     }
 
-    if (ctx.offset <= file_size) {
-        u32 len = file_size - ctx.offset;
-        if (len) ctx.out.add2(ctx.input + ctx.offset, len);
+    if (ctx.offset <= ctx.length) {
+        // TODO: EOF token should have ctx.offset == ctx.length
+        u32 len = ctx.length - ctx.offset;
+        if (len) {
+            out.color(style_color[Style.Error]);
+            out.add2(ctx.input + ctx.offset, len);
+            out.color(style_color[Style.Normal]);
+        }
     } else {
-        ctx.out.add1('\n');
-        ctx.out.color(col_error);
-        ctx.out.print("error: offset=%d file_size=%d", ctx.offset, file_size);
-        ctx.out.color(col_normal);
-        ctx.out.add1('\n');
+        out.add1('\n');
+        out.color(style_color[Style.Error]);
+        out.print("error: offset=%d > file.size=%d", ctx.offset, ctx.length);
+        out.color(style_color[Style.Normal]);
+        out.add1('\n');
     }
 
-    fputs(ctx.out.data(), stdout);
+    fputs(out.data(), stdout);
     fflush(stdout);
 
-    ctx.pool.free();
-    ctx.out.free();
+    out.free();
     buf.free();
+    pool.free();
     file.close();
 
     return 0;
@@ -326,12 +306,39 @@ public fn i32 c2cat(const char* filename, bool use_color)
 public fn i32 main(i32 argc, const char** argv)
 {
-    bool use_color = color.useColor();
-    if (argc == 1) usage(argv[0]);
+    bool use_color = init_colors();
+    i32 filenum = 0;
+    i32 nfiles = 0;
+    for (i32 i = 1; i < argc; i++) {
+        nfiles += (*argv[i] != '-');
+    }
     for (i32 i = 1; i < argc; i++) {
-        if (argc > 2)
-            printf("==> %s <==\n", argv[i]);
-        c2cat(argv[i], use_color);
+        const char* arg = argv[i];
+        if (*arg == '-') {
+            switch (arg) {
+            case "--color":
+                use_color = true;
+                break;
+            case "--nocolor":
+                use_color = false;
+                break;
+            case "-?":
+            case "-h":
+            case "--help":
+                usage(argv[0]);
+                break;
+            default:
+                fprintf(stderr, "c2cat: unknown option %s\n", arg);
+                exit(EXIT_FAILURE);
+            }
+        } else {
+            if (nfiles > 1) {
+                if (filenum++) printf("\n");
+                printf("==> %s <==\n", arg);
+            }
+            c2cat_file(arg, use_color);
+        }
     }
+    if (!nfiles) usage(argv[0]);
 
     return 0;
 }