diff --git a/Cargo.lock b/Cargo.lock index f97f2242..94d21865 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,18 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" +[[package]] +name = "bumpalo" +version = "3.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" + +[[package]] +name = "bytecount" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" + [[package]] name = "byteorder" version = "1.4.3" @@ -711,6 +723,12 @@ dependencies = [ "libc", ] +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + [[package]] name = "memoffset" version = "0.6.5" @@ -720,6 +738,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "modular-bitfield" version = "0.11.2" @@ -755,6 +779,103 @@ dependencies = [ "winapi", ] +[[package]] +name = "nom" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom_locate" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37794436ca3029a3089e0b95d42da1f0b565ad271e4d3bb4bad0c7bb70b10605" +dependencies = [ + "bytecount", + "memchr", + "nom", +] + +[[package]] +name = "num" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "num_threads" version = "0.1.6" @@ -804,6 +925,12 @@ dependencies = [ "system-deps", ] +[[package]] +name = "paste" +version = "1.0.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" + [[package]] name = "peg" version = "0.8.1" @@ -948,13 +1075,18 @@ name = "redscript-compiler" version = "0.5.9" dependencies = [ "ahash", + "bumpalo", "enum-as-inner", "flexstr", "hashbrown 0.13.1", "indexmap", "itertools", "log", + "nom", + "nom_locate", + "num", "once_cell", + "paste", "peg", "redscript", "sequence_trie", diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index d5c2e270..79c02877 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -20,6 +20,13 @@ yansi = "0.5" walkdir = "2" typed-builder = "0.11" indexmap = "1" -sequence_trie = { git = "https://github.com/jac3km4/rust_sequence_trie", rev = "a056b4c", features = ["hashbrown"] } +sequence_trie = { git = "https://github.com/jac3km4/rust_sequence_trie", rev = "a056b4c", features = [ + "hashbrown", +] } simple-interner = { version = "0.3", features = ["hashbrown"] } peg = "0.8" +nom = "7.1" +nom_locate = "4.0" +paste = "1" +bumpalo = "3.11" +num = "0.4" diff --git a/compiler/src/comb.rs b/compiler/src/comb.rs new file mode 100644 index 00000000..ad8d6af9 --- /dev/null +++ b/compiler/src/comb.rs @@ -0,0 +1,104 @@ +use std::ops::RangeTo; + +use nom::error::ParseError; +use nom::{IResult, Offset, Slice}; + +/// Matches while open, close, or inner. Returns the matched range. +/// Input is matched, recursively descending, until open and close are balanced. 
+pub fn many_till_balanced1( + mut open: impl FnMut(I) -> IResult, + mut inner: impl FnMut(I) -> IResult, + mut close: impl FnMut(I) -> IResult, +) -> impl FnMut(I) -> IResult +where + I: Clone + Offset + Slice>, + E: ParseError, +{ + move |start: I| { + let mut open_count = 0usize; + let mut close_count = 0usize; + let mut rem = start.clone(); + let mut end = start.clone(); + loop { + if let Ok((rem2, _)) = open(rem.clone()) { + open_count += 1; + rem = rem2; + } else if let Ok((rem2, _)) = close(rem.clone()) { + close_count += 1; + rem = rem2; + } else if let Ok((rem2, _)) = inner(rem.clone()) { + rem = rem2; + } else { + break; + } + if open_count == close_count { + end = rem.clone(); + break; + } + } + let len = end.offset(&start); + if len == 0 { + Err(nom::Err::Error(E::from_error_kind(start, nom::error::ErrorKind::Many1))) + } else { + Ok((rem, start.slice(..len))) + } + } +} + +pub fn delimited_list0( + mut open: impl FnMut(I) -> IResult, + mut separator: impl FnMut(I) -> IResult, + mut inner: impl FnMut(I) -> IResult, + mut close: impl FnMut(I) -> IResult, +) -> impl FnMut(I) -> IResult), E> +where + I: Clone, + E: ParseError, +{ + move |is| { + let (mut i, ss) = open(is)?; + + let mut parts = vec![]; + while let Ok((ip, ep)) = inner(i.clone()) { + if let Ok((ip, sp)) = separator(ip.clone()) { + parts.push((ep, sp)); + i = ip; + continue; + } else if let Ok((ip, se)) = close(ip) { + parts.push((ep, se)); + i = ip; + break; + } + // missing close match + return Err(nom::Err::Error(E::from_error_kind( + i, + nom::error::ErrorKind::TagClosure, + ))); + } + return Ok((i, (ss, parts))); + } +} + +pub fn variant(expected: T) -> impl FnMut(I) -> IResult +where + I: Clone, + T: Parsable + PartialEq, + E: ParseError, +{ + move |i| { + let (i, (o, value)) = T::parse(i)?; + if value == expected { + Ok((i, o)) + } else { + Err(nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::Verify))) + } + } +} + +pub trait Parsable: Sized +where + I: Clone, + E: 
ParseError, +{ + fn parse(i: I) -> IResult; +} diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs new file mode 100644 index 00000000..65034f39 --- /dev/null +++ b/compiler/src/lexer.rs @@ -0,0 +1,487 @@ +use nom::branch::alt; +use nom::bytes::complete::{tag, take_while, take_while_m_n}; +use nom::character::complete::{ + alpha1, anychar, char, digit0, digit1, hex_digit0, line_ending, multispace1, none_of, oct_digit0, one_of, satisfy +}; +use nom::combinator::{consumed, map, not, opt, recognize}; +use nom::error::ParseError; +use nom::multi::many0; +use nom::sequence::{delimited, pair, preceded, separated_pair}; +use nom::AsChar; +use redscript::ast::{Constant, Literal, Variance}; +use redscript::Str; +use strum::{Display, IntoStaticStr}; + +use crate::comb::{many_till_balanced1, Parsable}; +use crate::parser::Qualifier; +use crate::validators::*; +use crate::*; + +pub trait ParseErr<'a>: ParseError> {} +pub type IResult<'a, O> = nom::IResult, O>; +pub type NomError<'a> = nom::Err>>; +pub type NomErrorKind = nom::error::ErrorKind; + +pub fn nom_error(input: Span, kind: NomErrorKind) -> NomError { + NomError::Error(nom::error::Error::new(input, kind)) +} + +#[derive(Debug, Clone, Copy, PartialEq, Display)] +pub enum Trivia { + Comment, + Whitespace, + LineEnd, +} + +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] +pub enum Num { + F32(f32), + F64(f64), + I32(i32), + I64(i64), + U32(u32), + U64(u64), +} + +impl From for Constant { + fn from(value: Num) -> Self { + match value { + Num::F32(f) => Constant::F32(f), + Num::F64(f) => Constant::F64(f), + Num::I32(f) => Constant::I32(f), + Num::I64(f) => Constant::I64(f), + Num::U32(f) => Constant::U32(f), + Num::U64(f) => Constant::U64(f), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] +pub enum Op { + Add, + Sub, + Mul, + Div, + Bang, + Eq, + Lt, + Gt, + And, + Or, + Tilde, +} + +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] +pub enum Ctrl { + 
LParen, + RParen, + LBracket, + RBracket, + LBrace, + RBrace, + Colon, + Semi, + Comma, + Period, + Dot, + Quest, + LArrow, + At, +} + +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] +pub enum Kw { + Module, + Class, + Struct, + Enum, + Func, + Let, + New, + If, + Else, + Switch, + Case, + Break, + While, + For, + In, + Continue, + Return, + Try, + Catch, + Finally, + Extends, +} + +// ----------------------------------------------------------------------------- +// Trivia +// ----------------------------------------------------------------------------- + +fn comment_multiline(i: Span) -> IResult { + recognize(preceded(nom::combinator::peek(tag("/*")), many_till_balanced1( + tag("/*"), + recognize(preceded(not(alt((tag("*/"), tag("/*")))), anychar)), // `not` consumes nothing; pair it with `anychar` so every step makes progress + tag("*/"), + )))(i) +} + +pub fn trivia(i: Span) -> IResult<(Trivia, Span)> { + alt(( + map(comment_multiline, |s| (Trivia::Comment, s)), + map(recognize(preceded(tag("//"), many0(preceded(not(line_ending), anychar)))), |s| { + (Trivia::Comment, s) + }), + map(recognize(line_ending), |s| (Trivia::LineEnd, s)), + map(recognize(multispace1), |s| (Trivia::Whitespace, s)), + ))(i) +} + +// ----------------------------------------------------------------------------- +// Numeric +// ----------------------------------------------------------------------------- + +fn float_literal(i: Span) -> IResult { + recognize(separated_pair(digit0, tag("."), digit1))(i) +} + +fn sciexp_literal(i: Span) -> IResult { + recognize(separated_pair( + alt((float_literal, digit1)), + one_of("eE"), + pair(one_of("-+"), digit1), + ))(i) +} + +fn parse_num(i: &Span, radix: u32) -> T +where + T::FromStrRadixErr: std::fmt::Display, +{ + match T::from_str_radix(i.fragment(), radix) { + Ok(value) => value, + Err(error) => { + diag_report!(i, ERR_NUM_PARSE, i.fragment(), error); + T::default() + } + } +} + +fn int_width(suffix: Option) -> u32 { + match suffix { + Some(suffix) => match alt(( + map(tag("i32"), |_: Span| 32), + map(tag("i64"), |_| 64), + map(tag("u32"), |_| 32), + map(tag("u64"), |_| 64), + 
map(tag("l"), |_| 64), + ))(suffix.clone()) + { + Ok((_, width)) => width, + Err(err) => { + let _: NomError = err; // needed for type inference + diag_report!(suffix, ERR_NUM_SUFFIX, suffix.fragment()); + 32 + } + }, + None => 32, + } +} + +pub fn integer(is: Span) -> IResult { + match pair( + alt(( + map(preceded(tag("0x"), hex_digit0), |s| (16, s)), + map(preceded(tag("0o"), oct_digit0), |s| (8, s)), + map( + preceded(tag("0b"), take_while_m_n(1, 64, |c: char| c == '0' || c == '1')), + |s| (2, s), + ), + map(digit1, |s| (10, s)), // at least one digit: an empty match must not count as an integer + )), + opt(identifier), + )(is) + { + Ok((rem, ((radix, value), suffix))) => { + let num = match int_width(suffix) { + 64 => Num::I64(parse_num(&value, radix)), + _ => Num::I32(parse_num(&value, radix)), + }; + Ok((rem, num)) + } + Err(err) => Err(err), + } +} + +fn float_width(suffix: Option) -> u32 { + match suffix { + Some(suffix) => { + match alt((map(tag("f32"), |_| 32), map(tag("f64"), |_| 64), map(tag("d"), |_| 64)))(suffix.clone()) { + Ok((_, width)) => width, + Err(err) => { + let _: NomError = err; // needed for type inference + diag_report!(suffix, ERR_NUM_SUFFIX, suffix.fragment()); + 64 + } + } + } + None => 64, + } +} + +pub fn float(is: Span) -> IResult { + match pair(alt((sciexp_literal, float_literal)), opt(identifier))(is) { + Ok((rem, (value, suffix))) => { + let num = match float_width(suffix) { + 64 => Num::F64(parse_num(&value, 10)), + _ => Num::F32(parse_num(&value, 10)), + }; + Ok((rem, num)) + } + Err(err) => Err(err), + } +} + +// ----------------------------------------------------------------------------- +// String +// ----------------------------------------------------------------------------- +// Strings are parsed as a sequence of literal portions and interpolated portions +// The interpolated portions are parsed as part of the token stream, delimited by the start and end interpolation tokens +// The literal portions are parsed as a single token +// The entire string may be prefixed with a type 
specifier, a char. + +fn str_char_uni(is: Span) -> IResult> { + let parse_hex = &take_while_m_n(1, 6, char::is_hex_digit); + let parse_delimited_hex = delimited(char('{'), parse_hex, char('}')); + let (i, digits) = alt((preceded(char('u'), parse_delimited_hex), preceded(char('u'), parse_hex)))(is.clone())?; + if let Ok(hex) = u32::from_str_radix(digits.fragment(), 16) { + if let Some(c) = char::from_u32(hex) { + Ok((i, Some(c))) + } else { + diag_report!((&is..&i), ERR_CHAR_UTF8, hex); + Ok((i, None)) + } + } else { + diag_report!((&is..&i), ERR_CHAR_UTF8, digits.fragment()); + Ok((i, None)) + } +} + +fn str_char_invalid(is: Span) -> IResult> { + let (i, c) = preceded(char('\\'), anychar)(is.clone())?; + diag_report!((&is..&i), ERR_CHAR_ESCAPE, c); + Ok((i, None)) +} + +fn str_char(i: Span) -> IResult> { + alt(( + map(tag(r#"\\"#), |_| Some('\\')), + map(tag(r#"\/"#), |_| Some('/')), + map(tag(r#"\""#), |_| Some('"')), + map(tag(r#"\n"#), |_| Some('\n')), + map(tag(r#"\t"#), |_| Some('\t')), + map(tag(r#"\r"#), |_| Some('\r')), + map(tag(r#"\0"#), |_| Some('\0')), + str_char_uni, + map(none_of("\\"), Some), + str_char_invalid, + ))(i) +} + +fn str_chars(mut i: Span) -> IResult { + let mut s = String::default(); + while let Ok((i_remaining, c)) = str_char(i.clone()) { + if let Some(c) = c { + s.push(c); + } + i = i_remaining; + } + Ok((i, Str::from_ref(s))) +} + +fn string_type(i: &Span, c: Option) -> Literal { + match c { + Some(c) => match c { + 'n' => Literal::Name, + 'r' => Literal::Resource, + 't' => Literal::TweakDbId, + 's' => Literal::String, + _ => { + diag_report!(i, ERR_LITERAL_TYPE_INVALID, c); + Literal::String + } + }, + None => Literal::String, + } +} + +// a parser accepting a function and returning the result of the function, by consuming the input +pub fn string(i: Span) -> IResult<(Span, Literal, Str)> { + let (i, (o, (p, s))) = consumed(pair( + opt(satisfy(AsChar::is_alpha)), + delimited(tag("\""), str_chars, tag("\"")), + ))(i)?; + let p = 
string_type(&o, p); + Ok((i, (o, p, s))) +} + +// matches a string literal until the first interpolation +pub fn string_inter_start(i: Span) -> IResult<(Span, Literal, Str)> { + let (i, (o, (p, s))) = consumed(pair( + opt(satisfy(AsChar::is_alpha)), + delimited(tag("\""), str_chars, tag(r#"\("#)), + ))(i)?; + let p = string_type(&o, p); + Ok((i, (o, p, s))) +} + +// matches a string literal from the end of the first interpolation until the end of the string +pub fn string_inter_end(i: Span) -> IResult<(Span, Str)> { + consumed(delimited(tag(r#")"#), str_chars, tag("\"")))(i) +} + +// matches a string literal from the end of the first interpolation until the start of the next interpolation +pub fn string_inter_part(i: Span) -> IResult<(Span, Str)> { + consumed(delimited(tag(r#")"#), str_chars, tag(r#"\("#)))(i) +} + +// ----------------------------------------------------------------------------- +// Operator +// ----------------------------------------------------------------------------- +// one of `+-*/!=<>&|~` + +pub fn operator(i: Span) -> IResult<(Span, Op)> { + alt(( + map(tag("+"), |s| (s, Op::Add)), + map(tag("-"), |s| (s, Op::Sub)), + map(tag("*"), |s| (s, Op::Mul)), + map(tag("/"), |s| (s, Op::Div)), + map(tag("!"), |s| (s, Op::Bang)), + map(tag("="), |s| (s, Op::Eq)), + map(tag("<"), |s| (s, Op::Lt)), + map(tag(">"), |s| (s, Op::Gt)), + map(tag("&"), |s| (s, Op::And)), + map(tag("|"), |s| (s, Op::Or)), + map(tag("~"), |s| (s, Op::Tilde)), + ))(i) +} + +impl<'a> Parsable, nom::error::Error>> for Op { + fn parse(i: Span<'_>) -> IResult<(Span, Self)> { + operator(i) + } +} + +// ----------------------------------------------------------------------------- +// Control character +// ----------------------------------------------------------------------------- +// one of `()[]{}:;,.?@` and `->` + +pub fn control(i: Span) -> IResult<(Span, Ctrl)> { + alt(( + map(tag("("), |s| (s, Ctrl::LParen)), + map(tag(")"), |s| (s, Ctrl::RParen)), + map(tag("["), |s| (s, 
Ctrl::LBracket)), + map(tag("]"), |s| (s, Ctrl::RBracket)), + map(tag("{"), |s| (s, Ctrl::LBrace)), + map(tag("}"), |s| (s, Ctrl::RBrace)), + map(tag(":"), |s| (s, Ctrl::Colon)), + map(tag(";"), |s| (s, Ctrl::Semi)), + map(tag(","), |s| (s, Ctrl::Comma)), + map(tag("."), |s| (s, Ctrl::Dot)), + map(tag("?"), |s| (s, Ctrl::Quest)), + map(tag("->"), |s| (s, Ctrl::LArrow)), + map(tag("@"), |s| (s, Ctrl::At)), + ))(i) +} + +impl<'a> Parsable, nom::error::Error>> for Ctrl { + fn parse(i: Span<'_>) -> IResult<(Span, Self)> { + control(i) + } +} +// ----------------------------------------------------------------------------- +// Identifier +// ----------------------------------------------------------------------------- +// An identifier is a sequence of letters, numbers, and underscores, starting with a letter or underscore + +pub fn identifier(i: Span) -> IResult { + recognize(pair(alt((alpha1, tag("_"))), take_while(|c: char| c.is_alphanumeric() || c == '_')))(i) +} + +// ----------------------------------------------------------------------------- +// Keyword +// ----------------------------------------------------------------------------- +// A reserved language keyword +// one of module, class, struct, enum, func, let, new, if, else, switch, case, break, while, for, in, continue, return, try, catch, finally, extends + +pub fn keyword(i: Span) -> IResult<(Span, Kw)> { + alt(( + map(tag("module"), |s| (s, Kw::Module)), + map(tag("class"), |s| (s, Kw::Class)), + map(tag("struct"), |s| (s, Kw::Struct)), + map(tag("enum"), |s| (s, Kw::Enum)), + map(tag("func"), |s| (s, Kw::Func)), + map(tag("let"), |s| (s, Kw::Let)), + map(tag("new"), |s| (s, Kw::New)), + map(tag("if"), |s| (s, Kw::If)), + map(tag("else"), |s| (s, Kw::Else)), + map(tag("switch"), |s| (s, Kw::Switch)), + map(tag("case"), |s| (s, Kw::Case)), + map(tag("break"), |s| (s, Kw::Break)), + map(tag("while"), |s| (s, Kw::While)), + map(tag("for"), |s| (s, Kw::For)), + map(tag("in"), |s| (s, Kw::In)), + map(tag("continue"), |s| (s, 
Kw::Continue)), + map(tag("return"), |s| (s, Kw::Return)), + map(tag("try"), |s| (s, Kw::Try)), + map(tag("catch"), |s| (s, Kw::Catch)), + map(tag("finally"), |s| (s, Kw::Finally)), + map(tag("extends"), |s| (s, Kw::Extends)), + ))(i) +} + +impl<'a> Parsable, nom::error::Error>> for Kw { + fn parse(i: Span<'_>) -> IResult<(Span, Self)> { + keyword(i) + } +} + +pub fn null(i: Span) -> IResult { + tag("null")(i) +} + +pub fn this(i: Span) -> IResult { + tag("this")(i) +} + +pub fn super_(i: Span) -> IResult { + tag("super")(i) +} + +pub fn boolean(i: Span) -> IResult<(Span, bool)> { + alt((map(tag("true"), |s| (s, true)), map(tag("false"), |s| (s, false))))(i) +} + +pub fn qualifier(i: Span) -> IResult { + alt(( + map(tag("public"), |_| Qualifier::Public), + map(tag("protected"), |_| Qualifier::Protected), + map(tag("private"), |_| Qualifier::Private), + map(tag("abstract"), |_| Qualifier::Abstract), + map(tag("static"), |_| Qualifier::Static), + map(tag("final"), |_| Qualifier::Final), + map(tag("const"), |_| Qualifier::Const), + map(tag("native"), |_| Qualifier::Native), + map(tag("exec"), |_| Qualifier::Exec), + map(tag("callback"), |_| Qualifier::Callback), + map(tag("out"), |_| Qualifier::Out), + map(tag("opt"), |_| Qualifier::Optional), + map(tag("quest"), |_| Qualifier::Quest), + map(tag("importOnly"), |_| Qualifier::ImportOnly), + map(tag("persistent"), |_| Qualifier::Persistent), + ))(i) +} + +pub fn variance(i: Span) -> IResult { + alt((map(tag("+"), |_| Variance::Co), map(tag("-"), |_| Variance::Contra)))(i) +} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index b84da63a..e55ba3f3 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,15 +1,23 @@ +#![feature(macro_metavar_expr)] +#![feature(let_chains)] +#![macro_use] +extern crate paste; use simple_interner::Interner; pub mod autobox; pub mod codegen; +pub mod comb; pub mod compiler; pub mod error; +pub mod lexer; #[allow(clippy::redundant_closure_call)] pub mod parser; +pub mod 
parser2; mod scoped_map; pub mod source_map; pub mod type_repo; pub mod typer; +pub mod validators; pub mod visit; pub type StringInterner = Interner; diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index d920d661..518c1485 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -110,7 +110,7 @@ pub enum Qualifier { } #[derive(Debug)] -pub struct Qualifiers(Vec); +pub struct Qualifiers(pub Vec); impl Qualifiers { pub fn visibility(&self) -> Option { diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs new file mode 100644 index 00000000..418cdf0f --- /dev/null +++ b/compiler/src/parser2.rs @@ -0,0 +1,246 @@ +use std::str::FromStr; + +use nom::branch::alt; +use nom::combinator::{consumed, map, opt}; +use nom::multi::{many0, separated_list0, separated_list1}; +use nom::sequence::{delimited, pair, preceded, tuple}; +use nom::{Offset, Slice}; +use redscript::ast::{Constant, Ident, Literal, SourceAst, TypeName, TypeParam}; +use redscript::Str; + +use crate::comb::{delimited_list0, variant}; +use crate::lexer::*; +use crate::parser::{ + Annotation, AnnotationKind, Declaration, FieldSource, FunctionSource, ParameterSource, Qualifiers +}; +use crate::validators::*; +use crate::*; + +type Range = redscript::ast::Span; +type Expr = redscript::ast::Expr; +type Seq = redscript::ast::Seq; + +fn to_range(input: &Span, remaining: &Span) -> Range { + let len = input.slice(..remaining.offset(input)).len(); + Range::with_len(input.location_offset(), len) +} + +macro_rules! to_ok { + ($input:tt, $remaining:tt, $($value:tt)*) => { + ($($value)*, to_range(&$input, &$remaining)) + }; +} + +/// A dot separated sequence of identifiers. 
+fn trailer(is: Span) -> IResult<(Vec, Range)> { + map( + consumed(separated_list1(variant(Ctrl::Dot), ident)), + |(span, trailer)| (trailer, span.to_range().into()), + )(is) +} + +fn ident(is: Span) -> IResult { + map(identifier, |ident| ident.to_flex())(is) +} + +fn string_literal(is: Span) -> IResult<(Literal, Str)> { + let (i, (ii, t, s)) = string(is)?; + Ok((i, (t, s))) +} + +pub fn string_interpolation(is: Span) -> IResult<(Str, Vec<(Expr, Str)>, Range)> { + map( + consumed(delimited_list0( + map(string_inter_start, |(_, _, s)| s), + map(string_inter_part, |(_, s)| s), + expr, + map(string_inter_end, |(_, s)| s), + )), + |(r, (ss, sp))| (ss, sp, r.to_range().into()), + )(is) +} + +fn constant(is: Span) -> IResult { + alt(( + map(string_literal, |(t, s)| Constant::String(t, s)), + map(consumed(alt((float, integer))), |(_, n)| n.into()), + map(boolean, |(_, b)| Constant::Bool(b)), + ))(is) +} + +fn type_list(is: Span) -> IResult> { + separated_list1(variant(Ctrl::Comma), type_name)(is) +} + +fn type_args(is: Span) -> IResult> { + delimited(variant(Op::Lt), type_list, variant(Op::Gt))(is) +} + +fn ret_type(is: Span) -> IResult { + preceded(variant(Ctrl::LArrow), type_name)(is) +} + +fn type_name(is: Span) -> IResult { + alt(( + map(pair(ident, opt(type_args)), |(ident, args)| { + TypeName::new(ident, args.unwrap_or_default()) + }), + map( + delimited(variant(Ctrl::LBracket), type_name, variant(Ctrl::RBracket)), + TypeName::of_array, + ), + map( + pair( + delimited(variant(Ctrl::LParen), type_list, variant(Ctrl::RParen)), + ret_type, + ), + |(args, ret)| TypeName::of_function(args, ret), + ), + ))(is) +} + +fn assign_init(is: Span) -> IResult> { + opt(preceded(variant(Op::Eq), expr))(is) +} + +fn let_type(is: Span) -> IResult { + preceded(variant(Ctrl::Colon), type_name)(is) +} + +fn let_(is: Span) -> IResult { + map( + consumed(tuple((preceded(variant(Kw::Let), ident), opt(let_type), assign_init))), + |(span, (ident, ty, value))| { + Expr::Declare(ident, 
ty.map(Box::new), value.map(Box::new), span.to_range().into()) + }, + )(is) +} + +fn expr_list(is: Span) -> IResult> { + separated_list1(variant(Ctrl::Comma), expr)(is) +} + +fn args(is: Span) -> IResult> { + map( + consumed(delimited(variant(Ctrl::LParen), expr_list, variant(Ctrl::RParen))), + |(_, args)| args, + )(is) +} + +fn annotation(is: Span) -> IResult { + let (rem, (span, (ident, args))) = consumed(tuple((preceded(variant(Ctrl::At), ident), opt(args))))(is)?; + + match AnnotationKind::from_str(&ident) { + Ok(kind) => Ok((rem, Annotation { + kind, + args: args.unwrap_or_default(), + span: span.to_range().into(), + })), + Err(_) => Err(nom_error(rem, NomErrorKind::Tag)), + } +} + +fn annotation_list(is: Span) -> IResult> { + many0(annotation)(is) +} + +fn qualifier_list(is: Span) -> IResult { + map(many0(qualifier), |qs| Qualifiers(qs))(is) +} + +fn type_param(is: Span) -> IResult { + map( + tuple((variance, ident, opt(preceded(variant(Kw::Extends), type_name)))), + |(variance, ident, extends)| TypeParam { + name: ident, + variance, + extends, + }, + )(is) +} + +fn type_params(is: Span) -> IResult> { + delimited( + variant(Op::Lt), + separated_list1(variant(Ctrl::Comma), type_param), + variant(Op::Gt), + )(is) +} + +fn decl<'a, T>( + mut inner: impl FnMut(Span<'a>) -> IResult<'a, T>, +) -> impl FnMut(Span<'a>) -> IResult<'a, (Declaration, T)> { + move |is| { + map( + consumed(tuple((annotation_list, qualifier_list, &mut inner, ident))), + |(span, (annotations, qualifiers, inner, ident))| { + ( + Declaration { + annotations, + qualifiers, + name: ident, + span: span.to_range().into(), + }, + inner, + ) + }, + )(is) + } +} + +fn field(is: Span) -> IResult { + map( + tuple((decl(variant(Kw::Let)), let_type, assign_init)), + |((declaration, _), ty, init)| FieldSource { + declaration, + type_: ty, + default: init, + }, + )(is) +} + +fn param(is: Span) -> IResult { + map(tuple((qualifier_list, ident, let_type)), |(qualifiers, name, ty)| { + ParameterSource { + 
qualifiers, + name, + type_: ty, + } + })(is) +} + +fn params(is: Span) -> IResult> { + delimited( + variant(Ctrl::LParen), + separated_list0(variant(Ctrl::Comma), param), + variant(Ctrl::RParen), + )(is) +} + +fn func(is: Span) -> IResult { + map( + consumed(tuple(( + decl(variant(Kw::Func)), + opt(type_params), + params, + opt(ret_type), + opt(func_body), + ))), + |(span, ((declaration, _), tparams, parameters, ret_type, body))| FunctionSource { + tparams: tparams.unwrap_or_default(), + declaration, + type_: ret_type, + parameters, + body, + span: span.to_range().into(), + }, + )(is) +} + +fn func_body(is: Span) -> IResult { + todo!() +} + +pub fn expr(is: Span) -> IResult { + todo!() +} diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs new file mode 100644 index 00000000..ed7ee6aa --- /dev/null +++ b/compiler/src/validators.rs @@ -0,0 +1,211 @@ +use std::cell::RefCell; +use std::fmt::Display; +use std::ops::Range; + +use redscript::Str; +use strum::{Display, IntoStaticStr}; + +pub type Span<'a> = nom_locate::LocatedSpan<&'a str, State<'a>>; + +pub trait AsFlexStr { + fn to_flex(&self) -> Str; +} + +impl<'a> AsFlexStr for Span<'a> { + fn to_flex(&self) -> Str { + Str::from_ref(self.fragment()) + } +} + +pub trait ToRange { + fn to_range(&self) -> Range; +} + +impl<'a> ToRange for Span<'a> { + fn to_range(&self) -> Range { + let start = self.location_offset(); + let end = start + self.fragment().len(); + start..end + } +} + +/// Error containing a text span and an error message to display. 
+#[derive(Debug)] +pub struct Diagnostic { + sl: usize, + sc: usize, + el: usize, + ec: usize, + file: Str, + text: String, + severity: Severity, + code: &'static str, + msg: String, +} + +impl Display for Diagnostic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "{}[{}]: {}", self.severity.as_str(), self.code, self.msg)?; + writeln!( + f, + "--> {}:{}:{} to l{}:{}", + self.file, self.sl, self.sc, self.el, self.ec + )?; + for line in self.text.lines() { + writeln!(f, "| {}", line)?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, Copy, Display, IntoStaticStr)] +#[strum(serialize_all = "snake_case")] +pub enum Severity { + Error, + Warn, + Info, + Hint, +} + +impl Severity { + pub fn as_str(&self) -> &'static str { + self.into() + } +} + +/// Carried around in the `LocatedSpan::extra` field in +/// between `nom` parsers. +#[derive(Clone, Debug)] +pub struct State<'a>(pub &'a RefCell>, pub Str); + +impl<'a> State<'a> { + /// Something not allowed by the rules of the language or other authority. + #[allow(dead_code)] + fn report_diagnostic(&self, error: Diagnostic) { + self.0.borrow_mut().push(error); + } +} + +pub trait ReportOrigin { + fn report(&self, diag: &'static DiagnosticTemplate, msg: String); +} + +impl<'a> ReportOrigin for Span<'a> { + /// Reports the entire fragment as faulty. + /// Does not terminate the compilation. + fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { + let bytes = std::str::from_utf8(&self.get_line_beginning()).unwrap_or_default(); + let line = self.location_line() as usize; + let sc = self.get_column(); // column within the line, matching the `Range` impl; `location_offset` is an absolute byte offset + let ec = sc + self.fragment().len(); + self.extra.report_diagnostic(Diagnostic { + sl: line, + sc, + el: line, + ec, + file: self.extra.1.clone(), + text: bytes.to_string(), + severity: diag.0, + code: diag.1, + msg, + }); + } +} + +impl<'a> ReportOrigin for Range<&Span<'a>> { + /// reports the range from the start of the first span to the end of the last span. 
+ /// The first span must contain the last span. + /// This is achieved by cloning the first span before consuming. + fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { + let sl = self.start.location_line() as usize; + let el = self.end.location_line() as usize; + let bytes = self + .start + .get(..self.end.location_offset().saturating_sub(self.start.location_offset())) // fragment-relative: the fragment already begins at the start offset + .unwrap_or_else(|| std::str::from_utf8(&self.start.get_line_beginning()).unwrap_or_default()); + self.start.extra.report_diagnostic(Diagnostic { + sl, + sc: self.start.get_column(), + el, + ec: self.end.get_column(), + file: self.start.extra.1.clone(), + text: bytes.to_string(), + severity: diag.0, + code: diag.1, + msg, + }); + } +} + +/// A diagnostic message in the format `Severity, Code, Message format`. +pub struct DiagnosticTemplate(Severity, &'static str, &'static str); + +/// Reports a diagnostic message. +/// Requires the crate-level items to be imported, because the format macros are defined there. +/// ```ignore +/// use crate::*; +/// use crate::validators::ReportOrigin; +/// ``` +#[macro_export] +macro_rules! diag_report { + ($origin:expr, $name:ident, $($arg:tt)*) =>{ + paste::paste! { + $origin.report(&$name, []!($($arg)*)) + } + }; +} + +/// A diagnostic message in the format `Severity, Code, Message format`. +/// The code is a key constructed from the initials and an id. +/// The message format is a `format!` string. +/// +/// ## Error Code +/// - The first char represents the severity: [E]rror, [W]arn, [I]nfo, [H]int. +/// - The second char represents the source: [L]exical, S[Y]ntax, [S]emantic, [T]ype, etc. +/// - The third char represents a category. This can really be whatever. It is used to group similar errors. +/// +/// ## Example +/// ```ignore +/// diag!(ERR_INVALID_UTF8, Error, "ELS0001", "Invalid UTF-8 sequence `{}`"); +/// ``` +macro_rules! diag { + ($name:ident, $severity:ident, $code:tt, $msg:tt) => { + paste::paste! 
{ + #[allow(dead_code)] + pub const $name: DiagnosticTemplate = DiagnosticTemplate(Severity::$severity, $code, $msg); + + // Formats the arguments with the arguments with the msg of the diagnostic. + #[allow(dead_code)] + #[macro_export] + macro_rules! [] { + ($$($$a:tt)*) => { + format!($msg, $$($a)*) + }; + } + } + }; +} + +diag!(ERR_CHAR_UTF8, Error, "ELS0001", "invalid UTF-8 sequence `{}`"); +diag!(ERR_EXPECT_HEX_DIGIT, Error, "ELS0002", "invalid hex digit `{}`"); +diag!(ERR_CHAR_ESCAPE, Error, "ELS0003", "invalid escape sequence `{}`"); +diag!(ERR_NUM_PARSE, Error, "ELN0001", "invalid number `{}`, {}"); +diag!( + ERR_NUM_OVERFLOW, + Error, + "ELN0003", + "the number `{}` exceeds the maximum value of the type {}" +); +diag!(ERR_NUM_SUFFIX, Error, "ELN0004", "invalid suffix `{}` for number"); +diag!( + ERR_INTERPOL_NOT_CONST, + Error, + "ESS0005", + "a interpolated string cannot be a constant value, `{}` was specified" +); +diag!( + ERR_LITERAL_TYPE_INVALID, + Error, + "ESS0006", + "invalid literal type `{}` expected one of [n]ame, [r]resource, [t]weakDBId or none" +); diff --git a/core/src/ast.rs b/core/src/ast.rs index 19a6ea4d..eb8aaa3b 100644 --- a/core/src/ast.rs +++ b/core/src/ast.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; use std::fmt::{self, Debug, Display}; -use std::ops::{Add, Sub}; +use std::ops::{Add, Range, Sub}; use derive_where::derive_where; use enum_as_inner::EnumAsInner; @@ -23,14 +23,7 @@ where Declare(N::Local, Option>, Option>, Span), DynCast(N::Class, Box, Span), Assign(Box, Box, N::Inferred, Span), - Call( - Box, - N::Callable, - Box<[N::Type]>, - Box<[Self]>, - N::CallMeta, - Span, - ), + Call(Box, N::Callable, Box<[N::Type]>, Box<[Self]>, N::CallMeta, Span), Lambda(N::Closure, Box, Span), Member(Box, N::Member, Span), ArrayElem(Box, Box, N::Inferred, Span), @@ -290,7 +283,7 @@ where } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Display)] pub enum Literal { String, Name, @@ -299,14 +292,20 @@ pub enum Literal { } 
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct Pos(pub u32); +pub struct Pos(pub usize); + +impl From for Pos { + fn from(value: usize) -> Self { + Pos(value) + } +} impl Pos { pub const ZERO: Pos = Pos(0); #[inline] pub fn new(n: usize) -> Self { - Pos(n as u32) + Pos(n) } } @@ -321,7 +320,7 @@ impl Add for Pos { #[inline] fn add(self, rhs: usize) -> Pos { - Pos(self.0 + rhs as u32) + Pos(self.0 + rhs) } } @@ -330,14 +329,14 @@ impl Sub for Pos { #[inline] fn sub(self, rhs: usize) -> Pos { - Pos(self.0 - rhs as u32) + Pos(self.0 - rhs) } } impl From for usize { #[inline] fn from(pos: Pos) -> Self { - pos.0 as usize + pos.0 } } @@ -347,6 +346,15 @@ pub struct Span { pub high: Pos, } +impl From> for Span { + fn from(value: Range) -> Self { + Span { + low: value.start.into(), + high: value.end.into(), + } + } +} + impl Span { pub const ZERO: Span = Span::new(Pos::ZERO, Pos::ZERO); @@ -354,6 +362,13 @@ impl Span { Self { low, high } } + pub fn with_len(low: usize, len: usize) -> Self { + Self { + low: Pos(low), + high: Pos(low + len), + } + } + pub fn merge(&self, other: Span) -> Span { Span::new(self.low.min(other.low), self.high.max(other.high)) }