From 78708aa41bc6690ad8dbe89926a151053cf65f21 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 14:01:58 +0100 Subject: [PATCH 01/21] Add validators --- Cargo.lock | 70 +++++++ compiler/Cargo.toml | 8 +- compiler/src/lexer.rs | 396 +++++++++++++++++++++++++++++++++++++ compiler/src/lib.rs | 5 + compiler/src/validators.rs | 180 +++++++++++++++++ 5 files changed, 658 insertions(+), 1 deletion(-) create mode 100644 compiler/src/lexer.rs create mode 100644 compiler/src/validators.rs diff --git a/Cargo.lock b/Cargo.lock index f97f2242..a728be60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" +[[package]] +name = "bytecount" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" + [[package]] name = "byteorder" version = "1.4.3" @@ -213,6 +219,18 @@ dependencies = [ "libc", ] +[[package]] +name = "cps" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb470cedd77f02572699b885e6ef5baacd1adc78017427611dd045e9c1c4ca7" +dependencies = [ + "litrs", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -693,6 +711,15 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb68f22743a3fb35785f1e7f844ca5a3de2dde5bd0c0ef5b372065814699b121" +[[package]] +name = "litrs" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9275e0933cf8bb20f008924c0cb07a0692fe54d8064996520bf998de9eb79aa" +dependencies = [ + "proc-macro2", +] + [[package]] name = "log" version = "0.4.17" @@ -711,6 +738,12 @@ dependencies = [ "libc", ] +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + [[package]] name = "memoffset" version = "0.6.5" @@ -720,6 +753,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "modular-bitfield" version = "0.11.2" @@ -755,6 +794,27 @@ dependencies = [ "winapi", ] +[[package]] +name = "nom" +version = "7.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8903e5a29a317527874d0402f867152a3d21c908bb0b933e416c65e301d4c36" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "nom_locate" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37794436ca3029a3089e0b95d42da1f0b565ad271e4d3bb4bad0c7bb70b10605" +dependencies = [ + "bytecount", + "memchr", + "nom", +] + [[package]] name = "num_threads" version = "0.1.6" @@ -804,6 +864,12 @@ dependencies = [ "system-deps", ] +[[package]] +name = "paste" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1de2e551fb905ac83f73f7aedf2f0cb4a0da7e35efa24a202a936269f1f18e1" + [[package]] name = "peg" version = "0.8.1" @@ -948,13 +1014,17 @@ name = "redscript-compiler" version = "0.5.9" dependencies = [ "ahash", + "cps", "enum-as-inner", "flexstr", "hashbrown 0.13.1", "indexmap", "itertools", "log", + "nom", + "nom_locate", "once_cell", + "paste", "peg", "redscript", "sequence_trie", diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index d5c2e270..eef6c6da 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -20,6 +20,12 @@ yansi = "0.5" walkdir = "2" typed-builder = "0.11" indexmap = "1" -sequence_trie = { git = "https://github.com/jac3km4/rust_sequence_trie", rev = "a056b4c", features = ["hashbrown"] } +sequence_trie = { git = "https://github.com/jac3km4/rust_sequence_trie", rev = "a056b4c", features = [ + "hashbrown", +] } simple-interner = { version = "0.3", features = ["hashbrown"] } peg = "0.8" +nom = "7.1" +nom_locate = "4.0" +cps = "0.2.1" +paste = "1" diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs new file mode 100644 index 00000000..b33064ab --- /dev/null +++ b/compiler/src/lexer.rs @@ -0,0 +1,396 @@ +use std::cell::RefCell; + +use nom::branch::alt; +use nom::bytes::complete::{tag, take_until, take_while, take_while_m_n}; +use nom::character::complete::{ + alpha1, anychar, char, digit0, digit1, hex_digit0, line_ending, none_of, oct_digit0, one_of, satisfy +}; +use nom::combinator::{consumed, map, not, opt, recognize}; +use nom::error::ParseError; +use nom::multi::{many0, many0_count}; +use nom::sequence::{delimited, pair, preceded, separated_pair, tuple}; +use nom::AsChar; +use redscript::Str; +use strum::{Display, IntoStaticStr}; + +use crate::validators::*; +use crate::{diag_report, *}; + +pub trait ParseErr<'a>: ParseError<Span<'a>> {} +pub type IResult<'a, O> = nom::IResult<Span<'a>, O>; +pub type NomError<'a> = nom::Err<Span<'a>>; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Display)] +pub enum Token<'a> { + Trivia(Span<'a>, Trivia), + /// arbitrary numeric literal + Num(Span<'a>, Num), + /// a string literal portion + /// the initial portion can have a type prefix + /// a interpolated string needs to be parsed recursively: literal + start + token... + end + literal + Str(Span<'a>, Option<char>, Str), + /// Start of a string interpolation + StrIs(Span<'a>, Option<char>, Str), + /// End of a string interpolation + StrIe(Span<'a>, Str), + /// Inbetween part of a string interpolation + StrIp(Span<'a>, Str), + /// null + Null(Span<'a>), + /// one of true | false + Bool(Span<'a>, bool), + /// one of `+-*/!=<>&|~` + Op(Span<'a>, Op), + /// one of `()[]{}:;,.` + Ctrl(Span<'a>, Ctrl), + Ident(Span<'a>), + /// Language keywords + /// one of module, class, struct, enum, func, let, new, if, else, switch, case, break, while, for, in, continue, return, try, catch, finally + Kw(Span<'a>, Kw), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display)] +pub enum Trivia { + Comment, + Whitespace, + LineEnd, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +pub enum Num { + Float, + Int, + Hex, + Oct, + Bin, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +pub enum Op { + Add, + Sub, + Mul, + Div, + Bang, + Eq, + Lt, + Gt, + And, + Or, + Tilde, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +pub enum Ctrl { + LParen, + RParen, + LBracket, + RBracket, + LBrace, + RBrace, + Colon, + Semi, + Comma, + Period, + Dot, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +pub enum Kw { + Module, + Class, + Struct, + Enum, + Func, + Let, + New, + If, + Else, + Switch, + Case, + Break, + While, + For, + In, + Continue, + Return, + Try, + Catch, + Finally, +} + +// ----------------------------------------------------------------------------- +// Trivia +// ----------------------------------------------------------------------------- + +fn comment_multiline<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { + recognize(delimited( + tag("/*"), + recognize(many0_count(alt(( + map(comment_multiline, |s| s), + map(take_until("*/"), |s| s), + map(take_while(|c| c != '*' && c != '/'), |s| s), + )))), + tag("*/"), + ))(i) +} + +pub fn trivia<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Trivia)> { + alt(( + map(comment_multiline, |s| (s, Trivia::Comment)), + map(recognize(preceded(tag("//"), many0(not(line_ending)))), |s| { + (s, Trivia::Comment) + }), + map(recognize(line_ending), |s| (s, Trivia::LineEnd)), + map(recognize(take_while(|c: char| c.is_whitespace())), |s| { + (s, Trivia::Whitespace) + }), + ))(i) +} + +// ----------------------------------------------------------------------------- +// Numeric +// ----------------------------------------------------------------------------- + +fn float_literal<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { + recognize(separated_pair(digit0, tag("."), digit1))(i) +} + +fn sciexp_literal<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { + recognize(separated_pair(alt((float_literal, digit1)), one_of("eE"), digit1))(i) +} + +pub fn number<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Num)> { + alt(( + map(sciexp_literal, |s| (s, Num::Float)), + map(float_literal, |s| (s, Num::Float)), + map(digit1, |s| (s, Num::Int)), + map(preceded(tag("0x"), hex_digit0), |s| (s, Num::Hex)), + map(preceded(tag("0o"), oct_digit0), |s| (s, Num::Oct)), + map(preceded(tag("0b"), take_while(|c: char| c == '0' || c == '1')), |s| { + (s, Num::Bin) + }), + ))(i) +} + +// ----------------------------------------------------------------------------- +// String +// ----------------------------------------------------------------------------- +// Strings are parsed as a sequence of literal portions and interpolated portions +// The interpolated portions are parsed as part of the token stream, delimited by the start and end interpolation tokens +// The literal portions are parsed as a single token +// The entire string may be prefixed with a type specifier, a char. + +fn str_char_uni<'a>(is: Span<'a>) -> IResult<'a, Option<char>> { + let parse_hex = &take_while_m_n(1, 6, char::is_hex_digit); + let parse_delimited_hex = delimited(char('{'), parse_hex, char('}')); + let (i, digits) = alt((preceded(char('u'), parse_delimited_hex), preceded(char('u'), parse_hex)))(is.clone())?; + match u32::from_str_radix(digits.fragment(), 16) { + Ok(hex) => match char::from_u32(hex) { + Some(c) => Ok((i, Some(c))), + None => { + diag_report!((&is..&i), ERR_INVALID_UTF8, hex); + Ok((i, None)) + } + }, + Err(_) => { + diag_report!((&is..&i), ERR_INVALID_UTF8, digits.fragment()); + Ok((i, None)) + } + } +} + +fn str_char_invalid<'a>(is: Span<'a>) -> IResult<'a, Option<char>> { + let (i, c) = preceded(char('\\'), anychar)(is.clone())?; + diag_report!((&is..&i), ERR_INVALID_ESCAPE, c); + Ok((i, None)) +} + +fn str_char<'a>(i: Span<'a>) -> IResult<'a, Option<char>> { + alt(( + map(tag(r#"\\"#), |_| Some('\\')), + map(tag(r#"\/"#), |_| Some('/')), + map(tag(r#"\""#), |_| Some('"')), + map(tag(r#"\n"#), |_| Some('\n')), + map(tag(r#"\t"#), |_| Some('\t')), + map(tag(r#"\r"#), |_| Some('\r')), + map(tag(r#"\0"#), |_| Some('\0')), + str_char_uni, + map(none_of("\\"), |c| Some(c)), + str_char_invalid, + ))(i) +} + +fn str_chars<'a>(mut i: Span<'a>) -> IResult<'a, Str> { + let mut s = String::default(); + while let Ok((i_remaining, c)) = str_char(i.clone()) { + if let Some(c) = c { + s.push(c); + } + i = i_remaining; + } + Ok((i, Str::from_ref(s))) +} + +// a parser accepting a function and returning the result of the function, by consuming the input +fn string<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Option<char>, Str)> { + let (i, (o, (p, s))) = consumed(pair( + opt(satisfy(|c: char| c.is_alpha())), + delimited(tag("\""), str_chars, tag("\"")), + ))(i)?; + Ok((i, (o, p, s))) +} + +fn string_inter_start<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Option<char>, Str)> { + let (i, (o, (p, s))) = consumed(pair( + opt(satisfy(|c: char| c.is_alpha())), + delimited(tag("\""), str_chars, tag(r#"\("#)), + ))(i)?; + Ok((i, (o, p, s))) +} + +fn string_inter_end<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Str)> { + consumed(delimited(tag(r#")"#), str_chars, tag("\"")))(i) +} + +fn string_inter_part<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Str)> { + consumed(delimited(tag(r#"\("#), str_chars, tag(r#")"#)))(i) +} + +// ----------------------------------------------------------------------------- +// Operator +// ----------------------------------------------------------------------------- +// one of `+-*/!=<>&|~` + +fn operator<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Op)> { + alt(( + map(tag("="), |s| (s, Op::Add)), + map(tag("-"), |s| (s, Op::Sub)), + map(tag("*"), |s| (s, Op::Mul)), + map(tag("/"), |s| (s, Op::Div)), + map(tag("!"), |s| (s, Op::Bang)), + map(tag("="), |s| (s, Op::Eq)), + map(tag("<"), |s| (s, Op::Lt)), + map(tag(">"), |s| (s, Op::Gt)), + map(tag("&"), |s| (s, Op::Add)), + map(tag("|"), |s| (s, Op::Or)), + map(tag("~"), |s| (s, Op::Tilde)), + ))(i) +} + +// ----------------------------------------------------------------------------- +// Control character +// ----------------------------------------------------------------------------- +// one of `()[]{};,.` + +fn control<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Ctrl)> { + alt(( + map(tag("("), |s| (s, Ctrl::LParen)), + map(tag(")"), |s| (s, Ctrl::RParen)), + map(tag("["), |s| (s, Ctrl::LBracket)), + map(tag("]"), |s| (s, Ctrl::RBracket)), + map(tag("{"), |s| (s, Ctrl::LBrace)), + map(tag("}"), |s| (s, Ctrl::RBrace)), + map(tag(":"), |s| (s, Ctrl::Colon)), + map(tag(";"), |s| (s, Ctrl::Semi)), + map(tag(","), |s| (s, Ctrl::Comma)), + map(tag("."), |s| (s, Ctrl::Dot)), + ))(i) +} + +// ----------------------------------------------------------------------------- +// Identifier +// ----------------------------------------------------------------------------- +// An identifier is a sequence of letters, numbers, and underscores, starting with a letter or underscore + +fn identifier<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { + recognize(tuple((alpha1, take_while(|c: char| c.is_alphanumeric() || c == '_'))))(i) +} + +// ----------------------------------------------------------------------------- +// Keyword +// ----------------------------------------------------------------------------- +// A reserved langauge keyword +// one of module, class, struct, enum, func, let, new, if, else, switch, case, break, while, for, in, continue, return, try, catch, finally + +fn keyword<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Kw)> { + alt(( + map(tag("module"), |s| (s, Kw::Module)), + map(tag("class"), |s| (s, Kw::Class)), + map(tag("struct"), |s| (s, Kw::Struct)), + map(tag("enum"), |s| (s, Kw::Enum)), + map(tag("func"), |s| (s, Kw::Func)), + map(tag("let"), |s| (s, Kw::Let)), + map(tag("new"), |s| (s, Kw::New)), + map(tag("if"), |s| (s, Kw::If)), + map(tag("else"), |s| (s, Kw::Else)), + map(tag("switch"), |s| (s, Kw::Switch)), + map(tag("case"), |s| (s, Kw::Case)), + map(tag("break"), |s| (s, Kw::Break)), + map(tag("while"), |s| (s, Kw::While)), + map(tag("for"), |s| (s, Kw::For)), + map(tag("in"), |s| (s, Kw::In)), + map(tag("continue"), |s| (s, Kw::Continue)), + map(tag("return"), |s| (s, Kw::Return)), + map(tag("try"), |s| (s, Kw::Try)), + map(tag("catch"), |s| (s, Kw::Catch)), + map(tag("finally"), |s| (s, Kw::Finally)), + ))(i) +} + +fn null<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { + tag("null")(i) +} + +fn boolean<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, bool)> { + alt((map(tag("true"), |s| (s, true)), map(tag("false"), |s| (s, false))))(i) +} + +pub fn token<'a>(i: Span<'a>) -> IResult<'a, Token> { + alt(( + map(trivia, |(s, t)| Token::Trivia(s, t)), + map(number, |(s, n)| Token::Num(s, n)), + map(string, |(s, t, n)| Token::Str(s, t, n)), + map(string_inter_start, |(s, t, n)| Token::StrIs(s, t, n)), + map(string_inter_end, |(s, n)| Token::StrIe(s, n)), + map(string_inter_part, |(s, n)| Token::StrIp(s, n)), + map(null, |s| Token::Null(s)), + map(boolean, |(s, b)| Token::Bool(s, b)), + map(operator, |(s, o)| Token::Op(s, o)), + map(control, |(s, c)| Token::Ctrl(s, c)), + map(identifier, |s| Token::Ident(s)), + map(keyword, |(s, k)| Token::Kw(s, k)), + ))(i) +} + +pub fn tokens<'a>(i: Span<'a>) -> IResult<'a, Vec<Token>> { + many0(token)(i) +} + +pub fn parse_file<'a>( + input: &'a str, + file: Str, + diag: &'a RefCell<Vec<Diagnostic>>, +) -> Result<Vec<Token<'a>>, NomError<'a>> { + let input = Span::new_extra(input, State(diag, file)); + let (_, tokens) = tokens(input).unwrap(); + Ok(tokens) +} + +pub fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token<'a>>, NomError<'a>> { + parse_file(input, Default::default(), diag) +} + +#[cfg(test)] +mod test { + #[allow(unused_imports)] + use super::*; + + #[test] + fn parse_ternary_op() { + let diag = RefCell::new(Vec::new()); + let expr = parse("3.0 ? 5.0 : 5 + 4", &diag).unwrap(); + let text = format!("{:?}", expr); + println!("{}", text); + } +} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index b84da63a..50d63ea9 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,15 +1,20 @@ +#![feature(macro_metavar_expr)] +#![macro_use] +extern crate paste; use simple_interner::Interner; pub mod autobox; pub mod codegen; pub mod compiler; pub mod error; +pub mod lexer; #[allow(clippy::redundant_closure_call)] pub mod parser; mod scoped_map; pub mod source_map; pub mod type_repo; pub mod typer; +pub mod validators; pub mod visit; pub type StringInterner = Interner<str, ahash::RandomState>; diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs new file mode 100644 index 00000000..ca600d0a --- /dev/null +++ b/compiler/src/validators.rs @@ -0,0 +1,180 @@ +use std::cell::RefCell; +use std::fmt::Display; +use std::ops::Range; + +use redscript::Str; +use strum::{Display, IntoStaticStr}; + +use crate::lexer::Token; + +pub type Span<'a> = nom_locate::LocatedSpan<&'a str, State<'a>>; + +pub trait ToRange { + fn to_range(&self) -> Range<usize>; +} + +impl<'a> ToRange for Span<'a> { + fn to_range(&self) -> Range<usize> { + let start = self.location_offset(); + let end = start + self.fragment().len(); + start..end + } +} + +impl<'a> ToRange for Token<'a> { + fn to_range(&self) -> Range<usize> { + match self { + Token::Trivia(r, _) => r.to_range(), + Token::Num(r, _) => r.to_range(), + Token::Str(r, _, _) => r.to_range(), + Token::StrIs(r, _, _) => r.to_range(), + Token::StrIe(r, _) => r.to_range(), + Token::StrIp(r, _) => r.to_range(), + Token::Null(r) => r.to_range(), + Token::Bool(r, _) => r.to_range(), + Token::Op(r, _) => r.to_range(), + Token::Ctrl(r, _) => r.to_range(), + Token::Ident(r) => r.to_range(), + Token::Kw(r, _) => r.to_range(), + } + } +} + +/// Error containing a text span and an error message to display. +#[derive(Debug)] +pub struct Diagnostic { + sl: usize, + sc: usize, + el: usize, + ec: usize, + file: Str, + text: String, + severity: Severity, + code: &'static str, + msg: String, +} + +impl Display for Diagnostic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + writeln!(f, "{}[{}]: {}", self.severity.as_str(), self.code, self.msg)?; + writeln!( + f, + "--> {}:{}:{} to l{}:{}", + self.file, self.sl, self.sc, self.el, self.ec + )?; + for line in self.text.lines() { + writeln!(f, "| {}", line)?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, Copy, Display, IntoStaticStr)] +#[strum(serialize_all = "snake_case")] +pub enum Severity { + Error, + Warn, + Info, + Hint, +} + +impl Severity { + pub fn as_str(&self) -> &'static str { + self.into() + } +} + +/// Carried around in the `LocatedSpan::extra` field in +/// between `nom` parsers. +#[derive(Clone, Debug)] +pub struct State<'a>(pub &'a RefCell<Vec<Diagnostic>>, pub Str); + +impl<'a> State<'a> { + /// Something not allowed by the rules of the language or other authority. + #[allow(dead_code)] + fn report_diagnostic(&self, error: Diagnostic) { + self.0.borrow_mut().push(error); + } +} + +pub trait ReportOrigin { + fn report(&self, diag: &'static DiagnosticTemplate, msg: String); +} + +impl<'a> ReportOrigin for Span<'a> { + fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { + let bytes = std::str::from_utf8(&self.get_line_beginning()).unwrap_or_default(); + self.extra.report_diagnostic(Diagnostic { + sl: self.location_line() as usize, + sc: self.get_column(), + el: self.location_line() as usize, + ec: self.get_column(), + file: self.extra.1.clone(), + text: bytes.to_string(), + severity: diag.0, + code: diag.1, + msg, + }) + } +} + +impl<'a> ReportOrigin for Range<&Span<'a>> { + fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { + let sl = self.start.location_line() as usize; + let el = self.end.location_line() as usize; + let bytes = self + .start + .get(self.start.location_offset()..self.end.location_offset()) + .unwrap_or_else(|| std::str::from_utf8(&self.start.get_line_beginning()).unwrap_or_default()); + self.start.extra.report_diagnostic(Diagnostic { + sl, + sc: self.start.get_column(), + el, + ec: self.end.get_column(), + file: self.start.extra.1.clone(), + text: bytes.to_string(), + severity: diag.0, + code: diag.1, + msg, + }) + } +} + +/// A diagnostic message in the format `Severity, Code, Message format`. +pub struct DiagnosticTemplate(Severity, &'static str, &'static str); + +#[macro_export] +macro_rules! diag_report { + ($origin:expr, $name:ident, $($arg:tt)*) =>{ + paste::paste! { + $origin.report(&$name, [<format_ $name>]!($($arg)*)) + } + }; +} + +macro_rules! diag { + ($name:ident, $severity:ident, $code:tt, $msg:tt) => { + paste::paste! { + #[allow(dead_code)] + pub const $name: DiagnosticTemplate = DiagnosticTemplate(Severity::$severity, $code, $msg); + + // Formats the arguments with the arguments with the msg of the diagnostic. + #[allow(dead_code)] + #[macro_export] + macro_rules! [<format_ $name>] { + ($$($$a:tt)*) => { + format!($msg, $$($a)*) + }; + } + } + }; +} + +diag!(ERR_INVALID_UTF8, Error, "ERR_UTF8", "Invalid UTF-8 sequence `{}`"); +diag!(ERR_EXPECT_HEX_DIGIT, Error, "ERR_HEX_DIGIT", "Invalid hex digit `{}`"); +diag!( + ERR_INVALID_ESCAPE, + Error, + "ERR_INVALID_ESCAPE", + "Invalid escape sequence `{}`" +); From e9e1ad0f8b9cf52acc52a2608fceaa2c5ba93668 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 14:07:56 +0100 Subject: [PATCH 02/21] Code smell --- compiler/src/lexer.rs | 72 ++++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index b33064ab..e529275f 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -123,7 +123,7 @@ pub enum Kw { // Trivia // ----------------------------------------------------------------------------- -fn comment_multiline<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { +fn comment_multiline(i: Span) -> IResult<Span> { recognize(delimited( tag("/*"), recognize(many0_count(alt(( @@ -135,7 +135,7 @@ fn comment_multiline<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { ))(i) } -pub fn trivia<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Trivia)> { +pub fn trivia(i: Span) -> IResult<(Span, Trivia)> { alt(( map(comment_multiline, |s| (s, Trivia::Comment)), map(recognize(preceded(tag("//"), many0(not(line_ending)))), |s| { @@ -152,15 +152,15 @@ pub fn trivia<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Trivia)> { // Numeric // ----------------------------------------------------------------------------- -fn float_literal<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { +fn float_literal(i: Span) -> IResult<Span> { recognize(separated_pair(digit0, tag("."), digit1))(i) } -fn sciexp_literal<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { +fn sciexp_literal(i: Span) -> IResult<Span> { recognize(separated_pair(alt((float_literal, digit1)), one_of("eE"), digit1))(i) } -pub fn number<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Num)> { +pub fn number(i: Span) -> IResult<(Span, Num)> { alt(( map(sciexp_literal, |s| (s, Num::Float)), map(float_literal, |s| (s, Num::Float)), @@ -181,32 +181,30 @@ pub fn number<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Num)> { // The literal portions are parsed as a single token // The entire string may be prefixed with a type specifier, a char. -fn str_char_uni<'a>(is: Span<'a>) -> IResult<'a, Option<char>> { +fn str_char_uni(is: Span) -> IResult<Option<char>> { let parse_hex = &take_while_m_n(1, 6, char::is_hex_digit); let parse_delimited_hex = delimited(char('{'), parse_hex, char('}')); let (i, digits) = alt((preceded(char('u'), parse_delimited_hex), preceded(char('u'), parse_hex)))(is.clone())?; - match u32::from_str_radix(digits.fragment(), 16) { - Ok(hex) => match char::from_u32(hex) { - Some(c) => Ok((i, Some(c))), - None => { - diag_report!((&is..&i), ERR_INVALID_UTF8, hex); - Ok((i, None)) - } - }, - Err(_) => { - diag_report!((&is..&i), ERR_INVALID_UTF8, digits.fragment()); + if let Ok(hex) = u32::from_str_radix(digits.fragment(), 16) { + if let Some(c) = char::from_u32(hex) { + Ok((i, Some(c))) + } else { + diag_report!((&is..&i), ERR_INVALID_UTF8, hex); Ok((i, None)) } + } else { + diag_report!((&is..&i), ERR_INVALID_UTF8, digits.fragment()); + Ok((i, None)) } } -fn str_char_invalid<'a>(is: Span<'a>) -> IResult<'a, Option<char>> { +fn str_char_invalid(is: Span) -> IResult<Option<char>> { let (i, c) = preceded(char('\\'), anychar)(is.clone())?; diag_report!((&is..&i), ERR_INVALID_ESCAPE, c); Ok((i, None)) } -fn str_char<'a>(i: Span<'a>) -> IResult<'a, Option<char>> { +fn str_char(i: Span) -> IResult<Option<char>> { alt(( map(tag(r#"\\"#), |_| Some('\\')), map(tag(r#"\/"#), |_| Some('/')), @@ -216,12 +214,12 @@ fn str_char<'a>(i: Span<'a>) -> IResult<'a, Option<char>> { map(tag(r#"\r"#), |_| Some('\r')), map(tag(r#"\0"#), |_| Some('\0')), str_char_uni, - map(none_of("\\"), |c| Some(c)), + map(none_of("\\"), Some), str_char_invalid, ))(i) } -fn str_chars<'a>(mut i: Span<'a>) -> IResult<'a, Str> { +fn str_chars(mut i: Span) -> IResult<Str> { let mut s = String::default(); while let Ok((i_remaining, c)) = str_char(i.clone()) { if let Some(c) = c { @@ -233,7 +231,7 @@ fn str_chars<'a>(mut i: Span<'a>) -> IResult<'a, Str> { } // a parser accepting a function and returning the result of the function, by consuming the input -fn string<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Option<char>, Str)> { +fn string(i: Span) -> IResult<(Span, Option<char>, Str)> { let (i, (o, (p, s))) = consumed(pair( opt(satisfy(|c: char| c.is_alpha())), delimited(tag("\""), str_chars, tag("\"")), @@ -241,7 +239,7 @@ fn string<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Option<char>, Str)> { Ok((i, (o, p, s))) } -fn string_inter_start<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Option<char>, Str)> { +fn string_inter_start(i: Span) -> IResult<(Span, Option<char>, Str)> { let (i, (o, (p, s))) = consumed(pair( opt(satisfy(|c: char| c.is_alpha())), delimited(tag("\""), str_chars, tag(r#"\("#)), @@ -249,11 +247,11 @@ fn string_inter_start<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Option<char>, S Ok((i, (o, p, s))) } -fn string_inter_end<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Str)> { +fn string_inter_end(i: Span) -> IResult<(Span, Str)> { consumed(delimited(tag(r#")"#), str_chars, tag("\"")))(i) } -fn string_inter_part<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Str)> { +fn string_inter_part(i: Span) -> IResult<(Span, Str)> { consumed(delimited(tag(r#"\("#), str_chars, tag(r#")"#)))(i) } @@ -262,7 +260,7 @@ fn string_inter_part<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Str)> { // ----------------------------------------------------------------------------- // one of `+-*/!=<>&|~` -fn operator<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Op)> { +fn operator(i: Span) -> IResult<(Span, Op)> { alt(( map(tag("="), |s| (s, Op::Add)), map(tag("-"), |s| (s, Op::Sub)), @@ -283,7 +281,7 @@ fn operator<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Op)> { // ----------------------------------------------------------------------------- // one of `()[]{};,.` -fn control<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Ctrl)> { +fn control(i: Span) -> IResult<(Span, Ctrl)> { alt(( map(tag("("), |s| (s, Ctrl::LParen)), map(tag(")"), |s| (s, Ctrl::RParen)), @@ -303,7 +301,7 @@ fn control<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Ctrl)> { // ----------------------------------------------------------------------------- // An identifier is a sequence of letters, numbers, and underscores, starting with a letter or underscore -fn identifier<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { +fn identifier(i: Span) -> IResult<Span> { recognize(tuple((alpha1, take_while(|c: char| c.is_alphanumeric() || c == '_'))))(i) } @@ -313,7 +311,7 @@ fn identifier<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { // A reserved langauge keyword // one of module, class, struct, enum, func, let, new, if, else, switch, case, break, while, for, in, continue, return, try, catch, finally -fn keyword<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Kw)> { +fn keyword(i: Span) -> IResult<(Span, Kw)> { alt(( map(tag("module"), |s| (s, Kw::Module)), map(tag("class"), |s| (s, Kw::Class)), @@ -338,15 +336,15 @@ fn keyword<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, Kw)> { ))(i) } -fn null<'a>(i: Span<'a>) -> IResult<'a, Span<'a>> { +fn null(i: Span) -> IResult<Span> { tag("null")(i) } -fn boolean<'a>(i: Span<'a>) -> IResult<'a, (Span<'a>, bool)> { +fn boolean(i: Span) -> IResult<(Span, bool)> { alt((map(tag("true"), |s| (s, true)), map(tag("false"), |s| (s, false))))(i) } -pub fn token<'a>(i: Span<'a>) -> IResult<'a, Token> { +pub fn token(i: Span) -> IResult<Token> { alt(( map(trivia, |(s, t)| Token::Trivia(s, t)), map(number, |(s, n)| Token::Num(s, n)), @@ -363,22 +361,18 @@ pub fn token<'a>(i: Span<'a>) -> IResult<'a, Token> { ))(i) } -pub fn tokens<'a>(i: Span<'a>) -> IResult<'a, Vec<Token>> { +pub fn tokens(i: Span) -> IResult<Vec<Token>> { many0(token)(i) } -pub fn parse_file<'a>( - input: &'a str, - file: Str, - diag: &'a RefCell<Vec<Diagnostic>>, -) -> Result<Vec<Token<'a>>, NomError<'a>> { +pub fn parse_file<'a>(input: &'a str, file: Str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token>, NomError> { let input = Span::new_extra(input, State(diag, file)); let (_, tokens) = tokens(input).unwrap(); Ok(tokens) } -pub fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token<'a>>, NomError<'a>> { - parse_file(input, Default::default(), diag) +pub fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token>, NomError> { + parse_file(input, Str::default(), diag) } #[cfg(test)] From 8d29e2be5df8884afd5a71d4c0a4460bd502dbff Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 16:39:12 +0100 Subject: [PATCH 03/21] Add docs to validator methods --- compiler/src/validators.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index ca600d0a..a2b7d552 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -102,13 +102,17 @@ pub trait ReportOrigin { } impl<'a> ReportOrigin for Span<'a> { + // reports the entire fragment as faulty fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { let bytes = std::str::from_utf8(&self.get_line_beginning()).unwrap_or_default(); + let line = self.location_line() as usize; + let sc = self.location_offset(); + let ec = sc + self.fragment().len(); self.extra.report_diagnostic(Diagnostic { - sl: self.location_line() as usize, - sc: self.get_column(), - el: self.location_line() as usize, - ec: self.get_column(), + sl: line, + sc, + el: line, + ec, file: self.extra.1.clone(), text: bytes.to_string(), severity: diag.0, @@ -119,6 +123,7 @@ impl<'a> ReportOrigin for Span<'a> { } impl<'a> ReportOrigin for Range<&Span<'a>> { + // reports the range between the fragments as faulty fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { let sl = self.start.location_line() as usize; let el = self.end.location_line() as usize; @@ -178,3 +183,5 @@ diag!( "ERR_INVALID_ESCAPE", "Invalid escape sequence `{}`" ); +diag!(ERR_PARSE_INT, Error, "ERR_PARSE_INT", "Invalid integer `{}`, {}"); +diag!(ERR_PARSE_FLOAT, Error, "ERR_PARSE_FLOAT", "Invalid float `{}`, {}"); From 646905c12fda43457863b7fe9398fe5b2c5c9d8e Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 16:39:29 +0100 Subject: [PATCH 04/21] remove cps dep --- Cargo.lock | 22 ---------------------- compiler/Cargo.toml | 1 - 2 files changed, 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a728be60..48545fb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,18 +219,6 @@ dependencies = [ "libc", ] -[[package]] -name = "cps" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb470cedd77f02572699b885e6ef5baacd1adc78017427611dd045e9c1c4ca7" -dependencies = [ - "litrs", - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "crc32fast" version = "1.3.2" @@ -711,15 +699,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb68f22743a3fb35785f1e7f844ca5a3de2dde5bd0c0ef5b372065814699b121" -[[package]] -name = "litrs" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9275e0933cf8bb20f008924c0cb07a0692fe54d8064996520bf998de9eb79aa" -dependencies = [ - "proc-macro2", -] - [[package]] name = "log" version = "0.4.17" @@ -1014,7 +993,6 @@ name = "redscript-compiler" version = "0.5.9" dependencies = [ "ahash", - "cps", "enum-as-inner", "flexstr", "hashbrown 0.13.1", diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index eef6c6da..1a7788f5 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -27,5 +27,4 @@ simple-interner = { version = "0.3", features = ["hashbrown"] } peg = "0.8" nom = "7.1" nom_locate = "4.0" -cps = "0.2.1" paste = "1" From 62ea951bbfd172d0c72660de9c6d7551910db73b Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 16:39:49 +0100 Subject: [PATCH 05/21] Add validator docs --- compiler/src/validators.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index a2b7d552..05f37384 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -102,7 +102,8 @@ pub trait ReportOrigin { } impl<'a> ReportOrigin for Span<'a> { - // reports the entire fragment as faulty + /// Reports the entire fragment as faulty. + /// Does not terminate the compilation. fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { let bytes = std::str::from_utf8(&self.get_line_beginning()).unwrap_or_default(); let line = self.location_line() as usize; @@ -123,7 +124,9 @@ impl<'a> ReportOrigin for Span<'a> { } impl<'a> ReportOrigin for Range<&Span<'a>> { - // reports the range between the fragments as faulty + /// reports the range from the start of the first span to the end of the last span. + /// The first span must contain the last span. + /// This is archived, by cloning the first span, before consuming. fn report(&self, diag: &'static DiagnosticTemplate, msg: String) { let sl = self.start.location_line() as usize; let el = self.end.location_line() as usize; From 943a582e97d99bee3eb68efb2f81cb56df8dc758 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 16:40:07 +0100 Subject: [PATCH 06/21] Soft fail for integers --- compiler/src/lexer.rs | 106 +++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index e529275f..c9805539 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -3,7 +3,7 @@ use std::cell::RefCell; use nom::branch::alt; use nom::bytes::complete::{tag, take_until, take_while, take_while_m_n}; use nom::character::complete::{ - alpha1, anychar, char, digit0, digit1, hex_digit0, line_ending, none_of, oct_digit0, one_of, satisfy + alpha1, anychar, char, digit0, digit1, hex_digit0, line_ending, multispace1, none_of, oct_digit0, one_of, satisfy }; use nom::combinator::{consumed, map, not, opt, recognize}; use nom::error::ParseError; @@ -20,7 +20,7 @@ pub trait ParseErr<'a>: ParseError<Span<'a>> {} pub type IResult<'a, O> = nom::IResult<Span<'a>, O>; pub type NomError<'a> = nom::Err<Span<'a>>; -#[derive(Debug, Clone, PartialEq, Eq, Hash, Display)] +#[derive(Debug, Clone, PartialEq, Display)] pub enum Token<'a> { Trivia(Span<'a>, Trivia), /// arbitrary numeric literal @@ -41,7 +41,7 @@ pub enum Token<'a> { Bool(Span<'a>, bool), /// one of `+-*/!=<>&|~` Op(Span<'a>, Op), - /// one of `()[]{}:;,.` + /// one of `()[]{}:;,.?` and -> Ctrl(Span<'a>, Ctrl), Ident(Span<'a>), /// Language keywords @@ -49,23 +49,20 @@ pub enum Token<'a> { Kw(Span<'a>, Kw), } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display)] +#[derive(Debug, Clone, Copy, PartialEq, Display)] pub enum Trivia { Comment, Whitespace, LineEnd, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] pub enum Num { - Float, - Int, - Hex, - Oct, - Bin, + Float(f64), + Int(u64), } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] pub enum Op { Add, Sub, @@ -80,7 +77,7 @@ pub enum Op { Tilde, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] pub enum Ctrl { LParen, RParen, @@ -93,9 +90,11 @@ pub enum Ctrl { Comma, Period, Dot, + Quest, + LArrow, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Display, IntoStaticStr)] +#[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] pub enum Kw { Module, Class, @@ -135,16 +134,14 @@ fn comment_multiline(i: Span) -> IResult<Span> { ))(i) } -pub fn trivia(i: Span) -> IResult<(Span, Trivia)> { +pub fn trivia(i: Span) -> IResult<(Trivia, Span)> { alt(( - map(comment_multiline, |s| (s, Trivia::Comment)), + map(comment_multiline, |s| (Trivia::Comment, s)), map(recognize(preceded(tag("//"), many0(not(line_ending)))), |s| { - (s, Trivia::Comment) - }), - map(recognize(line_ending), |s| (s, Trivia::LineEnd)), - map(recognize(take_while(|c: char| c.is_whitespace())), |s| { - (s, Trivia::Whitespace) + (Trivia::Comment, s) }), + map(recognize(line_ending), |s| (Trivia::LineEnd, s)), + map(recognize(multispace1), |s| (Trivia::Whitespace, s)), ))(i) } @@ -157,19 +154,43 @@ fn float_literal(i: Span) -> IResult<Span> { } fn sciexp_literal(i: Span) -> IResult<Span> { - recognize(separated_pair(alt((float_literal, digit1)), one_of("eE"), digit1))(i) + recognize(separated_pair( + alt((float_literal, digit1)), + one_of("eE"), + pair(one_of("-+"), digit1), + ))(i) +} + +fn parse_float(i: &Span) -> f64 { + match i.fragment().parse() { + Ok(value) => value, + Err(error) => { + diag_report!(i, ERR_PARSE_FLOAT, i.fragment(), error); + 0.0 + } + } } -pub fn number(i: Span) -> IResult<(Span, Num)> { +fn parse_int(i: &Span, radix: u32) -> u64 { + match u64::from_str_radix(i.fragment(), radix) { + Ok(value) => value, + Err(error) => { + diag_report!(i, ERR_PARSE_INT, i.fragment(), error); + 0 + } + } +} + +pub fn number(i: Span) -> IResult<(Num, Span)> { alt(( - map(sciexp_literal, |s| (s, Num::Float)), - map(float_literal, |s| (s, Num::Float)), - map(digit1, |s| (s, Num::Int)), - map(preceded(tag("0x"), hex_digit0), |s| (s, Num::Hex)), - map(preceded(tag("0o"), oct_digit0), |s| (s, Num::Oct)), + map(preceded(tag("0x"), hex_digit0), |s| (Num::Int(parse_int(&s, 16)), s)), + map(preceded(tag("0o"), oct_digit0), |s| (Num::Int(parse_int(&s, 8)), s)), map(preceded(tag("0b"), take_while(|c: char| c == '0' || c == '1')), |s| { - (s, Num::Bin) + (Num::Int(parse_int(&s, 2)), s) }), + map(sciexp_literal, |s| (Num::Float(parse_float(&s)), s)), + map(float_literal, |s| (Num::Float(parse_float(&s)), s)), + map(digit1, |s| (Num::Int(parse_int(&s, 10)), s)), ))(i) } @@ -233,7 +254,7 @@ fn str_chars(mut i: Span) -> IResult<Str> { // a parser accepting a function and returning the result of the function, by consuming the input fn string(i: Span) -> IResult<(Span, Option<char>, Str)> { let (i, (o, (p, s))) = consumed(pair( - opt(satisfy(|c: char| c.is_alpha())), + opt(satisfy(nom::AsChar::is_alpha)), delimited(tag("\""), str_chars, tag("\"")), ))(i)?; Ok((i, (o, p, s))) @@ -241,7 +262,7 @@ fn string(i: Span) -> IResult<(Span, Option<char>, Str)> { fn string_inter_start(i: Span) -> IResult<(Span, Option<char>, Str)> { let (i, (o, (p, s))) = consumed(pair( - opt(satisfy(|c: char| c.is_alpha())), + opt(satisfy(nom::AsChar::is_alpha)), delimited(tag("\""), str_chars, tag(r#"\("#)), ))(i)?; Ok((i, (o, p, s))) @@ -279,7 +300,7 @@ fn operator(i: Span) -> IResult<(Span, Op)> { // ----------------------------------------------------------------------------- // Control character // ----------------------------------------------------------------------------- -// one of `()[]{};,.` +// one of `()[]{}:;,.?` and -> fn control(i: Span) -> IResult<(Span, Ctrl)> { alt(( @@ -293,6 +314,8 @@ fn control(i: Span) -> IResult<(Span, Ctrl)> { map(tag(";"), |s| (s, Ctrl::Semi)), map(tag(","), |s| (s, Ctrl::Comma)), map(tag("."), |s| (s, Ctrl::Dot)), + map(tag("?"), |s| (s, Ctrl::Quest)), + map(tag("->"), |s| (s, Ctrl::LArrow)), ))(i) } @@ -346,8 +369,8 @@ fn boolean(i: Span) -> IResult<(Span, bool)> { pub fn token(i: Span) -> IResult<Token> { alt(( - map(trivia, |(s, t)| Token::Trivia(s, t)), - map(number, |(s, n)| Token::Num(s, n)), + map(trivia, |(t, s)| Token::Trivia(s, t)), + map(number, |(n, s)| Token::Num(s, n)), map(string, |(s, t, n)| Token::Str(s, t, n)), map(string_inter_start, |(s, t, n)| Token::StrIs(s, t, n)), map(string_inter_end, |(s, n)| Token::StrIe(s, n)), @@ -376,15 +399,22 @@ pub fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<V } #[cfg(test)] +#[allow(unused_imports, dead_code)] mod test { - #[allow(unused_imports)] use super::*; #[test] fn parse_ternary_op() { - let diag = RefCell::new(Vec::new()); - let expr = parse("3.0 ? 5.0 : 5 + 4", &diag).unwrap(); - let text = format!("{:?}", expr); - println!("{}", text); + let diag = RefCell::new(vec![]); + let tokens = parse("3.0 ? 5.0 : 5 + 4", &diag).unwrap(); + println!("{:?}", tokens); + } + + fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token<'a>>, NomError<'a>> { + let input = Span::new_extra(input, State(diag, Str::default())); + let (_, tokens) = tokens(input).unwrap(); + let text = format!("{:?}", tokens); + assert!(!text.is_empty()); + Ok(tokens) } } From a1e13ccf0d11567cb89f8e0a346935d2a85d023f Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 17:01:57 +0100 Subject: [PATCH 07/21] Explain macros --- compiler/src/validators.rs | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index 05f37384..f434c378 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -151,6 +151,11 @@ impl<'a> ReportOrigin for Range<&Span<'a>> { /// A diagnostic message in the format `Severity, Code, Message format`. pub struct DiagnosticTemplate(Severity, &'static str, &'static str); +/// Reports a diagnostic message. +/// Requires the crate level imported, because format macros are defined there. +/// ```ignore +/// use crate::*; +/// ``` #[macro_export] macro_rules! diag_report { ($origin:expr, $name:ident, $($arg:tt)*) =>{ @@ -160,6 +165,19 @@ macro_rules! diag_report { }; } +/// A diagnostic message in the format `Severity, Code, Message format`. +/// The code is a key constructed from the initials and a id. +/// The message format is a `format!` string. +/// +/// ## Error Code +/// - The first char represents the severity: [E]rror, [W]arn, [I]nfo, [H]int. +/// - The second char represents the source: [L]lexical, S[Y}ntax, [S]emantic, [T]ype, etc. +/// - The third char represents a category. This can really be whatever. It is used to group similar errors. +/// +/// ## Example +/// ```ignore +/// diag!(ERR_INVALID_UTF8, Error, "ELS0001", "Invalid UTF-8 sequence `{}`"); +/// ``` macro_rules! diag { ($name:ident, $severity:ident, $code:tt, $msg:tt) => { paste::paste! { @@ -178,13 +196,8 @@ macro_rules! diag { }; } -diag!(ERR_INVALID_UTF8, Error, "ERR_UTF8", "Invalid UTF-8 sequence `{}`"); -diag!(ERR_EXPECT_HEX_DIGIT, Error, "ERR_HEX_DIGIT", "Invalid hex digit `{}`"); -diag!( - ERR_INVALID_ESCAPE, - Error, - "ERR_INVALID_ESCAPE", - "Invalid escape sequence `{}`" -); -diag!(ERR_PARSE_INT, Error, "ERR_PARSE_INT", "Invalid integer `{}`, {}"); -diag!(ERR_PARSE_FLOAT, Error, "ERR_PARSE_FLOAT", "Invalid float `{}`, {}"); +diag!(ERR_INVALID_UTF8, Error, "ELS0001", "Invalid UTF-8 sequence `{}`"); +diag!(ERR_EXPECT_HEX_DIGIT, Error, "ELS0002", "Invalid hex digit `{}`"); +diag!(ERR_INVALID_ESCAPE, Error, "ELS0003", "Invalid escape sequence `{}`"); +diag!(ERR_PARSE_INT, Error, "ELN0001", "Invalid integer `{}`, {}"); +diag!(ERR_PARSE_FLOAT, Error, "ELN0002", "Invalid float `{}`, {}"); From 4424232fe9b2e2fc4b030cbc69d470d10956071e Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 21:11:50 +0100 Subject: [PATCH 08/21] Recusive descend for multiline comment --- compiler/src/lexer.rs | 25 ++++++++++++++--------- compiler/src/lib.rs | 1 + compiler/src/nom.rs | 46 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 9 deletions(-) create mode 100644 compiler/src/nom.rs diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index c9805539..e3b83d71 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -13,6 +13,7 @@ use nom::AsChar; use redscript::Str; use strum::{Display, IntoStaticStr}; +use crate::nom::many_till_balanced1; use crate::validators::*; use crate::{diag_report, *}; @@ -123,13 +124,9 @@ pub enum Kw { // ----------------------------------------------------------------------------- fn comment_multiline(i: Span) -> IResult<Span> { - recognize(delimited( + recognize(many_till_balanced1( tag("/*"), - recognize(many0_count(alt(( - map(comment_multiline, |s| s), - map(take_until("*/"), |s| s), - map(take_while(|c| c != '*' && c != '/'), |s| s), - )))), + recognize(many0(not(alt((tag("*/"), tag("/*")))))), tag("*/"), ))(i) } @@ -403,6 +400,17 @@ pub fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<V mod test { use super::*; + #[test] + fn parse_invalid() { + let mut diag = RefCell::new(Vec::new()); + let input = "02439853427592345284395923845320459823457324953247640359104519845834634538"; + let result = parse(input, &mut diag); + assert!(result.is_ok()); + for diag in diag.borrow().iter() { + println!("{}", diag); + } + } + #[test] fn parse_ternary_op() { let diag = RefCell::new(vec![]); @@ -411,10 +419,9 @@ mod test { } fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token<'a>>, NomError<'a>> { - let input = Span::new_extra(input, State(diag, Str::default())); + let input = Span::new_extra(input, State(diag, "test.reds".into())); let (_, tokens) = tokens(input).unwrap(); - let text = format!("{:?}", tokens); - assert!(!text.is_empty()); + println!("{:?}", tokens); Ok(tokens) } } diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 50d63ea9..0b836ec9 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -8,6 +8,7 @@ pub mod codegen; pub mod compiler; pub mod error; pub mod lexer; +pub mod nom; #[allow(clippy::redundant_closure_call)] pub mod parser; mod scoped_map; diff --git a/compiler/src/nom.rs b/compiler/src/nom.rs new file mode 100644 index 00000000..09936754 --- /dev/null +++ b/compiler/src/nom.rs @@ -0,0 +1,46 @@ +use std::ops::RangeTo; + +use nom::error::ParseError; +use nom::{IResult, Offset, Slice}; + +/// Matches while open, close, or inner. Returns the matched range. +/// Input is matched, recursively descending, until open and close are balanced. +pub fn many_till_balanced1<I, O1, O2, E>( + mut open: impl FnMut(I) -> IResult<I, O1, E>, + mut inner: impl FnMut(I) -> IResult<I, I, E>, + mut close: impl FnMut(I) -> IResult<I, O2, E>, +) -> impl FnMut(I) -> IResult<I, I, E> +where + I: Clone + Offset + Slice<RangeTo<usize>>, + E: ParseError<I>, +{ + move |start: I| { + let mut open_count = 0usize; + let mut close_count = 0usize; + let mut rem = start.clone(); + let mut end = start.clone(); + loop { + if let Ok((rem2, _)) = open(rem.clone()) { + open_count += 1; + rem = rem2; + } else if let Ok((rem2, _)) = close(rem.clone()) { + close_count += 1; + rem = rem2; + } else if let Ok((rem2, _)) = inner(rem.clone()) { + rem = rem2; + } else { + break; + } + if open_count == close_count { + end = rem.clone(); + break; + } + } + let len = end.offset(&start); + if len == 0 { + Err(nom::Err::Error(E::from_error_kind(start, nom::error::ErrorKind::Many1))) + } else { + Ok((rem, start.slice(..len))) + } + } +} From 2aff6ec7aaa3e7fdf3aa7947762653a778a3f696 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Mon, 14 Nov 2022 21:17:37 +0100 Subject: [PATCH 09/21] Prepare lexer for parser impl --- compiler/src/{nom.rs => comb.rs} | 0 compiler/src/lexer.rs | 128 +++++-------------------------- compiler/src/lib.rs | 3 +- compiler/src/parser2.rs | 1 + compiler/src/validators.rs | 21 ----- 5 files changed, 22 insertions(+), 131 deletions(-) rename compiler/src/{nom.rs => comb.rs} (100%) create mode 100644 compiler/src/parser2.rs diff --git a/compiler/src/nom.rs b/compiler/src/comb.rs similarity index 100% rename from compiler/src/nom.rs rename to compiler/src/comb.rs diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index e3b83d71..bc81f5af 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -1,19 +1,17 @@ -use std::cell::RefCell; - use nom::branch::alt; -use nom::bytes::complete::{tag, take_until, take_while, take_while_m_n}; +use nom::bytes::complete::{tag, take_while, take_while_m_n}; use nom::character::complete::{ alpha1, anychar, char, digit0, digit1, hex_digit0, line_ending, multispace1, none_of, oct_digit0, one_of, satisfy }; use nom::combinator::{consumed, map, not, opt, recognize}; use nom::error::ParseError; -use nom::multi::{many0, many0_count}; +use nom::multi::many0; use nom::sequence::{delimited, pair, preceded, separated_pair, tuple}; use nom::AsChar; use redscript::Str; use strum::{Display, IntoStaticStr}; -use crate::nom::many_till_balanced1; +use crate::comb::many_till_balanced1; use crate::validators::*; use crate::{diag_report, *}; @@ -21,35 +19,6 @@ pub trait ParseErr<'a>: ParseError<Span<'a>> {} pub type IResult<'a, O> = nom::IResult<Span<'a>, O>; pub type NomError<'a> = nom::Err<Span<'a>>; -#[derive(Debug, Clone, PartialEq, Display)] -pub enum Token<'a> { - Trivia(Span<'a>, Trivia), - /// arbitrary numeric literal - Num(Span<'a>, Num), - /// a string literal portion - /// the initial portion can have a type prefix - /// a interpolated string needs to be parsed recursively: literal + start + token... + end + literal - Str(Span<'a>, Option<char>, Str), - /// Start of a string interpolation - StrIs(Span<'a>, Option<char>, Str), - /// End of a string interpolation - StrIe(Span<'a>, Str), - /// Inbetween part of a string interpolation - StrIp(Span<'a>, Str), - /// null - Null(Span<'a>), - /// one of true | false - Bool(Span<'a>, bool), - /// one of `+-*/!=<>&|~` - Op(Span<'a>, Op), - /// one of `()[]{}:;,.?` and -> - Ctrl(Span<'a>, Ctrl), - Ident(Span<'a>), - /// Language keywords - /// one of module, class, struct, enum, func, let, new, if, else, switch, case, break, while, for, in, continue, return, try, catch, finally - Kw(Span<'a>, Kw), -} - #[derive(Debug, Clone, Copy, PartialEq, Display)] pub enum Trivia { Comment, @@ -249,28 +218,31 @@ fn str_chars(mut i: Span) -> IResult<Str> { } // a parser accepting a function and returning the result of the function, by consuming the input -fn string(i: Span) -> IResult<(Span, Option<char>, Str)> { +pub fn string(i: Span) -> IResult<(Span, Option<char>, Str)> { let (i, (o, (p, s))) = consumed(pair( - opt(satisfy(nom::AsChar::is_alpha)), + opt(satisfy(AsChar::is_alpha)), delimited(tag("\""), str_chars, tag("\"")), ))(i)?; Ok((i, (o, p, s))) } -fn string_inter_start(i: Span) -> IResult<(Span, Option<char>, Str)> { +// matches a string literal until the first interpolation +pub fn string_inter_start(i: Span) -> IResult<(Span, Option<char>, Str)> { let (i, (o, (p, s))) = consumed(pair( - opt(satisfy(nom::AsChar::is_alpha)), + opt(satisfy(AsChar::is_alpha)), delimited(tag("\""), str_chars, tag(r#"\("#)), ))(i)?; Ok((i, (o, p, s))) } -fn string_inter_end(i: Span) -> IResult<(Span, Str)> { +// matches a string literal from the end of the first interpolation until the end of the string +pub fn string_inter_end(i: Span) -> IResult<(Span, Str)> { consumed(delimited(tag(r#")"#), str_chars, tag("\"")))(i) } -fn string_inter_part(i: Span) -> IResult<(Span, Str)> { - consumed(delimited(tag(r#"\("#), str_chars, tag(r#")"#)))(i) +// matches a string literal from the end of the first interpolation until the start of the next interpolation +pub fn string_inter_part(i: Span) -> IResult<(Span, Str)> { + consumed(delimited(tag(r#")"#), str_chars, tag(r#"\("#)))(i) } // ----------------------------------------------------------------------------- @@ -278,7 +250,7 @@ fn string_inter_part(i: Span) -> IResult<(Span, Str)> { // ----------------------------------------------------------------------------- // one of `+-*/!=<>&|~` -fn operator(i: Span) -> IResult<(Span, Op)> { +pub fn operator(i: Span) -> IResult<(Span, Op)> { alt(( map(tag("="), |s| (s, Op::Add)), map(tag("-"), |s| (s, Op::Sub)), @@ -299,7 +271,7 @@ fn operator(i: Span) -> IResult<(Span, Op)> { // ----------------------------------------------------------------------------- // one of `()[]{}:;,.?` and -> -fn control(i: Span) -> IResult<(Span, Ctrl)> { +pub fn control(i: Span) -> IResult<(Span, Ctrl)> { alt(( map(tag("("), |s| (s, Ctrl::LParen)), map(tag(")"), |s| (s, Ctrl::RParen)), @@ -321,7 +293,7 @@ fn control(i: Span) -> IResult<(Span, Ctrl)> { // ----------------------------------------------------------------------------- // An identifier is a sequence of letters, numbers, and underscores, starting with a letter or underscore -fn identifier(i: Span) -> IResult<Span> { +pub fn identifier(i: Span) -> IResult<Span> { recognize(tuple((alpha1, take_while(|c: char| c.is_alphanumeric() || c == '_'))))(i) } @@ -331,7 +303,7 @@ fn identifier(i: Span) -> IResult<Span> { // A reserved langauge keyword // one of module, class, struct, enum, func, let, new, if, else, switch, case, break, while, for, in, continue, return, try, catch, finally -fn keyword(i: Span) -> IResult<(Span, Kw)> { +pub fn keyword(i: Span) -> IResult<(Span, Kw)> { alt(( map(tag("module"), |s| (s, Kw::Module)), map(tag("class"), |s| (s, Kw::Class)), @@ -356,72 +328,10 @@ fn keyword(i: Span) -> IResult<(Span, Kw)> { ))(i) } -fn null(i: Span) -> IResult<Span> { +pub fn null(i: Span) -> IResult<Span> { tag("null")(i) } -fn boolean(i: Span) -> IResult<(Span, bool)> { +pub fn boolean(i: Span) -> IResult<(Span, bool)> { alt((map(tag("true"), |s| (s, true)), map(tag("false"), |s| (s, false))))(i) } - -pub fn token(i: Span) -> IResult<Token> { - alt(( - map(trivia, |(t, s)| Token::Trivia(s, t)), - map(number, |(n, s)| Token::Num(s, n)), - map(string, |(s, t, n)| Token::Str(s, t, n)), - map(string_inter_start, |(s, t, n)| Token::StrIs(s, t, n)), - map(string_inter_end, |(s, n)| Token::StrIe(s, n)), - map(string_inter_part, |(s, n)| Token::StrIp(s, n)), - map(null, |s| Token::Null(s)), - map(boolean, |(s, b)| Token::Bool(s, b)), - map(operator, |(s, o)| Token::Op(s, o)), - map(control, |(s, c)| Token::Ctrl(s, c)), - map(identifier, |s| Token::Ident(s)), - map(keyword, |(s, k)| Token::Kw(s, k)), - ))(i) -} - -pub fn tokens(i: Span) -> IResult<Vec<Token>> { - many0(token)(i) -} - -pub fn parse_file<'a>(input: &'a str, file: Str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token>, NomError> { - let input = Span::new_extra(input, State(diag, file)); - let (_, tokens) = tokens(input).unwrap(); - Ok(tokens) -} - -pub fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token>, NomError> { - parse_file(input, Str::default(), diag) -} - -#[cfg(test)] -#[allow(unused_imports, dead_code)] -mod test { - use super::*; - - #[test] - fn parse_invalid() { - let mut diag = RefCell::new(Vec::new()); - let input = "02439853427592345284395923845320459823457324953247640359104519845834634538"; - let result = parse(input, &mut diag); - assert!(result.is_ok()); - for diag in diag.borrow().iter() { - println!("{}", diag); - } - } - - #[test] - fn parse_ternary_op() { - let diag = RefCell::new(vec![]); - let tokens = parse("3.0 ? 5.0 : 5 + 4", &diag).unwrap(); - println!("{:?}", tokens); - } - - fn parse<'a>(input: &'a str, diag: &'a RefCell<Vec<Diagnostic>>) -> Result<Vec<Token<'a>>, NomError<'a>> { - let input = Span::new_extra(input, State(diag, "test.reds".into())); - let (_, tokens) = tokens(input).unwrap(); - println!("{:?}", tokens); - Ok(tokens) - } -} diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index 0b836ec9..e53bb119 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -5,12 +5,13 @@ use simple_interner::Interner; pub mod autobox; pub mod codegen; +pub mod comb; pub mod compiler; pub mod error; pub mod lexer; -pub mod nom; #[allow(clippy::redundant_closure_call)] pub mod parser; +pub mod parser2; mod scoped_map; pub mod source_map; pub mod type_repo; diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/compiler/src/parser2.rs @@ -0,0 +1 @@ + diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index f434c378..4181465c 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -5,8 +5,6 @@ use std::ops::Range; use redscript::Str; use strum::{Display, IntoStaticStr}; -use crate::lexer::Token; - pub type Span<'a> = nom_locate::LocatedSpan<&'a str, State<'a>>; pub trait ToRange { @@ -21,25 +19,6 @@ impl<'a> ToRange for Span<'a> { } } -impl<'a> ToRange for Token<'a> { - fn to_range(&self) -> Range<usize> { - match self { - Token::Trivia(r, _) => r.to_range(), - Token::Num(r, _) => r.to_range(), - Token::Str(r, _, _) => r.to_range(), - Token::StrIs(r, _, _) => r.to_range(), - Token::StrIe(r, _) => r.to_range(), - Token::StrIp(r, _) => r.to_range(), - Token::Null(r) => r.to_range(), - Token::Bool(r, _) => r.to_range(), - Token::Op(r, _) => r.to_range(), - Token::Ctrl(r, _) => r.to_range(), - Token::Ident(r) => r.to_range(), - Token::Kw(r, _) => r.to_range(), - } - } -} - /// Error containing a text span and an error message to display. #[derive(Debug)] pub struct Diagnostic { From 227b7ae611a53623a36f7fc3d543f6bfe9546a89 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Tue, 15 Nov 2022 19:11:12 +0100 Subject: [PATCH 10/21] Impl slab --- core/src/lib.rs | 1 + core/src/slab.rs | 238 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 core/src/slab.rs diff --git a/core/src/lib.rs b/core/src/lib.rs index ab6ecfd0..f5e0d84f 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -5,6 +5,7 @@ pub mod decode; pub mod definition; pub mod encode; pub mod io; +pub mod slab; #[cfg(not(feature = "arc"))] pub type Str = flexstr::LocalStr; diff --git a/core/src/slab.rs b/core/src/slab.rs new file mode 100644 index 00000000..8cb664c3 --- /dev/null +++ b/core/src/slab.rs @@ -0,0 +1,238 @@ +use std::any::TypeId; +use std::ffi::c_void; +use std::ops::Range; + +use hashbrown::HashMap; + +pub trait Shatter: Sized { + // splits a cut into two adjacent cuts + fn split_at(self, index: usize) -> (Self, Self); + + // splits a cut into three adjacent cuts, the middle one being of the given range + fn split_range(self, range: Range<usize>) -> (Self, Self, Self) { + let (left, right) = self.split_at(range.start); + let (middle, right) = right.split_at(range.end - range.start); + (left, middle, right) + } + + // fuzes two adjacent cuts into one + fn fuze(self, other: Self) -> Option<Self>; +} + +impl Shatter for Range<usize> { + fn split_at(self, index: usize) -> (Self, Self) { + let start = self.start; + let end = self.end; + (start..index, index..end) + } + + fn fuze(self, other: Self) -> Option<Self> { + if self.end == other.start { + Some(self.start..other.end) + } else { + None + } + } +} +#[derive(Debug)] +pub struct Cut<'a, T>(Range<usize>, &'a Slab<T>); + +impl<'a, T> Cut<'a, T> { + pub fn len(&self) -> usize { + self.0.end - self.0.start + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn as_slice(&self) -> &'a [T] { + unsafe { self.1.slice_unchecked(self.0.clone()) } + } + + pub fn as_slice_mut(&mut self) -> &'a mut [T] { + unsafe { self.1.slice_unchecked_mut(self.0.clone()) } + } + + pub fn slab(&self) -> &'a Slab<T> { + self.1 + } + + pub fn slab_mut(&mut self) -> &'a mut Slab<T> { + unsafe { self.1.as_mut() } + } +} + +impl<'a, T> AsRef<Range<usize>> for Cut<'a, T> { + fn as_ref(&self) -> &Range<usize> { + &self.0 + } +} + +impl<'a, T> From<Cut<'a, T>> for Range<usize> { + fn from(cut: Cut<'a, T>) -> Self { + cut.0 + } +} + +impl<'a, T> From<Cut<'a, T>> for &'a [T] { + fn from(value: Cut<'a, T>) -> Self { + value.as_slice() + } +} + +impl<'a, T> Shatter for Cut<'a, T> { + fn split_at(self, at: usize) -> (Self, Self) { + let (left, right) = self.0.split_at(at); + (Cut(left, self.1), Cut(right, self.1)) + } + + fn fuze(self, other: Self) -> Option<Self> { + if self.0.end == other.0.start { + Some(Cut(self.0.start..other.0.end, self.1)) + } else { + None + } + } +} + +#[derive(Debug)] +pub struct Cut1<'a, T>(usize, &'a Slab<T>); + +impl<'a, T> Cut1<'a, T> { + pub fn as_t(&self) -> &'a T { + unsafe { self.1.get_unchecked(self.0) } + } + + pub fn as_t_mut(&mut self) -> &'a mut T { + unsafe { self.1.get_unchecked_mut(self.0) } + } + + pub fn slab(&self) -> &'a Slab<T> { + self.1 + } + + pub fn slab_mut(&mut self) -> &'a mut Slab<T> { + unsafe { self.1.as_mut() } + } +} + +impl<'a, T> From<Cut1<'a, T>> for Cut<'a, T> { + fn from(value: Cut1<'a, T>) -> Self { + Cut(value.0..value.0 + 1, value.1) + } +} + +#[derive(Debug)] +pub struct Slab<T> { + data: Vec<T>, + start: usize, +} + +impl<T> Default for Slab<T> { + fn default() -> Self { + Self { + data: Default::default(), + start: Default::default(), + } + } +} + +#[allow(clippy::mut_from_ref, clippy::cast_ref_to_mut)] +impl<T> Slab<T> { + #[inline] + unsafe fn slice_unchecked(&self, range: Range<usize>) -> &[T] { + std::slice::from_raw_parts(self.data.as_ptr().add(range.start), range.end - range.start) + } + #[inline] + unsafe fn slice_unchecked_mut(&self, range: Range<usize>) -> &mut [T] { + std::slice::from_raw_parts_mut(self.data.as_ptr().cast_mut().add(range.start), range.end - range.start) + } + + #[inline] + unsafe fn get_unchecked(&self, index: usize) -> &T { + self.data.get_unchecked(index) + } + #[inline] + unsafe fn get_unchecked_mut(&self, index: usize) -> &mut T { + self.data.as_ptr().cast_mut().add(index).as_mut().unwrap() + } + + #[inline] + unsafe fn as_mut(&self) -> &mut Self { + &mut *(self as *const _ as *mut _) + } + + pub fn cut(&mut self, len: usize) -> Result<Cut<T>, Cut<T>> { + let start = self.start; + let end = start + len; + let available = self.data.len() - start; + if available >= len { + self.start = end; + Ok(Cut(start..end, self)) + } else { + Err(Cut(start..self.data.len(), self)) + } + } + + pub fn cut1(&mut self) -> Option<Cut1<T>> { + let start = self.start; + let end = start + 1; + let available = self.data.len() - start; + if available >= 1 { + self.start = end; + Some(Cut1(start, self)) + } else { + None + } + } + + #[inline] + pub fn push(&mut self, value: T) { + self.data.push(value); + } + + #[inline] + pub fn len(&self) -> usize { + self.data.len() - self.start + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn reserve(&mut self, additional: usize) { + if let Some(additional) = additional.checked_sub(self.len()) { + self.data.reserve(additional); + } + } +} + +trait IntoSlab { + fn type_id(&self) -> TypeId; + + fn slab_ptr(&self) -> *const c_void { + self as *const _ as *const c_void + } +} + +impl<U: 'static> IntoSlab for Slab<U> { + fn type_id(&self) -> TypeId { + TypeId::of::<U>() + } +} + +pub struct Hunk { + slabs: HashMap<TypeId, Box<dyn IntoSlab>>, +} + +impl Hunk { + fn get_slab<T: 'static>(&mut self) -> &mut Slab<T> { + let type_id = TypeId::of::<T>(); + let slab = self + .slabs + .entry(type_id) + .or_insert_with(|| Box::new(Slab::<T>::default())); + unsafe { &mut *(slab.slab_ptr() as *mut Slab<T>) } + } +} From c49b3c28174ed2b0511b26492121f60d4c75fefa Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Tue, 15 Nov 2022 19:39:48 +0100 Subject: [PATCH 11/21] Use bump allocation --- Cargo.lock | 7 ++++ compiler/src/parser2.rs | 6 +++ compiler/src/validators.rs | 11 ++--- core/Cargo.toml | 1 + core/src/slab.rs | 84 +++++++++++++++++++++++--------------- 5 files changed, 70 insertions(+), 39 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 48545fb7..81027cf2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -73,6 +73,12 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" +[[package]] +name = "bumpalo" +version = "3.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba" + [[package]] name = "bytecount" version = "0.6.3" @@ -961,6 +967,7 @@ dependencies = [ name = "redscript" version = "0.5.9" dependencies = [ + "bumpalo", "byteorder", "crc32fast", "derive-where", diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index 8b137891..e60f62a8 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1 +1,7 @@ +use crate::lexer::*; +use crate::validators::Span; +/// A dot separated sequence of identifiers. +fn trailer<'a>(i: Span<'a>) -> IResult<(&'a [Span<'a>], Span<'a>)> { + todo!() +} diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index 4181465c..47f585b1 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -2,6 +2,7 @@ use std::cell::RefCell; use std::fmt::Display; use std::ops::Range; +use redscript::slab::Hunk; use redscript::Str; use strum::{Display, IntoStaticStr}; @@ -66,7 +67,7 @@ impl Severity { /// Carried around in the `LocatedSpan::extra` field in /// between `nom` parsers. #[derive(Clone, Debug)] -pub struct State<'a>(pub &'a RefCell<Vec<Diagnostic>>, pub Str); +pub struct State<'a>(pub &'a RefCell<Vec<Diagnostic>>, pub &'a RefCell<Hunk<'a>>, pub Str); impl<'a> State<'a> { /// Something not allowed by the rules of the language or other authority. @@ -93,12 +94,12 @@ impl<'a> ReportOrigin for Span<'a> { sc, el: line, ec, - file: self.extra.1.clone(), + file: self.extra.2.clone(), text: bytes.to_string(), severity: diag.0, code: diag.1, msg, - }) + }); } } @@ -118,12 +119,12 @@ impl<'a> ReportOrigin for Range<&Span<'a>> { sc: self.start.get_column(), el, ec: self.end.get_column(), - file: self.start.extra.1.clone(), + file: self.start.extra.2.clone(), text: bytes.to_string(), severity: diag.0, code: diag.1, msg, - }) + }); } } diff --git a/core/Cargo.toml b/core/Cargo.toml index a377d38e..bb76af98 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -17,6 +17,7 @@ smallvec.workspace = true hashbrown.workspace = true modular-bitfield = "0.11" crc32fast = "1" +bumpalo = { version = "3.11", features = ["collections"] } [features] arc = [] diff --git a/core/src/slab.rs b/core/src/slab.rs index 8cb664c3..6f889160 100644 --- a/core/src/slab.rs +++ b/core/src/slab.rs @@ -2,6 +2,8 @@ use std::any::TypeId; use std::ffi::c_void; use std::ops::Range; +use bumpalo::collections::Vec; +use bumpalo::Bump; use hashbrown::HashMap; pub trait Shatter: Sized { @@ -35,7 +37,7 @@ impl Shatter for Range<usize> { } } #[derive(Debug)] -pub struct Cut<'a, T>(Range<usize>, &'a Slab<T>); +pub struct Cut<'a, T>(Range<usize>, &'a Slab<'a, T>); impl<'a, T> Cut<'a, T> { pub fn len(&self) -> usize { @@ -97,7 +99,7 @@ impl<'a, T> Shatter for Cut<'a, T> { } #[derive(Debug)] -pub struct Cut1<'a, T>(usize, &'a Slab<T>); +pub struct Cut1<'a, T>(usize, &'a Slab<'a, T>); impl<'a, T> Cut1<'a, T> { pub fn as_t(&self) -> &'a T { @@ -124,22 +126,14 @@ impl<'a, T> From<Cut1<'a, T>> for Cut<'a, T> { } #[derive(Debug)] -pub struct Slab<T> { - data: Vec<T>, +pub struct Slab<'a, T> { + tid: TypeId, + data: Vec<'a, T>, start: usize, } -impl<T> Default for Slab<T> { - fn default() -> Self { - Self { - data: Default::default(), - start: Default::default(), - } - } -} - #[allow(clippy::mut_from_ref, clippy::cast_ref_to_mut)] -impl<T> Slab<T> { +impl<'a, T> Slab<'a, T> { #[inline] unsafe fn slice_unchecked(&self, range: Range<usize>) -> &[T] { std::slice::from_raw_parts(self.data.as_ptr().add(range.start), range.end - range.start) @@ -192,6 +186,12 @@ impl<T> Slab<T> { self.data.push(value); } + pub fn reserve(&mut self, additional: usize) { + if let Some(additional) = additional.checked_sub(self.len()) { + self.data.reserve(additional); + } + } + #[inline] pub fn len(&self) -> usize { self.data.len() - self.start @@ -200,39 +200,55 @@ impl<T> Slab<T> { pub fn is_empty(&self) -> bool { self.len() == 0 } - - pub fn reserve(&mut self, additional: usize) { - if let Some(additional) = additional.checked_sub(self.len()) { - self.data.reserve(additional); - } - } } -trait IntoSlab { - fn type_id(&self) -> TypeId; +trait IntoSlab<'a> { + fn type_id(&'a self) -> &'a TypeId; - fn slab_ptr(&self) -> *const c_void { + fn slab_ptr(&'a self) -> *const c_void { self as *const _ as *const c_void } + + fn len(&'a self) -> usize; + + fn is_empty(&'a self) -> bool { + self.len() == 0 + } } -impl<U: 'static> IntoSlab for Slab<U> { - fn type_id(&self) -> TypeId { - TypeId::of::<U>() +impl<'a, U: 'a> IntoSlab<'a> for Slab<'a, U> { + fn type_id(&'a self) -> &'a TypeId { + &self.tid + } + + #[inline] + fn len(&'a self) -> usize { + self.data.len() - self.start } } -pub struct Hunk { - slabs: HashMap<TypeId, Box<dyn IntoSlab>>, +#[derive(Default)] +pub struct Hunk<'a> { + alloc: Bump, + slabs: HashMap<TypeId, Box<dyn IntoSlab<'a> + 'a>>, +} + +impl<'a> std::fmt::Debug for Hunk<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Hunk").field("slabs", &self.slabs.len()).finish() + } } -impl Hunk { - fn get_slab<T: 'static>(&mut self) -> &mut Slab<T> { +impl<'a> Hunk<'a> { + fn get_slab<T: 'static>(&'a mut self) -> &mut Slab<T> { let type_id = TypeId::of::<T>(); - let slab = self - .slabs - .entry(type_id) - .or_insert_with(|| Box::new(Slab::<T>::default())); + let slab = self.slabs.entry(type_id).or_insert_with(|| { + Box::new(Slab { + tid: type_id, + data: Vec::<'a, T>::new_in(&self.alloc), + start: 0, + }) + }); unsafe { &mut *(slab.slab_ptr() as *mut Slab<T>) } } } From 71a7580ef14685f23261c7965ff10af8eb7b4887 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Tue, 15 Nov 2022 21:30:24 +0100 Subject: [PATCH 12/21] Use bumpalo instead --- Cargo.lock | 1 + compiler/Cargo.toml | 1 + compiler/src/lib.rs | 1 + compiler/src/parser2.rs | 16 ++- compiler/src/validators.rs | 9 +- core/src/lib.rs | 1 - core/src/slab.rs | 254 ------------------------------------- 7 files changed, 24 insertions(+), 259 deletions(-) delete mode 100644 core/src/slab.rs diff --git a/Cargo.lock b/Cargo.lock index 81027cf2..61975c68 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1000,6 +1000,7 @@ name = "redscript-compiler" version = "0.5.9" dependencies = [ "ahash", + "bumpalo", "enum-as-inner", "flexstr", "hashbrown 0.13.1", diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index 1a7788f5..5c5b7d7f 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -28,3 +28,4 @@ peg = "0.8" nom = "7.1" nom_locate = "4.0" paste = "1" +bumpalo = "3.11" diff --git a/compiler/src/lib.rs b/compiler/src/lib.rs index e53bb119..e55ba3f3 100644 --- a/compiler/src/lib.rs +++ b/compiler/src/lib.rs @@ -1,4 +1,5 @@ #![feature(macro_metavar_expr)] +#![feature(let_chains)] #![macro_use] extern crate paste; use simple_interner::Interner; diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index e60f62a8..ff440a57 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1,7 +1,19 @@ +use bumpalo::collections::Vec; +use nom::{Offset, Slice}; + use crate::lexer::*; use crate::validators::Span; /// A dot separated sequence of identifiers. -fn trailer<'a>(i: Span<'a>) -> IResult<(&'a [Span<'a>], Span<'a>)> { - todo!() +fn trailer<'a>(is: Span<'a>) -> IResult<(Vec<'a, Span<'a>>, Span<'a>)> { + let mut vec = is.extra.create_vec(); + let (i, ident) = identifier(is.clone())?; + let mut i = i; + vec.push(ident.clone()); + while let Ok((ip, _)) = control(i.clone()) && let Ok((ip, ident)) = identifier(ip) { + i = ip; + vec.push(ident); + } + let len = is.offset(&i); + Ok((i, (vec, is.slice(..len)))) } diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index 47f585b1..78507f80 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -2,7 +2,8 @@ use std::cell::RefCell; use std::fmt::Display; use std::ops::Range; -use redscript::slab::Hunk; +use bumpalo::collections::Vec; +use bumpalo::Bump; use redscript::Str; use strum::{Display, IntoStaticStr}; @@ -67,7 +68,7 @@ impl Severity { /// Carried around in the `LocatedSpan::extra` field in /// between `nom` parsers. #[derive(Clone, Debug)] -pub struct State<'a>(pub &'a RefCell<Vec<Diagnostic>>, pub &'a RefCell<Hunk<'a>>, pub Str); +pub struct State<'a>(pub &'a RefCell<Vec<'a, Diagnostic>>, pub &'a Bump, pub Str); impl<'a> State<'a> { /// Something not allowed by the rules of the language or other authority. @@ -75,6 +76,10 @@ impl<'a> State<'a> { fn report_diagnostic(&self, error: Diagnostic) { self.0.borrow_mut().push(error); } + + pub fn create_vec<T: 'a>(&self) -> Vec<'a, T> { + Vec::new_in(&self.1) + } } pub trait ReportOrigin { diff --git a/core/src/lib.rs b/core/src/lib.rs index f5e0d84f..ab6ecfd0 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -5,7 +5,6 @@ pub mod decode; pub mod definition; pub mod encode; pub mod io; -pub mod slab; #[cfg(not(feature = "arc"))] pub type Str = flexstr::LocalStr; diff --git a/core/src/slab.rs b/core/src/slab.rs deleted file mode 100644 index 6f889160..00000000 --- a/core/src/slab.rs +++ /dev/null @@ -1,254 +0,0 @@ -use std::any::TypeId; -use std::ffi::c_void; -use std::ops::Range; - -use bumpalo::collections::Vec; -use bumpalo::Bump; -use hashbrown::HashMap; - -pub trait Shatter: Sized { - // splits a cut into two adjacent cuts - fn split_at(self, index: usize) -> (Self, Self); - - // splits a cut into three adjacent cuts, the middle one being of the given range - fn split_range(self, range: Range<usize>) -> (Self, Self, Self) { - let (left, right) = self.split_at(range.start); - let (middle, right) = right.split_at(range.end - range.start); - (left, middle, right) - } - - // fuzes two adjacent cuts into one - fn fuze(self, other: Self) -> Option<Self>; -} - -impl Shatter for Range<usize> { - fn split_at(self, index: usize) -> (Self, Self) { - let start = self.start; - let end = self.end; - (start..index, index..end) - } - - fn fuze(self, other: Self) -> Option<Self> { - if self.end == other.start { - Some(self.start..other.end) - } else { - None - } - } -} -#[derive(Debug)] -pub struct Cut<'a, T>(Range<usize>, &'a Slab<'a, T>); - -impl<'a, T> Cut<'a, T> { - pub fn len(&self) -> usize { - self.0.end - self.0.start - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn as_slice(&self) -> &'a [T] { - unsafe { self.1.slice_unchecked(self.0.clone()) } - } - - pub fn as_slice_mut(&mut self) -> &'a mut [T] { - unsafe { self.1.slice_unchecked_mut(self.0.clone()) } - } - - pub fn slab(&self) -> &'a Slab<T> { - self.1 - } - - pub fn slab_mut(&mut self) -> &'a mut Slab<T> { - unsafe { self.1.as_mut() } - } -} - -impl<'a, T> AsRef<Range<usize>> for Cut<'a, T> { - fn as_ref(&self) -> &Range<usize> { - &self.0 - } -} - -impl<'a, T> From<Cut<'a, T>> for Range<usize> { - fn from(cut: Cut<'a, T>) -> Self { - cut.0 - } -} - -impl<'a, T> From<Cut<'a, T>> for &'a [T] { - fn from(value: Cut<'a, T>) -> Self { - value.as_slice() - } -} - -impl<'a, T> Shatter for Cut<'a, T> { - fn split_at(self, at: usize) -> (Self, Self) { - let (left, right) = self.0.split_at(at); - (Cut(left, self.1), Cut(right, self.1)) - } - - fn fuze(self, other: Self) -> Option<Self> { - if self.0.end == other.0.start { - Some(Cut(self.0.start..other.0.end, self.1)) - } else { - None - } - } -} - -#[derive(Debug)] -pub struct Cut1<'a, T>(usize, &'a Slab<'a, T>); - -impl<'a, T> Cut1<'a, T> { - pub fn as_t(&self) -> &'a T { - unsafe { self.1.get_unchecked(self.0) } - } - - pub fn as_t_mut(&mut self) -> &'a mut T { - unsafe { self.1.get_unchecked_mut(self.0) } - } - - pub fn slab(&self) -> &'a Slab<T> { - self.1 - } - - pub fn slab_mut(&mut self) -> &'a mut Slab<T> { - unsafe { self.1.as_mut() } - } -} - -impl<'a, T> From<Cut1<'a, T>> for Cut<'a, T> { - fn from(value: Cut1<'a, T>) -> Self { - Cut(value.0..value.0 + 1, value.1) - } -} - -#[derive(Debug)] -pub struct Slab<'a, T> { - tid: TypeId, - data: Vec<'a, T>, - start: usize, -} - -#[allow(clippy::mut_from_ref, clippy::cast_ref_to_mut)] -impl<'a, T> Slab<'a, T> { - #[inline] - unsafe fn slice_unchecked(&self, range: Range<usize>) -> &[T] { - std::slice::from_raw_parts(self.data.as_ptr().add(range.start), range.end - range.start) - } - #[inline] - unsafe fn slice_unchecked_mut(&self, range: Range<usize>) -> &mut [T] { - std::slice::from_raw_parts_mut(self.data.as_ptr().cast_mut().add(range.start), range.end - range.start) - } - - #[inline] - unsafe fn get_unchecked(&self, index: usize) -> &T { - self.data.get_unchecked(index) - } - #[inline] - unsafe fn get_unchecked_mut(&self, index: usize) -> &mut T { - self.data.as_ptr().cast_mut().add(index).as_mut().unwrap() - } - - #[inline] - unsafe fn as_mut(&self) -> &mut Self { - &mut *(self as *const _ as *mut _) - } - - pub fn cut(&mut self, len: usize) -> Result<Cut<T>, Cut<T>> { - let start = self.start; - let end = start + len; - let available = self.data.len() - start; - if available >= len { - self.start = end; - Ok(Cut(start..end, self)) - } else { - Err(Cut(start..self.data.len(), self)) - } - } - - pub fn cut1(&mut self) -> Option<Cut1<T>> { - let start = self.start; - let end = start + 1; - let available = self.data.len() - start; - if available >= 1 { - self.start = end; - Some(Cut1(start, self)) - } else { - None - } - } - - #[inline] - pub fn push(&mut self, value: T) { - self.data.push(value); - } - - pub fn reserve(&mut self, additional: usize) { - if let Some(additional) = additional.checked_sub(self.len()) { - self.data.reserve(additional); - } - } - - #[inline] - pub fn len(&self) -> usize { - self.data.len() - self.start - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -trait IntoSlab<'a> { - fn type_id(&'a self) -> &'a TypeId; - - fn slab_ptr(&'a self) -> *const c_void { - self as *const _ as *const c_void - } - - fn len(&'a self) -> usize; - - fn is_empty(&'a self) -> bool { - self.len() == 0 - } -} - -impl<'a, U: 'a> IntoSlab<'a> for Slab<'a, U> { - fn type_id(&'a self) -> &'a TypeId { - &self.tid - } - - #[inline] - fn len(&'a self) -> usize { - self.data.len() - self.start - } -} - -#[derive(Default)] -pub struct Hunk<'a> { - alloc: Bump, - slabs: HashMap<TypeId, Box<dyn IntoSlab<'a> + 'a>>, -} - -impl<'a> std::fmt::Debug for Hunk<'a> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Hunk").field("slabs", &self.slabs.len()).finish() - } -} - -impl<'a> Hunk<'a> { - fn get_slab<T: 'static>(&'a mut self) -> &mut Slab<T> { - let type_id = TypeId::of::<T>(); - let slab = self.slabs.entry(type_id).or_insert_with(|| { - Box::new(Slab { - tid: type_id, - data: Vec::<'a, T>::new_in(&self.alloc), - start: 0, - }) - }); - unsafe { &mut *(slab.slab_ptr() as *mut Slab<T>) } - } -} From fb5c45fec2f86542a7ce306dbf8573fa43b7ba85 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Tue, 15 Nov 2022 22:48:18 +0100 Subject: [PATCH 13/21] Remove bumpalo --- Cargo.lock | 1 - compiler/src/parser2.rs | 5 ++--- compiler/src/validators.rs | 12 +++--------- core/Cargo.toml | 1 - 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61975c68..7a6a795a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -967,7 +967,6 @@ dependencies = [ name = "redscript" version = "0.5.9" dependencies = [ - "bumpalo", "byteorder", "crc32fast", "derive-where", diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index ff440a57..531f8723 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1,12 +1,11 @@ -use bumpalo::collections::Vec; use nom::{Offset, Slice}; use crate::lexer::*; use crate::validators::Span; /// A dot separated sequence of identifiers. -fn trailer<'a>(is: Span<'a>) -> IResult<(Vec<'a, Span<'a>>, Span<'a>)> { - let mut vec = is.extra.create_vec(); +fn trailer<'a>(is: Span<'a>) -> IResult<(Vec<Span<'a>>, Span<'a>)> { + let mut vec = vec![]; let (i, ident) = identifier(is.clone())?; let mut i = i; vec.push(ident.clone()); diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index 78507f80..f5c3ed8a 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -2,8 +2,6 @@ use std::cell::RefCell; use std::fmt::Display; use std::ops::Range; -use bumpalo::collections::Vec; -use bumpalo::Bump; use redscript::Str; use strum::{Display, IntoStaticStr}; @@ -68,7 +66,7 @@ impl Severity { /// Carried around in the `LocatedSpan::extra` field in /// between `nom` parsers. #[derive(Clone, Debug)] -pub struct State<'a>(pub &'a RefCell<Vec<'a, Diagnostic>>, pub &'a Bump, pub Str); +pub struct State<'a>(pub &'a RefCell<Vec<Diagnostic>>, pub Str); impl<'a> State<'a> { /// Something not allowed by the rules of the language or other authority. @@ -76,10 +74,6 @@ impl<'a> State<'a> { fn report_diagnostic(&self, error: Diagnostic) { self.0.borrow_mut().push(error); } - - pub fn create_vec<T: 'a>(&self) -> Vec<'a, T> { - Vec::new_in(&self.1) - } } pub trait ReportOrigin { @@ -99,7 +93,7 @@ impl<'a> ReportOrigin for Span<'a> { sc, el: line, ec, - file: self.extra.2.clone(), + file: self.extra.1.clone(), text: bytes.to_string(), severity: diag.0, code: diag.1, @@ -124,7 +118,7 @@ impl<'a> ReportOrigin for Range<&Span<'a>> { sc: self.start.get_column(), el, ec: self.end.get_column(), - file: self.start.extra.2.clone(), + file: self.start.extra.1.clone(), text: bytes.to_string(), severity: diag.0, code: diag.1, diff --git a/core/Cargo.toml b/core/Cargo.toml index bb76af98..a377d38e 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -17,7 +17,6 @@ smallvec.workspace = true hashbrown.workspace = true modular-bitfield = "0.11" crc32fast = "1" -bumpalo = { version = "3.11", features = ["collections"] } [features] arc = [] From b56c4f4a1df68ec3a86fbee456bd127ec0279d91 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Tue, 15 Nov 2022 23:24:41 +0100 Subject: [PATCH 14/21] Copy stuff instead of lifetime ownership --- compiler/src/lexer.rs | 8 ++++++++ compiler/src/parser2.rs | 11 +++++++---- core/src/ast.rs | 16 ++++++++-------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index bc81f5af..3171d144 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -332,6 +332,14 @@ pub fn null(i: Span) -> IResult<Span> { tag("null")(i) } +pub fn this(i: Span) -> IResult<Span> { + tag("this")(i) +} + +pub fn super_(i: Span) -> IResult<Span> { + tag("super")(i) +} + pub fn boolean(i: Span) -> IResult<(Span, bool)> { alt((map(tag("true"), |s| (s, true)), map(tag("false"), |s| (s, false))))(i) } diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index 531f8723..e04d40ae 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1,18 +1,21 @@ use nom::{Offset, Slice}; +use redscript::ast::Ident; use crate::lexer::*; use crate::validators::Span; +type Range = redscript::ast::Span; + /// A dot separated sequence of identifiers. -fn trailer<'a>(is: Span<'a>) -> IResult<(Vec<Span<'a>>, Span<'a>)> { +fn trailer(is: Span) -> IResult<(Vec<Ident>, Range)> { let mut vec = vec![]; let (i, ident) = identifier(is.clone())?; let mut i = i; - vec.push(ident.clone()); + vec.push(Ident::from_ref(ident.fragment())); while let Ok((ip, _)) = control(i.clone()) && let Ok((ip, ident)) = identifier(ip) { i = ip; - vec.push(ident); + vec.push(Ident::from_ref(ident.fragment())); } let len = is.offset(&i); - Ok((i, (vec, is.slice(..len)))) + Ok((i, (vec, Range::with_len(is.location_offset(), len)))) } diff --git a/core/src/ast.rs b/core/src/ast.rs index 19a6ea4d..9b5a779b 100644 --- a/core/src/ast.rs +++ b/core/src/ast.rs @@ -23,14 +23,7 @@ where Declare(N::Local, Option<Box<N::Type>>, Option<Box<Self>>, Span), DynCast(N::Class, Box<Self>, Span), Assign(Box<Self>, Box<Self>, N::Inferred, Span), - Call( - Box<Self>, - N::Callable, - Box<[N::Type]>, - Box<[Self]>, - N::CallMeta, - Span, - ), + Call(Box<Self>, N::Callable, Box<[N::Type]>, Box<[Self]>, N::CallMeta, Span), Lambda(N::Closure, Box<Self>, Span), Member(Box<Self>, N::Member, Span), ArrayElem(Box<Self>, Box<Self>, N::Inferred, Span), @@ -354,6 +347,13 @@ impl Span { Self { low, high } } + pub fn with_len(low: usize, len: usize) -> Self { + Self { + low: Pos(u32::try_from(low).unwrap_or_default()), + high: Pos(u32::try_from(low + len).unwrap_or_default()), + } + } + pub fn merge(&self, other: Span) -> Span { Span::new(self.low.min(other.low), self.high.max(other.high)) } From 3d606ae285166c57b3e4114747393829d161205e Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Tue, 15 Nov 2022 23:26:18 +0100 Subject: [PATCH 15/21] Use native sized integer for Pos --- core/src/ast.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/src/ast.rs b/core/src/ast.rs index 9b5a779b..adb2ba15 100644 --- a/core/src/ast.rs +++ b/core/src/ast.rs @@ -292,14 +292,14 @@ pub enum Literal { } #[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct Pos(pub u32); +pub struct Pos(pub usize); impl Pos { pub const ZERO: Pos = Pos(0); #[inline] pub fn new(n: usize) -> Self { - Pos(n as u32) + Pos(n) } } @@ -314,7 +314,7 @@ impl Add<usize> for Pos { #[inline] fn add(self, rhs: usize) -> Pos { - Pos(self.0 + rhs as u32) + Pos(self.0 + rhs) } } @@ -323,7 +323,7 @@ impl Sub<usize> for Pos { #[inline] fn sub(self, rhs: usize) -> Pos { - Pos(self.0 - rhs as u32) + Pos(self.0 - rhs) } } @@ -349,8 +349,8 @@ impl Span { pub fn with_len(low: usize, len: usize) -> Self { Self { - low: Pos(u32::try_from(low).unwrap_or_default()), - high: Pos(u32::try_from(low + len).unwrap_or_default()), + low: Pos(low), + high: Pos(low + len), } } From 26471e30876ae5775fefa96bf5613f1fbbcce2cb Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Wed, 16 Nov 2022 00:05:18 +0100 Subject: [PATCH 16/21] Diagnostic messages are fragments not sentences --- compiler/src/lexer.rs | 2 +- compiler/src/validators.rs | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index 3171d144..ade138a8 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -13,7 +13,7 @@ use strum::{Display, IntoStaticStr}; use crate::comb::many_till_balanced1; use crate::validators::*; -use crate::{diag_report, *}; +use crate::*; pub trait ParseErr<'a>: ParseError<Span<'a>> {} pub type IResult<'a, O> = nom::IResult<Span<'a>, O>; diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index f5c3ed8a..7104ebae 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -175,8 +175,14 @@ macro_rules! diag { }; } -diag!(ERR_INVALID_UTF8, Error, "ELS0001", "Invalid UTF-8 sequence `{}`"); -diag!(ERR_EXPECT_HEX_DIGIT, Error, "ELS0002", "Invalid hex digit `{}`"); -diag!(ERR_INVALID_ESCAPE, Error, "ELS0003", "Invalid escape sequence `{}`"); -diag!(ERR_PARSE_INT, Error, "ELN0001", "Invalid integer `{}`, {}"); -diag!(ERR_PARSE_FLOAT, Error, "ELN0002", "Invalid float `{}`, {}"); +diag!(ERR_INVALID_UTF8, Error, "ELS0001", "invalid UTF-8 sequence `{}`"); +diag!(ERR_EXPECT_HEX_DIGIT, Error, "ELS0002", "invalid hex digit `{}`"); +diag!(ERR_INVALID_ESCAPE, Error, "ELS0003", "invalid escape sequence `{}`"); +diag!(ERR_PARSE_INT, Error, "ELN0001", "invalid integer `{}`, {}"); +diag!(ERR_PARSE_FLOAT, Error, "ELN0002", "invalid float `{}`, {}"); +diag!( + ERR_INTERPOL_NOT_CONST, + Error, + "ESS0005", + "a interpolated string cannot be a constant value" +); From c0879a3aacd1ed8cdbeca1fb30f1be1ba3b5195f Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Wed, 16 Nov 2022 16:23:44 +0100 Subject: [PATCH 17/21] Implement Constant parser --- Cargo.lock | 77 ++++++++++++++++++ compiler/Cargo.toml | 1 + compiler/src/lexer.rs | 159 +++++++++++++++++++++++++++++-------- compiler/src/parser2.rs | 77 +++++++++++++++--- compiler/src/validators.rs | 33 ++++++-- core/src/ast.rs | 21 ++++- 6 files changed, 318 insertions(+), 50 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7a6a795a..94d21865 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -800,6 +800,82 @@ dependencies = [ "nom", ] +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "num_threads" version = "0.1.6" @@ -1008,6 +1084,7 @@ dependencies = [ "log", "nom", "nom_locate", + "num", "once_cell", "paste", "peg", diff --git a/compiler/Cargo.toml b/compiler/Cargo.toml index 5c5b7d7f..79c02877 100644 --- a/compiler/Cargo.toml +++ b/compiler/Cargo.toml @@ -29,3 +29,4 @@ nom = "7.1" nom_locate = "4.0" paste = "1" bumpalo = "3.11" +num = "0.4" diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index ade138a8..294f4076 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -1,3 +1,5 @@ +use std::fmt::Display; + use nom::branch::alt; use nom::bytes::complete::{tag, take_while, take_while_m_n}; use nom::character::complete::{ @@ -6,8 +8,9 @@ use nom::character::complete::{ use nom::combinator::{consumed, map, not, opt, recognize}; use nom::error::ParseError; use nom::multi::many0; -use nom::sequence::{delimited, pair, preceded, separated_pair, tuple}; +use nom::sequence::{delimited, pair, preceded, separated_pair}; use nom::AsChar; +use redscript::ast::{Constant, Literal}; use redscript::Str; use strum::{Display, IntoStaticStr}; @@ -17,7 +20,7 @@ use crate::*; pub trait ParseErr<'a>: ParseError<Span<'a>> {} pub type IResult<'a, O> = nom::IResult<Span<'a>, O>; -pub type NomError<'a> = nom::Err<Span<'a>>; +pub type NomError<'a> = nom::Err<nom::error::Error<Span<'a>>>; #[derive(Debug, Clone, Copy, PartialEq, Display)] pub enum Trivia { @@ -28,8 +31,25 @@ pub enum Trivia { #[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] pub enum Num { - Float(f64), - Int(u64), + F32(f32), + F64(f64), + I32(i32), + I64(i64), + U32(u32), + U64(u64), +} + +impl From<Num> for Constant { + fn from(value: Num) -> Self { + match value { + Num::F32(f) => Constant::F32(f), + Num::F64(f) => Constant::F64(f), + Num::I32(f) => Constant::I32(f), + Num::I64(f) => Constant::I64(f), + Num::U32(f) => Constant::U32(f), + Num::U64(f) => Constant::U64(f), + } + } } #[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] @@ -127,37 +147,96 @@ fn sciexp_literal(i: Span) -> IResult<Span> { ))(i) } -fn parse_float(i: &Span) -> f64 { - match i.fragment().parse() { +fn parse_num<T: num::Num + Default>(i: &Span, radix: u32) -> T +where + T::FromStrRadixErr: std::fmt::Display, +{ + match T::from_str_radix(i.fragment(), radix) { Ok(value) => value, Err(error) => { - diag_report!(i, ERR_PARSE_FLOAT, i.fragment(), error); - 0.0 + diag_report!(i, ERR_NUM_PARSE, i.fragment(), error); + T::default() } } } -fn parse_int(i: &Span, radix: u32) -> u64 { - match u64::from_str_radix(i.fragment(), radix) { - Ok(value) => value, - Err(error) => { - diag_report!(i, ERR_PARSE_INT, i.fragment(), error); - 0 +fn int_width(suffix: Option<Span>) -> u32 { + match suffix { + Some(suffix) => match alt(( + map(tag("i32"), |_: Span| 32), + map(tag("i64"), |_| 64), + map(tag("u32"), |_| 32), + map(tag("u64"), |_| 64), + map(tag("l"), |_| 64), + ))(suffix.clone()) + { + Ok((_, width)) => width, + Err(err) => { + let _: NomError = err; // needed for type inference + diag_report!(suffix, ERR_NUM_SUFFIX, suffix.fragment()); + 32 + } + }, + None => 32, + } +} + +fn integer(is: Span) -> IResult<Num> { + match pair( + alt(( + map(preceded(tag("0x"), hex_digit0), |s| (16, s)), + map(preceded(tag("0o"), oct_digit0), |s| (8, s)), + map( + preceded(tag("0b"), take_while_m_n(1, 64, |c: char| c == '0' || c == '1')), + |s| (2, s), + ), + map(digit0, |s| (10, s)), + )), + opt(identifier), + )(is) + { + Ok((rem, ((radix, value), suffix))) => { + let num = match int_width(suffix) { + 64 => Num::I64(parse_num(&value, radix)), + _ => Num::I32(parse_num(&value, radix)), + }; + Ok((rem, num)) } + Err(err) => Err(err), } } -pub fn number(i: Span) -> IResult<(Num, Span)> { - alt(( - map(preceded(tag("0x"), hex_digit0), |s| (Num::Int(parse_int(&s, 16)), s)), - map(preceded(tag("0o"), oct_digit0), |s| (Num::Int(parse_int(&s, 8)), s)), - map(preceded(tag("0b"), take_while(|c: char| c == '0' || c == '1')), |s| { - (Num::Int(parse_int(&s, 2)), s) - }), - map(sciexp_literal, |s| (Num::Float(parse_float(&s)), s)), - map(float_literal, |s| (Num::Float(parse_float(&s)), s)), - map(digit1, |s| (Num::Int(parse_int(&s, 10)), s)), - ))(i) +fn float_width(suffix: Option<Span>) -> u32 { + match suffix { + Some(suffix) => { + match alt((map(tag("f32"), |_| 32), map(tag("f64"), |_| 64), map(tag("d"), |_| 64)))(suffix.clone()) { + Ok((_, width)) => width, + Err(err) => { + let err: NomError = err; // needed for type inference + diag_report!(suffix, ERR_NUM_SUFFIX, suffix.fragment()); + 64 + } + } + } + None => 64, + } +} + +fn float(is: Span) -> IResult<Num> { + match pair(alt((sciexp_literal, float_literal)), opt(identifier))(is) { + Ok((rem, (value, suffix))) => { + let num = match float_width(suffix) { + 64 => Num::F64(parse_num(&value, 10)), + _ => Num::F32(parse_num(&value, 10)), + }; + Ok((rem, num)) + } + Err(err) => Err(err), + } +} + +pub fn number(is: Span) -> IResult<(Span, Num)> { + consumed(alt((float, integer)))(is) } // ----------------------------------------------------------------------------- @@ -176,18 +255,18 @@ fn str_char_uni(is: Span) -> IResult<Option<char>> { if let Some(c) = char::from_u32(hex) { Ok((i, Some(c))) } else { - diag_report!((&is..&i), ERR_INVALID_UTF8, hex); + diag_report!((&is..&i), ERR_CHAR_UTF8, hex); Ok((i, None)) } } else { - diag_report!((&is..&i), ERR_INVALID_UTF8, digits.fragment()); + diag_report!((&is..&i), ERR_CHAR_UTF8, digits.fragment()); Ok((i, None)) } } fn str_char_invalid(is: Span) -> IResult<Option<char>> { let (i, c) = preceded(char('\\'), anychar)(is.clone())?; - diag_report!((&is..&i), ERR_INVALID_ESCAPE, c); + diag_report!((&is..&i), ERR_CHAR_ESCAPE, c); Ok((i, None)) } @@ -217,21 +296,39 @@ fn str_chars(mut i: Span) -> IResult<Str> { Ok((i, Str::from_ref(s))) } +fn string_type(i: &Span, c: Option<char>) -> Literal { + match c { + Some(c) => match c { + 'n' => Literal::Name, + 'r' => Literal::Resource, + 't' => Literal::TweakDbId, + 's' => Literal::String, + _ => { + diag_report!(i, ERR_LITERAL_TYPE_INVALID, c); + Literal::String + } + }, + None => Literal::String, + } +} + // a parser accepting a function and returning the result of the function, by consuming the input -pub fn string(i: Span) -> IResult<(Span, Option<char>, Str)> { +pub fn string(i: Span) -> IResult<(Span, Literal, Str)> { let (i, (o, (p, s))) = consumed(pair( opt(satisfy(AsChar::is_alpha)), delimited(tag("\""), str_chars, tag("\"")), ))(i)?; + let p = string_type(&o, p); Ok((i, (o, p, s))) } // matches a string literal until the first interpolation -pub fn string_inter_start(i: Span) -> IResult<(Span, Option<char>, Str)> { +pub fn string_inter_start(i: Span) -> IResult<(Span, Literal, Str)> { let (i, (o, (p, s))) = consumed(pair( opt(satisfy(AsChar::is_alpha)), delimited(tag("\""), str_chars, tag(r#"\("#)), ))(i)?; + let p = string_type(&o, p); Ok((i, (o, p, s))) } @@ -294,7 +391,7 @@ pub fn control(i: Span) -> IResult<(Span, Ctrl)> { // An identifier is a sequence of letters, numbers, and underscores, starting with a letter or underscore pub fn identifier(i: Span) -> IResult<Span> { - recognize(tuple((alpha1, take_while(|c: char| c.is_alphanumeric() || c == '_'))))(i) + recognize(pair(alpha1, take_while(|c: char| c.is_alphanumeric() || c == '_')))(i) } // ----------------------------------------------------------------------------- diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index e04d40ae..b96589f3 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1,21 +1,76 @@ +use std::str::FromStr; + +use nom::branch::alt; +use nom::combinator::{consumed, map, verify}; +use nom::multi::separated_list1; use nom::{Offset, Slice}; -use redscript::ast::Ident; +use redscript::ast::{Constant, Ident, Literal, SourceAst}; +use redscript::Str; use crate::lexer::*; -use crate::validators::Span; +use crate::validators::*; +use crate::*; type Range = redscript::ast::Span; +type Expr = redscript::ast::Expr<SourceAst>; + +fn to_range(input: &Span, remaining: &Span) -> Range { + let len = input.slice(..remaining.offset(input)).len(); + Range::with_len(input.location_offset(), len) +} + +macro_rules! to_ok { + ($input:tt, $remaining:tt, $($value:tt)*) => { + ($($value)*, to_range(&$input, &$remaining)) + }; +} /// A dot separated sequence of identifiers. fn trailer(is: Span) -> IResult<(Vec<Ident>, Range)> { - let mut vec = vec![]; - let (i, ident) = identifier(is.clone())?; - let mut i = i; - vec.push(Ident::from_ref(ident.fragment())); - while let Ok((ip, _)) = control(i.clone()) && let Ok((ip, ident)) = identifier(ip) { - i = ip; - vec.push(Ident::from_ref(ident.fragment())); + map( + consumed(separated_list1(verify(control, |(_, c)| c == &Ctrl::Dot), ident)), + |(span, trailer)| (trailer, span.to_range().into()), + )(is) +} + +fn ident(is: Span) -> IResult<Ident> { + map(identifier, |ident| ident.to_flex())(is) +} + +fn string_literal(is: Span) -> IResult<(Literal, Str)> { + let (i, (ii, t, s)) = string(is)?; + Ok((i, (t, s))) +} + +pub fn string_interpolation(is: Span) -> IResult<(Str, Vec<(Expr, Str)>, Range)> { + let (mut i, (_, st, ss)) = string_inter_start(is.clone())?; + + let mut parts = vec![]; + while let Ok((ip, ep)) = expr(i.clone()) { + if let Ok((ip, (_, sp))) = string_inter_part(ip.clone()) { + parts.push((ep, sp)); + i = ip; + continue; + } else if let Ok((ip, (_, se))) = string_inter_end(ip) { + parts.push((ep, se)); + i = ip; + break; + } + // fallback must be implemented in the subordinate parsers. + return Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Tag))); } - let len = is.offset(&i); - Ok((i, (vec, Range::with_len(is.location_offset(), len)))) + let r = to_ok!(is, i, ss, parts); + Ok((i, r)) +} + +fn constant(is: Span) -> IResult<Constant> { + alt(( + map(string_literal, |(t, s)| Constant::String(t, s)), + map(number, |(_, n)| n.into()), + map(boolean, |(_, b)| Constant::Bool(b)), + ))(is) +} + +fn expr(is: Span) -> IResult<Expr> { + todo!() } diff --git a/compiler/src/validators.rs b/compiler/src/validators.rs index 7104ebae..ed7ee6aa 100644 --- a/compiler/src/validators.rs +++ b/compiler/src/validators.rs @@ -7,6 +7,16 @@ use strum::{Display, IntoStaticStr}; pub type Span<'a> = nom_locate::LocatedSpan<&'a str, State<'a>>; +pub trait AsFlexStr { + fn to_flex(&self) -> Str; +} + +impl<'a> AsFlexStr for Span<'a> { + fn to_flex(&self) -> Str { + Str::from_ref(self.fragment()) + } +} + pub trait ToRange { fn to_range(&self) -> Range<usize>; } @@ -134,6 +144,7 @@ pub struct DiagnosticTemplate(Severity, &'static str, &'static str); /// Requires the crate level imported, because format macros are defined there. /// ```ignore /// use crate::*; +/// use crate::validators::ReportOrigin; /// ``` #[macro_export] macro_rules! diag_report { @@ -175,14 +186,26 @@ macro_rules! diag { }; } -diag!(ERR_INVALID_UTF8, Error, "ELS0001", "invalid UTF-8 sequence `{}`"); +diag!(ERR_CHAR_UTF8, Error, "ELS0001", "invalid UTF-8 sequence `{}`"); diag!(ERR_EXPECT_HEX_DIGIT, Error, "ELS0002", "invalid hex digit `{}`"); -diag!(ERR_INVALID_ESCAPE, Error, "ELS0003", "invalid escape sequence `{}`"); -diag!(ERR_PARSE_INT, Error, "ELN0001", "invalid integer `{}`, {}"); -diag!(ERR_PARSE_FLOAT, Error, "ELN0002", "invalid float `{}`, {}"); +diag!(ERR_CHAR_ESCAPE, Error, "ELS0003", "invalid escape sequence `{}`"); +diag!(ERR_NUM_PARSE, Error, "ELN0001", "invalid number `{}`, {}"); +diag!( + ERR_NUM_OVERFLOW, + Error, + "ELN0003", + "the number `{}` exceeds the maximum value of the type {}" +); +diag!(ERR_NUM_SUFFIX, Error, "ELN0004", "invalid suffix `{}` for number"); diag!( ERR_INTERPOL_NOT_CONST, Error, "ESS0005", - "a interpolated string cannot be a constant value" + "a interpolated string cannot be a constant value, `{}` was specified" +); +diag!( + ERR_LITERAL_TYPE_INVALID, + Error, + "ESS0006", + "invalid literal type `{}` expected one of [n]ame, [r]resource, [t]weakDBId or none" ); diff --git a/core/src/ast.rs b/core/src/ast.rs index adb2ba15..eb8aaa3b 100644 --- a/core/src/ast.rs +++ b/core/src/ast.rs @@ -1,6 +1,6 @@ use std::cmp::Ordering; use std::fmt::{self, Debug, Display}; -use std::ops::{Add, Sub}; +use std::ops::{Add, Range, Sub}; use derive_where::derive_where; use enum_as_inner::EnumAsInner; @@ -283,7 +283,7 @@ where } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Display)] pub enum Literal { String, Name, @@ -294,6 +294,12 @@ pub enum Literal { #[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub struct Pos(pub usize); +impl From<usize> for Pos { + fn from(value: usize) -> Self { + Pos(value) + } +} + impl Pos { pub const ZERO: Pos = Pos(0); @@ -330,7 +336,7 @@ impl Sub<usize> for Pos { impl From<Pos> for usize { #[inline] fn from(pos: Pos) -> Self { - pos.0 as usize + pos.0 } } @@ -340,6 +346,15 @@ pub struct Span { pub high: Pos, } +impl From<Range<usize>> for Span { + fn from(value: Range<usize>) -> Self { + Span { + low: value.start.into(), + high: value.end.into(), + } + } +} + impl Span { pub const ZERO: Span = Span::new(Pos::ZERO, Pos::ZERO); From d00e893a29a05dd7df01557b023b6d39060bfbef Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Wed, 16 Nov 2022 16:38:21 +0100 Subject: [PATCH 18/21] move number to parser --- compiler/src/lexer.rs | 15 +++++++-------- compiler/src/parser2.rs | 17 ++++++++++++----- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index 294f4076..56c65258 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -1,5 +1,3 @@ -use std::fmt::Display; - use nom::branch::alt; use nom::bytes::complete::{tag, take_while, take_while_m_n}; use nom::character::complete::{ @@ -21,6 +19,11 @@ use crate::*; pub trait ParseErr<'a>: ParseError<Span<'a>> {} pub type IResult<'a, O> = nom::IResult<Span<'a>, O>; pub type NomError<'a> = nom::Err<nom::error::Error<Span<'a>>>; +pub type NomErrorKind = nom::error::ErrorKind; + +pub fn nom_error(input: Span, kind: NomErrorKind) -> NomError { + NomError::Error(nom::error::Error::new(input, kind)) +} #[derive(Debug, Clone, Copy, PartialEq, Display)] pub enum Trivia { @@ -181,7 +184,7 @@ fn int_width(suffix: Option<Span>) -> u32 { } } -fn integer(is: Span) -> IResult<Num> { +pub fn integer(is: Span) -> IResult<Num> { match pair( alt(( map(preceded(tag("0x"), hex_digit0), |s| (16, s)), @@ -222,7 +225,7 @@ fn float_width(suffix: Option<Span>) -> u32 { } } -fn float(is: Span) -> IResult<Num> { +pub fn float(is: Span) -> IResult<Num> { match pair(alt((sciexp_literal, float_literal)), opt(identifier))(is) { Ok((rem, (value, suffix))) => { let num = match float_width(suffix) { @@ -235,10 +238,6 @@ fn float(is: Span) -> IResult<Num> { } } -pub fn number(is: Span) -> IResult<(Span, Num)> { - consumed(alt((float, integer)))(is) -} - // ----------------------------------------------------------------------------- // String // ----------------------------------------------------------------------------- diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index b96589f3..b7dd7eb4 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1,8 +1,7 @@ -use std::str::FromStr; - use nom::branch::alt; use nom::combinator::{consumed, map, verify}; use nom::multi::separated_list1; +use nom::sequence::delimited; use nom::{Offset, Slice}; use redscript::ast::{Constant, Ident, Literal, SourceAst}; use redscript::Str; @@ -56,8 +55,8 @@ pub fn string_interpolation(is: Span) -> IResult<(Str, Vec<(Expr, Str)>, Range)> i = ip; break; } - // fallback must be implemented in the subordinate parsers. - return Err(nom::Err::Error(nom::error::Error::new(i, nom::error::ErrorKind::Tag))); + // the string interpolation is not terminated + return Err(nom_error(i, NomErrorKind::TagClosure)); } let r = to_ok!(is, i, ss, parts); Ok((i, r)) @@ -66,11 +65,19 @@ pub fn string_interpolation(is: Span) -> IResult<(Str, Vec<(Expr, Str)>, Range)> fn constant(is: Span) -> IResult<Constant> { alt(( map(string_literal, |(t, s)| Constant::String(t, s)), - map(number, |(_, n)| n.into()), + map(consumed(alt((float, integer))), |(_, n)| n.into()), map(boolean, |(_, b)| Constant::Bool(b)), ))(is) } +fn type_args(is: Span) -> IResult<Vec<Expr>> { + delimited( + verify(operator, |(_, op)| op == &Op::Lt), + separated_list1(verify(control, |(_, c)| c == &Ctrl::Comma), expr), + verify(operator, |(_, op)| op == &Op::Gt), + )(is) +} + fn expr(is: Span) -> IResult<Expr> { todo!() } From a7a5d76cd6b52b1d7b0e895eebefa876c0b9020c Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Wed, 16 Nov 2022 16:58:05 +0100 Subject: [PATCH 19/21] Separate string interpolation combinator --- compiler/src/comb.rs | 34 ++++++++++++++++++++++++++++++++++ compiler/src/parser2.rs | 28 ++++++++++------------------ 2 files changed, 44 insertions(+), 18 deletions(-) diff --git a/compiler/src/comb.rs b/compiler/src/comb.rs index 09936754..a58465c9 100644 --- a/compiler/src/comb.rs +++ b/compiler/src/comb.rs @@ -44,3 +44,37 @@ where } } } + +pub fn delimited_list0<I, O1, O2, E>( + mut open: impl FnMut(I) -> IResult<I, O1, E>, + mut separator: impl FnMut(I) -> IResult<I, O1, E>, + mut inner: impl FnMut(I) -> IResult<I, O2, E>, + mut close: impl FnMut(I) -> IResult<I, O1, E>, +) -> impl FnMut(I) -> IResult<I, (O1, Vec<(O2, O1)>), E> +where + I: Clone, + E: ParseError<I>, +{ + move |is| { + let (mut i, ss) = open(is)?; + + let mut parts = vec![]; + while let Ok((ip, ep)) = inner(i.clone()) { + if let Ok((ip, sp)) = separator(ip.clone()) { + parts.push((ep, sp)); + i = ip; + continue; + } else if let Ok((ip, se)) = close(ip) { + parts.push((ep, se)); + i = ip; + break; + } + // missing close match + return Err(nom::Err::Error(E::from_error_kind( + i, + nom::error::ErrorKind::TagClosure, + ))); + } + return Ok((i, (ss, parts))); + } +} diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index b7dd7eb4..60a26224 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -6,6 +6,7 @@ use nom::{Offset, Slice}; use redscript::ast::{Constant, Ident, Literal, SourceAst}; use redscript::Str; +use crate::comb::delimited_list0; use crate::lexer::*; use crate::validators::*; use crate::*; @@ -42,24 +43,15 @@ fn string_literal(is: Span) -> IResult<(Literal, Str)> { } pub fn string_interpolation(is: Span) -> IResult<(Str, Vec<(Expr, Str)>, Range)> { - let (mut i, (_, st, ss)) = string_inter_start(is.clone())?; - - let mut parts = vec![]; - while let Ok((ip, ep)) = expr(i.clone()) { - if let Ok((ip, (_, sp))) = string_inter_part(ip.clone()) { - parts.push((ep, sp)); - i = ip; - continue; - } else if let Ok((ip, (_, se))) = string_inter_end(ip) { - parts.push((ep, se)); - i = ip; - break; - } - // the string interpolation is not terminated - return Err(nom_error(i, NomErrorKind::TagClosure)); - } - let r = to_ok!(is, i, ss, parts); - Ok((i, r)) + map( + consumed(delimited_list0( + map(string_inter_start, |(_, _, s)| s), + map(string_inter_part, |(_, s)| s), + expr, + map(string_inter_end, |(_, s)| s), + )), + |(r, (ss, sp))| (ss, sp, r.to_range().into()), + )(is) } fn constant(is: Span) -> IResult<Constant> { From 71dfef02d443dad4cd1d651cc6de67a528795980 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Wed, 16 Nov 2022 18:05:20 +0100 Subject: [PATCH 20/21] Add parsable + variant combinators --- compiler/src/comb.rs | 24 +++++++++++++++++++++++ compiler/src/lexer.rs | 19 +++++++++++++++++- compiler/src/parser2.rs | 43 +++++++++++++++++++++++++++++------------ 3 files changed, 73 insertions(+), 13 deletions(-) diff --git a/compiler/src/comb.rs b/compiler/src/comb.rs index a58465c9..ad8d6af9 100644 --- a/compiler/src/comb.rs +++ b/compiler/src/comb.rs @@ -78,3 +78,27 @@ where return Ok((i, (ss, parts))); } } + +pub fn variant<I, T, E>(expected: T) -> impl FnMut(I) -> IResult<I, I, E> +where + I: Clone, + T: Parsable<I, E> + PartialEq, + E: ParseError<I>, +{ + move |i| { + let (i, (o, value)) = T::parse(i)?; + if value == expected { + Ok((i, o)) + } else { + Err(nom::Err::Error(E::from_error_kind(i, nom::error::ErrorKind::Verify))) + } + } +} + +pub trait Parsable<I, E>: Sized +where + I: Clone, + E: ParseError<I>, +{ + fn parse(i: I) -> IResult<I, (I, Self), E>; +} diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index 56c65258..faeb70a2 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -12,7 +12,7 @@ use redscript::ast::{Constant, Literal}; use redscript::Str; use strum::{Display, IntoStaticStr}; -use crate::comb::many_till_balanced1; +use crate::comb::{many_till_balanced1, Parsable}; use crate::validators::*; use crate::*; @@ -362,6 +362,12 @@ pub fn operator(i: Span) -> IResult<(Span, Op)> { ))(i) } +impl<'a> Parsable<Span<'a>, nom::error::Error<Span<'a>>> for Op { + fn parse(i: Span<'_>) -> IResult<(Span, Self)> { + operator(i) + } +} + // ----------------------------------------------------------------------------- // Control character // ----------------------------------------------------------------------------- @@ -384,6 +390,11 @@ pub fn control(i: Span) -> IResult<(Span, Ctrl)> { ))(i) } +impl<'a> Parsable<Span<'a>, nom::error::Error<Span<'a>>> for Ctrl { + fn parse(i: Span<'_>) -> IResult<(Span, Self)> { + control(i) + } +} // ----------------------------------------------------------------------------- // Identifier // ----------------------------------------------------------------------------- @@ -424,6 +435,12 @@ pub fn keyword(i: Span) -> IResult<(Span, Kw)> { ))(i) } +impl<'a> Parsable<Span<'a>, nom::error::Error<Span<'a>>> for Kw { + fn parse(i: Span<'_>) -> IResult<(Span, Self)> { + keyword(i) + } +} + pub fn null(i: Span) -> IResult<Span> { tag("null")(i) } diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index 60a26224..d667ff2c 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1,12 +1,12 @@ use nom::branch::alt; -use nom::combinator::{consumed, map, verify}; +use nom::combinator::{consumed, map, opt}; use nom::multi::separated_list1; -use nom::sequence::delimited; +use nom::sequence::{delimited, pair, preceded}; use nom::{Offset, Slice}; -use redscript::ast::{Constant, Ident, Literal, SourceAst}; +use redscript::ast::{Constant, Ident, Literal, SourceAst, TypeName}; use redscript::Str; -use crate::comb::delimited_list0; +use crate::comb::{delimited_list0, variant}; use crate::lexer::*; use crate::validators::*; use crate::*; @@ -28,7 +28,7 @@ macro_rules! to_ok { /// A dot separated sequence of identifiers. fn trailer(is: Span) -> IResult<(Vec<Ident>, Range)> { map( - consumed(separated_list1(verify(control, |(_, c)| c == &Ctrl::Dot), ident)), + consumed(separated_list1(variant(Ctrl::Dot), ident)), |(span, trailer)| (trailer, span.to_range().into()), )(is) } @@ -62,14 +62,33 @@ fn constant(is: Span) -> IResult<Constant> { ))(is) } -fn type_args(is: Span) -> IResult<Vec<Expr>> { - delimited( - verify(operator, |(_, op)| op == &Op::Lt), - separated_list1(verify(control, |(_, c)| c == &Ctrl::Comma), expr), - verify(operator, |(_, op)| op == &Op::Gt), - )(is) +fn type_list(is: Span) -> IResult<Vec<TypeName>> { + separated_list1(variant(Ctrl::Comma), type_)(is) +} + +fn type_args(is: Span) -> IResult<Vec<TypeName>> { + delimited(variant(Op::Lt), type_list, variant(Op::Gt))(is) +} + +fn type_(is: Span) -> IResult<TypeName> { + alt(( + map(pair(ident, opt(type_args)), |(ident, args)| { + TypeName::new(ident, args.unwrap_or_default()) + }), + map( + delimited(variant(Ctrl::LBracket), type_, variant(Ctrl::RBracket)), + TypeName::of_array, + ), + map( + pair( + delimited(variant(Ctrl::LParen), type_list, variant(Ctrl::RParen)), + preceded(variant(Ctrl::LArrow), type_), + ), + |(args, ret)| TypeName::of_function(args, ret), + ), + ))(is) } -fn expr(is: Span) -> IResult<Expr> { +pub fn expr(is: Span) -> IResult<Expr> { todo!() } From 0ba8f472e4959fd9928752947394c591fa5e7fb5 Mon Sep 17 00:00:00 2001 From: Prophet Lamb <prophet.lamb@gmail.com> Date: Thu, 17 Nov 2022 12:32:16 +0100 Subject: [PATCH 21/21] Impl field & func --- compiler/src/lexer.rs | 31 +++++++- compiler/src/parser.rs | 2 +- compiler/src/parser2.rs | 166 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 190 insertions(+), 9 deletions(-) diff --git a/compiler/src/lexer.rs b/compiler/src/lexer.rs index faeb70a2..65034f39 100644 --- a/compiler/src/lexer.rs +++ b/compiler/src/lexer.rs @@ -8,11 +8,12 @@ use nom::error::ParseError; use nom::multi::many0; use nom::sequence::{delimited, pair, preceded, separated_pair}; use nom::AsChar; -use redscript::ast::{Constant, Literal}; +use redscript::ast::{Constant, Literal, Variance}; use redscript::Str; use strum::{Display, IntoStaticStr}; use crate::comb::{many_till_balanced1, Parsable}; +use crate::parser::Qualifier; use crate::validators::*; use crate::*; @@ -85,6 +86,7 @@ pub enum Ctrl { Dot, Quest, LArrow, + At, } #[derive(Debug, Clone, Copy, PartialEq, Display, IntoStaticStr)] @@ -109,6 +111,7 @@ pub enum Kw { Try, Catch, Finally, + Extends, } // ----------------------------------------------------------------------------- @@ -387,6 +390,7 @@ pub fn control(i: Span) -> IResult<(Span, Ctrl)> { map(tag("."), |s| (s, Ctrl::Dot)), map(tag("?"), |s| (s, Ctrl::Quest)), map(tag("->"), |s| (s, Ctrl::LArrow)), + map(tag("@"), |s| (s, Ctrl::At)), ))(i) } @@ -432,6 +436,7 @@ pub fn keyword(i: Span) -> IResult<(Span, Kw)> { map(tag("try"), |s| (s, Kw::Try)), map(tag("catch"), |s| (s, Kw::Catch)), map(tag("finally"), |s| (s, Kw::Finally)), + map(tag("extends"), |s| (s, Kw::Extends)), ))(i) } @@ -456,3 +461,27 @@ pub fn super_(i: Span) -> IResult<Span> { pub fn boolean(i: Span) -> IResult<(Span, bool)> { alt((map(tag("true"), |s| (s, true)), map(tag("false"), |s| (s, false))))(i) } + +pub fn qualifier(i: Span) -> IResult<Qualifier> { + alt(( + map(tag("public"), |_| Qualifier::Public), + map(tag("protected"), |_| Qualifier::Protected), + map(tag("private"), |_| Qualifier::Private), + map(tag("abstract"), |_| Qualifier::Abstract), + map(tag("static"), |_| Qualifier::Static), + map(tag("final"), |_| Qualifier::Final), + map(tag("const"), |_| Qualifier::Const), + map(tag("native"), |_| Qualifier::Native), + map(tag("exec"), |_| Qualifier::Exec), + map(tag("callback"), |_| Qualifier::Callback), + map(tag("out"), |_| Qualifier::Out), + map(tag("opt"), |_| Qualifier::Optional), + map(tag("quest"), |_| Qualifier::Quest), + map(tag("importOnly"), |_| Qualifier::ImportOnly), + map(tag("persistent"), |_| Qualifier::Persistent), + ))(i) +} + +pub fn variance(i: Span) -> IResult<Variance> { + alt((map(tag("+"), |_| Variance::Co), map(tag("-"), |_| Variance::Contra)))(i) +} diff --git a/compiler/src/parser.rs b/compiler/src/parser.rs index d920d661..518c1485 100644 --- a/compiler/src/parser.rs +++ b/compiler/src/parser.rs @@ -110,7 +110,7 @@ pub enum Qualifier { } #[derive(Debug)] -pub struct Qualifiers(Vec<Qualifier>); +pub struct Qualifiers(pub Vec<Qualifier>); impl Qualifiers { pub fn visibility(&self) -> Option<Visibility> { diff --git a/compiler/src/parser2.rs b/compiler/src/parser2.rs index d667ff2c..418cdf0f 100644 --- a/compiler/src/parser2.rs +++ b/compiler/src/parser2.rs @@ -1,18 +1,24 @@ +use std::str::FromStr; + use nom::branch::alt; use nom::combinator::{consumed, map, opt}; -use nom::multi::separated_list1; -use nom::sequence::{delimited, pair, preceded}; +use nom::multi::{many0, separated_list0, separated_list1}; +use nom::sequence::{delimited, pair, preceded, tuple}; use nom::{Offset, Slice}; -use redscript::ast::{Constant, Ident, Literal, SourceAst, TypeName}; +use redscript::ast::{Constant, Ident, Literal, SourceAst, TypeName, TypeParam}; use redscript::Str; use crate::comb::{delimited_list0, variant}; use crate::lexer::*; +use crate::parser::{ + Annotation, AnnotationKind, Declaration, FieldSource, FunctionSource, ParameterSource, Qualifiers +}; use crate::validators::*; use crate::*; type Range = redscript::ast::Span; type Expr = redscript::ast::Expr<SourceAst>; +type Seq = redscript::ast::Seq<SourceAst>; fn to_range(input: &Span, remaining: &Span) -> Range { let len = input.slice(..remaining.offset(input)).len(); @@ -63,32 +69,178 @@ fn constant(is: Span) -> IResult<Constant> { } fn type_list(is: Span) -> IResult<Vec<TypeName>> { - separated_list1(variant(Ctrl::Comma), type_)(is) + separated_list1(variant(Ctrl::Comma), type_name)(is) } fn type_args(is: Span) -> IResult<Vec<TypeName>> { delimited(variant(Op::Lt), type_list, variant(Op::Gt))(is) } -fn type_(is: Span) -> IResult<TypeName> { +fn ret_type(is: Span) -> IResult<TypeName> { + preceded(variant(Ctrl::LArrow), type_name)(is) +} + +fn type_name(is: Span) -> IResult<TypeName> { alt(( map(pair(ident, opt(type_args)), |(ident, args)| { TypeName::new(ident, args.unwrap_or_default()) }), map( - delimited(variant(Ctrl::LBracket), type_, variant(Ctrl::RBracket)), + delimited(variant(Ctrl::LBracket), type_name, variant(Ctrl::RBracket)), TypeName::of_array, ), map( pair( delimited(variant(Ctrl::LParen), type_list, variant(Ctrl::RParen)), - preceded(variant(Ctrl::LArrow), type_), + ret_type, ), |(args, ret)| TypeName::of_function(args, ret), ), ))(is) } +fn assign_init(is: Span) -> IResult<Option<Expr>> { + opt(preceded(variant(Op::Eq), expr))(is) +} + +fn let_type(is: Span) -> IResult<TypeName> { + preceded(variant(Ctrl::Colon), type_name)(is) +} + +fn let_(is: Span) -> IResult<Expr> { + map( + consumed(tuple((preceded(variant(Kw::Let), ident), opt(let_type), assign_init))), + |(span, (ident, ty, value))| { + Expr::Declare(ident, ty.map(Box::new), value.map(Box::new), span.to_range().into()) + }, + )(is) +} + +fn expr_list(is: Span) -> IResult<Vec<Expr>> { + separated_list1(variant(Ctrl::Comma), expr)(is) +} + +fn args(is: Span) -> IResult<Vec<Expr>> { + map( + consumed(delimited(variant(Ctrl::LParen), expr_list, variant(Ctrl::RParen))), + |(_, args)| args, + )(is) +} + +fn annotation(is: Span) -> IResult<Annotation> { + let (rem, (span, (ident, args))) = consumed(tuple((preceded(variant(Ctrl::At), ident), opt(args))))(is)?; + + match AnnotationKind::from_str(&ident) { + Ok(kind) => Ok((rem, Annotation { + kind, + args: args.unwrap_or_default(), + span: span.to_range().into(), + })), + Err(_) => Err(nom_error(rem, NomErrorKind::Tag)), + } +} + +fn annotation_list(is: Span) -> IResult<Vec<Annotation>> { + many0(annotation)(is) +} + +fn qualifier_list(is: Span) -> IResult<Qualifiers> { + map(many0(qualifier), |qs| Qualifiers(qs))(is) +} + +fn type_param(is: Span) -> IResult<TypeParam> { + map( + tuple((variance, ident, opt(preceded(variant(Kw::Extends), type_name)))), + |(variance, ident, extends)| TypeParam { + name: ident, + variance, + extends, + }, + )(is) +} + +fn type_params(is: Span) -> IResult<Vec<TypeParam>> { + delimited( + variant(Op::Lt), + separated_list1(variant(Ctrl::Comma), type_param), + variant(Op::Gt), + )(is) +} + +fn decl<'a, T>( + mut inner: impl FnMut(Span<'a>) -> IResult<'a, T>, +) -> impl FnMut(Span<'a>) -> IResult<'a, (Declaration, T)> { + move |is| { + map( + consumed(tuple((annotation_list, qualifier_list, &mut inner, ident))), + |(span, (annotations, qualifiers, inner, ident))| { + ( + Declaration { + annotations, + qualifiers, + name: ident, + span: span.to_range().into(), + }, + inner, + ) + }, + )(is) + } +} + +fn field(is: Span) -> IResult<FieldSource> { + map( + tuple((decl(variant(Kw::Let)), let_type, assign_init)), + |((declaration, _), ty, init)| FieldSource { + declaration, + type_: ty, + default: init, + }, + )(is) +} + +fn param(is: Span) -> IResult<ParameterSource> { + map(tuple((qualifier_list, ident, let_type)), |(qualifiers, name, ty)| { + ParameterSource { + qualifiers, + name, + type_: ty, + } + })(is) +} + +fn params(is: Span) -> IResult<Vec<ParameterSource>> { + delimited( + variant(Ctrl::LParen), + separated_list0(variant(Ctrl::Comma), param), + variant(Ctrl::RParen), + )(is) +} + +fn func(is: Span) -> IResult<FunctionSource> { + map( + consumed(tuple(( + decl(variant(Kw::Func)), + opt(type_params), + params, + opt(ret_type), + opt(func_body), + ))), + |(span, ((declaration, _), tparams, parameters, ret_type, body))| FunctionSource { + tparams: tparams.unwrap_or_default(), + declaration, + type_: ret_type, + parameters, + body, + span: span.to_range().into(), + }, + )(is) +} + +fn func_body(is: Span) -> IResult<Seq> { + todo!() +} + pub fn expr(is: Span) -> IResult<Expr> { todo!() }