From 3d593b08642ce51c1301576b4d2f2b93b71f3cd3 Mon Sep 17 00:00:00 2001 From: cobaltburn <81337170+cobaltburn@users.noreply.github.com> Date: Sat, 27 May 2023 16:50:46 -0500 Subject: [PATCH 1/6] Add unison implementation write it in unison i said it will be fun i said....... --- unison/Dockerfile | 38 ++++++++++++++++++++++ unison/Makefile | 31 ++++++++++++++++++ unison/lexer.u | 80 +++++++++++++++++++++++++++++++++++++++++++++++ unison/main.u | 61 ++++++++++++++++++++++++++++++++++++ unison/test.txt | 8 +++++ unison/token.u | 21 +++++++++++++ 6 files changed, 239 insertions(+) create mode 100644 unison/Dockerfile create mode 100644 unison/Makefile create mode 100644 unison/lexer.u create mode 100644 unison/main.u create mode 100644 unison/test.txt create mode 100644 unison/token.u diff --git a/unison/Dockerfile b/unison/Dockerfile new file mode 100644 index 00000000..6b3514c9 --- /dev/null +++ b/unison/Dockerfile @@ -0,0 +1,38 @@ +# Latest Ubuntu image +FROM ubuntu:latest + +# Make a working directory +RUN mkdir unison + +# Basic system-level dependencies +RUN apt-get update && \ + apt install -y software-properties-common git curl build-essential && \ + add-apt-repository --yes ppa:neovim-ppa/unstable && \ + apt-get install -y neovim +# TODO: add yours + +# Extra dependencies +# TODO: add yours +Run mkdir unisonlanguage && \ + curl -L https://github.com/unisonweb/unison/releases/download/release%2FM4i/ucm-linux.tar.gz --output unisonlanguage/ucm.tar.gz && \ + tar -xzf unisonlanguage/ucm.tar.gz -C unisonlanguage + + +WORKDIR /unison +RUN ~/./unisonlanguage/ucm + +# has to be run inside of the unison terminal +RUN fork .base lib.base +RUN add +RUN compile test test +RUN compile testFile testFile + +# offically site showing how to compile files `https://www.unison-lang.org/learn/at-a-glance/` +# RUN ucm run.compiled test.uc +# RUN ucm run.compiled testFile.uc + +# Commands for docker run +# CMD make fmt && \ +# make lint && \ +# make test + diff --git a/unison/Makefile b/unison/Makefile new file mode 100644 index 00000000..ebaec6ed --- /dev/null +++ b/unison/Makefile @@ -0,0 +1,31 @@ +## +# Static part, don't change these: +## + +help: + @cat Makefile | grep -E "^\w+$:" + +ready: fmt lint test + +docker-build: + docker build --no-cache . -t deez_$(notdir $(shell pwd)) + +docker-ready: docker-build + docker run -v $(shell pwd):/deez -t deez_$(notdir $(shell pwd)) + +## +# Update those: +## + +fmt: + @echo "===> Formatting" + # TODO: add yours + +lint: + @echo "===> Linting" + # TODO: add yours + +test: + @echo "===> Testing" + # TODO: add yours + diff --git a/unison/lexer.u b/unison/lexer.u new file mode 100644 index 00000000..2aa51bda --- /dev/null +++ b/unison/lexer.u @@ -0,0 +1,80 @@ +nextToken : Char -> Token +nextToken ch = + c = fromCharList [ch] + match ch with + ?+ -> Plus + ?- -> Minus + ?* -> Asterisk + ?/ -> Slash + ?! -> Bang + ?< -> Lt + ?> -> Gt + ?; -> Semicolon + ?, -> Comma + ?( -> Lparen + ?) -> Rparen + ?{ -> Lbrace + ?} -> Rbrace + ?\NUL -> Eof + _ -> Illegal c + + +lexer : Text -> [Token] +lexer str = + loop: (Text, [Token]) -> [Token] + loop tup = + (str, tokens) = tup + match str with + "" -> tokens + lex -> processText str tokens |> loop + loop (str, []) + +processText : Text -> [Token] -> (Text, [Token]) +processText str tokens = + (ch, txt) = Text.uncons str |> Optional.getOrElse (?\NUL, "") + match ch with + x + | isWhitespace x -> (txt, tokens) + | x == ?= -> identEquals txt tokens + | x == ?! -> identNotEqual txt tokens + | isLetter(x) -> + (var, rem) = identChar txt + val = fromCharList([ch]) ++ var + (rem, tokens :+ keyWords(val) ) + | isDigit(x) -> + (var, rem) = identNum txt + num = fromCharList([ch]) ++ var + (rem, tokens :+ Int num) + _ ->(txt, tokens :+ nextToken(ch)) + +identEquals : Text -> [Token] -> (Text, [Token]) +identEquals str tokens = + (ch, txt) = Text.uncons str |> Optional.getOrElse (?\NUL, "") + match ch with + ?= -> (txt, tokens :+ Eq) + _ -> (str, tokens :+ Assign) + +identNotEqual : Text -> [Token] -> (Text, [Token]) +identNotEqual str tokens = + (ch, txt) = Text.uncons str |> Optional.getOrElse (?\NUL, "") + match ch with + ?= -> (txt, tokens :+ NotEq) + _ -> (str, tokens :+ Bang) + +identChar : Text -> (Text, Text) +identChar str = + var = Text.takeWhile isLetter str + size = Text.size var + (_, remain) = Text.splitAt size str + (var, remain) + +identNum : Text -> (Text, Text) +identNum str = + num = Text.takeWhile isDigit str + size = Text.size num + (_, remain) = Text.splitAt size str + (num, remain) + +isLetter : Char -> Boolean +isLetter ch = + ((?a <= ch) && (ch <= ?z)) || ((?A <= ch) && (ch <= ?Z)) || (ch == ?_) diff --git a/unison/main.u b/unison/main.u new file mode 100644 index 00000000..ff719525 --- /dev/null +++ b/unison/main.u @@ -0,0 +1,61 @@ + +test : '{IO, Exception} Boolean +test = do + result = lexer """let five = 5; + let ten = 10; + let add = fn(x, y) { + x + y; + }; + let result = add(five, ten);""" + expect = [Let, + Ident("five"), + Assign, + Int("5"), + Semicolon, + Let, + Ident("ten"), + Assign, Int("10"), + Semicolon, + Let, + Ident("add"), + Assign, + Fn, + Lparen, + Ident("x"), + Comma, + Ident("y"), + Rparen, + Lbrace, + Ident("x"), + Plus, + Ident("y"), + Semicolon, + Rbrace, + Semicolon, + Let, + Ident("result"), + Assign, + Ident("add"), + Lparen, + Ident("five"), + Comma, + Ident("ten"), + Rparen, + Semicolon] + result === expect + + +testFile : '{IO, Exception} [Token] +testFile = do + name = !readLine + path = FilePath name + file = open path Read + loop : Handle -> [Token] -> [Token] + loop file tokens = + if not (isEOF file) then + line = getLine file + tks = lexer line + loop file (tokens ++ tks) + else + tokens :+ Eof + loop file [] \ No newline at end of file diff --git a/unison/test.txt b/unison/test.txt new file mode 100644 index 00000000..083bc1fa --- /dev/null +++ b/unison/test.txt @@ -0,0 +1,8 @@ +let five = 5; +let ten = 10; +let add = fn(x, y) { + x + y; +}; +let result = add(five, ten); +!-/*5; +5 < 10 > 5; \ No newline at end of file diff --git a/unison/token.u b/unison/token.u new file mode 100644 index 00000000..43f7bdda --- /dev/null +++ b/unison/token.u @@ -0,0 +1,21 @@ + +structural type Token = Illegal Text | Ident Text | Int Text | Eof + | Assign | Plus | Comma + | Semicolon | Lparen | Rparen + | Lbrace | Rbrace | Minus + | Bang | Asterisk| Slash + | Fn | Let | Gt| Lt + | True | False | If | Else + | Return | Eq | NotEq + +keyWords : Text -> Token +keyWords word = + match word with + "fn" -> Fn + "let" -> Let + "true" -> True + "false" -> False + "if" -> If + "else" -> Else + "return" -> Return + _ -> Ident word From cae47601c302c24e39a4eb79f181d1b3c61a307a Mon Sep 17 00:00:00 2001 From: cobaltburn <81337170+cobaltburn@users.noreply.github.com> Date: Tue, 30 May 2023 16:59:28 -0500 Subject: [PATCH 2/6] Add files via upload --- unison/main.u | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/unison/main.u b/unison/main.u index ff719525..caf15aaa 100644 --- a/unison/main.u +++ b/unison/main.u @@ -47,15 +47,6 @@ test = do testFile : '{IO, Exception} [Token] testFile = do - name = !readLine - path = FilePath name - file = open path Read - loop : Handle -> [Token] -> [Token] - loop file tokens = - if not (isEOF file) then - line = getLine file - tks = lexer line - loop file (tokens ++ tks) - else - tokens :+ Eof - loop file [] \ No newline at end of file + path = FilePath !readLine + tokens = lexer << getText <| open path Read + tokens :+ Eof From 1f604ead050e4ec0173473963cb8eeace7f5051e Mon Sep 17 00:00:00 2001 From: cobaltburn <81337170+cobaltburn@users.noreply.github.com> Date: Sun, 4 Jun 2023 12:00:40 -0500 Subject: [PATCH 3/6] Add files via upload --- unison/ast.u | 20 ++++++++ unison/lexer.u | 34 +++++++------ unison/main.u | 10 ++-- unison/parTest.txt | 1 + unison/parser.u | 121 +++++++++++++++++++++++++++++++++++++++++++++ unison/token.u | 48 ++++++++++++++++-- 6 files changed, 210 insertions(+), 24 deletions(-) create mode 100644 unison/ast.u create mode 100644 unison/parTest.txt create mode 100644 unison/parser.u diff --git a/unison/ast.u b/unison/ast.u new file mode 100644 index 00000000..34c82893 --- /dev/null +++ b/unison/ast.u @@ -0,0 +1,20 @@ + +structural type Statement = LetStatement Token [Token] | ReturnStatement [Token] | ExpressionStatement [Token] | ErrorStatement [Token] Token + +Ast.toString : Statement -> Text +Ast.toString = cases + LetStatement var exp -> "let " ++ Token.toString(var) ++ " = " ++ expressionToString exp |> trim + ReturnStatement exp -> "return " ++ expressionToString exp |> trim + ExpressionStatement exp -> expressionToString exp |> trim + ErrorStatement exp err -> "" + +Ast.expressionToString: [Token] -> Text +Ast.expressionToString tks = + loop : [Token] -> Text + loop tks = + match List.uncons tks with + Some(tk, rem) -> ( Token.toString tk ) ++ " " ++ loop rem + None -> "" + loop tks + +> Ast.toString (LetStatement (Ident("var")) [Int("5"), Plus, Int("5")]) diff --git a/unison/lexer.u b/unison/lexer.u index 2aa51bda..74a28af9 100644 --- a/unison/lexer.u +++ b/unison/lexer.u @@ -1,5 +1,7 @@ -nextToken : Char -> Token -nextToken ch = + + +Lexer.nextToken : Char -> Token +Lexer.nextToken ch = c = fromCharList [ch] match ch with ?+ -> Plus @@ -19,8 +21,8 @@ nextToken ch = _ -> Illegal c -lexer : Text -> [Token] -lexer str = +Lexer.lexer : Text -> [Token] +Lexer.lexer str = loop: (Text, [Token]) -> [Token] loop tup = (str, tokens) = tup @@ -29,8 +31,8 @@ lexer str = lex -> processText str tokens |> loop loop (str, []) -processText : Text -> [Token] -> (Text, [Token]) -processText str tokens = +Lexer.processText : Text -> [Token] -> (Text, [Token]) +Lexer.processText str tokens = (ch, txt) = Text.uncons str |> Optional.getOrElse (?\NUL, "") match ch with x @@ -47,34 +49,34 @@ processText str tokens = (rem, tokens :+ Int num) _ ->(txt, tokens :+ nextToken(ch)) -identEquals : Text -> [Token] -> (Text, [Token]) -identEquals str tokens = +Lexer.identEquals : Text -> [Token] -> (Text, [Token]) +Lexer.identEquals str tokens = (ch, txt) = Text.uncons str |> Optional.getOrElse (?\NUL, "") match ch with ?= -> (txt, tokens :+ Eq) _ -> (str, tokens :+ Assign) -identNotEqual : Text -> [Token] -> (Text, [Token]) -identNotEqual str tokens = +Lexer.identNotEqual : Text -> [Token] -> (Text, [Token]) +Lexer.identNotEqual str tokens = (ch, txt) = Text.uncons str |> Optional.getOrElse (?\NUL, "") match ch with ?= -> (txt, tokens :+ NotEq) _ -> (str, tokens :+ Bang) -identChar : Text -> (Text, Text) -identChar str = +Lexer.identChar : Text -> (Text, Text) +Lexer.identChar str = var = Text.takeWhile isLetter str size = Text.size var (_, remain) = Text.splitAt size str (var, remain) -identNum : Text -> (Text, Text) -identNum str = +Lexer.identNum : Text -> (Text, Text) +Lexer.identNum str = num = Text.takeWhile isDigit str size = Text.size num (_, remain) = Text.splitAt size str (num, remain) -isLetter : Char -> Boolean -isLetter ch = +Lexer.isLetter : Char -> Boolean +Lexer.isLetter ch = ((?a <= ch) && (ch <= ?z)) || ((?A <= ch) && (ch <= ?Z)) || (ch == ?_) diff --git a/unison/main.u b/unison/main.u index caf15aaa..0eba685c 100644 --- a/unison/main.u +++ b/unison/main.u @@ -1,6 +1,6 @@ -test : '{IO, Exception} Boolean -test = do +Test.test : '{IO, Exception} Boolean +Test.test = do result = lexer """let five = 5; let ten = 10; let add = fn(x, y) { @@ -45,8 +45,8 @@ test = do result === expect -testFile : '{IO, Exception} [Token] -testFile = do +Test.testFile : '{IO, Exception} [Token] +Test.testFile = do path = FilePath !readLine tokens = lexer << getText <| open path Read - tokens :+ Eof + tokens :+ Eof \ No newline at end of file diff --git a/unison/parTest.txt b/unison/parTest.txt new file mode 100644 index 00000000..6e6da254 --- /dev/null +++ b/unison/parTest.txt @@ -0,0 +1 @@ +foobar; diff --git a/unison/parser.u b/unison/parser.u new file mode 100644 index 00000000..6c95e5a3 --- /dev/null +++ b/unison/parser.u @@ -0,0 +1,121 @@ + +Parser.parseProgram : [Token] -> {Exception}[Statement] +Parser.parseProgram tks = + loop tks stm = let + match List.uncons tks with + Some(tk, list) + | Eof === tk -> stm + | otherwise -> + (st, res) = parseStatement tk list + loop res (stm :+ st) + None -> Exception.raise (Failure (typeLink Generic) "parseProgram: failed to match" (Any None)) + loop tks [] + +Parser.parseStatement : Token -> [Token] -> {Exception}(Statement, [Token]) +Parser.parseStatement tk tks = + match tk with + Let -> parseLet tks + Return -> parseReturn tks + _ -> handleError tk tks + + + +Parser.parseLet : [Token] -> {Exception}(Statement, [Token]) +Parser.parseLet tokens = + (name, remain1) = grabIdent tokens + match name with + Token.Error _ _ -> handleError name remain1 + _ -> let + (assign, remain2) = grabAssign remain1 + match assign with + Token.Error _ _ -> handleError name remain2 + _ -> let + (expression, remain3) = expLoop [] remain2 + semicolonCheck Let expression remain3 + +Parser.parseReturn : [Token] -> {Exception}(Statement, [Token]) +Parser.parseReturn tokens = + (expression, remain) = expLoop [] tokens + semicolonCheck Return expression remain + +Parser.expLoop: [Token] -> [Token] -> {Exception}([Token], [Token]) +Parser.expLoop exp tks = + match List.uncons tks with + Some(tk, rem) -> + match tk with + Eof -> (exp, tks) + Semicolon -> (exp, tks) + Let -> (exp, tks) + Return -> (exp, tks) + _ -> expLoop (exp :+ tk) rem + None -> Exception.raise (Failure (typeLink Generic) "expLoop: let statement incorret format" (Any(take 1 tks))) + +--TODO move parsing funtions to here once done + +Parser.grabNext : [Token] -> {Exception}(Token, [Token]) +Parser.grabNext tks = + match List.uncons tks with + None -> Exception.raise (Failure (typeLink Generic) "grabNext: let statement incorret format" (Any(take 1 tks))) + Some res -> res + + +Parser.grabIdent : [Token] -> {Exception}(Token, [Token]) +Parser.grabIdent tks = + match List.uncons tks with + Some (tk, res) + | isIdent tk -> (tk, res) + | otherwise -> (Error "Ident Token not found" tk, res) + None -> Exception.raise (Failure (typeLink Generic) "grabIdnt: let statement incorret format" (Any(take 1 tks)) ) + + +Parser.grabAssign : [Token] -> {Exception}(Token, [Token]) +Parser.grabAssign tks = + match List.uncons tks with + Some (tk, res) + | tk === Assign -> (tk, res) + | otherwise -> (Error "Assign Token not found" tk, res) + _ -> Exception.raise (Failure (typeLink Generic) "gradAssign: let statement incorret format" (Any(take 1 tks))) + + +Parser.handleError: Token -> [Token] -> (Statement, [Token]) +Parser.handleError tk tks = + loop tks err = + match List.uncons tks with + Some(t, rem) + | t === Let -> (ErrorStatement err tk, tks) + | t === Semicolon -> (ErrorStatement (err :+ t) tk, rem) + | t === Eof -> (ErrorStatement (err :+ t) tk, tks) + | otherwise -> loop rem (err :+ t) + None -> Exception.raise (Failure (typeLink Generic) "handleError: let statement incorret format" (Any(take 1 tks))) + loop tks [] + +Parser.semicolonCheck : Token -> [Token] -> [Token] -> {Exception}(Statement, [Token]) +Parser.semicolonCheck tp expression remain = + match List.uncons remain with + Some (h, tail) + | h === Semicolon -> (ReturnStatement expression, tail) + | otherwise -> (ErrorStatement expression tp, remain) + None -> Exception.raise (Failure (typeLink Generic) "semicolonCheck: let statement incorret format" (Any tp)) + + +Test.testParser : '{IO, Exception}[Statement] +Test.testParser = do + path = FilePath "parTest.txt" + tokens = lexer << getText <| open path Read + program = tokens :+ Eof + parseProgram program + + +--- + +Parser.prefixParse : [Token] -> [Token] +Parser.prefixParse tks = + tks + +Parser.infixParse : [Token] -> [Token] +Parser.infixParse tks = + tks + +Parser.parseExpression : [Token] -> {Exception}(Statement, [Token]) +Parser.parseExpression tks = + (ExpressionStatement [], []) \ No newline at end of file diff --git a/unison/token.u b/unison/token.u index 43f7bdda..925335af 100644 --- a/unison/token.u +++ b/unison/token.u @@ -1,5 +1,6 @@ -structural type Token = Illegal Text | Ident Text | Int Text | Eof +structural type Token = Illegal Text | Ident Text | Int Text + | Error Text Token | Eof | Assign | Plus | Comma | Semicolon | Lparen | Rparen | Lbrace | Rbrace | Minus @@ -8,8 +9,8 @@ structural type Token = Illegal Text | Ident Text | Int Text | Eof | True | False | If | Else | Return | Eq | NotEq -keyWords : Text -> Token -keyWords word = +Token.keyWords : Text -> Token +Token.keyWords word = match word with "fn" -> Fn "let" -> Let @@ -19,3 +20,44 @@ keyWords word = "else" -> Else "return" -> Return _ -> Ident word + +Token.isIdent : Token -> Boolean +Token.isIdent = cases + Ident _ -> true + _ -> false + +Token.isInt : Token -> Boolean +Token.isInt = cases + Int _ -> true + _ -> false + +Token.toString: Token -> Text +Token.toString = cases + Illegal t -> t + Ident t -> t + Int t -> t + Fn -> "fn" + Let -> "let" + Assign -> "=" + Plus -> "+" + Minus -> "-" + Asterisk -> "*" + Slash -> "/" + Bang -> "!" + Gt -> ">" + Lt -> "<" + Eq -> "==" + NotEq -> "!=" + True -> "true" + False -> "false" + If -> "if" + Else -> "else" + Return -> "return" + Comma -> "," + Semicolon -> ";" + Lparen -> "(" + Rparen -> ")" + Lbrace -> "{" + Rbrace -> "}" + Eof -> "end" + Error _ tk -> toString tk From fbffbae1c3f07522661dd7ed2df5313f26d9dc98 Mon Sep 17 00:00:00 2001 From: cobaltburn <81337170+cobaltburn@users.noreply.github.com> Date: Thu, 8 Jun 2023 15:50:07 -0500 Subject: [PATCH 4/6] Add files via upload edit Dockerfile to initialize unison code base and pull hashed functions into the codebase and compile test functions --- unison/Compile.md | 7 +++++ unison/Dockerfile | 16 +++------- unison/ast.u | 2 +- unison/lexer.u | 1 - unison/main.u | 4 +-- unison/parser.u | 78 ++++++++++++++++++++++++++++++----------------- unison/token.u | 2 +- 7 files changed, 66 insertions(+), 44 deletions(-) create mode 100644 unison/Compile.md diff --git a/unison/Compile.md b/unison/Compile.md new file mode 100644 index 00000000..21658170 --- /dev/null +++ b/unison/Compile.md @@ -0,0 +1,7 @@ +''' ucm + .> cd interpreter + .interpreter> fork .base lib.base + .interpreter> pull cobaltburn.public.interpreter .interpreter + .interpreter> compile test test + .interpreter> compile testFile testFile +''' diff --git a/unison/Dockerfile b/unison/Dockerfile index 6b3514c9..d1054076 100644 --- a/unison/Dockerfile +++ b/unison/Dockerfile @@ -2,7 +2,7 @@ FROM ubuntu:latest # Make a working directory -RUN mkdir unison +# WORKDIR /Unison # Basic system-level dependencies RUN apt-get update && \ @@ -13,19 +13,13 @@ RUN apt-get update && \ # Extra dependencies # TODO: add yours -Run mkdir unisonlanguage && \ +RUN mkdir unisonlanguage && \ curl -L https://github.com/unisonweb/unison/releases/download/release%2FM4i/ucm-linux.tar.gz --output unisonlanguage/ucm.tar.gz && \ tar -xzf unisonlanguage/ucm.tar.gz -C unisonlanguage - -WORKDIR /unison -RUN ~/./unisonlanguage/ucm - -# has to be run inside of the unison terminal -RUN fork .base lib.base -RUN add -RUN compile test test -RUN compile testFile testFile +COPY . / +RUN ./unisonlanguage/ucm +RUN ./unisonlanguage/ucm transcript Compile.md # offically site showing how to compile files `https://www.unison-lang.org/learn/at-a-glance/` # RUN ucm run.compiled test.uc diff --git a/unison/ast.u b/unison/ast.u index 34c82893..5ce74519 100644 --- a/unison/ast.u +++ b/unison/ast.u @@ -1,5 +1,5 @@ -structural type Statement = LetStatement Token [Token] | ReturnStatement [Token] | ExpressionStatement [Token] | ErrorStatement [Token] Token +unique type Statement = LetStatement Token [Token] | ReturnStatement [Token] | ExpressionStatement [Token] | ErrorStatement [Token] Token Ast.toString : Statement -> Text Ast.toString = cases diff --git a/unison/lexer.u b/unison/lexer.u index 74a28af9..a57c0d74 100644 --- a/unison/lexer.u +++ b/unison/lexer.u @@ -1,5 +1,4 @@ - Lexer.nextToken : Char -> Token Lexer.nextToken ch = c = fromCharList [ch] diff --git a/unison/main.u b/unison/main.u index 0eba685c..e3582f83 100644 --- a/unison/main.u +++ b/unison/main.u @@ -47,6 +47,6 @@ Test.test = do Test.testFile : '{IO, Exception} [Token] Test.testFile = do - path = FilePath !readLine + path = FilePath "test.txt" tokens = lexer << getText <| open path Read - tokens :+ Eof \ No newline at end of file + tokens :+ Eof diff --git a/unison/parser.u b/unison/parser.u index 6c95e5a3..a5b3737f 100644 --- a/unison/parser.u +++ b/unison/parser.u @@ -1,4 +1,6 @@ +unique type Precedence = LOWEST | EQUALS | LESSGREATER | SUM | PRODUCT | PREFIX | CALL + Parser.parseProgram : [Token] -> {Exception}[Statement] Parser.parseProgram tks = loop tks stm = let @@ -16,7 +18,7 @@ Parser.parseStatement tk tks = match tk with Let -> parseLet tks Return -> parseReturn tks - _ -> handleError tk tks + _ -> parseExpression tk tks @@ -30,25 +32,35 @@ Parser.parseLet tokens = match assign with Token.Error _ _ -> handleError name remain2 _ -> let - (expression, remain3) = expLoop [] remain2 - semicolonCheck Let expression remain3 + (expression, remain3) = parseLoop remain2 + match List.uncons remain3 with + Some (h, tail) + | h === Semicolon -> (LetStatement name expression, tail) + | otherwise -> (ErrorStatement expression Return, remain3) + None -> Exception.raise (Failure (typeLink Generic) "parseLet: let statement incorret format" (Any Return)) Parser.parseReturn : [Token] -> {Exception}(Statement, [Token]) Parser.parseReturn tokens = - (expression, remain) = expLoop [] tokens - semicolonCheck Return expression remain + (expression, remain) = parseLoop tokens + match List.uncons remain with + Some (h, tail) + | h === Semicolon -> (ReturnStatement expression, tail) + | otherwise -> (ErrorStatement expression Return, remain) + None -> Exception.raise (Failure (typeLink Generic) "parseReturn: let statement incorret format" (Any Return)) -Parser.expLoop: [Token] -> [Token] -> {Exception}([Token], [Token]) -Parser.expLoop exp tks = - match List.uncons tks with - Some(tk, rem) -> - match tk with - Eof -> (exp, tks) - Semicolon -> (exp, tks) - Let -> (exp, tks) - Return -> (exp, tks) - _ -> expLoop (exp :+ tk) rem - None -> Exception.raise (Failure (typeLink Generic) "expLoop: let statement incorret format" (Any(take 1 tks))) +Parser.parseLoop: [Token] -> {Exception}([Token], [Token]) +Parser.parseLoop tks = + loop exp tks = + match List.uncons tks with + Some(tk, rem) -> + match tk with + Eof -> (exp, tks) + Semicolon -> (exp, tks) + Let -> (exp, tks) + Return -> (exp, tks) + _ -> loop (exp :+ tk) rem + None -> Exception.raise (Failure (typeLink Generic) "parseLoop: let statement incorret format" (Any(take 1 tks))) + loop [] tks --TODO move parsing funtions to here once done @@ -89,14 +101,6 @@ Parser.handleError tk tks = None -> Exception.raise (Failure (typeLink Generic) "handleError: let statement incorret format" (Any(take 1 tks))) loop tks [] -Parser.semicolonCheck : Token -> [Token] -> [Token] -> {Exception}(Statement, [Token]) -Parser.semicolonCheck tp expression remain = - match List.uncons remain with - Some (h, tail) - | h === Semicolon -> (ReturnStatement expression, tail) - | otherwise -> (ErrorStatement expression tp, remain) - None -> Exception.raise (Failure (typeLink Generic) "semicolonCheck: let statement incorret format" (Any tp)) - Test.testParser : '{IO, Exception}[Statement] Test.testParser = do @@ -105,8 +109,12 @@ Test.testParser = do program = tokens :+ Eof parseProgram program +Test.testTokens : '{IO, Exception}[Token] +Test.testTokens = do + path = FilePath "parTest.txt" + tokens = lexer << getText <| open path Read + tokens :+ Eof ---- Parser.prefixParse : [Token] -> [Token] Parser.prefixParse tks = @@ -116,6 +124,20 @@ Parser.infixParse : [Token] -> [Token] Parser.infixParse tks = tks -Parser.parseExpression : [Token] -> {Exception}(Statement, [Token]) -Parser.parseExpression tks = - (ExpressionStatement [], []) \ No newline at end of file +Parser.infixFunc : Token -> Optional Statement +Parser.infixFunc = cases + _ -> None + +Parser.prefixFunc : Token -> Optional Statement +Parser.prefixFunc = cases + _ -> None + +Parser.parseExpression : Token -> [Token] -> {Exception}(Statement, [Token]) -- todo might have to return a statement +Parser.parseExpression token tokens = + (expression, remain) = parseLoop tokens + match List.uncons tokens with + None -> Exception.raise (Failure (typeLink Generic) "parseExpression: let statement incorret format" (Any token)) + Some (tk, rem) -> let + match prefixFunc tk with + None -> (ExpressionStatement [], rem) + Some x -> (ExpressionStatement [], rem) \ No newline at end of file diff --git a/unison/token.u b/unison/token.u index 925335af..be5e50a5 100644 --- a/unison/token.u +++ b/unison/token.u @@ -1,5 +1,5 @@ -structural type Token = Illegal Text | Ident Text | Int Text +unique type Token = Illegal Text | Ident Text | Int Text | Error Text Token | Eof | Assign | Plus | Comma | Semicolon | Lparen | Rparen From 8d38379b0abe6372394fe5627732ca13aa027ea9 Mon Sep 17 00:00:00 2001 From: cobaltburn <81337170+cobaltburn@users.noreply.github.com> Date: Wed, 5 Jul 2023 16:56:59 -0500 Subject: [PATCH 5/6] Add files via upload implemented chapter 2 --- unison/lexer.u | 38 ++-- unison/main.u | 7 +- unison/parTest.txt | 4 +- unison/parser.u | 425 +++++++++++++++++++++++++++++++++------------ unison/repl.u | 13 ++ unison/token.u | 70 ++++---- 6 files changed, 392 insertions(+), 165 deletions(-) create mode 100644 unison/repl.u diff --git a/unison/lexer.u b/unison/lexer.u index a57c0d74..4bf13767 100644 --- a/unison/lexer.u +++ b/unison/lexer.u @@ -26,27 +26,27 @@ Lexer.lexer str = loop tup = (str, tokens) = tup match str with - "" -> tokens - lex -> processText str tokens |> loop + "" -> tokens + lex -> processText str tokens |> loop loop (str, []) Lexer.processText : Text -> [Token] -> (Text, [Token]) Lexer.processText str tokens = (ch, txt) = Text.uncons str |> Optional.getOrElse (?\NUL, "") - match ch with - x - | isWhitespace x -> (txt, tokens) - | x == ?= -> identEquals txt tokens - | x == ?! -> identNotEqual txt tokens - | isLetter(x) -> - (var, rem) = identChar txt - val = fromCharList([ch]) ++ var - (rem, tokens :+ keyWords(val) ) - | isDigit(x) -> - (var, rem) = identNum txt - num = fromCharList([ch]) ++ var - (rem, tokens :+ Int num) - _ ->(txt, tokens :+ nextToken(ch)) + match ch with + _ + | isWhitespace ch -> (txt, tokens) + | ch == ?= -> identEquals txt tokens + | ch == ?! -> identNotEqual txt tokens + | isLetter(ch) -> + (var, rem) = identChar txt + val = fromCharList([ch]) ++ var + (rem, tokens :+ keyWords(val) ) + | isDigit(ch) -> + (var, rem) = identNum txt + num = fromCharList([ch]) ++ var + (rem, tokens :+ Int num) + | otherwise ->(txt, tokens :+ nextToken(ch)) Lexer.identEquals : Text -> [Token] -> (Text, [Token]) Lexer.identEquals str tokens = @@ -79,3 +79,9 @@ Lexer.identNum str = Lexer.isLetter : Char -> Boolean Lexer.isLetter ch = ((?a <= ch) && (ch <= ?z)) || ((?A <= ch) && (ch <= ?Z)) || (ch == ?_) + +Test.testFile : '{IO, Exception} [Token] +Test.testFile = do + path = FilePath "test.txt" + tokens = lexer << getText <| open path Read + tokens :+ Eof diff --git a/unison/main.u b/unison/main.u index e3582f83..3d755d0a 100644 --- a/unison/main.u +++ b/unison/main.u @@ -1,4 +1,4 @@ - +--- Test.test : '{IO, Exception} Boolean Test.test = do result = lexer """let five = 5; @@ -45,8 +45,3 @@ Test.test = do result === expect -Test.testFile : '{IO, Exception} [Token] -Test.testFile = do - path = FilePath "test.txt" - tokens = lexer << getText <| open path Read - tokens :+ Eof diff --git a/unison/parTest.txt b/unison/parTest.txt index 6e6da254..8c4908fd 100644 --- a/unison/parTest.txt +++ b/unison/parTest.txt @@ -1 +1,3 @@ -foobar; +a + add(b * c) + d; +add(a, b, 1, 2 * 3, 4 + 5, add(6, 7 * 8)); +add(a + b + c * d / f + g); diff --git a/unison/parser.u b/unison/parser.u index a5b3737f..0307b7e3 100644 --- a/unison/parser.u +++ b/unison/parser.u @@ -1,114 +1,337 @@ -unique type Precedence = LOWEST | EQUALS | LESSGREATER | SUM | PRODUCT | PREFIX | CALL +structural type Statement = LetStatement Token Statement | ReturnStatement Statement + | ExpressionStatement Token | ErrorStatement Statement Token | ErrorTokens [Token] + | IfStatement Token Statement Statement | BlockStatement [Statement] + | FunctionStatement [Statement] Statement | Parameter Token + | Empty -Parser.parseProgram : [Token] -> {Exception}[Statement] -Parser.parseProgram tks = - loop tks stm = let - match List.uncons tks with - Some(tk, list) - | Eof === tk -> stm - | otherwise -> - (st, res) = parseStatement tk list - loop res (stm :+ st) - None -> Exception.raise (Failure (typeLink Generic) "parseProgram: failed to match" (Any None)) - loop tks [] +structural type Precedence = LOWEST | EQUALS | LESSGREATER | SUM | PRODUCT | PREFIX | CALL + +Parser.formExpression : [Token] -> {Exception} Token +Parser.formExpression expression = + mapInt = List.map intToI64 + expressionLoop << parseGroupExpression << parsePrefix <| mapInt expression + +Parser.parseGroupExpression : [Token] -> {Exception}[Token] +Parser.parseGroupExpression tokens = + loop tokens expression = + use List :+ + match tokens with + [] -> expression + [name@(Ident _), Lparen] ++ tail -> + (call, tail) = parenLoop tail Lparen Rparen + loop (tail) (expression :+ FunCall name (parseCall call)) + Lparen +: tail -> + exp = formExpression tail + loop (balanceParen tail) (expression :+ exp) + Rparen +: tail -> expression + tk +: tail -> loop tail (expression :+ tk) + loop tokens [] + +Parser.parseCall : [Token] -> {Exception}[Token] +Parser.parseCall tokens = + loop tks = + use List +: + match tks with + [] -> [] + Fn +: tail -> + (exp, tail) = parseFunction tail + (funcToClosure exp) +: loop tail + [name@(Ident _), Lparen] ++ tail -> + (exp, tail) = parenLoop tail Lparen Rparen + formExpression ([name, Lparen] ++ exp :+ Rparen) +: loop tail + Lparen +: tail -> + (exp, tail) = parenLoop tail Lparen Rparen + formExpression exp +: loop tail + [tk, Comma] ++ tail -> tk +: loop tail + Comma +: tail -> loop tail + _ -> + (exp, tail) = parseParam tks + formExpression exp +: loop tail + loop tokens + +Parser.parseParam : [Token] -> {Exception}([Token], [Token]) +Parser.parseParam tokens = + loop tks res = + match tks with + Comma +: tail -> (res, tail) + tk +: tail -> loop tail (res :+ tk) + [] -> (res, []) + loop tokens [] + +Parser.funcToClosure : Statement -> Token +Parser.funcToClosure exp = + match exp with + FunctionStatement params (BlockStatement exp) -> + paramLoop params = + match params with + [] -> [] + Parameter tk +: tail -> tk +: paramLoop tail + _ -> [Error "parameter not found" Fn] + paramTokens = paramLoop params + expLoop exp = + match exp with + [] -> [] + ExpressionStatement tk +: tail -> tk +: expLoop tail + _ -> [Error "none expression statement found" Fn] + expTokens = expLoop exp + Closure paramTokens expTokens + _ -> Error "Function statement not found" Fn + +Parser.parsePrefix : [Token] -> [Token] +Parser.parsePrefix tokens = + loop current remain = + match remain with + [Minus, tk] ++ tail + | isExp tk-> match current with + _ :+ tk2 + | isExp tk2 -> loop (current ++ [Minus, tk]) tail + _ -> loop (current :+ PrefixExp Minus tk) tail + [Bang, tk] ++ tail + | not <| isInfix tk -> loop (current :+ PrefixExp Bang tk) tail + [Minus] -> [Error "illegeal minus found at end of expression" Minus] + tk +: tail -> loop (current :+ tk) tail + [] -> current + loop [] tokens + +Parser.infixParse : [Token] -> [Token] +Parser.infixParse tokens = + loop checked current precedence = + match current with + tk +: tail + | not <| isInfix tk -> loop (checked :+ tk) tail precedence + | lt precedence <| precedences tk -> loop (checked :+ tk) tail (precedences tk) + _ -> + match checked with + head ++ [tk1, tk2, tk3] -> head ++ [InfixExp tk1 tk2 tk3] ++ current + _ -> [ErrorExp "improperly formetted infix expression" (checked ++ current)] + loop [] tokens LOWEST -Parser.parseStatement : Token -> [Token] -> {Exception}(Statement, [Token]) -Parser.parseStatement tk tks = - match tk with - Let -> parseLet tks - Return -> parseReturn tks - _ -> parseExpression tk tks +Parser.parenLoop : [Token] -> Token -> Token -> ([Token], [Token]) +Parser.parenLoop tokens left right = + loop head tail i = + match i with + 0 -> (dropLast head, tail) + _ -> + match tail with + [] -> ([Error "failed to find right side not found" left], tail) + tk +: tail + | tk === left -> loop (head :+ tk) tail (i + 1) + | tk === right -> loop (head :+ tk) tail (i - 1) + | otherwise -> loop (head :+ tk) tail i + loop [] tokens 1 +Parser.parseReturn : [Token] -> (Statement, [Token]) +Parser.parseReturn tokens = + (expression, tail) = parseStatement tokens + match expression with + ExpressionStatement _ -> (ReturnStatement expression, tail) + _ -> + match tail with + Semicolon +: tail -> (ReturnStatement expression, tail) + _ -> (ErrorStatement expression Let, tail) + Parser.parseLet : [Token] -> {Exception}(Statement, [Token]) Parser.parseLet tokens = - (name, remain1) = grabIdent tokens - match name with - Token.Error _ _ -> handleError name remain1 - _ -> let - (assign, remain2) = grabAssign remain1 - match assign with - Token.Error _ _ -> handleError name remain2 - _ -> let - (expression, remain3) = parseLoop remain2 - match List.uncons remain3 with - Some (h, tail) - | h === Semicolon -> (LetStatement name expression, tail) - | otherwise -> (ErrorStatement expression Return, remain3) - None -> Exception.raise (Failure (typeLink Generic) "parseLet: let statement incorret format" (Any Return)) - -Parser.parseReturn : [Token] -> {Exception}(Statement, [Token]) -Parser.parseReturn tokens = - (expression, remain) = parseLoop tokens - match List.uncons remain with - Some (h, tail) - | h === Semicolon -> (ReturnStatement expression, tail) - | otherwise -> (ErrorStatement expression Return, remain) - None -> Exception.raise (Failure (typeLink Generic) "parseReturn: let statement incorret format" (Any Return)) + match tokens with + [] -> Exception.raise (Failure (typeLink Generic) "parseLet: end of file reached no end marker found" (Any([])) ) + name@tk +: tail + | not <| isIdent tk -> handleError tk tail + | otherwise -> + match tail with + [] -> Exception.raise (Failure (typeLink Generic) "parseLet: end of file reached no end marker found" (Any([])) ) + tk +: tail + | tk !== Assign -> handleError tk tail + | otherwise -> + (expression, tail) = parseStatement tail + match expression with + ExpressionStatement _ -> (LetStatement name expression, tail) + _ -> + match tail with + Semicolon +: tail -> (LetStatement name expression, tail) + _ -> (ErrorStatement expression Let, tail) + + +Parser.parseStatement : [Token] -> {Exception}(Statement, [Token]) +Parser.parseStatement tokens = + match tokens with + Let +: tail -> parseLet tail + Return +: tail -> parseReturn tail + If +: tail -> parseIfStatement tail + Fn +: tail -> parseFunction tail + _ +: tail -> + (statement, remain) = parseExpression tokens + (ExpressionStatement statement, remain) + [] -> Exception.raise (Failure (typeLink Generic) "parseStatement: end of file reached no end marker found" (Any tokens)) + + +Parser.parseFunction : [Token] -> {Exception}(Statement, [Token]) +Parser.parseFunction tokens = + match tokens with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Lparen -> handleError tk tail + | otherwise -> + (parameters, tail) = parseParameters tail + match tail with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Lbrace -> handleError tk tail + | otherwise -> + (block, tail) = parseBlockStatement tail + (FunctionStatement parameters block, tail) + +Parser.parseParameters : [Token] -> ([Statement], [Token]) +Parser.parseParameters tokens = + (param, tail) = parenLoop tokens Lparen Rparen + loop param = + match param with + [] -> [] + [tk@(Ident _)] -> [Parameter tk] + [tk@(Ident _), Comma] ++ tail -> (Parameter tk) +: loop tail + tk +: tail -> [ErrorStatement (ErrorTokens param) tk] + statements = loop param + (statements, tail) + + +Parser.parseCondition : [Token] -> {Exception}(Token, [Token]) +Parser.parseCondition tokens = + (cond, tail) = parenLoop tokens Lparen Rparen + (formExpression cond, tail) + +Parser.parseBlockStatement : [Token] -> {Exception}(Statement, [Token]) +Parser.parseBlockStatement tokens = + (block, tail) = parenLoop tokens Lbrace Rbrace + loop block = let + (statement, tail) = parseStatement let + if List.contains Semicolon block then + block + else + Return +: block :+ Semicolon + match tail with + [] -> [statement] + _ -> statement +: loop tail + (BlockStatement (loop block), tail) + +Parser.parseIfStatement : [Token] -> {Exception}(Statement, [Token]) +Parser.parseIfStatement tokens = + match tokens with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Lparen-> handleError tk tail + | otherwise -> + (cond, tail) = parseCondition tail + match tail with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Lbrace -> handleError tk tail + | otherwise -> + (ifBlock, tail) = parseBlockStatement tail + match tail with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Else -> (IfStatement cond ifBlock Empty, tail) + | otherwise -> + match tail with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Lbrace -> handleError tk tail + | otherwise -> + (elseBlock, tail) = parseBlockStatement tail + (IfStatement cond ifBlock elseBlock, tail) + +Test.testParser : '{IO, Exception}[Statement] +Test.testParser = do + path = FilePath "parTest.txt" + tokens = lexer << getText <| open path Read + program = tokens :+ Eof + parseProgram program Parser.parseLoop: [Token] -> {Exception}([Token], [Token]) Parser.parseLoop tks = loop exp tks = - match List.uncons tks with - Some(tk, rem) -> - match tk with - Eof -> (exp, tks) - Semicolon -> (exp, tks) - Let -> (exp, tks) - Return -> (exp, tks) - _ -> loop (exp :+ tk) rem - None -> Exception.raise (Failure (typeLink Generic) "parseLoop: let statement incorret format" (Any(take 1 tks))) + match tks with + Eof +: _ -> (exp, tks) + Semicolon +: _ -> (exp, tks) + Let +: _ -> (exp, tks) + Return +: _ -> (exp, tks) + tk +: tail -> loop (exp :+ tk) tail + _ -> Exception.raise (Failure (typeLink Generic) "parseLoop: let statement incorret format" (Any(take 1 tks))) loop [] tks ---TODO move parsing funtions to here once done +Parser.parseExpression : [Token] -> {Exception}(Token, [Token]) +Parser.parseExpression tokens = + (expression, remain) = parseLoop tokens + match remain with + Semicolon +: tail -> (formExpression expression, tail) + _ -> (Error "semicolon not found" Semicolon, remain) -Parser.grabNext : [Token] -> {Exception}(Token, [Token]) -Parser.grabNext tks = - match List.uncons tks with - None -> Exception.raise (Failure (typeLink Generic) "grabNext: let statement incorret format" (Any(take 1 tks))) - Some res -> res +Parser.balanceParen : [Token] -> [Token] +Parser.balanceParen tokens = + loop tks i = + match i with + 0 -> tks + _ -> + match tks with + [] -> [Error "unbalanced parenthesis" Lparen] + Lparen +: tail -> loop tail (i + 1) + Rparen +: tail -> loop tail (i - 1) + _ +: tail -> loop tail i + loop tokens 1 -Parser.grabIdent : [Token] -> {Exception}(Token, [Token]) -Parser.grabIdent tks = - match List.uncons tks with - Some (tk, res) - | isIdent tk -> (tk, res) - | otherwise -> (Error "Ident Token not found" tk, res) - None -> Exception.raise (Failure (typeLink Generic) "grabIdnt: let statement incorret format" (Any(take 1 tks)) ) +Parser.intToI64 : Token -> Token +Parser.intToI64 = cases + Int i -> match Int.fromText i with + Some e -> I64 e + None -> Illegal i + tk -> tk +Parser.expressionLoop : [Token] -> {Exception} Token +Parser.expressionLoop tokens = + match tokens with + [tk] -> tk + _ -> expressionLoop <| infixParse tokens + +Parser.precedences : Token -> Precedence +Parser.precedences = cases + Eq -> EQUALS + NotEq -> EQUALS + Lt -> LESSGREATER + Gt -> LESSGREATER + Plus -> SUM + Minus -> SUM + Slash -> PRODUCT + Asterisk -> PRODUCT + otherwise -> LOWEST -Parser.grabAssign : [Token] -> {Exception}(Token, [Token]) -Parser.grabAssign tks = - match List.uncons tks with - Some (tk, res) - | tk === Assign -> (tk, res) - | otherwise -> (Error "Assign Token not found" tk, res) - _ -> Exception.raise (Failure (typeLink Generic) "gradAssign: let statement incorret format" (Any(take 1 tks))) - +Parser.parseProgram : [Token] -> {Exception}[Statement] +Parser.parseProgram tokens = + loop tokens stm = let + match tokens with + [] -> Exception.raise (Failure (typeLink Generic) "parseProgram: end of file reached no end marker found" (Any None)) + [Eof] -> stm + _ -> + (st, res) = parseStatement tokens + loop res (stm :+ st) + loop tokens [] Parser.handleError: Token -> [Token] -> (Statement, [Token]) Parser.handleError tk tks = loop tks err = - match List.uncons tks with - Some(t, rem) - | t === Let -> (ErrorStatement err tk, tks) - | t === Semicolon -> (ErrorStatement (err :+ t) tk, rem) - | t === Eof -> (ErrorStatement (err :+ t) tk, tks) - | otherwise -> loop rem (err :+ t) - None -> Exception.raise (Failure (typeLink Generic) "handleError: let statement incorret format" (Any(take 1 tks))) + match tks with + [Eof] -> (ErrorStatement (ErrorTokens (err :+ Eof)) tk, tks) + Let +: tail -> (ErrorStatement (ErrorTokens err) Let, tks) + Semicolon +: tail -> (ErrorStatement (ErrorTokens (err :+ Semicolon)) tk, tail) + Rparen +: tail -> (ErrorStatement (ErrorTokens (err :+ Semicolon)) tk, tail) + Lbrace +: tail -> (ErrorStatement (ErrorTokens (err :+ Semicolon)) tk, tail) + Rbrace +: tail -> (ErrorStatement (ErrorTokens (err :+ Semicolon)) tk, tail) + tk +: tail -> loop tail (err :+ tk) + [] -> Exception.raise (Failure (typeLink Generic) "handleError: end of file reached no end marker found" (Any([]))) loop tks [] - -Test.testParser : '{IO, Exception}[Statement] -Test.testParser = do - path = FilePath "parTest.txt" - tokens = lexer << getText <| open path Read - program = tokens :+ Eof - parseProgram program - Test.testTokens : '{IO, Exception}[Token] Test.testTokens = do path = FilePath "parTest.txt" @@ -116,28 +339,10 @@ Test.testTokens = do tokens :+ Eof -Parser.prefixParse : [Token] -> [Token] -Parser.prefixParse tks = - tks -Parser.infixParse : [Token] -> [Token] -Parser.infixParse tks = - tks - -Parser.infixFunc : Token -> Optional Statement -Parser.infixFunc = cases - _ -> None - -Parser.prefixFunc : Token -> Optional Statement -Parser.prefixFunc = cases - _ -> None - -Parser.parseExpression : Token -> [Token] -> {Exception}(Statement, [Token]) -- todo might have to return a statement -Parser.parseExpression token tokens = - (expression, remain) = parseLoop tokens - match List.uncons tokens with - None -> Exception.raise (Failure (typeLink Generic) "parseExpression: let statement incorret format" (Any token)) - Some (tk, rem) -> let - match prefixFunc tk with - None -> (ExpressionStatement [], rem) - Some x -> (ExpressionStatement [], rem) \ No newline at end of file +-- test.testparser : '{IO, Exception}[statement] +-- test.testparser = do +-- path = filepath "partest.txt" +-- tokens = lexer << gettext <| open path read +-- program = tokens :+ eof +-- parseprogram program diff --git a/unison/repl.u b/unison/repl.u new file mode 100644 index 00000000..4c74636f --- /dev/null +++ b/unison/repl.u @@ -0,0 +1,13 @@ + + +Repl.main : '{IO, Exception}() +Repl.main = do + loop = do + input = !readLine + match input with + "exit" -> () + _ -> + tokens = lexer <| input + printLine (Debug.toText <| parseProgram (tokens :+ Eof)) + !loop + !loop \ No newline at end of file diff --git a/unison/token.u b/unison/token.u index be5e50a5..c554bc3e 100644 --- a/unison/token.u +++ b/unison/token.u @@ -1,6 +1,6 @@ -unique type Token = Illegal Text | Ident Text | Int Text - | Error Text Token | Eof +structural type Token = Illegal Text | Ident Text | Int Text + | Error Text Token | ErrorExp Text [Token] | Eof | Assign | Plus | Comma | Semicolon | Lparen | Rparen | Lbrace | Rbrace | Minus @@ -8,6 +8,9 @@ unique type Token = Illegal Text | Ident Text | Int Text | Fn | Let | Gt| Lt | True | False | If | Else | Return | Eq | NotEq + | I64 Int | InfixExp Token Token Token + | PrefixExp Token Token | Check [Token] + | FunCall Token [Token] | Closure [Token] [Token] Token.keyWords : Text -> Token Token.keyWords word = @@ -31,33 +34,36 @@ Token.isInt = cases Int _ -> true _ -> false -Token.toString: Token -> Text -Token.toString = cases - Illegal t -> t - Ident t -> t - Int t -> t - Fn -> "fn" - Let -> "let" - Assign -> "=" - Plus -> "+" - Minus -> "-" - Asterisk -> "*" - Slash -> "/" - Bang -> "!" - Gt -> ">" - Lt -> "<" - Eq -> "==" - NotEq -> "!=" - True -> "true" - False -> "false" - If -> "if" - Else -> "else" - Return -> "return" - Comma -> "," - Semicolon -> ";" - Lparen -> "(" - Rparen -> ")" - Lbrace -> "{" - Rbrace -> "}" - Eof -> "end" - Error _ tk -> toString tk +Token.isIntOrIdent : Token -> Boolean +Token.isIntOrIdent = cases + Ident _ -> true + Int _ -> true + I64 _ -> true + _ -> false + +Token.isExp : Token -> Boolean +Token.isExp = cases + InfixExp _ _ _ -> true + PrefixExp _ _ -> true + Int _ -> true + Ident _ -> true + I64 _ -> true + _ -> false + +Token.isPrefix : Token -> Boolean +Token.isPrefix = cases + Bang -> true + Minus -> true + _ -> false + +Token.isInfix : Token -> Boolean +Token.isInfix = cases + Plus -> true + Minus -> true + Asterisk -> true + Slash -> true + Gt -> true + Lt -> true + NotEq -> true + Eq -> true + _ -> false \ No newline at end of file From 9bd2f04627ce05aa385e312e00a3b6042e6b4e93 Mon Sep 17 00:00:00 2001 From: cobaltburn <81337170+cobaltburn@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:11:00 -0500 Subject: [PATCH 6/6] Add files via upload implemented chapter 2 --- unison/parser.u | 319 ++++++++++++++++++++++++------------------------ 1 file changed, 157 insertions(+), 162 deletions(-) diff --git a/unison/parser.u b/unison/parser.u index 0307b7e3..b34a7060 100644 --- a/unison/parser.u +++ b/unison/parser.u @@ -7,11 +7,113 @@ structural type Statement = LetStatement Token Statement | ReturnStatement State structural type Precedence = LOWEST | EQUALS | LESSGREATER | SUM | PRODUCT | PREFIX | CALL +Parser.parseProgram : [Token] -> {Exception}[Statement] +Parser.parseProgram tokens = + loop tokens stm = let + match tokens with + [] -> Exception.raise (Failure (typeLink Generic) "parseProgram: end of file reached no end marker found" (Any None)) + [Eof] -> stm + _ -> + (st, res) = parseStatement tokens + loop res (stm :+ st) + loop tokens [] + +Parser.parseStatement : [Token] -> {Exception}(Statement, [Token]) +Parser.parseStatement tokens = + match tokens with + Let +: tail -> parseLet tail + Return +: tail -> parseReturn tail + If +: tail -> parseIfStatement tail + Fn +: tail -> parseFunction tail + _ +: tail -> + (statement, remain) = parseExpression tokens + (ExpressionStatement statement, remain) + [] -> Exception.raise (Failure (typeLink Generic) "parseStatement: end of file reached no end marker found" (Any tokens)) + +Parser.parseReturn : [Token] -> (Statement, [Token]) +Parser.parseReturn tokens = + (expression, tail) = parseStatement tokens + match expression with + ExpressionStatement _ -> (ReturnStatement expression, tail) + _ -> + match tail with + Semicolon +: tail -> (ReturnStatement expression, tail) + _ -> (ErrorStatement expression Let, tail) + +Parser.parseLet : [Token] -> {Exception}(Statement, [Token]) +Parser.parseLet tokens = + match tokens with + [] -> Exception.raise (Failure (typeLink Generic) "parseLet: end of file reached no end marker found" (Any([])) ) + name@tk +: tail + | not <| isIdent tk -> handleError tk tail + | otherwise -> + match tail with + [] -> Exception.raise (Failure (typeLink Generic) "parseLet: end of file reached no end marker found" (Any([])) ) + tk +: tail + | tk !== Assign -> handleError tk tail + | otherwise -> + (expression, tail) = parseStatement tail + match expression with + ExpressionStatement _ -> (LetStatement name expression, tail) + _ -> + match tail with + Semicolon +: tail -> (LetStatement name expression, tail) + _ -> (ErrorStatement expression Let, tail) + +Parser.parseExpression : [Token] -> {Exception}(Token, [Token]) +Parser.parseExpression tokens = + (expression, remain) = parseLoop tokens + match remain with + Semicolon +: tail -> (formExpression expression, tail) + _ -> (Error "semicolon not found" Semicolon, remain) + Parser.formExpression : [Token] -> {Exception} Token Parser.formExpression expression = mapInt = List.map intToI64 expressionLoop << parseGroupExpression << parsePrefix <| mapInt expression +Parser.expressionLoop : [Token] -> {Exception} Token +Parser.expressionLoop tokens = + match tokens with + [tk] -> tk + _ -> expressionLoop <| infixParse tokens + +Parser.intToI64 : Token -> Token +Parser.intToI64 = cases + Int i -> match Int.fromText i with + Some e -> I64 e + None -> Illegal i + tk -> tk + +Parser.parsePrefix : [Token] -> [Token] +Parser.parsePrefix tokens = + loop current remain = + match remain with + [Minus, tk] ++ tail + | isExp tk-> match current with + _ :+ tk2 + | isExp tk2 -> loop (current ++ [Minus, tk]) tail + _ -> loop (current :+ PrefixExp Minus tk) tail + [Bang, tk] ++ tail + | not <| isInfix tk -> loop (current :+ PrefixExp Bang tk) tail + [Minus] -> [Error "illegeal minus found at end of expression" Minus] + tk +: tail -> loop (current :+ tk) tail + [] -> current + loop [] tokens + +Parser.infixParse : [Token] -> [Token] +Parser.infixParse tokens = + loop checked current precedence = + match current with + tk +: tail + | not <| isInfix tk -> loop (checked :+ tk) tail precedence + | lt precedence <| precedences tk -> loop (checked :+ tk) tail (precedences tk) + _ -> + match checked with + head ++ [tk1, tk2, tk3] -> head ++ [InfixExp tk1 tk2 tk3] ++ current + _ -> [ErrorExp "improperly formetted infix expression" (checked ++ current)] + loop [] tokens LOWEST + Parser.parseGroupExpression : [Token] -> {Exception}[Token] Parser.parseGroupExpression tokens = loop tokens expression = @@ -28,6 +130,49 @@ Parser.parseGroupExpression tokens = tk +: tail -> loop tail (expression :+ tk) loop tokens [] +Parser.balanceParen : [Token] -> [Token] +Parser.balanceParen tokens = + loop tks i = + match i with + 0 -> tks + _ -> + match tks with + [] -> [Error "unbalanced parenthesis" Lparen] + Lparen +: tail -> loop tail (i + 1) + Rparen +: tail -> loop tail (i - 1) + _ +: tail -> loop tail i + loop tokens 1 + +Parser.parenLoop : [Token] -> Token -> Token -> ([Token], [Token]) +Parser.parenLoop tokens left right = + loop head tail i = + match i with + 0 -> (dropLast head, tail) + _ -> + match tail with + [] -> ([Error "failed to find right side not found" left], tail) + tk +: tail + | tk === left -> loop (head :+ tk) tail (i + 1) + | tk === right -> loop (head :+ tk) tail (i - 1) + | otherwise -> loop (head :+ tk) tail i + loop [] tokens 1 + +Parser.parseFunction : [Token] -> {Exception}(Statement, [Token]) +Parser.parseFunction tokens = + match tokens with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Lparen -> handleError tk tail + | otherwise -> + (parameters, tail) = parseParameters tail + match tail with + [] -> (ErrorStatement Empty If, tokens) + tk +: tail + | tk !== Lbrace -> handleError tk tail + | otherwise -> + (block, tail) = parseBlockStatement tail + (FunctionStatement parameters block, tail) + Parser.parseCall : [Token] -> {Exception}[Token] Parser.parseCall tokens = loop tks = @@ -77,112 +222,7 @@ Parser.funcToClosure exp = expTokens = expLoop exp Closure paramTokens expTokens _ -> Error "Function statement not found" Fn - -Parser.parsePrefix : [Token] -> [Token] -Parser.parsePrefix tokens = - loop current remain = - match remain with - [Minus, tk] ++ tail - | isExp tk-> match current with - _ :+ tk2 - | isExp tk2 -> loop (current ++ [Minus, tk]) tail - _ -> loop (current :+ PrefixExp Minus tk) tail - [Bang, tk] ++ tail - | not <| isInfix tk -> loop (current :+ PrefixExp Bang tk) tail - [Minus] -> [Error "illegeal minus found at end of expression" Minus] - tk +: tail -> loop (current :+ tk) tail - [] -> current - loop [] tokens - -Parser.infixParse : [Token] -> [Token] -Parser.infixParse tokens = - loop checked current precedence = - match current with - tk +: tail - | not <| isInfix tk -> loop (checked :+ tk) tail precedence - | lt precedence <| precedences tk -> loop (checked :+ tk) tail (precedences tk) - _ -> - match checked with - head ++ [tk1, tk2, tk3] -> head ++ [InfixExp tk1 tk2 tk3] ++ current - _ -> [ErrorExp "improperly formetted infix expression" (checked ++ current)] - loop [] tokens LOWEST - - -Parser.parenLoop : [Token] -> Token -> Token -> ([Token], [Token]) -Parser.parenLoop tokens left right = - loop head tail i = - match i with - 0 -> (dropLast head, tail) - _ -> - match tail with - [] -> ([Error "failed to find right side not found" left], tail) - tk +: tail - | tk === left -> loop (head :+ tk) tail (i + 1) - | tk === right -> loop (head :+ tk) tail (i - 1) - | otherwise -> loop (head :+ tk) tail i - loop [] tokens 1 - - -Parser.parseReturn : [Token] -> (Statement, [Token]) -Parser.parseReturn tokens = - (expression, tail) = parseStatement tokens - match expression with - ExpressionStatement _ -> (ReturnStatement expression, tail) - _ -> - match tail with - Semicolon +: tail -> (ReturnStatement expression, tail) - _ -> (ErrorStatement expression Let, tail) - -Parser.parseLet : [Token] -> {Exception}(Statement, [Token]) -Parser.parseLet tokens = - match tokens with - [] -> Exception.raise (Failure (typeLink Generic) "parseLet: end of file reached no end marker found" (Any([])) ) - name@tk +: tail - | not <| isIdent tk -> handleError tk tail - | otherwise -> - match tail with - [] -> Exception.raise (Failure (typeLink Generic) "parseLet: end of file reached no end marker found" (Any([])) ) - tk +: tail - | tk !== Assign -> handleError tk tail - | otherwise -> - (expression, tail) = parseStatement tail - match expression with - ExpressionStatement _ -> (LetStatement name expression, tail) - _ -> - match tail with - Semicolon +: tail -> (LetStatement name expression, tail) - _ -> (ErrorStatement expression Let, tail) - - -Parser.parseStatement : [Token] -> {Exception}(Statement, [Token]) -Parser.parseStatement tokens = - match tokens with - Let +: tail -> parseLet tail - Return +: tail -> parseReturn tail - If +: tail -> parseIfStatement tail - Fn +: tail -> parseFunction tail - _ +: tail -> - (statement, remain) = parseExpression tokens - (ExpressionStatement statement, remain) - [] -> Exception.raise (Failure (typeLink Generic) "parseStatement: end of file reached no end marker found" (Any tokens)) - -Parser.parseFunction : [Token] -> {Exception}(Statement, [Token]) -Parser.parseFunction tokens = - match tokens with - [] -> (ErrorStatement Empty If, tokens) - tk +: tail - | tk !== Lparen -> handleError tk tail - | otherwise -> - (parameters, tail) = parseParameters tail - match tail with - [] -> (ErrorStatement Empty If, tokens) - tk +: tail - | tk !== Lbrace -> handleError tk tail - | otherwise -> - (block, tail) = parseBlockStatement tail - (FunctionStatement parameters block, tail) - Parser.parseParameters : [Token] -> ([Statement], [Token]) Parser.parseParameters tokens = (param, tail) = parenLoop tokens Lparen Rparen @@ -195,7 +235,6 @@ Parser.parseParameters tokens = statements = loop param (statements, tail) - Parser.parseCondition : [Token] -> {Exception}(Token, [Token]) Parser.parseCondition tokens = (cond, tail) = parenLoop tokens Lparen Rparen @@ -242,13 +281,6 @@ Parser.parseIfStatement tokens = (elseBlock, tail) = parseBlockStatement tail (IfStatement cond ifBlock elseBlock, tail) -Test.testParser : '{IO, Exception}[Statement] -Test.testParser = do - path = FilePath "parTest.txt" - tokens = lexer << getText <| open path Read - program = tokens :+ Eof - parseProgram program - Parser.parseLoop: [Token] -> {Exception}([Token], [Token]) Parser.parseLoop tks = loop exp tks = @@ -260,40 +292,6 @@ Parser.parseLoop tks = tk +: tail -> loop (exp :+ tk) tail _ -> Exception.raise (Failure (typeLink Generic) "parseLoop: let statement incorret format" (Any(take 1 tks))) loop [] tks - -Parser.parseExpression : [Token] -> {Exception}(Token, [Token]) -Parser.parseExpression tokens = - (expression, remain) = parseLoop tokens - match remain with - Semicolon +: tail -> (formExpression expression, tail) - _ -> (Error "semicolon not found" Semicolon, remain) - - -Parser.balanceParen : [Token] -> [Token] -Parser.balanceParen tokens = - loop tks i = - match i with - 0 -> tks - _ -> - match tks with - [] -> [Error "unbalanced parenthesis" Lparen] - Lparen +: tail -> loop tail (i + 1) - Rparen +: tail -> loop tail (i - 1) - _ +: tail -> loop tail i - loop tokens 1 - -Parser.intToI64 : Token -> Token -Parser.intToI64 = cases - Int i -> match Int.fromText i with - Some e -> I64 e - None -> Illegal i - tk -> tk - -Parser.expressionLoop : [Token] -> {Exception} Token -Parser.expressionLoop tokens = - match tokens with - [tk] -> tk - _ -> expressionLoop <| infixParse tokens Parser.precedences : Token -> Precedence Parser.precedences = cases @@ -307,16 +305,6 @@ Parser.precedences = cases Asterisk -> PRODUCT otherwise -> LOWEST -Parser.parseProgram : [Token] -> {Exception}[Statement] -Parser.parseProgram tokens = - loop tokens stm = let - match tokens with - [] -> Exception.raise (Failure (typeLink Generic) "parseProgram: end of file reached no end marker found" (Any None)) - [Eof] -> stm - _ -> - (st, res) = parseStatement tokens - loop res (stm :+ st) - loop tokens [] Parser.handleError: Token -> [Token] -> (Statement, [Token]) Parser.handleError tk tks = @@ -332,11 +320,11 @@ Parser.handleError tk tks = [] -> Exception.raise (Failure (typeLink Generic) "handleError: end of file reached no end marker found" (Any([]))) loop tks [] -Test.testTokens : '{IO, Exception}[Token] -Test.testTokens = do - path = FilePath "parTest.txt" - tokens = lexer << getText <| open path Read - tokens :+ Eof +-- Test.testTokens : '{IO, Exception}[Token] +-- Test.testTokens = do +-- path = FilePath "parTest.txt" +-- tokens = lexer << getText <| open path Read +-- tokens :+ Eof @@ -346,3 +334,10 @@ Test.testTokens = do -- tokens = lexer << gettext <| open path read -- program = tokens :+ eof -- parseprogram program + +-- Test.testParser : '{IO, Exception}[Statement] +-- Test.testParser = do +-- path = FilePath "parTest.txt" +-- tokens = lexer << getText <| open path Read +-- program = tokens :+ Eof +-- parseProgram program