From f7ec10646b80d111b216031156e80784e45aecf0 Mon Sep 17 00:00:00 2001 From: Amneesh Singh Date: Sun, 13 Aug 2023 07:37:14 +0530 Subject: [PATCH] parser: initial expression parsing logic Signed-off-by: Amneesh Singh --- src/parser/entity.rs | 28 ++- src/parser/expr.rs | 395 ++++++++++++++++++++++++++++++++-------- src/parser/literal.rs | 10 + src/parser/mod.rs | 8 +- src/parser/statement.rs | 87 +++++++++ 5 files changed, 437 insertions(+), 91 deletions(-) create mode 100644 src/parser/statement.rs diff --git a/src/parser/entity.rs b/src/parser/entity.rs index 3a86ca0..2874c23 100644 --- a/src/parser/entity.rs +++ b/src/parser/entity.rs @@ -9,6 +9,7 @@ use crate::lexer::{ use std::rc::Rc; impl<'a> Parser<'a> { + /// entity ::= module | class | fn pub(super) fn parse_entity(&mut self) -> Option { use TokenKeyword::*; let token = self.peek_token(); @@ -17,7 +18,7 @@ impl<'a> Parser<'a> { match keyword { Module => Some(Entity::Module(self.parse_module()?)), Class => Some(Entity::Class(self.parse_class()?)), - Fn => Some(Entity::Fn(self.parse_function()?)), + Fn => Some(Entity::Fn(self.parse_fn()?)), _ => { self.error_expected_peek("entity"); None @@ -29,6 +30,7 @@ impl<'a> Parser<'a> { } } + /// module ::= "module" ident "{" { module | fn | static | class } "}" fn parse_module(&mut self) -> Option { self.next_token(); @@ -46,7 +48,7 @@ impl<'a> Parser<'a> { if let TokenKind::Keyword(keyword) = &self.peek_token().kind { children.push(match keyword { Module => ModuleChildren::Module(self.parse_module()?), - Fn => ModuleChildren::Fn(self.parse_function()?), + Fn => ModuleChildren::Fn(self.parse_fn()?), Static => ModuleChildren::Static(self.parse_static()?), Class => ModuleChildren::Class(self.parse_class()?), _ => { @@ -65,6 +67,7 @@ impl<'a> Parser<'a> { Some(Module { name, children }) } + /// class ::= "class" ident "{" { fn | static | let } "}" fn parse_class(&mut self) -> Option { self.next_token(); @@ -81,7 +84,7 @@ impl<'a> Parser<'a> { self.trim_newlines(); if let 
TokenKind::Keyword(keyword) = &self.peek_token().kind { children.push(match keyword { - Fn => ClassChildren::Fn(self.parse_function()?), + Fn => ClassChildren::Fn(self.parse_fn()?), Static => ClassChildren::Static(self.parse_static()?), Let => ClassChildren::Let(self.parse_let()?), _ => { @@ -100,7 +103,9 @@ impl<'a> Parser<'a> { Some(Class { name, children }) } - fn parse_function(&mut self) -> Option { + /// fn ::= "fn" ident "(" [ identWithTy { "," identWithTy } ] ")" [ ":" ty ] + /// "{" { statement } "}" + fn parse_fn(&mut self) -> Option { self.next_token(); let name = self.parse_ident()?; @@ -115,7 +120,7 @@ impl<'a> Parser<'a> { loop { if self.peek_token().kind == TokenKind::Identifier { - params.push(self.parse_ident_with_type()?); + params.push(self.parse_ident_with_ty()?); } if !self.skip_token(TokenKind::Symbol(TokenSymbol::Comma)) { @@ -164,16 +169,7 @@ fn test_parse_entity() { } } - - fn fn02(): int { - - - - - - - - } + fn fn02 (): int { } }"#, ); assert_eq!( @@ -190,7 +186,7 @@ fn test_parse_entity() { children: vec![Statement::Static(Let { name: "let01".into(), ty: Ty::Int, - expr: Some(Expr::Int(4)) + expr: Some(Expr::Literal(Literal::Int(4))) })] })] }), diff --git a/src/parser/expr.rs b/src/parser/expr.rs index 2ec93c8..2126a64 100644 --- a/src/parser/expr.rs +++ b/src/parser/expr.rs @@ -1,109 +1,356 @@ use super::Parser; -use crate::ast::*; +use crate::ast::{ + self, + *, +}; use crate::lexer::{ + TokenDelimiter, TokenKeyword, TokenKind, TokenLiteral, TokenSymbol, }; +use std::rc::Rc; impl<'a> Parser<'a> { - pub(super) fn parse_statement(&mut self) -> Option { - use TokenKeyword::*; - - match self.peek_token().kind { - TokenKind::Keyword(Static) => Some(Statement::Static(self.parse_static()?)), - TokenKind::Keyword(Let) => Some(Statement::Let(self.parse_let()?)), - _ => Some(Statement::Expr(self.parse_expr()?)), - } - } - - pub(super) fn parse_static(&mut self) -> Option { - self.next_token(); - if self.peek_token().kind != 
TokenKind::Keyword(TokenKeyword::Let) { - self.error_expected_peek("let"); - return None; - } - - self.parse_let() - } - - pub(super) fn parse_let(&mut self) -> Option { + /// exprIf ::= "if" expr exprBlock [ "else" ( exprBlock | exprIf ) ] + fn parse_expr_if(&mut self) -> Option { + // skip "if" self.next_token(); - let (name, ty) = self.parse_ident_with_type()?; - let expr; + let cond = Box::new(self.parse_expr()?); + let then = self.parse_expr_block()?; - if self.skip_token(TokenKind::Symbol(TokenSymbol::Eq)) { - expr = Some(self.parse_expr()?); + if !self.skip_token(TokenKind::Keyword(TokenKeyword::Else)) { + return Some(If { + cond, + then, + or: None, + }); + } + + if self.peek_token().kind != TokenKind::Keyword(TokenKeyword::If) { + return Some(If { + cond, + then, + or: Some(Box::new(ElseType::Else(self.parse_expr_block()?))), + }); + } + + Some(If { + cond, + then, + or: Some(Box::new(ElseType::If(self.parse_expr_if()?))), + }) + } + + /// exprBlock ::= "{" { statement } "}" + fn parse_expr_block(&mut self) -> Option> { + let mut statements = vec![]; + + // skip { + self.next_token(); + + loop { self.trim_newlines(); - } else if self.skip_token(TokenKind::Newline) { - expr = None; - } else { - self.error_expected_peek("= or newline"); + if self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) { + break; + } + statements.push(self.parse_statement()?); + } + + Some(statements) + } + + /// exprLoop ::= "loop" exprBlock + fn parse_expr_loop(&mut self) -> Option> { + self.next_token(); + if self.peek_token().kind != TokenKind::Delimiter(TokenDelimiter::BraceOpen) { + self.error_expected_peek("{"); return None; } - Some(Let { name, ty, expr }) + self.parse_expr_block() } - fn parse_expr(&mut self) -> Option { + /// exprAtom ::= ( "(" expr ")" ) | ident | int | float | char | exprBlock | exprLoop | exprIf + fn parse_expr_atom(&mut self) -> Option { + use ast::Literal::*; use TokenKind::*; - match self.peek_token().kind { - Literal(TokenLiteral::Int) =>
Some(Expr::Int(self.parse_int()?)), - Literal(TokenLiteral::Float) => Some(Expr::Float(self.parse_float()?)), - Literal(TokenLiteral::Char) => Some(Expr::Char(self.parse_char()?)), + // TODO: check lvalue validity in the analysis phase + Some(match self.peek_token().kind { + Delimiter(TokenDelimiter::ParenOpen) => { + self.next_token(); // skip ( + + let expr = self.parse_expr()?; + + if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenClose)) { + self.error_expected_peek(")"); + return None; + } + + expr + } + Identifier => { + let token = self.next_token(); + Expr::Identifier(Rc::clone(&token.val)) + } + Literal(TokenLiteral::Int) => Expr::Literal(Int(self.parse_int()?)), + Literal(TokenLiteral::Float) => Expr::Literal(Float(self.parse_float()?)), + Literal(TokenLiteral::Char) => Expr::Literal(Char(self.parse_char()?)), + Delimiter(TokenDelimiter::BraceOpen) => Expr::Block(self.parse_expr_block()?), + Keyword(TokenKeyword::Loop) => Expr::Loop(self.parse_expr_loop()?), + Keyword(TokenKeyword::If) => Expr::If(self.parse_expr_if()?), _ => { self.error_expected_peek("expression"); - None + return None; } - } + }) } -} -#[test] -fn test_parse_let() { - let mut parser = Parser::new( - r#"static let test01: int = 4 - let test02: char = '6' - static let test03: float - let test04 = 9"#, - ); - assert_eq!( - parser.parse_static(), - Some(Let { - name: "test01".into(), - ty: Ty::Int, - expr: Some(Expr::Int(4)) + /// exprUnary ::= [ unaryOp ] exprAtom + /// unaryOp ::= "+" | "-" | "~" + fn parse_expr_unary(&mut self) -> Option { + use TokenSymbol::*; + Some(match self.peek_token().kind { + TokenKind::Symbol(symbol @ (Minus | Plus | Tilde)) => { + self.next_token(); + Expr::Op(symbol, Box::new(self.parse_expr_atom()?), None) + } + _ => self.parse_expr_atom()?, }) - ); - assert_eq!( - parser.parse_let(), - Some(Let { - name: "test02".into(), - ty: Ty::Char, - expr: Some(Expr::Char('6')) + } + + /// exprArithmeticMul ::= exprUnary [ arithmeticMulOp exprArithmeticMul 
] + /// arithmeticMulOp ::= "*" | "/" | "%" + fn parse_expr_arithmetic_mul(&mut self) -> Option { + use TokenSymbol::*; + let lhs = self.parse_expr_unary()?; + + Some(match self.peek_token().kind { + TokenKind::Symbol(symbol @ (Star | Slash | Percent)) => { + self.next_token(); + Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_arithmetic_mul()?)), + ) + } + _ => lhs, }) - ); - assert_eq!( - parser.parse_static(), - Some(Let { - name: "test03".into(), - ty: Ty::Float, - expr: None + } + + /// exprArithmeticAdd ::= exprArithmeticMul [ arithmeticAddOp exprArithmeticAdd ] + /// arithmeticAddOp ::= "+" | "-" + fn parse_expr_arithmetic_add(&mut self) -> Option { + use TokenSymbol::*; + let lhs = self.parse_expr_arithmetic_mul()?; + + Some(match self.peek_token().kind { + TokenKind::Symbol(symbol @ (Plus | Minus)) => { + self.next_token(); + Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_arithmetic_add()?)), + ) + } + _ => lhs, }) - ); - assert_eq!(parser.parse_let(), None); + } + + /// exprBitwiseShift ::= exprArithmeticAdd [ bitwiseShiftOp exprBitwiseShift ] + /// bitwiseShiftOp ::= "<<" | ">>" + fn parse_expr_bitwise_shift(&mut self) -> Option { + use TokenSymbol::*; + let lhs = self.parse_expr_arithmetic_add()?; + + Some(match self.peek_token().kind { + TokenKind::Symbol(symbol @ (Shl | Shr)) => { + self.next_token(); + Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_bitwise_shift()?)), + ) + } + _ => lhs, + }) + } + + /// exprBitwiseAnd ::= exprBitwiseShift [ "&" exprBitwiseAnd ] + fn parse_expr_bitwise_and(&mut self) -> Option { + let lhs = self.parse_expr_bitwise_shift()?; + let symbol = TokenSymbol::And; + + if !self.skip_token(TokenKind::Symbol(symbol)) { + return Some(lhs); + } + + Some(Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_bitwise_and()?)), + )) + } + + /// exprBitwiseXor ::= exprBitwiseAnd [ "^" exprBitwiseXor ] + fn parse_expr_bitwise_xor(&mut self) -> Option { + 
let lhs = self.parse_expr_bitwise_and()?; + let symbol = TokenSymbol::Caret; + + if !self.skip_token(TokenKind::Symbol(symbol)) { + return Some(lhs); + } + + Some(Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_bitwise_xor()?)), + )) + } + + /// exprBitwiseOr ::= exprBitwiseXor [ "|" exprBitwiseOr ] + fn parse_expr_bitwise_or(&mut self) -> Option { + let lhs = self.parse_expr_bitwise_xor()?; + let symbol = TokenSymbol::Or; + + if !self.skip_token(TokenKind::Symbol(symbol)) { + return Some(lhs); + } + + Some(Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_bitwise_or()?)), + )) + } + + /// exprRelational ::= exprBitwiseOr [ relationalOp exprRelational ] + /// relationalOp ::= ">" | "<" | ">=" | "<=" | "==" | "!=" + fn parse_expr_relational(&mut self) -> Option { + use TokenSymbol::*; + let lhs = self.parse_expr_bitwise_or()?; + + Some(match self.peek_token().kind { + TokenKind::Symbol(symbol @ (Gt | Lt | GtEq | LtEq | EqEq | Ne)) => { + self.next_token(); + Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_relational()?)), + ) + } + _ => lhs, + }) + } + + /// exprLogicalAnd ::= exprRelational [ "&&" exprLogicalAnd ] + fn parse_expr_logical_and(&mut self) -> Option { + let lhs = self.parse_expr_relational()?; + let symbol = TokenSymbol::AndAnd; + + if !self.skip_token(TokenKind::Symbol(symbol)) { + return Some(lhs); + } + + Some(Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_logical_and()?)), + )) + } + + /// exprLogicalOr ::= exprLogicalAnd [ "||" exprLogicalOr ] + fn parse_expr_logical_or(&mut self) -> Option { + let lhs = self.parse_expr_logical_and()?; + let symbol = TokenSymbol::OrOr; + + if !self.skip_token(TokenKind::Symbol(symbol)) { + return Some(lhs); + } + + Some(Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_logical_or()?)), + )) + } + + /// exprAssign ::= exprLogicalOr [ assignOp exprAssign ] + /// assignOp ::= "=" | "+=" | "-=" | "*=" | "/=" | "%="
| "^=" | "<<=" | ">>=" | "&=" | "|=" + fn parse_expr_assign(&mut self) -> Option { + use TokenSymbol::*; + let lhs = self.parse_expr_logical_or()?; + + Some(match self.peek_token().kind { + TokenKind::Symbol( + symbol @ (Eq | PlusEq | MinusEq | StarEq | SlashEq | PercentEq | CaretEq | ShlEq + | ShrEq | AndEq | OrEq), + ) => { + self.next_token(); + Expr::Op( + symbol, + Box::new(lhs), + Some(Box::new(self.parse_expr_assign()?)), + ) + } + _ => lhs, + }) + } + + /// exprControl ::= "continue" | "break" | "return" [ exprControl ] | exprAssign + fn parse_expr_control(&mut self) -> Option { + use TokenKeyword::*; + + Some(match self.peek_token().kind { + TokenKind::Keyword(Continue) => { + self.next_token(); + Expr::Continue + } + TokenKind::Keyword(Break) => { + self.next_token(); + Expr::Break + } + TokenKind::Keyword(Return) => { + self.next_token(); + Expr::Return(self.parse_expr_control().map(Box::new)) + } + _ => self.parse_expr_assign()?, + }) + } + + /// entrypoint for expression parsing using recursive descent parsing + /// + /// + /// expr ::= exprControl + fn parse_expr(&mut self) -> Option { + self.parse_expr_control() + } + + pub(super) fn parse_expr_ln(&mut self) -> Option { + let expr = self.parse_expr(); + if !self.skip_token(TokenKind::Newline) { + self.error_expected_peek("newline"); + return None; + } + expr + } } #[test] fn test_parse_expr_literals() { + use Literal::*; + let mut parser = Parser::new("4524 3123.15e4 9e2 9083482.429455 'c' 3331.13.3"); - assert_eq!(parser.parse_expr(), Some(Expr::Int(4524))); - assert_eq!(parser.parse_expr(), Some(Expr::Float(3123.15e4))); - assert_eq!(parser.parse_expr(), Some(Expr::Float(9e2))); - assert_eq!(parser.parse_expr(), Some(Expr::Float(9083482.429455))); - assert_eq!(parser.parse_expr(), Some(Expr::Char('c'))); + assert_eq!(parser.parse_expr(), Some(Expr::Literal(Int(4524)))); + assert_eq!(parser.parse_expr(), Some(Expr::Literal(Float(3123.15e4)))); + assert_eq!(parser.parse_expr(), 
Some(Expr::Literal(Float(9e2)))); + assert_eq!( + parser.parse_expr(), + Some(Expr::Literal(Float(9083482.429455))) + ); + assert_eq!(parser.parse_expr(), Some(Expr::Literal(Char('c')))); assert_eq!(parser.parse_expr(), None); } diff --git a/src/parser/literal.rs b/src/parser/literal.rs index ec09a1b..f401307 100644 --- a/src/parser/literal.rs +++ b/src/parser/literal.rs @@ -1,10 +1,12 @@ use super::Parser; use crate::lexer::{ TokenKind, + TokenLiteral, TokenSymbol, }; impl<'a> Parser<'a> { + /// int ::= digit { digit } pub(super) fn parse_int(&mut self) -> Option { let val = self.next_token().val; let mut integer: i32 = 0; @@ -36,10 +38,13 @@ impl<'a> Parser<'a> { } } } + Some(integer) } + // didnt use parse() because i wanted to do this myself for some reason /// f32 can be NaN and inf as well + /// float ::= int [ "." { digit } ] [ "e" { digit } ] pub(super) fn parse_float(&mut self) -> Option { let token = self.next_token(); let mut chars = token.val.chars(); @@ -68,6 +73,10 @@ impl<'a> Parser<'a> { _ => s = 1, } + if self.peek_token().kind != TokenKind::Literal(TokenLiteral::Int) { + break; + } + exp = self.parse_int()? 
* s; break; } @@ -93,6 +102,7 @@ impl<'a> Parser<'a> { Some(float) } + /// char ::= "'" letter "'" pub(super) fn parse_char(&mut self) -> Option { // the lexer ensures that the 0th and 2nd characters are both ' self.next_token().val.chars().nth(1) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 716112e..698b986 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5,6 +5,7 @@ mod entity; mod expr; mod literal; +mod statement; use crate::ast::{ Parent, @@ -60,6 +61,7 @@ impl<'a> Parser<'a> { return self.lexer.peek_token(); } + /// newline ::= "\n" fn trim_newlines(&mut self) { while self.peek_token().kind == TokenKind::Newline { self.next_token(); @@ -75,6 +77,7 @@ impl<'a> Parser<'a> { false } + /// ty ::= "int" | "float" | "char" fn parse_ty(&mut self) -> Option { let ty: Ty; @@ -96,6 +99,7 @@ impl<'a> Parser<'a> { Some(ty) } + /// ident ::= ( letter | "_" ) { letter | digit | "_" } fn parse_ident(&mut self) -> Option> { if self.peek_token().kind != TokenKind::Identifier { self.error_expected_peek("identifier"); @@ -105,7 +109,8 @@ impl<'a> Parser<'a> { Some(Rc::clone(&self.next_token().val)) } - fn parse_ident_with_type(&mut self) -> Option<(Rc, Ty)> { + /// identWithTy ::= ident ":" ty + fn parse_ident_with_ty(&mut self) -> Option<(Rc, Ty)> { let ident = self.parse_ident()?; if !self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) { @@ -118,6 +123,7 @@ impl<'a> Parser<'a> { /// Returns an [`Entity`] vector after parsing /// + /// parent ::= { entity } /// [`Entity`]: crate::ast::Entity pub fn parse(&mut self) -> Option { let mut parent = vec![]; diff --git a/src/parser/statement.rs b/src/parser/statement.rs new file mode 100644 index 0000000..0926a6d --- /dev/null +++ b/src/parser/statement.rs @@ -0,0 +1,87 @@ +use super::Parser; +use crate::ast::*; +use crate::lexer::{ + TokenKeyword, + TokenKind, + TokenSymbol, +}; + +impl<'a> Parser<'a> { + /// statement ::= static | let | expr + pub(super) fn parse_statement(&mut self) -> Option { + use
TokenKeyword::*; + + Some(match self.peek_token().kind { + TokenKind::Keyword(Static) => Statement::Static(self.parse_static()?), + TokenKind::Keyword(Let) => Statement::Let(self.parse_let()?), + _ => Statement::Expr(self.parse_expr_ln()?), + }) + } + + /// static ::= "static" let + pub(super) fn parse_static(&mut self) -> Option { + self.next_token(); + + if self.peek_token().kind != TokenKind::Keyword(TokenKeyword::Let) { + self.error_expected_peek("let"); + return None; + } + + self.parse_let() + } + + /// let ::= "let" identWithTy [ "=" expr ] + pub(super) fn parse_let(&mut self) -> Option { + self.next_token(); + + let (name, ty) = self.parse_ident_with_ty()?; + + let expr = if self.skip_token(TokenKind::Symbol(TokenSymbol::Eq)) { + self.parse_expr_ln() + } else if self.skip_token(TokenKind::Newline) { + None + } else { + self.error_expected_peek("= or newline"); + return None; + }; + + Some(Let { name, ty, expr }) + } +} + +#[test] +fn test_parse_let() { + use Literal::*; + + let mut parser = Parser::new( + r#"static let test01: int = 4 + let test02: char = '6' + static let test03: float + let test04 = 9"#, + ); + assert_eq!( + parser.parse_static(), + Some(Let { + name: "test01".into(), + ty: Ty::Int, + expr: Some(Expr::Literal(Int(4))) + }) + ); + assert_eq!( + parser.parse_let(), + Some(Let { + name: "test02".into(), + ty: Ty::Char, + expr: Some(Expr::Literal(Char('6'))) + }) + ); + assert_eq!( + parser.parse_static(), + Some(Let { + name: "test03".into(), + ty: Ty::Float, + expr: None + }) + ); + assert_eq!(parser.parse_let(), None); +}