diff --git a/src/ast.rs b/src/ast.rs index 837c872..81fc08a 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -1,72 +1,93 @@ -/// A very naive AST definition using recursive enums -/// See the parser for implementation +//! A very naive AST definition using recursive enums +//! +//! See the parser for implementation + use std::rc::Rc; pub type Parent = Vec; /// Entities are functions, classes, and modules -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum Entity { Fn(Fn), Class(Class), Module(Module), } -#[derive(Debug)] +/// A module just provides an additional scope +/// +/// TODO: Add exporting and importing modules +#[derive(Debug, PartialEq)] pub struct Module { + /// Name of module pub name: Rc, + /// Everything inside the module pub children: Vec, } /// Modules contain functions, classes and statements -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ModuleChildren { Fn(Fn), Class(Class), Module(Module), - Const(Let), + Static(Let), } -#[derive(Debug)] +/// Classes encapsulate functions and definitions. +#[derive(Debug, PartialEq)] pub struct Class { + /// Name of class pub name: Rc, + /// Everything inside the class pub children: Vec, } -/// Classes contain functions and statements. 
-/// -/// TODO: Maybe change statements to something else -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum ClassChildren { Fn(Fn), - Statement(Statement), + Let(Let), + Static(Let), } -#[derive(Debug)] +/// A Function +#[derive(Debug, PartialEq)] pub struct Fn { + /// Name of the function pub name: Rc, + /// Optional return type pub return_ty: Option, + /// Parameters pub params: Vec<(Rc, Ty)>, + /// The function block pub children: Vec, } -#[derive(Debug)] +/// Statements encapsulate expressions and definitions +#[derive(Debug, PartialEq)] pub enum Statement { - Const(Let), + Static(Let), Let(Let), Expr(Expr), } -#[derive(Debug)] +/// A variable definition +#[derive(Debug, PartialEq)] pub struct Let { + /// Name of variable pub name: Rc, + /// Type of variable pub ty: Ty, + /// Value of variable pub expr: Option, } type Op = crate::lexer::TokenSymbol; -#[derive(Debug)] +/// Lowest form of expression +/// +/// TODO: refine +#[derive(Debug, PartialEq)] pub enum Expr { Int(i32), Float(f32), @@ -74,7 +95,7 @@ pub enum Expr { Op(Op, Box, Option>), If(Box, Box, Option>), Block(Vec), - Loop, + Loop(Vec), Break, Continue, } @@ -82,7 +103,7 @@ pub enum Expr { /// Primitives /// /// TODO: add arrays and pointers maybe -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub enum Ty { Int, Float, diff --git a/src/lexer.rs b/src/lexer.rs index 8015e84..6a1460c 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -75,6 +75,7 @@ pub enum TokenKeyword { Module, // statements + Static, Let, Ret, @@ -137,8 +138,9 @@ pub struct Lexer<'a> { tokens: VecDeque, /// Current line number pub line: usize, + pub col: usize, /// Start character index for the current token - pub start: usize, + start: usize, /// End character index for the current token end: usize, } @@ -170,6 +172,7 @@ impl<'a> Lexer<'a> { chars: content.chars().peekable(), tokens: VecDeque::new(), line: 1, + col: 1, start: 0, end: 0, } @@ -196,6 +199,7 @@ impl<'a> Lexer<'a> { #[inline] fn next(&mut self) -> Option { self.end += 
1; + self.col += 1; self.chars.next() } @@ -236,7 +240,7 @@ impl<'a> Lexer<'a> { } is_float = true; } - 'e' => { + 'e' | 'E' => { self.next(); is_float = true; break; @@ -289,6 +293,7 @@ impl<'a> Lexer<'a> { "fn" => Keyword(Fn), "class" => Keyword(Class), "module" => Keyword(Module), + "static" => Keyword(Static), "let" => Keyword(Let), "ret" => Keyword(Ret), "if" => Keyword(If), @@ -400,6 +405,7 @@ impl<'a> Lexer<'a> { '\n' => { self.next(); self.line += 1; + self.col = 1; self.new_token(TokenKind::Newline) } '0'..='9' => self.get_numeric(), diff --git a/src/lib.rs b/src/lib.rs index c832a64..74f79cb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ pub mod args; pub mod ast; pub mod lexer; +pub mod parser; diff --git a/src/main.rs b/src/main.rs index c14239c..10683d5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,10 +4,7 @@ use std::{ }; use tricc::args::Args; -use tricc::lexer::{ - Lexer, - TokenKind, -}; +use tricc::parser::Parser; fn main() { panic::set_hook(Box::new(|panic_info| { @@ -30,11 +27,8 @@ fn main() { args.handle(); let file = args.get_file(); - let content = fs::read_to_string(&file).expect("Couldn't read the file"); + let content = fs::read_to_string(file).expect("Couldn't read the file"); + let mut parser = Parser::new(&content); - let mut lexer = Lexer::new(content.as_str()); - - while lexer.peek_token().kind != TokenKind::Eof { - println!("{:?}", lexer.next_token()); - } + println!("{:?}", parser.parse()); } diff --git a/src/parser/entity.rs b/src/parser/entity.rs new file mode 100644 index 0000000..3a86ca0 --- /dev/null +++ b/src/parser/entity.rs @@ -0,0 +1,206 @@ +use super::Parser; +use crate::ast::*; +use crate::lexer::{ + TokenDelimiter, + TokenKeyword, + TokenKind, + TokenSymbol, +}; +use std::rc::Rc; + +impl<'a> Parser<'a> { + pub(super) fn parse_entity(&mut self) -> Option { + use TokenKeyword::*; + let token = self.peek_token(); + + if let TokenKind::Keyword(keyword) = &token.kind { + match keyword { + Module => 
Some(Entity::Module(self.parse_module()?)), + Class => Some(Entity::Class(self.parse_class()?)), + Fn => Some(Entity::Fn(self.parse_function()?)), + _ => { + self.error_expected_peek("entity"); + None + } + } + } else { + self.error_expected_peek("entity"); + None + } + } + + fn parse_module(&mut self) -> Option { + self.next_token(); + + let name = self.parse_ident()?; + let mut children = vec![]; + + if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) { + self.error_expected_peek("{"); + return None; + } + + loop { + use TokenKeyword::*; + self.trim_newlines(); + if let TokenKind::Keyword(keyword) = &self.peek_token().kind { + children.push(match keyword { + Module => ModuleChildren::Module(self.parse_module()?), + Fn => ModuleChildren::Fn(self.parse_function()?), + Static => ModuleChildren::Static(self.parse_static()?), + Class => ModuleChildren::Class(self.parse_class()?), + _ => { + self.error_expected_peek("module child"); + return None; + } + }); + } else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) { + self.error_expected_peek("}"); + return None; + } else { + break; + } + } + + Some(Module { name, children }) + } + + fn parse_class(&mut self) -> Option { + self.next_token(); + + let name = self.parse_ident()?; + let mut children = vec![]; + + if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) { + self.error_expected_peek("{"); + return None; + } + + loop { + use TokenKeyword::*; + self.trim_newlines(); + if let TokenKind::Keyword(keyword) = &self.peek_token().kind { + children.push(match keyword { + Fn => ClassChildren::Fn(self.parse_function()?), + Static => ClassChildren::Static(self.parse_static()?), + Let => ClassChildren::Let(self.parse_let()?), + _ => { + self.error_expected_peek("class child"); + return None; + } + }); + } else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) { + self.error_expected_peek("}"); + return None; + } else { + break; + } + } + + Some(Class { 
name, children }) + } + + fn parse_function(&mut self) -> Option { + self.next_token(); + + let name = self.parse_ident()?; + let mut params: Vec<(Rc, Ty)> = vec![]; + let mut return_typ: Option = None; + let mut children: Vec = vec![]; + + if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenOpen)) { + self.error_expected_peek("("); + return None; + } + + loop { + if self.peek_token().kind == TokenKind::Identifier { + params.push(self.parse_ident_with_type()?); + } + + if !self.skip_token(TokenKind::Symbol(TokenSymbol::Comma)) { + if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenClose)) { + self.error_expected_peek(", or )"); + return None; + } else { + break; + } + } + } + + if self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) { + return_typ = Some(self.parse_ty()?); + } + + if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) { + self.error_expected_peek("{"); + return None; + } + + loop { + self.trim_newlines(); + if self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) { + break; + } + children.push(self.parse_statement()?) 
+ } + + Some(Fn { + name, + return_ty: return_typ, + params, + children, + }) + } +} + +#[test] +fn test_parse_entity() { + let mut parser = Parser::new( + r#"module module01 { + class class01 { + fn fn01(param01: char, param02: float) { + static let let01: int = 4 + } + } + + + fn fn02(): int { + + + + + + + + } + }"#, + ); + assert_eq!( + parser.parse_entity(), + Some(Entity::Module(Module { + name: "module01".into(), + children: vec![ + ModuleChildren::Class(Class { + name: "class01".into(), + children: vec![ClassChildren::Fn(Fn { + name: "fn01".into(), + return_ty: None, + params: vec![("param01".into(), Ty::Char), ("param02".into(), Ty::Float)], + children: vec![Statement::Static(Let { + name: "let01".into(), + ty: Ty::Int, + expr: Some(Expr::Int(4)) + })] + })] + }), + ModuleChildren::Fn(Fn { + name: "fn02".into(), + return_ty: Some(Ty::Int), + params: vec![], + children: vec![] + }) + ] + })) + ); +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 0000000..2ec93c8 --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,109 @@ +use super::Parser; +use crate::ast::*; +use crate::lexer::{ + TokenKeyword, + TokenKind, + TokenLiteral, + TokenSymbol, +}; + +impl<'a> Parser<'a> { + pub(super) fn parse_statement(&mut self) -> Option { + use TokenKeyword::*; + + match self.peek_token().kind { + TokenKind::Keyword(Static) => Some(Statement::Static(self.parse_static()?)), + TokenKind::Keyword(Let) => Some(Statement::Let(self.parse_let()?)), + _ => Some(Statement::Expr(self.parse_expr()?)), + } + } + + pub(super) fn parse_static(&mut self) -> Option { + self.next_token(); + if self.peek_token().kind != TokenKind::Keyword(TokenKeyword::Let) { + self.error_expected_peek("let"); + return None; + } + + self.parse_let() + } + + pub(super) fn parse_let(&mut self) -> Option { + self.next_token(); + + let (name, ty) = self.parse_ident_with_type()?; + let expr; + + if self.skip_token(TokenKind::Symbol(TokenSymbol::Eq)) { + expr = 
Some(self.parse_expr()?); + self.trim_newlines(); + } else if self.skip_token(TokenKind::Newline) { + expr = None; + } else { + self.error_expected_peek("= or newline"); + return None; + } + + Some(Let { name, ty, expr }) + } + + fn parse_expr(&mut self) -> Option { + use TokenKind::*; + + match self.peek_token().kind { + Literal(TokenLiteral::Int) => Some(Expr::Int(self.parse_int()?)), + Literal(TokenLiteral::Float) => Some(Expr::Float(self.parse_float()?)), + Literal(TokenLiteral::Char) => Some(Expr::Char(self.parse_char()?)), + _ => { + self.error_expected_peek("expression"); + None + } + } + } +} + +#[test] +fn test_parse_let() { + let mut parser = Parser::new( + r#"static let test01: int = 4 + let test02: char = '6' + static let test03: float + let test04 = 9"#, + ); + assert_eq!( + parser.parse_static(), + Some(Let { + name: "test01".into(), + ty: Ty::Int, + expr: Some(Expr::Int(4)) + }) + ); + assert_eq!( + parser.parse_let(), + Some(Let { + name: "test02".into(), + ty: Ty::Char, + expr: Some(Expr::Char('6')) + }) + ); + assert_eq!( + parser.parse_static(), + Some(Let { + name: "test03".into(), + ty: Ty::Float, + expr: None + }) + ); + assert_eq!(parser.parse_let(), None); +} + +#[test] +fn test_parse_expr_literals() { + let mut parser = Parser::new("4524 3123.15e4 9e2 9083482.429455 'c' 3331.13.3"); + assert_eq!(parser.parse_expr(), Some(Expr::Int(4524))); + assert_eq!(parser.parse_expr(), Some(Expr::Float(3123.15e4))); + assert_eq!(parser.parse_expr(), Some(Expr::Float(9e2))); + assert_eq!(parser.parse_expr(), Some(Expr::Float(9083482.429455))); + assert_eq!(parser.parse_expr(), Some(Expr::Char('c'))); + assert_eq!(parser.parse_expr(), None); +} diff --git a/src/parser/literal.rs b/src/parser/literal.rs new file mode 100644 index 0000000..ec09a1b --- /dev/null +++ b/src/parser/literal.rs @@ -0,0 +1,100 @@ +use super::Parser; +use crate::lexer::{ + TokenKind, + TokenSymbol, +}; + +impl<'a> Parser<'a> { + pub(super) fn parse_int(&mut self) -> Option { + let 
val = self.next_token().val; + let mut integer: i32 = 0; + let error = || { + self.error(&format!( + "integer values must be in range [{}, {}]", + i32::MIN, + i32::MAX + )) + }; + + for c in val.chars() { + // c is always ['0'..='9'] + let d = c.to_digit(10)?; + + match integer.checked_mul(10) { + Some(m) => integer = m, + None => { + error(); + return None; + } + } + + match integer.checked_add(d as i32) { + Some(a) => integer = a, + None => { + error(); + return None; + } + } + } + Some(integer) + } + + /// f32 can be NaN and inf as well + pub(super) fn parse_float(&mut self) -> Option { + let token = self.next_token(); + let mut chars = token.val.chars(); + let mut float: f32 = 0.0; + let mut fraction: f32 = 0.0; + let mut prec: i32 = 0; + let mut exp: i32 = 0; + let mut decimal: bool = false; + + // lexer takes care of multiple decimals and non-digit characters + for c in chars.by_ref() { + match c { + '.' => decimal = true, + 'e' | 'E' => { + // lexer takes care that decimal doesn't come after e + let s; + match self.peek_token().kind { + TokenKind::Symbol(TokenSymbol::Minus) => { + s = -1; + self.next_token(); + } + TokenKind::Symbol(TokenSymbol::Plus) => { + s = 1; + self.next_token(); + } + _ => s = 1, + } + + exp = self.parse_int()? * s; + break; + } + _ => { + // c is always ['0'..='9'] + let d = c.to_digit(10)? as f32; + if decimal { + fraction *= 10.0; + fraction += d; + prec += 1; + } else { + float *= 10.0; + float += d; + } + } + } + } + + fraction /= 10f32.powi(prec); + float += fraction; + float *= 10f32.powi(exp); + + Some(float) + } + + pub(super) fn parse_char(&mut self) -> Option { + // the lexer ensures that the 0th and 2nd characters are both ' + self.next_token().val.chars().nth(1) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 0000000..716112e --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,136 @@ +//! A naive parser just to get started +//! +//! 
Can only parse module, class and function declaration now along with let statements + +mod entity; +mod expr; +mod literal; + +use crate::ast::{ + Parent, + Ty, +}; +use crate::lexer::{ + Lexer, + Token, + TokenKeyword, + TokenKind, + TokenSymbol, +}; +use std::rc::Rc; + +pub struct Parser<'a> { + pub lexer: Lexer<'a>, +} + +impl<'a> Parser<'a> { + /// Creates a new [`Parser`] instance. + pub fn new(contents: &'a str) -> Parser<'a> { + Parser { + lexer: Lexer::new(contents), + } + } + + #[inline] + fn error(&self, message: &str) { + eprintln!( + "Parser: {}, at \"{}:{}\"", + message, self.lexer.line, self.lexer.col + ); + } + + #[inline] + fn error_expected(&self, expected: &str, found: &str) { + self.error(&format!("expected {}, found {}", expected, found)); + } + + #[inline] + fn error_expected_peek(&mut self, expected: &str) { + let found = &Rc::clone(&self.peek_token().val); + self.error_expected(expected, found); + } + + #[inline] + fn next_token(&mut self) -> Token { + self.lexer.next_token() + } + + #[inline] + fn peek_token(&mut self) -> &Token { + return self.lexer.peek_token(); + } + + fn trim_newlines(&mut self) { + while self.peek_token().kind == TokenKind::Newline { + self.next_token(); + } + } + + fn skip_token(&mut self, kind: TokenKind) -> bool { + if self.peek_token().kind == kind { + self.next_token(); + return true; + } + + false + } + + fn parse_ty(&mut self) -> Option { + let ty: Ty; + + if let TokenKind::Keyword(keyword) = &self.peek_token().kind { + ty = match keyword { + TokenKeyword::Int => Ty::Int, + TokenKeyword::Char => Ty::Char, + TokenKeyword::Float => Ty::Float, + _ => { + self.error_expected_peek("ty"); + return None; + } + }; + } else { + self.error_expected_peek("ty"); + return None; + } + self.next_token(); + Some(ty) + } + + fn parse_ident(&mut self) -> Option> { + if self.peek_token().kind != TokenKind::Identifier { + self.error_expected_peek("identifier"); + return None; + } + + Some(Rc::clone(&self.next_token().val)) + } + + 
fn parse_ident_with_type(&mut self) -> Option<(Rc, Ty)> { + let ident = self.parse_ident()?; + + if !self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) { + self.error_expected_peek(":"); + return None; + } + + Some((ident, self.parse_ty()?)) + } + + /// Returns an [`Entity`] vector after parsing + /// + /// [`Entity`]: crate::ast::Entity + pub fn parse(&mut self) -> Option { + let mut parent = vec![]; + + loop { + match self.peek_token().kind { + TokenKind::Newline => self.trim_newlines(), + TokenKind::Eof => break, + _ => { + parent.push(self.parse_entity()?); + } + } + } + Some(parent) + } +}