parser: init

does not support many expressions

Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
This commit is contained in:
2023-07-31 00:27:55 +05:30
parent 771363106f
commit d70b196042
8 changed files with 604 additions and 31 deletions

View File

@@ -1,72 +1,93 @@
/// A very naive AST definition using recursive enums //! A very naive AST definition using recursive enums
/// See the parser for implementation //!
//! See the parser for implementation
use std::rc::Rc; use std::rc::Rc;
pub type Parent = Vec<Entity>; pub type Parent = Vec<Entity>;
/// Entities are functions, classes, and modules /// Entities are functions, classes, and modules
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum Entity { pub enum Entity {
Fn(Fn), Fn(Fn),
Class(Class), Class(Class),
Module(Module), Module(Module),
} }
#[derive(Debug)] /// A module just provides an additional scope
///
/// TODO: Add exporting and importing modules
#[derive(Debug, PartialEq)]
pub struct Module { pub struct Module {
/// Name of module
pub name: Rc<str>, pub name: Rc<str>,
/// Everything inside the module
pub children: Vec<ModuleChildren>, pub children: Vec<ModuleChildren>,
} }
/// Modules contain functions, classes and statements /// Modules contain functions, classes and statements
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum ModuleChildren { pub enum ModuleChildren {
Fn(Fn), Fn(Fn),
Class(Class), Class(Class),
Module(Module), Module(Module),
Const(Let), Static(Let),
} }
#[derive(Debug)] /// Classes encapsulate functions and definitions.
#[derive(Debug, PartialEq)]
pub struct Class { pub struct Class {
/// Name of class
pub name: Rc<str>, pub name: Rc<str>,
/// Everything inside the class
pub children: Vec<ClassChildren>, pub children: Vec<ClassChildren>,
} }
/// Classes contain functions and statements. #[derive(Debug, PartialEq)]
///
/// TODO: Maybe change statements to something else
#[derive(Debug)]
pub enum ClassChildren { pub enum ClassChildren {
Fn(Fn), Fn(Fn),
Statement(Statement), Let(Let),
Static(Let),
} }
#[derive(Debug)] /// A Function
#[derive(Debug, PartialEq)]
pub struct Fn { pub struct Fn {
/// Name of the function
pub name: Rc<str>, pub name: Rc<str>,
/// Optional return type
pub return_ty: Option<Ty>, pub return_ty: Option<Ty>,
/// Parameters
pub params: Vec<(Rc<str>, Ty)>, pub params: Vec<(Rc<str>, Ty)>,
/// The function block
pub children: Vec<Statement>, pub children: Vec<Statement>,
} }
#[derive(Debug)] /// Statements encapsulate expressions and definitions
#[derive(Debug, PartialEq)]
pub enum Statement { pub enum Statement {
Const(Let), Static(Let),
Let(Let), Let(Let),
Expr(Expr), Expr(Expr),
} }
#[derive(Debug)] /// A variable definition
#[derive(Debug, PartialEq)]
pub struct Let { pub struct Let {
/// Name of variable
pub name: Rc<str>, pub name: Rc<str>,
/// Type of variable
pub ty: Ty, pub ty: Ty,
/// Value of variable
pub expr: Option<Expr>, pub expr: Option<Expr>,
} }
type Op = crate::lexer::TokenSymbol; type Op = crate::lexer::TokenSymbol;
#[derive(Debug)] /// Lowest form of expression
///
/// TODO: refine
#[derive(Debug, PartialEq)]
pub enum Expr { pub enum Expr {
Int(i32), Int(i32),
Float(f32), Float(f32),
@@ -74,7 +95,7 @@ pub enum Expr {
Op(Op, Box<Expr>, Option<Box<Expr>>), Op(Op, Box<Expr>, Option<Box<Expr>>),
If(Box<Expr>, Box<Expr>, Option<Box<Expr>>), If(Box<Expr>, Box<Expr>, Option<Box<Expr>>),
Block(Vec<Statement>), Block(Vec<Statement>),
Loop, Loop(Vec<Statement>),
Break, Break,
Continue, Continue,
} }
@@ -82,7 +103,7 @@ pub enum Expr {
/// Primitives /// Primitives
/// ///
/// TODO: add arrays and pointers maybe /// TODO: add arrays and pointers maybe
#[derive(Debug)] #[derive(Debug, PartialEq)]
pub enum Ty { pub enum Ty {
Int, Int,
Float, Float,

View File

@@ -75,6 +75,7 @@ pub enum TokenKeyword {
Module, Module,
// statements // statements
Static,
Let, Let,
Ret, Ret,
@@ -137,8 +138,9 @@ pub struct Lexer<'a> {
tokens: VecDeque<Token>, tokens: VecDeque<Token>,
/// Current line number /// Current line number
pub line: usize, pub line: usize,
pub col: usize,
/// Start character index for the current token /// Start character index for the current token
pub start: usize, start: usize,
/// End character index for the current token /// End character index for the current token
end: usize, end: usize,
} }
@@ -170,6 +172,7 @@ impl<'a> Lexer<'a> {
chars: content.chars().peekable(), chars: content.chars().peekable(),
tokens: VecDeque::new(), tokens: VecDeque::new(),
line: 1, line: 1,
col: 1,
start: 0, start: 0,
end: 0, end: 0,
} }
@@ -196,6 +199,7 @@ impl<'a> Lexer<'a> {
#[inline] #[inline]
fn next(&mut self) -> Option<char> { fn next(&mut self) -> Option<char> {
self.end += 1; self.end += 1;
self.col += 1;
self.chars.next() self.chars.next()
} }
@@ -236,7 +240,7 @@ impl<'a> Lexer<'a> {
} }
is_float = true; is_float = true;
} }
'e' => { 'e' | 'E' => {
self.next(); self.next();
is_float = true; is_float = true;
break; break;
@@ -289,6 +293,7 @@ impl<'a> Lexer<'a> {
"fn" => Keyword(Fn), "fn" => Keyword(Fn),
"class" => Keyword(Class), "class" => Keyword(Class),
"module" => Keyword(Module), "module" => Keyword(Module),
"static" => Keyword(Static),
"let" => Keyword(Let), "let" => Keyword(Let),
"ret" => Keyword(Ret), "ret" => Keyword(Ret),
"if" => Keyword(If), "if" => Keyword(If),
@@ -400,6 +405,7 @@ impl<'a> Lexer<'a> {
'\n' => { '\n' => {
self.next(); self.next();
self.line += 1; self.line += 1;
self.col = 0;
self.new_token(TokenKind::Newline) self.new_token(TokenKind::Newline)
} }
'0'..='9' => self.get_numeric(), '0'..='9' => self.get_numeric(),

View File

@@ -1,3 +1,4 @@
pub mod args; pub mod args;
pub mod ast; pub mod ast;
pub mod lexer; pub mod lexer;
pub mod parser;

View File

@@ -4,10 +4,7 @@ use std::{
}; };
use tricc::args::Args; use tricc::args::Args;
use tricc::lexer::{ use tricc::parser::Parser;
Lexer,
TokenKind,
};
fn main() { fn main() {
panic::set_hook(Box::new(|panic_info| { panic::set_hook(Box::new(|panic_info| {
@@ -30,11 +27,8 @@ fn main() {
args.handle(); args.handle();
let file = args.get_file(); let file = args.get_file();
let content = fs::read_to_string(&file).expect("Couldn't read the file"); let content = fs::read_to_string(file).expect("Couldn't read the file");
let mut parser = Parser::new(&content);
let mut lexer = Lexer::new(content.as_str()); println!("{:?}", parser.parse());
while lexer.peek_token().kind != TokenKind::Eof {
println!("{:?}", lexer.next_token());
}
} }

206
src/parser/entity.rs Normal file
View File

@@ -0,0 +1,206 @@
use super::Parser;
use crate::ast::*;
use crate::lexer::{
TokenDelimiter,
TokenKeyword,
TokenKind,
TokenSymbol,
};
use std::rc::Rc;
impl<'a> Parser<'a> {
pub(super) fn parse_entity(&mut self) -> Option<Entity> {
use TokenKeyword::*;
let token = self.peek_token();
if let TokenKind::Keyword(keyword) = &token.kind {
match keyword {
Module => Some(Entity::Module(self.parse_module()?)),
Class => Some(Entity::Class(self.parse_class()?)),
Fn => Some(Entity::Fn(self.parse_function()?)),
_ => {
self.error_expected_peek("entity");
None
}
}
} else {
self.error_expected_peek("entity");
None
}
}
fn parse_module(&mut self) -> Option<Module> {
self.next_token();
let name = self.parse_ident()?;
let mut children = vec![];
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
self.error_expected_peek("{");
return None;
}
loop {
use TokenKeyword::*;
self.trim_newlines();
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
children.push(match keyword {
Module => ModuleChildren::Module(self.parse_module()?),
Fn => ModuleChildren::Fn(self.parse_function()?),
Static => ModuleChildren::Static(self.parse_static()?),
Class => ModuleChildren::Class(self.parse_class()?),
_ => {
self.error_expected_peek("module child");
return None;
}
});
} else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("}");
return None;
} else {
break;
}
}
Some(Module { name, children })
}
fn parse_class(&mut self) -> Option<Class> {
self.next_token();
let name = self.parse_ident()?;
let mut children = vec![];
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
self.error_expected_peek("{");
return None;
}
loop {
use TokenKeyword::*;
self.trim_newlines();
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
children.push(match keyword {
Fn => ClassChildren::Fn(self.parse_function()?),
Static => ClassChildren::Static(self.parse_static()?),
Let => ClassChildren::Let(self.parse_let()?),
_ => {
self.error_expected_peek("class child");
return None;
}
});
} else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("}");
return None;
} else {
break;
}
}
Some(Class { name, children })
}
/// Parses `fn <name>(<params>) [: <return type>] { <statements> }`.
///
/// The first token (the `fn` keyword peeked by the caller) is consumed
/// unconditionally. Parameters are `name: type` pairs separated by commas; a
/// single trailing comma before `)` is accepted, but every comma must follow a
/// parameter, so `fn f(,)` is rejected. The body is a newline-separated list
/// of statements terminated by `}`.
///
/// Returns `None` after reporting an error on the first unexpected token.
fn parse_function(&mut self) -> Option<Fn> {
    self.next_token();
    let name = self.parse_ident()?;

    if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenOpen)) {
        self.error_expected_peek("(");
        return None;
    }

    let mut params: Vec<(Rc<str>, Ty)> = vec![];
    loop {
        // Handles both the empty list `()` and a trailing comma `(a: int,)`.
        if self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenClose)) {
            break;
        }

        // A parameter is mandatory here; a bare comma is no longer skipped silently.
        params.push(self.parse_ident_with_type()?);

        if !self.skip_token(TokenKind::Symbol(TokenSymbol::Comma)) {
            if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenClose)) {
                self.error_expected_peek(", or )");
                return None;
            }
            break;
        }
    }

    // The return type is optional and introduced by a colon.
    let return_ty = if self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) {
        Some(self.parse_ty()?)
    } else {
        None
    };

    if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
        self.error_expected_peek("{");
        return None;
    }

    let mut children: Vec<Statement> = vec![];
    loop {
        self.trim_newlines();
        if self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
            break;
        }
        children.push(self.parse_statement()?);
    }

    Some(Fn {
        name,
        return_ty,
        params,
        children,
    })
}
}
/// Parses a module containing a class (with one function) and a free function,
/// then compares the result against a hand-built AST.
#[test]
fn test_parse_entity() {
    let source = r#"module module01 {
class class01 {
fn fn01(param01: char, param02: float) {
static let let01: int = 4
}
}
fn fn02(): int {
}
}"#;

    // The expected tree is assembled bottom-up so each node stays readable.
    let let01 = Let {
        name: "let01".into(),
        ty: Ty::Int,
        expr: Some(Expr::Int(4)),
    };
    let fn01 = Fn {
        name: "fn01".into(),
        return_ty: None,
        params: vec![("param01".into(), Ty::Char), ("param02".into(), Ty::Float)],
        children: vec![Statement::Static(let01)],
    };
    let class01 = Class {
        name: "class01".into(),
        children: vec![ClassChildren::Fn(fn01)],
    };
    let fn02 = Fn {
        name: "fn02".into(),
        return_ty: Some(Ty::Int),
        params: vec![],
        children: vec![],
    };
    let expected = Entity::Module(Module {
        name: "module01".into(),
        children: vec![ModuleChildren::Class(class01), ModuleChildren::Fn(fn02)],
    });

    let mut parser = Parser::new(source);
    assert_eq!(parser.parse_entity(), Some(expected));
}

109
src/parser/expr.rs Normal file
View File

@@ -0,0 +1,109 @@
use super::Parser;
use crate::ast::*;
use crate::lexer::{
TokenKeyword,
TokenKind,
TokenLiteral,
TokenSymbol,
};
impl<'a> Parser<'a> {
pub(super) fn parse_statement(&mut self) -> Option<Statement> {
use TokenKeyword::*;
match self.peek_token().kind {
TokenKind::Keyword(Static) => Some(Statement::Static(self.parse_static()?)),
TokenKind::Keyword(Let) => Some(Statement::Let(self.parse_let()?)),
_ => Some(Statement::Expr(self.parse_expr()?)),
}
}
/// Parses `static let <name>: <type> [= <expr>]`.
///
/// Consumes the current token (the `static` keyword peeked by callers) and
/// requires `let` to follow; the remainder is delegated to `parse_let`.
pub(super) fn parse_static(&mut self) -> Option<Let> {
    self.next_token();

    if self.peek_token().kind == TokenKind::Keyword(TokenKeyword::Let) {
        self.parse_let()
    } else {
        self.error_expected_peek("let");
        None
    }
}
/// Parses `let <name>: <type>` followed by either `= <expr>` or a newline.
///
/// Consumes the current token (the `let` keyword peeked by callers). With an
/// initializer, any newlines after the expression are skipped; without one,
/// exactly one newline is consumed. Anything else reports an error and
/// yields `None`.
pub(super) fn parse_let(&mut self) -> Option<Let> {
    self.next_token();
    let (name, ty) = self.parse_ident_with_type()?;

    let expr = if self.skip_token(TokenKind::Symbol(TokenSymbol::Eq)) {
        let value = self.parse_expr()?;
        self.trim_newlines();
        Some(value)
    } else if self.skip_token(TokenKind::Newline) {
        None
    } else {
        self.error_expected_peek("= or newline");
        return None;
    };

    Some(Let { name, ty, expr })
}
fn parse_expr(&mut self) -> Option<Expr> {
use TokenKind::*;
match self.peek_token().kind {
Literal(TokenLiteral::Int) => Some(Expr::Int(self.parse_int()?)),
Literal(TokenLiteral::Float) => Some(Expr::Float(self.parse_float()?)),
Literal(TokenLiteral::Char) => Some(Expr::Char(self.parse_char()?)),
_ => {
self.error_expected_peek("expression");
None
}
}
}
}
/// Checks `static let` / `let` parsing with and without an initializer, and
/// that a definition missing its type annotation is rejected.
#[test]
fn test_parse_let() {
    /// Builds the expected successful parse result.
    fn definition(name: &str, ty: Ty, expr: Option<Expr>) -> Option<Let> {
        Some(Let {
            name: name.into(),
            ty,
            expr,
        })
    }

    let source = r#"static let test01: int = 4
let test02: char = '6'
static let test03: float
let test04 = 9"#;
    let mut parser = Parser::new(source);

    assert_eq!(
        parser.parse_static(),
        definition("test01", Ty::Int, Some(Expr::Int(4)))
    );
    assert_eq!(
        parser.parse_let(),
        definition("test02", Ty::Char, Some(Expr::Char('6')))
    );
    assert_eq!(parser.parse_static(), definition("test03", Ty::Float, None));
    // `test04` has no `: type`, so parsing must fail.
    assert_eq!(parser.parse_let(), None);
}
/// Feeds a sequence of literals through `parse_expr` and checks each result in
/// order; the final malformed literal must be rejected.
#[test]
fn test_parse_expr_literals() {
    let mut parser = Parser::new("4524 3123.15e4 9e2 9083482.429455 'c' 3331.13.3");

    let expected = vec![
        Some(Expr::Int(4524)),
        Some(Expr::Float(3123.15e4)),
        Some(Expr::Float(9e2)),
        Some(Expr::Float(9083482.429455)),
        Some(Expr::Char('c')),
        None,
    ];

    for want in expected {
        assert_eq!(parser.parse_expr(), want);
    }
}

100
src/parser/literal.rs Normal file
View File

@@ -0,0 +1,100 @@
use super::Parser;
use crate::lexer::{
TokenKind,
TokenSymbol,
};
impl<'a> Parser<'a> {
/// Consumes the next token and parses its text as a non-negative decimal `i32`.
///
/// Returns `None` after reporting an error when the text is empty, contains a
/// character that is not a decimal digit, or does not fit in an `i32`.
pub(super) fn parse_int(&mut self) -> Option<i32> {
    let val = self.next_token().val;

    // Without this guard an empty token text would skip the loop and parse as 0.
    if val.is_empty() {
        self.error_expected("integer", &val);
        return None;
    }

    let mut integer: i32 = 0;
    for c in val.chars() {
        // This is also called for float exponents, where the token kind has not
        // been checked, so a non-digit must be reported rather than dropped.
        let digit = match c.to_digit(10) {
            // `to_digit(10)` yields 0..=9, so the cast cannot truncate.
            Some(d) => d as i32,
            None => {
                self.error_expected("integer", &val);
                return None;
            }
        };

        integer = match integer
            .checked_mul(10)
            .and_then(|shifted| shifted.checked_add(digit))
        {
            Some(next) => next,
            None => {
                self.error(&format!(
                    "integer values must be in range [{}, {}]",
                    i32::MIN,
                    i32::MAX
                ));
                return None;
            }
        };
    }

    Some(integer)
}
/// Consumes a float literal and returns its value as a correctly rounded `f32`.
///
/// The lexer ends a float token right after `e`/`E`, so an exponent arrives as
/// separate tokens: an optional `+`/`-` symbol followed by an integer, both of
/// which are consumed here. The mantissa and exponent are then converted by
/// the standard library parser. Accumulating digits by hand in `f32` lost
/// precision and produced NaN for very long fractions (`inf / inf`).
///
/// Values too large for `f32` come back as infinity. Returns `None` when the
/// mantissa holds anything other than digits and `.`, when the exponent is not
/// a valid integer, or when the text is not a valid float (for example more
/// than one `.`).
pub(super) fn parse_float(&mut self) -> Option<f32> {
    let token = self.next_token();
    let text: &str = &token.val;

    // Split off everything from the exponent marker onwards.
    let (mantissa, has_exponent) = match text.find(|c: char| c == 'e' || c == 'E') {
        Some(idx) => (&text[..idx], true),
        None => (text, false),
    };

    // Keep the accepted alphabet as strict as before: `str::parse` alone would
    // also accept signs and words such as "inf" or "nan".
    if !mantissa.chars().all(|c| c == '.' || c.is_ascii_digit()) {
        return None;
    }

    let exp = if has_exponent {
        let sign = match self.peek_token().kind {
            TokenKind::Symbol(TokenSymbol::Minus) => {
                self.next_token();
                -1
            }
            TokenKind::Symbol(TokenSymbol::Plus) => {
                self.next_token();
                1
            }
            _ => 1,
        };
        self.parse_int()? * sign
    } else {
        0
    };

    format!("{}e{}", mantissa, exp).parse().ok()
}
/// Consumes a char literal token and returns the character between the quotes.
///
/// Relies on the lexer producing `'<c>'`, so the wanted character is the
/// second one in the token text. Returns `None` if the text is shorter.
pub(super) fn parse_char(&mut self) -> Option<char> {
    let token = self.next_token();
    let mut chars = token.val.chars();
    // Skip the opening quote.
    chars.next();
    chars.next()
}
}

136
src/parser/mod.rs Normal file
View File

@@ -0,0 +1,136 @@
//! A naive parser just to get started
//!
//! Currently it can only parse module, class and function declarations, along with let statements
mod entity;
mod expr;
mod literal;
use crate::ast::{
Parent,
Ty,
};
use crate::lexer::{
Lexer,
Token,
TokenKeyword,
TokenKind,
TokenSymbol,
};
use std::rc::Rc;
/// A naive parser that pulls tokens from a [`Lexer`] and builds AST nodes.
///
/// The lifetime `'a` is that of the source text handed to [`Parser::new`].
pub struct Parser<'a> {
/// The lexer supplying tokens; its `line` and `col` fields are also read
/// when reporting errors.
pub lexer: Lexer<'a>,
}
impl<'a> Parser<'a> {
/// Creates a new [`Parser`] instance.
pub fn new(contents: &'a str) -> Parser<'a> {
Parser {
lexer: Lexer::new(contents),
}
}
/// Writes `message` to stderr together with the lexer's current line and
/// column.
#[inline]
fn error(&self, message: &str) {
    let (line, col) = (self.lexer.line, self.lexer.col);
    eprintln!("Parser: {}, at \"{}:{}\"", message, line, col);
}
/// Reports an "expected X, found Y" error through [`Parser::error`].
#[inline]
fn error_expected(&self, expected: &str, found: &str) {
    let message = format!("expected {}, found {}", expected, found);
    self.error(&message);
}
/// Reports an "expected X" error, using the text of the upcoming token as
/// what was found. The token is only peeked, not consumed.
#[inline]
fn error_expected_peek(&mut self, expected: &str) {
    // Clone the text so the borrow from `peek_token` ends before reporting.
    let found = Rc::clone(&self.peek_token().val);
    self.error_expected(expected, &found);
}
/// Delegates to the lexer's `next_token` and returns the token it produces.
///
/// Callers in this parser use it to move past a token they have just peeked.
#[inline]
fn next_token(&mut self) -> Token {
self.lexer.next_token()
}
/// Delegates to the lexer's `peek_token` and returns a reference to the
/// upcoming token.
#[inline]
fn peek_token(&mut self) -> &Token {
    self.lexer.peek_token()
}
/// Consumes tokens for as long as the upcoming one is a newline.
fn trim_newlines(&mut self) {
    loop {
        if self.peek_token().kind != TokenKind::Newline {
            break;
        }
        self.next_token();
    }
}
/// Consumes the upcoming token if its kind equals `kind`.
///
/// Returns `true` when a token was consumed and `false` (leaving the token in
/// place) otherwise.
fn skip_token(&mut self, kind: TokenKind) -> bool {
    let matched = self.peek_token().kind == kind;
    if matched {
        self.next_token();
    }
    matched
}
/// Parses a primitive type keyword (`int`, `char` or `float`) into a [`Ty`].
///
/// The token is consumed only on success; otherwise an "expected ty" error is
/// reported and `None` is returned with the token left in place.
fn parse_ty(&mut self) -> Option<Ty> {
    let ty = match self.peek_token().kind {
        TokenKind::Keyword(TokenKeyword::Int) => Ty::Int,
        TokenKind::Keyword(TokenKeyword::Char) => Ty::Char,
        TokenKind::Keyword(TokenKeyword::Float) => Ty::Float,
        _ => {
            self.error_expected_peek("ty");
            return None;
        }
    };

    self.next_token();
    Some(ty)
}
/// Consumes an identifier token and returns its text.
///
/// If the upcoming token is not an identifier, an error is reported and `None`
/// is returned without consuming anything.
fn parse_ident(&mut self) -> Option<Rc<str>> {
    if self.peek_token().kind == TokenKind::Identifier {
        let token = self.next_token();
        Some(Rc::clone(&token.val))
    } else {
        self.error_expected_peek("identifier");
        None
    }
}
/// Parses `<identifier>: <type>` and returns the name paired with its type.
///
/// Returns `None` after an error has been reported if the identifier, the
/// colon or the type is missing.
fn parse_ident_with_type(&mut self) -> Option<(Rc<str>, Ty)> {
    let name = self.parse_ident()?;

    if self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) {
        self.parse_ty().map(|ty| (name, ty))
    } else {
        self.error_expected_peek(":");
        None
    }
}
/// Parses the whole input into a vector of [`Entity`] values.
///
/// Newlines between entities are skipped and parsing stops at end of file.
/// Returns `None` as soon as any entity fails to parse.
///
/// [`Entity`]: crate::ast::Entity
pub fn parse(&mut self) -> Option<Parent> {
    let mut entities: Parent = Vec::new();

    loop {
        self.trim_newlines();
        if self.peek_token().kind == TokenKind::Eof {
            return Some(entities);
        }
        entities.push(self.parse_entity()?);
    }
}
}