parser: init
does not support many expressions Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
This commit is contained in:
206
src/parser/entity.rs
Normal file
206
src/parser/entity.rs
Normal file
@@ -0,0 +1,206 @@
|
||||
use super::Parser;
|
||||
use crate::ast::*;
|
||||
use crate::lexer::{
|
||||
TokenDelimiter,
|
||||
TokenKeyword,
|
||||
TokenKind,
|
||||
TokenSymbol,
|
||||
};
|
||||
use std::rc::Rc;
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub(super) fn parse_entity(&mut self) -> Option<Entity> {
|
||||
use TokenKeyword::*;
|
||||
let token = self.peek_token();
|
||||
|
||||
if let TokenKind::Keyword(keyword) = &token.kind {
|
||||
match keyword {
|
||||
Module => Some(Entity::Module(self.parse_module()?)),
|
||||
Class => Some(Entity::Class(self.parse_class()?)),
|
||||
Fn => Some(Entity::Fn(self.parse_function()?)),
|
||||
_ => {
|
||||
self.error_expected_peek("entity");
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
self.error_expected_peek("entity");
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_module(&mut self) -> Option<Module> {
|
||||
self.next_token();
|
||||
|
||||
let name = self.parse_ident()?;
|
||||
let mut children = vec![];
|
||||
|
||||
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
|
||||
self.error_expected_peek("{");
|
||||
return None;
|
||||
}
|
||||
|
||||
loop {
|
||||
use TokenKeyword::*;
|
||||
self.trim_newlines();
|
||||
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
|
||||
children.push(match keyword {
|
||||
Module => ModuleChildren::Module(self.parse_module()?),
|
||||
Fn => ModuleChildren::Fn(self.parse_function()?),
|
||||
Static => ModuleChildren::Static(self.parse_static()?),
|
||||
Class => ModuleChildren::Class(self.parse_class()?),
|
||||
_ => {
|
||||
self.error_expected_peek("module child");
|
||||
return None;
|
||||
}
|
||||
});
|
||||
} else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
|
||||
self.error_expected_peek("}");
|
||||
return None;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Some(Module { name, children })
|
||||
}
|
||||
|
||||
fn parse_class(&mut self) -> Option<Class> {
|
||||
self.next_token();
|
||||
|
||||
let name = self.parse_ident()?;
|
||||
let mut children = vec![];
|
||||
|
||||
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
|
||||
self.error_expected_peek("{");
|
||||
return None;
|
||||
}
|
||||
|
||||
loop {
|
||||
use TokenKeyword::*;
|
||||
self.trim_newlines();
|
||||
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
|
||||
children.push(match keyword {
|
||||
Fn => ClassChildren::Fn(self.parse_function()?),
|
||||
Static => ClassChildren::Static(self.parse_static()?),
|
||||
Let => ClassChildren::Let(self.parse_let()?),
|
||||
_ => {
|
||||
self.error_expected_peek("class child");
|
||||
return None;
|
||||
}
|
||||
});
|
||||
} else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
|
||||
self.error_expected_peek("}");
|
||||
return None;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Some(Class { name, children })
|
||||
}
|
||||
|
||||
fn parse_function(&mut self) -> Option<Fn> {
|
||||
self.next_token();
|
||||
|
||||
let name = self.parse_ident()?;
|
||||
let mut params: Vec<(Rc<str>, Ty)> = vec![];
|
||||
let mut return_typ: Option<Ty> = None;
|
||||
let mut children: Vec<Statement> = vec![];
|
||||
|
||||
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenOpen)) {
|
||||
self.error_expected_peek("(");
|
||||
return None;
|
||||
}
|
||||
|
||||
loop {
|
||||
if self.peek_token().kind == TokenKind::Identifier {
|
||||
params.push(self.parse_ident_with_type()?);
|
||||
}
|
||||
|
||||
if !self.skip_token(TokenKind::Symbol(TokenSymbol::Comma)) {
|
||||
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenClose)) {
|
||||
self.error_expected_peek(", or )");
|
||||
return None;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) {
|
||||
return_typ = Some(self.parse_ty()?);
|
||||
}
|
||||
|
||||
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
|
||||
self.error_expected_peek("{");
|
||||
return None;
|
||||
}
|
||||
|
||||
loop {
|
||||
self.trim_newlines();
|
||||
if self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
|
||||
break;
|
||||
}
|
||||
children.push(self.parse_statement()?)
|
||||
}
|
||||
|
||||
Some(Fn {
|
||||
name,
|
||||
return_ty: return_typ,
|
||||
params,
|
||||
children,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a module containing a class (with one method) and a free function,
/// and checks the resulting AST.
#[test]
fn test_parse_entity() {
    let source = r#"module module01 {
class class01 {
fn fn01(param01: char, param02: float) {
static let let01: int = 4
}
}


fn fn02(): int {







}
}"#;
    let mut parser = Parser::new(source);

    let fn01 = Fn {
        name: "fn01".into(),
        return_ty: None,
        params: vec![("param01".into(), Ty::Char), ("param02".into(), Ty::Float)],
        children: vec![Statement::Static(Let {
            name: "let01".into(),
            ty: Ty::Int,
            expr: Some(Expr::Int(4)),
        })],
    };
    let class01 = Class {
        name: "class01".into(),
        children: vec![ClassChildren::Fn(fn01)],
    };
    let fn02 = Fn {
        name: "fn02".into(),
        return_ty: Some(Ty::Int),
        params: vec![],
        children: vec![],
    };
    let expected = Entity::Module(Module {
        name: "module01".into(),
        children: vec![ModuleChildren::Class(class01), ModuleChildren::Fn(fn02)],
    });

    assert_eq!(parser.parse_entity(), Some(expected));
}
|
109
src/parser/expr.rs
Normal file
109
src/parser/expr.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
use super::Parser;
|
||||
use crate::ast::*;
|
||||
use crate::lexer::{
|
||||
TokenKeyword,
|
||||
TokenKind,
|
||||
TokenLiteral,
|
||||
TokenSymbol,
|
||||
};
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
pub(super) fn parse_statement(&mut self) -> Option<Statement> {
|
||||
use TokenKeyword::*;
|
||||
|
||||
match self.peek_token().kind {
|
||||
TokenKind::Keyword(Static) => Some(Statement::Static(self.parse_static()?)),
|
||||
TokenKind::Keyword(Let) => Some(Statement::Let(self.parse_let()?)),
|
||||
_ => Some(Statement::Expr(self.parse_expr()?)),
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn parse_static(&mut self) -> Option<Let> {
|
||||
self.next_token();
|
||||
if self.peek_token().kind != TokenKind::Keyword(TokenKeyword::Let) {
|
||||
self.error_expected_peek("let");
|
||||
return None;
|
||||
}
|
||||
|
||||
self.parse_let()
|
||||
}
|
||||
|
||||
pub(super) fn parse_let(&mut self) -> Option<Let> {
|
||||
self.next_token();
|
||||
|
||||
let (name, ty) = self.parse_ident_with_type()?;
|
||||
let expr;
|
||||
|
||||
if self.skip_token(TokenKind::Symbol(TokenSymbol::Eq)) {
|
||||
expr = Some(self.parse_expr()?);
|
||||
self.trim_newlines();
|
||||
} else if self.skip_token(TokenKind::Newline) {
|
||||
expr = None;
|
||||
} else {
|
||||
self.error_expected_peek("= or newline");
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Let { name, ty, expr })
|
||||
}
|
||||
|
||||
fn parse_expr(&mut self) -> Option<Expr> {
|
||||
use TokenKind::*;
|
||||
|
||||
match self.peek_token().kind {
|
||||
Literal(TokenLiteral::Int) => Some(Expr::Int(self.parse_int()?)),
|
||||
Literal(TokenLiteral::Float) => Some(Expr::Float(self.parse_float()?)),
|
||||
Literal(TokenLiteral::Char) => Some(Expr::Char(self.parse_char()?)),
|
||||
_ => {
|
||||
self.error_expected_peek("expression");
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks `static let` / `let` declarations with and without initializers,
/// plus a declaration that is rejected because it has no type annotation.
#[test]
fn test_parse_let() {
    let source = r#"static let test01: int = 4
let test02: char = '6'
static let test03: float
let test04 = 9"#;
    let mut parser = Parser::new(source);

    let first = Let {
        name: "test01".into(),
        ty: Ty::Int,
        expr: Some(Expr::Int(4)),
    };
    assert_eq!(parser.parse_static(), Some(first));

    let second = Let {
        name: "test02".into(),
        ty: Ty::Char,
        expr: Some(Expr::Char('6')),
    };
    assert_eq!(parser.parse_let(), Some(second));

    let third = Let {
        name: "test03".into(),
        ty: Ty::Float,
        expr: None,
    };
    assert_eq!(parser.parse_static(), Some(third));

    // `test04` has no `: <ty>` part, so the declaration is rejected.
    assert_eq!(parser.parse_let(), None);
}
|
||||
|
||||
/// Feeds a sequence of literals through `parse_expr` and checks each result
/// in order; the final input is expected to be rejected.
#[test]
fn test_parse_expr_literals() {
    let mut parser = Parser::new("4524 3123.15e4 9e2 9083482.429455 'c' 3331.13.3");

    let expectations = vec![
        Some(Expr::Int(4524)),
        Some(Expr::Float(3123.15e4)),
        Some(Expr::Float(9e2)),
        Some(Expr::Float(9083482.429455)),
        Some(Expr::Char('c')),
        None,
    ];

    for expected in expectations {
        assert_eq!(parser.parse_expr(), expected);
    }
}
|
100
src/parser/literal.rs
Normal file
100
src/parser/literal.rs
Normal file
@@ -0,0 +1,100 @@
|
||||
use super::Parser;
|
||||
use crate::lexer::{
|
||||
TokenKind,
|
||||
TokenSymbol,
|
||||
};
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
/// Consumes the next token and converts its text to an `i32`.
///
/// The value is accumulated digit by digit with checked arithmetic; on
/// overflow an error is reported and `None` is returned. A character that
/// is not a decimal digit also yields `None`, without an error message.
pub(super) fn parse_int(&mut self) -> Option<i32> {
    let digits = self.next_token().val;
    let mut value: i32 = 0;

    for ch in digits.chars() {
        // ch is always ['0'..='9']
        let digit = ch.to_digit(10)?;

        let next = value
            .checked_mul(10)
            .and_then(|scaled| scaled.checked_add(digit as i32));

        match next {
            Some(n) => value = n,
            None => {
                self.error(&format!(
                    "integer values must be in range [{}, {}]",
                    i32::MIN,
                    i32::MAX
                ));
                return None;
            }
        }
    }

    Some(value)
}
|
||||
|
||||
/// f32 can be NaN and inf as well
|
||||
pub(super) fn parse_float(&mut self) -> Option<f32> {
|
||||
let token = self.next_token();
|
||||
let mut chars = token.val.chars();
|
||||
let mut float: f32 = 0.0;
|
||||
let mut fraction: f32 = 0.0;
|
||||
let mut prec: i32 = 0;
|
||||
let mut exp: i32 = 0;
|
||||
let mut decimal: bool = false;
|
||||
|
||||
// lexer takes care of multiple decimals and non digit characters
|
||||
for c in chars.by_ref() {
|
||||
match c {
|
||||
'.' => decimal = true,
|
||||
'e' | 'E' => {
|
||||
// lexer takes care that decimal doesnt come after e
|
||||
let s;
|
||||
match self.peek_token().kind {
|
||||
TokenKind::Symbol(TokenSymbol::Minus) => {
|
||||
s = -1;
|
||||
self.next_token();
|
||||
}
|
||||
TokenKind::Symbol(TokenSymbol::Plus) => {
|
||||
s = 1;
|
||||
self.next_token();
|
||||
}
|
||||
_ => s = 1,
|
||||
}
|
||||
|
||||
exp = self.parse_int()? * s;
|
||||
break;
|
||||
}
|
||||
_ => {
|
||||
// c is always ['0'..='9']
|
||||
let d = c.to_digit(10)? as f32;
|
||||
if decimal {
|
||||
fraction *= 10.0;
|
||||
fraction += d;
|
||||
prec += 1;
|
||||
} else {
|
||||
float *= 10.0;
|
||||
float += d;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fraction /= 10f32.powi(prec);
|
||||
float += fraction;
|
||||
float *= 10f32.powi(exp);
|
||||
|
||||
Some(float)
|
||||
}
|
||||
|
||||
/// Consumes the next token and returns the second character of its text,
/// i.e. the character between the quotes. Returns `None` if the text has
/// fewer than two characters.
pub(super) fn parse_char(&mut self) -> Option<char> {
    // the lexer ensures that the 0th and 2nd characters are both '
    let quoted = self.next_token().val;
    let mut chars = quoted.chars();
    // Step over the opening quote.
    chars.next()?;
    chars.next()
}
|
||||
}
|
136
src/parser/mod.rs
Normal file
136
src/parser/mod.rs
Normal file
@@ -0,0 +1,136 @@
|
||||
//! A naive parser just to get started
|
||||
//!
|
||||
//! Currently it can only parse module, class and function declarations, along with let statements
|
||||
|
||||
mod entity;
|
||||
mod expr;
|
||||
mod literal;
|
||||
|
||||
use crate::ast::{
|
||||
Parent,
|
||||
Ty,
|
||||
};
|
||||
use crate::lexer::{
|
||||
Lexer,
|
||||
Token,
|
||||
TokenKeyword,
|
||||
TokenKind,
|
||||
TokenSymbol,
|
||||
};
|
||||
use std::rc::Rc;
|
||||
|
||||
pub struct Parser<'a> {
|
||||
pub lexer: Lexer<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
/// Creates a new [`Parser`] instance.
|
||||
pub fn new(contents: &'a str) -> Parser<'a> {
|
||||
Parser {
|
||||
lexer: Lexer::new(contents),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn error(&self, message: &str) {
|
||||
eprintln!(
|
||||
"Parser: {}, at \"{}:{}\"",
|
||||
message, self.lexer.line, self.lexer.col
|
||||
);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn error_expected(&self, expected: &str, found: &str) {
|
||||
self.error(&format!("expected {}, found {}", expected, found));
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn error_expected_peek(&mut self, expected: &str) {
|
||||
let found = &Rc::clone(&self.peek_token().val);
|
||||
self.error_expected(expected, found);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn next_token(&mut self) -> Token {
|
||||
self.lexer.next_token()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn peek_token(&mut self) -> &Token {
|
||||
return self.lexer.peek_token();
|
||||
}
|
||||
|
||||
fn trim_newlines(&mut self) {
|
||||
while self.peek_token().kind == TokenKind::Newline {
|
||||
self.next_token();
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes the next token if its kind equals `kind`.
///
/// Returns `true` when a token was consumed and `false` otherwise, in which
/// case the token stream is left untouched.
fn skip_token(&mut self, kind: TokenKind) -> bool {
    let matched = self.peek_token().kind == kind;
    if matched {
        self.next_token();
    }
    matched
}
|
||||
|
||||
fn parse_ty(&mut self) -> Option<Ty> {
|
||||
let ty: Ty;
|
||||
|
||||
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
|
||||
ty = match keyword {
|
||||
TokenKeyword::Int => Ty::Int,
|
||||
TokenKeyword::Char => Ty::Char,
|
||||
TokenKeyword::Float => Ty::Float,
|
||||
_ => {
|
||||
self.error_expected_peek("ty");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
} else {
|
||||
self.error_expected_peek("ty");
|
||||
return None;
|
||||
}
|
||||
self.next_token();
|
||||
Some(ty)
|
||||
}
|
||||
|
||||
fn parse_ident(&mut self) -> Option<Rc<str>> {
|
||||
if self.peek_token().kind != TokenKind::Identifier {
|
||||
self.error_expected_peek("identifier");
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Rc::clone(&self.next_token().val))
|
||||
}
|
||||
|
||||
fn parse_ident_with_type(&mut self) -> Option<(Rc<str>, Ty)> {
|
||||
let ident = self.parse_ident()?;
|
||||
|
||||
if !self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) {
|
||||
self.error_expected_peek(":");
|
||||
return None;
|
||||
}
|
||||
|
||||
Some((ident, self.parse_ty()?))
|
||||
}
|
||||
|
||||
/// Returns an [`Entity`] vector after parsing
|
||||
///
|
||||
/// [`Entity`]: crate::ast::Entity
|
||||
pub fn parse(&mut self) -> Option<Parent> {
|
||||
let mut parent = vec![];
|
||||
|
||||
loop {
|
||||
match self.peek_token().kind {
|
||||
TokenKind::Newline => self.trim_newlines(),
|
||||
TokenKind::Eof => break,
|
||||
_ => {
|
||||
parent.push(self.parse_entity()?);
|
||||
}
|
||||
}
|
||||
}
|
||||
Some(parent)
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user