src/lexer.rs: rename enums and add backslash to escape newline

Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
Date: 2023-04-08 17:13:47 +05:30
parent 0d7a4bdd4e
commit 879d3d3b65

@@ -1,14 +1,14 @@
 use std::{iter, str};
 
 #[derive(Debug)]
-pub enum Literal {
+pub enum TokenLiteral {
     Int,
     Float,
     Char,
 }
 
 #[derive(Debug)]
-pub enum Symbol {
+pub enum TokenSymbol {
     // operators
     Plus,
     Minus,
@@ -59,14 +59,15 @@ pub enum Symbol {
 }
 
 #[derive(Debug)]
-pub enum Keyword {
+pub enum TokenKeyword {
     Let,
     Fn,
+    Ret,
 
     // conditionals
     If,
     Else,
-    Elseif,
+    Elif,
 
     // loops
     While,
@@ -80,7 +81,7 @@ pub enum Keyword {
 }
 
 #[derive(Debug)]
-pub enum Delimiter {
+pub enum TokenDelimiter {
     BraceOpen,
     BraceClose,
     ParenOpen,
@@ -88,20 +89,13 @@ pub enum Delimiter {
 }
 
 #[derive(Debug)]
-pub enum TokenKind {
-    Literal(Literal),
-    Symbol(Symbol),
-    Keyword(Keyword),
-    Delimiter(Delimiter),
+pub enum Token<'a> {
     Newline,
-    Identifier,
-}
-
-#[derive(Debug)]
-pub struct Token<'a> {
-    pub kind: TokenKind,
-    pub value: Option<&'a str>,
-    pub line: usize,
+    Literal(TokenLiteral, &'a str),
+    Symbol(TokenSymbol),
+    Keyword(TokenKeyword),
+    Delimiter(TokenDelimiter),
+    Identifier(&'a str),
 }
 
 pub struct Lexer<'a> {
@@ -142,6 +136,24 @@ impl<'a> Lexer<'a> {
         }
     }
 
+    fn escape_newline(&mut self) {
+        while let Some(c) = self.chars.peek() {
+            match c {
+                '\r' | '\t' | ' ' => {
+                    self.next();
+                }
+                '\n' => {
+                    self.next();
+                    break;
+                }
+                _ => {
+                    self.error();
+                    panic!("expected newline");
+                },
+            }
+        }
+    }
+
     fn get_numeric(&mut self) -> Token<'a> {
         let mut is_float: bool = false;
         while let Some(c) = self.chars.peek() {
@@ -159,15 +171,14 @@ impl<'a> Lexer<'a> {
             self.next();
         }
 
-        Token {
-            kind: if is_float {
-                TokenKind::Literal(Literal::Float)
-            } else {
-                TokenKind::Literal(Literal::Int)
-            },
-            value: Some(&self.text[self.start..self.end]),
-            line: self.line,
-        }
+        Token::Literal(
+            if is_float {
+                TokenLiteral::Float
+            } else {
+                TokenLiteral::Int
+            },
+            &self.text[self.start..self.end],
+        )
     }
 
     fn get_char(&mut self) -> Token<'a> {
@@ -180,32 +191,19 @@ impl<'a> Lexer<'a> {
         self.skip('\'');
 
-        Token {
-            kind: TokenKind::Literal(Literal::Char),
-            value: Some(&self.text[self.start + 1..self.end - 1]),
-            line: self.line,
-        }
+        Token::Literal(TokenLiteral::Char, &self.text[self.start + 1..self.end - 1])
     }
 
     fn get_delimiter(&mut self) -> Token<'a> {
-        macro_rules! token_delimiter {
-            ($a:expr) => {
-                Token {
-                    kind: TokenKind::Delimiter($a),
-                    value: None,
-                    line: self.line,
-                }
-            };
-        }
-
-        use Delimiter::*;
+        use Token::Delimiter;
+        use TokenDelimiter::*;
 
         match self.next() {
             Some(c) => match c {
-                '{' => token_delimiter!(BraceOpen),
-                '}' => token_delimiter!(BraceClose),
-                '(' => token_delimiter!(ParenOpen),
-                ')' => token_delimiter!(ParenClose),
+                '{' => Delimiter(BraceOpen),
+                '}' => Delimiter(BraceClose),
+                '(' => Delimiter(ParenOpen),
+                ')' => Delimiter(ParenClose),
                 _ => {
                     self.error();
                     panic!("expected delimiter");
@@ -219,25 +217,16 @@ impl<'a> Lexer<'a> {
     }
 
     fn get_symbol(&mut self) -> Token<'a> {
-        // handle ~, ., :
-        macro_rules! token_symbol {
-            ($a:expr) => {
-                Token {
-                    kind: TokenKind::Symbol($a),
-                    value: None,
-                    line: self.line,
-                }
-            };
-        }
+        use Token::Symbol;
 
         // handle +, +=, -, -=, *, *=, /, /=, %, %=, ^, ^=, !, !=
         macro_rules! token_symbol_eq {
            ($a:expr, $b:expr) => {
                if self.chars.peek() == Some(&'=') {
                    self.next();
-                    token_symbol!($b)
+                    Symbol($b)
                } else {
-                    token_symbol!($a)
+                    Symbol($a)
                }
            };
        }
@@ -248,13 +237,13 @@ impl<'a> Lexer<'a> {
            match self.chars.peek() {
                Some('=') => {
                    self.next();
-                    token_symbol!($c)
+                    Symbol($c)
                }
                Some($d) => {
                    self.next();
-                    token_symbol!($b)
+                    Symbol($b)
                }
-                _ => token_symbol!($a),
+                _ => Symbol($a),
            }
        };
    }
@@ -265,18 +254,18 @@ impl<'a> Lexer<'a> {
            match self.chars.peek() {
                Some('=') => {
                    self.next();
-                    token_symbol!($d)
+                    Symbol($d)
                }
                Some($e) => {
                    self.next();
                    token_symbol_eq!($b, $c)
                }
-                _ => token_symbol!($a),
            }
        };
    }
 
-        use Symbol::*;
+        use TokenSymbol::*;
 
        match self.next() {
            Some(c) => match c {
@@ -292,10 +281,10 @@ impl<'a> Lexer<'a> {
                '|' => token_symbol_logical!(Or, OrOr, OrEq, '|'),
                '<' => token_symbol_compare!(Lt, Shl, ShlEq, LtEq, '<'),
                '>' => token_symbol_compare!(Gt, Shr, ShrEq, GtEq, '>'),
-                '~' => token_symbol!(Tilde),
-                ':' => token_symbol!(Colon),
-                '.' => token_symbol!(Dot),
-                '#' => token_symbol!(Hash),
+                '~' => Symbol(Tilde),
+                ':' => Symbol(Colon),
+                '.' => Symbol(Dot),
+                '#' => Symbol(Hash),
                _ => {
                    self.error();
                    panic!("expected symbol");
@@ -317,35 +306,23 @@ impl<'a> Lexer<'a> {
            self.next();
        }
 
-        macro_rules! token_keyword {
-            ($a:expr) => {
-                Token {
-                    kind: TokenKind::Keyword($a),
-                    value: None,
-                    line: self.line,
-                }
-            };
-        }
-
-        use Keyword::*;
+        use Token::Keyword;
+        use TokenKeyword::*;
 
        match &self.text[self.start..self.end] {
-            "let" => token_keyword!(Let),
-            "fn" => token_keyword!(Fn),
-            "if" => token_keyword!(If),
-            "else" => token_keyword!(Else),
-            "elseif" => token_keyword!(Elseif),
-            "while" => token_keyword!(While),
-            "do" => token_keyword!(Do),
-            "for" => token_keyword!(For),
-            "int" => token_keyword!(Int),
-            "float" => token_keyword!(Float),
-            "char" => token_keyword!(Char),
-            _ => Token {
-                kind: TokenKind::Identifier,
-                value: Some(&self.text[self.start..self.end]),
-                line: self.line,
-            },
+            "let" => Keyword(Let),
+            "fn" => Keyword(Fn),
+            "ret" => Keyword(Ret),
+            "if" => Keyword(If),
+            "else" => Keyword(Else),
+            "elif" => Keyword(Elif),
+            "while" => Keyword(While),
+            "do" => Keyword(Do),
+            "for" => Keyword(For),
+            "int" => Keyword(Int),
+            "float" => Keyword(Float),
+            "char" => Keyword(Char),
+            _ => Token::Identifier(&self.text[self.start..self.end]),
        }
    }
@@ -357,12 +334,12 @@ impl<'a> Lexer<'a> {
                ' ' | '\r' | '\t' => {
                    self.next();
                }
+                '\\' => {
+                    self.next();
+                    self.escape_newline();
+                }
                '\n' => {
-                    tokens.push(Token {
-                        kind: TokenKind::Newline,
-                        value: None,
-                        line: self.line,
-                    });
+                    tokens.push(Token::Newline);
                    self.next();
                    self.line += 1;
                }