Compare commits

17 Commits

Author SHA1 Message Date
d377c8448f nix: replace rust-overlay with fenix
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-08-14 14:19:17 +05:30
af30410aab parser: fix newline checks after statements
since there is no EOL delimiter like ';', we have to check for newline to avoid something like `a = 4 + 4 b = a`
also added tests for expr parsing

Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-08-14 00:57:17 +05:30
360687f3c0 ast: make static an entity
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-08-13 22:20:51 +05:30
f7ec10646b parser: initial expression parsing logic
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-08-13 07:37:14 +05:30
ad57170010 lexer: allow underscore for identifiers
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-08-13 07:35:20 +05:30
1a2563f756 args: use PathBuf instead of string for file path
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-08-13 07:32:55 +05:30
d70b196042 parser: init
does not support many expressions

Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-31 00:27:55 +05:30
771363106f lexer: add exponents to floating literals
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-30 18:46:00 +05:30
384f382cfc ast: random changes
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-30 16:37:12 +05:30
bcc3b29fc5 ast: rename Type to Primitive
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-30 01:51:13 +05:30
589fa73d7c lexer: add comma and fix newline
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-30 01:50:43 +05:30
32484b3d6a ast: add a naive ast for now
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-29 23:38:06 +05:30
a15b3d013f ci: merge jobs
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-29 20:36:06 +05:30
e8192df9e2 revive project
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-07-29 20:28:14 +05:30
879d3d3b65 src/lexer.rs: rename enums and add backslash to escape newline
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-04-08 17:13:47 +05:30
0d7a4bdd4e src/lexer.rs: implement a naive and dumb lexer
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-04-08 02:10:57 +05:30
6979a02408 add better panic message and restructure src/args.rs
Signed-off-by: Amneesh Singh <natto@weirdnatto.in>
2023-04-08 02:10:36 +05:30
16 changed files with 1944 additions and 81 deletions

.github/workflows/ci.yml vendored Normal file (34 lines)

@@ -0,0 +1,34 @@
name: CI
on: [ push, pull_request, workflow_dispatch ]
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: cachix/install-nix-action@v20
with:
extra_nix_config: |
auto-optimise-store = true
experimental-features = nix-command flakes
- uses: cachix/cachix-action@v12
with:
name: pain
authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
- name: fmt checks
run: nix build .#checks.fmt -L
- name: clippy checks
run: nix build .#checks.clippy -L
- name: nextest checks
run: nix build .#checks.nextest -L
- name: doc checks
run: nix build .#checks.doc -L
- name: build
run: nix build .#tricc -L

README

@@ -2,6 +2,6 @@ tricc (pronounced "trick-c" or "trixie" like pixie) is a WIP toy compiler I am
 writing to understand compilers better, that's it. Will probably use LLVM as
 the backend.
 
-Notes:
-- Does not work
-- Is WIP
+** Notes:
++ Does not work
++ Is WIP

flake.lock generated (96 lines)

@@ -10,11 +10,11 @@
         "rust-overlay": "rust-overlay"
       },
       "locked": {
-        "lastModified": 1680584903,
-        "narHash": "sha256-uraq+D3jcLzw/UVk0xMHcnfILfIMa0DLrtAEq2nNlxU=",
+        "lastModified": 1688772518,
+        "narHash": "sha256-ol7gZxwvgLnxNSZwFTDJJ49xVY5teaSvF7lzlo3YQfM=",
         "owner": "ipetkov",
         "repo": "crane",
-        "rev": "65d3f6a3970cd46bef5eedfd458300f72c56b3c5",
+        "rev": "8b08e96c9af8c6e3a2b69af5a7fa168750fcf88e",
         "type": "github"
       },
       "original": {
@@ -40,12 +40,15 @@
       }
     },
     "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
       "locked": {
-        "lastModified": 1678901627,
-        "narHash": "sha256-U02riOqrKKzwjsxc/400XnElV+UtPUQWpANPlyazjH0=",
+        "lastModified": 1687709756,
+        "narHash": "sha256-Y5wKlQSkgEK2weWdOu4J3riRd+kV/VCgHsqLNTTWQ/0=",
         "owner": "numtide",
         "repo": "flake-utils",
-        "rev": "93a2b84fc4b70d9e089d029deacc3583435c2ed6",
+        "rev": "dbabf0ca0c0c4bce6ea5eaf65af5cb694d2082c7",
         "type": "github"
       },
       "original": {
@@ -55,12 +58,15 @@
       }
     },
     "flake-utils_2": {
+      "inputs": {
+        "systems": "systems_2"
+      },
       "locked": {
-        "lastModified": 1678901627,
-        "narHash": "sha256-U02riOqrKKzwjsxc/400XnElV+UtPUQWpANPlyazjH0=",
+        "lastModified": 1689068808,
+        "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=",
         "owner": "numtide",
         "repo": "flake-utils",
-        "rev": "93a2b84fc4b70d9e089d029deacc3583435c2ed6",
+        "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4",
         "type": "github"
       },
       "original": {
@@ -70,12 +76,15 @@
       }
     },
     "flake-utils_3": {
+      "inputs": {
+        "systems": "systems_3"
+      },
       "locked": {
-        "lastModified": 1659877975,
-        "narHash": "sha256-zllb8aq3YO3h8B/U0/J1WBgAL8EX5yWf5pMj3G0NAmc=",
+        "lastModified": 1681202837,
+        "narHash": "sha256-H+Rh19JDwRtpVPAWp64F+rlEtxUWBAQW28eAi3SRSzg=",
         "owner": "numtide",
         "repo": "flake-utils",
-        "rev": "c0e246b9b83f637f4681389ecabcb2681b4f3af0",
+        "rev": "cfacdce06f30d2b68473a46042957675eebb3401",
         "type": "github"
       },
       "original": {
@@ -86,11 +95,11 @@
     },
     "nixpkgs": {
       "locked": {
-        "lastModified": 1680724564,
-        "narHash": "sha256-eeUUGOTKTelYKDbUxKs0V7GUa186L2fym7jM2QQ4Oss=",
+        "lastModified": 1688392541,
+        "narHash": "sha256-lHrKvEkCPTUO+7tPfjIcb7Trk6k31rz18vkyqmkeJfY=",
         "owner": "NixOS",
         "repo": "nixpkgs",
-        "rev": "36adaa6aaa6b03e59102df0c1b12cdc3f23fd112",
+        "rev": "ea4c80b39be4c09702b0cb3b42eab59e2ba4f24b",
         "type": "github"
       },
       "original": {
@@ -120,11 +129,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1680488274,
-        "narHash": "sha256-0vYMrZDdokVmPQQXtFpnqA2wEgCCUXf5a3dDuDVshn0=",
+        "lastModified": 1688351637,
+        "narHash": "sha256-CLTufJ29VxNOIZ8UTg0lepsn3X03AmopmaLTTeHDCL4=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "7ec2ff598a172c6e8584457167575b3a1a5d80d8",
+        "rev": "f9b92316727af9e6c7fee4a761242f7f46880329",
         "type": "github"
       },
       "original": {
@@ -141,11 +150,11 @@
         ]
       },
       "locked": {
-        "lastModified": 1680660688,
-        "narHash": "sha256-XeQTCxWBR0Ai1VMzI5ZXYpA2lu1F8FzZKjw8RtByZOg=",
+        "lastModified": 1690252178,
+        "narHash": "sha256-9oEz822bvbHobfCUjJLDor2BqW3I5tycIauzDlzOALY=",
         "owner": "oxalica",
         "repo": "rust-overlay",
-        "rev": "2f40052be98347b479c820c00fb2fc1d87b3aa28",
+        "rev": "8d64353ca827002fb8459e44d49116c78d868eba",
         "type": "github"
       },
       "original": {
@@ -153,6 +162,51 @@
         "repo": "rust-overlay",
         "type": "github"
       }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_2": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_3": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
     }
   },
   "root": "root",

flake.nix

@@ -9,25 +9,28 @@
       inputs.nixpkgs.follows = "nixpkgs";
     };
-    rust-overlay = {
-      url = github:oxalica/rust-overlay;
+    fenix = {
+      url = github:nix-community/fenix;
       inputs.nixpkgs.follows = "nixpkgs";
     };
     flake-utils.url = github:numtide/flake-utils;
   };
 
-  outputs = inputs@{ self, nixpkgs, crane, rust-overlay, flake-utils }:
+  outputs = inputs@{ self, nixpkgs, crane, fenix, flake-utils }:
     flake-utils.lib.eachDefaultSystem (system:
       let
         pkgs = import nixpkgs {
           inherit system;
-          overlays = [ rust-overlay.overlays.default ];
+          overlays = [ fenix.overlays.default ];
         };
         inherit (pkgs) lib;
 
-        toolchain = pkgs.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
+        toolchain = pkgs.fenix.fromToolchainFile {
+          file = ./rust-toolchain.toml;
+          sha256 = "sha256-n8LtGbpj/yCUGo0NFJ7FNv9fSdT9oKEUl+EPLg06JdQ=";
+        };
         craneLib = (crane.mkLib pkgs).overrideToolchain toolchain;
         src = craneLib.cleanCargoSource (craneLib.path ./.);
@@ -38,25 +41,34 @@
         tricc = craneLib.buildPackage (commonArgs // {
           inherit cargoArtifacts;
+          doCheck = false;
         });
       in
       {
-        checks = {
-          inherit tricc;
-          clippy = craneLib.cargoClippy (commonArgs // {
-            inherit cargoArtifacts;
-            cargoClippyExtraArgs = "--all-targets -- --deny warnings";
-          });
-          fmt = craneLib.cargoFmt {
-            inherit src;
-          };
-        };
         packages = {
           inherit tricc;
           default = tricc;
+          # not using flake checks to run them individually
+          checks = {
+            clippy = craneLib.cargoClippy (commonArgs // {
+              inherit cargoArtifacts;
+            });
+            fmt = craneLib.cargoFmt {
+              inherit src;
+            };
+            doc = craneLib.cargoDoc (commonArgs // {
+              inherit cargoArtifacts;
+            });
+            nextest = craneLib.cargoNextest (commonArgs // {
+              inherit cargoArtifacts;
+              partitions = 1;
+              partitionType = "count";
+            });
+          };
         };
 
         devShells.default = pkgs.mkShell {

rust-toolchain.toml

@@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly-2023-04-01"
+channel = "nightly-2023-07-15"
 components = [ "rustfmt", "clippy", "rust-analyzer", "rust-src" ]

rustfmt.toml Normal file (5 lines)

@@ -0,0 +1,5 @@
comment_width = 99
format_code_in_doc_comments = true
imports_granularity = "Module"
imports_layout = "Vertical"
wrap_comments = true

src/args.rs

@@ -1,50 +1,62 @@
 use std::env;
+use std::path::PathBuf;
 use std::process::exit;
 
 const VERSION: &str = env!("CARGO_PKG_VERSION");
 const CRATE: &str = env!("CARGO_CRATE_NAME");
 
+/// A naive argument handler
 #[derive(Default)]
-struct Options {
+pub struct Args {
     version: bool,
-    file: String,
+    file: Option<PathBuf>,
 }
 
-// naive argument handling
-pub fn handle() -> String {
-    let args: Vec<String> = env::args().collect();
-
-    if args.len() < 2 {
-        println!("Usage: {} [-v] <file>", CRATE);
-        exit(0);
-    }
-
-    let mut options: Options = Default::default();
-
-    for arg in &args[1..] {
-        match arg.as_str() {
-            "-v" | "--version" => options.version = true,
-            flag if flag.starts_with('-') => panic!("option {} not implemented!", flag),
-            file => {
-                if !options.file.is_empty() {
-                    panic!("please specify only a single source file!");
-                }
-                options.file = file.to_string();
-            }
-        }
-    }
-
-    if options.version {
-        println!("{} version: {}", CRATE, VERSION);
-    }
-
-    if options.file.is_empty() {
-        if options.version {
-            exit(0);
-        } else {
-            panic!("no file supplied!");
-        }
-    }
-
-    options.file
-}
+impl Args {
+    /// Creates a new [`Args`] instance
+    pub fn new() -> Args {
+        Args::default()
+    }
+
+    /// Checks for various arguments
+    pub fn handle(&mut self) {
+        let args: Vec<String> = env::args().collect();
+
+        if args.len() < 2 {
+            println!("Usage: {} [-v] <file>", CRATE);
+            exit(0);
+        }
+
+        for arg in &args[1..] {
+            match arg.as_str() {
+                "-v" | "--version" => self.version = true,
+                flag if flag.starts_with('-') => panic!("option {} not implemented!", flag),
+                file => {
+                    if self.file.is_some() {
+                        panic!("please specify only a single source file!");
+                    }
+                    self.file = Some(PathBuf::from(file));
+                }
+            }
+        }
+
+        if self.version {
+            println!("{} version: {}", CRATE, VERSION);
+        }
+
+        if self.file.is_none() {
+            if self.version {
+                exit(0);
+            } else {
+                panic!("no file supplied!");
+            }
+        }
+    }
+
+    /// Fetches the file from the arguments.
+    /// Panics if there is no file in the arguments
+    #[inline]
+    pub fn get_file(self) -> PathBuf {
+        self.file.expect("no file supplied!")
+    }
+}

src/ast.rs Normal file (132 lines)

@@ -0,0 +1,132 @@
//! A very naive AST definition using recursive enums
//!
//! See the parser for implementation
use std::rc::Rc;
pub type Parent = Vec<Entity>;
/// Entities are functions, classes, modules, and statics
#[derive(Debug, PartialEq)]
pub enum Entity {
Fn(Fn),
Class(Class),
Module(Module),
Static(Let),
}
/// A module just provides an additional scope
///
/// TODO: Add exporting and importing modules
#[derive(Debug, PartialEq)]
pub struct Module {
/// Name of module
pub name: Rc<str>,
/// Everything inside the module
pub children: Vec<ModuleChildren>,
}
/// Modules contain functions, classes, nested modules, and statics
#[derive(Debug, PartialEq)]
pub enum ModuleChildren {
Fn(Fn),
Class(Class),
Module(Module),
Static(Let),
}
/// Classes encapsulate functions and definitions.
#[derive(Debug, PartialEq)]
pub struct Class {
/// Name of class
pub name: Rc<str>,
/// Everything inside the class
pub children: Vec<ClassChildren>,
}
#[derive(Debug, PartialEq)]
pub enum ClassChildren {
Fn(Fn),
Let(Let),
Static(Let),
}
/// A Function
#[derive(Debug, PartialEq)]
pub struct Fn {
/// Name of the function
pub name: Rc<str>,
/// Optional return type
pub return_ty: Option<Ty>,
/// Parameters
pub params: Vec<(Rc<str>, Ty)>,
/// The function block
pub children: Vec<Statement>,
}
/// Statements encapsulate expressions and definitions
#[derive(Debug, PartialEq)]
pub enum Statement {
Static(Let),
Let(Let),
Expr(Expr),
}
/// A variable definition
#[derive(Debug, PartialEq)]
pub struct Let {
/// Name of variable
pub name: Rc<str>,
/// Type of variable
pub ty: Ty,
/// Value of variable
pub expr: Option<Expr>,
}
/// Primitives
///
/// TODO: add arrays and pointers maybe
#[derive(Debug, PartialEq)]
pub enum Ty {
Int,
Float,
Char,
}
#[derive(Debug, PartialEq)]
pub struct If {
pub cond: Box<Expr>,
pub then: Vec<Statement>,
pub or: Option<Box<ElseType>>,
}
#[derive(Debug, PartialEq)]
pub enum ElseType {
If(If),
Else(Vec<Statement>),
}
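/// Operators reuse the lexer's symbol tokens directly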
pub(crate) type Op = crate::lexer::TokenSymbol;
#[derive(Debug, PartialEq)]
pub enum Literal {
Int(i32),
Float(f32),
Char(char),
}
/// Lowest form of expression
///
/// TODO: refine
#[derive(Debug, PartialEq)]
pub enum Expr {
Literal(Literal),
Identifier(Rc<str>),
Op(Op, Box<Expr>, Option<Box<Expr>>),
If(If),
Block(Vec<Statement>),
Loop(Vec<Statement>),
Break,
Continue,
Return(Option<Box<Expr>>),
}

src/lexer.rs

@@ -1,5 +1,565 @@
-use nix::
-
-fn threads(){
-    println!({},"aa");
-}
use std::collections::VecDeque;
use std::iter::Peekable;
use std::rc::Rc;
use std::str;

/// All token literals
///
/// TODO: Add string
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum TokenLiteral {
Int,
Float,
Char,
}
/// All token symbols
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum TokenSymbol {
// arithmetic
Plus,
Minus,
Star,
Slash,
Percent,
Caret,
Tilde,
// bitwise
Shl,
Shr,
And,
Or,
// logical
Not,
AndAnd,
OrOr,
// relational
Gt,
Lt,
GtEq,
LtEq,
EqEq,
Ne,
// assignment
Eq,
PlusEq,
MinusEq,
StarEq,
SlashEq,
PercentEq,
CaretEq,
ShlEq,
ShrEq,
AndEq,
OrEq,
//misc
Colon,
Dot,
Comma,
Hash,
}
/// All token keywords
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum TokenKeyword {
// parents
Fn,
Class,
Module,
// statements
Static,
Let,
Ret,
// conditionals
If,
Else,
Elif,
// control flow
Loop,
Break,
Continue,
Return,
// primitives
Int,
Float,
Char,
}
/// All token delimiters
///
/// TODO: Maybe add \[ and \]
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum TokenDelimiter {
BraceOpen,
BraceClose,
ParenOpen,
ParenClose,
}
/// All tokens
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum TokenKind {
Newline,
Eof,
Literal(TokenLiteral),
Symbol(TokenSymbol),
Keyword(TokenKeyword),
Delimiter(TokenDelimiter),
Identifier,
Invalid,
}
#[derive(Debug)]
pub struct Token {
pub kind: TokenKind,
/// Holds the reference to the tokenized string
///
/// For example, if `kind` is of type [`TokenKind::Identifier`], this would contain the value
/// of that identifier
pub val: Rc<str>,
}
pub struct Lexer<'a> {
/// The entire text to be tokenized
text: &'a str,
/// A peekable iterate for `text`
chars: Peekable<str::Chars<'a>>,
/// A peekable double ended queue for the tokens
tokens: VecDeque<Token>,
/// Current line number
pub(crate) line: usize,
pub(crate) col: usize,
/// Start character index for the current token
start: usize,
/// End character index for the current token
end: usize,
}
impl<'a> Lexer<'a> {
/// Creates a new [`Lexer`] instance with the provided content.
///
/// The `Lexer` is responsible for tokenizing the given text, making it easier to
/// perform various parsing operations.
///
/// # Arguments
///
/// * `content`: The text to tokenize.
///
/// # Returns
///
/// A new instance of `Lexer` initialized with the provided `content`.
///
/// # Example
///
/// ```
/// use tricc::lexer::Lexer;
///
/// let lexer = Lexer::new("let example: int = 4");
/// ```
pub fn new(content: &'a str) -> Self {
Lexer {
text: content,
chars: content.chars().peekable(),
tokens: VecDeque::new(),
line: 1,
col: 1,
start: 0,
end: 0,
}
}
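/// Builds a token of the given kind from the current start..end slice of the source text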
#[inline]
fn new_token(&self, kind: TokenKind) -> Token {
Token {
kind,
val: Rc::from(&self.text[self.start..self.end]),
}
}
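/// Reports a lexer error along with the current position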
#[inline]
fn error(&self, msg: &str) {
eprintln!("Lexer: {}, at \"{}:{}\"", msg, self.line, self.end);
}
#[inline]
fn peek(&mut self) -> Option<&char> {
self.chars.peek()
}
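/// Consumes the next character, advancing the column and the token end offset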
#[inline]
fn next(&mut self) -> Option<char> {
self.end += 1;
self.col += 1;
self.chars.next()
}
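/// Skips spaces, tabs, and carriage returns; a backslash also makes the following newline get skipped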
fn skip_whitespace(&mut self) {
let mut ignore_nl: bool = false;
while let Some(c) = self.peek() {
match c {
'\r' | '\t' | ' ' => {
self.next();
}
'\n' => {
if ignore_nl {
ignore_nl = false;
self.next();
} else {
break;
}
}
'\\' => {
self.next();
ignore_nl = true;
}
_ => break,
}
}
}
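/// Lexes an integer or float literal; an exponent marker ends the token and forces a float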
fn get_numeric(&mut self) -> Token {
let mut is_float: bool = false;
while let Some(c) = self.peek() {
match c {
'0'..='9' => {}
'.' => {
if is_float {
self.error("multiple decimals encountered");
return self.new_token(TokenKind::Invalid);
}
is_float = true;
}
'e' | 'E' => {
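// consume the marker; the exponent's sign and digits are lexed as separate
// tokens and recombined by the parser's float handling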
self.next();
is_float = true;
break;
}
_ => break,
}
self.next();
}
self.new_token(TokenKind::Literal(if is_float {
TokenLiteral::Float
} else {
TokenLiteral::Int
}))
}
fn get_char(&mut self) -> Token {
// skip '
self.next();
if matches!(self.next(), Some('\'') | None) {
self.error("Expected character literal");
return self.new_token(TokenKind::Invalid);
}
if self.peek() != Some(&'\'') {
self.error("Expected '");
return self.new_token(TokenKind::Invalid);
}
// skip '
self.next();
self.new_token(TokenKind::Literal(TokenLiteral::Char))
}
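/// Lexes a keyword or an identifier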
fn get_alphanumeric(&mut self) -> Token {
while let Some(c) = self.peek() {
match c {
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => {}
_ => break,
}
self.next();
}
use TokenKeyword::*;
use TokenKind::Keyword;
self.new_token(match &self.text[self.start..self.end] {
"fn" => Keyword(Fn),
"class" => Keyword(Class),
"module" => Keyword(Module),
"static" => Keyword(Static),
"let" => Keyword(Let),
"ret" => Keyword(Ret),
"if" => Keyword(If),
"else" => Keyword(Else),
"elif" => Keyword(Elif),
"loop" => Keyword(Loop),
"break" => Keyword(Break),
"continue" => Keyword(Continue),
"return" => Keyword(Return),
"int" => Keyword(Int),
"float" => Keyword(Float),
"char" => Keyword(Char),
_ => TokenKind::Identifier,
})
}
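/// Lexes delimiters and single- or multi-character symbols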
fn get_symbol(&mut self) -> Token {
let c = self.next().unwrap();
use TokenDelimiter::*;
use TokenKind::{
Delimiter,
Symbol,
};
use TokenSymbol::*;
// handle +, +=, -, -=, *, *=, /, /=, %, %=, ^, ^=, !, !=
macro_rules! token_symbol_eq {
($a:expr, $b:expr) => {
match self.peek() {
Some('=') => {
self.next();
Symbol($b)
}
_ => Symbol($a),
}
};
}
// handle &, |, ||, &&, &=, |=
macro_rules! token_symbol_logical {
($a:expr, $b:expr, $c:expr, $d:expr) => {
match self.peek() {
Some('=') => {
self.next();
Symbol($c)
}
Some($d) => {
self.next();
Symbol($b)
}
_ => Symbol($a),
}
};
}
// handle <, <=, >, >=, <<, >>, <<=, >>=
macro_rules! token_symbol_compare {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
match self.peek() {
Some('=') => {
self.next();
Symbol($d)
}
Some($e) => {
self.next();
token_symbol_eq!($b, $c)
}
_ => Symbol($a),
}
};
}
let typ = match c {
'{' => Delimiter(BraceOpen),
'}' => Delimiter(BraceClose),
'(' => Delimiter(ParenOpen),
')' => Delimiter(ParenClose),
'+' => token_symbol_eq!(Plus, PlusEq),
'-' => token_symbol_eq!(Minus, MinusEq),
'*' => token_symbol_eq!(Star, StarEq),
'/' => token_symbol_eq!(Slash, SlashEq),
'%' => token_symbol_eq!(Percent, PercentEq),
'^' => token_symbol_eq!(Caret, CaretEq),
'!' => token_symbol_eq!(Not, Ne),
'=' => token_symbol_eq!(Eq, EqEq),
'&' => token_symbol_logical!(And, AndAnd, AndEq, '&'),
'|' => token_symbol_logical!(Or, OrOr, OrEq, '|'),
'<' => token_symbol_compare!(Lt, Shl, ShlEq, LtEq, '<'),
'>' => token_symbol_compare!(Gt, Shr, ShrEq, GtEq, '>'),
'~' => Symbol(Tilde),
':' => Symbol(Colon),
'.' => Symbol(Dot),
',' => Symbol(Comma),
'#' => Symbol(Hash),
_ => {
self.error("Unknown character encountered");
TokenKind::Invalid
}
};
self.new_token(typ)
}
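/// Lexes a single token from the input and pushes it onto the token queue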
fn lex(&mut self) {
self.skip_whitespace();
self.start = self.end;
let token = if let Some(c) = self.peek() {
match c {
'\n' => {
self.next();
self.line += 1;
self.col = 0;
self.new_token(TokenKind::Newline)
}
'0'..='9' => self.get_numeric(),
'a'..='z' | 'A'..='Z' | '_' => self.get_alphanumeric(),
'\'' => self.get_char(),
_ => self.get_symbol(),
}
} else {
self.new_token(TokenKind::Eof)
};
self.tokens.push_back(token);
}
/// Peeks at the next token and returns a reference to it
pub fn peek_token(&mut self) -> &Token {
if self.tokens.is_empty() {
self.lex();
}
&self.tokens[0]
}
/// Returns the next token, moving the lexer forward
pub fn next_token(&mut self) -> Token {
if self.tokens.is_empty() {
self.lex();
}
self.tokens.pop_front().unwrap()
}
}
#[test]
fn test_peek_next() {
let mut lexer = Lexer::new("test01");
assert_eq!(lexer.peek(), Some(&'t'));
assert_eq!(lexer.next(), Some('t'));
assert_eq!(lexer.peek(), Some(&'e'));
assert_eq!(lexer.peek(), Some(&'e'));
assert_eq!(lexer.next(), Some('e'));
assert_eq!(lexer.next(), Some('s'));
assert_eq!(lexer.next(), Some('t'));
assert_eq!(lexer.next(), Some('0'));
assert_eq!(lexer.peek(), Some(&'1'));
assert_eq!(lexer.next(), Some('1'));
assert_eq!(lexer.peek(), None);
assert_eq!(lexer.next(), None);
assert_eq!(lexer.peek(), None);
}
#[test]
fn test_tokens() {
let mut lexer = Lexer::new("let test02 = 4 << 1");
use TokenKind::*;
assert_eq!(lexer.peek_token().kind, Keyword(TokenKeyword::Let));
assert_eq!(lexer.next_token().kind, Keyword(TokenKeyword::Let));
let mut token = lexer.next_token();
assert_eq!(token.kind, Identifier);
assert_eq!(*token.val, *"test02");
assert_eq!(lexer.next_token().kind, Symbol(TokenSymbol::Eq));
token = lexer.next_token();
assert_eq!(token.kind, Literal(TokenLiteral::Int));
assert_eq!(*token.val, *"4");
assert_eq!(lexer.next_token().kind, Symbol(TokenSymbol::Shl));
assert_eq!(lexer.peek_token().kind, Literal(TokenLiteral::Int));
assert_eq!(*lexer.peek_token().val, *"1");
token = lexer.next_token();
assert_eq!(token.kind, Literal(TokenLiteral::Int));
assert_eq!(*token.val, *"1");
assert_eq!(lexer.peek_token().kind, Eof);
assert_eq!(lexer.next_token().kind, Eof);
assert_eq!(lexer.peek_token().kind, Eof);
assert_eq!(lexer.next_token().kind, Eof);
}
#[test]
fn test_tokens_2() {
let mut lexer = Lexer::new("let test03: char = 'h'");
use TokenKind::*;
assert_eq!(lexer.peek_token().kind, Keyword(TokenKeyword::Let));
assert_eq!(lexer.next_token().kind, Keyword(TokenKeyword::Let));
let mut token = lexer.next_token();
assert_eq!(token.kind, Identifier);
assert_eq!(*token.val, *"test03");
assert_eq!(lexer.next_token().kind, Symbol(TokenSymbol::Colon));
assert_eq!(lexer.next_token().kind, Keyword(TokenKeyword::Char));
assert_eq!(lexer.next_token().kind, Symbol(TokenSymbol::Eq));
assert_eq!(lexer.peek_token().kind, Literal(TokenLiteral::Char));
assert_eq!(*lexer.peek_token().val, *"'h'");
token = lexer.next_token();
assert_eq!(token.kind, Literal(TokenLiteral::Char));
assert_eq!(*token.val, *"'h'");
assert_eq!(lexer.peek_token().kind, Eof);
assert_eq!(lexer.next_token().kind, Eof);
}
#[test]
fn test_tokens_eof() {
let mut lexer = Lexer::new("");
assert_eq!(lexer.peek_token().kind, TokenKind::Eof);
assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_tokens_numeric() {
let mut lexer = Lexer::new("3342");
let token = lexer.next_token();
assert_eq!(token.kind, TokenKind::Literal(TokenLiteral::Int));
assert_eq!(*token.val, *"3342");
assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_tokens_numeric_2() {
let mut lexer = Lexer::new("334.2e");
let token = lexer.next_token();
assert_eq!(token.kind, TokenKind::Literal(TokenLiteral::Float));
assert_eq!(*token.val, *"334.2e");
assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}
#[test]
fn test_tokens_numeric_3() {
let mut lexer = Lexer::new("334.2e-5");
let mut token = lexer.next_token();
assert_eq!(token.kind, TokenKind::Literal(TokenLiteral::Float));
assert_eq!(*token.val, *"334.2e");
assert_eq!(
lexer.next_token().kind,
TokenKind::Symbol(TokenSymbol::Minus)
);
token = lexer.next_token();
assert_eq!(token.kind, TokenKind::Literal(TokenLiteral::Int));
assert_eq!(*token.val, *"5");
assert_eq!(lexer.next_token().kind, TokenKind::Eof);
}

src/lib.rs

@@ -1 +1,4 @@
 pub mod args;
+pub mod ast;
+pub mod lexer;
+pub mod parser;

src/main.rs

@@ -1,7 +1,40 @@
-use tricc::args;
-use tricc::lexer;
+use std::{
+    fs,
+    panic,
+};
+
+use tricc::args::Args;
+use tricc::parser::Parser;
 
 fn main() {
-    let file: String = args::handle();
-    println!("{}", file);
+    panic::set_hook(Box::new(|panic_info| {
+        if let Some(msg) = panic_info.payload().downcast_ref::<&str>() {
+            eprintln!("{}", msg);
+        } else if let Some(msg) = panic_info.payload().downcast_ref::<String>() {
+            eprintln!("{}", msg);
+        } else if let Some(location) = panic_info.location() {
+            eprintln!(
+                "panic occurred in file '{}' at line {}",
+                location.file(),
+                location.line(),
+            );
+        } else {
+            eprintln!("panic occurred");
+        }
+    }));
+
+    let mut args = Args::default();
+    args.handle();
+    let file = args.get_file();
+
+    let content = fs::read_to_string(&file).expect("Couldn't read the file");
+    let mut parser = Parser::new(&content);
+
+    let Some(parent) = parser.parse() else {
+        eprintln!(
+            "Failed to parse {} - See the errors above",
+            file.to_string_lossy()
+        );
+        std::process::exit(1);
+    };
+
+    println!("Parsed AST:\n{:#?}", parent);
 }

src/parser/entity.rs Normal file (215 lines)

@@ -0,0 +1,215 @@
use super::Parser;
use crate::ast::*;
use crate::lexer::{
TokenDelimiter,
TokenKeyword,
TokenKind,
TokenSymbol,
};
use std::rc::Rc;
impl<'a> Parser<'a> {
/// entity ::= module | class | fn | static
pub(super) fn parse_entity(&mut self) -> Option<Entity> {
use TokenKeyword::*;
let token = self.peek_token();
if let TokenKind::Keyword(keyword) = &token.kind {
Some(match keyword {
Module => Entity::Module(self.parse_module()?),
Class => Entity::Class(self.parse_class()?),
Fn => Entity::Fn(self.parse_fn()?),
Static => Entity::Static(self.parse_static()?),
_ => {
self.error_expected_peek("entity");
return None;
}
})
} else {
self.error_expected_peek("entity");
None
}
}
/// module ::= "module" ident "{" { module | fn | static | class } "}"
fn parse_module(&mut self) -> Option<Module> {
self.next_token();
let name = self.parse_ident()?;
let mut children = vec![];
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
self.error_expected_peek("{");
return None;
}
loop {
use TokenKeyword::*;
self.trim_newlines();
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
children.push(match keyword {
Module => ModuleChildren::Module(self.parse_module()?),
Fn => ModuleChildren::Fn(self.parse_fn()?),
Static => ModuleChildren::Static(self.parse_static()?),
Class => ModuleChildren::Class(self.parse_class()?),
_ => {
self.error_expected_peek("module child");
return None;
}
});
if !self.check_newline_or_tok(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("newline or }");
return None;
}
} else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("}");
return None;
} else {
break;
}
}
Some(Module { name, children })
}
/// class ::= "class" ident "{" { fn | static | let } "}"
fn parse_class(&mut self) -> Option<Class> {
self.next_token();
let name = self.parse_ident()?;
let mut children = vec![];
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
self.error_expected_peek("{");
return None;
}
loop {
use TokenKeyword::*;
self.trim_newlines();
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
children.push(match keyword {
Fn => ClassChildren::Fn(self.parse_fn()?),
Static => ClassChildren::Static(self.parse_static()?),
Let => ClassChildren::Let(self.parse_let()?),
_ => {
self.error_expected_peek("class child");
return None;
}
});
if !self.check_newline_or_tok(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("newline or }");
return None;
}
} else if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("}");
return None;
} else {
break;
}
}
Some(Class { name, children })
}
/// fn ::= "fn" ident "(" [ identWithTy { "," identWithTy } ] ")" [ ":" ty ]
/// "{" { statement } "}"
fn parse_fn(&mut self) -> Option<Fn> {
self.next_token();
let name = self.parse_ident()?;
let mut params: Vec<(Rc<str>, Ty)> = vec![];
let mut return_typ: Option<Ty> = None;
let mut children: Vec<Statement> = vec![];
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenOpen)) {
self.error_expected_peek("(");
return None;
}
loop {
if self.peek_token().kind == TokenKind::Identifier {
params.push(self.parse_ident_with_ty()?);
}
if !self.skip_token(TokenKind::Symbol(TokenSymbol::Comma)) {
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenClose)) {
self.error_expected_peek(", or )");
return None;
} else {
break;
}
}
}
if self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) {
return_typ = Some(self.parse_ty()?);
}
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceOpen)) {
self.error_expected_peek("{");
return None;
}
loop {
self.trim_newlines();
if self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
break;
}
children.push(self.parse_statement()?);
if !self.check_newline_or_tok(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("newline or }");
return None;
}
}
Some(Fn {
name,
return_ty: return_typ,
params,
children,
})
}
}
#[test]
fn test_parse_entity() {
let mut parser = Parser::new(
r#"module module01 {
class class01 {
fn fn01(param01: char, param02: float) {
static let let01: int = 4
}
}
fn fn02 (): int { }
}"#,
);
assert_eq!(
parser.parse_entity(),
Some(Entity::Module(Module {
name: "module01".into(),
children: vec![
ModuleChildren::Class(Class {
name: "class01".into(),
children: vec![ClassChildren::Fn(Fn {
name: "fn01".into(),
return_ty: None,
params: vec![("param01".into(), Ty::Char), ("param02".into(), Ty::Float)],
children: vec![Statement::Static(Let {
name: "let01".into(),
ty: Ty::Int,
expr: Some(Expr::Literal(Literal::Int(4)))
})]
})]
}),
ModuleChildren::Fn(Fn {
name: "fn02".into(),
return_ty: Some(Ty::Int),
params: vec![],
children: vec![]
})
]
}))
);
}

src/parser/expr.rs Normal file (435 lines)

@@ -0,0 +1,435 @@
use super::Parser;
use crate::ast::{
self,
*,
};
use crate::lexer::{
TokenDelimiter,
TokenKeyword,
TokenKind,
TokenLiteral,
TokenSymbol,
};
use std::rc::Rc;
impl<'a> Parser<'a> {
/// exprIf ::= "if" expr block [ else (block | exprIf ) ]
fn parse_expr_if(&mut self) -> Option<If> {
// skip "if"
self.next_token();
let cond = Box::new(self.parse_expr()?);
let then = self.parse_expr_block()?;
if !self.skip_token(TokenKind::Keyword(TokenKeyword::Else)) {
return Some(If {
cond,
then,
or: None,
});
}
if self.peek_token().kind != TokenKind::Keyword(TokenKeyword::If) {
return Some(If {
cond,
then,
or: Some(Box::new(ElseType::Else(self.parse_expr_block()?))),
});
}
Some(If {
cond,
then,
or: Some(Box::new(ElseType::If(self.parse_expr_if()?))),
})
}
/// exprBlock ::= "{" { statement } "}"
fn parse_expr_block(&mut self) -> Option<Vec<Statement>> {
let mut statements = vec![];
// skip {
self.next_token();
loop {
self.trim_newlines();
if self.skip_token(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
break;
}
statements.push(self.parse_statement()?);
if !self.check_newline_or_tok(TokenKind::Delimiter(TokenDelimiter::BraceClose)) {
self.error_expected_peek("newline or }");
return None;
}
}
Some(statements)
}
/// exprLoop ::= "loop" exprBlock
fn parse_expr_loop(&mut self) -> Option<Vec<Statement>> {
self.next_token();
if self.peek_token().kind != TokenKind::Delimiter(TokenDelimiter::BraceOpen) {
self.error_expected_peek("{");
return None;
}
self.parse_expr_block()
}
/// exprAtom ::= ( "(" expr ")" ) | ident | int | float | char | exprBlock | exprLoop | exprIf
fn parse_expr_atom(&mut self) -> Option<Expr> {
use ast::Literal::*;
use TokenKind::*;
// TODO: check lvalue validity in the analysis phase
Some(match self.peek_token().kind {
Delimiter(TokenDelimiter::ParenOpen) => {
self.next_token(); // skip (
let expr = self.parse_expr()?;
if !self.skip_token(TokenKind::Delimiter(TokenDelimiter::ParenClose)) {
self.error_expected_peek(")");
return None;
}
expr
}
Identifier => {
let token = self.next_token();
Expr::Identifier(Rc::clone(&token.val))
}
Literal(TokenLiteral::Int) => Expr::Literal(Int(self.parse_int()?)),
Literal(TokenLiteral::Float) => Expr::Literal(Float(self.parse_float()?)),
Literal(TokenLiteral::Char) => Expr::Literal(Char(self.parse_char()?)),
Delimiter(TokenDelimiter::BraceOpen) => Expr::Block(self.parse_expr_block()?),
Keyword(TokenKeyword::Loop) => Expr::Loop(self.parse_expr_loop()?),
Keyword(TokenKeyword::If) => Expr::If(self.parse_expr_if()?),
_ => {
self.error_expected_peek("expression");
return None;
}
})
}
/// exprUnary ::= [ unaryOp ] exprAtom
/// unaryOp ::= "+" | "-" | "~"
fn parse_expr_unary(&mut self) -> Option<Expr> {
use TokenSymbol::*;
Some(match self.peek_token().kind {
TokenKind::Symbol(symbol @ (Minus | Plus | Tilde)) => {
self.next_token();
Expr::Op(symbol, Box::new(self.parse_expr_atom()?), None)
}
_ => self.parse_expr_atom()?,
})
}
/// exprArithmeticMul ::= exprUnary [ arithmeticMulOp exprArithmeticMul ]
/// arithmeticMulOp ::= "*" | "/" | "%"
fn parse_expr_arithmetic_mul(&mut self) -> Option<Expr> {
use TokenSymbol::*;
let lhs = self.parse_expr_unary()?;
Some(match self.peek_token().kind {
TokenKind::Symbol(symbol @ (Star | Slash | Percent)) => {
self.next_token();
Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_arithmetic_mul()?)),
)
}
_ => lhs,
})
}
/// exprArithmeticAdd ::= exprArithmeticMul [ arithmeticAddOp exprArithmeticAdd ]
/// arithmeticAddOp ::= "+" | "-"
fn parse_expr_arithmetic_add(&mut self) -> Option<Expr> {
use TokenSymbol::*;
let lhs = self.parse_expr_arithmetic_mul()?;
Some(match self.peek_token().kind {
TokenKind::Symbol(symbol @ (Plus | Minus)) => {
self.next_token();
Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_arithmetic_add()?)),
)
}
_ => lhs,
})
}
/// exprBitwiseShift ::= exprArithmeticAdd [ bitwiseShiftOp exprBitwiseShift ]
/// bitwiseShiftOp ::= "<<" | ">>"
fn parse_expr_bitwise_shift(&mut self) -> Option<Expr> {
use TokenSymbol::*;
let lhs = self.parse_expr_arithmetic_add()?;
Some(match self.peek_token().kind {
TokenKind::Symbol(symbol @ (Shl | Shr)) => {
self.next_token();
Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_bitwise_shift()?)),
)
}
_ => lhs,
})
}
/// exprBitwiseAnd ::= exprBitwiseShift [ "&" exprBitwiseAnd ]
fn parse_expr_bitwise_and(&mut self) -> Option<Expr> {
let lhs = self.parse_expr_bitwise_shift()?;
let symbol = TokenSymbol::And;
if !self.skip_token(TokenKind::Symbol(symbol)) {
return Some(lhs);
}
Some(Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_bitwise_and()?)),
))
}
/// exprBitwiseXor ::= exprBitwiseAnd [ "^" exprBitwiseXor ]
fn parse_expr_bitwise_xor(&mut self) -> Option<Expr> {
let lhs = self.parse_expr_bitwise_and()?;
let symbol = TokenSymbol::Caret;
if !self.skip_token(TokenKind::Symbol(symbol)) {
return Some(lhs);
}
Some(Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_bitwise_xor()?)),
))
}
/// exprBitwiseOr ::= exprBitwiseXor [ "|" exprBitwiseOr ]
fn parse_expr_bitwise_or(&mut self) -> Option<Expr> {
let lhs = self.parse_expr_bitwise_xor()?;
let symbol = TokenSymbol::Or;
if !self.skip_token(TokenKind::Symbol(symbol)) {
return Some(lhs);
}
Some(Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_bitwise_or()?)),
))
}
/// exprRelational ::= exprBitwiseOr [ relationalOp exprRelational ]
/// relationalOp ::= ">" | "<" | ">=" | "<=" | "==" | "!="
fn parse_expr_relational(&mut self) -> Option<Expr> {
use TokenSymbol::*;
let lhs = self.parse_expr_bitwise_or()?;
Some(match self.peek_token().kind {
TokenKind::Symbol(symbol @ (Gt | Lt | GtEq | LtEq | EqEq | Ne)) => {
self.next_token();
Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_relational()?)),
)
}
_ => lhs,
})
}
/// exprLogicalAnd ::= exprRelational [ "&&" exprLogicalAnd ]
fn parse_expr_logical_and(&mut self) -> Option<Expr> {
let lhs = self.parse_expr_relational()?;
let symbol = TokenSymbol::AndAnd;
if !self.skip_token(TokenKind::Symbol(symbol)) {
return Some(lhs);
}
Some(Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_logical_and()?)),
))
}
/// exprLogicalOr ::= exprLogicalAnd [ "||" exprLogicalOr ]
fn parse_expr_logical_or(&mut self) -> Option<Expr> {
let lhs = self.parse_expr_logical_and()?;
let symbol = TokenSymbol::OrOr;
if !self.skip_token(TokenKind::Symbol(symbol)) {
return Some(lhs);
}
Some(Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_logical_or()?)),
))
}
/// exprAssign ::= exprLogicalOr [ assignOp exprAssign ]
/// assignOp ::= "=" | "+=" | "-=" | "*=" | "/=" | "%=" | "^=" | "<<=" | ">>=" | "&=" | "|="
fn parse_expr_assign(&mut self) -> Option<Expr> {
use TokenSymbol::*;
let lhs = self.parse_expr_logical_or()?;
Some(match self.peek_token().kind {
TokenKind::Symbol(
symbol @ (Eq | PlusEq | MinusEq | StarEq | SlashEq | PercentEq | CaretEq | ShlEq
| ShrEq | AndEq | OrEq),
) => {
self.next_token();
Expr::Op(
symbol,
Box::new(lhs),
Some(Box::new(self.parse_expr_assign()?)),
)
}
_ => lhs,
})
}
/// exprControl ::= "continue" | "break" | "return" [ exprControl ] | exprAssign
fn parse_expr_control(&mut self) -> Option<Expr> {
use TokenKeyword::*;
Some(match self.peek_token().kind {
TokenKind::Keyword(Continue) => {
self.next_token();
Expr::Continue
}
TokenKind::Keyword(Break) => {
self.next_token();
Expr::Break
}
TokenKind::Keyword(Return) => {
self.next_token();
Expr::Return(self.parse_expr_control().map(Box::new))
}
_ => self.parse_expr_assign()?,
})
}
/// entrypoint for expression parsing using recursive descent parsing
///
/// <https://en.wikipedia.org/wiki/Recursive_descent_parser>
/// expr ::= exprControl
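/// Precedence, loosest to tightest: control flow, assignment, logical or, logical and,
/// relational, bitwise or, xor, and, shifts, additive, multiplicative, unary, atoms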
pub(super) fn parse_expr(&mut self) -> Option<Expr> {
self.parse_expr_control()
}
}
#[test]
fn test_parse_expr() {
use Literal::*;
use TokenSymbol::*;
macro_rules! b {
($expr:expr) => {
Box::new($expr)
};
}
let mut parser = Parser::new(
r#"if if 1 { 1 } else { 0 } + 9 {
a = 4
} else if 1 {
a = 5
} else {
}
amul ^= (4 + 93 * (1 << 3) / 1.44) ^ bhatura
stove = { 44 } + amul"#,
);
assert_eq!(
parser.parse_expr(),
Some(Expr::If(If {
cond: b!(Expr::Op(
Plus,
b!(Expr::If(If {
cond: b!(Expr::Literal(Int(1))),
then: vec![Statement::Expr(Expr::Literal(Int(1)))],
or: Some(b!(ElseType::Else(vec![Statement::Expr(Expr::Literal(
Int(0)
))])))
})),
Some(b!(Expr::Literal(Int(9))))
)),
then: vec![Statement::Expr(Expr::Op(
Eq,
b!(Expr::Identifier("a".into())),
Some(b!(Expr::Literal(Int(4))))
))],
or: Some(b!(ElseType::If(If {
cond: b!(Expr::Literal(Int(1))),
then: vec![Statement::Expr(Expr::Op(
Eq,
b!(Expr::Identifier("a".into())),
Some(b!(Expr::Literal(Int(5))))
))],
or: Some(b!(ElseType::Else(vec![])))
})))
}))
);
assert_eq!(parser.skip_token(TokenKind::Newline), true);
assert_eq!(
parser.parse_expr(),
Some(Expr::Op(
CaretEq,
b!(Expr::Identifier("amul".into())),
Some(b!(Expr::Op(
Caret,
b!(Expr::Op(
Plus,
b!(Expr::Literal(Int(4))),
Some(b!(Expr::Op(
Star,
b!(Expr::Literal(Int(93))),
Some(b!(Expr::Op(
Slash,
b!(Expr::Op(
Shl,
b!(Expr::Literal(Int(1))),
Some(b!(Expr::Literal(Int(3))))
)),
Some(b!(Expr::Literal(Float(1.44))))
)))
)))
)),
Some(b!(Expr::Identifier("bhatura".into())))
)))
))
);
assert_eq!(parser.skip_token(TokenKind::Newline), true);
assert_eq!(
parser.parse_expr(),
Some(Expr::Op(
Eq,
b!(Expr::Identifier("stove".into())),
Some(b!(Expr::Op(
Plus,
b!(Expr::Block(vec![Statement::Expr(Expr::Literal(Int(44)))])),
Some(b!(Expr::Identifier("amul".into())))
)))
))
);
}

src/parser/literal.rs Normal file (121 lines)

@@ -0,0 +1,121 @@
use super::Parser;
use crate::lexer::{
TokenKind,
TokenLiteral,
TokenSymbol,
};
impl<'a> Parser<'a> {
/// int ::= digit { digit }
pub(super) fn parse_int(&mut self) -> Option<i32> {
let val = self.next_token().val;
let mut integer: i32 = 0;
let error = || {
self.error(&format!(
"integer values must be in range [{}, {}]",
i32::MIN,
i32::MAX
))
};
for c in val.chars() {
// c is always ['0'..='9']
let d = c.to_digit(10)?;
match integer.checked_mul(10) {
Some(m) => integer = m,
None => {
error();
return None;
}
}
match integer.checked_add(d as i32) {
Some(a) => integer = a,
None => {
error();
return None;
}
}
}
Some(integer)
}
// didn't use parse() because I wanted to do this myself for some reason
/// f32 can be NaN and inf as well
/// float ::= int [ "." { digit } ] [ "e" { digit } ]
pub(super) fn parse_float(&mut self) -> Option<f32> {
let token = self.next_token();
let mut chars = token.val.chars();
let mut float: f32 = 0.0;
let mut fraction: f32 = 0.0;
let mut prec: i32 = 0;
let mut exp: i32 = 0;
let mut decimal: bool = false;
// lexer takes care of multiple decimals and non digit characters
for c in chars.by_ref() {
match c {
'.' => decimal = true,
'e' | 'E' => {
// lexer takes care that a decimal doesn't come after e
let s;
match self.peek_token().kind {
TokenKind::Symbol(TokenSymbol::Minus) => {
s = -1;
self.next_token();
}
TokenKind::Symbol(TokenSymbol::Plus) => {
s = 1;
self.next_token();
}
_ => s = 1,
}
if self.peek_token().kind != TokenKind::Literal(TokenLiteral::Int) {
break;
}
exp = self.parse_int()? * s;
break;
}
_ => {
// c is always ['0'..='9']
let d = c.to_digit(10)? as f32;
if decimal {
fraction *= 10.0;
fraction += d;
prec += 1;
} else {
float *= 10.0;
float += d;
}
}
}
}
fraction /= 10f32.powi(prec);
float += fraction;
float *= 10f32.powi(exp);
Some(float)
}
/// char ::= "'" letter "'"
pub(super) fn parse_char(&mut self) -> Option<char> {
// the lexer ensures that the 0th and 2nd characters are both '
self.next_token().val.chars().nth(1)
}
}
#[test]
fn test_parse_literals() {
let mut parser = Parser::new("4524 3123.15e4 9e2 9083482.429455 'c' 3331.13.1");
assert_eq!(parser.parse_int(), Some(4524));
assert_eq!(parser.parse_float(), Some(3123.15e4));
assert_eq!(parser.parse_float(), Some(9e2));
assert_eq!(parser.parse_float(), Some(9083482.429455));
assert_eq!(parser.parse_char(), Some('c'));
assert_eq!(parser.next_token().kind, TokenKind::Invalid);
}

src/parser/mod.rs Normal file (156 lines)

@@ -0,0 +1,156 @@
//! A naive parser just to get started
//!
//! Can only parse module, class, and function declarations for now, along with let statements
mod entity;
mod expr;
mod literal;
mod statement;
use crate::ast::{
Parent,
Ty,
};
use crate::lexer::{
Lexer,
Token,
TokenKeyword,
TokenKind,
TokenSymbol,
};
use std::rc::Rc;
pub struct Parser<'a> {
pub lexer: Lexer<'a>,
}
impl<'a> Parser<'a> {
/// Creates a new [`Parser`] instance.
pub fn new(contents: &'a str) -> Parser<'a> {
Parser {
lexer: Lexer::new(contents),
}
}
#[inline]
fn error(&self, message: &str) {
eprintln!(
"Parser: {}, at \"{}:{}\"",
message, self.lexer.line, self.lexer.col
);
}
#[inline]
fn error_expected(&self, expected: &str, found: &str) {
self.error(&format!("expected {}, found {}", expected, found));
}
#[inline]
fn error_expected_peek(&mut self, expected: &str) {
let found = &Rc::clone(&self.peek_token().val);
self.error_expected(expected, found);
}
#[inline]
fn next_token(&mut self) -> Token {
let t = self.lexer.next_token();
println!("{:?}", t);
t
}
#[inline]
fn peek_token(&mut self) -> &Token {
return self.lexer.peek_token();
}
/// Skips consecutive newline tokens
fn trim_newlines(&mut self) {
while self.peek_token().kind == TokenKind::Newline {
self.next_token();
}
}
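/// Consumes the next token if it matches `kind`, returning whether it matched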
fn skip_token(&mut self, kind: TokenKind) -> bool {
if self.peek_token().kind == kind {
self.next_token();
return true;
}
false
}
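/// Returns true if the next token is a newline or matches `token`, without consuming it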
fn check_newline_or_tok(&mut self, token: TokenKind) -> bool {
match self.peek_token().kind {
TokenKind::Newline => true,
d if d == token => true,
_ => false,
}
}
/// ty ::= "int" | "float" | "char"
fn parse_ty(&mut self) -> Option<Ty> {
let ty: Ty;
if let TokenKind::Keyword(keyword) = &self.peek_token().kind {
ty = match keyword {
TokenKeyword::Int => Ty::Int,
TokenKeyword::Char => Ty::Char,
TokenKeyword::Float => Ty::Float,
_ => {
self.error_expected_peek("ty");
return None;
}
};
} else {
self.error_expected_peek("ty");
return None;
}
self.next_token();
Some(ty)
}
/// ident ::= ( letter | "_" ) { letter | digit | "_" }
fn parse_ident(&mut self) -> Option<Rc<str>> {
if self.peek_token().kind != TokenKind::Identifier {
self.error_expected_peek("identifier");
return None;
}
Some(Rc::clone(&self.next_token().val))
}
/// identWithTy ::= ident ":" ty
fn parse_ident_with_ty(&mut self) -> Option<(Rc<str>, Ty)> {
let ident = self.parse_ident()?;
if !self.skip_token(TokenKind::Symbol(TokenSymbol::Colon)) {
self.error_expected_peek(":");
return None;
}
Some((ident, self.parse_ty()?))
}
/// Returns an [`Entity`] vector after parsing
///
/// parent ::= { entity }
/// [`Entity`]: crate::ast::Entity
pub fn parse(&mut self) -> Option<Parent> {
let mut parent = vec![];
loop {
match self.peek_token().kind {
TokenKind::Newline => self.trim_newlines(),
TokenKind::Eof => break,
_ => {
parent.push(self.parse_entity()?);
if !self.check_newline_or_tok(TokenKind::Eof) {
self.error_expected_peek("newline or end of file");
return None;
}
}
}
}
Some(parent)
}
}

src/parser/statement.rs Normal file (91 lines)

@@ -0,0 +1,91 @@
use super::Parser;
use crate::ast::*;
use crate::lexer::{
TokenKeyword,
TokenKind,
TokenSymbol,
};
impl<'a> Parser<'a> {
/// statement ::= static | let | expr
pub(super) fn parse_statement(&mut self) -> Option<Statement> {
use TokenKeyword::*;
println!("STMT");
Some(match self.peek_token().kind {
TokenKind::Keyword(Static) => Statement::Static(self.parse_static()?),
TokenKind::Keyword(Let) => Statement::Let(self.parse_let()?),
_ => Statement::Expr(self.parse_expr()?),
})
}
/// static ::= "static" let
pub(super) fn parse_static(&mut self) -> Option<Let> {
self.next_token();
if self.peek_token().kind != TokenKind::Keyword(TokenKeyword::Let) {
self.error_expected_peek("let");
return None;
}
self.parse_let()
}
/// let ::= "let" identWithTy "=" expr
pub(super) fn parse_let(&mut self) -> Option<Let> {
self.next_token();
let (name, ty) = self.parse_ident_with_ty()?;
let expr = if self.skip_token(TokenKind::Symbol(TokenSymbol::Eq)) {
self.parse_expr()
} else if self.peek_token().kind == TokenKind::Newline {
None
} else {
self.error_expected_peek("= or newline");
return None;
};
Some(Let { name, ty, expr })
}
}
#[test]
fn test_parse_let() {
use Literal::*;
let mut parser = Parser::new(
r#"static let test01: int = 4
let test02: char = '6'
static let test03: float
let test04 = 9"#,
);
assert_eq!(
parser.parse_static(),
Some(Let {
name: "test01".into(),
ty: Ty::Int,
expr: Some(Expr::Literal(Int(4)))
})
);
assert_eq!(parser.skip_token(TokenKind::Newline), true);
assert_eq!(
parser.parse_let(),
Some(Let {
name: "test02".into(),
ty: Ty::Char,
expr: Some(Expr::Literal(Char('6')))
})
);
assert_eq!(parser.skip_token(TokenKind::Newline), true);
assert_eq!(
parser.parse_static(),
Some(Let {
name: "test03".into(),
ty: Ty::Float,
expr: None
})
);
assert_eq!(parser.skip_token(TokenKind::Newline), true);
assert_eq!(parser.parse_let(), None);
}