From 74095ffaf9803a2976c0d5ade0445dd537e400f4 Mon Sep 17 00:00:00 2001
From: minish
Date: Fri, 27 Jun 2025 03:03:28 -0400
Subject: [PATCH] wip

---
 .gitignore     |   1 +
 Cargo.lock     |   7 ++
 Cargo.toml     |   6 +
 src/lexer.rs   | 316 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs    |  13 ++
 src/parser.rs  | 180 ++++++++++++++++++++++++++++
 src/runtime.rs |  18 +++
 7 files changed, 541 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Cargo.lock
 create mode 100644 Cargo.toml
 create mode 100644 src/lexer.rs
 create mode 100644 src/main.rs
 create mode 100644 src/parser.rs
 create mode 100644 src/runtime.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ea8c4bf
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/target
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..05bb6d4
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "leaf"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..9be7182
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "leaf"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..99f5576
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,316 @@
+//! Lexer: turns a stream of characters into a stream of [`Token`]s.
+
+use std::iter::Peekable;
+
+/// An identifier as written in the script.
+#[derive(Debug)]
+pub struct Ident(String);
+
+/// A literal value written directly in the script.
+#[derive(Debug)]
+pub enum Literal {
+    String(String),
+    Integer(i64),
+    Boolean(bool),
+    Nil,
+}
+
+/// A single lexical token.
+#[derive(Debug)]
+pub enum Token {
+    Equals,
+
+    Add,
+    Multiply,
+    Divide,
+
+    Minus,
+
+    CurlyOpen,
+    CurlyClose,
+
+    ParenOpen,
+    ParenClose,
+
+    Comma,
+
+    Return,
+
+    Not,
+
+    EqualTo,
+    NotEqualTo,
+
+    And,
+    Or,
+
+    LessThan,
+    LessThanOrEqualTo,
+    GreaterThan,
+    GreaterThanOrEqualTo,
+
+    Ident(Ident),
+    Literal(Literal),
+}
+
+/// Errors produced while lexing.
+#[derive(Debug)]
+pub enum LexError {
+    /// A backslash inside a string was followed by an unsupported character.
+    InvalidEscape(char),
+    /// A character that cannot start (or complete) any token.
+    UnexpectedCharacter(char),
+    /// The input ended in the middle of a token.
+    UnexpectedEnd,
+    /// An integer literal does not fit in an `i64`; carries its digits.
+    IntegerOverflow(String),
+}
+
+/// Converts "ran out of input" (`None`) into [`LexError::UnexpectedEnd`].
+trait Check<T> {
+    fn check(self) -> Result<T>;
+}
+impl<T> Check<T> for Option<T> {
+    fn check(self) -> Result<T> {
+        self.ok_or(LexError::UnexpectedEnd)
+    }
+}
+
+pub type Result<T> = std::result::Result<T, LexError>;
+
+/// Streaming lexer over any iterator of characters.
+///
+/// Iterating yields `Ok(Token)` per token, or `Err(LexError)` on bad input.
+pub struct Lexer<I>
+where
+    I: Iterator<Item = char>,
+{
+    chars: Peekable<I>,
+}
+
+impl<I> Lexer<I>
+where
+    I: Iterator<Item = char>,
+{
+    pub fn new(chars: I) -> Self {
+        let chars = chars.peekable();
+        Self { chars }
+    }
+
+    /// Returns the next character without consuming it.
+    fn peek(&mut self) -> Option<char> {
+        self.chars.peek().copied()
+    }
+
+    fn next(&mut self) -> Option<char> {
+        self.chars.next()
+    }
+
+    /// Consumes a character that a preceding `peek` proved is present.
+    fn next_unwrap(&mut self) -> char {
+        match self.next() {
+            Some(c) => c,
+            None => unreachable!("called next_unwrap with nothing ahead"),
+        }
+    }
+
+    fn eat(&mut self) {
+        self.next();
+    }
+
+    /// Consumes one character and yields `tk`.
+    fn eat_to(&mut self, tk: Token) -> Option<Result<Token>> {
+        self.eat();
+        Some(Ok(tk))
+    }
+
+    /// Consumes one character and peeks at the one after it.
+    fn eat_peek(&mut self) -> Option<char> {
+        self.eat();
+        self.peek()
+    }
+
+    /// Skips whitespace; returns the first non-whitespace character
+    /// (unconsumed), or `None` at the end of the input.
+    fn lex_whitespace(&mut self) -> Option<char> {
+        loop {
+            match self.peek()? {
+                ' ' | '\t' | '\n' | '\r' => self.eat(),
+                c => break Some(c),
+            }
+        }
+    }
+
+    /// Lexes a keyword, a boolean/nil literal, or an identifier.
+    fn lex_word(&mut self) -> Token {
+        let mut word = String::new();
+
+        while let Some('a'..='z' | 'A'..='Z' | '0'..='9' | '_') = self.peek() {
+            word.push(self.next_unwrap());
+        }
+
+        match word.as_str() {
+            "return" => Token::Return,
+            "true" => Token::Literal(Literal::Boolean(true)),
+            "false" => Token::Literal(Literal::Boolean(false)),
+            "nil" => Token::Literal(Literal::Nil),
+            _ => Token::Ident(Ident(word)),
+        }
+    }
+
+    /// Lexes an unsigned run of decimal digits.
+    ///
+    /// Fails with [`LexError::IntegerOverflow`] if the value exceeds `i64`.
+    fn lex_integer(&mut self) -> Result<Token> {
+        let mut n_str = String::new();
+
+        // we don't lex negatives. the impl for that is
+        // a negation of a positive number at runtime.
+        while let Some('0'..='9') = self.peek() {
+            n_str.push(self.next_unwrap());
+        }
+
+        // only digits 0 to 9 were read, so the sole way parsing can fail
+        // is overflow; report it instead of panicking on user input
+        match n_str.parse::<i64>() {
+            Ok(n) => Ok(Token::Literal(Literal::Integer(n))),
+            Err(_) => Err(LexError::IntegerOverflow(n_str)),
+        }
+    }
+
+    /// Lexes a string literal; the opening delimiter is the next character.
+    fn lex_string(&mut self) -> Result<Token> {
+        let delim = self.next_unwrap();
+
+        let mut str = String::new();
+
+        loop {
+            match self.peek().check()? {
+                // `\n` is the only escape sequence supported so far
+                '\\' => match self.eat_peek().check()? {
+                    'n' => {
+                        self.eat();
+                        str.push('\n')
+                    }
+                    c => {
+                        break Err(LexError::InvalidEscape(c));
+                    }
+                },
+                c if c == delim => {
+                    self.eat();
+                    break Ok(Token::Literal(Literal::String(str)));
+                }
+                _ => str.push(self.next_unwrap()),
+            }
+        }
+    }
+
+    /// Skips a `#` comment up to the end of the line (or of the input),
+    /// then lexes whatever follows.
+    fn lex_comment(&mut self) -> Option<Result<Token>> {
+        // stop at the end of the input too: a comment on the last line
+        // need not be terminated by a newline
+        while !matches!(self.peek(), None | Some('\n')) {
+            self.eat();
+        }
+        self.lex()
+    }
+
+    /// Lexes the next token; `None` once the input is exhausted.
+    fn lex(&mut self) -> Option<Result<Token>> {
+        match self.lex_whitespace()? {
+            // { and } start/end of code block
+            '{' => self.eat_to(Token::CurlyOpen),
+            '}' => self.eat_to(Token::CurlyClose),
+
+            // ( and ) start/end of call arguments
+            '(' => self.eat_to(Token::ParenOpen),
+            ')' => self.eat_to(Token::ParenClose),
+
+            // + add
+            '+' => self.eat_to(Token::Add),
+
+            // - subtract
+            '-' => self.eat_to(Token::Minus),
+
+            // * multiply
+            '*' => self.eat_to(Token::Multiply),
+
+            // / divide
+            '/' => self.eat_to(Token::Divide),
+
+            // , comma
+            ',' => self.eat_to(Token::Comma),
+
+            // = equals
+            // or == equal to
+            '=' => match self.eat_peek() {
+                Some('=') => self.eat_to(Token::EqualTo),
+                _ => Some(Ok(Token::Equals)),
+            },
+
+            // ! not
+            // or != not equal to
+            '!' => match self.eat_peek() {
+                Some('=') => self.eat_to(Token::NotEqualTo),
+                _ => Some(Ok(Token::Not)),
+            },
+
+            // && and (a lone & is an error; it is already consumed here)
+            '&' => match self.eat_peek() {
+                Some('&') => self.eat_to(Token::And),
+                _ => Some(Err(LexError::UnexpectedCharacter('&'))),
+            },
+
+            // || or (a lone | is an error; it is already consumed here)
+            '|' => match self.eat_peek() {
+                Some('|') => self.eat_to(Token::Or),
+                _ => Some(Err(LexError::UnexpectedCharacter('|'))),
+            },
+
+            // > greater than
+            // or >= greater than/equal to
+            '>' => match self.eat_peek() {
+                Some('=') => self.eat_to(Token::GreaterThanOrEqualTo),
+                _ => Some(Ok(Token::GreaterThan)),
+            },
+
+            // < less than
+            // or <= less than/equal to
+            '<' => match self.eat_peek() {
+                Some('=') => self.eat_to(Token::LessThanOrEqualTo),
+                _ => Some(Ok(Token::LessThan)),
+            },
+
+            // a-zA-Z_ start of word
+            'a'..='z' | 'A'..='Z' | '_' => Some(Ok(self.lex_word())),
+
+            // 0-9 integer
+            '0'..='9' => Some(self.lex_integer()),
+
+            // " strings
+            '"' => Some(self.lex_string()),
+
+            // # comments
+            '#' => self.lex_comment(),
+
+            // unexpected character: consume it so that a caller who keeps
+            // iterating after the error makes progress
+            c => {
+                self.eat();
+                Some(Err(LexError::UnexpectedCharacter(c)))
+            }
+        }
+    }
+}
+
+impl<T> Iterator for Lexer<T>
+where
+    T: Iterator<Item = char>,
+{
+    type Item = Result<Token>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.lex()
+    }
+}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..5230744
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,13 @@
+use crate::{lexer::Lexer, parser::Parser};
+
+mod lexer;
+mod parser;
+mod runtime;
+
+fn main() {
+    let script = std::fs::read_to_string("./start.leaf").unwrap();
+    let lexer = Lexer::new(script.chars());
+    let mut parser = Parser::new(lexer.map(Result::unwrap));
+    let block = parser.parse_root().unwrap();
+    println!("{block:?}");
+}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..cced783
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,180 @@
+//! Parser: turns a stream of [`Token`]s into a tree of [`Expr`]s.
+
+use std::iter::Peekable;
+
+use crate::lexer::{Ident, LexError, Literal, Token};
+
+#[derive(Debug)]
+pub enum Expr {
+    // Data and variables
+    Assignment(Ident, Box<Expr>),
+    Literal(Literal),
+    Variable(Ident),
+    // Control flow
+    Call(Ident, Vec<Expr>),
+    Return(Box<Expr>),
+    // Runtime datatypes
+    Block(Block),
+    // Unary operations
+    Negate(Box<Expr>),
+    Not(Box<Expr>),
+    // Binary operations: logical
+    EqualTo(Box<Expr>, Box<Expr>),
+    NotEqualTo(Box<Expr>, Box<Expr>),
+    And(Box<Expr>, Box<Expr>),
+    Or(Box<Expr>, Box<Expr>),
+    // Binary operations: comparison
+    LessThan(Box<Expr>, Box<Expr>),
+    LessThanOrEqualTo(Box<Expr>, Box<Expr>),
+    GreaterThan(Box<Expr>, Box<Expr>),
+    GreaterThanOrEqualTo(Box<Expr>, Box<Expr>),
+    // Binary operations: arithmetic
+    Add(Box<Expr>, Box<Expr>),
+    Subtract(Box<Expr>, Box<Expr>),
+    Multiply(Box<Expr>, Box<Expr>),
+    Divide(Box<Expr>, Box<Expr>),
+}
+
+/// A sequence of expressions.
+#[derive(Debug, Default)]
+pub struct Block {
+    exprs: Vec<Expr>,
+}
+
+#[derive(Debug)]
+pub enum ParseError {
+    UnexpectedToken(Token),
+    UnexpectedEnd,
+    LexError(LexError),
+}
+impl From<LexError> for ParseError {
+    fn from(err: LexError) -> Self {
+        Self::LexError(err)
+    }
+}
+
+pub type Result<T> = std::result::Result<T, ParseError>;
+
+/// Parser over any iterator of tokens.
+pub struct Parser<I: Iterator<Item = Token>> {
+    tokens: Peekable<I>,
+}
+impl<I> Parser<I>
+where
+    I: Iterator<Item = Token>,
+{
+    pub fn new(tokens: I) -> Self {
+        let tokens = tokens.peekable();
+        Self { tokens }
+    }
+
+    /// Consumes a token that a preceding `try_peek` proved is present.
+    fn eat(&mut self) {
+        self.next_unwrap();
+    }
+    fn next_unwrap(&mut self) -> Token {
+        self.try_next().unwrap()
+    }
+    fn try_peek(&mut self) -> Result<&Token> {
+        self.tokens.peek().ok_or(ParseError::UnexpectedEnd)
+    }
+    fn try_next(&mut self) -> Result<Token> {
+        self.tokens.next().ok_or(ParseError::UnexpectedEnd)
+    }
+
+    /// If the next token is a comparison or logical operator, consumes it,
+    /// parses the right-hand side and combines it with `lhs`; otherwise
+    /// returns `lhs` unchanged.
+    ///
+    /// The right-hand side is a full expression, so chains associate to the
+    /// right and operators have no precedence relative to each other.
+    fn parse_binary(&mut self, lhs: Expr) -> Result<Expr> {
+        type Build = fn(Box<Expr>, Box<Expr>) -> Expr;
+
+        let build: Build = match self.try_peek() {
+            Ok(Token::EqualTo) => Expr::EqualTo,
+            Ok(Token::NotEqualTo) => Expr::NotEqualTo,
+            Ok(Token::LessThan) => Expr::LessThan,
+            Ok(Token::LessThanOrEqualTo) => Expr::LessThanOrEqualTo,
+            Ok(Token::GreaterThan) => Expr::GreaterThan,
+            Ok(Token::GreaterThanOrEqualTo) => Expr::GreaterThanOrEqualTo,
+            Ok(Token::And) => Expr::And,
+            Ok(Token::Or) => Expr::Or,
+            // not a binary operator (or end of input): `lhs` stands alone
+            _ => return Ok(lhs),
+        };
+
+        // consume the operator, then parse its right operand
+        self.eat();
+        let rhs = self.parse_expr()?;
+        Ok(build(Box::new(lhs), Box::new(rhs)))
+    }
+
+    fn parse_expr(&mut self) -> Result<Expr> {
+        Ok(match self.try_next()? {
+            // Literal, optionally the left operand of a binary operation
+            Token::Literal(lit) => self.parse_binary(Expr::Literal(lit))?,
+
+            // Unary Op: negate
+            Token::Minus => Expr::Negate(Box::new(self.parse_expr()?)),
+            // Unary Op: not
+            Token::Not => Expr::Not(Box::new(self.parse_expr()?)),
+
+            // Start of a block
+            Token::CurlyOpen => {
+                let mut exprs = Vec::new();
+                while !matches!(self.try_peek()?, Token::CurlyClose) {
+                    exprs.push(self.parse_expr()?);
+                }
+                // Eat closing curly
+                self.eat();
+                Expr::Block(Block { exprs })
+            }
+
+            // Return
+            Token::Return => Expr::Return(Box::new(self.parse_expr()?)),
+
+            Token::Ident(id) => match self.try_peek() {
+                // Assignment
+                Ok(Token::Equals) => {
+                    self.eat();
+                    let rhs = self.parse_expr()?;
+                    Expr::Assignment(id, Box::new(rhs))
+                }
+
+                // Block call
+                Ok(Token::ParenOpen) => {
+                    self.eat();
+                    let mut args = Vec::new();
+                    while !matches!(self.try_peek()?, Token::ParenClose) {
+                        args.push(self.parse_expr()?);
+
+                        // require comma for next arg if it's not the end
+                        match self.try_peek()? {
+                            Token::Comma => self.eat(),
+                            Token::ParenClose => {}
+                            // no comma OR closing paren.. bad...
+                            _ => return Err(ParseError::UnexpectedToken(self.next_unwrap())),
+                        }
+                    }
+                    // Eat closing paren
+                    self.eat();
+                    Expr::Call(id, args)
+                }
+
+                // Variable, optionally the left operand of a binary operation
+                _ => self.parse_binary(Expr::Variable(id))?,
+            },
+            t => return Err(ParseError::UnexpectedToken(t)),
+        })
+    }
+
+    /// Parses expressions until the tokens run out.
+    pub fn parse_root(&mut self) -> Result<Block> {
+        let mut exprs = Vec::new();
+        while self.try_peek().is_ok() {
+            exprs.push(self.parse_expr()?);
+        }
+        Ok(Block { exprs })
+    }
+}
diff --git a/src/runtime.rs b/src/runtime.rs
new file mode 100644
index 0000000..0efd100
--- /dev/null
+++ b/src/runtime.rs
@@ -0,0 +1,18 @@
+use crate::parser::{Block, Expr};
+
+enum Value {
+    String(String),
+    Integer(i64),
+    Boolean(bool),
+    Nil,
+    Block(Block),
+}
+
+fn eval(e: Expr) -> Value {
+    todo!()
+}
+
+/// Evaluates all expressions of a block.
+pub fn exec(b: Block) -> Value {
+    todo!()
+}