From 1a721a22ac039e3f58444b1ac9b9044a6ddfdc60 Mon Sep 17 00:00:00 2001
From: minish
Date: Sat, 28 Jun 2025 01:41:50 -0400
Subject: [PATCH] pratt parse everything

---
Review notes (this section is ignored by `git am`):
 * Line breaks and indentation were reconstructed from a whitespace-collapsed
   copy of this patch, and generic arguments that had been stripped
   (`Box<Expr>`, `Vec<Expr>`, `Result<Token>`, ...) were restored. Apply with
   `--ignore-whitespace`; the `index` hashes still name the submitted blobs.
 * Fixed on top of the submission: precedence order (assignment loosest,
   logical below comparisons), `<=`/`>`/`>=` building `Expr::LessThan`,
   `Display` printing `!||`, `Display for Block` printing pointer addresses
   (only `fmt::Pointer` was in scope for `e.fmt(f)`), unary minus no longer
   parsed, unary operands swallowing whole binary expressions, and groups
   accepting any token in place of `)`.

 src/lexer.rs  |  71 ++++++++++++-
 src/main.rs   |   4 +-
 src/parser.rs | 273 ++++++++++++++++++++++++++++++--------------------
 3 files changed, 227 insertions(+), 121 deletions(-)

diff --git a/src/lexer.rs b/src/lexer.rs
index dfb8cc0..6fb66cb 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,7 +1,12 @@
-use std::iter::Peekable;
+use std::{fmt, iter::Peekable};
 
 #[derive(Debug)]
 pub struct Ident(String);
+impl fmt::Display for Ident {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}", self.0)
+    }
+}
 
 #[derive(Debug)]
 pub enum Literal {
@@ -11,16 +16,27 @@ pub enum Literal {
     Nil,
     Ident(Ident),
 }
+impl fmt::Display for Literal {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Literal::String(s) => write!(f, "\"{s}\""),
+            Literal::Integer(n) => write!(f, "{n}"),
+            Literal::Boolean(b) => write!(f, "{b}"),
+            Literal::Ident(id) => write!(f, "{id}"),
+            Literal::Nil => write!(f, "nil"),
+        }
+    }
+}
 
 #[derive(Debug)]
 pub enum Token {
     Equals,
 
-    Add,
-    Multiply,
-    Divide,
-
+    Plus,
     Minus,
+    Star,
+    Slash,
+    Caret,
 
     CurlyOpen,
     CurlyClose,
@@ -47,6 +63,42 @@ pub enum Token {
 
     Literal(Literal),
 }
+/// Binding strength of binary operators, declared loosest first so that the
+/// derived `Ord` ranks a later variant as binding tighter.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Precedence {
+    Min,
+    Assign,
+    Logical,
+    Equality,
+    Relational,
+    AddSub,
+    MulDiv,
+    Pow,
+}
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Associativity {
+    Left,
+    Right,
+}
+impl Token {
+    /// Precedence and associativity of a binary operator; `None` for any other token.
+    pub fn precedence(&self) -> Option<(Precedence, Associativity)> {
+        Some(match self {
+            Token::Equals => (Precedence::Assign, Associativity::Right),
+            Token::And | Token::Or => (Precedence::Logical, Associativity::Left),
+            Token::EqualTo | Token::NotEqualTo => (Precedence::Equality, Associativity::Left),
+            Token::LessThan
+            | Token::LessThanOrEqualTo
+            | Token::GreaterThan
+            | Token::GreaterThanOrEqualTo => (Precedence::Relational, Associativity::Left),
+            Token::Plus | Token::Minus => (Precedence::AddSub, Associativity::Left),
+            Token::Star | Token::Slash => (Precedence::MulDiv, Associativity::Left),
+            Token::Caret => (Precedence::Pow, Associativity::Right),
+            _ => return None,
+        })
+    }
+}
 
 #[derive(Debug)]
 pub enum LexError {
@@ -195,16 +247,19 @@ where
             ')' => self.eat_to(Token::ParenClose),
 
             // + add
-            '+' => self.eat_to(Token::Add),
+            '+' => self.eat_to(Token::Plus),
 
             // - subtract
             '-' => self.eat_to(Token::Minus),
 
             // * multiply
-            '*' => self.eat_to(Token::Multiply),
+            '*' => self.eat_to(Token::Star),
 
             // / divide
-            '/' => self.eat_to(Token::Divide),
+            '/' => self.eat_to(Token::Slash),
+
+            // ^ pow
+            '^' => self.eat_to(Token::Caret),
 
             // , comma
             ',' => self.eat_to(Token::Comma),
diff --git a/src/main.rs b/src/main.rs
index 5230744..f8058a9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,6 +8,6 @@ fn main() {
     let script = std::fs::read_to_string("./start.leaf").unwrap();
     let lexer = Lexer::new(script.chars());
     let mut parser = Parser::new(lexer.map(Result::unwrap));
-    let block = parser.parse_root().unwrap();
-    println!("{block:?}");
+    let block = parser.parse().unwrap();
+    println!("{block}");
 }
diff --git a/src/parser.rs b/src/parser.rs
index 11c47a2..5b7f020 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,14 +1,14 @@
-use std::iter::Peekable;
+use std::{fmt, iter::Peekable};
 
-use crate::lexer::{Ident, LexError, Literal, Token};
+use crate::lexer::{Associativity, Ident, LexError, Literal, Precedence, Token};
 
 #[derive(Debug)]
 pub enum Expr {
     // Data and variables
-    Assignment(Ident, Box<Expr>),
+    Assignment(Box<Expr>, Box<Expr>),
     Literal(Literal),
     // Control flow
-    Call(Ident, Vec<Expr>),
+    Call(Box<Expr>, Vec<Expr>),
     Return(Box<Expr>),
     // Runtime datatypes
     Block(Block),
@@ -30,12 +30,63 @@ pub enum Expr {
     Subtract(Box<Expr>, Box<Expr>),
     Multiply(Box<Expr>, Box<Expr>),
     Divide(Box<Expr>, Box<Expr>),
+    Exponent(Box<Expr>, Box<Expr>),
+}
+impl fmt::Display for Expr {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Expr::Assignment(l, r) => write!(f, "({l} = {r})\n"),
+            Expr::Literal(l) => write!(f, "{l}"),
+            Expr::Call(l, r) => {
+                write!(f, "{l}(")?;
+                for e in r {
+                    write!(f, "{e}, ")?;
+                }
+                write!(f, ")")?;
+                Ok(())
+            }
+            Expr::Return(l) => {
+                write!(f, "return {l}")
+            }
+            Expr::Block(b) => {
+                write!(f, "{{\n")?;
+                for e in &b.exprs {
+                    write!(f, "\t{e}\n")?;
+                }
+                write!(f, "}}")?;
+                Ok(())
+            }
+            Expr::Negate(l) => write!(f, "(-{l})"),
+            Expr::Not(l) => write!(f, "(!{l})"),
+            Expr::EqualTo(l, r) => write!(f, "({l} == {r})"),
+            Expr::NotEqualTo(l, r) => write!(f, "({l} != {r})"),
+            Expr::And(l, r) => write!(f, "({l} && {r})"),
+            Expr::Or(l, r) => write!(f, "({l} || {r})"),
+            Expr::LessThan(l, r) => write!(f, "({l} < {r})"),
+            Expr::LessThanOrEqualTo(l, r) => write!(f, "({l} <= {r})"),
+            Expr::GreaterThan(l, r) => write!(f, "({l} > {r})"),
+            Expr::GreaterThanOrEqualTo(l, r) => write!(f, "({l} >= {r})"),
+            Expr::Add(l, r) => write!(f, "({l} + {r})"),
+            Expr::Subtract(l, r) => write!(f, "({l} - {r})"),
+            Expr::Multiply(l, r) => write!(f, "({l} * {r})"),
+            Expr::Divide(l, r) => write!(f, "({l} / {r})"),
+            Expr::Exponent(l, r) => write!(f, "({l} ^ {r})"),
+        }
+    }
 }
 
 #[derive(Debug, Default)]
 pub struct Block {
     exprs: Vec<Expr>,
 }
+impl fmt::Display for Block {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        for e in &self.exprs {
+            write!(f, "{e}")?;
+        }
+        Ok(())
+    }
+}
 
 #[derive(Debug)]
 pub enum ParseError {
@@ -73,124 +124,124 @@ where
         self.tokens.peek().ok_or(ParseError::UnexpectedEnd)
     }
     fn try_next(&mut self) -> Result<Token> {
-        self.tokens
-            .next()
-            .inspect(|t| println!("next= {t:?}"))
-            .ok_or(ParseError::UnexpectedEnd)
+        self.tokens.next().ok_or(ParseError::UnexpectedEnd)
     }
 
-    fn parse_expr(&mut self) -> Result<Expr> {
-        Ok(match self.try_next()? {
-            // Assignment
-            Token::Literal(Literal::Ident(id)) if matches!(self.try_peek(), Ok(Token::Equals)) => {
-                self.eat();
-                let rhs = self.parse_expr()?;
-                Expr::Assignment(id, Box::new(rhs))
+    /// Pratt-parses a single expression: a prefix term followed by any binary operators
+    /// binding tighter than `min_prec` (or equally tight, if right-associative).
+    /// `in_group` is set inside parentheses, where `)` ends the expression and running
+    /// out of tokens is an error instead of a normal end.
+    fn parse_expr(&mut self, min_prec: Precedence, in_group: bool) -> Result<Box<Expr>> {
+        let mut lhs = match self.try_next()? {
+            // literal
+            Token::Literal(lit) => Box::new(Expr::Literal(lit)),
+            // start of group
+            Token::ParenOpen => {
+                // begin a new expr parse (group mode)
+                let e = self.parse_expr(Precedence::Min, true)?;
+                // the group must end with a closing paren; reject anything else
+                match self.try_next()? {
+                    Token::ParenClose => e,
+                    t => return Err(ParseError::UnexpectedToken(t)),
+                }
             }
-
-            // Block call
-            Token::Literal(Literal::Ident(id))
-                if matches!(self.try_peek(), Ok(Token::ParenOpen)) =>
-            {
-                self.eat();
-                let mut args = Vec::new();
-                while !matches!(self.try_peek()?, Token::ParenClose) {
-                    args.push(self.parse_expr()?);
-
-                    // require comma for next arg if it's not the end
-                    let tk = self.try_peek()?;
-                    if matches!(tk, Token::Comma) {
-                        self.eat();
-                    } else if !matches!(tk, Token::ParenClose) {
-                        // no comma OR closing paren.. bad...
-                        return Err(ParseError::UnexpectedToken(self.next_unwrap()));
-                    }
-                }
-                // Eat closing paren
-                self.eat();
-                Expr::Call(id, args)
-            }
-
-            Token::Literal(lit) => match self.try_peek() {
-                // Binary Op: equal to (lit, expr)
-                Ok(Token::EqualTo) => {
-                    self.eat();
-                    Expr::EqualTo(Box::new(Expr::Literal(lit)), Box::new(self.parse_expr()?))
-                }
-                // Binary Op: not equal to (lit, expr)
-                Ok(Token::NotEqualTo) => {
-                    self.eat();
-                    Expr::NotEqualTo(Box::new(Expr::Literal(lit)), Box::new(self.parse_expr()?))
-                }
-
-                // Binary Op: less than (lit, expr)
-                Ok(Token::LessThan) => {
-                    self.eat();
-                    Expr::LessThan(Box::new(Expr::Literal(lit)), Box::new(self.parse_expr()?))
-                }
-                // Binary Op: less than or equal to (lit, expr)
-                Ok(Token::LessThanOrEqualTo) => {
-                    self.eat();
-                    Expr::LessThanOrEqualTo(
-                        Box::new(Expr::Literal(lit)),
-                        Box::new(self.parse_expr()?),
-                    )
-                }
-                // Binary Op: greater than (lit, expr)
-                Ok(Token::GreaterThan) => {
-                    self.eat();
-                    Expr::GreaterThan(Box::new(Expr::Literal(lit)), Box::new(self.parse_expr()?))
-                }
-                // Binary Op: greater than or equal to (lit, expr)
-                Ok(Token::GreaterThanOrEqualTo) => {
-                    self.eat();
-                    Expr::GreaterThanOrEqualTo(
-                        Box::new(Expr::Literal(lit)),
-                        Box::new(self.parse_expr()?),
-                    )
-                }
-
-                // Binary Op: and (lit, expr)
-                Ok(Token::And) => {
-                    self.eat();
-                    Expr::And(Box::new(Expr::Literal(lit)), Box::new(self.parse_expr()?))
-                }
-                // Binary Op: or (lit, expr)
-                Ok(Token::Or) => {
-                    self.eat();
-                    Expr::Or(Box::new(Expr::Literal(lit)), Box::new(self.parse_expr()?))
-                }
-
-                // Literal
-                _ => Expr::Literal(lit),
-            },
-
-            // Unary Op: negate
-            Token::Minus => Expr::Negate(Box::new(self.parse_expr()?)),
-            // Unary Op: not
-            Token::Not => Expr::Not(Box::new(self.parse_expr()?)),
-
-            // Start of a block
+            // start of a block
             Token::CurlyOpen => {
-                let mut exprs = Vec::new();
-                while !matches!(self.try_peek()?, Token::CurlyClose) {
-                    exprs.push(self.parse_expr()?);
-                }
+                let b = self.parse_block(true)?;
+                // skip curly brace
                 self.eat();
-                Expr::Block(Block { exprs })
+                Box::new(Expr::Block(b))
+            }
+            // return: takes the whole following expression
+            Token::Return => Box::new(Expr::Return(self.parse_expr(Precedence::Min, false)?)),
+            // negate: operand binds tighter than every binary operator except `^`
+            Token::Minus => Box::new(Expr::Negate(self.parse_expr(Precedence::Pow, false)?)),
+            // not: same binding as negate
+            Token::Not => Box::new(Expr::Not(self.parse_expr(Precedence::Pow, false)?)),
+            // unexpected token
+            t => return Err(ParseError::UnexpectedToken(t)),
+        };
+
+        loop {
+            let op = match self.try_peek() {
+                // end (group)
+                Ok(Token::ParenClose) if in_group => break,
+                // end (stream)
+                Err(_) if !in_group => break,
+                // operator
+                Ok(t) if t.precedence().is_some() => t,
+                // unexpected token (stop trying to parse)
+                Ok(_) => break,
+                // unexpected end
+                Err(err) => return Err(err),
+            };
+
+            let (prec, assoc) = op.precedence().unwrap();
+
+            // break if this op is meant for previous recursion
+            // or it's equal and we would prefer to build leftward..
+            if prec < min_prec || (prec == min_prec && assoc == Associativity::Left) {
+                break;
             }
-            // Return
-            Token::Return => Expr::Return(Box::new(self.parse_expr()?)),
+            // we're handling this op so advance the parser
+            let op = self.next_unwrap();
+            // parse rightward expr
+            let rhs = self.parse_expr(prec, in_group)?;
 
-            t => return Err(ParseError::UnexpectedToken(t)),
-        })
+            // join to lhs
+            lhs = Box::new(match op {
+                // equality
+                Token::EqualTo => Expr::EqualTo(lhs, rhs),
+                Token::NotEqualTo => Expr::NotEqualTo(lhs, rhs),
+                // relational
+                Token::LessThan => Expr::LessThan(lhs, rhs),
+                Token::LessThanOrEqualTo => Expr::LessThanOrEqualTo(lhs, rhs),
+                Token::GreaterThan => Expr::GreaterThan(lhs, rhs),
+                Token::GreaterThanOrEqualTo => Expr::GreaterThanOrEqualTo(lhs, rhs),
+                // logical
+                Token::And => Expr::And(lhs, rhs),
+                Token::Or => Expr::Or(lhs, rhs),
+                // add, subtract
+                Token::Plus => Expr::Add(lhs, rhs),
+                Token::Minus => Expr::Subtract(lhs, rhs),
+                // multiply, divide
+                Token::Star => Expr::Multiply(lhs, rhs),
+                Token::Slash => Expr::Divide(lhs, rhs),
+                // exponent
+                Token::Caret => Expr::Exponent(lhs, rhs),
+                // assignment
+                Token::Equals => Expr::Assignment(lhs, rhs),
+                // unreachable as all tokens with precedences are covered above
+                _ => unreachable!(),
+            });
+        }
+
+        Ok(lhs)
     }
 
-    pub fn parse_root(&mut self) -> Result<Block> {
+    /// Parses expressions until the closing `}` (when `in_block`) or the end of the
+    /// token stream (when not); the terminating `}` is left for the caller to consume.
+    fn parse_block(&mut self, in_block: bool) -> Result<Block> {
         let mut exprs = Vec::new();
-        while self.try_peek().is_ok() {
-            exprs.push(self.parse_expr()?);
+        loop {
+            match self.try_peek() {
+                // end (block)
+                Ok(Token::CurlyClose) if in_block => break,
+                // end (stream): running out of tokens is the normal way a top-level
+                // block ends; TODO: revisit once error nodes exist
+                Err(ParseError::UnexpectedEnd) if !in_block => break,
+
+                // try to parse expr
+                Ok(_) => exprs.push(*self.parse_expr(Precedence::Min, false)?),
+
+                // invalid
+                Err(err) => return Err(err),
+            }
         }
         Ok(Block { exprs })
     }
+    /// Parses the whole token stream as the top-level block.
+    pub fn parse(&mut self) -> Result<Block> {
+        self.parse_block(false)
+    }
 }