From 564d90d0619a65735a022ad923cac1a8a717cd2d Mon Sep 17 00:00:00 2001 From: minish Date: Sat, 12 Jul 2025 19:51:57 -0400 Subject: [PATCH] refactor: is_next, expect_next --- Cargo.lock | 72 ++++++++++++++++++++++++++ Cargo.toml | 1 + src/lexer.rs | 13 +++-- src/parser.rs | 137 ++++++++++++++++++++------------------------------ 4 files changed, 136 insertions(+), 87 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 05bb6d4..c8bf0f5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,78 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "leaf" version = "0.1.0" +dependencies = [ + "strum", +] + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustversion" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" + +[[package]] +name = "strum" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f64def088c51c9510a8579e3c5d67c65349dcf755e5479ad3d010aa6454e2c32" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.27.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c77a8c5abcaf0f9ce05d62342b7d298c346515365c36b673df4ebe3ced01fde8" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "syn" +version = "2.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" diff --git a/Cargo.toml b/Cargo.toml index 9be7182..e309057 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,4 @@ version = "0.1.0" edition = "2024" [dependencies] +strum = { version = "0.27", features = ["derive"] } diff --git a/src/lexer.rs b/src/lexer.rs index b48e1b1..d1e4be8 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -1,5 +1,7 @@ use std::{fmt, iter::Peekable}; +use strum::EnumDiscriminants; + #[derive(Debug)] pub struct Ident(String); impl fmt::Display for Ident { @@ -28,7 +30,8 @@ impl fmt::Display for Literal { } } -#[derive(Debug)] +#[derive(Debug, EnumDiscriminants)] +#[strum_discriminants(name(TokenKind))] pub enum Token { Equals, @@ -46,7 +49,7 @@ pub enum Token { ParenClose, Comma, - Eol, + Semicolon, Func, If, @@ -181,7 +184,7 @@ where fn lex_whitespace(&mut self) -> Option { loop { match self.peek()? { - ' ' | '\t' | '\r' => self.eat(), + ' ' | '\t' | '\n' | '\r' => self.eat(), _ => break self.peek(), } } @@ -333,8 +336,8 @@ where continue; } - // ;, \n eol - '\n' | ';' => self.eat_to(Token::Eol), + // ; semicolon + ';' => self.eat_to(Token::Semicolon), // unexpected character c => Some(Err(LexError::UnexpectedCharacter(c))), diff --git a/src/parser.rs b/src/parser.rs index 6634218..6e09db4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,6 +1,8 @@ use std::{fmt, iter::Peekable}; -use crate::lexer::{Associativity, LexError, Literal, Precedence, Token}; +use strum::IntoDiscriminant; + +use crate::lexer::{Associativity, LexError, Literal, Precedence, Token, TokenKind}; pub mod util; @@ -67,7 +69,6 @@ pub type Result = std::result::Result; pub struct Parser> { tokens: Peekable, - saw_eol: bool, } impl Parser where @@ -75,10 +76,7 @@ where { pub fn new(tokens: I) -> Self { let tokens = tokens.peekable(); - Self { - tokens, - saw_eol: false, - } + Self { tokens } } fn eat(&mut self) { @@ -87,29 +85,33 @@ where fn next_unwrap(&mut self) -> Token { self.try_next().unwrap() } - fn skip_eol(&mut self) -> bool { - let mut did_skip = false; - while matches!(self.tokens.peek(), Some(Token::Eol)) { - self.tokens.next(); - did_skip = true; - } - - return did_skip; - } fn try_peek(&mut self) -> Result<&Token> { - // Peek doesn't advance the token stream, so - // don't allow it to unset the EOL flag - if self.skip_eol() { - self.saw_eol = true; - } self.tokens.peek().ok_or(ParseError::UnexpectedEnd) } fn try_next(&mut self) -> Result { - self.saw_eol = self.skip_eol(); self.tokens.next().ok_or(ParseError::UnexpectedEnd) } + fn expect_next(&mut self, kind: TokenKind) -> Result<()> { + let t = self.try_next()?; + + if t.discriminant() != kind { + return Err(ParseError::UnexpectedToken(t)); + } + + Ok(()) + } + fn is_next(&mut self, kind: Option) -> bool { + match self.try_peek() { + Ok(t) if Some(t.discriminant()) == kind => true, + Ok(_) => false, + + Err(ParseError::UnexpectedEnd) if kind.is_none() => true, + Err(_) => unreachable!(), + } + } + fn parse_expr(&mut self, min_prec: Precedence, in_group: bool) -> Result> { let mut lhs = match self.try_next()? { // literal @@ -125,10 +127,11 @@ where } // start of a block Token::CurlyOpen => { - let b = self.parse_block(true)?; + let exprs = + self.parse_delimited_until(TokenKind::Semicolon, Some(TokenKind::CurlyClose))?; // skip curly brace self.eat(); - Box::new(Expr::Block(b)) + Box::new(Expr::Block(Block { exprs })) } // unary ops!! (prefix) @@ -136,20 +139,15 @@ where let prec = t.prefix_precedence().unwrap(); // parse function if matches!(t, Token::Func) { - // expect opening paren - let next = self.try_next()?; - if !matches!(next, Token::ParenOpen) { - return Err(ParseError::UnexpectedToken(next)); - } // parse args - let args = self.parse_args()?; - // expect closing paren - if !matches!(self.try_peek(), Ok(Token::ParenClose)) { - return Err(ParseError::UnexpectedToken(self.next_unwrap())); - } + self.expect_next(TokenKind::ParenOpen)?; + let args = + self.parse_delimited_until(TokenKind::Comma, Some(TokenKind::ParenClose))?; self.eat(); + // parse body let body = self.parse_expr(prec, in_group)?; + // pack Box::new(Expr::Func(args, body)) } else { @@ -194,25 +192,12 @@ where // function call Ok(Token::ParenOpen) => { - if self.saw_eol { - break; - } - // eat opening paren self.eat(); - let mut exprs = Vec::new(); - while !matches!(self.try_peek()?, Token::ParenClose) { - exprs.push(*self.parse_expr(Precedence::Min, false)?); + let exprs = + self.parse_delimited_until(TokenKind::Comma, Some(TokenKind::ParenClose))?; - // Continue if there is a comma, - // ignore closing parens - match self.try_peek()? { - Token::Comma => self.eat(), - Token::ParenClose => {} - _ => return Err(ParseError::UnexpectedToken(self.next_unwrap())), - } - } // eat closing paren self.eat(); @@ -267,49 +252,37 @@ where Ok(lhs) } - fn parse_args(&mut self) -> Result> { + + fn parse_delimited_until( + &mut self, + delim: TokenKind, + until: Option, + ) -> Result> { let mut exprs = Vec::new(); - while !matches!(self.try_peek(), Ok(Token::ParenClose)) { + + while !self.is_next(until) { + // skip delimiter + if self.is_next(Some(delim)) { + self.eat(); + continue; + } + // try to parse expr exprs.push(*self.parse_expr(Precedence::Min, false)?); - // advance - let next = self.try_next()?; - // check if its the end - if matches!(next, Token::ParenClose) { + + // check for end + if self.is_next(until) { break; } - // expect comma - if !matches!(next, Token::Comma) { - return Err(ParseError::UnexpectedToken(next)); - } + + // check for delim + self.expect_next(delim)?; } Ok(exprs) } - fn parse_block(&mut self, in_block: bool) -> Result { - let mut exprs = Vec::new(); - loop { - match self.try_peek() { - // end (block) - Ok(Token::CurlyClose) if in_block => break, - // end (stream) - Err(ParseError::UnexpectedEnd) if !in_block => break, - // try to parse expr - Ok(_) => { - exprs.push(*self.parse_expr(Precedence::Min, false)?); - // expect eol or eof - if !matches!(self.try_peek(), Err(ParseError::UnexpectedEnd)) && !self.saw_eol { - return Err(ParseError::UnexpectedToken(self.next_unwrap())); - } - } - - // invalid - Err(err) => return Err(err), - } - } + pub fn parse(&mut self) -> Result { + let exprs = self.parse_delimited_until(TokenKind::Semicolon, None)?; Ok(Block { exprs }) } - pub fn parse(&mut self) -> Result { - self.parse_block(false) - } }