use std::{ fmt, iter::Peekable, num::{ParseFloatError, ParseIntError}, }; use crate::kinds; #[derive(Debug)] pub struct Ident(String); impl fmt::Display for Ident { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.0.fmt(f) } } #[derive(Debug)] pub enum Literal { String(String), Integer(i64), Float(f64), Boolean(bool), Nil, Ident(Ident), } impl fmt::Display for Literal { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Literal::String(s) => write!(f, "\"{s}\""), Literal::Integer(n) => write!(f, "{n}"), Literal::Float(n) => write!(f, "{n}"), Literal::Boolean(b) => write!(f, "{b}"), Literal::Ident(id) => write!(f, "{id}"), Literal::Nil => write!(f, "nil"), } } } kinds!( Token, TokenKind, Equals, Plus, Minus, Star, Slash, Percent, StarStar, PlusEquals, MinusEquals, StarEquals, SlashEquals, CurlyOpen, CurlyClose, ParenOpen, ParenClose, Comma, Eol, Func, If, Else, Return, Not, EqualTo, NotEqualTo, And, Or, LessThan, LessThanOrEqualTo, GreaterThan, GreaterThanOrEqualTo, Literal(Literal = Literal::Nil), ); #[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] pub enum Precedence { Min, Assign, WithAssign, Logical, Equality, Relational, AddSub, MulDivMod, Pow, Prefix, } #[derive(PartialEq, Eq)] pub enum Associativity { Left, Right, } impl Token { pub fn prefix_precedence(&self) -> Option { Some(match self { Token::Return | Token::If | Token::Func | Token::Minus | Token::Not => { Precedence::Prefix } _ => return None, }) } pub fn infix_precedence(&self) -> Option<(Precedence, Associativity)> { Some(match self { Token::EqualTo | Token::NotEqualTo => (Precedence::Equality, Associativity::Left), Token::LessThan | Token::LessThanOrEqualTo | Token::GreaterThan | Token::GreaterThanOrEqualTo => (Precedence::Relational, Associativity::Left), Token::And | Token::Or => (Precedence::Logical, Associativity::Left), Token::Plus | Token::Minus => (Precedence::AddSub, Associativity::Left), Token::Star | Token::Slash | Token::Percent => { (Precedence::MulDivMod, Associativity::Left) } Token::StarStar => (Precedence::Pow, Associativity::Right), Token::Equals => (Precedence::Assign, Associativity::Right), Token::PlusEquals | Token::MinusEquals | Token::StarEquals | Token::SlashEquals => { (Precedence::WithAssign, Associativity::Right) } _ => return None, }) } } #[derive(Debug)] pub enum LexError { InvalidInteger(ParseIntError), InvalidFloat(ParseFloatError), InvalidEscape(char), UnexpectedCharacter(char), UnexpectedEnd, } impl fmt::Display for LexError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::InvalidInteger(err) => write!(f, "invalid integer: {err}"), Self::InvalidFloat(err) => write!(f, "invalid float: {err}"), Self::UnexpectedEnd => write!(f, "unexpected end of source"), Self::UnexpectedCharacter(c) => write!(f, "unexpected char '{c}'"), Self::InvalidEscape(c) => write!(f, "\"\\{c}\" is not a valid string escape"), } } } impl From for LexError { fn from(err: ParseIntError) -> Self { Self::InvalidInteger(err) } } impl From for LexError { fn from(err: ParseFloatError) -> Self { Self::InvalidFloat(err) } } pub type Result = std::result::Result; pub struct Lexer where I: Iterator, { chars: Peekable, } fn t(tk: Token) -> Option> { Some(Ok(tk)) } impl Lexer where I: Iterator, { pub fn new(chars: I) -> Self { let chars = chars.peekable(); Self { chars } } fn peek(&mut self) -> Option { self.chars.peek().copied() } fn next(&mut self) -> Option { self.chars.next() } fn next_unwrap(&mut self) -> char { match self.next() { Some(c) => c, None => unreachable!("called next_unwrap with nothing ahead"), } } fn try_peek(&mut self) -> Result { self.peek().ok_or(LexError::UnexpectedEnd) } fn try_eat_peek(&mut self) -> Result { self.eat_peek().ok_or(LexError::UnexpectedEnd) } fn eat(&mut self) { self.next(); } fn eat_to(&mut self, tk: Token) -> Option> { self.eat(); Some(Ok(tk)) } fn eat_peek(&mut self) -> Option { self.eat(); self.peek() } fn lex_whitespace(&mut self) -> Option { loop { match self.peek()? { ' ' | '\t' | '\n' | '\r' => self.eat(), _ => break self.peek(), } } } fn lex_word(&mut self) -> Token { let mut word = String::new(); while let Some('a'..='z' | 'A'..='Z' | '0'..='9' | '_') = self.peek() { word.push(self.next_unwrap()); } match word.as_str() { "func" => Token::Func, "if" => Token::If, "else" => Token::Else, "return" => Token::Return, "true" => Token::Literal(Literal::Boolean(true)), "false" => Token::Literal(Literal::Boolean(false)), "nil" => Token::Literal(Literal::Nil), _ => Token::Literal(Literal::Ident(Ident(word))), } } fn lex_number(&mut self) -> Result { let mut n_str = String::new(); // we don't lex negatives. the impl for that is // a negation of a positive number at runtime. // maybe that's kind of stupid though, lol let mut is_float = false; while let Some('0'..='9' | '.') = self.peek() { if self.peek() == Some('.') { is_float = true; } n_str.push(self.next_unwrap()); } let lit = if is_float { Literal::Float(n_str.parse()?) } else { Literal::Integer(n_str.parse()?) }; Ok(Token::Literal(lit)) } fn lex_string(&mut self) -> Result { let delim = self.next_unwrap(); let mut str = String::new(); loop { match self.try_peek()? { '\\' => match self.try_eat_peek()? { 'n' => { self.eat(); str.push('\n'); } c => { break Err(LexError::InvalidEscape(c)); } }, c if c == delim => { self.eat(); break Ok(Token::Literal(Literal::String(str))); } _ => str.push(self.next_unwrap()), } } } fn lex(&mut self) -> Option> { loop { break match self.lex_whitespace()? { // { and } start/end of code block '{' => self.eat_to(Token::CurlyOpen), '}' => self.eat_to(Token::CurlyClose), // ( and ) start/end of groups '(' => self.eat_to(Token::ParenOpen), ')' => self.eat_to(Token::ParenClose), // + add // or += add eq '+' => match self.eat_peek() { Some('=') => self.eat_to(Token::PlusEquals), _ => t(Token::Plus), }, // - subtract // or -= sub eq '-' => match self.eat_peek() { Some('=') => self.eat_to(Token::MinusEquals), _ => t(Token::Minus), }, // * multiply // or *= mult eq // or ** pow '*' => match self.eat_peek() { Some('=') => self.eat_to(Token::StarEquals), Some('*') => self.eat_to(Token::StarStar), _ => t(Token::Star), }, // / divide // or /= div eq // or // comment '/' => match self.eat_peek() { Some('=') => self.eat_to(Token::SlashEquals), Some('/') => { // skip the rest of the line // this leaves the newline btw while !matches!(self.peek(), Some('\n') | None) { self.eat(); } continue; } _ => t(Token::Slash), }, // % modulo '%' => self.eat_to(Token::Percent), // , comma ',' => self.eat_to(Token::Comma), // = equals // or == equal to '=' => match self.eat_peek() { Some('=') => self.eat_to(Token::EqualTo), _ => t(Token::Equals), }, // ! not // or != not equal to '!' => match self.eat_peek() { Some('=') => self.eat_to(Token::NotEqualTo), _ => t(Token::Not), }, // && and '&' if matches!(self.eat_peek(), Some('&')) => self.eat_to(Token::And), // || or '|' if matches!(self.eat_peek(), Some('|')) => self.eat_to(Token::Or), // > greater than // or >= greater than/equal to '>' => match self.eat_peek() { Some('=') => self.eat_to(Token::GreaterThanOrEqualTo), _ => t(Token::GreaterThan), }, // < less than // or <= less than/equal to '<' => match self.eat_peek() { Some('=') => self.eat_to(Token::LessThanOrEqualTo), _ => t(Token::LessThan), }, // a-zA-Z_ start of word 'a'..='z' | 'A'..='Z' | '_' => Some(Ok(self.lex_word())), // 0-9 integer '0'..='9' | '.' => Some(self.lex_number()), // " strings '"' => Some(self.lex_string()), // unexpected character c => Some(Err(LexError::UnexpectedCharacter(c))), }; } } } impl Iterator for Lexer where T: Iterator, { type Item = Result; fn next(&mut self) -> Option { self.lex() } }