use std::iter::Peekable; #[derive(Debug)] pub struct Ident(String); #[derive(Debug)] pub enum Literal { String(String), Integer(i64), Boolean(bool), Nil, } #[derive(Debug)] pub enum Token { Equals, Add, Multiply, Divide, Minus, CurlyOpen, CurlyClose, ParenOpen, ParenClose, Comma, Return, Not, EqualTo, NotEqualTo, And, Or, LessThan, LessThanOrEqualTo, GreaterThan, GreaterThanOrEqualTo, Ident(Ident), Literal(Literal), } #[derive(Debug)] pub enum LexError { InvalidEscape(char), UnexpectedCharacter(char), UnexpectedEnd, } trait Check { fn check(self) -> Result; } impl Check for Option { fn check(self) -> Result { self.ok_or(LexError::UnexpectedEnd) } } pub type Result = std::result::Result; pub struct Lexer where I: Iterator, { chars: Peekable, } impl Lexer where I: Iterator, { pub fn new(chars: I) -> Self { let chars = chars.peekable(); Self { chars } } fn peek(&mut self) -> Option { self.chars.peek().copied() } fn next(&mut self) -> Option { self.chars.next() } fn next_unwrap(&mut self) -> char { match self.next() { Some(c) => c, None => unreachable!("called next_unwrap with nothing ahead"), } } fn eat(&mut self) { self.next(); } fn eat_to(&mut self, tk: Token) -> Option> { self.eat(); Some(Ok(tk)) } fn eat_peek(&mut self) -> Option { self.eat(); self.peek() } fn lex_whitespace(&mut self) -> Option { loop { match self.peek()? { ' ' | '\t' | '\n' | '\r' => self.eat(), _ => break self.peek(), } } } fn lex_word(&mut self) -> Token { let mut word = String::new(); while let Some('a'..='z' | 'A'..='Z' | '0'..='9' | '_') = self.peek() { word.push(self.next_unwrap()); } match word.as_str() { "return" => Token::Return, "true" => Token::Literal(Literal::Boolean(true)), "false" => Token::Literal(Literal::Boolean(false)), "nil" => Token::Literal(Literal::Nil), _ => Token::Ident(Ident(word)), } } fn lex_integer(&mut self) -> Token { let mut n_str = String::new(); // we don't lex negatives. the impl for that is // a negation of a positive number at runtime. // maybe that's kind of stupid though, lol while let Some('0'..='9') = self.peek() { n_str.push(self.next_unwrap()); } // we can only read digits 0 to 9 so this should not fail // .. unless we overflow let n = n_str.parse().unwrap(); Token::Literal(Literal::Integer(n)) } fn lex_string(&mut self) -> Result { let delim = self.next_unwrap(); let mut str = String::new(); loop { match self.peek().check()? { '\\' => match self.eat_peek().check()? { 'n' => { self.eat(); str.push('\n') } c => { break Err(LexError::InvalidEscape(c)); } }, c if c == delim => { self.eat(); break Ok(Token::Literal(Literal::String(str))); } _ => str.push(self.next_unwrap()), } } } fn lex_comment(&mut self) -> Option> { while self.peek() != Some('\n') { self.eat(); } self.lex() } fn lex(&mut self) -> Option> { match self.lex_whitespace()? { // { and } start/end of code block '{' => self.eat_to(Token::CurlyOpen), '}' => self.eat_to(Token::CurlyClose), // ( and ) start/end of parens (idk) '(' => self.eat_to(Token::ParenOpen), ')' => self.eat_to(Token::ParenClose), // + add '+' => self.eat_to(Token::Add), // - subtract '-' => self.eat_to(Token::Minus), // * multiply '*' => self.eat_to(Token::Multiply), // / divide '/' => self.eat_to(Token::Divide), // , comma ',' => self.eat_to(Token::Comma), // = equals // or == equal to '=' => match self.eat_peek() { Some('=') => self.eat_to(Token::EqualTo), _ => Some(Ok(Token::Equals)), }, // ! not // or != not equal to '!' => match self.eat_peek() { Some('=') => self.eat_to(Token::NotEqualTo), _ => Some(Ok(Token::Not)), }, // && and '&' if matches!(self.eat_peek(), Some('&')) => self.eat_to(Token::And), // || or '|' if matches!(self.eat_peek(), Some('|')) => self.eat_to(Token::Or), // > greater than // or >= greater than/equal to '>' => match self.eat_peek() { Some('=') => self.eat_to(Token::GreaterThanOrEqualTo), _ => Some(Ok(Token::GreaterThan)), }, // < less than // or <= less than/equal to '<' => match self.eat_peek() { Some('=') => self.eat_to(Token::LessThanOrEqualTo), _ => Some(Ok(Token::LessThan)), }, // a-zA-Z_ start of word 'a'..='z' | 'A'..='Z' | '_' => Some(Ok(self.lex_word())), // 0-9 integer '0'..='9' => Some(Ok(self.lex_integer())), // " strings '"' => Some(self.lex_string()), // # comments '#' => self.lex_comment(), // unexpected character c => Some(Err(LexError::UnexpectedCharacter(c))), } } } impl Iterator for Lexer where T: Iterator, { type Item = Result; fn next(&mut self) -> Option { self.lex() } }