leaf/src/lexer.rs

274 lines
6.5 KiB
Rust

use std::iter::Peekable;
/// An identifier: a word in the source that is not a keyword or a literal.
///
/// The wrapped text is private to this module; use [`Ident::as_str`] to read it.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Ident(String);

impl Ident {
    /// Returns the identifier's text exactly as it appeared in the source.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}
/// A literal value written directly in the source.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Literal {
    /// The contents of a quoted string, with escape sequences already resolved.
    String(String),
    /// An integer literal. The lexer only reads digit runs, so a leading `-`
    /// is a separate token and the value here is never negative.
    Integer(i64),
    /// The keyword `true` or `false`.
    Boolean(bool),
    /// The keyword `nil`.
    Nil,
}
/// A single lexical token.
#[derive(Debug)]
pub enum Token {
    /// `=`
    Equals,
    /// `+`
    Add,
    /// `*`
    Multiply,
    /// `/`
    Divide,
    /// `-`
    Minus,
    /// `{`
    CurlyOpen,
    /// `}`
    CurlyClose,
    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,
    /// `,`
    Comma,
    /// The keyword `return`.
    Return,
    /// `!`
    Not,
    /// `==`
    EqualTo,
    /// `!=`
    NotEqualTo,
    /// `&&`
    And,
    /// `||`
    Or,
    /// `<`
    LessThan,
    /// `<=`
    LessThanOrEqualTo,
    /// `>`
    GreaterThan,
    /// `>=`
    GreaterThanOrEqualTo,
    /// A word that is not a keyword or a literal.
    Ident(Ident),
    /// A string, integer, boolean or `nil` literal.
    Literal(Literal),
}
/// An error produced while turning characters into tokens.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexError {
    /// A backslash inside a string was followed by this unsupported character.
    InvalidEscape(char),
    /// This character cannot start or continue any token at its position.
    UnexpectedCharacter(char),
    /// The input ended in the middle of a token (for example, inside a string).
    UnexpectedEnd,
}

impl std::fmt::Display for LexError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Characters are printed with `{:?}` so control characters show up escaped
        // instead of corrupting the message.
        match self {
            Self::InvalidEscape(c) => write!(f, "invalid escape character {:?}", c),
            Self::UnexpectedCharacter(c) => write!(f, "unexpected character {:?}", c),
            Self::UnexpectedEnd => f.write_str("unexpected end of input"),
        }
    }
}

impl std::error::Error for LexError {}
/// Converts a "maybe a character" value into a lexer [`Result`], so that a
/// missing character can be propagated with `?`.
trait Check {
    /// Returns the character, or a [`LexError`] when there is none.
    fn check(self) -> Result<char>;
}
impl Check for Option<char> {
    /// `Some(c)` becomes `Ok(c)`; `None` becomes [`LexError::UnexpectedEnd`].
    fn check(self) -> Result<char> {
        match self {
            Some(c) => Ok(c),
            None => Err(LexError::UnexpectedEnd),
        }
    }
}
/// Result type used throughout the lexer: any `T`, with [`LexError`] as the error.
///
/// Note that inside this module the alias shadows the prelude's two-parameter `Result`.
pub type Result<T> = std::result::Result<T, LexError>;
/// A lexer over any stream of characters.
///
/// The `Iterator` bound has to appear on the struct itself because
/// `Peekable<I>` requires it of its type parameter.
pub struct Lexer<I: Iterator<Item = char>> {
    /// The input, wrapped so the next character can be inspected without consuming it.
    chars: Peekable<I>,
}
impl<I> Lexer<I>
where
I: Iterator<Item = char>,
{
pub fn new(chars: I) -> Self {
let chars = chars.peekable();
Self { chars }
}
fn peek(&mut self) -> Option<char> {
self.chars.peek().copied()
}
fn next(&mut self) -> Option<char> {
self.chars.next()
}
fn next_unwrap(&mut self) -> char {
match self.next() {
Some(c) => c,
None => unreachable!("called next_unwrap with nothing ahead"),
}
}
fn eat(&mut self) {
self.next();
}
fn eat_to(&mut self, tk: Token) -> Option<Result<Token>> {
self.eat();
Some(Ok(tk))
}
fn eat_peek(&mut self) -> Option<char> {
self.eat();
self.peek()
}
fn lex_whitespace(&mut self) -> Option<char> {
loop {
match self.peek()? {
' ' | '\t' | '\n' | '\r' => self.eat(),
_ => break self.peek(),
}
}
}
fn lex_word(&mut self) -> Token {
let mut word = String::new();
while let Some('a'..='z' | 'A'..='Z' | '0'..='9' | '_') = self.peek() {
word.push(self.next_unwrap());
}
match word.as_str() {
"return" => Token::Return,
"true" => Token::Literal(Literal::Boolean(true)),
"false" => Token::Literal(Literal::Boolean(false)),
"nil" => Token::Literal(Literal::Nil),
_ => Token::Ident(Ident(word)),
}
}
fn lex_integer(&mut self) -> Token {
let mut n_str = String::new();
// we don't lex negatives. the impl for that is
// a negation of a positive number at runtime.
// maybe that's kind of stupid though, lol
while let Some('0'..='9') = self.peek() {
n_str.push(self.next_unwrap());
}
// we can only read digits 0 to 9 so this should not fail
// .. unless we overflow
let n = n_str.parse().unwrap();
Token::Literal(Literal::Integer(n))
}
fn lex_string(&mut self) -> Result<Token> {
let delim = self.next_unwrap();
let mut str = String::new();
loop {
match self.peek().check()? {
'\\' => match self.eat_peek().check()? {
'n' => {
self.eat();
str.push('\n')
}
c => {
break Err(LexError::InvalidEscape(c));
}
},
c if c == delim => {
self.eat();
break Ok(Token::Literal(Literal::String(str)));
}
_ => str.push(self.next_unwrap()),
}
}
}
fn lex_comment(&mut self) -> Option<Result<Token>> {
while self.peek() != Some('\n') {
self.eat();
}
self.lex()
}
fn lex(&mut self) -> Option<Result<Token>> {
match self.lex_whitespace()? {
// { and } start/end of code block
'{' => self.eat_to(Token::CurlyOpen),
'}' => self.eat_to(Token::CurlyClose),
// ( and ) start/end of parens (idk)
'(' => self.eat_to(Token::ParenOpen),
')' => self.eat_to(Token::ParenClose),
// + add
'+' => self.eat_to(Token::Add),
// - subtract
'-' => self.eat_to(Token::Minus),
// * multiply
'*' => self.eat_to(Token::Multiply),
// / divide
'/' => self.eat_to(Token::Divide),
// , comma
',' => self.eat_to(Token::Comma),
// = equals
// or == equal to
'=' => match self.eat_peek() {
Some('=') => self.eat_to(Token::EqualTo),
_ => Some(Ok(Token::Equals)),
},
// ! not
// or != not equal to
'!' => match self.eat_peek() {
Some('=') => self.eat_to(Token::NotEqualTo),
_ => Some(Ok(Token::Not)),
},
// && and
'&' if matches!(self.eat_peek(), Some('&')) => self.eat_to(Token::And),
// || or
'|' if matches!(self.eat_peek(), Some('|')) => self.eat_to(Token::Or),
// > greater than
// or >= greater than/equal to
'>' => match self.eat_peek() {
Some('=') => self.eat_to(Token::GreaterThanOrEqualTo),
_ => Some(Ok(Token::GreaterThan)),
},
// < less than
// or <= less than/equal to
'<' => match self.eat_peek() {
Some('=') => self.eat_to(Token::LessThanOrEqualTo),
_ => Some(Ok(Token::LessThan)),
},
// a-zA-Z_ start of word
'a'..='z' | 'A'..='Z' | '_' => Some(Ok(self.lex_word())),
// 0-9 integer
'0'..='9' => Some(Ok(self.lex_integer())),
// " strings
'"' => Some(self.lex_string()),
// # comments
'#' => self.lex_comment(),
// unexpected character
c => Some(Err(LexError::UnexpectedCharacter(c))),
}
}
}
/// Lets a [`Lexer`] be driven as an iterator of token results.
impl<T: Iterator<Item = char>> Iterator for Lexer<T> {
    type Item = Result<Token>;

    /// Yields whatever the lexer's internal `lex` step returns for this call.
    fn next(&mut self) -> Option<Result<Token>> {
        Self::lex(self)
    }
}