// leaf/src/lexer.rs

use std::iter::Peekable;

/// An identifier token's name.
///
/// The lexer produces one for every word (letters, digits and `_`, not
/// starting with a digit) that is not `return`, `true`, `false` or `nil`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Ident(String);

impl Ident {
    /// Returns the identifier's name exactly as it was written in the input.
    ///
    /// The inner field is private, so this is the way for code outside this
    /// module to read the name.
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

/// A literal value that appears directly in the input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Literal {
    /// A quoted string, with escape sequences already resolved.
    String(String),
    /// A non-negative run of decimal digits; the lexer never produces a
    /// negative value here because `-` is lexed as its own token.
    Integer(i64),
    /// The keywords `true` and `false`.
    Boolean(bool),
    /// The keyword `nil`.
    Nil,
}

/// A single lexical token produced by the lexer.
#[derive(Debug)]
pub enum Token {
    /// `=`
    Equals,

    /// `+`
    Add,
    /// `*`
    Multiply,
    /// `/`
    Divide,

    /// `-` (the lexer does not distinguish subtraction from negation)
    Minus,

    /// `{`
    CurlyOpen,
    /// `}`
    CurlyClose,

    /// `(`
    ParenOpen,
    /// `)`
    ParenClose,

    /// `,`
    Comma,

    /// The keyword `return`.
    Return,

    /// `!`
    Not,

    /// `==`
    EqualTo,
    /// `!=`
    NotEqualTo,

    /// `&&`
    And,
    /// `||`
    Or,

    /// `<`
    LessThan,
    /// `<=`
    LessThanOrEqualTo,
    /// `>`
    GreaterThan,
    /// `>=`
    GreaterThanOrEqualTo,

    /// A word that is not a keyword or a keyword literal.
    Ident(Ident),
    /// A string, integer, boolean or `nil` literal.
    Literal(Literal),
}

/// Errors the lexer can report while tokenising input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexError {
    /// Inside a string literal, a backslash was followed by this character,
    /// which is not a supported escape.
    InvalidEscape(char),
    /// This character cannot be part of any token at the position where it
    /// was found.
    UnexpectedCharacter(char),
    /// The input ended where another character was required, for example
    /// inside an unterminated string literal.
    UnexpectedEnd,
}

impl std::fmt::Display for LexError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `{:?}` on a char quotes it and escapes control characters, which
        // keeps messages readable when the offender is e.g. a newline.
        match self {
            Self::InvalidEscape(c) => write!(f, "invalid escape character {:?}", c),
            Self::UnexpectedCharacter(c) => write!(f, "unexpected character {:?}", c),
            Self::UnexpectedEnd => write!(f, "unexpected end of input"),
        }
    }
}

impl std::error::Error for LexError {}
/// Helper for turning "no more input" into a lexer error so that `?` can be
/// used where a character is required.
trait Check {
    /// Returns the contained character, or an error when there is none.
    /// The implementation for `Option<char>` in this file reports
    /// `LexError::UnexpectedEnd` for `None`.
    fn check(self) -> Result<char>;
}
impl Check for Option<char> {
    /// Unwraps the character, mapping a missing one (end of input) to
    /// `LexError::UnexpectedEnd`.
    fn check(self) -> Result<char> {
        match self {
            Some(c) => Ok(c),
            None => Err(LexError::UnexpectedEnd),
        }
    }
}

/// Result type used throughout the lexer; the error is always a `LexError`.
pub type Result<T> = std::result::Result<T, LexError>;

/// Turns a stream of characters into a stream of tokens.
///
/// Construct one with `Lexer::new` and drive it through its `Iterator`
/// implementation, which yields `Result<Token>` items.
pub struct Lexer<I>
where
    I: Iterator<Item = char>,
{
    // Character source wrapped in `Peekable` so the lexer can look at the
    // next character without consuming it.
    chars: Peekable<I>,
}

impl<I> Lexer<I>
where
    I: Iterator<Item = char>,
{
    /// Creates a lexer that tokenises the characters produced by `chars`.
    pub fn new(chars: I) -> Self {
        Self {
            chars: chars.peekable(),
        }
    }

    /// Returns the next character without consuming it, or `None` at end of
    /// input.
    fn peek(&mut self) -> Option<char> {
        self.chars.peek().copied()
    }

    /// Consumes and returns the next character, or `None` at end of input.
    fn next(&mut self) -> Option<char> {
        self.chars.next()
    }

    /// Consumes and returns the next character.
    ///
    /// # Panics
    ///
    /// Panics at end of input. Only call this after a successful `peek`.
    fn next_unwrap(&mut self) -> char {
        match self.next() {
            Some(c) => c,
            None => unreachable!("called next_unwrap with nothing ahead"),
        }
    }

    /// Consumes one character, if any, and discards it.
    fn eat(&mut self) {
        self.next();
    }

    /// Consumes one character and yields `tk` as the lexed token.
    fn eat_to(&mut self, tk: Token) -> Option<Result<Token>> {
        self.eat();
        Some(Ok(tk))
    }

    /// Consumes one character and peeks at the one after it.
    fn eat_peek(&mut self) -> Option<char> {
        self.eat();
        self.peek()
    }

    /// Skips spaces, tabs, newlines and carriage returns, then returns the
    /// first other character without consuming it (`None` at end of input).
    fn lex_whitespace(&mut self) -> Option<char> {
        while let Some(' ' | '\t' | '\n' | '\r') = self.peek() {
            self.eat();
        }
        self.peek()
    }

    /// Lexes a run of ASCII letters, digits and underscores into a keyword,
    /// a keyword literal (`true`, `false`, `nil`) or an identifier.
    fn lex_word(&mut self) -> Token {
        let mut word = String::new();

        while let Some(c @ ('a'..='z' | 'A'..='Z' | '0'..='9' | '_')) = self.peek() {
            self.eat();
            word.push(c);
        }

        match word.as_str() {
            "return" => Token::Return,
            "true" => Token::Literal(Literal::Boolean(true)),
            "false" => Token::Literal(Literal::Boolean(false)),
            "nil" => Token::Literal(Literal::Nil),
            _ => Token::Ident(Ident(word)),
        }
    }

    /// Lexes a run of decimal digits into an integer literal.
    ///
    /// Negative numbers are not lexed here: `-` is its own token and the
    /// negation is applied to the positive literal later.
    ///
    /// # Errors
    ///
    /// If the value does not fit in an `i64`, the whole digit run is still
    /// consumed (so lexing can resume after it) and
    /// `LexError::UnexpectedCharacter` is returned carrying the first digit
    /// that made the value overflow. `LexError` has no dedicated overflow
    /// variant, so this is the closest existing error.
    fn lex_integer(&mut self) -> Result<Token> {
        let mut value: i64 = 0;
        let mut overflowed_at: Option<char> = None;

        while let Some(c @ '0'..='9') = self.peek() {
            self.eat();
            if overflowed_at.is_some() {
                // Already out of range; keep draining the literal.
                continue;
            }
            let digit = i64::from(u32::from(c) - u32::from('0'));
            match value.checked_mul(10).and_then(|v| v.checked_add(digit)) {
                Some(v) => value = v,
                None => overflowed_at = Some(c),
            }
        }

        match overflowed_at {
            Some(c) => Err(LexError::UnexpectedCharacter(c)),
            None => Ok(Token::Literal(Literal::Integer(value))),
        }
    }

    /// Lexes a string literal. The next character must be the opening quote;
    /// the same character terminates the string.
    ///
    /// Supported escapes are `\n`, `\t`, `\r`, `\\` and a backslash followed
    /// by the quote character itself.
    ///
    /// # Errors
    ///
    /// * `LexError::InvalidEscape` for any other character after a backslash.
    /// * `LexError::UnexpectedEnd` if the input ends before the closing quote.
    fn lex_string(&mut self) -> Result<Token> {
        // The caller has only peeked at the opening quote; consume it here.
        let delim = self.next_unwrap();
        let mut contents = String::new();

        loop {
            match self.peek().check()? {
                '\\' => {
                    let resolved = match self.eat_peek().check()? {
                        'n' => '\n',
                        't' => '\t',
                        'r' => '\r',
                        '\\' => '\\',
                        c if c == delim => c,
                        c => return Err(LexError::InvalidEscape(c)),
                    };
                    self.eat();
                    contents.push(resolved);
                }
                c if c == delim => {
                    self.eat();
                    return Ok(Token::Literal(Literal::String(contents)));
                }
                c => {
                    self.eat();
                    contents.push(c);
                }
            }
        }
    }

    /// Skips the rest of a `#` comment: everything up to, but not including,
    /// the next newline, or up to end of input if there is no newline.
    fn lex_comment(&mut self) {
        while let Some(c) = self.peek() {
            if c == '\n' {
                break;
            }
            self.eat();
        }
    }

    /// Lexes the next token.
    ///
    /// Returns `None` once only whitespace and comments remain, `Some(Ok(_))`
    /// for a token and `Some(Err(_))` for malformed input. An unexpected
    /// character is consumed before its error is returned, so repeated calls
    /// always make progress through the input.
    fn lex(&mut self) -> Option<Result<Token>> {
        // Skip any number of comment lines iteratively rather than by
        // recursing once per comment.
        let mut c = self.lex_whitespace()?;
        while c == '#' {
            self.lex_comment();
            c = self.lex_whitespace()?;
        }

        match c {
            // { and } open/close a code block
            '{' => self.eat_to(Token::CurlyOpen),
            '}' => self.eat_to(Token::CurlyClose),

            // ( and ) open/close a parenthesised group
            '(' => self.eat_to(Token::ParenOpen),
            ')' => self.eat_to(Token::ParenClose),

            // single-character operators and punctuation
            '+' => self.eat_to(Token::Add),
            '-' => self.eat_to(Token::Minus),
            '*' => self.eat_to(Token::Multiply),
            '/' => self.eat_to(Token::Divide),
            ',' => self.eat_to(Token::Comma),

            // = equals, or == equal to
            '=' => match self.eat_peek() {
                Some('=') => self.eat_to(Token::EqualTo),
                _ => Some(Ok(Token::Equals)),
            },

            // ! not, or != not equal to
            '!' => match self.eat_peek() {
                Some('=') => self.eat_to(Token::NotEqualTo),
                _ => Some(Ok(Token::Not)),
            },

            // && and; a lone & is an error (the & itself is consumed)
            '&' => match self.eat_peek() {
                Some('&') => self.eat_to(Token::And),
                _ => Some(Err(LexError::UnexpectedCharacter('&'))),
            },

            // || or; a lone | is an error (the | itself is consumed)
            '|' => match self.eat_peek() {
                Some('|') => self.eat_to(Token::Or),
                _ => Some(Err(LexError::UnexpectedCharacter('|'))),
            },

            // > greater than, or >= greater than/equal to
            '>' => match self.eat_peek() {
                Some('=') => self.eat_to(Token::GreaterThanOrEqualTo),
                _ => Some(Ok(Token::GreaterThan)),
            },

            // < less than, or <= less than/equal to
            '<' => match self.eat_peek() {
                Some('=') => self.eat_to(Token::LessThanOrEqualTo),
                _ => Some(Ok(Token::LessThan)),
            },

            // a-zA-Z_ start of a word
            'a'..='z' | 'A'..='Z' | '_' => Some(Ok(self.lex_word())),

            // 0-9 start of an integer
            '0'..='9' => Some(self.lex_integer()),

            // " start of a string
            '"' => Some(self.lex_string()),

            // anything else: consume it so the next call moves past it
            other => {
                self.eat();
                Some(Err(LexError::UnexpectedCharacter(other)))
            }
        }
    }
}

/// Iterating a lexer yields its tokens (or lex errors) one at a time and
/// ends when `lex` reports that no input is left.
impl<I: Iterator<Item = char>> Iterator for Lexer<I> {
    type Item = Result<Token>;

    fn next(&mut self) -> Option<Result<Token>> {
        self.lex()
    }
}