400 lines
11 KiB
Rust
400 lines
11 KiB
Rust
use std::{
|
|
fmt,
|
|
iter::Peekable,
|
|
num::{ParseFloatError, ParseIntError},
|
|
};
|
|
|
|
use crate::kinds;
|
|
|
|
#[derive(Debug)]
|
|
pub struct Ident(String);
|
|
impl fmt::Display for Ident {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
self.0.fmt(f)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum Literal {
|
|
String(String),
|
|
Integer(i64),
|
|
Float(f64),
|
|
Boolean(bool),
|
|
Nil,
|
|
Ident(Ident),
|
|
}
|
|
impl fmt::Display for Literal {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Literal::String(s) => write!(f, "\"{s}\""),
|
|
Literal::Integer(n) => write!(f, "{n}"),
|
|
Literal::Float(n) => write!(f, "{n}"),
|
|
Literal::Boolean(b) => write!(f, "{b}"),
|
|
Literal::Ident(id) => write!(f, "{id}"),
|
|
Literal::Nil => write!(f, "nil"),
|
|
}
|
|
}
|
|
}
|
|
|
|
kinds!(
|
|
Token,
|
|
TokenKind,
|
|
Equals,
|
|
Plus,
|
|
Minus,
|
|
Star,
|
|
Slash,
|
|
Percent,
|
|
StarStar,
|
|
PlusEquals,
|
|
MinusEquals,
|
|
StarEquals,
|
|
SlashEquals,
|
|
CurlyOpen,
|
|
CurlyClose,
|
|
ParenOpen,
|
|
ParenClose,
|
|
Comma,
|
|
Eol,
|
|
Func,
|
|
If,
|
|
Else,
|
|
Return,
|
|
Not,
|
|
EqualTo,
|
|
NotEqualTo,
|
|
And,
|
|
Or,
|
|
LessThan,
|
|
LessThanOrEqualTo,
|
|
GreaterThan,
|
|
GreaterThanOrEqualTo,
|
|
Literal(Literal = Literal::Nil),
|
|
);
|
|
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
|
|
pub enum Precedence {
|
|
Min,
|
|
Assign,
|
|
WithAssign,
|
|
Logical,
|
|
Equality,
|
|
Relational,
|
|
AddSub,
|
|
MulDivMod,
|
|
Pow,
|
|
Prefix,
|
|
}
|
|
#[derive(PartialEq, Eq)]
|
|
pub enum Associativity {
|
|
Left,
|
|
Right,
|
|
}
|
|
impl Token {
|
|
pub fn prefix_precedence(&self) -> Option<Precedence> {
|
|
Some(match self {
|
|
Token::Return | Token::If | Token::Func | Token::Minus | Token::Not => {
|
|
Precedence::Prefix
|
|
}
|
|
_ => return None,
|
|
})
|
|
}
|
|
pub fn infix_precedence(&self) -> Option<(Precedence, Associativity)> {
|
|
Some(match self {
|
|
Token::EqualTo | Token::NotEqualTo => (Precedence::Equality, Associativity::Left),
|
|
Token::LessThan
|
|
| Token::LessThanOrEqualTo
|
|
| Token::GreaterThan
|
|
| Token::GreaterThanOrEqualTo => (Precedence::Relational, Associativity::Left),
|
|
Token::And | Token::Or => (Precedence::Logical, Associativity::Left),
|
|
Token::Plus | Token::Minus => (Precedence::AddSub, Associativity::Left),
|
|
Token::Star | Token::Slash | Token::Percent => {
|
|
(Precedence::MulDivMod, Associativity::Left)
|
|
}
|
|
Token::StarStar => (Precedence::Pow, Associativity::Right),
|
|
Token::Equals => (Precedence::Assign, Associativity::Right),
|
|
Token::PlusEquals | Token::MinusEquals | Token::StarEquals | Token::SlashEquals => {
|
|
(Precedence::WithAssign, Associativity::Right)
|
|
}
|
|
_ => return None,
|
|
})
|
|
}
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum LexError {
|
|
InvalidInteger(ParseIntError),
|
|
InvalidFloat(ParseFloatError),
|
|
InvalidEscape(char),
|
|
UnexpectedCharacter(char),
|
|
UnexpectedEnd,
|
|
}
|
|
impl fmt::Display for LexError {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Self::InvalidInteger(err) => write!(f, "invalid integer: {err}"),
|
|
Self::InvalidFloat(err) => write!(f, "invalid float: {err}"),
|
|
Self::UnexpectedEnd => write!(f, "unexpected end of source"),
|
|
Self::UnexpectedCharacter(c) => write!(f, "unexpected char '{c}'"),
|
|
Self::InvalidEscape(c) => write!(f, "\"\\{c}\" is not a valid string escape"),
|
|
}
|
|
}
|
|
}
|
|
impl From<ParseIntError> for LexError {
|
|
fn from(err: ParseIntError) -> Self {
|
|
Self::InvalidInteger(err)
|
|
}
|
|
}
|
|
impl From<ParseFloatError> for LexError {
|
|
fn from(err: ParseFloatError) -> Self {
|
|
Self::InvalidFloat(err)
|
|
}
|
|
}
|
|
|
|
pub type Result<T> = std::result::Result<T, LexError>;
|
|
|
|
pub struct Lexer<I>
|
|
where
|
|
I: Iterator<Item = char>,
|
|
{
|
|
chars: Peekable<I>,
|
|
}
|
|
|
|
fn t(tk: Token) -> Option<Result<Token>> {
|
|
Some(Ok(tk))
|
|
}
|
|
|
|
impl<I> Lexer<I>
|
|
where
|
|
I: Iterator<Item = char>,
|
|
{
|
|
pub fn new(chars: I) -> Self {
|
|
let chars = chars.peekable();
|
|
Self { chars }
|
|
}
|
|
|
|
fn peek(&mut self) -> Option<char> {
|
|
self.chars.peek().copied()
|
|
}
|
|
|
|
fn next(&mut self) -> Option<char> {
|
|
self.chars.next()
|
|
}
|
|
fn next_unwrap(&mut self) -> char {
|
|
match self.next() {
|
|
Some(c) => c,
|
|
None => unreachable!("called next_unwrap with nothing ahead"),
|
|
}
|
|
}
|
|
|
|
fn try_peek(&mut self) -> Result<char> {
|
|
self.peek().ok_or(LexError::UnexpectedEnd)
|
|
}
|
|
fn try_eat_peek(&mut self) -> Result<char> {
|
|
self.eat_peek().ok_or(LexError::UnexpectedEnd)
|
|
}
|
|
|
|
fn eat(&mut self) {
|
|
self.next();
|
|
}
|
|
fn eat_to(&mut self, tk: Token) -> Option<Result<Token>> {
|
|
self.eat();
|
|
Some(Ok(tk))
|
|
}
|
|
fn eat_peek(&mut self) -> Option<char> {
|
|
self.eat();
|
|
self.peek()
|
|
}
|
|
|
|
fn lex_whitespace(&mut self) -> Option<char> {
|
|
loop {
|
|
match self.peek()? {
|
|
' ' | '\t' | '\n' | '\r' => self.eat(),
|
|
_ => break self.peek(),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn lex_word(&mut self) -> Token {
|
|
let mut word = String::new();
|
|
|
|
while let Some('a'..='z' | 'A'..='Z' | '0'..='9' | '_') = self.peek() {
|
|
word.push(self.next_unwrap());
|
|
}
|
|
|
|
match word.as_str() {
|
|
"func" => Token::Func,
|
|
"if" => Token::If,
|
|
"else" => Token::Else,
|
|
"return" => Token::Return,
|
|
"true" => Token::Literal(Literal::Boolean(true)),
|
|
"false" => Token::Literal(Literal::Boolean(false)),
|
|
"nil" => Token::Literal(Literal::Nil),
|
|
_ => Token::Literal(Literal::Ident(Ident(word))),
|
|
}
|
|
}
|
|
|
|
fn lex_number(&mut self) -> Result<Token> {
|
|
let mut n_str = String::new();
|
|
|
|
// we don't lex negatives. the impl for that is
|
|
// a negation of a positive number at runtime.
|
|
// maybe that's kind of stupid though, lol
|
|
let mut is_float = false;
|
|
while let Some('0'..='9' | '.') = self.peek() {
|
|
if self.peek() == Some('.') {
|
|
is_float = true;
|
|
}
|
|
n_str.push(self.next_unwrap());
|
|
}
|
|
|
|
let lit = if is_float {
|
|
Literal::Float(n_str.parse()?)
|
|
} else {
|
|
Literal::Integer(n_str.parse()?)
|
|
};
|
|
|
|
Ok(Token::Literal(lit))
|
|
}
|
|
|
|
fn lex_string(&mut self) -> Result<Token> {
|
|
let delim = self.next_unwrap();
|
|
|
|
let mut str = String::new();
|
|
|
|
loop {
|
|
match self.try_peek()? {
|
|
'\\' => match self.try_eat_peek()? {
|
|
'n' => {
|
|
self.eat();
|
|
str.push('\n');
|
|
}
|
|
c => {
|
|
break Err(LexError::InvalidEscape(c));
|
|
}
|
|
},
|
|
c if c == delim => {
|
|
self.eat();
|
|
break Ok(Token::Literal(Literal::String(str)));
|
|
}
|
|
_ => str.push(self.next_unwrap()),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn lex(&mut self) -> Option<Result<Token>> {
|
|
loop {
|
|
break match self.lex_whitespace()? {
|
|
// { and } start/end of code block
|
|
'{' => self.eat_to(Token::CurlyOpen),
|
|
'}' => self.eat_to(Token::CurlyClose),
|
|
|
|
// ( and ) start/end of groups
|
|
'(' => self.eat_to(Token::ParenOpen),
|
|
')' => self.eat_to(Token::ParenClose),
|
|
|
|
// + add
|
|
// or += add eq
|
|
'+' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::PlusEquals),
|
|
_ => t(Token::Plus),
|
|
},
|
|
|
|
// - subtract
|
|
// or -= sub eq
|
|
'-' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::MinusEquals),
|
|
_ => t(Token::Minus),
|
|
},
|
|
|
|
// * multiply
|
|
// or *= mult eq
|
|
// or ** pow
|
|
'*' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::StarEquals),
|
|
Some('*') => self.eat_to(Token::StarStar),
|
|
_ => t(Token::Star),
|
|
},
|
|
|
|
// / divide
|
|
// or /= div eq
|
|
// or // comment
|
|
'/' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::SlashEquals),
|
|
Some('/') => {
|
|
// skip the rest of the line
|
|
// this leaves the newline btw
|
|
while !matches!(self.peek(), Some('\n') | None) {
|
|
self.eat();
|
|
}
|
|
continue;
|
|
}
|
|
_ => t(Token::Slash),
|
|
},
|
|
|
|
// % modulo
|
|
'%' => self.eat_to(Token::Percent),
|
|
|
|
// , comma
|
|
',' => self.eat_to(Token::Comma),
|
|
|
|
// = equals
|
|
// or == equal to
|
|
'=' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::EqualTo),
|
|
_ => t(Token::Equals),
|
|
},
|
|
|
|
// ! not
|
|
// or != not equal to
|
|
'!' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::NotEqualTo),
|
|
_ => t(Token::Not),
|
|
},
|
|
|
|
// && and
|
|
'&' if matches!(self.eat_peek(), Some('&')) => self.eat_to(Token::And),
|
|
|
|
// || or
|
|
'|' if matches!(self.eat_peek(), Some('|')) => self.eat_to(Token::Or),
|
|
|
|
// > greater than
|
|
// or >= greater than/equal to
|
|
'>' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::GreaterThanOrEqualTo),
|
|
_ => t(Token::GreaterThan),
|
|
},
|
|
|
|
// < less than
|
|
// or <= less than/equal to
|
|
'<' => match self.eat_peek() {
|
|
Some('=') => self.eat_to(Token::LessThanOrEqualTo),
|
|
_ => t(Token::LessThan),
|
|
},
|
|
|
|
// a-zA-Z_ start of word
|
|
'a'..='z' | 'A'..='Z' | '_' => Some(Ok(self.lex_word())),
|
|
|
|
// 0-9 integer
|
|
'0'..='9' | '.' => Some(self.lex_number()),
|
|
|
|
// " strings
|
|
'"' => Some(self.lex_string()),
|
|
|
|
// unexpected character
|
|
c => Some(Err(LexError::UnexpectedCharacter(c))),
|
|
};
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<T> Iterator for Lexer<T>
|
|
where
|
|
T: Iterator<Item = char>,
|
|
{
|
|
type Item = Result<Token>;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
self.lex()
|
|
}
|
|
}
|