sunflower/src/lexer.rs

use std::hash::Hash;

use chumsky::{
    prelude::{any, choice, filter, just, Simple},
    text::{self, TextParser},
    Parser,
};

/// Tokens produced by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    // Parentheses, square brackets, and curly braces.
    POpen,
    PClose,
    SOpen,
    SClose,
    COpen,
    CClose,
    Comma,
    Semicolon,
    // Keywords.
    KeyWhile,
    KeyReturn,
    KeyLet,
    Assign,
    Operator(String),
    Identifier(String),
    // Literals.
    ConstInt(i32),
    ConstFloat(f32),
    ConstString(String),
    ConstBool(bool),
    ConstChar(char),
}

impl Hash for Token {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        // Hash only the variant, not its payload; equal tokens still produce
        // equal hashes, so this stays consistent with `PartialEq`.
        core::mem::discriminant(self).hash(state);
    }
}

impl Eq for Token {}

/// Builds a lexer that turns source text into a flat stream of [`Token`]s.
fn lexer() -> impl Parser<char, Vec<Token>, Error = Simple<char>> {
    // Escape sequences permitted inside string literals.
    let escape = just('\\').ignore_then(
        just('\\')
            .or(just('/'))
            .or(just('"'))
            .or(just('b').to('\x08'))
            .or(just('f').to('\x0C'))
            .or(just('n').to('\n'))
            .or(just('r').to('\r'))
            .or(just('t').to('\t')),
    );

    // String literals: unescaped characters or escape sequences between double quotes.
    let string = just('"')
        .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated())
        .then_ignore(just('"'))
        .collect::<String>()
        .map(Token::ConstString);

    // Character literals: a single character between single quotes (no escapes yet).
    let char = just('\'')
        .ignore_then(any())
        .then_ignore(just('\''))
        .map(Token::ConstChar);

    // Integer literals, with an optional leading minus sign.
    let number = just('-')
        .or_not()
        .chain::<char, _, _>(text::int(10))
        .collect::<String>()
        .from_str()
        .unwrapped()
        .map(Token::ConstInt);

    // Float literals: an integer part, a dot, and a fractional part.
    let float = just('-')
        .or_not()
        .chain(text::int(10))
        .chain::<char, _, _>(just('.').chain(text::digits(10)))
        .collect::<String>()
        .from_str()
        .unwrapped()
        .map(Token::ConstFloat);

    // Operators. Two-character operators are tried first so that e.g. `<=`
    // lexes as a single token instead of `<` followed by `=`.
    let op = just("<=")
        .or(just(">="))
        .or(just("=="))
        .or(just("!="))
        .or(just("+"))
        .or(just("-"))
        .or(just("*"))
        .or(just("/"))
        .or(just("<"))
        .or(just(">"))
        .map(|op| Token::Operator(op.to_string()));

    choice((
        just("(").to(Token::POpen),
        just(")").to(Token::PClose),
        just("[").to(Token::SOpen),
        just("]").to(Token::SClose),
        just("{").to(Token::COpen),
        just("}").to(Token::CClose),
        just(",").to(Token::Comma),
        just(";").to(Token::Semicolon),
        // Operators come before `=` so that `==` is not lexed as two assignments.
        op,
        just("=").to(Token::Assign),
        text::keyword("while").to(Token::KeyWhile),
        text::keyword("return").to(Token::KeyReturn),
        text::keyword("let").to(Token::KeyLet),
        text::keyword("true").to(Token::ConstBool(true)),
        text::keyword("false").to(Token::ConstBool(false)),
        text::ident().map(Token::Identifier),
        // `float` before `number` so the integer part of `1.5` is not consumed alone.
        float,
        number,
        string,
        char,
    ))
    .padded()
    .repeated()
}

#[cfg(test)]
mod tests {
    use chumsky::Parser;
    use crate::lexer::Token;
    use super::lexer;

    #[test]
    fn parse_string() {
        let x = "\"hello\"";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstString("hello".to_string()));
    }
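
    // Escape sequences inside a string literal should map to their control
    // characters (see the `escape` parser above): here `\n` becomes a newline.
    #[test]
    fn parse_string_with_escape() {
        let x = "\"a\\nb\"";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstString("a\nb".to_string()));
    }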

    #[test]
    fn parse_char() {
        let x = "'a'";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstChar('a'));
    }

    #[test]
    fn parse_int() {
        let x = "123";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstInt(123));
    }

    #[test]
    fn parse_float() {
        let x = "123.456";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstFloat(123.456));
    }

    #[test]
    fn parse_bool() {
        let x = "true";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstBool(true));
    }
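
    // Two-character operators should lex as a single `Operator` token rather
    // than as their single-character prefixes (e.g. `<=` is not `<` then `=`).
    #[test]
    fn parse_operators() {
        let x = "<= >= == != < >";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 6);
        assert_eq!(tokens[0], Token::Operator("<=".to_string()));
        assert_eq!(tokens[1], Token::Operator(">=".to_string()));
        assert_eq!(tokens[2], Token::Operator("==".to_string()));
        assert_eq!(tokens[3], Token::Operator("!=".to_string()));
        assert_eq!(tokens[4], Token::Operator("<".to_string()));
        assert_eq!(tokens[5], Token::Operator(">".to_string()));
    }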

    #[test]
    fn parse_example_program() {
        let x = r"
            let x = 10;
            let y = 200;
            let z = x + y;
            print(z);
        ";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 22);
        assert_eq!(tokens[0], Token::KeyLet);
        assert_eq!(tokens[1], Token::Identifier("x".to_string()));
        assert_eq!(tokens[2], Token::Assign);
        assert_eq!(tokens[3], Token::ConstInt(10));
        assert_eq!(tokens[4], Token::Semicolon);
        assert_eq!(tokens[5], Token::KeyLet);
        assert_eq!(tokens[6], Token::Identifier("y".to_string()));
        assert_eq!(tokens[7], Token::Assign);
        assert_eq!(tokens[8], Token::ConstInt(200));
        assert_eq!(tokens[9], Token::Semicolon);
        assert_eq!(tokens[10], Token::KeyLet);
        assert_eq!(tokens[11], Token::Identifier("z".to_string()));
        assert_eq!(tokens[12], Token::Assign);
        assert_eq!(tokens[13], Token::Identifier("x".to_string()));
        assert_eq!(tokens[14], Token::Operator("+".to_string()));
        assert_eq!(tokens[15], Token::Identifier("y".to_string()));
        assert_eq!(tokens[16], Token::Semicolon);
        assert_eq!(tokens[17], Token::Identifier("print".to_string()));
        assert_eq!(tokens[18], Token::POpen);
        assert_eq!(tokens[19], Token::Identifier("z".to_string()));
        assert_eq!(tokens[20], Token::PClose);
        assert_eq!(tokens[21], Token::Semicolon);
    }
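
    // A `while` loop exercises the remaining keyword, brace, and comparison tokens.
    #[test]
    fn parse_while_loop() {
        let x = "while (x < 10) { x = x + 1; }";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 14);
        assert_eq!(tokens[0], Token::KeyWhile);
        assert_eq!(tokens[1], Token::POpen);
        assert_eq!(tokens[2], Token::Identifier("x".to_string()));
        assert_eq!(tokens[3], Token::Operator("<".to_string()));
        assert_eq!(tokens[4], Token::ConstInt(10));
        assert_eq!(tokens[5], Token::PClose);
        assert_eq!(tokens[6], Token::COpen);
        assert_eq!(tokens[13], Token::CClose);
    }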
}