use std::hash::Hash;

use chumsky::{
    prelude::{choice, filter, just, Simple},
    text::{self, TextParser},
    Parser,
};

#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    POpen,
    PClose,
    SOpen,
    SClose,
    COpen,
    CClose,
    Comma,
    Semicolon,
    KeyWhile,
    KeyReturn,
    KeyLet,
    Assign,
    Operator(String),
    Identifier(String),
    ConstInt(i32),
    ConstFloat(f32),
    ConstString(String),
    ConstBool(bool),
    ConstChar(char),
}

// Tokens hash by variant only. This is still consistent with the derived
// `PartialEq`, since equal tokens always share a discriminant.
impl Hash for Token {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        core::mem::discriminant(self).hash(state);
    }
}

impl Eq for Token {}

fn lexer() -> impl Parser<char, Vec<Token>, Error = Simple<char>> {
    // Escape sequences allowed inside string literals.
    let escape = just('\\').ignore_then(
        just('\\')
            .or(just('/'))
            .or(just('"'))
            .or(just('b').to('\x08'))
            .or(just('f').to('\x0C'))
            .or(just('n').to('\n'))
            .or(just('r').to('\r'))
            .or(just('t').to('\t')),
    );

    // A double-quoted string: any run of non-quote, non-backslash characters or escapes.
    let string = just('"')
        .ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated())
        .then_ignore(just('"'))
        .collect::<String>()
        .map(Token::ConstString);

    // A single-quoted character literal (no escape handling).
    let char = just('\'')
        .ignore_then(chumsky::prelude::any())
        .then_ignore(just('\''))
        .map(Token::ConstChar);

    // An optionally signed integer literal.
    let number = just('-')
        .or_not()
        .chain::<char, _, _>(text::int(10))
        .collect::<String>()
        .from_str()
        .unwrapped()
        .map(Token::ConstInt);

    // An optionally signed float literal with a mandatory fractional part.
    let float = just('-')
        .or_not()
        .chain::<char, _, _>(text::int(10))
        .chain::<char, _, _>(just('.').chain(text::digits(10)))
        .collect::<String>()
        .from_str()
        .unwrapped()
        .map(Token::ConstFloat);

    // Operators: two-character operators must be tried before their
    // single-character prefixes, or `<=` would lex as `<` followed by `=`.
    let op = just("<=")
        .or(just(">="))
        .or(just("=="))
        .or(just("!="))
        .or(just("+"))
        .or(just("-"))
        .or(just("*"))
        .or(just("/"))
        .or(just("<"))
        .or(just(">"))
        .map(|op| Token::Operator(op.to_string()));

    choice((
        just("(").to(Token::POpen),
        just(")").to(Token::PClose),
        just("[").to(Token::SOpen),
        just("]").to(Token::SClose),
        just("{").to(Token::COpen),
        just("}").to(Token::CClose),
        just(",").to(Token::Comma),
        just(";").to(Token::Semicolon),
        // `op` is tried before `Assign` so that `==` lexes as one operator
        // rather than as two assignments.
        op,
        just("=").to(Token::Assign),
        text::keyword("while").to(Token::KeyWhile),
        text::keyword("return").to(Token::KeyReturn),
        text::keyword("let").to(Token::KeyLet),
        text::keyword("true").to(Token::ConstBool(true)),
        text::keyword("false").to(Token::ConstBool(false)),
        text::ident().map(Token::Identifier),
        // `float` before `number`, so `123.456` is not lexed as `123`.
        float,
        number,
        string,
        char,
    ))
    .padded()
    .repeated()
}

#[cfg(test)]
mod tests {
    use chumsky::Parser;

    use crate::lexer::Token;

    use super::lexer;

    #[test]
    fn parse_string() {
        let x = "\"hello\"";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstString("hello".to_string()));
    }

    #[test]
    fn parse_char() {
        let x = "'a'";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstChar('a'));
    }

    #[test]
    fn parse_int() {
        let x = "123";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstInt(123));
    }

    #[test]
    fn parse_float() {
        let x = "123.456";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstFloat(123.456));
    }

    #[test]
    fn parse_bool() {
        let x = "true";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0], Token::ConstBool(true));
    }

    #[test]
    fn parse_example_program() {
        let x = r"
            let x = 10;
            let y = 200;
            let z = x + y;
            print(z);
        ";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 22);
        assert_eq!(tokens[0], Token::KeyLet);
        assert_eq!(tokens[1], Token::Identifier("x".to_string()));
        assert_eq!(tokens[2], Token::Assign);
        assert_eq!(tokens[3], Token::ConstInt(10));
        assert_eq!(tokens[4], Token::Semicolon);
        assert_eq!(tokens[5], Token::KeyLet);
        assert_eq!(tokens[6], Token::Identifier("y".to_string()));
        assert_eq!(tokens[7], Token::Assign);
        assert_eq!(tokens[8], Token::ConstInt(200));
        assert_eq!(tokens[9], Token::Semicolon);
        assert_eq!(tokens[10], Token::KeyLet);
        assert_eq!(tokens[11], Token::Identifier("z".to_string()));
        assert_eq!(tokens[12], Token::Assign);
        assert_eq!(tokens[13], Token::Identifier("x".to_string()));
        assert_eq!(tokens[14], Token::Operator("+".to_string()));
        assert_eq!(tokens[15], Token::Identifier("y".to_string()));
        assert_eq!(tokens[16], Token::Semicolon);
        assert_eq!(tokens[17], Token::Identifier("print".to_string()));
        assert_eq!(tokens[18], Token::POpen);
        assert_eq!(tokens[19], Token::Identifier("z".to_string()));
        assert_eq!(tokens[20], Token::PClose);
        assert_eq!(tokens[21], Token::Semicolon);
    }
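    // Added sanity check (not part of the original suite): two-character
    // operators such as `<=` and `==` should come out as single tokens. This
    // relies on the longest-first ordering inside `op` and on `op` being
    // tried before `Assign` in the `choice` above.
    #[test]
    fn parse_operators() {
        let x = "x <= y == z";
        let tokens = lexer().parse(x).unwrap();
        assert_eq!(tokens.len(), 5);
        assert_eq!(tokens[0], Token::Identifier("x".to_string()));
        assert_eq!(tokens[1], Token::Operator("<=".to_string()));
        assert_eq!(tokens[2], Token::Identifier("y".to_string()));
        assert_eq!(tokens[3], Token::Operator("==".to_string()));
        assert_eq!(tokens[4], Token::Identifier("z".to_string()));
    }
}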