201 lines
5.4 KiB
Rust
201 lines
5.4 KiB
Rust
use std::hash::Hash;
|
|
|
|
use chumsky::{
|
|
prelude::{choice, filter, just, Simple},
|
|
text::{self, TextParser},
|
|
Parser,
|
|
};
|
|
|
|
/// A single lexical token produced by the lexer.
///
/// Punctuation and keywords are unit variants; operators, identifiers and
/// literals carry their parsed payload.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `(`
    POpen,
    /// `)`
    PClose,
    /// `[`
    SOpen,
    /// `]`
    SClose,
    /// `{`
    COpen,
    /// `}`
    CClose,
    /// `,`
    Comma,
    /// `;`
    Semicolon,
    /// The `while` keyword.
    KeyWhile,
    /// The `return` keyword.
    KeyReturn,
    /// The `let` keyword.
    KeyLet,
    /// A single `=`.
    Assign,
    /// An arithmetic or comparison operator, stored as its source text
    /// (for example `"+"` or `"!="`).
    Operator(String),
    /// An identifier that is not one of the reserved keywords.
    Identifier(String),
    /// An integer literal.
    ConstInt(i32),
    /// A floating-point literal.
    ConstFloat(f32),
    /// A string literal with its escape sequences already resolved.
    ConstString(String),
    /// A boolean literal (`true` / `false`).
    ConstBool(bool),
    /// A character literal.
    ConstChar(char),
}
|
|
|
|
impl Hash for Token {
|
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
|
core::mem::discriminant(self).hash(state);
|
|
}
|
|
}
|
|
|
|
// `Eq` cannot be derived because `Token::ConstFloat` holds an `f32`, which is
// only `PartialEq`; the marker trait is therefore asserted by hand on top of
// the derived `PartialEq`.
// NOTE(review): a `ConstFloat` holding NaN would not compare equal to itself,
// which breaks the reflexivity `Eq` promises — confirm such a token can never
// be constructed.
impl Eq for Token {}
|
|
|
|
fn lexer() -> impl Parser<char, Vec<Token>, Error = Simple<char>> {
|
|
let escape = just('\\').ignore_then(
|
|
just('\\')
|
|
.or(just('/'))
|
|
.or(just('"'))
|
|
.or(just('b').to('\x08'))
|
|
.or(just('f').to('\x0C'))
|
|
.or(just('n').to('\n'))
|
|
.or(just('r').to('\r'))
|
|
.or(just('t').to('\t')),
|
|
);
|
|
|
|
let string = just('"')
|
|
.ignore_then(filter(|c| *c != '\\' && *c != '"').or(escape).repeated())
|
|
.then_ignore(just('"'))
|
|
.collect::<String>()
|
|
.map(Token::ConstString);
|
|
|
|
let char = just('\'')
|
|
.ignore_then(chumsky::prelude::any())
|
|
.then_ignore(just('\''))
|
|
.map(Token::ConstChar);
|
|
|
|
let number = just('-')
|
|
.or_not()
|
|
.chain::<char, _, _>(text::int(10))
|
|
.collect::<String>()
|
|
.from_str()
|
|
.unwrapped()
|
|
.map(Token::ConstInt);
|
|
|
|
let float = just('-')
|
|
.or_not()
|
|
.chain(text::int(10))
|
|
.chain::<char, _, _>(just('.').chain(text::digits(10)))
|
|
.collect::<String>()
|
|
.from_str()
|
|
.unwrapped()
|
|
.map(Token::ConstFloat);
|
|
|
|
let op = just("+")
|
|
.or(just("-"))
|
|
.or(just("*"))
|
|
.or(just("/"))
|
|
.or(just("<"))
|
|
.or(just(">"))
|
|
.or(just("<="))
|
|
.or(just(">="))
|
|
.or(just("=="))
|
|
.or(just("!="))
|
|
.map(|op| Token::Operator(op.to_string()));
|
|
|
|
choice((
|
|
just("(").to(Token::POpen),
|
|
just(")").to(Token::PClose),
|
|
just("[").to(Token::SOpen),
|
|
just("]").to(Token::SClose),
|
|
just("{").to(Token::COpen),
|
|
just("}").to(Token::CClose),
|
|
just(",").to(Token::Comma),
|
|
just(";").to(Token::Semicolon),
|
|
just("=").to(Token::Assign),
|
|
op,
|
|
text::keyword("while").to(Token::KeyWhile),
|
|
text::keyword("return").to(Token::KeyReturn),
|
|
text::keyword("let").to(Token::KeyLet),
|
|
text::keyword("true").to(Token::ConstBool(true)),
|
|
text::keyword("false").to(Token::ConstBool(false)),
|
|
text::ident().map(Token::Identifier),
|
|
float,
|
|
number,
|
|
string,
|
|
char,
|
|
))
|
|
.padded()
|
|
.repeated()
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use chumsky::Parser;

    use crate::lexer::Token;

    use super::lexer;

    /// Runs the lexer over `source`, panicking if lexing fails.
    fn lex(source: &str) -> Vec<Token> {
        lexer().parse(source).unwrap()
    }

    /// Shorthand for an identifier token.
    fn ident(name: &str) -> Token {
        Token::Identifier(name.to_string())
    }

    #[test]
    fn parse_string() {
        assert_eq!(
            lex("\"hello\""),
            vec![Token::ConstString("hello".to_string())]
        );
    }

    #[test]
    fn parse_char() {
        assert_eq!(lex("'a'"), vec![Token::ConstChar('a')]);
    }

    #[test]
    fn parse_int() {
        assert_eq!(lex("123"), vec![Token::ConstInt(123)]);
    }

    #[test]
    fn parse_float() {
        assert_eq!(lex("123.456"), vec![Token::ConstFloat(123.456)]);
    }

    #[test]
    fn parse_bool() {
        assert_eq!(lex("true"), vec![Token::ConstBool(true)]);
    }

    #[test]
    fn parse_example_program() {
        let program = r"
            let x = 10;
            let y = 200;
            let z = x + y;
            print(z);
        ";

        // One inner line per source statement.
        let expected = vec![
            Token::KeyLet,
            ident("x"),
            Token::Assign,
            Token::ConstInt(10),
            Token::Semicolon,
            Token::KeyLet,
            ident("y"),
            Token::Assign,
            Token::ConstInt(200),
            Token::Semicolon,
            Token::KeyLet,
            ident("z"),
            Token::Assign,
            ident("x"),
            Token::Operator("+".to_string()),
            ident("y"),
            Token::Semicolon,
            ident("print"),
            Token::POpen,
            ident("z"),
            Token::PClose,
            Token::Semicolon,
        ];
        assert_eq!(expected.len(), 22);

        assert_eq!(lex(program), expected);
    }
}
|