diff options
author | Griffin Smith <root@gws.fyi> | 2021-03-07T20·29-0500 |
---|---|---|
committer | Griffin Smith <root@gws.fyi> | 2021-03-07T20·29-0500 |
commit | 80f8ede0bbc9799d5199707e1e1ad8e80e4ca7ac (patch) | |
tree | cbb418b042583714fe09f946f1b9a03d1d98857f /src/parser/mod.rs |
Initial commit
Diffstat (limited to 'src/parser/mod.rs')
-rw-r--r-- | src/parser/mod.rs | 445 |
1 files changed, 445 insertions, 0 deletions
diff --git a/src/parser/mod.rs b/src/parser/mod.rs new file mode 100644 index 000000000000..811450da0d61 --- /dev/null +++ b/src/parser/mod.rs @@ -0,0 +1,445 @@ +use nom::character::complete::{digit1, multispace0, multispace1}; +use nom::error::{ErrorKind, ParseError}; +use nom::{ + alt, char, complete, delimited, do_parse, flat_map, many0, map, named, parse_to, + separated_list0, separated_list1, tag, tuple, +}; +use pratt::{Affix, Associativity, PrattParser, Precedence}; + +use crate::ast::{BinaryOperator, Decl, Expr, Fun, Ident, Literal, UnaryOperator}; + +pub type Error = nom::Err<nom::error::Error<String>>; + +#[derive(Debug)] +enum TokenTree<'a> { + Prefix(UnaryOperator), + // Postfix(char), + Infix(BinaryOperator), + Primary(Expr<'a>), + Group(Vec<TokenTree<'a>>), +} + +named!(prefix(&str) -> TokenTree, map!(alt!( + complete!(char!('-')) => { |_| UnaryOperator::Neg } | + complete!(char!('!')) => { |_| UnaryOperator::Not } +), TokenTree::Prefix)); + +named!(infix(&str) -> TokenTree, map!(alt!( + complete!(tag!("==")) => { |_| BinaryOperator::Equ } | + complete!(tag!("!=")) => { |_| BinaryOperator::Neq } | + complete!(char!('+')) => { |_| BinaryOperator::Add } | + complete!(char!('-')) => { |_| BinaryOperator::Sub } | + complete!(char!('*')) => { |_| BinaryOperator::Mul } | + complete!(char!('/')) => { |_| BinaryOperator::Div } | + complete!(char!('^')) => { |_| BinaryOperator::Pow } +), TokenTree::Infix)); + +named!(primary(&str) -> TokenTree, alt!( + do_parse!( + multispace0 >> + char!('(') >> + multispace0 >> + group: group >> + multispace0 >> + char!(')') >> + multispace0 >> + (TokenTree::Group(group)) + ) | + delimited!(multispace0, simple_expr, multispace0) => { |s| TokenTree::Primary(s) } +)); + +named!( + rest(&str) -> Vec<(TokenTree, Vec<TokenTree>, TokenTree)>, + many0!(tuple!( + infix, + delimited!(multispace0, many0!(prefix), multispace0), + primary + // many0!(postfix) + )) +); + +named!(group(&str) -> Vec<TokenTree>, do_parse!( + prefix: many0!(prefix) + >> primary: primary + // >> postfix: many0!(postfix) + >> rest: rest + >> ({ + let mut res = prefix; + res.push(primary); + // res.append(&mut postfix); + for (infix, mut prefix, primary/*, mut postfix*/) in rest { + res.push(infix); + res.append(&mut prefix); + res.push(primary); + // res.append(&mut postfix); + } + res + }) +)); + +fn token_tree(i: &str) -> nom::IResult<&str, Vec<TokenTree>> { + group(i) +} + +struct ExprParser; + +impl<'a, I> PrattParser<I> for ExprParser +where + I: Iterator<Item = TokenTree<'a>>, +{ + type Error = pratt::NoError; + type Input = TokenTree<'a>; + type Output = Expr<'a>; + + fn query(&mut self, input: &Self::Input) -> Result<Affix, Self::Error> { + use BinaryOperator::*; + use UnaryOperator::*; + + Ok(match input { + TokenTree::Infix(Add) => Affix::Infix(Precedence(6), Associativity::Left), + TokenTree::Infix(Sub) => Affix::Infix(Precedence(6), Associativity::Left), + TokenTree::Infix(Mul) => Affix::Infix(Precedence(7), Associativity::Left), + TokenTree::Infix(Div) => Affix::Infix(Precedence(7), Associativity::Left), + TokenTree::Infix(Pow) => Affix::Infix(Precedence(8), Associativity::Right), + TokenTree::Infix(Equ) => Affix::Infix(Precedence(4), Associativity::Right), + TokenTree::Infix(Neq) => Affix::Infix(Precedence(4), Associativity::Right), + TokenTree::Prefix(Neg) => Affix::Prefix(Precedence(6)), + TokenTree::Prefix(Not) => Affix::Prefix(Precedence(6)), + TokenTree::Primary(_) => Affix::Nilfix, + TokenTree::Group(_) => Affix::Nilfix, + }) + } + + fn primary(&mut self, input: Self::Input) -> Result<Self::Output, Self::Error> { + Ok(match input { + TokenTree::Primary(expr) => expr, + TokenTree::Group(group) => self.parse(&mut group.into_iter()).unwrap(), + _ => unreachable!(), + }) + } + + fn infix( + &mut self, + lhs: Self::Output, + op: Self::Input, + rhs: Self::Output, + ) -> Result<Self::Output, Self::Error> { + let op = match op { + TokenTree::Infix(op) => op, + _ => unreachable!(), + }; + Ok(Expr::BinaryOp { + lhs: Box::new(lhs), + op, + rhs: Box::new(rhs), + }) + } + + fn prefix(&mut self, op: Self::Input, rhs: Self::Output) -> Result<Self::Output, Self::Error> { + let op = match op { + TokenTree::Prefix(op) => op, + _ => unreachable!(), + }; + + Ok(Expr::UnaryOp { + op, + rhs: Box::new(rhs), + }) + } + + fn postfix( + &mut self, + _lhs: Self::Output, + _op: Self::Input, + ) -> Result<Self::Output, Self::Error> { + unreachable!() + } +} + +fn ident<'a, E>(i: &'a str) -> nom::IResult<&'a str, Ident, E> +where + E: ParseError<&'a str>, +{ + let mut chars = i.chars(); + if let Some(f) = chars.next() { + if f.is_alphabetic() || f == '_' { + let mut idx = 1; + for c in chars { + if !(c.is_alphanumeric() || c == '_') { + break; + } + idx += 1; + } + Ok((&i[idx..], Ident::from_str_unchecked(&i[..idx]))) + } else { + Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::Satisfy))) + } + } else { + Err(nom::Err::Error(E::from_error_kind(i, ErrorKind::Eof))) + } +} + +named!(int(&str) -> Literal, map!(flat_map!(digit1, parse_to!(u64)), Literal::Int)); + +named!(literal(&str) -> Expr, map!(alt!(int), Expr::Literal)); + +named!(binding(&str) -> (Ident, Expr), do_parse!( + multispace0 + >> ident: ident + >> multispace0 + >> char!('=') + >> multispace0 + >> expr: expr + >> (ident, expr) +)); + +named!(let_(&str) -> Expr, do_parse!( + tag!("let") + >> multispace0 + >> bindings: separated_list1!(alt!(char!(';') | char!('\n')), binding) + >> multispace0 + >> tag!("in") + >> multispace0 + >> body: expr + >> (Expr::Let { + bindings, + body: Box::new(body) + }) +)); + +named!(if_(&str) -> Expr, do_parse! ( + tag!("if") + >> multispace0 + >> condition: expr + >> multispace0 + >> tag!("then") + >> multispace0 + >> then: expr + >> multispace0 + >> tag!("else") + >> multispace0 + >> else_: expr + >> (Expr::If { + condition: Box::new(condition), + then: Box::new(then), + else_: Box::new(else_) + }) +)); + +named!(ident_expr(&str) -> Expr, map!(ident, Expr::Ident)); + +named!(simple_expr(&str) -> Expr, alt!( + let_ | + if_ | + literal | + ident_expr +)); + +named!(pub expr(&str) -> Expr, alt!( + map!(token_tree, |tt| { + ExprParser.parse(&mut tt.into_iter()).unwrap() + }) | + simple_expr)); + +////// + +named!(fun(&str) -> Fun, do_parse!( + tag!("fn") + >> multispace0 + >> name: ident + >> multispace1 + >> args: separated_list0!(multispace1, ident) + >> multispace0 + >> char!('=') + >> multispace0 + >> body: expr + >> (Fun { + name, + args, + body + }) +)); + +named!(pub decl(&str) -> Decl, alt!( + fun => { |f| Decl::Fun(f) } +)); + +named!(pub toplevel(&str) -> Vec<Decl>, separated_list0!(multispace1, decl)); + +#[cfg(test)] +mod tests { + use std::convert::{TryFrom, TryInto}; + + use super::*; + use BinaryOperator::*; + use Expr::{BinaryOp, If, Let, UnaryOp}; + use UnaryOperator::*; + + fn ident_expr(s: &str) -> Box<Expr> { + Box::new(Expr::Ident(Ident::try_from(s).unwrap())) + } + + macro_rules! test_parse { + ($parser: ident, $src: expr) => {{ + let (rem, res) = $parser($src).unwrap(); + assert!( + rem.is_empty(), + "non-empty remainder: \"{}\", parsed: {:?}", + rem, + res + ); + res + }}; + } + + mod operators { + use super::*; + + #[test] + fn mul_plus() { + let (rem, res) = expr("x*y+z").unwrap(); + assert!(rem.is_empty()); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: ident_expr("y") + }), + op: Add, + rhs: ident_expr("z") + } + ) + } + + #[test] + fn mul_plus_ws() { + let (rem, res) = expr("x * y + z").unwrap(); + assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: ident_expr("y") + }), + op: Add, + rhs: ident_expr("z") + } + ) + } + + #[test] + fn unary() { + let (rem, res) = expr("x * -z").unwrap(); + assert!(rem.is_empty(), "non-empty remainder: \"{}\"", rem); + assert_eq!( + res, + BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(UnaryOp { + op: Neg, + rhs: ident_expr("z"), + }) + } + ) + } + + #[test] + fn mul_literal() { + let (rem, res) = expr("x * 3").unwrap(); + assert!(rem.is_empty()); + assert_eq!( + res, + BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(3))), + } + ) + } + + #[test] + fn equ() { + let res = test_parse!(expr, "x * 7 == 7"); + assert_eq!( + res, + BinaryOp { + lhs: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + }), + op: Equ, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + } + ) + } + } + + #[test] + fn let_complex() { + let res = test_parse!(expr, "let x = 1; y = x * 7 in (x + y) * 4"); + assert_eq!( + res, + Let { + bindings: vec![ + ( + Ident::try_from("x").unwrap(), + Expr::Literal(Literal::Int(1)) + ), + ( + Ident::try_from("y").unwrap(), + Expr::BinaryOp { + lhs: ident_expr("x"), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(7))) + } + ) + ], + body: Box::new(Expr::BinaryOp { + lhs: Box::new(Expr::BinaryOp { + lhs: ident_expr("x"), + op: Add, + rhs: ident_expr("y"), + }), + op: Mul, + rhs: Box::new(Expr::Literal(Literal::Int(4))), + }) + } + ) + } + + #[test] + fn if_simple() { + let res = test_parse!(expr, "if x == 8 then 9 else 20"); + assert_eq!( + res, + If { + condition: Box::new(BinaryOp { + lhs: ident_expr("x"), + op: Equ, + rhs: Box::new(Expr::Literal(Literal::Int(8))), + }), + then: Box::new(Expr::Literal(Literal::Int(9))), + else_: Box::new(Expr::Literal(Literal::Int(20))) + } + ) + } + + #[test] + fn fn_decl() { + let res = test_parse!(decl, "fn id x = x"); + assert_eq!( + res, + Decl::Fun(Fun { + name: "id".try_into().unwrap(), + args: vec!["x".try_into().unwrap()], + body: *ident_expr("x"), + }) + ) + } +} |