forked from Orchid/orchid
redid the parser, patched up the project too.
This commit is contained in:
13
src/parse/comment.rs
Normal file
13
src/parse/comment.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
pub use chumsky::{self, prelude::*, Parser};

/// Parses Lua-style comments
///
/// Accepts either a block comment `--[ ... ]--` or a line comment `--` running
/// up to (but not consuming) the next newline or end of input, and yields the
/// comment body as a `String`.
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
  choice((
    // Block form: everything between `--[` and the closing `]--`.
    just("--[").ignore_then(take_until(
      just("]--").ignored()
    )),
    // Line form: `rewind` leaves the newline in the stream so a
    // line-oriented caller can still see it; `end()` covers a comment on
    // the last line with no trailing newline.
    just("--").ignore_then(take_until(
      just("\n").rewind().ignored().or(end())
    ))
  // take_until yields (Vec<char>, terminator); drop the unit terminator and
  // collect the characters into the resulting String.
  )).map(|(vc, ())| vc).collect().labelled("comment")
}
|
||||
26
src/parse/enum_parser.rs
Normal file
26
src/parse/enum_parser.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
/// Build a chumsky parser that matches a single token of one enum variant and
/// extracts its payload.
///
/// Forms:
/// * `enum_parser!(Path::Variant | "msg")` — match `Variant`, yield its field,
///   fail with the custom message `"msg"`.
/// * `enum_parser!(Src >> Tgt; Variant)` — match `Src::Variant` and rewrap the
///   payload as `Tgt::Variant`.
/// * `enum_parser!(Src >> Tgt; A, B, C)` — choice over several such variants.
/// * `enum_parser!(Path::Variant)` — first form with a default error message.
#[macro_export]
macro_rules! enum_parser {
  ($p:path | $m:tt) => {
    {
      // filter_map inspects one token and either unwraps it or rejects it
      // with a custom error at span `s`.
      ::chumsky::prelude::filter_map(|s, l| {
        if let $p(x) = l { Ok(x) }
        else { Err(::chumsky::prelude::Simple::custom(s, $m))}
      })
    }
  };
  ($p:path >> $q:path; $i:ident) => {
    {
      // Local aliases let the shared variant ident be appended to both the
      // source and target enum paths.
      use $p as srcpath;
      use $q as tgtpath;
      enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i)
    }
  };
  ($p:path >> $q:path; $($i:ident),+) => {
    {
      ::chumsky::prelude::choice((
        $( enum_parser!($p >> $q; $i) ),+
      ))
    }
  };
  ($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) };
}
|
||||
@@ -1,86 +1,90 @@
|
||||
use std::{fmt::Debug};
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
use crate::{Clause, Expr, Literal, enum_parser};
|
||||
|
||||
use super::string;
|
||||
use super::number;
|
||||
use super::misc;
|
||||
use super::name;
|
||||
use super::{lexer::Lexeme};
|
||||
|
||||
/// An S-expression as read from a source file
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expr {
|
||||
Num(f64),
|
||||
Int(u64),
|
||||
Char(char),
|
||||
Str(String),
|
||||
Name(Vec<String>),
|
||||
S(Vec<Expr>),
|
||||
Lambda(String, Option<Box<Expr>>, Vec<Expr>),
|
||||
Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
|
||||
|
||||
Typed(Box<Expr>, Box<Expr>)
|
||||
/// Parse an S-expression: any number of sub-expressions between a matching
/// delimiter pair `()`, `[]` or `{}`. The opening delimiter character is
/// preserved in the resulting `Clause::S`.
fn sexpr_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, b))
}
|
||||
|
||||
/// Parse a type annotation
|
||||
fn typed_parser<'a>(
|
||||
expr: Recursive<'a, char, Expr, Simple<char>>
|
||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
just(':').ignore_then(expr)
|
||||
/// Parse a lambda abstraction: `\name.body` or `\name:type.body`.
///
/// Comments may appear between any two tokens. The optional `:type` section
/// may hold several expressions and defaults to an empty Vec when absent.
/// Each occurrence of the parameter name in the body is bound via
/// `bind_parameter` before the `Clause::Lambda` is built.
fn lambda_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  just(Lexeme::BS)
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .ignore_then(enum_parser!(Lexeme::Name))
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(
    just(Lexeme::Type)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(expr.clone().repeated())
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    // The whole `:type` section is optional; None becomes an empty Vec.
    .or_not().map(Option::unwrap_or_default)
  )
  .then_ignore(just(Lexeme::name(".")))
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(expr.repeated().at_least(1))
  .map(|((name, typ), mut body): ((String, Vec<Expr>), Vec<Expr>)| {
    // Bind the parameter in the body before constructing the node.
    for ent in &mut body { ent.bind_parameter(&name) };
    Clause::Lambda(name, typ, body)
  })
}
|
||||
|
||||
fn auto_parser<P>(
|
||||
expr: P
|
||||
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
|
||||
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
|
||||
just(Lexeme::At)
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.ignore_then(enum_parser!(Lexeme::Name).or_not())
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.then(
|
||||
just(Lexeme::Type)
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.ignore_then(expr.clone().repeated())
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
)
|
||||
.then_ignore(just(Lexeme::name(".")))
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.then(expr.repeated().at_least(1))
|
||||
.try_map(|((name, typ), mut body), s| if name == None && typ.is_empty() {
|
||||
Err(Simple::custom(s, "Auto without name or type has no effect"))
|
||||
} else {
|
||||
if let Some(n) = &name {
|
||||
for ent in &mut body { ent.bind_parameter(n) }
|
||||
}
|
||||
Ok(Clause::Auto(name, typ, body))
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse a (possibly namespaced) name: one or more `Name` tokens joined by the
/// `::` namespace separator, with comments permitted around each separator.
fn name_parser() -> impl Parser<Lexeme, Vec<String>, Error = Simple<Lexeme>> + Clone {
  enum_parser!(Lexeme::Name).separated_by(
    enum_parser!(Lexeme::Comment).repeated()
    .then(just(Lexeme::NS))
    .then(enum_parser!(Lexeme::Comment).repeated())
  ).at_least(1)
}
|
||||
|
||||
/// Parse an expression without a type annotation
|
||||
fn untyped_xpr_parser<'a>(
|
||||
expr: Recursive<'a, char, Expr, Simple<char>>,
|
||||
ops: &[&'a str]
|
||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
// basic S-expression rule
|
||||
let sexpr = expr.clone()
|
||||
.repeated()
|
||||
.delimited_by(just('('), just(')'))
|
||||
.map(Expr::S);
|
||||
// Blocks
|
||||
// can and therefore do match everything up to the closing paren
|
||||
// \name. body
|
||||
// \name:type. body
|
||||
let lambda = just('\\')
|
||||
.ignore_then(text::ident())
|
||||
.then(typed_parser(expr.clone()).or_not())
|
||||
.then_ignore(just('.'))
|
||||
.then(expr.clone().repeated().at_least(1))
|
||||
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
|
||||
// @name. body
|
||||
// @name:type. body
|
||||
// @:type. body
|
||||
let auto = just('@')
|
||||
.ignore_then(text::ident().or_not())
|
||||
.then(typed_parser(expr.clone()).or_not())
|
||||
.then_ignore(just('.'))
|
||||
.then(expr.clone().repeated().at_least(1))
|
||||
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
|
||||
choice((
|
||||
number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence
|
||||
number::float_parser().map(Expr::Num),
|
||||
string::char_parser().map(Expr::Char),
|
||||
string::str_parser().map(Expr::Str),
|
||||
name::name_parser(ops).map(Expr::Name), // includes namespacing
|
||||
sexpr,
|
||||
lambda,
|
||||
auto
|
||||
)).padded()
|
||||
}
|
||||
|
||||
/// Parse any expression with a type annotation, surrounded by comments
|
||||
pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
// This approach to parsing comments is ugly and error-prone,
|
||||
// but I don't have a lot of other ideas
|
||||
return recursive(|expr| {
|
||||
return misc::comment_parser().or_not().ignore_then(
|
||||
untyped_xpr_parser(expr.clone(), &ops)
|
||||
.then(typed_parser(expr).or_not())
|
||||
.map(|(val, t)| match t {
|
||||
Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
|
||||
None => val
|
||||
})
|
||||
).then_ignore(misc::comment_parser().or_not())
|
||||
/// Parse one expression from the token stream into an [Expr]: a clause plus an
/// optional `:type` annotation. Comments on either side of the clause are
/// consumed and discarded.
pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
  recursive(|expr| {
    let clause =
      enum_parser!(Lexeme::Comment).repeated()
      .ignore_then(choice((
        // Literal lexemes are lifted directly into Literal variants.
        enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Clause::Literal),
        name_parser().map(Clause::Name),
        sexpr_parser(expr.clone()),
        lambda_parser(expr.clone()),
        auto_parser(expr.clone())
      ))).then_ignore(enum_parser!(Lexeme::Comment).repeated());
    // An optional `:type` suffix attaches a type expression to the clause.
    clause.clone().then(
      just(Lexeme::Type)
      .ignore_then(expr.clone()).or_not()
    )
    .map(|(val, typ)| Expr(val, typ.map(Box::new)))
  })
}
|
||||
@@ -1,7 +1,9 @@
|
||||
use std::iter;
|
||||
|
||||
use chumsky::{Parser, prelude::*};
|
||||
use super::name;
|
||||
use crate::{enum_parser, utils::BoxedIter};
|
||||
|
||||
use super::lexer::Lexeme;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Import {
|
||||
@@ -9,15 +11,10 @@ pub struct Import {
|
||||
pub name: Option<String>
|
||||
}
|
||||
|
||||
|
||||
pub type BoxedStrIter = Box<dyn Iterator<Item = String>>;
|
||||
pub type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
|
||||
|
||||
/// initialize a Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>
|
||||
/// with a single element.
|
||||
fn init_table(name: String) -> BoxedStrIterIter {
|
||||
// I'm not confident at all that this is a good approach.
|
||||
Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter))
|
||||
/// initialize a BoxedIter<BoxedIter<String>> with a single element.
|
||||
fn init_table(name: String) -> BoxedIter<'static, BoxedIter<'static, String>> {
|
||||
// I'm not at all confident that this is a good approach.
|
||||
Box::new(iter::once(Box::new(iter::once(name)) as BoxedIter<String>))
|
||||
}
|
||||
|
||||
/// Parse an import command
|
||||
@@ -25,29 +22,38 @@ fn init_table(name: String) -> BoxedStrIterIter {
|
||||
/// and the delimiters are plain parentheses. Namespaces should preferably contain
|
||||
/// crossplatform filename-legal characters but the symbols are explicitly allowed
|
||||
/// to go wild. There's a blacklist in [name]
|
||||
pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
||||
pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> {
|
||||
// TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
|
||||
recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
|
||||
name::modname_parser()
|
||||
.padded()
|
||||
.then_ignore(just("::"))
|
||||
.repeated()
|
||||
recursive(|expr: Recursive<Lexeme, BoxedIter<BoxedIter<String>>, Simple<Lexeme>>| {
|
||||
enum_parser!(Lexeme::Name)
|
||||
.separated_by(just(Lexeme::NS))
|
||||
.then(
|
||||
choice((
|
||||
expr.clone()
|
||||
.separated_by(just(','))
|
||||
.delimited_by(just('('), just(')'))
|
||||
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
|
||||
// Each expr returns a list of imports, flatten those into a common list
|
||||
just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped
|
||||
name::modname_parser().map(init_table) // Just a name, wrapped
|
||||
)).padded()
|
||||
).map(|(pre, post)| {
|
||||
Box::new(post.map(move |el| {
|
||||
Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
|
||||
})) as BoxedStrIterIter
|
||||
just(Lexeme::NS)
|
||||
.ignore_then(
|
||||
choice((
|
||||
expr.clone()
|
||||
.separated_by(just(Lexeme::name(",")))
|
||||
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
|
||||
.map(|v| Box::new(v.into_iter().flatten()) as BoxedIter<BoxedIter<String>>)
|
||||
.labelled("import group"),
|
||||
// Each expr returns a list of imports, flatten those into a common list
|
||||
just(Lexeme::name("*")).map(|_| init_table("*".to_string()))
|
||||
.labelled("wildcard import"), // Just a *, wrapped
|
||||
enum_parser!(Lexeme::Name).map(init_table)
|
||||
.labelled("import terminal") // Just a name, wrapped
|
||||
))
|
||||
).or_not()
|
||||
)
|
||||
.map(|(name, opt_post): (Vec<String>, Option<BoxedIter<BoxedIter<String>>>)| -> BoxedIter<BoxedIter<String>> {
|
||||
if let Some(post) = opt_post {
|
||||
Box::new(post.map(move |el| {
|
||||
Box::new(name.clone().into_iter().chain(el)) as BoxedIter<String>
|
||||
})) as BoxedIter<BoxedIter<String>>
|
||||
} else {
|
||||
Box::new(iter::once(Box::new(name.into_iter()) as BoxedIter<String>))
|
||||
}
|
||||
})
|
||||
}).padded().map(|paths| {
|
||||
}).map(|paths| {
|
||||
paths.filter_map(|namespaces| {
|
||||
let mut path: Vec<String> = namespaces.collect();
|
||||
match path.pop()?.as_str() {
|
||||
@@ -55,5 +61,5 @@ pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
||||
name => Some(Import { path, name: Some(name.to_owned()) })
|
||||
}
|
||||
}).collect()
|
||||
})
|
||||
}).labelled("import")
|
||||
}
|
||||
134
src/parse/lexer.rs
Normal file
134
src/parse/lexer.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
use std::{ops::Range, iter};
|
||||
use ordered_float::NotNan;
|
||||
use chumsky::{Parser, prelude::*, text::whitespace};
|
||||
use std::fmt::Debug;
|
||||
use crate::utils::BoxedIter;
|
||||
|
||||
use super::{number, string, name, comment};
|
||||
|
||||
/// A lexeme paired with the source byte range it was read from.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Entry(pub Lexeme, pub Range<usize>);
impl Debug for Entry {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    // Positions are deliberately omitted to keep token-stream dumps short.
    write!(f, "{:?}", self.0)
    // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
  }
}
|
||||
|
||||
/// A single token produced by the lexer.
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lexeme {
  Num(NotNan<f64>),
  Int(u64),
  Char(char),
  Str(String),
  Name(String),
  /// Rule arrow `=prio=>`; carries the rule's priority.
  Rule(NotNan<f64>),
  NS, // namespace separator
  /// Opening delimiter; stores the opening character.
  LP(char),
  /// Closing delimiter; also stores the corresponding *opening* character.
  RP(char),
  BS, // Backslash
  At,
  Type, // type operator
  Comment(String)
}
|
||||
|
||||
impl Debug for Lexeme {
  /// Renders each token roughly as it appears in source text.
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    match self {
      Self::Num(n) => write!(f, "{}", n),
      Self::Int(i) => write!(f, "{}", i),
      Self::Char(c) => write!(f, "{:?}", c),
      Self::Str(s) => write!(f, "{:?}", s),
      Self::Name(name) => write!(f, "{}", name),
      Self::Rule(prio) => write!(f, "={}=>", prio),
      Self::NS => write!(f, "::"),
      Self::LP(l) => write!(f, "{}", l),
      // RP stores the opening character, so map it to its closing pair.
      Self::RP(l) => match l {
        '(' => write!(f, ")"),
        '[' => write!(f, "]"),
        '{' => write!(f, "}}"),
        _ => f.debug_tuple("RP").field(l).finish()
      },
      Self::BS => write!(f, "\\"),
      Self::At => write!(f, "@"),
      Self::Type => write!(f, ":"),
      Self::Comment(text) => write!(f, "--[{}]--", text),
    }
  }
}
|
||||
|
||||
impl Lexeme {
  /// Shorthand for building a [Lexeme::Name] from anything stringifiable.
  pub fn name<T: ToString>(n: T) -> Self {
    Lexeme::Name(n.to_string())
  }
  /// Wrap `expr` in any of the three delimiter pairs `()`, `[]` or `{}`,
  /// returning the opening delimiter character alongside the parsed value.
  pub fn paren_parser<T, P>(
    expr: P
  ) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
  where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
    choice((
      expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
        .map(|t| ('(', t)),
      expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
        .map(|t| ('[', t)),
      expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
        .map(|t| ('{', t)),
    ))
  }
}
|
||||
|
||||
fn rule_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
just('=').ignore_then(
|
||||
choice((
|
||||
none_of("-0123456789").rewind().to(NotNan::new(0f64).unwrap()),
|
||||
number::float_parser().then_ignore(just("=>"))
|
||||
)).map_err_with_span(|err, span| {
|
||||
panic!("Something's up! {:?} {}", span, err)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
type LexSubres<'a> = BoxedIter<'a, Entry>;
|
||||
|
||||
/// Lex the contents of one delimiter pair recursively, flanking the inner
/// token stream with `LP`/`RP` entries. Both entries record the *opening*
/// character (see [Lexeme::RP]).
fn paren_parser<'a>(
  expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
  lp: char, rp: char
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
  expr.padded().repeated()
    .map(|x| Box::new(x.into_iter().flatten()) as LexSubres)
    .delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
      Box::new(
        iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1))
          .chain(b)
          // RP deliberately carries the opening char `lp` as well.
          .chain(iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end)))
      ) as LexSubres
    })
}
|
||||
|
||||
/// Tokenize source text into lines of [Entry] values.
///
/// `ops` lists operators to recognize as names; `.` is always appended to
/// that list. The outer Vec splits on a newline followed by any whitespace,
/// the inner sequence on runs of tabs and spaces.
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Vec<Entry>>, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
  let all_ops = ops.iter().map(|o| o.as_ref().to_string())
    .chain(iter::once(".".to_string())).collect::<Vec<_>>();
  recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
    choice((
      // Bracketed regions recurse so nesting is tracked during lexing.
      paren_parser(recurse.clone(), '(', ')'),
      paren_parser(recurse.clone(), '[', ']'),
      paren_parser(recurse.clone(), '{', '}'),
      choice((
        rule_parser().map(Lexeme::Rule),
        comment::comment_parser().map(Lexeme::Comment),
        just("::").padded().to(Lexeme::NS),
        just('\\').padded().to(Lexeme::BS),
        just('@').padded().to(Lexeme::At),
        just(':').to(Lexeme::Type),
        number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence
        number::float_parser().map(Lexeme::Num),
        string::char_parser().map(Lexeme::Char),
        string::str_parser().map(Lexeme::Str),
        name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing
      )).map_with_span(|lx, span| Box::new(iter::once(Entry(lx, span))) as LexSubres)
    ))
  }).separated_by(one_of("\t ").repeated())
    .flatten().collect()
    .separated_by(just('\n').then(text::whitespace()).ignored())

}
|
||||
@@ -1,8 +0,0 @@
|
||||
pub use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
/// Parses Lua-style comments
|
||||
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||
any().repeated().delimited_by(just("--["), just("]--")).or(
|
||||
any().repeated().delimited_by(just("--"), just("\n"))
|
||||
).map(|vc| vc.iter().collect()).padded()
|
||||
}
|
||||
@@ -1,18 +1,16 @@
|
||||
mod expression;
|
||||
mod string;
|
||||
mod number;
|
||||
mod misc;
|
||||
mod import;
|
||||
mod name;
|
||||
mod substitution;
|
||||
mod lexer;
|
||||
mod comment;
|
||||
mod expression;
|
||||
mod sourcefile;
|
||||
mod import;
|
||||
mod enum_parser;
|
||||
|
||||
pub use substitution::Substitution;
|
||||
pub use expression::Expr;
|
||||
pub use expression::expression_parser;
|
||||
pub use sourcefile::FileEntry;
|
||||
pub use sourcefile::file_parser;
|
||||
pub use sourcefile::line_parser;
|
||||
pub use sourcefile::imports;
|
||||
pub use sourcefile::is_op;
|
||||
pub use sourcefile::exported_names;
|
||||
pub use import::Import;
|
||||
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
|
||||
pub use name::is_op;
|
||||
@@ -1,12 +1,14 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
/// Matches any one of the passed operators, longest-first
|
||||
fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>> {
|
||||
let mut sorted_ops = ops.to_vec();
|
||||
fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple<char>> {
|
||||
let mut sorted_ops: Vec<String> = ops.iter().map(|t| t.as_ref().to_string()).collect();
|
||||
sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
|
||||
sorted_ops.into_iter()
|
||||
.map(|op| just(op.to_string()).boxed())
|
||||
.reduce(|a, b| a.or(b).boxed()).unwrap()
|
||||
.map(|op| just(op).boxed())
|
||||
.reduce(|a, b| a.or(b).boxed())
|
||||
.unwrap_or(empty().map(|()| panic!("Empty isn't meant to match")).boxed())
|
||||
.labelled("operator").boxed()
|
||||
}
|
||||
|
||||
/// Matches anything that's allowed as an operator
|
||||
@@ -27,20 +29,31 @@ fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>>
|
||||
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
|
||||
/// common in maths so it's worth a try. Investigate.
|
||||
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
|
||||
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '.'];
|
||||
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', ','];
|
||||
filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
|
||||
.repeated().at_least(1)
|
||||
.collect()
|
||||
.labelled("modname")
|
||||
}
|
||||
|
||||
/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
|
||||
/// blacklisted character as a new operator.
|
||||
pub fn name_parser<'a>(
|
||||
ops: &[&'a str]
|
||||
) -> impl Parser<char, Vec<String>, Error = Simple<char>> + 'a {
|
||||
pub fn name_parser<'a, T: AsRef<str> + Clone>(
|
||||
ops: &[T]
|
||||
) -> impl Parser<char, String, Error = Simple<char>> + 'a {
|
||||
choice((
|
||||
op_parser(ops), // First try to parse a known operator
|
||||
text::ident(), // Failing that, parse plain text
|
||||
text::ident().labelled("plain text"), // Failing that, parse plain text
|
||||
modname_parser() // Finally parse everything until tne next terminal as a new operator
|
||||
)).padded().separated_by(just("::")).padded()
|
||||
))
|
||||
.labelled("name")
|
||||
}
|
||||
|
||||
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
/// start.
///
/// Returns false for the empty string.
pub fn is_op<T: AsRef<str>>(s: T) -> bool {
  // An operator is any non-empty string whose first char isn't alphanumeric.
  s.as_ref().chars().next().map_or(false, |x| !x.is_alphanumeric())
}
|
||||
@@ -1,4 +1,5 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
use ordered_float::NotNan;
|
||||
|
||||
fn assert_not_digit(base: u32, c: char) {
|
||||
if base > (10 + (c as u32 - 'a' as u32)) {
|
||||
@@ -51,7 +52,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
|
||||
}
|
||||
|
||||
/// returns a mapper that converts a mantissa and an exponent into a float
|
||||
fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
|
||||
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32),) -> NotNan<f64> {
|
||||
return move |(val, exp)| {
|
||||
if exp == 0 {val}
|
||||
else {val * (base as f64).powf(exp.try_into().unwrap())}
|
||||
@@ -77,32 +78,35 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
}
|
||||
|
||||
/// parse a float from dot notation
|
||||
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
fn dotted_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
uint_parser(base)
|
||||
.then_ignore(just('.'))
|
||||
.then(
|
||||
text::digits(base).then(separated_digits_parser(base))
|
||||
).map(move |(wh, (frac1, frac2))| {
|
||||
let frac = frac1 + &frac2;
|
||||
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
|
||||
let dexp = base.pow(frac.len().try_into().unwrap());
|
||||
wh as f64 + (frac_num / dexp as f64)
|
||||
just('.').ignore_then(
|
||||
text::digits(base).then(separated_digits_parser(base))
|
||||
).map(move |(frac1, frac2)| {
|
||||
let frac = frac1 + &frac2;
|
||||
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
|
||||
let dexp = base.pow(frac.len().try_into().unwrap());
|
||||
frac_num / dexp as f64
|
||||
}).or_not().map(|o| o.unwrap_or_default())
|
||||
).try_map(|(wh, f), s| {
|
||||
NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
|
||||
})
|
||||
}
|
||||
|
||||
/// parse a float from dotted and optionally also exponential notation
|
||||
fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
fn pow_float_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
assert_not_digit(base, 'p');
|
||||
dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
|
||||
}
|
||||
|
||||
/// parse a float with dotted and optionally exponential notation from a base determined by its
|
||||
/// prefix
|
||||
pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
pub fn float_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
choice((
|
||||
just("0b").ignore_then(pow_float_parser(2)),
|
||||
just("0x").ignore_then(pow_float_parser(16)),
|
||||
just('0').ignore_then(pow_float_parser(8)),
|
||||
pow_float_parser(10),
|
||||
))
|
||||
)).labelled("float")
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use chumsky::{self, prelude::*, Parser};
|
||||
use super::{expression, number::float_parser};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Substitution {
|
||||
pub struct Rule {
|
||||
pub source: expression::Expr,
|
||||
pub priority: f64,
|
||||
pub target: expression::Expr
|
||||
@@ -19,15 +19,16 @@ pub struct Substitution {
|
||||
/// shadow_reee =0.9=> reee
|
||||
/// ```
|
||||
/// TBD whether this disables reee in the specified range or loops forever
|
||||
pub fn substitution_parser<'a>(
|
||||
pattern_ops: &[&'a str],
|
||||
ops: &[&'a str]
|
||||
) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
|
||||
expression::expression_parser(pattern_ops)
|
||||
pub fn rule_parser<'a, T: 'a + AsRef<str> + Clone>(
|
||||
pattern_ops: &[T],
|
||||
ops: &[T]
|
||||
) -> impl Parser<char, Rule, Error = Simple<char>> + 'a {
|
||||
expression::expression_parser(pattern_ops).padded()
|
||||
.then_ignore(just('='))
|
||||
.then(
|
||||
float_parser().then_ignore(just("=>"))
|
||||
.or_not().map(|prio| prio.unwrap_or(0.0))
|
||||
).then(expression::expression_parser(ops))
|
||||
.map(|((source, priority), target)| Substitution { source, priority, target })
|
||||
).then(expression::expression_parser(ops).padded())
|
||||
.map(|((source, priority), target)| Rule { source, priority, target })
|
||||
.labelled("rule")
|
||||
}
|
||||
@@ -1,20 +1,25 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::iter;
|
||||
|
||||
use super::expression::Expr;
|
||||
use crate::{enum_parser, Expr, Clause};
|
||||
use crate::utils::BoxedIter;
|
||||
|
||||
use super::expression::xpr_parser;
|
||||
use super::import;
|
||||
use super::misc;
|
||||
use super::substitution::substitution_parser;
|
||||
use super::substitution::Substitution;
|
||||
use super::import::import_parser;
|
||||
use super::lexer::Lexeme;
|
||||
use super::name;
|
||||
use chumsky::{Parser, prelude::*};
|
||||
use ordered_float::NotNan;
|
||||
|
||||
/// Anything we might encounter in a file
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FileEntry {
|
||||
Import(Vec<import::Import>),
|
||||
Comment(String),
|
||||
Substitution(Substitution),
|
||||
Export(Substitution)
|
||||
Rule(Vec<Expr>, NotNan<f64>, Vec<Expr>),
|
||||
Export(Vec<Expr>, NotNan<f64>, Vec<Expr>)
|
||||
}
|
||||
|
||||
/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
|
||||
@@ -22,19 +27,22 @@ pub enum FileEntry {
|
||||
/// sophisticated search.
|
||||
///
|
||||
/// TODO: find a way to exclude parameters
|
||||
fn find_all_names_recur(expr: &Expr) -> Box<dyn Iterator<Item = &Vec<String>> + '_> {
|
||||
match expr {
|
||||
Expr::Auto(_, typ, body) | Expr::Lambda(_, typ, body) => Box::new(match typ {
|
||||
Some(texp) => find_all_names_recur(texp),
|
||||
None => Box::new(iter::empty())
|
||||
}.chain(body.into_iter().map(find_all_names_recur).flatten())),
|
||||
Expr::S(body) => Box::new(body.into_iter().map(find_all_names_recur).flatten()),
|
||||
Expr::Typed(val, typ) => Box::new(
|
||||
find_all_names_recur(val).chain(find_all_names_recur(typ))
|
||||
fn find_all_names_recur<'a>(expr: &'a Expr) -> BoxedIter<&'a Vec<String>> {
|
||||
let proc_clause = |clause: &'a Clause| match clause {
|
||||
Clause::Auto(_, typ, body) | Clause::Lambda(_, typ, body) => Box::new(
|
||||
typ.iter().flat_map(find_all_names_recur)
|
||||
.chain(body.iter().flat_map(find_all_names_recur))
|
||||
) as BoxedIter<&'a Vec<String>>,
|
||||
Clause::S(_, body) => Box::new(
|
||||
body.iter().flat_map(find_all_names_recur)
|
||||
),
|
||||
Expr::Name(x) => Box::new(iter::once(x)),
|
||||
Clause::Name(x) => Box::new(iter::once(x)),
|
||||
_ => Box::new(iter::empty())
|
||||
}
|
||||
};
|
||||
let Expr(val, typ) = expr;
|
||||
if let Some(t) = typ {
|
||||
Box::new(proc_clause(val).chain(find_all_names_recur(t)))
|
||||
} else { proc_clause(val) }
|
||||
}
|
||||
|
||||
/// Collect all names that occur in an expression
|
||||
@@ -42,62 +50,69 @@ fn find_all_names(expr: &Expr) -> HashSet<&Vec<String>> {
|
||||
find_all_names_recur(expr).collect()
|
||||
}
|
||||
|
||||
/// Parse a file into a list of distinctive entries
|
||||
pub fn file_parser<'a>(
|
||||
pattern_ops: &[&'a str], ops: &[&'a str]
|
||||
) -> impl Parser<char, Vec<FileEntry>, Error = Simple<char>> + 'a {
|
||||
choice((
|
||||
// In case the usercode wants to parse doc
|
||||
misc::comment_parser().map(FileEntry::Comment),
|
||||
import::import_parser().map(FileEntry::Import),
|
||||
text::keyword("export")
|
||||
.ignore_then(substitution_parser(pattern_ops, ops)).map(FileEntry::Export),
|
||||
// This could match almost anything so it has to go last
|
||||
substitution_parser(pattern_ops, ops).map(FileEntry::Substitution)
|
||||
)).padded()
|
||||
.separated_by(just('\n'))
|
||||
.then_ignore(end())
|
||||
/// Parse a rewrite rule `lhs =prio=> rhs`: both sides are expression
/// sequences and the priority comes from the [Lexeme::Rule] token between
/// them.
fn rule_parser() -> impl Parser<Lexeme, (Vec<Expr>, NotNan<f64>, Vec<Expr>), Error = Simple<Lexeme>> {
  xpr_parser().repeated()
    .then(enum_parser!(Lexeme::Rule))
    .then(xpr_parser().repeated())
    // .map(|((lhs, prio), rhs)| )
    // Flatten the nested pairs produced by `then` into one tuple.
    .map(|((a, b), c)| (a, b, c))
    .labelled("Rule")
}
|
||||
|
||||
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
|
||||
/// start.
|
||||
pub fn is_op(s: &str) -> bool {
|
||||
return match s.chars().next() {
|
||||
Some(x) => !x.is_alphanumeric(),
|
||||
None => false
|
||||
}
|
||||
pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
|
||||
choice((
|
||||
// In case the usercode wants to parse doc
|
||||
enum_parser!(Lexeme >> FileEntry; Comment),
|
||||
just(Lexeme::name("import"))
|
||||
.ignore_then(import_parser().map(FileEntry::Import))
|
||||
.then_ignore(enum_parser!(Lexeme::Comment)),
|
||||
just(Lexeme::name("export")).map_err_with_span(|e, s| {
|
||||
println!("{:?} could not yield an export", s); e
|
||||
})
|
||||
.ignore_then(rule_parser())
|
||||
.map(|(lhs, prio, rhs)| FileEntry::Export(lhs, prio, rhs)),
|
||||
// This could match almost anything so it has to go last
|
||||
rule_parser().map(|(lhs, prio, rhs)| FileEntry::Rule(lhs, prio, rhs)),
|
||||
))
|
||||
}
|
||||
|
||||
/// Collect all exported names (and a lot of other words) from a file
|
||||
pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
|
||||
src.iter().filter_map(|ent| match ent {
|
||||
FileEntry::Export(a) => Some(&a.source),
|
||||
_ => None
|
||||
src.iter().flat_map(|ent| match ent {
|
||||
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())) as BoxedIter<&Expr>,
|
||||
_ => Box::new(iter::empty())
|
||||
}).map(find_all_names).flatten().collect()
|
||||
}
|
||||
|
||||
|
||||
// #[allow(dead_code)]
|
||||
/// Collect all operators defined in a file (and some other words)
|
||||
fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
|
||||
let all_names:HashSet<&Vec<String>> = src.iter().filter_map(|ent| match ent {
|
||||
FileEntry::Substitution(a) => if exported_only {None} else {Some(&a.source)},
|
||||
FileEntry::Export(a) => Some(&a.source),
|
||||
_ => None
|
||||
let all_names:HashSet<&Vec<String>> = src.iter().flat_map(|ent| match ent {
|
||||
FileEntry::Rule(s, _, d) =>
|
||||
if exported_only {Box::new(iter::empty()) as BoxedIter<&Expr>}
|
||||
else {Box::new(s.iter().chain(d.iter()))}
|
||||
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())),
|
||||
_ => Box::new(iter::empty())
|
||||
}).map(find_all_names).flatten().collect();
|
||||
// Dedupe stage of dubious value; collecting into a hashset may take longer than
|
||||
// handling duplicates would with a file of sensible size.
|
||||
all_names.into_iter()
|
||||
.filter_map(|name|
|
||||
// If it's namespaced, it's imported.
|
||||
if name.len() == 1 && is_op(&name[0]) {Some(&name[0])}
|
||||
if name.len() == 1 && name::is_op(&name[0]) {Some(&name[0])}
|
||||
else {None}
|
||||
).collect()
|
||||
}
|
||||
|
||||
// #[allow(dead_code)]
|
||||
/// Collect all operators from a file
|
||||
pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
|
||||
// #[allow(dead_code)]
|
||||
/// Collect exported operators from a file (plus some extra)
|
||||
pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
|
||||
|
||||
|
||||
/// Summarize all imports from a file in a single list of qualified names
|
||||
pub fn imports<'a, 'b, I>(
|
||||
src: I
|
||||
|
||||
Reference in New Issue
Block a user