From ec1734e1138ec49b23d0d2e5fd23254673369a8c Mon Sep 17 00:00:00 2001
From: Lawrence Bethlenfalvy
Date: Mon, 30 May 2022 05:21:00 +0200
Subject: [PATCH] Difficult ownership questions

---
 README.md                    |  18 +++---
 src/main.rs                  |   4 +-
 src/parse/expression.rs      |  55 +++++++++++-------
 src/parse/import.rs          |  77 ++++++++++++-------------
 src/parse/misc.rs            |   1 +
 src/parse/mod.rs             |   7 +++
 src/parse/name.rs            |  44 +++++++++-----
 src/parse/number.rs          |  20 +++++++
 src/parse/sourcefile.rs      | 107 +++++++++++++++++++++++++++++++++++
 src/parse/string.rs          |   6 +-
 src/parse/substitution.rs    |  24 ++++++--
 src/project/mod.rs           |  53 +++++++++++++++++
 src/project/resolve_names.rs |  87 ++++++++++++++++++++++++++++
 src/utils/cache.rs           |  25 ++++++++
 src/utils/mod.rs             |   2 +
 15 files changed, 441 insertions(+), 89 deletions(-)
 create mode 100644 src/parse/sourcefile.rs
 create mode 100644 src/project/mod.rs
 create mode 100644 src/project/resolve_names.rs
 create mode 100644 src/utils/cache.rs
 create mode 100644 src/utils/mod.rs

diff --git a/README.md b/README.md
index 3781334..965bc6a 100644
--- a/README.md
+++ b/README.md
@@ -262,14 +262,14 @@ the purposes of substitution.
 This is very far away so I don't want to make promises, but I have some
 ideas.
 
-[ ] early execution of functions on any subset of their arguments where it
-    could provide substantial speedup
-[ ] tracking copies of expressions and evaluating them only once
-[ ] Many cases of single recursion converted to loops
-    [ ] tail recursion
-    [ ] 2 distinct loops where the tail doesn't use the arguments
-    [ ] reorder operations to favour this scenario
-[ ] reactive calculation of values that are deemed to be read more often
+- [ ] early execution of functions on any subset of their arguments where
+  it could provide substantial speedup
+- [ ] tracking copies of expressions and evaluating them only once
+- [ ] Many cases of single recursion converted to loops
+  - [ ] tail recursion
+  - [ ] 2 distinct loops where the tail doesn't use the arguments
+  - [ ] reorder operations to favour this scenario
+- [ ] reactive calculation of values that are deemed to be read more often
   than written
-[ ] automatic profiling based on performance metrics generated by debug
+- [ ] automatic profiling based on performance metrics generated by debug
   builds
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index e0e31ab..18ee30d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,12 +3,14 @@ use std::io::{self, Read};
 use chumsky::{Parser, prelude::*};
 
 mod parse;
+mod project;
+mod utils;
 
 fn main() {
     let mut input = String::new();
     let mut stdin = io::stdin();
     stdin.read_to_string(&mut input).unwrap();
-    let ops: Vec<String> = vec!["$", "."].iter().map(|&s| s.to_string()).collect();
+    let ops: Vec<&str> = vec!["$", "."];
     let output = parse::expression_parser(&ops).then_ignore(end()).parse(input);
     println!("\nParsed:\n{:?}", output);
 }
diff --git a/src/parse/expression.rs b/src/parse/expression.rs
index 35da5a2..da1210f 100644
--- a/src/parse/expression.rs
+++ b/src/parse/expression.rs
@@ -6,63 +6,76 @@ use super::number;
 use super::misc;
 use super::name;
 
-#[derive(Debug)]
+/// An S-expression as read from a source file
+#[derive(Debug, Clone)]
 pub enum Expr {
     Num(f64),
     Int(u64),
     Char(char),
     Str(String),
-    Name(String),
+    Name(Vec<String>),
     S(Vec<Expr>),
     Lambda(String, Option<Box<Expr>>, Vec<Expr>),
     Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
     Typed(Box<Expr>, Box<Expr>)
 }
 
+/// Parse a type annotation
 fn typed_parser<'a>(
-    expr: Recursive<'a, char, Expr, Simple<char>>,
-    ops: &'a [String]
+    expr: Recursive<'a, char, Expr, Simple<char>>
 ) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
     just(':').ignore_then(expr)
 }
 
+/// Parse an expression without a type annotation
 fn untyped_xpr_parser<'a>(
     expr: Recursive<'a, char, Expr, Simple<char>>,
-    ops: &'a [String]
+    ops: &[&'a str]
 ) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
-    let lambda = just('\\')
-        .ignore_then(name::name_parser(ops))
-        .then(typed_parser(expr.clone(), ops).or_not())
-        .then_ignore(just('.'))
-        .then(expr.clone().repeated().at_least(1))
-        .map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
-    let auto = just('@')
-        .ignore_then(name::name_parser(ops).or_not())
-        .then(typed_parser(expr.clone(), ops).or_not())
-        .then_ignore(just('.'))
-        .then(expr.clone().repeated().at_least(1))
-        .map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
+    // basic S-expression rule
     let sexpr = expr.clone()
         .repeated()
         .delimited_by(just('('), just(')'))
         .map(Expr::S);
+    // Blocks
+    // can and therefore do match everything up to the closing paren
+    // \name. body
+    // \name:type. body
+    let lambda = just('\\')
+        .ignore_then(text::ident())
+        .then(typed_parser(expr.clone()).or_not())
+        .then_ignore(just('.'))
+        .then(expr.clone().repeated().at_least(1))
+        .map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
+    // @name. body
+    // @name:type. body
+    // @:type. body
+    let auto = just('@')
+        .ignore_then(text::ident().or_not())
+        .then(typed_parser(expr.clone()).or_not())
+        .then_ignore(just('.'))
+        .then(expr.clone().repeated().at_least(1))
+        .map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
     choice((
+        number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence
         number::float_parser().map(Expr::Num),
-        number::int_parser().map(Expr::Int),
         string::char_parser().map(Expr::Char),
         string::str_parser().map(Expr::Str),
-        name::name_parser(ops).map(Expr::Name),
+        name::name_parser(ops).map(Expr::Name), // includes namespacing
         sexpr,
         lambda,
         auto
     )).padded()
 }
 
-pub fn expression_parser(ops: &[String]) -> impl Parser<char, Expr, Error = Simple<char>> + '_ {
+/// Parse any expression with a type annotation, surrounded by comments
+pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
+    // This approach to parsing comments is ugly and error-prone,
+    // but I don't have a lot of other ideas
    return recursive(|expr| {
        return misc::comment_parser().or_not().ignore_then(
            untyped_xpr_parser(expr.clone(), &ops)
-                .then(typed_parser(expr, ops).or_not())
+                .then(typed_parser(expr).or_not())
                .map(|(val, t)| match t {
                    Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
                    None => val
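A hypothetical spot check of the new namespaced `Name` representation, wired up the same way as `main.rs`. Both the function and the expected output comment are illustrations only; they assume `crate::parse` re-exports `expression_parser` as declared in `src/parse/mod.rs` further down.

```rust
use chumsky::{Parser, prelude::*};

// Hypothetical demo, not part of the patch.
fn demo() {
    let ops: Vec<&str> = vec!["$", "."];
    let ast = crate::parse::expression_parser(&ops)
        .then_ignore(end())
        .parse("std::add");
    // Expected shape, roughly: Ok(Name(["std", "add"]))
    println!("{:?}", ast);
}
```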
diff --git a/src/parse/import.rs b/src/parse/import.rs
index 57d0c75..21d85ee 100644
--- a/src/parse/import.rs
+++ b/src/parse/import.rs
@@ -1,57 +1,58 @@
-use chumsky::{Parser, prelude::*, text::Character};
+use std::iter;
+
+use chumsky::{Parser, prelude::*};
 
 use super::name;
 
-enum Import {
-    Name(Vec<String>, String),
-    All(Vec<String>)
-}
-
-fn prefix(pre: Vec<String>, im: Import) -> Import {
-    match im {
-        Import::Name(ns, name) => Import::Name(
-            pre.into_iter().chain(ns.into_iter()).collect(),
-            name
-        ),
-        Import::All(ns) => Import::All(
-            pre.into_iter().chain(ns.into_iter()).collect()
-        )
-    }
+#[derive(Debug, Clone)]
+pub struct Import {
+    pub path: Vec<String>,
+    pub name: Option<String>
 }
 
-type BoxedStrIter = Box<dyn Iterator<Item = String>>;
-type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
+pub type BoxedStrIter = Box<dyn Iterator<Item = String>>;
+pub type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
 
+/// Initialize a Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>
+/// with a single element.
 fn init_table(name: String) -> BoxedStrIterIter {
-    Box::new(vec![Box::new(vec![name].into_iter()) as BoxedStrIter].into_iter())
+    // I'm not confident at all that this is a good approach.
+    Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter))
 }
 
+/// Parse an import command.
+/// The syntax is the same as Rust's `use`, except the verb is `import`, there is no
+/// trailing semicolon, and the delimiters are plain parentheses. Namespaces should
+/// preferably contain cross-platform filename-legal characters, but the symbols are
+/// explicitly allowed to go wild. There's a blacklist in [name].
 pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
+    // TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
     recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
         name::modname_parser()
-        .padded()
-        .then_ignore(just("::"))
-        .repeated()
-        .then(
-            choice((
-                expr.clone()
-                    .separated_by(just(','))
-                    .delimited_by(just('('), just(')'))
-                    .map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
-                just("*").map(|s| init_table(s.to_string())),
-                name::modname_parser().map(init_table)
-            )).padded()
-        ).map(|(pre, post)| {
-            Box::new(post.map(move |el| {
-                Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
-            })) as BoxedStrIterIter
-        })
+            .padded()
+            .then_ignore(just("::"))
+            .repeated()
+            .then(
+                choice((
+                    expr.clone()
+                        .separated_by(just(','))
+                        .delimited_by(just('('), just(')'))
+                        .map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
+                    // Each expr returns a list of imports, flatten those into a common list
+                    just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped
+                    name::modname_parser().map(init_table) // Just a name, wrapped
+                )).padded()
+            ).map(|(pre, post)| {
+                Box::new(post.map(move |el| {
+                    Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
+                })) as BoxedStrIterIter
+            })
     }).padded().map(|paths| {
         paths.filter_map(|namespaces| {
             let mut path: Vec<String> = namespaces.collect();
             match path.pop()?.as_str() {
-                "*" => Some(Import::All(path)),
-                name => Some(Import::Name(path, name.to_owned()))
+                "*" => Some(Import { path, name: None }),
+                name => Some(Import { path, name: Some(name.to_owned()) })
             }
         }).collect()
     })
 }
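As a sanity check on the output shape, a hypothetical call follows; it assumes `import_parser` were reachable from outside `src/parse`, which this patch does not actually expose, and the expected entries are inferred from the code rather than captured test output.

```rust
use chumsky::{Parser, prelude::*};
use crate::parse::import::{import_parser, Import};

// Hypothetical demo: parse one Rust-style import tree into the flat list.
fn demo() {
    let out: Vec<Import> = import_parser()
        .then_ignore(end())
        .parse("std::(io::*, prelude)")
        .unwrap_or_default();
    // Expected, roughly:
    //   Import { path: ["std", "io"], name: None }      (the `*` wildcard)
    //   Import { path: ["std"], name: Some("prelude") }
    println!("{:?}", out);
}
```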
diff --git a/src/parse/misc.rs b/src/parse/misc.rs
index fbe905d..9ad2a09 100644
--- a/src/parse/misc.rs
+++ b/src/parse/misc.rs
@@ -1,5 +1,6 @@
 pub use chumsky::{self, prelude::*, Parser};
 
+/// Parses Lua-style comments
 pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
     any().repeated().delimited_by(just("--["), just("]--")).or(
         any().repeated().delimited_by(just("--"), just("\n"))
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 1d7b7f7..4867adf 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -5,5 +5,12 @@ mod misc;
 mod import;
 mod name;
 mod substitution;
+mod sourcefile;
 
+pub use expression::Expr;
 pub use expression::expression_parser;
+pub use sourcefile::FileEntry;
+pub use sourcefile::file_parser;
+pub use sourcefile::imports;
+pub use sourcefile::exported_names;
+pub use import::Import;
\ No newline at end of file
diff --git a/src/parse/name.rs b/src/parse/name.rs
index c8dfa67..497dec7 100644
--- a/src/parse/name.rs
+++ b/src/parse/name.rs
@@ -1,28 +1,46 @@
 use chumsky::{self, prelude::*, Parser};
 
-fn op_parser_recur<'a, 'b>(ops: &'a [String]) -> BoxedParser<'b, char, String, Simple<char>> {
-    if ops.len() == 1 { just(ops[0].clone()).boxed() }
-    else { just(ops[0].clone()).or(op_parser_recur(&ops[1..])).boxed() }
-}
-
-fn op_parser(ops: &[String]) -> BoxedParser<char, String, Simple<char>> {
+/// Matches any one of the passed operators, longest-first
+fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>> {
     let mut sorted_ops = ops.to_vec();
     sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
-    op_parser_recur(&sorted_ops)
+    sorted_ops.into_iter()
+        .map(|op| just(op.to_string()).boxed())
+        .reduce(|a, b| a.or(b).boxed()).unwrap()
 }
 
-pub fn modname_parser() -> impl Parser<char, String, Error = Simple<char>> {
-    let not_name_char: Vec<char> = vec![':', '\\', '"', '\'', '(', ')', '.'];
+/// Matches anything that's allowed as an operator
+///
+/// Blacklist rationale:
+/// - `:` is used for namespacing and type annotations, both are distinguished from operators
+/// - `\` and `@` are parametric expression starters
+/// - `"` and `'` are read as primitives and would never match.
+/// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming.
+/// - `.` is the discriminator for parametrics.
+///
+/// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but
+/// then parametrics should take precedence, which might break stuff. Investigate.
+///
+/// TODO: `'` could work as an operator whenever it isn't closed. It's common in maths so it's
+/// worth a try.
+///
+/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
+/// common in maths so it's worth a try. Investigate.
+pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
+    let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '.'];
     filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
         .repeated().at_least(1)
         .collect()
 }
 
-pub fn name_parser<'a>(ops: &'a [String]) -> impl Parser<char, String, Error = Simple<char>> + 'a {
+/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
+/// blacklisted character as a new operator.
+pub fn name_parser<'a>(
+    ops: &[&'a str]
+) -> impl Parser<char, Vec<String>, Error = Simple<char>> + 'a {
     choice((
         op_parser(ops), // First try to parse a known operator
         text::ident(), // Failing that, parse plain text
-        // Finally parse everything until tne next terminal as a new operator
-        modname_parser()
-    )).padded()
+        modname_parser() // Finally parse everything until the next terminal as a new operator
+    )).padded().separated_by(just("::")).padded()
 }
\ No newline at end of file
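The longest-first ordering in `op_parser` matters because the alternatives are tried in sequence: with a first-match choice, a short operator such as `=` would otherwise shadow `==` and leave a stray character in the input. A standalone sketch of just the sorting step, with made-up operator names:

```rust
// Mirrors the sort in op_parser: longer operators are tried first.
fn longest_first<'a>(ops: &[&'a str]) -> Vec<&'a str> {
    let mut sorted = ops.to_vec();
    sorted.sort_by(|a, b| b.len().cmp(&a.len()));
    sorted
}

fn main() {
    // "=" must come after "==" and "=>", or it would match first.
    assert_eq!(longest_first(&["=", "==", "=>"]), vec!["==", "=>", "="]);
}
```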
diff --git a/src/parse/number.rs b/src/parse/number.rs
index dde4e54..c40a7f2 100644
--- a/src/parse/number.rs
+++ b/src/parse/number.rs
@@ -6,6 +6,9 @@ fn assert_not_digit(base: u32, c: char) {
     }
 }
 
+/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
+///
+/// TODO: this should use separated_by and parse the leading group too
 fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
     just('_')
     .ignore_then(text::digits(base))
@@ -13,6 +16,9 @@ fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
 fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
     text::int(base)
     .then(separated_digits_parser(base))
@@ -21,6 +27,8 @@ fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
     })
 }
 
+/// Parse exponent notation, or return 0 as the default exponent.
+/// The exponent is always in decimal.
 fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
     return choice((
         just('p')
@@ -32,6 +40,9 @@ fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
     )).or_else(|_| Ok(0))
 }
 
+/// Returns a mapper that converts a mantissa and an exponent into a uint.
+///
+/// TODO: it panics if it finds a negative exponent
 fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
     return move |(val, exp)| {
         if exp == 0 {val}
@@ -39,6 +50,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
     };
 }
 
+/// Returns a mapper that converts a mantissa and an exponent into a float.
 fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
     return move |(val, exp)| {
         if exp == 0 {val}
@@ -46,11 +58,15 @@ fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
     }
 }
 
+/// Parse a uint from exponential notation (panics if 'p' is a digit in base)
 fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
     assert_not_digit(base, 'p');
     uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
 }
 
+/// Parse a uint from a base determined by its prefix or lack thereof
+///
+/// Not to be confused with [uint_parser] which is a component of it.
 pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
     choice((
         just("0b").ignore_then(pow_uint_parser(2)),
@@ -60,6 +76,7 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
     ))
 }
 
+/// Parse a float from dot notation
 fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
     uint_parser(base)
     .then_ignore(just('.'))
@@ -73,11 +90,14 @@ fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
     })
 }
 
+/// Parse a float from dotted and optionally also exponential notation
 fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
     assert_not_digit(base, 'p');
     dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
 }
 
+/// Parse a float with dotted and optionally exponential notation from a base determined by its
+/// prefix
 pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
     choice((
         just("0b").ignore_then(pow_float_parser(2)),
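The exponent handling is compact enough to sanity-check in isolation. Below is a standalone sketch of the same arithmetic as `nat2u`; the literals quoted in the comments are assumptions about how `int_parser` would feed it, not captured output.

```rust
// Sketch of nat2u's semantics: the mantissa is scaled by base^exp, where the
// base is the literal's radix. Negative exponents panic, as the TODO notes.
fn nat2u(base: u64) -> impl Fn((u64, i32)) -> u64 {
    move |(val, exp)| {
        if exp == 0 { val }
        else { val * base.pow(u32::try_from(exp).expect("negative exponent")) }
    }
}

fn main() {
    assert_eq!(nat2u(16)((2, 3)), 8192);       // "0x2p3"   -> 2 * 16^3
    assert_eq!(nat2u(10)((1000, 2)), 100_000); // "1_000p2" -> 1000 * 10^2
}
```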
diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs
new file mode 100644
index 0000000..f891a37
--- /dev/null
+++ b/src/parse/sourcefile.rs
@@ -0,0 +1,107 @@
+use std::collections::HashSet;
+use std::iter;
+
+use super::expression::Expr;
+use super::import;
+use super::misc;
+use super::substitution::substitution_parser;
+use super::substitution::Substitution;
+use chumsky::{Parser, prelude::*};
+
+/// Anything we might encounter in a file
+#[derive(Debug, Clone)]
+pub enum FileEntry {
+    Import(Vec<import::Import>),
+    Comment(String),
+    Substitution(Substitution),
+    Export(Substitution)
+}
+
+/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
+/// aren't names, such as all bound parameters. Generally speaking, this is not a very
+/// sophisticated search.
+///
+/// TODO: find a way to exclude parameters
+fn find_all_names_recur(expr: &Expr) -> Box<dyn Iterator<Item = &Vec<String>> + '_> {
+    match expr {
+        Expr::Auto(_, typ, body) | Expr::Lambda(_, typ, body) => Box::new(match typ {
+            Some(texp) => find_all_names_recur(texp),
+            None => Box::new(iter::empty())
+        }.chain(body.into_iter().map(find_all_names_recur).flatten())),
+        Expr::S(body) => Box::new(body.into_iter().map(find_all_names_recur).flatten()),
+        Expr::Typed(val, typ) => Box::new(
+            find_all_names_recur(val).chain(find_all_names_recur(typ))
+        ),
+        Expr::Name(x) => Box::new(iter::once(x)),
+        _ => Box::new(iter::empty())
+    }
+}
+
+/// Collect all names that occur in an expression
+fn find_all_names(expr: &Expr) -> HashSet<&Vec<String>> {
+    find_all_names_recur(expr).collect()
+}
+
+/// Parse a file into a list of distinctive entries
+pub fn file_parser<'a>(
+    pattern_ops: &[&'a str], ops: &[&'a str]
+) -> impl Parser<char, Vec<FileEntry>, Error = Simple<char>> + 'a {
+    choice((
+        // In case the usercode wants to parse doc comments
+        misc::comment_parser().map(FileEntry::Comment),
+        import::import_parser().map(FileEntry::Import),
+        text::keyword("export")
+            .ignore_then(substitution_parser(pattern_ops, ops)).map(FileEntry::Export),
+        // This could match almost anything so it has to go last
+        substitution_parser(pattern_ops, ops).map(FileEntry::Substitution)
+    )).padded()
+    .separated_by(just('\n'))
+    .then_ignore(end())
+}
+
+/// Decide if a string can be an operator. Operators can include digits and text, just not at the
+/// start.
+fn is_op(s: &str) -> bool {
+    return match s.chars().next() {
+        Some(x) => !x.is_alphanumeric(),
+        None => false
+    }
+}
+
+/// Collect all exported names (and a lot of other words) from a file
+pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
+    src.iter().filter_map(|ent| match ent {
+        FileEntry::Export(a) => Some(&a.source),
+        _ => None
+    }).map(find_all_names).flatten().collect()
+}
+
+/// Collect all operators defined in a file (and some other words)
+fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
+    let all_names: HashSet<&Vec<String>> = src.iter().filter_map(|ent| match ent {
+        FileEntry::Substitution(a) => if exported_only {None} else {Some(&a.source)},
+        FileEntry::Export(a) => Some(&a.source),
+        _ => None
+    }).map(find_all_names).flatten().collect();
+    // Dedupe stage of dubious value; collecting into a hashset may take longer than
+    // handling duplicates would with a file of sensible size.
+    all_names.into_iter()
+        .filter_map(|name|
+            // If it's namespaced, it's imported.
+            if name.len() == 1 && is_op(&name[0]) {Some(&name[0])}
+            else {None}
+        ).collect()
+}
+
+/// Collect all operators from a file
+pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
+/// Collect exported operators from a file (plus some extra)
+pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
+
+/// Summarize all imports from a file in a single list of qualified names
+pub fn imports(src: &Vec<FileEntry>) -> Vec<&import::Import> {
+    src.into_iter().filter_map(|ent| match ent {
+        FileEntry::Import(impv) => Some(impv.iter()),
+        _ => None
+    }).flatten().collect()
+}
\ No newline at end of file
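The `is_op` rule is small enough to check standalone; here is a copy of the logic with a few spot checks (the sample names are made up):

```rust
// Same rule as is_op above: anything not starting with an alphanumeric
// character counts as an operator name.
fn is_op(s: &str) -> bool {
    match s.chars().next() {
        Some(x) => !x.is_alphanumeric(),
        None => false,
    }
}

fn main() {
    assert!(is_op("=>"));
    assert!(is_op("++"));
    assert!(!is_op("map"));
    assert!(!is_op("2x")); // operators may contain digits, just not at the start
}
```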
diff --git a/src/parse/string.rs b/src/parse/string.rs
index b74014d..3b66150 100644
--- a/src/parse/string.rs
+++ b/src/parse/string.rs
@@ -1,6 +1,8 @@
 use chumsky::{self, prelude::*, Parser};
 
+/// Parses a text character that is not the specified delimiter
 fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
+    // Copied directly from Chumsky's JSON example.
     let escape = just('\\').ignore_then(
         just('\\')
         .or(just('/'))
@@ -27,15 +29,17 @@ fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
     filter(move |&c| c != '\\' && c != delim).or(escape)
 }
 
+/// Parse a character literal between single quotes
 pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
     just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
 }
 
+/// Parse a string between double quotes
 pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
     just('"')
     .ignore_then(
         text_parser('"').map(Some)
-        .or(just("\\\n").map(|_| None))
+        .or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
         .repeated()
     ).then_ignore(just('"'))
     .flatten().collect()
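A hypothetical check of the backslash-newline rule marked above, going through `expression_parser` since the string parsers themselves are module-private; the expected value is inferred from the code, not from test output.

```rust
use chumsky::{Parser, prelude::*};

// Hypothetical demo: an escaped line break inside a string literal is dropped.
fn demo() {
    let ops: Vec<&str> = vec!["$", "."];
    let out = crate::parse::expression_parser(&ops)
        .then_ignore(end())
        .parse("\"hello \\\nworld\"");
    // Expected, roughly: Ok(Str("hello world"))
    println!("{:?}", out);
}
```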
diff --git a/src/parse/substitution.rs b/src/parse/substitution.rs
index ccc60c8..fb93758 100644
--- a/src/parse/substitution.rs
+++ b/src/parse/substitution.rs
@@ -2,16 +2,28 @@ use chumsky::{self, prelude::*, Parser};
 
 use super::{expression, number::float_parser};
 
+#[derive(Debug, Clone)]
 pub struct Substitution {
-    source: expression::Expr,
-    priority: f64,
-    target: expression::Expr
+    pub source: expression::Expr,
+    pub priority: f64,
+    pub target: expression::Expr
 }
 
-pub fn substitutionParser<'a>(
-    ops: &'a [String]
+/// Parses a substitution rule of the forms
+///
+/// ```orchid
+/// main = \x. ...
+/// $a + $b = (add $a $b)
+/// (foo bar baz) =1.1=> (foo 1 e)
+/// reee =2=> shadow_reee
+/// shadow_reee =0.9=> reee
+/// ```
+/// TBD whether this disables reee in the specified range or loops forever
+pub fn substitution_parser<'a>(
+    pattern_ops: &[&'a str],
+    ops: &[&'a str]
 ) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
-    expression::expression_parser(ops)
+    expression::expression_parser(pattern_ops)
     .then_ignore(just('='))
     .then(
         float_parser().then_ignore(just("=>"))
diff --git a/src/project/mod.rs b/src/project/mod.rs
new file mode 100644
index 0000000..c0eafd0
--- /dev/null
+++ b/src/project/mod.rs
@@ -0,0 +1,53 @@
+use std::collections::HashMap;
+
+mod resolve_names;
+
+
+#[derive(Debug, Clone)]
+pub struct Project {
+    pub modules: HashMap<Vec<String>, Module>,
+}
+
+#[derive(Debug, Clone)]
+pub struct Export {
+    isSymbol: bool,
+    subpaths: HashMap<String, Export>
+}
+
+#[derive(Debug, Clone)]
+pub struct Module {
+    pub substitutions: Vec<Substitution>,
+    pub exports: HashMap<String, Export>,
+    pub all_ops: Vec<String>
+}
+
+#[derive(Debug, Clone)]
+pub struct Substitution {
+    pub source: Expr,
+    pub priority: f64,
+    pub target: Expr
+}
+
+#[derive(Debug, Clone)]
+pub enum Literal {
+    Num(f64),
+    Int(u64),
+    Char(char),
+    Str(String),
+}
+
+#[derive(Debug, Clone)]
+pub enum Token {
+    Literal(Literal),
+    Name(String),
+    Bound,
+    S(Vec<Expr>),
+    Lambda(Vec>, Option>, Vec),
+    Auto(Option>>, Option>, Vec)
+}
+
+#[derive(Debug, Clone)]
+pub struct Expr {
+    pub token: Token,
+    pub typ: Box
+}
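For the `Export` trie, a standalone illustration of the intended shape follows; the insert helper and the snake_case field names are made up for the sketch (the struct above uses `isSymbol`), and reading `subpaths` as a map from path segment to child node is an assumption.

```rust
use std::collections::HashMap;

// Sketch: exporting foo::bar yields a non-symbol node "foo" with a symbol child "bar".
#[derive(Debug, Clone, Default)]
struct Export {
    is_symbol: bool,
    subpaths: HashMap<String, Export>,
}

fn insert(root: &mut Export, path: &[&str]) {
    match path.split_first() {
        None => root.is_symbol = true,
        Some((head, tail)) => insert(root.subpaths.entry(head.to_string()).or_default(), tail),
    }
}

fn main() {
    let mut root = Export::default();
    insert(&mut root, &["foo", "bar"]);
    assert!(!root.subpaths["foo"].is_symbol);
    assert!(root.subpaths["foo"].subpaths["bar"].is_symbol);
}
```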
diff --git a/src/project/resolve_names.rs b/src/project/resolve_names.rs
new file mode 100644
index 0000000..ceca618
--- /dev/null
+++ b/src/project/resolve_names.rs
@@ -0,0 +1,87 @@
+use std::collections::HashMap;
+
+use chumsky::{Parser, prelude::Simple};
+use thiserror::Error;
+
+use crate::parse::{self, file_parser, exported_names, FileEntry};
+use crate::utils::Cache;
+
+#[derive(Debug, Clone)]
+pub enum Loaded {
+    Module(String),
+    Namespace(Vec<String>)
+}
+
+#[derive(Error, Debug)]
+pub enum ParseError {
+    #[error("Not found: {0}")]
+    NotFound(String),
+    #[error("Failed to parse {file}: {errors:?}")]
+    Syntax {
+        file: String,
+        errors: Vec<Simple<char>>
+    },
+    #[error("Expected {0}, found {1}")]
+    Mismatch(String, String),
+
+}
+
+impl ParseError {
+    pub fn not_found(name: &str) -> ParseError { ParseError::NotFound(name.to_string()) }
+    pub fn syntax(file: &str, errors: Vec<Simple<char>>) -> ParseError {
+        ParseError::Syntax { file: file.to_string(), errors }
+    }
+    pub fn mismatch(expected: &str, found: &str) -> ParseError {
+        ParseError::Mismatch(expected.to_string(), found.to_string())
+    }
+}
+
+
+
+// Loading a module:
+// 1. [X] Parse the imports
+// 2. [ ] Build a mapping of all imported symbols to full paths
+//        -> [X] Parse the exported symbols from all imported modules
+// 3. [ ] Parse everything using the full list of operators
+// 4. [ ] Traverse and remap elements
+
+pub fn load_project<F>(
+    mut load_mod: F, prelude: &[&str], entry: &str
+) -> Result<super::Project, ParseError>
+where F: FnMut(&[&str]) -> Option<Loaded> {
+    let preparser = file_parser(prelude, &[]);
+    let mut loaded = Cache::new(|path: &[&str]| load_mod(path));
+    let mut preparsed = Cache::new(|path: &[&str]| {
+        loaded.get(path).as_ref().map(|loaded| match loaded {
+            Loaded::Module(source) => Some(preparser.parse(source.as_str()).ok()?),
+            _ => return None
+        }).flatten()
+    });
+    let exports = Cache::new(|path: &[&str]| loaded.get(path).map(|data| {
+        match data {
+            Loaded::Namespace(names) => Some(names),
+            Loaded::Module(source) => preparsed.get(path).map(|data| {
+                exported_names(&data).into_iter().map(|n| n[0]).collect()
+            })
+        }
+    }).flatten());
+    let imports = Cache::new(|path: &[&str]| preparsed.get(path).map(|data| {
+        data.iter().filter_map(|ent| match ent {
+            FileEntry::Import(imp) => Some(imp),
+            _ => None
+        }).flatten().collect::<Vec<_>>()
+    }));
+    // let main = preparsed.get(&[entry]);
+    // for imp in parse::imports(main) {
+    //     if !modules.contains_key(&imp.path) {
+    //         if modules[&imp.path]
+    //     }
+    // }
+    // let mut project = super::Project {
+    //     modules: HashMap::new()
+    // };
+
+    // Some(project)
+    todo!("Finish this function")
+}
\ No newline at end of file
diff --git a/src/utils/cache.rs b/src/utils/cache.rs
new file mode 100644
index 0000000..230697d
--- /dev/null
+++ b/src/utils/cache.rs
@@ -0,0 +1,25 @@
+use std::{collections::HashMap, hash::Hash};
+
+/// Cache the return values of an effectless closure in a hashmap
+/// Inspired by the closure_cacher crate.
+pub struct Cache<I, O, F> where F: FnMut(I) -> O {
+    store: HashMap<I, O>,
+    closure: F
+}
+
+impl<I, O, F> Cache<I, O, F>
+where
+    F: FnMut(I) -> O,
+    I: Eq + Hash + Copy
+{
+    pub fn new(closure: F) -> Self { Self { store: HashMap::new(), closure } }
+    pub fn get(&mut self, i: I) -> &O {
+        // I copied it because I might need `drop` and I prefer `I` to be unconstrained.
+        let closure = &mut self.closure;
+        self.store.entry(i).or_insert_with(|| closure(i))
+    }
+    /// Forget the output for the given input
+    pub fn drop(&mut self, i: &I) -> bool {
+        self.store.remove(i).is_some()
+    }
+}
\ No newline at end of file
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
new file mode 100644
index 0000000..7869800
--- /dev/null
+++ b/src/utils/mod.rs
@@ -0,0 +1,2 @@
+mod cache;
+pub use cache::Cache;
\ No newline at end of file
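A hypothetical usage sketch of the new `Cache` utility; the key and value types here are made up.

```rust
use crate::utils::Cache;

// The closure runs once per key; repeat lookups return the memoized value,
// and drop forgets a single entry.
fn demo() {
    let mut doubled = Cache::new(|n: u32| {
        println!("computing {}", n); // printed only on the first lookup for each key
        n * 2
    });
    assert_eq!(*doubled.get(21), 42);
    assert_eq!(*doubled.get(21), 42); // served from the map, closure not re-run
    assert!(doubled.drop(&21));       // forget it; the next get would recompute
}
```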