From ec1734e1138ec49b23d0d2e5fd23254673369a8c Mon Sep 17 00:00:00 2001
From: Lawrence Bethlenfalvy
Date: Mon, 30 May 2022 05:21:00 +0200
Subject: [PATCH] Difficult ownership questions

---
 README.md                    |  18 +++---
 src/main.rs                  |   4 +-
 src/parse/expression.rs      |  55 +++++++++++-------
 src/parse/import.rs          |  77 ++++++++++++-------------
 src/parse/misc.rs            |   1 +
 src/parse/mod.rs             |   7 +++
 src/parse/name.rs            |  44 +++++++++-----
 src/parse/number.rs          |  20 +++++++
 src/parse/sourcefile.rs      | 107 +++++++++++++++++++++++++++++++++++
 src/parse/string.rs          |   6 +-
 src/parse/substitution.rs    |  24 ++++++--
 src/project/mod.rs           |  53 +++++++++++++++++
 src/project/resolve_names.rs |  87 ++++++++++++++++++++++++++++
 src/utils/cache.rs           |  25 ++++++++
 src/utils/mod.rs             |   2 +
 15 files changed, 441 insertions(+), 89 deletions(-)
 create mode 100644 src/parse/sourcefile.rs
 create mode 100644 src/project/mod.rs
 create mode 100644 src/project/resolve_names.rs
 create mode 100644 src/utils/cache.rs
 create mode 100644 src/utils/mod.rs

diff --git a/README.md b/README.md
index 3781334..965bc6a 100644
--- a/README.md
+++ b/README.md
@@ -262,14 +262,14 @@ the purposes of substitution.
 This is very far away so I don't want to make promises, but I have some
 ideas.
 
-[ ] early execution of functions on any subset of their arguments where it
-    could provide substantial speedup
-[ ] tracking copies of expressions and evaluating them only once
-[ ] Many cases of single recursion converted to loops
-    [ ] tail recursion
-    [ ] 2 distinct loops where the tail doesn't use the arguments
-    [ ] reorder operations to favour this scenario
-[ ] reactive calculation of values that are deemed to be read more often
+- [ ] early execution of functions on any subset of their arguments where
+  it could provide substantial speedup
+- [ ] tracking copies of expressions and evaluating them only once
+- [ ] Many cases of single recursion converted to loops
+  - [ ] tail recursion
+  - [ ] 2 distinct loops where the tail doesn't use the arguments
+  - [ ] reorder operations to favour this scenario
+- [ ] reactive calculation of values that are deemed to be read more often
   than written
-[ ] automatic profiling based on performance metrics generated by debug
+- [ ] automatic profiling based on performance metrics generated by debug
   builds
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index e0e31ab..18ee30d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,12 +3,14 @@ use std::io::{self, Read};
 use chumsky::{Parser, prelude::*};
 
 mod parse;
+mod project;
+mod utils;
 
 fn main() {
     let mut input = String::new();
     let mut stdin = io::stdin();
     stdin.read_to_string(&mut input).unwrap();
-    let ops: Vec<String> = vec!["$", "."].iter().map(|&s| s.to_string()).collect();
+    let ops: Vec<&str> = vec!["$", "."];
     let output = parse::expression_parser(&ops).then_ignore(end()).parse(input);
     println!("\nParsed:\n{:?}", output);
 }
diff --git a/src/parse/expression.rs b/src/parse/expression.rs
index 35da5a2..da1210f 100644
--- a/src/parse/expression.rs
+++ b/src/parse/expression.rs
@@ -6,63 +6,76 @@ use super::number;
 use super::misc;
 use super::name;
 
-#[derive(Debug)]
+/// An S-expression as read from a source file
+#[derive(Debug, Clone)]
 pub enum Expr {
     Num(f64),
     Int(u64),
     Char(char),
     Str(String),
-    Name(String),
+    Name(Vec<String>),
     S(Vec<Expr>),
     Lambda(String, Option<Box<Expr>>, Vec<Expr>),
     Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
     Typed(Box<Expr>, Box<Expr>)
 }
 
+/// Parse a type annotation
 fn typed_parser<'a>(
-    expr: Recursive<'a, char, Expr, Simple<char>>,
-    ops: &'a [String]
+    expr: Recursive<'a, char, Expr, Simple<char>>
 ) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
     just(':').ignore_then(expr)
 }
 
+/// Parse an expression without a type annotation
 fn untyped_xpr_parser<'a>(
     expr: Recursive<'a, char, Expr, Simple<char>>,
-    ops: &'a [String]
+    ops: &[&'a str]
 ) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
-    let lambda = just('\\')
-        .ignore_then(name::name_parser(ops))
-        .then(typed_parser(expr.clone(), ops).or_not())
-        .then_ignore(just('.'))
-        .then(expr.clone().repeated().at_least(1))
-        .map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
-    let auto = just('@')
-        .ignore_then(name::name_parser(ops).or_not())
-        .then(typed_parser(expr.clone(), ops).or_not())
-        .then_ignore(just('.'))
-        .then(expr.clone().repeated().at_least(1))
-        .map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
+    // basic S-expression rule
     let sexpr = expr.clone()
         .repeated()
         .delimited_by(just('('), just(')'))
         .map(Expr::S);
+    // Blocks
+    // can and therefore do match everything up to the closing paren
+    // \name. body
+    // \name:type. body
+    let lambda = just('\\')
+        .ignore_then(text::ident())
+        .then(typed_parser(expr.clone()).or_not())
+        .then_ignore(just('.'))
+        .then(expr.clone().repeated().at_least(1))
+        .map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
+    // @name. body
+    // @name:type. body
+    // @:type. body
+    let auto = just('@')
+        .ignore_then(text::ident().or_not())
+        .then(typed_parser(expr.clone()).or_not())
+        .then_ignore(just('.'))
+        .then(expr.clone().repeated().at_least(1))
+        .map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
     choice((
+        number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence
         number::float_parser().map(Expr::Num),
-        number::int_parser().map(Expr::Int),
         string::char_parser().map(Expr::Char),
         string::str_parser().map(Expr::Str),
-        name::name_parser(ops).map(Expr::Name),
+        name::name_parser(ops).map(Expr::Name), // includes namespacing
         sexpr,
         lambda,
         auto
     )).padded()
 }
 
-pub fn expression_parser(ops: &[String]) -> impl Parser<char, Expr, Error = Simple<char>> + '_ {
+/// Parse any expression with a type annotation, surrounded by comments
+pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
+    // This approach to parsing comments is ugly and error-prone,
+    // but I don't have a lot of other ideas
    return recursive(|expr| {
        return misc::comment_parser().or_not().ignore_then(
            untyped_xpr_parser(expr.clone(), &ops)
-                .then(typed_parser(expr, ops).or_not())
+                .then(typed_parser(expr).or_not())
                .map(|(val, t)| match t {
                    Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
                    None => val
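A hypothetical spot check of the new namespaced `Name` representation, wired up the same way as `main.rs`. Both the function and the expected output comment are illustrations only; they assume `crate::parse` re-exports `expression_parser` as declared in `src/parse/mod.rs` further down.

```rust
use chumsky::{Parser, prelude::*};

// Hypothetical demo, not part of the patch.
fn demo() {
    let ops: Vec<&str> = vec!["$", "."];
    let ast = crate::parse::expression_parser(&ops)
        .then_ignore(end())
        .parse("std::add");
    // Expected shape, roughly: Ok(Name(["std", "add"]))
    println!("{:?}", ast);
}
```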
diff --git a/src/parse/import.rs b/src/parse/import.rs
index 57d0c75..21d85ee 100644
--- a/src/parse/import.rs
+++ b/src/parse/import.rs
@@ -1,57 +1,58 @@
-use chumsky::{Parser, prelude::*, text::Character};
+use std::iter;
+
+use chumsky::{Parser, prelude::*};
 
 use super::name;
 
-enum Import {
-    Name(Vec<String>, String),
-    All(Vec<String>)
-}
-
-fn prefix(pre: Vec<String>, im: Import) -> Import {
-    match im {
-        Import::Name(ns, name) => Import::Name(
-            pre.into_iter().chain(ns.into_iter()).collect(),
-            name
-        ),
-        Import::All(ns) => Import::All(
-            pre.into_iter().chain(ns.into_iter()).collect()
-        )
-    }
+#[derive(Debug, Clone)]
+pub struct Import {
+    pub path: Vec<String>,
+    pub name: Option<String>
 }
 
-type BoxedStrIter = Box<dyn Iterator<Item = String>>;
-type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
+pub type BoxedStrIter = Box<dyn Iterator<Item = String>>;
+pub type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
 
+/// Initialize a Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>
+/// with a single element.
 fn init_table(name: String) -> BoxedStrIterIter {
-    Box::new(vec![Box::new(vec![name].into_iter()) as BoxedStrIter].into_iter())
+    // I'm not confident at all that this is a good approach.
+    Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter))
 }
 
+/// Parse an import command.
+/// The syntax is the same as Rust's `use`, except the verb is `import`, there is no
+/// trailing semicolon, and the delimiters are plain parentheses. Namespaces should
+/// preferably contain cross-platform filename-legal characters, but the symbols are
+/// explicitly allowed to go wild. There's a blacklist in [name].
 pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
+    // TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
     recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
         name::modname_parser()
-        .padded()
-        .then_ignore(just("::"))
-        .repeated()
-        .then(
-            choice((
-                expr.clone()
-                    .separated_by(just(','))
-                    .delimited_by(just('('), just(')'))
-                    .map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
-                just("*").map(|s| init_table(s.to_string())),
-                name::modname_parser().map(init_table)
-            )).padded()
-        ).map(|(pre, post)| {
-            Box::new(post.map(move |el| {
-                Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
-            })) as BoxedStrIterIter
-        })
+            .padded()
+            .then_ignore(just("::"))
+            .repeated()
+            .then(
+                choice((
+                    expr.clone()
+                        .separated_by(just(','))
+                        .delimited_by(just('('), just(')'))
+                        .map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
+                    // Each expr returns a list of imports, flatten those into a common list
+                    just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped
+                    name::modname_parser().map(init_table) // Just a name, wrapped
+                )).padded()
+            ).map(|(pre, post)| {
+                Box::new(post.map(move |el| {
+                    Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
+                })) as BoxedStrIterIter
+            })
     }).padded().map(|paths| {
         paths.filter_map(|namespaces| {
             let mut path: Vec<String> = namespaces.collect();
             match path.pop()?.as_str() {
-                "*" => Some(Import::All(path)),
-                name => Some(Import::Name(path, name.to_owned()))
+                "*" => Some(Import { path, name: None }),
+                name => Some(Import { path, name: Some(name.to_owned()) })
             }
         }).collect()
     })
 }
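As a sanity check on the output shape, a hypothetical call follows; it assumes `import_parser` were reachable from outside `src/parse`, which this patch does not actually expose, and the expected entries are inferred from the code rather than captured test output.

```rust
use chumsky::{Parser, prelude::*};
use crate::parse::import::{import_parser, Import};

// Hypothetical demo: parse one Rust-style import tree into the flat list.
fn demo() {
    let out: Vec<Import> = import_parser()
        .then_ignore(end())
        .parse("std::(io::*, prelude)")
        .unwrap_or_default();
    // Expected, roughly:
    //   Import { path: ["std", "io"], name: None }      (the `*` wildcard)
    //   Import { path: ["std"], name: Some("prelude") }
    println!("{:?}", out);
}
```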
diff --git a/src/parse/misc.rs b/src/parse/misc.rs
index fbe905d..9ad2a09 100644
--- a/src/parse/misc.rs
+++ b/src/parse/misc.rs
@@ -1,5 +1,6 @@
 pub use chumsky::{self, prelude::*, Parser};
 
+/// Parses Lua-style comments
 pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
     any().repeated().delimited_by(just("--["), just("]--")).or(
         any().repeated().delimited_by(just("--"), just("\n"))
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index 1d7b7f7..4867adf 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -5,5 +5,12 @@ mod misc;
 mod import;
 mod name;
 mod substitution;
+mod sourcefile;
 
+pub use expression::Expr;
 pub use expression::expression_parser;
+pub use sourcefile::FileEntry;
+pub use sourcefile::file_parser;
+pub use sourcefile::imports;
+pub use sourcefile::exported_names;
+pub use import::Import;
\ No newline at end of file
diff --git a/src/parse/name.rs b/src/parse/name.rs
index c8dfa67..497dec7 100644
--- a/src/parse/name.rs
+++ b/src/parse/name.rs
@@ -1,28 +1,46 @@
 use chumsky::{self, prelude::*, Parser};
 
-fn op_parser_recur<'a, 'b>(ops: &'a [String]) -> BoxedParser<'b, char, String, Simple<char>> {
-    if ops.len() == 1 { just(ops[0].clone()).boxed() }
-    else { just(ops[0].clone()).or(op_parser_recur(&ops[1..])).boxed() }
-}
-
-fn op_parser(ops: &[String]) -> BoxedParser<char, String, Simple<char>> {
+/// Matches any one of the passed operators, longest-first
+fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>> {
     let mut sorted_ops = ops.to_vec();
     sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
-    op_parser_recur(&sorted_ops)
+    sorted_ops.into_iter()
+        .map(|op| just(op.to_string()).boxed())
+        .reduce(|a, b| a.or(b).boxed()).unwrap()
 }
 
-pub fn modname_parser() -> impl Parser<char, String, Error = Simple<char>> {
-    let not_name_char: Vec<char> = vec![':', '\\', '"', '\'', '(', ')', '.'];
+/// Matches anything that's allowed as an operator
+///
+/// Blacklist rationale:
+/// - `:` is used for namespacing and type annotations, both are distinguished from operators
+/// - `\` and `@` are parametric expression starters
+/// - `"` and `'` are read as primitives and would never match.
+/// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming.
+/// - `.` is the discriminator for parametrics.
+///
+/// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but
+/// then parametrics should take precedence, which might break stuff. Investigate.
+///
+/// TODO: `'` could work as an operator whenever it isn't closed. It's common in maths so it's
+/// worth a try.
+///
+/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
+/// common in maths so it's worth a try. Investigate.
+pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
+    let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '.'];
     filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
         .repeated().at_least(1)
         .collect()
 }
 
-pub fn name_parser<'a>(ops: &'a [String]) -> impl Parser<char, String, Error = Simple<char>> + 'a {
+/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
+/// blacklisted character as a new operator.
+pub fn name_parser<'a>(
+    ops: &[&'a str]
+) -> impl Parser<char, Vec<String>, Error = Simple<char>> + 'a {
     choice((
         op_parser(ops), // First try to parse a known operator
         text::ident(), // Failing that, parse plain text
-        // Finally parse everything until tne next terminal as a new operator
-        modname_parser()
-    )).padded()
+        modname_parser() // Finally parse everything until the next terminal as a new operator
+    )).padded().separated_by(just("::")).padded()
 }
\ No newline at end of file
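The longest-first ordering in `op_parser` matters because the alternatives are tried in sequence: with a first-match choice, a short operator such as `=` would otherwise shadow `==` and leave a stray character in the input. A standalone sketch of just the sorting step, with made-up operator names:

```rust
// Mirrors the sort in op_parser: longer operators are tried first.
fn longest_first<'a>(ops: &[&'a str]) -> Vec<&'a str> {
    let mut sorted = ops.to_vec();
    sorted.sort_by(|a, b| b.len().cmp(&a.len()));
    sorted
}

fn main() {
    // "=" must come after "==" and "=>", or it would match first.
    assert_eq!(longest_first(&["=", "==", "=>"]), vec!["==", "=>", "="]);
}
```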
diff --git a/src/parse/number.rs b/src/parse/number.rs
index dde4e54..c40a7f2 100644
--- a/src/parse/number.rs
+++ b/src/parse/number.rs
@@ -6,6 +6,9 @@ fn assert_not_digit(base: u32, c: char) {
     }
 }
 
+/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
+///
+/// TODO: this should use separated_by and parse the leading group too
 fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
     just('_')
     .ignore_then(text::digits(base))
@@ -13,6 +16,9 @@ fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
 fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
     text::int(base)
     .then(separated_digits_parser(base))
@@ -21,6 +27,8 @@ fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
     })
 }
 
+/// Parse exponent notation, or return 0 as the default exponent.
+/// The exponent is always in decimal.
 fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
     return choice((
         just('p')
@@ -32,6 +40,9 @@ fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
     )).or_else(|_| Ok(0))
 }
 
+/// Returns a mapper that converts a mantissa and an exponent into a uint.
+///
+/// TODO: it panics if it finds a negative exponent
 fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
     return move |(val, exp)| {
         if exp == 0 {val}
@@ -39,6 +50,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
     };
 }
 
+/// Returns a mapper that converts a mantissa and an exponent into a float.
 fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
     return move |(val, exp)| {
         if exp == 0 {val}
@@ -46,11 +58,15 @@ fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
     }
 }
 
+/// Parse a uint from exponential notation (panics if 'p' is a digit in base)
 fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
     assert_not_digit(base, 'p');
     uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
 }
 
+/// Parse a uint from a base determined by its prefix or lack thereof
+///
+/// Not to be confused with [uint_parser] which is a component of it.
 pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
     choice((
         just("0b").ignore_then(pow_uint_parser(2)),
@@ -60,6 +76,7 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
     ))
 }
 
+/// Parse a float from dot notation
 fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
     uint_parser(base)
     .then_ignore(just('.'))
@@ -73,11 +90,14 @@ fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
     })
 }
 
+/// Parse a float from dotted and optionally also exponential notation
 fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
     assert_not_digit(base, 'p');
     dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
 }
 
+/// Parse a float with dotted and optionally exponential notation from a base determined by its
+/// prefix
 pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
     choice((
         just("0b").ignore_then(pow_float_parser(2)),
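The exponent handling is compact enough to sanity-check in isolation. Below is a standalone sketch of the same arithmetic as `nat2u`; the literals quoted in the comments are assumptions about how `int_parser` would feed it, not captured output.

```rust
// Sketch of nat2u's semantics: the mantissa is scaled by base^exp, where the
// base is the literal's radix. Negative exponents panic, as the TODO notes.
fn nat2u(base: u64) -> impl Fn((u64, i32)) -> u64 {
    move |(val, exp)| {
        if exp == 0 { val }
        else { val * base.pow(u32::try_from(exp).expect("negative exponent")) }
    }
}

fn main() {
    assert_eq!(nat2u(16)((2, 3)), 8192);       // "0x2p3"   -> 2 * 16^3
    assert_eq!(nat2u(10)((1000, 2)), 100_000); // "1_000p2" -> 1000 * 10^2
}
```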
diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs
new file mode 100644
index 0000000..f891a37
--- /dev/null
+++ b/src/parse/sourcefile.rs
@@ -0,0 +1,107 @@
+use std::collections::HashSet;
+use std::iter;
+
+use super::expression::Expr;
+use super::import;
+use super::misc;
+use super::substitution::substitution_parser;
+use super::substitution::Substitution;
+use chumsky::{Parser, prelude::*};
+
+/// Anything we might encounter in a file
+#[derive(Debug, Clone)]
+pub enum FileEntry {
+    Import(Vec<import::Import>),
+    Comment(String),
+    Substitution(Substitution),
+    Export(Substitution)
+}
+
+/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
+/// aren't names, such as all bound parameters. Generally speaking, this is not a very
+/// sophisticated search.
+///
+/// TODO: find a way to exclude parameters
+fn find_all_names_recur(expr: &Expr) -> Box<dyn Iterator<Item = &Vec<String>> + '_> {
+    match expr {
+        Expr::Auto(_, typ, body) | Expr::Lambda(_, typ, body) => Box::new(match typ {
+            Some(texp) => find_all_names_recur(texp),
+            None => Box::new(iter::empty())
+        }.chain(body.into_iter().map(find_all_names_recur).flatten())),
+        Expr::S(body) => Box::new(body.into_iter().map(find_all_names_recur).flatten()),
+        Expr::Typed(val, typ) => Box::new(
+            find_all_names_recur(val).chain(find_all_names_recur(typ))
+        ),
+        Expr::Name(x) => Box::new(iter::once(x)),
+        _ => Box::new(iter::empty())
+    }
+}
+
+/// Collect all names that occur in an expression
+fn find_all_names(expr: &Expr) -> HashSet<&Vec<String>> {
+    find_all_names_recur(expr).collect()
+}
+
+/// Parse a file into a list of distinctive entries
+pub fn file_parser<'a>(
+    pattern_ops: &[&'a str], ops: &[&'a str]
+) -> impl Parser<char, Vec<FileEntry>, Error = Simple<char>> + 'a {
+    choice((
+        // In case the usercode wants to parse doc comments
+        misc::comment_parser().map(FileEntry::Comment),
+        import::import_parser().map(FileEntry::Import),
+        text::keyword("export")
+            .ignore_then(substitution_parser(pattern_ops, ops)).map(FileEntry::Export),
+        // This could match almost anything so it has to go last
+        substitution_parser(pattern_ops, ops).map(FileEntry::Substitution)
+    )).padded()
+    .separated_by(just('\n'))
+    .then_ignore(end())
+}
+
+/// Decide if a string can be an operator. Operators can include digits and text, just not at the
+/// start.
+fn is_op(s: &str) -> bool {
+    return match s.chars().next() {
+        Some(x) => !x.is_alphanumeric(),
+        None => false
+    }
+}
+
+/// Collect all exported names (and a lot of other words) from a file
+pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
+    src.iter().filter_map(|ent| match ent {
+        FileEntry::Export(a) => Some(&a.source),
+        _ => None
+    }).map(find_all_names).flatten().collect()
+}
+
+/// Collect all operators defined in a file (and some other words)
+fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
+    let all_names: HashSet<&Vec<String>> = src.iter().filter_map(|ent| match ent {
+        FileEntry::Substitution(a) => if exported_only {None} else {Some(&a.source)},
+        FileEntry::Export(a) => Some(&a.source),
+        _ => None
+    }).map(find_all_names).flatten().collect();
+    // Dedupe stage of dubious value; collecting into a hashset may take longer than
+    // handling duplicates would with a file of sensible size.
+    all_names.into_iter()
+        .filter_map(|name|
+            // If it's namespaced, it's imported.
+            if name.len() == 1 && is_op(&name[0]) {Some(&name[0])}
+            else {None}
+        ).collect()
+}
+
+/// Collect all operators from a file
+pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
+/// Collect exported operators from a file (plus some extra)
+pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
+
+/// Summarize all imports from a file in a single list of qualified names
+pub fn imports(src: &Vec<FileEntry>) -> Vec<&import::Import> {
+    src.into_iter().filter_map(|ent| match ent {
+        FileEntry::Import(impv) => Some(impv.iter()),
+        _ => None
+    }).flatten().collect()
+}
\ No newline at end of file
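The `is_op` rule is small enough to check standalone; here is a copy of the logic with a few spot checks (the sample names are made up):

```rust
// Same rule as is_op above: anything not starting with an alphanumeric
// character counts as an operator name.
fn is_op(s: &str) -> bool {
    match s.chars().next() {
        Some(x) => !x.is_alphanumeric(),
        None => false,
    }
}

fn main() {
    assert!(is_op("=>"));
    assert!(is_op("++"));
    assert!(!is_op("map"));
    assert!(!is_op("2x")); // operators may contain digits, just not at the start
}
```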
diff --git a/src/parse/string.rs b/src/parse/string.rs
index b74014d..3b66150 100644
--- a/src/parse/string.rs
+++ b/src/parse/string.rs
@@ -1,6 +1,8 @@
 use chumsky::{self, prelude::*, Parser};
 
+/// Parses a text character that is not the specified delimiter
 fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
+    // Copied directly from Chumsky's JSON example.
     let escape = just('\\').ignore_then(
         just('\\')
         .or(just('/'))
@@ -27,15 +29,17 @@ fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
     filter(move |&c| c != '\\' && c != delim).or(escape)
 }
 
+/// Parse a character literal between single quotes
 pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
     just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
 }
 
+/// Parse a string between double quotes
 pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
     just('"')
     .ignore_then(
         text_parser('"').map(Some)
-        .or(just("\\\n").map(|_| None))
+        .or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
         .repeated()
     ).then_ignore(just('"'))
     .flatten().collect()
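A hypothetical check of the backslash-newline rule marked above, going through `expression_parser` since the string parsers themselves are module-private; the expected value is inferred from the code, not from test output.

```rust
use chumsky::{Parser, prelude::*};

// Hypothetical demo: an escaped line break inside a string literal is dropped.
fn demo() {
    let ops: Vec<&str> = vec!["$", "."];
    let out = crate::parse::expression_parser(&ops)
        .then_ignore(end())
        .parse("\"hello \\\nworld\"");
    // Expected, roughly: Ok(Str("hello world"))
    println!("{:?}", out);
}
```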
diff --git a/src/parse/substitution.rs b/src/parse/substitution.rs
index ccc60c8..fb93758 100644
--- a/src/parse/substitution.rs
+++ b/src/parse/substitution.rs
@@ -2,16 +2,28 @@ use chumsky::{self, prelude::*, Parser};
 
 use super::{expression, number::float_parser};
 
+#[derive(Debug, Clone)]
 pub struct Substitution {
-    source: expression::Expr,
-    priority: f64,
-    target: expression::Expr
+    pub source: expression::Expr,
+    pub priority: f64,
+    pub target: expression::Expr
 }
 
-pub fn substitutionParser<'a>(
-    ops: &'a [String]
+/// Parses a substitution rule of the forms
+///
+/// ```orchid
+/// main = \x. ...
+/// $a + $b = (add $a $b)
+/// (foo bar baz) =1.1=> (foo 1 e)
+/// reee =2=> shadow_reee
+/// shadow_reee =0.9=> reee
+/// ```
+/// TBD whether this disables reee in the specified range or loops forever
+pub fn substitution_parser<'a>(
+    pattern_ops: &[&'a str],
+    ops: &[&'a str]
 ) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
-    expression::expression_parser(ops)
+    expression::expression_parser(pattern_ops)
     .then_ignore(just('='))
     .then(
         float_parser().then_ignore(just("=>"))
diff --git a/src/project/mod.rs b/src/project/mod.rs
new file mode 100644
index 0000000..c0eafd0
--- /dev/null
+++ b/src/project/mod.rs
@@ -0,0 +1,53 @@
+use std::collections::HashMap;
+
+mod resolve_names;
+
+
+#[derive(Debug, Clone)]
+pub struct Project {
+    pub modules: HashMap<Vec<String>, Module>,
+}
+
+#[derive(Debug, Clone)]
+pub struct Export {
+    isSymbol: bool,
+    subpaths: HashMap<String, Export>
+}
+
+#[derive(Debug, Clone)]
+pub struct Module {
+    pub substitutions: Vec<Substitution>,
+    pub exports: HashMap<String, Export>,
+    pub all_ops: Vec<String>
+}
+
+#[derive(Debug, Clone)]
+pub struct Substitution {
+    pub source: Expr,
+    pub priority: f64,
+    pub target: Expr
+}
+
+#[derive(Debug, Clone)]
+pub enum Literal {
+    Num(f64),
+    Int(u64),
+    Char(char),
+    Str(String),
+}
+
+#[derive(Debug, Clone)]
+pub enum Token {
+    Literal(Literal),
+    Name(String),
+    Bound,
+    S(Vec<Expr>),
+    Lambda(Vec>, Option>, Vec),
+    Auto(Option>>, Option>, Vec)
+}
+
+#[derive(Debug, Clone)]
+pub struct Expr {
+    pub token: Token,
+    pub typ: Box
+}
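For the `Export` trie, a standalone illustration of the intended shape follows; the insert helper and the snake_case field names are made up for the sketch (the struct above uses `isSymbol`), and reading `subpaths` as a map from path segment to child node is an assumption.

```rust
use std::collections::HashMap;

// Sketch: exporting foo::bar yields a non-symbol node "foo" with a symbol child "bar".
#[derive(Debug, Clone, Default)]
struct Export {
    is_symbol: bool,
    subpaths: HashMap<String, Export>,
}

fn insert(root: &mut Export, path: &[&str]) {
    match path.split_first() {
        None => root.is_symbol = true,
        Some((head, tail)) => insert(root.subpaths.entry(head.to_string()).or_default(), tail),
    }
}

fn main() {
    let mut root = Export::default();
    insert(&mut root, &["foo", "bar"]);
    assert!(!root.subpaths["foo"].is_symbol);
    assert!(root.subpaths["foo"].subpaths["bar"].is_symbol);
}
```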
diff --git a/src/project/resolve_names.rs b/src/project/resolve_names.rs
new file mode 100644
index 0000000..ceca618
--- /dev/null
+++ b/src/project/resolve_names.rs
@@ -0,0 +1,87 @@
+use std::collections::HashMap;
+
+use chumsky::{Parser, prelude::Simple};
+use thiserror::Error;
+
+use crate::parse::{self, file_parser, exported_names, FileEntry};
+use crate::utils::Cache;
+
+#[derive(Debug, Clone)]
+pub enum Loaded {
+    Module(String),
+    Namespace(Vec<String>)
+}
+
+#[derive(Error, Debug)]
+pub enum ParseError {
+    #[error("Not found: {0}")]
+    NotFound(String),
+    #[error("Failed to parse {file}: {errors:?}")]
+    Syntax {
+        file: String,
+        errors: Vec<Simple<char>>
+    },
+    #[error("Expected {0}, found {1}")]
+    Mismatch(String, String),
+
+}
+
+impl ParseError {
+    pub fn not_found(name: &str) -> ParseError { ParseError::NotFound(name.to_string()) }
+    pub fn syntax(file: &str, errors: Vec<Simple<char>>) -> ParseError {
+        ParseError::Syntax { file: file.to_string(), errors }
+    }
+    pub fn mismatch(expected: &str, found: &str) -> ParseError {
+        ParseError::Mismatch(expected.to_string(), found.to_string())
+    }
+}
+
+
+
+// Loading a module:
+// 1. [X] Parse the imports
+// 2. [ ] Build a mapping of all imported symbols to full paths
+//        -> [X] Parse the exported symbols from all imported modules
+// 3. [ ] Parse everything using the full list of operators
+// 4. [ ] Traverse and remap elements
+
+pub fn load_project<F>(
+    mut load_mod: F, prelude: &[&str], entry: &str
+) -> Result<super::Project, ParseError>
+where F: FnMut(&[&str]) -> Option<Loaded> {
+    let preparser = file_parser(prelude, &[]);
+    let mut loaded = Cache::new(|path: &[&str]| load_mod(path));
+    let mut preparsed = Cache::new(|path: &[&str]| {
+        loaded.get(path).as_ref().map(|loaded| match loaded {
+            Loaded::Module(source) => Some(preparser.parse(source.as_str()).ok()?),
+            _ => return None
+        }).flatten()
+    });
+    let exports = Cache::new(|path: &[&str]| loaded.get(path).map(|data| {
+        match data {
+            Loaded::Namespace(names) => Some(names),
+            Loaded::Module(source) => preparsed.get(path).map(|data| {
+                exported_names(&data).into_iter().map(|n| n[0]).collect()
+            })
+        }
+    }).flatten());
+    let imports = Cache::new(|path: &[&str]| preparsed.get(path).map(|data| {
+        data.iter().filter_map(|ent| match ent {
+            FileEntry::Import(imp) => Some(imp),
+            _ => None
+        }).flatten().collect::<Vec<_>>()
+    }));
+    // let main = preparsed.get(&[entry]);
+    // for imp in parse::imports(main) {
+    //     if !modules.contains_key(&imp.path) {
+    //         if modules[&imp.path]
+    //     }
+    // }
+    // let mut project = super::Project {
+    //     modules: HashMap::new()
+    // };
+
+    // Some(project)
+    todo!("Finish this function")
+}
\ No newline at end of file
diff --git a/src/utils/cache.rs b/src/utils/cache.rs
new file mode 100644
index 0000000..230697d
--- /dev/null
+++ b/src/utils/cache.rs
@@ -0,0 +1,25 @@
+use std::{collections::HashMap, hash::Hash};
+
+/// Cache the return values of an effectless closure in a hashmap
+/// Inspired by the closure_cacher crate.
+pub struct Cache<I, O, F> where F: FnMut(I) -> O {
+    store: HashMap<I, O>,
+    closure: F
+}
+
+impl<I, O, F> Cache<I, O, F>
+where
+    F: FnMut(I) -> O,
+    I: Eq + Hash + Copy
+{
+    pub fn new(closure: F) -> Self { Self { store: HashMap::new(), closure } }
+    pub fn get(&mut self, i: I) -> &O {
+        // I copied it because I might need `drop` and I prefer `I` to be unconstrained.
+        let closure = &mut self.closure;
+        self.store.entry(i).or_insert_with(|| closure(i))
+    }
+    /// Forget the output for the given input
+    pub fn drop(&mut self, i: &I) -> bool {
+        self.store.remove(i).is_some()
+    }
+}
\ No newline at end of file
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
new file mode 100644
index 0000000..7869800
--- /dev/null
+++ b/src/utils/mod.rs
@@ -0,0 +1,2 @@
+mod cache;
+pub use cache::Cache;
\ No newline at end of file
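A hypothetical usage sketch of the new `Cache` utility; the key and value types here are made up.

```rust
use crate::utils::Cache;

// The closure runs once per key; repeat lookups return the memoized value,
// and drop forgets a single entry.
fn demo() {
    let mut doubled = Cache::new(|n: u32| {
        println!("computing {}", n); // printed only on the first lookup for each key
        n * 2
    });
    assert_eq!(*doubled.get(21), 42);
    assert_eq!(*doubled.get(21), 42); // served from the map, closure not re-run
    assert!(doubled.drop(&21));       // forget it; the next get would recompute
}
```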