diff --git a/examples/dummy_project/main.orc b/examples/dummy_project/main.orc
index 1307b43..1fbc3e8 100644
--- a/examples/dummy_project/main.orc
+++ b/examples/dummy_project/main.orc
@@ -1,4 +1,4 @@
-import std::io::(println, out) -- imports
+-- import std::io::(println, out) -- imports
 
 -- single word rule (alias)
 greet =1=> (\name. printf out "Hello {}!\n" [name])
@@ -7,12 +7,15 @@ greet =1=> (\name. printf out "Hello {}!\n" [name])
 export ;> $a =200=> (greet $a) -- single-word exported rule
 
-export main = (
+export main == (
   print "What is your name?" >>
   readln >>= \name.
   greet name
 )
+export < $a ...$rest /> == (createElement (tok_to_str $a) [(props_carriage ...$rest)])
+export (props_carriage $key = $value) == (tok_to_str $key) => $value
+
 -- The broadest trait definition in existence
-Foo = (Bar Baz)
+Foo == (Bar Baz)
 
 -- default
 anyFoo = @T. @impl:(T (Bar Baz)). impl:(T Foo)
\ No newline at end of file
diff --git a/src/expression.rs b/src/expression.rs
index 971fdde..c0bcf07 100644
--- a/src/expression.rs
+++ b/src/expression.rs
@@ -1,7 +1,9 @@
+use itertools::Itertools;
 use ordered_float::NotNan;
 use std::{fmt::Debug};
 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+/// An exact value
+#[derive(Clone, PartialEq, Eq, Hash)]
 pub enum Literal {
   Num(NotNan<f64>),
   Int(u64),
@@ -9,10 +11,31 @@ pub enum Literal {
   Str(String),
 }
 
+impl Debug for Literal {
+  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    match self {
+      Self::Num(arg0) => write!(f, "{:?}", arg0),
+      Self::Int(arg0) => write!(f, "{:?}", arg0),
+      Self::Char(arg0) => write!(f, "{:?}", arg0),
+      Self::Str(arg0) => write!(f, "{:?}", arg0),
+    }
+  }
+}
+
 /// An S-expression with a type
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, PartialEq, Eq, Hash)]
 pub struct Expr(pub Clause, pub Option<Box<Expr>>);
 
+impl Debug for Expr {
+  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    // f.debug_tuple("Expr").field(&self.0).field(&self.1).finish()
+    let Expr(val, typ) = self;
+    write!(f, "{:?}", val)?;
+    if let Some(typ) = typ { write!(f, "{:?}", typ) }
+    else { Ok(()) }
+  }
+}
+
 impl Expr {
   /// Replace all occurrences of a name in the tree with a parameter, to bypass name resolution
   pub fn bind_parameter(&mut self, name: &str) {
@@ -24,7 +47,7 @@ impl Expr {
 }
 
 /// An S-expression as read from a source file
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+#[derive(Clone, PartialEq, Eq, Hash)]
 pub enum Clause {
   Literal(Literal),
   Name(Vec<String>),
@@ -34,6 +57,44 @@ pub enum Clause {
   Parameter(String)
 }
 
+fn fmt_expr_seq(it: &mut dyn Iterator<Item = &Expr>, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+  for item in Itertools::intersperse(it.map(Some), None) { match item {
+    Some(expr) => write!(f, "{:?}", expr),
+    None => f.write_str(" "),
+  }? }
+  Ok(())
+}
+
+impl Debug for Clause {
+  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    match self {
+      Self::Literal(arg0) => write!(f, "{:?}", arg0),
+      Self::Name(arg0) => write!(f, "{}", arg0.join("::")),
+      Self::S(del, items) => {
+        f.write_str(&del.to_string())?;
+        fmt_expr_seq(&mut items.iter(), f)?;
+        f.write_str(match del {
+          '(' => ")", '[' => "]", '{' => "}",
+          _ => "CLOSING_DELIM"
+        })
+      },
+      Self::Lambda(name, argtyp, body) => {
+        f.write_str("\\")?;
+        f.write_str(name)?;
+        f.write_str(":")?; fmt_expr_seq(&mut argtyp.iter(), f)?; f.write_str(".")?;
+        fmt_expr_seq(&mut body.iter(), f)
+      },
+      Self::Auto(name, argtyp, body) => {
+        f.write_str("@")?;
+        f.write_str(&name.clone().unwrap_or_default())?;
+        f.write_str(":")?; fmt_expr_seq(&mut argtyp.iter(), f)?; f.write_str(".")?;
+        fmt_expr_seq(&mut body.iter(), f)
+      },
+      Self::Parameter(name) => write!(f, "`{}", name)
+    }
+  }
+}
+
 impl Clause {
   /// Replace all occurrences of a name in the tree with a parameter, to bypass name resolution
   pub fn bind_parameter(&mut self, name: &str) {
@@ -49,4 +110,18 @@ impl Clause {
       _ => ()
     }
   }
+}
+
+/// A substitution rule as read from the source
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub struct Rule {
+  pub source: Vec<Expr>,
+  pub prio: NotNan<f64>,
+  pub target: Vec<Expr>
+}
+
+impl Debug for Rule {
+  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+    write!(f, "{:?} ={}=> {:?}", self.source, self.prio, self.target)
+  }
 }
\ No newline at end of file
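Editor's note on the hand-rolled Debug impls above: they print expressions in source-like notation instead of derived struct dumps, so a whole Rule renders as `source ={prio}=> target`. A minimal sketch of the resulting output (type paths taken from this diff, exact output shape inferred from the impls):

```rust
use ordered_float::NotNan;
use crate::expression::{Clause, Expr, Literal, Rule};

fn debug_demo() {
  let rule = Rule {
    source: vec![Expr(Clause::Name(vec!["greet".to_string()]), None)],
    prio: NotNan::new(1.0).unwrap(),
    target: vec![Expr(Clause::Literal(Literal::Str("hi".to_string())), None)],
  };
  // Vec's Debug supplies the brackets, the custom impls the rest;
  // prints something like: [greet] =1=> ["hi"]
  println!("{:?}", rule);
}
```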
diff --git a/src/main.rs b/src/main.rs
index 1a6f35c..5539539 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,17 +1,12 @@
-use core::ops::Range;
-use std::{env::current_dir, fs::read_to_string};
-use std::io;
-use chumsky::prelude::end;
-use chumsky::{Parser, Stream};
-use parse::{LexerEntry, FileEntry};
-// use project::{rule_collector, file_loader, Loaded};
+use std::env::current_dir;
 
 mod parse;
 mod project;
 mod utils;
 mod expression;
-pub use expression::*;
+mod rule;
+use project::{rule_collector, Loaded, file_loader};
 
 fn literal(orig: &[&str]) -> Vec<String> {
   orig.iter().map(|&s| s.to_owned()).collect()
 }
@@ -27,37 +22,15 @@ export (match_sequence $lhs) >>= (match_sequence $rhs) =100=> (bind ($lhs) ($rhs
 "#;
 
 fn main() {
-  // let mut input = String::new();
-  // let mut stdin = io::stdin();
-  // stdin.read_to_string(&mut input).unwrap();
-  let ops: Vec<&str> = vec!["...", ">>", ">>=", "[", "]", ",", "$"];
-  let data = read_to_string("./main.orc").unwrap();
-  let lexed = parse::lexer(&ops).parse(data).unwrap();
-  println!("Lexed: {:?}", lexed);
-  let parsr = parse::line_parser().then_ignore(end());
-  // match parsr.parse(data) {
-  //   Ok(output) => println!("\nParsed:\n{:?}", output),
-  //   Err(e) => println!("\nErrored:\n{:?}", e)
-  // }
-  let lines = lexed.iter().filter_map(|v| {
-    let parse::LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
-    let tuples = v.into_iter().map(|LexerEntry(l, r)| (l.clone(), r.clone()));
-    Some(parsr.parse_recovery_verbose(Stream::from_iter(end..end+1, tuples)))
-  }).collect::<Vec<_>>();
-  for (id, (out, errs)) in lines.into_iter().enumerate() {
-    println!("Parsing line {}", id);
-    if let Some(output) = out { println!("Parsed:\n{:?}", output) }
-    else { println!("Failed to produce output")}
-    if errs.len() > 0 { println!("Errored:\n{:?}", errs)}
+  let cwd = current_dir().unwrap();
+  let collect_rules = rule_collector(move |n| {
+    if n == vec!["prelude"] { Ok(Loaded::Module(PRELUDE.to_string())) }
+    else { file_loader(cwd.clone())(n) }
+  }, literal(&["...", ">>", ">>=", "[", "]", ",", "$", "=", "=>"]));
+  match collect_rules.try_find(&literal(&["main"])) {
+    Ok(rules) => for rule in rules.iter() {
+      println!("{rule:?}")
+    }
+    Err(err) => println!("{:#?}", err)
   }
-  // let output = parse::file_parser(&ops, &ops).parse(data).unwrap();
-  // let cwd = current_dir().unwrap();
-  // let collect_rules = rule_collector(move |n| {
-  //   if n == vec!["prelude"] { Ok(Loaded::Module(PRELUDE.to_string())) }
-  //   else { file_loader(cwd.clone())(n) }
-  // }, literal(&["...", ">>", ">>=", "[", "]", ","]));
-  // let rules = collect_rules.try_find(&literal(&["main"])).unwrap();
-  // for rule in rules.iter() {
-  //   println!("{:?} ={}=> {:?}", rule.source, rule.priority, rule.target)
-  // }
 }
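The loader closure handed to rule_collector is the only I/O boundary: it maps a module path to either module source or a namespace listing, with the prelude special-cased in memory. For tests the same contract can be satisfied entirely in memory; a sketch (Loaded comes from this diff's project module, the String error type is an arbitrary placeholder):

```rust
use std::collections::HashMap;
use crate::project::Loaded;

fn memory_loader(files: HashMap<Vec<String>, String>)
-> impl FnMut(Vec<String>) -> Result<Loaded, String> {
  move |path| files.get(&path)
    .map(|src| Loaded::Module(src.clone()))
    .ok_or_else(|| format!("{} not found", path.join("::")))
}
```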
diff --git a/src/parse/expression.rs b/src/parse/expression.rs
index 8bb3149..6de7d18 100644
--- a/src/parse/expression.rs
+++ b/src/parse/expression.rs
@@ -1,5 +1,5 @@
 use chumsky::{self, prelude::*, Parser};
-use crate::{Clause, Expr, Literal, enum_parser};
+use crate::{enum_parser, expression::{Clause, Expr, Literal}};
 
 use super::{lexer::Lexeme};
 
@@ -86,5 +86,5 @@ pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
     .ignore_then(expr.clone()).or_not()
   )
   .map(|(val, typ)| Expr(val, typ.map(Box::new)))
-  })
+  }).labelled("Expression")
 }
\ No newline at end of file
diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs
index 92a1549..92450e2 100644
--- a/src/parse/lexer.rs
+++ b/src/parse/lexer.rs
@@ -1,6 +1,6 @@
-use std::{ops::Range, iter};
+use std::{ops::Range, iter, fmt};
 use ordered_float::NotNan;
-use chumsky::{Parser, prelude::*, text::whitespace};
+use chumsky::{Parser, prelude::*};
 use std::fmt::Debug;
 use crate::utils::BoxedIter;
 
@@ -14,6 +14,11 @@ impl Debug for Entry {
     // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
   }
 }
+impl Into<(Lexeme, Range<usize>)> for Entry {
+  fn into(self) -> (Lexeme, Range<usize>) {
+    (self.0, self.1)
+  }
+}
 
 #[derive(Clone, PartialEq, Eq, Hash)]
 pub enum Lexeme {
@@ -61,6 +66,9 @@ impl Lexeme {
   pub fn name<T: ToString>(n: T) -> Self {
     Lexeme::Name(n.to_string())
  }
+  pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
+    Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
+  }
   pub fn paren_parser<T, P>(
     expr: P
   ) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
@@ -76,15 +84,20 @@
   }
 }
 
-fn rule_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
-  just('=').ignore_then(
-    choice((
-      none_of("-0123456789").rewind().to(NotNan::new(0f64).unwrap()),
-      number::float_parser().then_ignore(just("=>"))
-    )).map_err_with_span(|err, span| {
-      panic!("Something's up! {:?} {}", span, err)
-    })
-  )
-}
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub struct LexedText(pub Vec<Vec<Entry>>);
+
+impl Debug for LexedText {
+  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    for row in &self.0 {
+      for tok in row {
+        tok.fmt(f)?;
+        f.write_str(" ")?
+      }
+      f.write_str("\n")?
+    }
+    Ok(())
+  }
+}
 
 type LexSubres<'a> = BoxedIter<'a, Entry>;
 
@@ -104,7 +117,7 @@ fn paren_parser<'a>(
   })
 }
 
-pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Vec<Entry>>, Error=Simple<char>> + 'a
+pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, LexedText, Error=Simple<char>> + 'a
 where T: AsRef<str> + Clone {
   let all_ops = ops.iter().map(|o| o.as_ref().to_string())
     .chain(iter::once(".".to_string())).collect::<Vec<_>>();
@@ -114,7 +127,8 @@ where T: AsRef<str> + Clone {
     paren_parser(recurse.clone(), '[', ']'),
     paren_parser(recurse.clone(), '{', '}'),
     choice((
-      rule_parser().map(Lexeme::Rule),
+      just("==").padded().to(Lexeme::rule(0f64)),
+      just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
       comment::comment_parser().map(Lexeme::Comment),
       just("::").padded().to(Lexeme::NS),
       just('\\').padded().to(Lexeme::BS),
@@ -130,5 +144,5 @@ where T: AsRef<str> + Clone {
   }).separated_by(one_of("\t ").repeated())
     .flatten().collect()
     .separated_by(just('\n').then(text::whitespace()).ignored())
-
-}
\ No newline at end of file
+    .map(LexedText)
+}
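With rule_parser deleted, the rule arrows are now plain lexemes: `==` maps straight to priority 0 while `=N=>` routes the number through float_parser, and both end up in the same Lexeme::Rule variant. In other words (the derived Eq on Lexeme makes this checkable):

```rust
use ordered_float::NotNan;
use crate::parse::Lexeme;

fn arrow_demo() {
  // `x == y` and `x =0=> y` now tokenize to the same rule lexeme
  assert_eq!(Lexeme::rule(0f64), Lexeme::Rule(NotNan::new(0.0).unwrap()));
}
```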
diff --git a/src/parse/mod.rs b/src/parse/mod.rs
index f298457..1897c7d 100644
--- a/src/parse/mod.rs
+++ b/src/parse/mod.rs
@@ -7,10 +7,12 @@ mod expression;
 mod sourcefile;
 mod import;
 mod enum_parser;
+mod parse;
 
 pub use sourcefile::FileEntry;
 pub use sourcefile::line_parser;
 pub use sourcefile::imports;
 pub use sourcefile::exported_names;
 pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
-pub use name::is_op;
\ No newline at end of file
+pub use name::is_op;
+pub use parse::{parse, reparse, ParseError};
\ No newline at end of file
diff --git a/src/parse/parse.rs b/src/parse/parse.rs
new file mode 100644
index 0000000..46976d5
--- /dev/null
+++ b/src/parse/parse.rs
@@ -0,0 +1,65 @@
+use std::{ops::Range, fmt::Debug};
+
+use chumsky::{prelude::{Simple, end}, Stream, Parser};
+use itertools::Itertools;
+use thiserror::Error;
+
+use crate::expression::Rule;
+
+use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};
+
+
+#[derive(Error, Debug, Clone)]
+pub enum ParseError {
+  #[error("Could not tokenize {0:?}")]
+  Lex(Vec<Simple<char>>),
+  #[error("Could not parse {0:#?}")]
+  Ast(Vec<Simple<Lexeme>>)
+}
+
+pub fn parse<'a, Iter, S, Op>(ops: &[Op], stream: S) -> Result<Vec<FileEntry>, ParseError>
+where
+  Op: 'a + AsRef<str> + Clone,
+  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
+  S: Into<Stream<'a, char, Range<usize>, Iter>> {
+  let lexed = lexer(ops).parse(stream).map_err(ParseError::Lex)?;
+  println!("Lexed:\n{:?}", lexed);
+  let parsr = line_parser().then_ignore(end());
+  let (parsed_lines, errors_per_line) = lexed.0.into_iter().filter_map(|v| {
+    // Find the first invalid position for Stream::from_iter
+    let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
+    // Stream expects tuples, lexer outputs structs
+    let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
+    Some(parsr.parse(Stream::from_iter(end..end+1, tuples)))
+    //                ^^^^^^^^^^
+    // I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
+    // end of input should make little difference
+  }).map(|res| match res {
+    Ok(r) => (Some(r), vec![]),
+    Err(e) => (None, e)
+  }).unzip::<_, _, Vec<_>, Vec<_>>();
+  let total_err = errors_per_line.into_iter()
+    .map(Vec::into_iter).flatten()
+    .collect::<Vec<_>>();
+  if total_err.len() > 0 { Err(ParseError::Ast(total_err)) }
+  else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
+}
+
+pub fn reparse<'a, Iter, S, Op>(ops: &[Op], stream: S, pre: &Vec<FileEntry>)
+-> Result<Vec<FileEntry>, ParseError>
+where
+  Op: 'a + AsRef<str> + Clone,
+  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
+  S: Into<Stream<'a, char, Range<usize>, Iter>> {
+  let result = parse(ops, stream)?;
+  Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
+    if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
+      if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
+        *source = s2.clone()
+      } else {
+        panic!("Preparse and reparse received different row types!")
+      }
+    }
+    output
+  }).collect())
+}
\ No newline at end of file
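The success/error bookkeeping in parse() is what made utils::result_iter_collect obsolete (it is deleted at the bottom of this patch): each line yields either a value or a batch of errors, the pairs are unzipped, and the whole parse fails only if any error list is non-empty. The pattern in isolation, self-contained with stand-in types for FileEntry and Simple:

```rust
fn collect_lines(results: Vec<Result<u32, Vec<String>>>) -> Result<Vec<u32>, Vec<String>> {
  let (oks, errs): (Vec<_>, Vec<_>) = results.into_iter()
    .map(|res| match res {
      Ok(r) => (Some(r), vec![]),
      Err(e) => (None, e)
    }).unzip();
  let total_err = errs.into_iter().flatten().collect::<Vec<_>>();
  if !total_err.is_empty() { Err(total_err) }
  // every None on the success side was paired with at least one error
  else { Ok(oks.into_iter().map(Option::unwrap).collect()) }
}
```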
diff --git a/src/parse/rule.rs b/src/parse/rule.rs
deleted file mode 100644
index ccf2f3f..0000000
--- a/src/parse/rule.rs
+++ /dev/null
@@ -1,34 +0,0 @@
-use chumsky::{self, prelude::*, Parser};
-
-use super::{expression, number::float_parser};
-
-#[derive(Debug, Clone)]
-pub struct Rule {
-  pub source: expression::Expr,
-  pub priority: f64,
-  pub target: expression::Expr
-}
-
-/// Parses a substitution rule of the forms
-///
-/// ```orchid
-/// main = \x. ...
-/// $a + $b = (add $a $b)
-/// (foo bar baz) =1.1=> (foo 1 e)
-/// reee =2=> shadow_reee
-/// shadow_reee =0.9=> reee
-/// ```
-/// TBD whether this disables reee in the specified range or loops forever
-pub fn rule_parser<'a, T: 'a + AsRef<str> + Clone>(
-  pattern_ops: &[T],
-  ops: &[T]
-) -> impl Parser<char, Rule, Error = Simple<char>> + 'a {
-  expression::expression_parser(pattern_ops).padded()
-    .then_ignore(just('='))
-    .then(
-      float_parser().then_ignore(just("=>"))
-        .or_not().map(|prio| prio.unwrap_or(0.0))
-    ).then(expression::expression_parser(ops).padded())
-    .map(|((source, priority), target)| Rule { source, priority, target })
-    .labelled("rule")
-}
diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs
index d316f2c..e9a5d21 100644
--- a/src/parse/sourcefile.rs
+++ b/src/parse/sourcefile.rs
@@ -1,15 +1,13 @@
 use std::collections::HashSet;
-use std::fs::File;
 use std::iter;
 
-use crate::{enum_parser, Expr, Clause};
+use crate::{enum_parser, expression::{Expr, Clause, Rule}};
 use crate::utils::BoxedIter;
 
 use super::expression::xpr_parser;
 use super::import;
 use super::import::import_parser;
 use super::lexer::Lexeme;
-use super::name;
 
 use chumsky::{Parser, prelude::*};
 use ordered_float::NotNan;
@@ -18,8 +16,7 @@ use ordered_float::NotNan;
 pub enum FileEntry {
   Import(Vec<import::Import>),
   Comment(String),
-  Rule(Vec<Expr>, NotNan<f64>, Vec<Expr>),
-  Export(Vec<Expr>, NotNan<f64>, Vec<Expr>)
+  Rule(Rule, bool)
 }
 
 /// Recursively iterate through all "names" in an expression. It also finds a lot of things that
@@ -70,49 +67,21 @@ pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
       println!("{:?} could not yield an export", s); e
     })
     .ignore_then(rule_parser())
-    .map(|(lhs, prio, rhs)| FileEntry::Export(lhs, prio, rhs)),
+    .map(|(source, prio, target)| FileEntry::Rule(Rule{source, prio, target}, true)),
     // This could match almost anything so it has to go last
-    rule_parser().map(|(lhs, prio, rhs)| FileEntry::Rule(lhs, prio, rhs)),
+    rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{source, prio, target}, false)),
   ))
 }
 
 /// Collect all exported names (and a lot of other words) from a file
 pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
   src.iter().flat_map(|ent| match ent {
-    FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())) as BoxedIter<&Expr>,
+    FileEntry::Rule(Rule{source, target, ..}, true) =>
+      Box::new(source.iter().chain(target.iter())) as BoxedIter<&Expr>,
     _ => Box::new(iter::empty())
   }).map(find_all_names).flatten().collect()
 }
-
-// #[allow(dead_code)]
-/// Collect all operators defined in a file (and some other words)
-fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
-  let all_names:HashSet<&Vec<String>> = src.iter().flat_map(|ent| match ent {
-    FileEntry::Rule(s, _, d) =>
-      if exported_only {Box::new(iter::empty()) as BoxedIter<&Expr>}
-      else {Box::new(s.iter().chain(d.iter()))}
-    FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())),
-    _ => Box::new(iter::empty())
-  }).map(find_all_names).flatten().collect();
-  // Dedupe stage of dubious value; collecting into a hashset may take longer than
-  // handling duplicates would with a file of sensible size.
-  all_names.into_iter()
-    .filter_map(|name|
-      // If it's namespaced, it's imported.
-      if name.len() == 1 && name::is_op(&name[0]) {Some(&name[0])}
-      else {None}
-    ).collect()
-}
-
-// #[allow(dead_code)]
-/// Collect all operators from a file
-pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
-// #[allow(dead_code)]
-/// Collect exported operators from a file (plus some extra)
-pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
-
-
 /// Summarize all imports from a file in a single list of qualified names
 pub fn imports<'a, 'b, I>(
   src: I
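Folding FileEntry::Export into Rule(Rule, bool) means export status is now a flag on the same variant, so consumers match on the flag rather than duplicating rule logic across two arms, as exported_names does above. For example (FileEntry as defined in this diff):

```rust
use crate::parse::FileEntry;

fn is_exported(entry: &FileEntry) -> bool {
  matches!(entry, FileEntry::Rule(_, true))
}
```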
diff --git a/src/project/expr.rs b/src/project/expr.rs
deleted file mode 100644
index 1d0fb43..0000000
--- a/src/project/expr.rs
+++ /dev/null
@@ -1,25 +0,0 @@
-#[derive(Debug, Clone)]
-pub enum Literal {
-  Num(f64),
-  Int(u64),
-  Char(char),
-  Str(String),
-}
-
-#[derive(Debug, Clone)]
-pub enum Token {
-  Literal(Literal),
-  Name {
-    qualified: Vec<String>,
-    local: Option<String>
-  },
-  S(Vec<Expr>),
-  Lambda(String, Option<Vec<Expr>>, Vec<Expr>),
-  Auto(Option<String>, Option<Vec<Expr>>, Vec<Expr>)
-}
-
-#[derive(Debug, Clone)]
-pub struct Expr {
-  pub token: Token,
-  pub typ: Option<Box<Expr>>
-}
\ No newline at end of file
diff --git a/src/project/file_loader.rs b/src/project/file_loader.rs
index ebe1a29..81f87b9 100644
--- a/src/project/file_loader.rs
+++ b/src/project/file_loader.rs
@@ -40,8 +40,10 @@ pub fn file_loader(proj: PathBuf) -> impl FnMut(Vec<String>) -> Result<Loaded, LoadingError>
diff --git a/src/project/mod.rs b/src/project/mod.rs
--- a/src/project/mod.rs
+++ b/src/project/mod.rs
@@ -9,13 +9,1 @@
-#[derive(Debug, Clone)]
-pub struct Module {
-  pub rules: Vec<Rule>,
-  pub exports: Vec<String>,
-  pub references: Vec<Vec<String>>
-}
-
-#[derive(Debug, Clone)]
-pub struct Rule {
-  pub source: super::Expr,
-  pub priority: f64,
-  pub target: super::Expr
-}
+use crate::expression::Rule;
\ No newline at end of file
diff --git a/src/project/module_error.rs b/src/project/module_error.rs
new file mode 100644
index 0000000..a346160
--- /dev/null
+++ b/src/project/module_error.rs
@@ -0,0 +1,31 @@
+use thiserror::Error;
+
+use crate::parse::ParseError;
+
+use super::name_resolver::ResolutionError;
+
+#[derive(Error, Debug, Clone)]
+pub enum ModuleError<ELoad> where ELoad: Clone {
+  #[error("Resolution cycle")]
+  ResolutionCycle,
+  #[error("File not found: {0}")]
+  Load(ELoad),
+  #[error("Failed to parse: {0:?}")]
+  Syntax(ParseError),
+  #[error("Not a module")]
+  None
+}
+
+impl<T> From<ParseError> for ModuleError<T> where T: Clone {
+  fn from(pars: ParseError) -> Self { Self::Syntax(pars) }
+}
+
+impl<T> From<ResolutionError<ModuleError<T>>> for ModuleError<T> where T: Clone {
+  fn from(res: ResolutionError<ModuleError<T>>) -> Self {
+    match res {
+      ResolutionError::Cycle(_) => ModuleError::ResolutionCycle,
+      ResolutionError::NoModule(_) => ModuleError::None,
+      ResolutionError::Delegate(d) => d
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/project/name_resolver.rs b/src/project/name_resolver.rs
index 9978872..df008d1 100644
--- a/src/project/name_resolver.rs
+++ b/src/project/name_resolver.rs
@@ -3,7 +3,7 @@ use thiserror::Error;
 
 use crate::utils::Substack;
-use crate::{Expr, Clause, Literal};
+use crate::expression::{Expr, Clause};
 
 type ImportMap = HashMap<String, Vec<String>>;
diff --git a/src/project/parse_error.rs b/src/project/parse_error.rs
deleted file mode 100644
index d5c1149..0000000
--- a/src/project/parse_error.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-use chumsky::prelude::Simple;
-use thiserror::Error;
-
-use super::name_resolver::ResolutionError;
-
-#[derive(Error, Debug, Clone)]
-pub enum ParseError<ELoad> where ELoad: Clone {
-  #[error("Resolution cycle")]
-  ResolutionCycle,
-  #[error("File not found: {0}")]
-  Load(ELoad),
-  #[error("Failed to parse: {0:?}")]
-  Syntax(Vec<Simple<char>>),
-  #[error("Not a module")]
-  None
-}
-
-impl<T> From<Vec<Simple<char>>> for ParseError<T> where T: Clone {
-  fn from(simp: Vec<Simple<char>>) -> Self { Self::Syntax(simp) }
-}
-
-impl<T> From<ResolutionError<ParseError<T>>> for ParseError<T> where T: Clone {
-  fn from(res: ResolutionError<ParseError<T>>) -> Self {
-    match res {
-      ResolutionError::Cycle(_) => ParseError::ResolutionCycle,
-      ResolutionError::NoModule(_) => ParseError::None,
-      ResolutionError::Delegate(d) => d
-    }
-  }
-}
\ No newline at end of file
diff --git a/src/project/prefix.rs b/src/project/prefix.rs
index d16c747..5e55c89 100644
--- a/src/project/prefix.rs
+++ b/src/project/prefix.rs
@@ -1,4 +1,4 @@
-use crate::{Expr, Clause};
+use crate::expression::{Expr, Clause};
 
 /// Replaces the first element of a name with the matching prefix from a prefix map
diff --git a/src/project/rule_collector.rs b/src/project/rule_collector.rs
index 1bc4361..a52587c 100644
--- a/src/project/rule_collector.rs
+++ b/src/project/rule_collector.rs
@@ -1,193 +1,206 @@
-// use std::collections::{HashMap, HashSet, VecDeque};
-// use std::fmt::Debug;
-// use std::rc::Rc;
+use std::collections::{HashMap, HashSet, VecDeque};
+use std::fmt::Debug;
+use std::rc::Rc;
 
-// use chumsky::Parser;
+use crate::expression::Rule;
+use crate::parse::{self, FileEntry};
+use crate::utils::Cache;
 
-// use crate::parse::{self, line_parser, FileEntry};
-// use crate::utils::Cache;
+use super::name_resolver::NameResolver;
+use super::module_error::ModuleError;
+use super::prefix::prefix_expr;
+use super::loaded::Loaded;
+
+type ParseResult<T, ELoad> = Result<T, ModuleError<ELoad>>;
 
-// use super::name_resolver::NameResolver;
-// use super::parse_error::ParseError;
-// use super::prefix::prefix_expr;
-// use super::loaded::Loaded;
+#[derive(Debug, Clone)]
+pub struct Module {
+  pub rules: Vec<Rule>,
+  pub exports: Vec<String>,
+  pub references: Vec<Vec<String>>
+}
 
-// type ParseResult<T, ELoad> = Result<T, ParseError<ELoad>>;
-
-// pub fn rule_collector<F, ELoad>(
-//   mut load_mod: F,
-//   prelude: Vec<String>
-// // ) -> impl FnMut(Vec<String>) -> Result<&'a Vec<super::Rule>, ParseError<ELoad>> + 'a
-// ) -> Cache<Vec<String>, Result<Vec<super::Rule>, ParseError<ELoad>>>
-// where
-//   F: FnMut(Vec<String>) -> Result<Loaded, ELoad>,
-//   ELoad: Clone + Debug
-// {
-//   // Map paths to a namespace with name list (folder) or module with source text (file)
-//   let loaded = Rc::new(Cache::new(move |path: Vec<String>|
-//   -> ParseResult<Loaded, ELoad> {
-//     load_mod(path).map_err(ParseError::Load)
-//   }));
-//   // Map names to the longest prefix that points to a valid module
-//   let modname = Rc::new(Cache::new({
-//     let loaded = Rc::clone(&loaded);
-//     move |symbol: Vec<String>| -> Result<Vec<String>, Vec<ParseError<ELoad>>> {
-//       let mut errv: Vec<ParseError<ELoad>> = Vec::new();
-//       let reg_err = |e, errv: &mut Vec<ParseError<ELoad>>| {
-//         errv.push(e);
-//         if symbol.len() == errv.len() { Err(errv.clone()) }
-//         else { Ok(()) }
-//       };
-//       loop {
-//         let (path, _) = symbol.split_at(symbol.len() - errv.len());
-//         let pathv = path.to_vec();
-//         match loaded.try_find(&pathv) {
-//           Ok(imports) => match imports.as_ref() {
-//             Loaded::Module(_) => break Ok(pathv.clone()),
-//             _ => reg_err(ParseError::None, &mut errv)?
-//           },
-//           Err(err) => reg_err(err, &mut errv)?
-//         }
-//       }
-//     }
-//   }));
-//   // Preliminarily parse a file, substitution rules and imports are valid
-//   let preparsed = Rc::new(Cache::new({
-//     let preparser = line_parser(&prelude, &prelude);
-//     let loaded = Rc::clone(&loaded);
-//     move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
-//       let loaded = loaded.try_find(&path)?;
-//       if let Loaded::Module(source) = loaded.as_ref() {
-//         Ok(preparser.parse(source.as_str())?)
-//       } else {Err(ParseError::None)}
-//     }
-//   }));
-//   // Collect all toplevel names exported from a given file
-//   let exports = Rc::new(Cache::new({
-//     let loaded = Rc::clone(&loaded);
-//     let preparsed = Rc::clone(&preparsed);
-//     move |path: Vec<String>| -> ParseResult<Vec<String>, ELoad> {
-//       let loaded = loaded.try_find(&path)?;
-//       if let Loaded::Namespace(names) = loaded.as_ref() {
-//         return Ok(names.clone());
-//       }
-//       let preparsed = preparsed.try_find(&path)?;
-//       Ok(parse::exported_names(&preparsed)
-//         .into_iter()
-//         .map(|n| n[0].clone())
-//         .collect())
-//     }
-//   }));
-//   // Collect all toplevel names imported by a given file
-//   let imports = Rc::new(Cache::new({
-//     let preparsed = Rc::clone(&preparsed);
-//     let exports = Rc::clone(&exports);
-//     move |path: Vec<String>| -> ParseResult<HashMap<String, Vec<String>>, ELoad> {
-//       let entv = preparsed.try_find(&path)?.clone();
-//       let import_entries = parse::imports(entv.iter());
-//       let mut imported_symbols: HashMap<String, Vec<String>> = HashMap::new();
-//       for imp in import_entries {
-//         let export = exports.try_find(&imp.path)?;
-//         if let Some(ref name) = imp.name {
-//           if export.contains(&name) {
-//             imported_symbols.insert(name.clone(), imp.path.clone());
-//           }
-//         } else {
-//           for exp in export.as_ref() {
-//             imported_symbols.insert(exp.clone(), imp.path.clone());
-//           }
-//         }
-//       }
-//       Ok(imported_symbols)
-//     }
-//   }));
-//   // Final parse, operators are correctly separated
-//   let parsed = Rc::new(Cache::new({
-//     let imports = Rc::clone(&imports);
-//     let loaded = Rc::clone(&loaded);
-//     move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
-//       let imported_ops: Vec<String> =
-//         imports.try_find(&path)?
-//         .keys()
-//         .chain(prelude.iter())
-//         .filter(|s| parse::is_op(s))
-//         .cloned()
-//         .collect();
-//       let parser = file_parser(&prelude, &imported_ops);
-//       if let Loaded::Module(source) = loaded.try_find(&path)?.as_ref() {
-//         Ok(parser.parse(source.as_str())?)
-//       } else { Err(ParseError::None) }
-//     }
-//   }));
-//   let mut name_resolver = NameResolver::new({
-//     let modname = Rc::clone(&modname);
-//     move |path| {
-//       Some(modname.try_find(path).ok()?.as_ref().clone())
-//     }
-//   }, {
-//     let imports = Rc::clone(&imports);
-//     move |path| {
-//       imports.try_find(path).map(|f| f.as_ref().clone())
-//     }
-//   });
-//   // Turn parsed files into a bag of rules and a list of toplevel export names
-//   let resolved = Rc::new(Cache::new({
-//     let parsed = Rc::clone(&parsed);
-//     let exports = Rc::clone(&exports);
-//     let imports = Rc::clone(&imports);
-//     let modname = Rc::clone(&modname);
-//     move |path: Vec<String>| -> ParseResult<super::Module, ELoad> {
-//       let module = super::Module {
-//         rules: parsed.try_find(&path)?
-//           .iter()
-//           .filter_map(|ent| {
-//             if let FileEntry::Export(s) | FileEntry::Rule(s) = ent {
-//               Some(super::Rule {
-//                 source: prefix_expr(&s.source, &path),
-//                 target: prefix_expr(&s.target, &path),
-//                 priority: s.priority,
-//               })
-//             } else { None }
-//           })
-//           .map(|rule| Ok(super::Rule {
-//             source: name_resolver.process_expression(&rule.source)?,
-//             target: name_resolver.process_expression(&rule.target)?,
-//             ..rule
-//           }))
-//           .collect::<ParseResult<Vec<super::Rule>, ELoad>>()?,
-//         exports: exports.try_find(&path)?.as_ref().clone(),
-//         references: imports.try_find(&path)?
-//           .values()
-//           .filter_map(|imps| {
-//             modname.try_find(&imps).ok().map(|r| r.as_ref().clone())
-//           })
-//           .collect()
-//       };
-//       Ok(module)
-//     }
-//   }));
-//   let all_rules = Cache::new({
-//     let resolved = Rc::clone(&resolved);
-//     move |path: Vec<String>| -> ParseResult<Vec<super::Rule>, ELoad> {
-//       let mut processed: HashSet<Vec<String>> = HashSet::new();
-//       let mut rules: Vec<super::Rule> = Vec::new();
-//       let mut pending: VecDeque<Vec<String>> = VecDeque::new();
-//       pending.push_back(path);
-//       while let Some(el) = pending.pop_front() {
-//         let resolved = resolved.try_find(&el)?;
-//         processed.insert(el.clone());
-//         pending.extend(
-//           resolved.references.iter()
-//             .filter(|&v| !processed.contains(v))
-//             .cloned()
-//         );
-//         rules.extend(
-//           resolved.rules.iter().cloned()
-//         )
-//       };
-//       Ok(rules)
-//     }
-//   });
-//   return all_rules;
-// }
+pub fn rule_collector<F, ELoad>(
+  mut load_mod: F,
+  prelude: Vec<String>
+// ) -> impl FnMut(Vec<String>) -> Result<&'a Vec<super::Rule>, ParseError<ELoad>> + 'a
+) -> Cache<Vec<String>, Result<Vec<Rule>, ModuleError<ELoad>>>
+where
+  F: FnMut(Vec<String>) -> Result<Loaded, ELoad>,
+  ELoad: Clone + Debug
+{
+  // Map paths to a namespace with name list (folder) or module with source text (file)
+  let loaded = Rc::new(Cache::new(move |path: Vec<String>|
+  -> ParseResult<Loaded, ELoad> {
+    load_mod(path).map_err(ModuleError::Load)
+  }));
+  // Map names to the longest prefix that points to a valid module
+  let modname = Rc::new(Cache::new({
+    let loaded = Rc::clone(&loaded);
+    move |symbol: Vec<String>| -> Result<Vec<String>, Vec<ModuleError<ELoad>>> {
+      let mut errv: Vec<ModuleError<ELoad>> = Vec::new();
+      let reg_err = |e, errv: &mut Vec<ModuleError<ELoad>>| {
+        errv.push(e);
+        if symbol.len() == errv.len() { Err(errv.clone()) }
+        else { Ok(()) }
+      };
+      loop {
+        let (path, _) = symbol.split_at(symbol.len() - errv.len());
+        let pathv = path.to_vec();
+        match loaded.try_find(&pathv) {
+          Ok(imports) => match imports.as_ref() {
+            Loaded::Module(_) => break Ok(pathv.clone()),
+            _ => reg_err(ModuleError::None, &mut errv)?
+          },
+          Err(err) => reg_err(err, &mut errv)?
+        }
+      }
+    }
+  }));
+  // Preliminarily parse a file, substitution rules and imports are valid
+  let preparsed = Rc::new(Cache::new({
+    let loaded = Rc::clone(&loaded);
+    let prelude2 = prelude.clone();
+    move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
+      let loaded = loaded.try_find(&path)?;
+      if let Loaded::Module(source) = loaded.as_ref() {
+        Ok(parse::parse(&prelude2, source.as_str())?)
+      } else {Err(ModuleError::None)}
+    }
+  }));
+  // Collect all toplevel names exported from a given file
+  let exports = Rc::new(Cache::new({
+    let loaded = Rc::clone(&loaded);
+    let preparsed = Rc::clone(&preparsed);
+    move |path: Vec<String>| -> ParseResult<Vec<String>, ELoad> {
+      let loaded = loaded.try_find(&path)?;
+      if let Loaded::Namespace(names) = loaded.as_ref() {
+        return Ok(names.clone());
+      }
+      let preparsed = preparsed.try_find(&path)?;
+      Ok(parse::exported_names(&preparsed)
+        .into_iter()
+        .map(|n| n[0].clone())
+        .collect())
+    }
+  }));
+  // Collect all toplevel names imported by a given file
+  let imports = Rc::new(Cache::new({
+    let preparsed = Rc::clone(&preparsed);
+    let exports = Rc::clone(&exports);
+    move |path: Vec<String>| -> ParseResult<HashMap<String, Vec<String>>, ELoad> {
+      let entv = preparsed.try_find(&path)?.clone();
+      let import_entries = parse::imports(entv.iter());
+      let mut imported_symbols: HashMap<String, Vec<String>> = HashMap::new();
+      for imp in import_entries {
+        let export = exports.try_find(&imp.path)?;
+        if let Some(ref name) = imp.name {
+          if export.contains(&name) {
+            imported_symbols.insert(name.clone(), imp.path.clone());
+          }
+        } else {
+          for exp in export.as_ref() {
+            imported_symbols.insert(exp.clone(), imp.path.clone());
+          }
+        }
+      }
+      Ok(imported_symbols)
+    }
+  }));
+  // Final parse, operators are correctly separated
+  let parsed = Rc::new(Cache::new({
+    let preparsed = Rc::clone(&preparsed);
+    let imports = Rc::clone(&imports);
+    let loaded = Rc::clone(&loaded);
+    move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
+      let imported_ops: Vec<String> =
+        imports.try_find(&path)?
+        .keys()
+        .chain(prelude.iter())
+        .filter(|s| parse::is_op(s))
+        .cloned()
+        .collect();
+      // let parser = file_parser(&prelude, &imported_ops);
+      let pre = preparsed.try_find(&path)?;
+      if let Loaded::Module(source) = loaded.try_find(&path)?.as_ref() {
+        Ok(parse::reparse(&imported_ops, source.as_str(), &pre)?)
+      } else { Err(ModuleError::None) }
+    }
+  }));
+  let mut name_resolver = NameResolver::new({
+    let modname = Rc::clone(&modname);
+    move |path| {
+      Some(modname.try_find(path).ok()?.as_ref().clone())
+    }
+  }, {
+    let imports = Rc::clone(&imports);
+    move |path| {
+      imports.try_find(path).map(|f| f.as_ref().clone())
+    }
+  });
+  // Turn parsed files into a bag of rules and a list of toplevel export names
+  let resolved = Rc::new(Cache::new({
+    let parsed = Rc::clone(&parsed);
+    let exports = Rc::clone(&exports);
+    let imports = Rc::clone(&imports);
+    let modname = Rc::clone(&modname);
+    move |path: Vec<String>| -> ParseResult<Module, ELoad> {
+      let module = Module {
+        rules: parsed.try_find(&path)?
+          .iter()
+          .filter_map(|ent| {
+            if let FileEntry::Rule(Rule{source, prio, target}, _) = ent {
+              Some(Rule {
+                source: source.iter().map(|ex| prefix_expr(ex, &path)).collect(),
+                target: target.iter().map(|ex| prefix_expr(ex, &path)).collect(),
+                prio: *prio,
+              })
+            } else { None }
+          })
+          .map(|rule| Ok(super::Rule {
+            source: rule.source.iter()
+              .map(|ex| name_resolver.process_expression(ex))
+              .collect::<Result<Vec<_>, _>>()?,
+            target: rule.target.iter()
+              .map(|ex| name_resolver.process_expression(ex))
+              .collect::<Result<Vec<_>, _>>()?,
+            // source: name_resolver.process_expression(&rule.source)?,
+            // target: name_resolver.process_expression(&rule.target)?,
+            ..rule
+          }))
+          .collect::<ParseResult<Vec<Rule>, ELoad>>()?,
+        exports: exports.try_find(&path)?.as_ref().clone(),
+        references: imports.try_find(&path)?
+          .values()
+          .filter_map(|imps| {
+            modname.try_find(&imps).ok().map(|r| r.as_ref().clone())
+          })
+          .collect()
+      };
+      Ok(module)
+    }
+  }));
+  let all_rules = Cache::new({
+    let resolved = Rc::clone(&resolved);
+    move |path: Vec<String>| -> ParseResult<Vec<Rule>, ELoad> {
+      let mut processed: HashSet<Vec<String>> = HashSet::new();
+      let mut rules: Vec<Rule> = Vec::new();
+      let mut pending: VecDeque<Vec<String>> = VecDeque::new();
+      pending.push_back(path);
+      while let Some(el) = pending.pop_front() {
+        let resolved = resolved.try_find(&el)?;
+        processed.insert(el.clone());
+        pending.extend(
+          resolved.references.iter()
+            .filter(|&v| !processed.contains(v))
+            .cloned()
+        );
+        rules.extend(
+          resolved.rules.iter().cloned()
+        )
+      };
+      Ok(rules)
+    }
+  });
+  return all_rules;
+}
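Every stage of rule_collector is a memoized Cache keyed by module path, with later stages holding Rc clones of earlier ones, so a module is loaded, preparsed and resolved at most once no matter how many importers reach it. A stripped-down stand-in for that pattern (hypothetical; the real utils::Cache additionally threads Result through try_find):

```rust
use std::{cell::RefCell, collections::HashMap, hash::Hash, rc::Rc};

struct Memo<K, V> {
  f: Box<dyn Fn(K) -> V>,
  store: RefCell<HashMap<K, Rc<V>>>
}

impl<K: Hash + Eq + Clone, V> Memo<K, V> {
  fn new(f: impl Fn(K) -> V + 'static) -> Self {
    Memo { f: Box::new(f), store: RefCell::new(HashMap::new()) }
  }
  // Compute on first use, then serve the shared cached value
  fn find(&self, k: &K) -> Rc<V> {
    if let Some(v) = self.store.borrow().get(k) { return Rc::clone(v) }
    let v = Rc::new((self.f)(k.clone()));
    self.store.borrow_mut().insert(k.clone(), Rc::clone(&v));
    v
  }
}
```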
diff --git a/src/rule/mod.rs b/src/rule/mod.rs
new file mode 100644
index 0000000..2ef3b25
--- /dev/null
+++ b/src/rule/mod.rs
@@ -0,0 +1 @@
+mod rule;
\ No newline at end of file
diff --git a/src/rule/rule.rs b/src/rule/rule.rs
new file mode 100644
index 0000000..58574fc
--- /dev/null
+++ b/src/rule/rule.rs
@@ -0,0 +1,38 @@
+use std::cmp::{min, max};
+
+use hashbrown::HashSet;
+
+use crate::expression::Expr;
+
+pub trait Rule {
+  type OutIter: Iterator<Item = Option<Expr>>;
+  /// The minimum and maximum number of symbols this rule may match
+  fn len(&self) -> (Option<usize>, Option<usize>);
+  /// The exact tokens the pattern consumes (None if varies)
+  fn consumes(&self) -> Option<HashSet<Vec<String>>>;
+  /// The exact tokens the pattern produces (None if varies)
+  fn produces(&self) -> Option<HashSet<Vec<String>>>;
+  /// Check if the slice matches, and produce the necessary transformations
+  fn produce(&self, base: &[Expr]) -> Option<Self::OutIter>;
+  /// Try all subsections of the vector of appropriate size, longest first, front-to-back.
+  /// Match the first, execute the substitution, return the vector and whether any
+  /// substitutions happened
+  fn apply(&self, mut base: Vec<Expr>) -> (Vec<Expr>, bool) {
+    let len_range = self.len();
+    let lo = max(len_range.0.unwrap_or(1), 1);
+    let hi = min(len_range.1.unwrap_or(base.len()), base.len());
+    // Inclusive bounds so that single-width and full-slice windows are tried too
+    for width in (lo..=hi).rev() {
+      let starts = (0..=base.len() - width).into_iter();
+      let first_match = starts.filter_map(|start| {
+        self.produce(&base[start..start+width])
+          .map(|res| (start, res))
+      }).next();
+      if let Some((start, substitution)) = first_match {
+        let diff = substitution.enumerate().filter_map(|(i, opt)| opt.map(|val| (i, val)));
+        for (idx, item) in diff { base[start + idx] = item }
+        return (base, true)
+      }
+    }
+    (base, false)
+  }
+}
\ No newline at end of file
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 3f2c294..28e9b38 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -1,8 +1,6 @@
 mod cache;
 mod substack;
-mod result_iter_collect;
 
 pub use cache::Cache;
 pub use substack::Substack;
-pub use result_iter_collect::result_iter_collect;
 
 pub type BoxedIter<'a, T> = Box<dyn Iterator<Item = T> + 'a>;
\ No newline at end of file
diff --git a/src/utils/result_iter_collect.rs b/src/utils/result_iter_collect.rs
deleted file mode 100644
index 92c830a..0000000
--- a/src/utils/result_iter_collect.rs
+++ /dev/null
@@ -1,19 +0,0 @@
-pub fn result_iter_collect<T, E>(i: &mut dyn Iterator<Item = Result<T, E>>)
--> (Vec<Option<T>>, Vec<Option<E>>) {
-  i.fold((Vec::new(), Vec::new()), |(mut succ, mut err), next| {
-    match next {
-      Ok(res) => succ.push(Some(res)),
-      Err(e) => err.push(Some(e))
-    }
-    (succ, err)
-  })
-}
-
-pub fn recoverable_iter_collect<T, E>(i: &mut dyn Iterator<Item = (Option<T>, Vec<E>)>)
--> (Vec<Option<T>>, Vec<E>) {
-  i.fold((Vec::new(), Vec::new()), |(mut succ, mut err), (res, mut errv)| {
-    succ.push(res);
-    err.append(&mut errv);
-    (succ, err)
-  })
-}
\ No newline at end of file
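The Rule trait above splits matching (produce) from searching (apply). A minimal implementing sketch, a rule that renames a single-token name, with illustrative names throughout; it assumes the trait is actually re-exported, since src/rule/mod.rs currently keeps the module private:

```rust
use hashbrown::HashSet;
use crate::expression::{Clause, Expr};
use crate::rule::Rule; // hypothetical re-export

struct Rename { from: String, to: String }

impl Rule for Rename {
  type OutIter = std::iter::Once<Option<Expr>>;
  // Matches exactly one token
  fn len(&self) -> (Option<usize>, Option<usize>) { (Some(1), Some(1)) }
  fn consumes(&self) -> Option<HashSet<Vec<String>>> {
    Some([vec![self.from.clone()]].into_iter().collect())
  }
  fn produces(&self) -> Option<HashSet<Vec<String>>> {
    Some([vec![self.to.clone()]].into_iter().collect())
  }
  fn produce(&self, base: &[Expr]) -> Option<Self::OutIter> {
    match base {
      [Expr(Clause::Name(n), _)] if n.len() == 1 && n[0] == self.from =>
        Some(std::iter::once(Some(Expr(Clause::Name(vec![self.to.clone()]), None)))),
      _ => None
    }
  }
}
```

apply() then walks windows widest-first and leftmost-first, and writes back only the positions where the produced iterator yields Some.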