diff --git a/examples/dummy_project/main.orc b/examples/dummy_project/main.orc index 1fbc3e8..05c820e 100644 --- a/examples/dummy_project/main.orc +++ b/examples/dummy_project/main.orc @@ -6,6 +6,8 @@ greet =1=> (\name. printf out "Hello {}!\n" [name]) -- multi-word exported rule export ;> $a =200=> (greet $a) +reeee := \$a.b + -- single-word exported rule export main == ( print "What is your name?" >> diff --git a/src/expression.rs b/src/expression.rs index c0bcf07..bbf2f12 100644 --- a/src/expression.rs +++ b/src/expression.rs @@ -36,25 +36,37 @@ impl Debug for Expr { } } -impl Expr { - /// Replace all occurences of a name in the tree with a parameter, to bypass name resolution - pub fn bind_parameter(&mut self, name: &str) { - self.0.bind_parameter(name); - if let Some(typ) = &mut self.1 { - typ.bind_parameter(name); - } - } -} - /// An S-expression as read from a source file #[derive(Clone, PartialEq, Eq, Hash)] pub enum Clause { Literal(Literal), - Name(Vec), + Name{ + local: Option, + qualified: Vec + }, S(char, Vec), Lambda(String, Vec, Vec), Auto(Option, Vec, Vec), - Parameter(String) + /// Second parameter: + /// None => matches one token + /// Some(prio) => prio is the sizing priority for the vectorial (higher prio grows first) + Placeh(String, Option), +} +impl Clause { + pub fn body(&self) -> Option<&Vec> { + match self { + Clause::Auto(_, _, body) | + Clause::Lambda(_, _, body) | + Clause::S(_, body) => Some(body), + _ => None + } + } + pub fn typ(&self) -> Option<&Vec> { + match self { + Clause::Auto(_, typ, _) | Clause::Lambda(_, typ, _) => Some(typ), + _ => None + } + } } fn fmt_expr_seq(it: &mut dyn Iterator, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -69,7 +81,9 @@ impl Debug for Clause { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Literal(arg0) => write!(f, "{:?}", arg0), - Self::Name(arg0) => write!(f, "{}", arg0.join("::")), + Self::Name{local, qualified} => + if let Some(local) = local {write!(f, "{}<{}>", qualified.join("::"), local)} + else {write!(f, "{}", qualified.join("::"))}, Self::S(del, items) => { f.write_str(&del.to_string())?; fmt_expr_seq(&mut items.iter(), f)?; @@ -90,24 +104,9 @@ impl Debug for Clause { f.write_str(":")?; fmt_expr_seq(&mut argtyp.iter(), f)?; f.write_str(".")?; fmt_expr_seq(&mut body.iter(), f) }, - Self::Parameter(name) => write!(f, "`{}", name) - } - } -} - -impl Clause { - /// Replace all occurences of a name in the tree with a parameter, to bypass name resolution - pub fn bind_parameter(&mut self, name: &str) { - match self { - Clause::Name(n) => if n.len() == 1 && n[0] == name { - *self = Clause::Parameter(name.to_string()) - } - Clause::S(_, exprv) => for expr in exprv { expr.bind_parameter(name) } - Clause::Lambda(_, typ, body) | Clause::Auto(_, typ, body) => { - for expr in typ { expr.bind_parameter(name) } - for expr in body { expr.bind_parameter(name) } - } - _ => () + // Self::Parameter(name) => write!(f, "`{}", name), + Self::Placeh(name, None) => write!(f, "${}", name), + Self::Placeh(name, Some(prio)) => write!(f, "...${}:{}", name, prio) } } } diff --git a/src/main.rs b/src/main.rs index 5539539..d3f0163 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,7 +26,7 @@ fn main() { let collect_rules = rule_collector(move |n| { if n == vec!["prelude"] { Ok(Loaded::Module(PRELUDE.to_string())) } else { file_loader(cwd.clone())(n) } - }, literal(&["...", ">>", ">>=", "[", "]", ",", "$", "=", "=>"])); + }, literal(&["...", ">>", ">>=", "[", "]", ",", "=", "=>"])); match 
collect_rules.try_find(&literal(&["main"])) { Ok(rules) => for rule in rules.iter() { println!("{rule:?}") diff --git a/src/parse/expression.rs b/src/parse/expression.rs index 6de7d18..1644554 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -29,7 +29,7 @@ where P: Parser> + Clone { .then_ignore(enum_parser!(Lexeme::Comment).repeated()) .then(expr.repeated().at_least(1)) .map(|((name, typ), mut body): ((String, Vec), Vec)| { - for ent in &mut body { ent.bind_parameter(&name) }; + // for ent in &mut body { ent.bind_parameter(&name) }; Clause::Lambda(name, typ, body) }) } @@ -54,9 +54,9 @@ where P: Parser> + Clone { .try_map(|((name, typ), mut body), s| if name == None && typ.is_empty() { Err(Simple::custom(s, "Auto without name or type has no effect")) } else { - if let Some(n) = &name { - for ent in &mut body { ent.bind_parameter(n) } - } + // if let Some(n) = &name { + // for ent in &mut body { ent.bind_parameter(n) } + // } Ok(Clause::Auto(name, typ, body)) }) } @@ -69,6 +69,13 @@ fn name_parser() -> impl Parser, Error = Simple> + C ).at_least(1) } +fn placeholder_parser() -> impl Parser> + Clone { + enum_parser!(Lexeme::Name).try_map(|name, span| { + name.strip_prefix("$").map(&str::to_string) + .ok_or(Simple::custom(span, "Not a placeholder")) + }) +} + /// Parse an expression without a type annotation pub fn xpr_parser() -> impl Parser> { recursive(|expr| { @@ -76,7 +83,19 @@ pub fn xpr_parser() -> impl Parser> { enum_parser!(Lexeme::Comment).repeated() .ignore_then(choice(( enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Clause::Literal), - name_parser().map(Clause::Name), + placeholder_parser().map(|n| Clause::Placeh(n, None)), + just(Lexeme::name("...")) + .ignore_then(placeholder_parser()) + .then( + just(Lexeme::Type) + .ignore_then(enum_parser!(Lexeme::Int)) + .or_not().map(Option::unwrap_or_default) + ) + .map(|(name, prio)| Clause::Placeh(name, Some(prio.try_into().unwrap()))), + name_parser().map(|qualified| Clause::Name { + local: if qualified.len() == 1 {Some(qualified[0].clone())} else {None}, + qualified + }), sexpr_parser(expr.clone()), lambda_parser(expr.clone()), auto_parser(expr.clone()) diff --git a/src/parse/name.rs b/src/parse/name.rs index 6452a8f..6e33917 100644 --- a/src/parse/name.rs +++ b/src/parse/name.rs @@ -29,7 +29,7 @@ fn op_parser<'a, T: AsRef + Clone>(ops: &[T]) -> BoxedParser<'a, char, Stri /// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very /// common in maths so it's worth a try. Investigate. 
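// ---- editor's sketch (illustration, not part of the patch) ----
// With '.' added to the excluded characters below, a name token now stops at a
// dot, which is what lets the new `\$a.b` line in examples/dummy_project/main.orc
// separate the parameter `$a` from the body `b`. A std-only approximation of
// that boundary rule; the authoritative version is the filter(...) parser below.
fn leading_name_sketch(src: &str) -> &str {
    // same exclusion list as modname_parser, plus whitespace
    let not_name_char = [':', '\\', '@', '"', '\'', '(', ')', ',', '.'];
    let end = src
        .find(|c: char| not_name_char.contains(&c) || c.is_whitespace())
        .unwrap_or(src.len());
    &src[..end]
}
// leading_name_sketch("$a.b") == "$a"; leading_name_sketch("greet me") == "greet"
// ---------------------------------------------------------------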
pub fn modname_parser<'a>() -> impl Parser> + 'a { - let not_name_char: Vec = vec![':', '\\', '@', '"', '\'', '(', ')', ',']; + let not_name_char: Vec = vec![':', '\\', '@', '"', '\'', '(', ')', ',', '.']; filter(move |c| !not_name_char.contains(c) && !c.is_whitespace()) .repeated().at_least(1) .collect() diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs index e9a5d21..f3fb5c7 100644 --- a/src/parse/sourcefile.rs +++ b/src/parse/sourcefile.rs @@ -1,8 +1,10 @@ use std::collections::HashSet; use std::iter; -use crate::{enum_parser, expression::{Expr, Clause, Rule}}; +use crate::enum_parser; +use crate::expression::{Expr, Clause, Rule}; use crate::utils::BoxedIter; +use crate::utils::Stackframe; use super::expression::xpr_parser; use super::import; @@ -19,32 +21,74 @@ pub enum FileEntry { Rule(Rule, bool) } +fn visit_all_names_clause_recur<'a, F>( + clause: &'a Clause, + binds: Stackframe, + mut cb: &mut F +) where F: FnMut(&'a Vec) { + match clause { + Clause::Auto(name, typ, body) => { + for x in typ.iter() { + visit_all_names_expr_recur(x, binds.clone(), &mut cb) + } + let binds_dup = binds.clone(); + let new_binds = if let Some(n) = name { + binds_dup.push(n.to_owned()) + } else { + binds + }; + for x in body.iter() { + visit_all_names_expr_recur(x, new_binds.clone(), &mut cb) + } + }, + Clause::Lambda(name, typ, body) => { + for x in typ.iter() { + visit_all_names_expr_recur(x, binds.clone(), &mut cb) + } + for x in body.iter() { + visit_all_names_expr_recur(x, binds.push(name.to_owned()), &mut cb) + } + }, + Clause::S(_, body) => for x in body.iter() { + visit_all_names_expr_recur(x, binds.clone(), &mut cb) + }, + Clause::Name{ local, qualified } => { + if let Some(name) = local { + if binds.iter().all(|x| x != name) { + cb(qualified) + } + } + } + _ => (), + } +} + /// Recursively iterate through all "names" in an expression. It also finds a lot of things that /// aren't names, such as all bound parameters. Generally speaking, this is not a very /// sophisticated search. 
/// /// TODO: find a way to exclude parameters -fn find_all_names_recur<'a>(expr: &'a Expr) -> BoxedIter<&'a Vec> { - let proc_clause = |clause: &'a Clause| match clause { - Clause::Auto(_, typ, body) | Clause::Lambda(_, typ, body) => Box::new( - typ.iter().flat_map(find_all_names_recur) - .chain(body.iter().flat_map(find_all_names_recur)) - ) as BoxedIter<&'a Vec>, - Clause::S(_, body) => Box::new( - body.iter().flat_map(find_all_names_recur) - ), - Clause::Name(x) => Box::new(iter::once(x)), - _ => Box::new(iter::empty()) - }; +fn visit_all_names_expr_recur<'a, F>( + expr: &'a Expr, + binds: Stackframe, + cb: &mut F +) where F: FnMut(&'a Vec) { let Expr(val, typ) = expr; + visit_all_names_clause_recur(val, binds.clone(), cb); if let Some(t) = typ { - Box::new(proc_clause(val).chain(find_all_names_recur(t))) - } else { proc_clause(val) } + visit_all_names_expr_recur(t, binds, cb) + } } /// Collect all names that occur in an expression fn find_all_names(expr: &Expr) -> HashSet<&Vec> { - find_all_names_recur(expr).collect() + let mut ret = HashSet::new(); + visit_all_names_expr_recur(expr, Stackframe::new(String::new()), &mut |n| { + if !n.last().unwrap().starts_with("$") { + ret.insert(n); + } + }); + ret } fn rule_parser() -> impl Parser, NotNan, Vec), Error = Simple> { diff --git a/src/project/name_resolver.rs b/src/project/name_resolver.rs index df008d1..0e1e8a1 100644 --- a/src/project/name_resolver.rs +++ b/src/project/name_resolver.rs @@ -1,7 +1,7 @@ use std::{collections::HashMap}; use thiserror::Error; -use crate::utils::Substack; +use crate::utils::Stackframe; use crate::expression::{Expr, Clause}; @@ -43,10 +43,10 @@ where /// Obtains a symbol's originnal name /// Uses a substack to detect loops - fn find_origin_rec( + fn find_origin_rec<'a>( &mut self, - symbol: &Vec, - import_path: &Substack<'_, &Vec> + symbol: &'a Vec, + import_path: Stackframe<'a, &'a Vec> ) -> Result, ResolutionError> { if let Some(cached) = self.cache.get(symbol) { return cached.clone() } // The imports and path of the referenced file and the local name @@ -58,7 +58,7 @@ where if import_path.iter().any(|el| el == &&new_sym) { Err(ResolutionError::Cycle(import_path.iter().cloned().cloned().collect())) } else { - self.find_origin_rec(&new_sym, &import_path.push(symbol)) + self.find_origin_rec(&new_sym, import_path.push(symbol)) } } else { Ok(symbol.clone()) // If not imported, it must be locally defined @@ -92,7 +92,10 @@ where self.process_exprv_rec(typ)?, self.process_exprv_rec(body)? ), - Clause::Name(qualified) => Clause::Name(self.find_origin(qualified)?), + Clause::Name{local, qualified} => Clause::Name{ + local: local.clone(), + qualified: self.find_origin(qualified)? 
+ }, x => x.clone() }) } @@ -105,7 +108,7 @@ where } pub fn find_origin(&mut self, symbol: &Vec) -> Result, ResolutionError> { - self.find_origin_rec(symbol, &Substack::new(symbol)) + self.find_origin_rec(symbol, Stackframe::new(symbol)) } #[allow(dead_code)] diff --git a/src/project/prefix.rs b/src/project/prefix.rs index 5e55c89..46c2566 100644 --- a/src/project/prefix.rs +++ b/src/project/prefix.rs @@ -20,9 +20,10 @@ fn prefix_clause( typ.iter().map(|e| prefix_expr(e, namespace)).collect(), body.iter().map(|e| prefix_expr(e, namespace)).collect(), ), - Clause::Name(name) => Clause::Name ( - namespace.iter().chain(name.iter()).cloned().collect() - ), + Clause::Name{local, qualified} => Clause::Name{ + local: local.clone(), + qualified: namespace.iter().chain(qualified.iter()).cloned().collect() + }, x => x.clone() } } diff --git a/src/project/rule_collector.rs b/src/project/rule_collector.rs index a52587c..8124637 100644 --- a/src/project/rule_collector.rs +++ b/src/project/rule_collector.rs @@ -24,20 +24,20 @@ pub fn rule_collector( mut load_mod: F, prelude: Vec // ) -> impl FnMut(Vec) -> Result<&'a Vec, ParseError> + 'a -) -> Cache, Result, ModuleError>> +) -> Cache<'static, Vec, Result, ModuleError>> where F: FnMut(Vec) -> Result, ELoad: Clone + Debug { // Map paths to a namespace with name list (folder) or module with source text (file) - let loaded = Rc::new(Cache::new(move |path: Vec| + let loaded = Rc::new(Cache::new(move |path: Vec, _| -> ParseResult { load_mod(path).map_err(ModuleError::Load) })); // Map names to the longest prefix that points to a valid module let modname = Rc::new(Cache::new({ let loaded = Rc::clone(&loaded); - move |symbol: Vec| -> Result, Vec>> { + move |symbol: Vec, _| -> Result, Vec>> { let mut errv: Vec> = Vec::new(); let reg_err = |e, errv: &mut Vec>| { errv.push(e); @@ -61,7 +61,7 @@ where let preparsed = Rc::new(Cache::new({ let loaded = Rc::clone(&loaded); let prelude2 = prelude.clone(); - move |path: Vec| -> ParseResult, ELoad> { + move |path: Vec, _| -> ParseResult, ELoad> { let loaded = loaded.try_find(&path)?; if let Loaded::Module(source) = loaded.as_ref() { Ok(parse::parse(&prelude2, source.as_str())?) @@ -72,7 +72,7 @@ where let exports = Rc::new(Cache::new({ let loaded = Rc::clone(&loaded); let preparsed = Rc::clone(&preparsed); - move |path: Vec| -> ParseResult, ELoad> { + move |path: Vec, _| -> ParseResult, ELoad> { let loaded = loaded.try_find(&path)?; if let Loaded::Namespace(names) = loaded.as_ref() { return Ok(names.clone()); @@ -88,7 +88,7 @@ where let imports = Rc::new(Cache::new({ let preparsed = Rc::clone(&preparsed); let exports = Rc::clone(&exports); - move |path: Vec| -> ParseResult>, ELoad> { + move |path: Vec, _| -> ParseResult>, ELoad> { let entv = preparsed.try_find(&path)?.clone(); let import_entries = parse::imports(entv.iter()); let mut imported_symbols: HashMap> = HashMap::new(); @@ -112,7 +112,7 @@ where let preparsed = Rc::clone(&preparsed); let imports = Rc::clone(&imports); let loaded = Rc::clone(&loaded); - move |path: Vec| -> ParseResult, ELoad> { + move |path: Vec, _| -> ParseResult, ELoad> { let imported_ops: Vec = imports.try_find(&path)? .keys() @@ -144,7 +144,7 @@ where let exports = Rc::clone(&exports); let imports = Rc::clone(&imports); let modname = Rc::clone(&modname); - move |path: Vec| -> ParseResult { + move |path: Vec, _| -> ParseResult { let module = Module { rules: parsed.try_find(&path)? 
.iter() @@ -182,7 +182,7 @@ where })); let all_rules = Cache::new({ let resolved = Rc::clone(&resolved); - move |path: Vec| -> ParseResult, ELoad> { + move |path: Vec, _| -> ParseResult, ELoad> { let mut processed: HashSet> = HashSet::new(); let mut rules: Vec = Vec::new(); let mut pending: VecDeque> = VecDeque::new(); diff --git a/src/rule/bad_state_error.rs b/src/rule/bad_state_error.rs deleted file mode 100644 index 91bde8c..0000000 --- a/src/rule/bad_state_error.rs +++ /dev/null @@ -1,11 +0,0 @@ -use std::{fmt, error::Error}; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct BadState(Vec); - -impl fmt::Display for BadState { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "The following key(s) weren't produced by the matching pattern: {:?}", self.0) - } -} -impl Error for BadState {} \ No newline at end of file diff --git a/src/rule/executor.rs b/src/rule/executor.rs deleted file mode 100644 index ad9d8ed..0000000 --- a/src/rule/executor.rs +++ /dev/null @@ -1,14 +0,0 @@ -use crate::expression::Expr; - -use super::{Rule, BadState}; - -pub fn execute(src: &Src, tgt: &Tgt, mut input: Vec) --> Result<(Vec, bool), BadState> where Src: Rule, Tgt: Rule { - let (range, state) = match src.scan_slice(&input) { - Some(res) => res, - None => return Ok((input, false)) - }; - let output = tgt.write(&state)?; - input.splice(range, output); - Ok((input, true)) -} \ No newline at end of file diff --git a/src/rule/executor/mod.rs b/src/rule/executor/mod.rs new file mode 100644 index 0000000..70927a7 --- /dev/null +++ b/src/rule/executor/mod.rs @@ -0,0 +1,4 @@ +mod slice_matcher; +mod state; + +use state::State; \ No newline at end of file diff --git a/src/rule/executor/slice_matcher.rs b/src/rule/executor/slice_matcher.rs new file mode 100644 index 0000000..2a7accd --- /dev/null +++ b/src/rule/executor/slice_matcher.rs @@ -0,0 +1,346 @@ +use hashbrown::HashMap; +use itertools::Itertools; + +use crate::expression::{Expr, Clause}; +use crate::unwrap_or_continue; +use crate::utils::{Side, Cache}; +use super::super::RuleError; +use super::State; + +fn split_at_max_vec(pattern: &[Expr]) -> Option<(&[Expr], (&str, usize), &[Expr])> { + let rngidx = pattern.iter().position_max_by_key(|ex| { + if let Expr(Clause::Placeh(_, Some(prio)), _) = ex { *prio as i64 } else { -1 } + })?; + let (left, not_left) = pattern.split_at(rngidx); + let (placeh, right) = if rngidx == pattern.len() { + (¬_left[0].0, [].as_slice()) + } else { + let (placeh_unary_slice, right) = pattern.split_at(rngidx + 1); + (&placeh_unary_slice[0].0, right) + }; + if let Clause::Placeh(name, Some(prio)) = placeh { + Some((left, (name, *prio), right)) + } else {None} +} + +/// Matcher that applies a pattern to a slice via divide-and-conquer +/// +/// Upon construction, it selects the clause of highest priority, then +/// initializes its internal state for matching that clause and delegates +/// the left and right halves of the pattern to two submatchers. +/// +/// Upon matching, it uses a cache to accelerate the process of executing +/// a pattern on the entire tree. +#[derive(Debug, Clone, Eq)] +pub struct SliceMatcherDnC<'a> { + /// The entire pattern this will match + pattern: &'a [Expr], + /// The exact clause this can match + clause: &'a Clause, + /// Matcher for the parts of the pattern right from us + right_subm: Option>>, + /// Matcher for the parts of the pattern left from us + left_subm: Option>>, + /// Matcher for the body of this clause if it has one. 
+ /// Must be Some if pattern is (Auto, Lambda or S) + body_subm: Option>>, + /// Matcher for the type of this expression if it has one (Auto usually does) + /// Optional + typ_subm: Option>>, +} + +impl<'a> PartialEq for SliceMatcherDnC<'a> { + fn eq(&self, other: &Self) -> bool { + self.pattern == other.pattern + } +} + +impl<'a> std::hash::Hash for SliceMatcherDnC<'a> { + fn hash(&self, state: &mut H) { + self.pattern.hash(state); + } +} + +impl<'a> SliceMatcherDnC<'a> { + /// If this is true, `clause`, `typ_subm`, `body_subm` and `clause_qual_name` are meaningless. + /// If it's false, it's also false for both side matchers. + pub fn clause_is_vectorial(&self) -> bool { + if let Clause::Placeh(_, Some(_)) = self.clause {true} else {false} + } + /// If clause is a name, the qualified name this can match + pub fn clause_qual_name(&self) -> Option<&'a Vec> { + if let Clause::Name { qualified, .. } = self.clause {Some(qualified)} else {None} + } + /// If clause is a Placeh, the key in the state the match will be stored at + pub fn state_key(&self) -> Option<&'a String> { + if let Clause::Placeh(key, _) = self.clause {Some(key)} else {None} + } + pub fn own_max_size(&self, total: usize) -> usize { + if !self.clause_is_vectorial() {return self.len()} + return total - self.min(Side::Left) - self.min(Side::Right) + } + /// Enumerate all valid subdivisions based on the reported size constraints of self and + /// the two subranges + pub fn valid_subdivisions<'b>(&self, + range: &'b [Expr] + ) -> impl Iterator { + let own_size = self.own_max_size(range.len()); + let lmin = self.min(Side::Left); + let lmax = self.max(Side::Left, range.len()); + let rmin = self.min(Side::Right); + let rmax = self.max(Side::Right, range.len()); + let full_len = range.len(); + (1..=own_size).rev().flat_map(move |own_len| { + let wiggle = full_len - lmin - rmin - own_len; + (0..wiggle).map(move |offset| { + let first_break = lmin + offset; + let (left, rest) = range.split_at(first_break); + let (mid, right) = rest.split_at(own_len); + (left, mid, right) + }) + }) + } + + pub fn new(pattern: &'a [Expr]) -> Self { + let (Expr(clause, _), left_subm, right_subm) = if pattern.len() == 1 { + (&pattern[0], None, None) + } else if let Some((left, _, right)) = split_at_max_vec(pattern) {( + &pattern[left.len()], + Some(Box::new(Self::new(left))), + Some(Box::new(Self::new(right))) + )} else {( + &pattern[0], + None, + Some(Box::new(Self::new(&pattern[1..]))) + )}; + Self { + pattern, right_subm, left_subm, clause, + body_subm: clause.body().map(|b| Box::new(Self::new(b))), + typ_subm: clause.typ().map(|t| Box::new(Self::new(t))) + } + } + + /// The shortest slice this pattern can match + fn len(&self) -> usize {self.pattern.len()} + /// Pick a subpattern based on the parameter + fn side(&self, side: Side) -> Option<&Box>> { + match side { + Side::Left => &self.left_subm, + Side::Right => &self.right_subm + }.as_ref() + } + /// The shortest slice the given side can match + fn min(&self, side: Side) -> usize {self.side(side).map_or(0, |right| right.len())} + /// The longest slice the given side can match + fn max(&self, side: Side, total: usize) -> usize { + self.side(side).map_or(0, |m| if m.clause_is_vectorial() { + total - self.min(side.opposite()) - 1 + } else {m.len()}) + } + /// Take the smallest possible slice from the given side + fn slice_min<'b>(&self, side: Side, range: &'b [Expr]) -> &'b [Expr] { + side.slice(self.min(side), range) + } + + /// Matches the body on a range + /// # Panics + /// when called on an 
instance that does not have a body (not Auto, Lambda or S) + fn match_body<'b>(&'a self, + range: &'b [Expr], cache: &Cache<(&'b [Expr], &'a SliceMatcherDnC<'a>), Option> + ) -> Option { + self.body_subm.as_ref().unwrap().match_range_cached(range, cache) + } + /// Matches the type and body on respective ranges + /// # Panics + /// when called on an instance that does not have a body (not Auto, Lambda or S) + fn match_parts<'b>(&'a self, + typ_range: &'b [Expr], body_range: &'b [Expr], + cache: &Cache<(&'b [Expr], &'a SliceMatcherDnC<'a>), Option> + ) -> Option { + let typ_state = if let Some(typ) = &self.typ_subm { + typ.match_range_cached(&typ_range, cache)? + } else {State::new()}; + let body_state = self.match_body(body_range, cache)?; + typ_state + body_state + } + + /// Match the specified side-submatcher on the specified range with the cache + /// In absence of a side-submatcher empty ranges are matched to empty state + fn apply_side_with_cache<'b>(&'a self, + side: Side, range: &'b [Expr], + cache: &Cache<(&'b [Expr], &'a SliceMatcherDnC<'a>), Option> + ) -> Option { + match &self.side(side) { + None => { + if range.len() != 0 {None} + else {Some(State::new())} + }, + Some(m) => cache.try_find(&(range, &m)).map(|s| s.as_ref().to_owned()) + } + } + + fn match_range_scalar_cached<'b>(&'a self, + target: &'b [Expr], + cache: &Cache<(&'b [Expr], &'a SliceMatcherDnC<'a>), Option> + ) -> Option { + let pos = self.min(Side::Left); + if target.len() != self.pattern.len() {return None} + let mut own_state = ( + self.apply_side_with_cache(Side::Left, &target[0..pos], cache)? + + self.apply_side_with_cache(Side::Right, &target[pos+1..], cache) + )?; + match (self.clause, &target[pos].0) { + (Clause::Literal(val), Clause::Literal(tgt)) => { + if val == tgt {Some(own_state)} else {None} + } + (Clause::Placeh(name, None), _) => { + own_state.insert(name, &[target[pos].clone()]) + } + (Clause::S(c, _), Clause::S(c_tgt, body_range)) => { + if c != c_tgt {return None} + own_state + self.match_parts(&[], body_range, cache) + } + (Clause::Name{qualified, ..}, Clause::Name{qualified: q_tgt, ..}) => { + if qualified == q_tgt {Some(own_state)} else {None} + } + (Clause::Lambda(name, _, _), Clause::Lambda(name_tgt, typ_tgt, body_tgt)) => { + // Primarily, the name works as a placeholder + if let Some(state_key) = name.strip_prefix("$") { + own_state = own_state.insert( + state_key, + &[Expr(Clause::Name{ + local: Some(name_tgt.clone()), + qualified: vec![name_tgt.clone()] + }, None)] + )? + // But if you're weird like that, it can also work as a constraint + } else if name != name_tgt {return None} + own_state + self.match_parts(typ_tgt, body_tgt, cache) + } + (Clause::Auto(name_opt, _, _), Clause::Auto(name_range, typ_range, body_range)) => { + if let Some(name) = name_opt { + if let Some(state_name) = name.strip_prefix("$") { + own_state = own_state.insert( + state_name, + &[Expr(Clause::Name{ + local: name_range.clone(), + qualified: name_range.as_ref() + .map(|s| vec![s.clone()]) + .unwrap_or_default() + }, None)] + )? 
+ // TODO: Enforce this at construction, on a type system level + } else {panic!("Auto patterns may only reference, never enforce the name")} + } + own_state + self.match_parts(typ_range, body_range, cache) + }, + _ => None + } + } + + /// Match the range with a vectorial _assuming we are a vectorial_ + fn match_range_vectorial_cached<'b>(&'a self, + name: &str, + target: &'b [Expr], + cache: &Cache<(&'b [Expr], &'a SliceMatcherDnC<'a>), Option> + ) -> Option { + // Step through valid slicings based on reported size constraints in order + // from longest own section to shortest and from left to right + for (left, own, right) in self.valid_subdivisions(target) { + let left_result = unwrap_or_continue!(self.apply_side_with_cache(Side::Left, left, cache)); + let right_result = unwrap_or_continue!(self.apply_side_with_cache(Side::Right, right, cache)); + return Some(unwrap_or_continue!( + right_result.clone() + + left_result.insert(name, own) + )) + } + return None + } + + /// Try and match the specified range + pub fn match_range_cached<'b>(&'a self, + target: &'b [Expr], + cache: &Cache<(&'b [Expr], &'a SliceMatcherDnC<'a>), Option> + ) -> Option { + if self.pattern.len() == 0 { + return if target.len() == 0 {Some(State::new())} else {None} + } + match self.clause { + Clause::Placeh(name, Some(_)) => self.match_range_vectorial_cached(name, target, cache), + _ => self.match_range_scalar_cached(target, cache) + } + } + + pub fn match_range(&self, target: &[Expr]) -> Option { + self.match_range_cached(target,&Cache::<(&[Expr], &SliceMatcherDnC), _>::new( + |(tgt, matcher), cache| { + matcher.match_range_cached(tgt, cache) + } + )) + } +} + +pub fn verify_scalar_vec(pattern: &Expr, is_vec: &mut HashMap) +-> Result<(), String> { + let verify_clause = |clause: &Clause, is_vec: &mut HashMap| -> Result<(), String> { + match clause { + Clause::Placeh(name, prio) => { + if let Some(known) = is_vec.get(name) { + if known != &prio.is_some() { return Err(name.to_string()) } + } else { + is_vec.insert(name.clone(), prio.is_some()); + } + } + Clause::Auto(name, typ, body) => { + if let Some(key) = name.as_ref().map(|key| key.strip_prefix("$")).flatten() { + if is_vec.get(key) == Some(&true) { return Err(key.to_string()) } + } + typ.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; + body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; + } + Clause::Lambda(name, typ, body) => { + if let Some(key) = name.strip_prefix("$") { + if is_vec.get(key) == Some(&true) { return Err(key.to_string()) } + } + typ.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; + body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; + } + Clause::S(_, body) => { + body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; + } + _ => () + }; + Ok(()) + }; + let Expr(val, typ_opt) = pattern; + verify_clause(val, is_vec)?; + if let Some(typ) = typ_opt { + verify_scalar_vec(typ, is_vec)?; + } + return Ok(()) +} + +pub fn execute(mut src: Vec, mut tgt: Vec, mut input: Vec) +-> Result<(Vec, bool), RuleError> { + // Static values + let prefix_expr = Expr(Clause::Placeh("::prefix".to_string(), Some(0)), None); + let postfix_expr = Expr(Clause::Placeh("::postfix".to_string(), Some(0)), None); + // Dimension check + let mut is_vec_db = HashMap::new(); + src.iter().try_for_each(|e| verify_scalar_vec(e, &mut is_vec_db)) + .map_err(RuleError::ScalarVecMismatch)?; + tgt.iter().try_for_each(|e| verify_scalar_vec(e, &mut is_vec_db)) + .map_err(RuleError::ScalarVecMismatch)?; + // Prefix or postfix to match the full vector + 
let head_multi = if let Clause::Placeh(_, Some(_)) = src.first().unwrap().0 {true} else {false}; + let tail_multi = if let Clause::Placeh(_, Some(_)) = src.last().unwrap().0 {true} else {false}; + if !head_multi { + src.insert(0, prefix_expr.clone()); + tgt.insert(0, prefix_expr.clone()); + } + if !tail_multi { + src.push(postfix_expr.clone()); + tgt.push(postfix_expr.clone()); + } + todo!() +} \ No newline at end of file diff --git a/src/rule/executor/state.rs b/src/rule/executor/state.rs new file mode 100644 index 0000000..75fbe9f --- /dev/null +++ b/src/rule/executor/state.rs @@ -0,0 +1,89 @@ +use std::ops::{Add, Index}; + +use hashbrown::{HashMap, hash_map::IntoIter}; +use mappable_rc::Mrc; + +use crate::expression::Expr; + +/// A bucket of indexed expression fragments. Addition may fail if there's a conflict. +#[derive(PartialEq, Eq)] +pub struct State(HashMap>>); + +/// Clone without also cloning arbitrarily heavy Expr objects. +/// Key is expected to be a very short string with an allocator overhead close to zero. +impl Clone for State { + fn clone(&self) -> Self { + Self(HashMap::from_iter( + self.0.iter().map(|(k, v)| (k.clone(), Mrc::clone(v))) + )) + } +} + +impl State { + pub fn new() -> Self { + Self(HashMap::new()) + } + /// Insert a new element, return None on conflict, clone only on success + pub fn insert(mut self, k: &S, v: &[Expr]) -> Option + where S: AsRef + ToString + ?Sized { + if let Some(old) = self.0.get(k.as_ref()) { + if old.as_ref() != v {return None} + } else { + self.0.insert(k.to_string(), Mrc::new(v.to_vec())); + } + return Some(self) + } + /// Insert a new entry, return None on conflict + pub fn insert_pair(mut self, (k, v): (String, Mrc>)) -> Option { + if let Some(old) = self.0.get(&k) { + if old != &v {return None} + } else { + self.0.insert(k, v); + } + return Some(self) + } + /// Returns `true` if the state contains no data + pub fn empty(&self) -> bool { + self.0.is_empty() + } +} + +impl Add for State { + type Output = Option; + + fn add(mut self, rhs: Self) -> Self::Output { + if self.empty() { + return Some(rhs) + } + for pair in rhs.0 { + self = self.insert_pair(pair)? 
+ } + return Some(self); + } +} + +impl Add> for State { + type Output = Option; + + fn add(self, rhs: Option) -> Self::Output { + rhs.and_then(|s| self + s) + } +} + +impl<'a, S> Index<&S> for State where S: AsRef { + type Output = Vec; + + fn index(&self, index: &S) -> &Self::Output { + return &self.0[index.as_ref()] + } +} + +impl IntoIterator for State { + type Item = (String, Mrc>); + + type IntoIter = IntoIter>>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} \ No newline at end of file diff --git a/src/rule/mod.rs b/src/rule/mod.rs index cef008b..77da85e 100644 --- a/src/rule/mod.rs +++ b/src/rule/mod.rs @@ -1,6 +1,6 @@ -mod rule; +// mod rule; mod executor; -mod bad_state_error; +mod rule_error; -pub use rule::Rule; -pub use bad_state_error::BadState; \ No newline at end of file +// pub use rule::Rule; +pub use rule_error::RuleError; \ No newline at end of file diff --git a/src/rule/rule_error.rs b/src/rule/rule_error.rs new file mode 100644 index 0000000..d6a32b2 --- /dev/null +++ b/src/rule/rule_error.rs @@ -0,0 +1,18 @@ +use std::{fmt, error::Error}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum RuleError { + BadState(String), + ScalarVecMismatch(String) +} + +impl fmt::Display for RuleError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::BadState(key) => write!(f, "Key {:?} not in match pattern", key), + Self::ScalarVecMismatch(key) => + write!(f, "Key {:?} used inconsistently with and without ellipsis", key) + } + } +} +impl Error for RuleError {} \ No newline at end of file diff --git a/src/utils/cache.rs b/src/utils/cache.rs index 3c3f68a..97de3e6 100644 --- a/src/utils/cache.rs +++ b/src/utils/cache.rs @@ -4,16 +4,21 @@ use mappable_rc::Mrc; /// Cache the return values of an effectless closure in a hashmap /// Inspired by the closure_cacher crate. 
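// ---- editor's sketch (illustration, not part of the patch) ----
// The cached closure now also receives the cache itself, which is how
// rule_collector layers caches on top of one another (most of those closures
// ignore the extra parameter with `_`). A minimal usage sketch, assuming the
// Cache type of this module is in scope and find keeps its (&I) -> Mrc<O> shape:
fn cache_usage_sketch() {
    let lengths = Cache::new(|s: String, _cache| s.chars().count());
    // the first call runs the closure, the second is served from the store
    assert_eq!(*lengths.find(&"orchid".to_string()), 6);
    assert_eq!(*lengths.find(&"orchid".to_string()), 6);
}
// ---------------------------------------------------------------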
-pub struct Cache where O: Clone { +pub struct Cache<'a, I, O: 'static> /*where O: Clone*/ { store: RefCell>>, - closure: RefCell O + 'static>> + closure: RefCell Mrc + 'a>> } -impl Cache where - I: Eq + Hash + Clone, - O: Clone +impl<'a, I, O> Cache<'a, I, O> where + I: Eq + Hash + Clone { - pub fn new(closure: F) -> Self where F: FnMut(I) -> O { + pub fn new(mut closure: F) -> Self where F: FnMut(I, &Self) -> O { + Self::new_raw(move |o, s| Mrc::new(closure(o, s))) + } + + /// Take an Mrc closure rather than an O closure + /// Used internally to derive caches from other systems working with Mrc-s + pub fn new_raw(closure: F) -> Self where F: FnMut(I, &Self) -> Mrc { Self { store: RefCell::new(HashMap::new()), closure: RefCell::new(Box::new(closure)) @@ -25,7 +30,7 @@ impl Cache where let mut closure = self.closure.borrow_mut(); let mut store = self.store.borrow_mut(); Mrc::clone(store.raw_entry_mut().from_key(i) - .or_insert_with(|| (i.clone(), Mrc::new(closure(i.clone())))).1) + .or_insert_with(|| (i.clone(), closure(i.clone(), self))).1) } #[allow(dead_code)] /// Return the result if it has already been computed @@ -40,9 +45,9 @@ impl Cache where } } -impl Cache> where +impl<'a, I, O, E> Cache<'a, I, Result> where I: Eq + Hash + Clone, - O: Clone, + // O: Clone, E: Clone { /// Sink the ref from a Result into the Ok value, such that cloning only occurs on the sad path @@ -54,9 +59,9 @@ impl Cache> where } } -impl Cache> where +impl<'a, I, O> Cache<'a, I, Option> where I: Eq + Hash + Clone, - O: Clone + // O: Clone { #[allow(dead_code)] /// Sink the ref from an Option into the Some value such that the return value can be @@ -65,4 +70,4 @@ impl Cache> where let ent = self.find(i); Mrc::try_map(ent, |o| o.as_ref()).ok() } -} +} \ No newline at end of file diff --git a/src/utils/merge_sorted.rs b/src/utils/merge_sorted.rs new file mode 100644 index 0000000..35130b3 --- /dev/null +++ b/src/utils/merge_sorted.rs @@ -0,0 +1,27 @@ +use std::mem; + +// use itertools::Itertools; + +/// Merge two sorted iterators into a sorted iterator. 
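// ---- editor's sketch (illustration, not part of the patch) ----
// A self-contained, peekable-based version of the same merge for reference;
// the function below implements the idea with explicit item buffers instead.
// `key` plays the role of the mapper `f`.
fn merge_sorted_sketch<T, O: Ord>(
    a: impl Iterator<Item = T>,
    b: impl Iterator<Item = T>,
    mut key: impl FnMut(&T) -> O,
) -> impl Iterator<Item = T> {
    let (mut a, mut b) = (a.peekable(), b.peekable());
    std::iter::from_fn(move || {
        // decide which side to pull from without holding the peeked borrows
        let take_a = match (a.peek(), b.peek()) {
            (Some(x), Some(y)) => key(x) <= key(y),
            (Some(_), None) => true,
            (None, Some(_)) => false,
            (None, None) => return None,
        };
        if take_a { a.next() } else { b.next() }
    })
}
// ---------------------------------------------------------------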
+pub fn merge_sorted(mut i: I, mut j: J, mut f: F) -> impl Iterator +where + I: Iterator, J: Iterator, + F: FnMut(&T) -> O, O: Ord, +{ + let mut i_item: Option = None; + let mut j_item: Option = None; + std::iter::from_fn(move || { + match (&mut i_item, &mut j_item) { + (&mut None, &mut None) => None, + (&mut None, j_item @ &mut Some(_)) => Some((j_item, None)), + (i_item @ &mut Some(_), &mut None) => Some((i_item, i.next())), + (Some(i_val), Some(j_val)) => Some( + if f(i_val) < f(j_val) { + (&mut i_item, i.next()) + } else { + (&mut j_item, j.next()) + } + ) + }.and_then(|(dest, value)| mem::replace(dest, value)) + }) +} \ No newline at end of file diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 28e9b38..3edcdd4 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,6 +1,12 @@ mod cache; mod substack; +mod side; +mod merge_sorted; +mod sorted_pairs; +mod unwrap_or_continue; pub use cache::Cache; -pub use substack::Substack; +pub use substack::Stackframe; +pub use side::Side; +pub use merge_sorted::merge_sorted; pub type BoxedIter<'a, T> = Box + 'a>; \ No newline at end of file diff --git a/src/utils/side.rs b/src/utils/side.rs new file mode 100644 index 0000000..e72b398 --- /dev/null +++ b/src/utils/side.rs @@ -0,0 +1,53 @@ +use std::fmt::Display; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Side {Left, Right} + +impl Display for Side { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Left => write!(f, "Left"), + Self::Right => write!(f, "Right"), + } + } +} + +impl Side { + pub fn opposite(&self) -> Self { + match self { + Self::Left => Self::Right, + Self::Right => Self::Left + } + } + /// Shorthand for opposite + pub fn inv(&self) -> Self { self.opposite() } + /// take N elements from this end of a slice + pub fn slice<'a, T>(&self, size: usize, slice: &'a [T]) -> &'a [T] { + match self { + Side::Left => &slice[..size], + Side::Right => &slice[slice.len() - size..] 
+ } + } + /// ignore N elements from this end of a slice + pub fn crop<'a, T>(&self, margin: usize, slice: &'a [T]) -> &'a [T] { + self.opposite().slice(slice.len() - margin, slice) + } + /// ignore N elements from this end and M elements from the other end of a slice + pub fn crop_both<'a, T>(&self, margin: usize, opposite: usize, slice: &'a [T]) -> &'a [T] { + self.crop(margin, self.opposite().crop(opposite, slice)) + } + /// Pick this side from a pair of things + pub fn pick(&self, pair: (T, T)) -> T { + match self { + Side::Left => pair.0, + Side::Right => pair.1 + } + } + /// Make a pair with the first element on this side + pub fn pair(&self, this: T, opposite: T) -> (T, T) { + match self { + Side::Left => (this, opposite), + Side::Right => (opposite, this) + } + } +} \ No newline at end of file diff --git a/src/utils/sorted_pairs.rs b/src/utils/sorted_pairs.rs new file mode 100644 index 0000000..c728f42 --- /dev/null +++ b/src/utils/sorted_pairs.rs @@ -0,0 +1,35 @@ +use std::ops::Add; + +/// Combine two sorted iterators with their mapper function into a sorted iterator of pairs +pub struct SortedPairs { + left: IL, right: IR, + left_map: ML, right_map: MR, + left_buf: Vec<(L, O)>, right_buf: Vec<(R, O)> +} + +impl SortedPairs +where IL: Iterator, IR: Iterator, + ML: Fn(L) -> O, MR: Fn(R) -> O, + O: Ord + Add + Clone +{ + pub fn new(left: IL, right: IR, left_map: ML, right_map: MR) -> Self { + Self { + left, right, left_map, right_map, + left_buf: Vec::new(), + right_buf: Vec::new() + } + } +} + +impl<'a, L: 'a, R: 'a, IL: 'a, IR: 'a, ML: 'a, MR: 'a, O: 'a> Iterator +for &'a mut SortedPairs +where IL: Iterator, IR: Iterator, + ML: Fn(L) -> O, MR: Fn(R) -> O, + O: Ord + Add + Clone, +{ + type Item = (&'a L, &'a R); + + fn next(&mut self) -> Option { + todo!() + } +} \ No newline at end of file diff --git a/src/utils/substack.rs b/src/utils/substack.rs index 286b98a..500c303 100644 --- a/src/utils/substack.rs +++ b/src/utils/substack.rs @@ -1,45 +1,55 @@ +use std::fmt::Debug; /// Implement a FILO stack that lives on the regular call stack as a linked list. 
/// Mainly useful to detect loops in recursive algorithms where the recursion isn't /// deep enough to warrant a heap-allocated set -#[derive(Debug, Clone, Copy)] -pub struct Substack<'a, T> { +#[derive(Clone, Copy)] +pub struct Stackframe<'a, T> { pub item: T, - pub prev: Option<&'a Self> + pub prev: Option<&'a Stackframe<'a, T>> } -impl<'a, T> Substack<'a, T> { - #[allow(dead_code)] - pub fn item(&self) -> &T { &self.item } - #[allow(dead_code)] - pub fn prev(&self) -> Option<&'a Substack<'a, T>> { self.prev } - +impl<'a, T: 'a> Stackframe<'a, T> { pub fn new(item: T) -> Self { Self { item, prev: None } } - pub fn push(&'a self, item: T) -> Self { - Self { + /// Get the item owned by this listlike, very fast O(1) + pub fn item(&self) -> &T { &self.item } + /// Get the next link in the list, very fast O(1) + pub fn prev(&self) -> Option<&'a Stackframe> { self.prev } + /// Construct an iterator over the listlike, very fast O(1) + pub fn iter(&self) -> StackframeIterator { + StackframeIterator { curr: Some(self) } + } + pub fn push(&self, item: T) -> Stackframe<'_, T> { + Stackframe { item, prev: Some(self) } } - pub fn iter(&'a self) -> SubstackIterator<'a, T> { - SubstackIterator { curr: Some(self) } +} + +impl<'a, T> Debug for Stackframe<'a, T> where T: Debug { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Substack")?; + f.debug_list().entries(self.iter()).finish() } } -pub struct SubstackIterator<'a, T> { - curr: Option<&'a Substack<'a, T>> +pub struct StackframeIterator<'a, T> { + curr: Option<&'a Stackframe<'a, T>> } -impl<'a, T> Iterator for SubstackIterator<'a, T> { +impl<'a, T> Iterator for StackframeIterator<'a, T> { type Item = &'a T; fn next(&mut self) -> Option<&'a T> { - let Substack{ item, prev } = self.curr?; - self.curr = *prev; + let curr = self.curr?; + let item = curr.item(); + let prev = curr.prev(); + self.curr = prev; Some(item) } -} \ No newline at end of file +} diff --git a/src/utils/unwrap_or_continue.rs b/src/utils/unwrap_or_continue.rs new file mode 100644 index 0000000..5dd0e2b --- /dev/null +++ b/src/utils/unwrap_or_continue.rs @@ -0,0 +1,6 @@ +#[macro_export] +macro_rules! unwrap_or_continue { + ($m:expr) => { + { if let Some(res) = ($m) {res} else {continue} } + } +} \ No newline at end of file diff --git a/swap.md b/swap.md new file mode 100644 index 0000000..a90eb61 --- /dev/null +++ b/swap.md @@ -0,0 +1 @@ +Optimizations mostly left for later, len() was critical, should make most things O(N) instead of O(N!). A trivial keyword cache in the executor should prevent trying variable length patterns onto windows of unrelated sequences. Investigate different strategies as issue likely to re-emerge with marginal added pattern complexity \ No newline at end of file
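// ---- editor's sketch (illustration, not part of the patch) ----
// The cost note above is about how a vectorial placeholder is matched: for a
// pattern shaped like [left.., ...$mid:prio, right..], SliceMatcherDnC's
// valid_subdivisions walks every split of the target in which the left part is
// at least as long as the left submatcher needs (lmin) and the right part at
// least rmin, visiting longer middle sections first. A std-only illustration
// over plain integer slices, with lmin/rmin standing in for those minimums:
fn subdivisions<'a>(
    target: &'a [i32],
    lmin: usize,
    rmin: usize,
) -> impl Iterator<Item = (&'a [i32], &'a [i32], &'a [i32])> {
    let own_max = target.len().saturating_sub(lmin + rmin);
    (1..=own_max).rev().flat_map(move |own_len| {
        let wiggle = target.len() - lmin - rmin - own_len;
        (0..=wiggle).map(move |offset| {
            let (left, rest) = target.split_at(lmin + offset);
            let (mid, right) = rest.split_at(own_len);
            (left, mid, right)
        })
    })
}
// subdivisions(&[1, 2, 3, 4], 1, 1) yields ([1], [2, 3], [4]),
// ([1], [2], [3, 4]) and ([1, 2], [3], [4]), in that order.
// ---------------------------------------------------------------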