in midst of refactor

This commit is contained in:
2024-04-29 21:46:42 +02:00
parent ed0d64d52e
commit aa3f7e99ab
221 changed files with 5431 additions and 685 deletions

View File

@@ -0,0 +1,23 @@
//! Abstract definition of a rule matcher, so that the implementation can
//! eventually be swapped out for a different one.
use std::rc::Rc;
use super::state::State;
use crate::name::Sym;
use crate::parse::parsed::Expr;
/// Expression type that rule patterns, templates and matched code are made
/// of. Currently the same as [Expr], just extracted for flexibility
pub type RuleExpr = Expr;
/// Cacheable optimized structures for matching patterns on slices. This is
/// injected to allow experimentation in the matcher implementation.
///
/// A matcher is built once from a pattern with [Matcher::new] and can then be
/// applied to any number of sequences with [Matcher::apply].
pub trait Matcher {
/// Build matcher for a pattern
#[must_use]
fn new(pattern: Rc<Vec<RuleExpr>>) -> Self;
/// Apply matcher to a token sequence
///
/// Returns the captured [State] if `source` fits the pattern and `None`
/// otherwise. The returned state borrows from `source`. `save_loc` is a
/// predicate over names; the vectree implementation in this crate consults
/// it when a name in the pattern matches, to decide whether the location of
/// the matched name is recorded in the state.
#[must_use]
fn apply<'a>(&self, source: &'a [RuleExpr], save_loc: &impl Fn(Sym) -> bool)
-> Option<State<'a>>;
}

View File

@@ -0,0 +1,29 @@
use super::scal_match::scalv_match;
use super::shared::AnyMatcher;
use super::vec_match::vec_match;
use crate::name::Sym;
use crate::rule::matcher::RuleExpr;
use crate::rule::state::State;
/// Match a whole sequence against an [AnyMatcher].
///
/// A purely scalar matcher is delegated to [scalv_match]. A matcher with a
/// vectorial part cuts a fixed-size scalar prefix and suffix off `seq` and
/// hands whatever lies between them to [vec_match]. Returns `None` if any
/// part fails to match or the sequence is too short for the scalar edges.
#[must_use]
pub fn any_match<'a>(
  matcher: &AnyMatcher,
  seq: &'a [RuleExpr],
  save_loc: &impl Fn(Sym) -> bool,
) -> Option<State<'a>> {
  let (left, mid, right) = match matcher {
    AnyMatcher::Scalar(scalv) => return scalv_match(scalv, seq, save_loc),
    AnyMatcher::Vec { left, mid, right } => (left, mid, right),
  };
  // The scalar edges consume exactly as many items as they have matchers;
  // a sequence shorter than both edges combined cannot match
  let mid_len = seq.len().checked_sub(left.len() + right.len())?;
  let (head, rest) = seq.split_at(left.len());
  let (body, tail) = rest.split_at(mid_len);
  // Matching order: left edge, right edge, then the vectorial middle
  let left_state = scalv_match(left, head, save_loc)?;
  let right_state = scalv_match(right, tail, save_loc)?;
  let mid_state = vec_match(mid, body, save_loc)?;
  Some(left_state.combine(right_state).combine(mid_state))
}

View File

@@ -0,0 +1,149 @@
use intern_all::Tok;
use itertools::Itertools;
use super::shared::{AnyMatcher, ScalMatcher, VecMatcher};
use crate::parse::parsed::{Clause, PHClass, Placeholder};
use crate::rule::matcher::RuleExpr;
use crate::rule::vec_attrs::vec_attrs;
use crate::utils::side::Side;
/// Result of [split_at_max_vec]: the expressions left of the selected
/// vectorial, the `(name, priority, nonzero)` attributes of that vectorial as
/// returned by [vec_attrs], and the expressions to its right
pub type MaxVecSplit<'a> = (&'a [RuleExpr], (Tok<String>, usize, bool), &'a [RuleExpr]);
/// Split a pattern around its highest-priority vectorial placeholder.
///
/// Returns the sub-slices on either side of that placeholder together with
/// its attributes, or `None` if the pattern is empty or contains no vectorial
/// placeholder at all.
#[must_use]
fn split_at_max_vec(pattern: &[RuleExpr]) -> Option<MaxVecSplit> {
  // Scalars are ranked -1 so that any vectorial (priority >= 0) outranks them
  let pivot_idx = pattern
    .iter()
    .position_max_by_key(|expr| vec_attrs(expr).map_or(-1, |(_, prio, _)| prio as i64))?;
  let (pivot, right) = pattern[pivot_idx..]
    .split_first()
    .expect("The index of the greatest element must be less than the length");
  let left = &pattern[..pivot_idx];
  // If the top-ranked element is a scalar, there are no vectorials
  let attrs = vec_attrs(pivot)?;
  Some((left, attrs, right))
}
/// Count the items at the start of `iter` that are not vectorial
/// placeholders, stopping at the first vectorial
#[must_use]
fn scal_cnt<'a>(iter: impl Iterator<Item = &'a RuleExpr>) -> usize {
  let mut count = 0;
  for expr in iter {
    if vec_attrs(expr).is_some() {
      break;
    }
    count += 1;
  }
  count
}
/// Build the matcher for a complete sequence pattern.
///
/// A pattern without vectorial placeholders becomes [AnyMatcher::Scalar].
/// Otherwise the leading and trailing runs of scalars are split off and the
/// section between them, which starts and ends with a vectorial, is compiled
/// with [mk_vec].
#[must_use]
pub fn mk_any(pattern: &[RuleExpr]) -> AnyMatcher {
  let lead = scal_cnt(pattern.iter());
  if lead >= pattern.len() {
    return AnyMatcher::Scalar(mk_scalv(pattern));
  }
  let (left, rest) = pattern.split_at(lead);
  // At least one vectorial exists, so the trailing run ends inside `rest`
  let trail = scal_cnt(pattern.iter().rev());
  let (mid, right) = rest.split_at(rest.len() - trail);
  AnyMatcher::Vec { left: mk_scalv(left), mid: mk_vec(mid), right: mk_scalv(right) }
}
/// Build one scalar matcher per pattern element.
///
/// Pattern MUST NOT contain vectorial placeholders; [mk_scalar] panics on
/// them.
#[must_use]
fn mk_scalv(pattern: &[RuleExpr]) -> Vec<ScalMatcher> {
  let mut matchers = Vec::with_capacity(pattern.len());
  matchers.extend(pattern.iter().map(mk_scalar));
  matchers
}
/// Build the matcher tree for a pattern section delimited by vectorials.
///
/// Pattern MUST start and end with a vectorial placeholder.
///
/// The highest-priority vectorial becomes the pivot. The scalars directly
/// adjacent to it become separators, and whatever lies beyond the separators
/// is compiled recursively. Depending on which sides of the pivot are
/// non-empty the result is a plain placeholder, a scan, or a middle matcher.
///
/// # Panics
///
/// Panics if the pattern contains no vectorial placeholder. In debug builds
/// it also panics if the pattern is empty or does not start and end with a
/// vectorial.
#[must_use]
fn mk_vec(pattern: &[RuleExpr]) -> VecMatcher {
  debug_assert!(!pattern.is_empty(), "pattern cannot be empty");
  // `and_then`, not `map`: mapping would produce a nested option whose outer
  // layer is `Some` for any non-empty slice, making the assertion vacuous
  debug_assert!(
    pattern.first().and_then(vec_attrs).is_some(),
    "pattern must start with a vectorial"
  );
  debug_assert!(pattern.last().and_then(vec_attrs).is_some(), "pattern must end with a vectorial");
  let (left, (key, _, nonzero), right) = split_at_max_vec(pattern)
    .expect("pattern must have vectorial placeholders at least at either end");
  // Scalars directly right of the pivot form the right separator
  let r_sep_size = scal_cnt(right.iter());
  let (r_sep, r_side) = right.split_at(r_sep_size);
  // Scalars directly left of the pivot form the left separator
  let l_sep_size = scal_cnt(left.iter().rev());
  let (l_side, l_sep) = left.split_at(left.len() - l_sep_size);
  let main = VecMatcher::Placeh { key, nonzero };
  match (left, right) {
    (&[], &[]) => main,
    (&[], _) => VecMatcher::Scan {
      direction: Side::Left,
      left: Box::new(main),
      sep: mk_scalv(r_sep),
      right: Box::new(mk_vec(r_side)),
    },
    (_, &[]) => VecMatcher::Scan {
      direction: Side::Right,
      left: Box::new(mk_vec(l_side)),
      sep: mk_scalv(l_sep),
      right: Box::new(main),
    },
    (..) => {
      // Vectorials on both sides, highest priority first; used to rank
      // otherwise equally good matches
      let mut key_order =
        l_side.iter().chain(r_side.iter()).filter_map(vec_attrs).collect::<Vec<_>>();
      key_order.sort_by_key(|(_, prio, _)| -(*prio as i64));
      VecMatcher::Middle {
        left: Box::new(mk_vec(l_side)),
        left_sep: mk_scalv(l_sep),
        mid: Box::new(main),
        right_sep: mk_scalv(r_sep),
        right: Box::new(mk_vec(r_side)),
        key_order: key_order.into_iter().map(|(n, ..)| n).collect(),
      }
    },
  }
}
/// Build the matcher for a single non-vectorial pattern element. Sequences
/// and lambdas are compiled recursively with [mk_any].
///
/// Pattern MUST NOT be a vectorial placeholder
///
/// # Panics
///
/// Panics if the pattern is a vectorial placeholder.
#[must_use]
fn mk_scalar(pattern: &RuleExpr) -> ScalMatcher {
  match &pattern.value {
    Clause::Placeh(Placeholder { class: PHClass::Vec { .. }, .. }) => {
      panic!("Scalar matcher cannot be built from vector pattern")
    },
    Clause::Placeh(Placeholder { name, class }) => {
      let name_only = matches!(class, PHClass::Name);
      ScalMatcher::Placeh { key: name.clone(), name_only }
    },
    Clause::Atom(a) => ScalMatcher::Atom(a.clone()),
    Clause::Name(n) => ScalMatcher::Name(n.clone()),
    Clause::S(c, body) => {
      let body_matcher = mk_any(body);
      ScalMatcher::S(*c, Box::new(body_matcher))
    },
    Clause::Lambda(arg, body) => {
      let arg_matcher = mk_any(arg);
      let body_matcher = mk_any(body);
      ScalMatcher::Lambda(Box::new(arg_matcher), Box::new(body_matcher))
    },
  }
}
#[cfg(test)]
mod test {
  use std::rc::Rc;

  use intern_all::i;

  use super::mk_any;
  use crate::location::SourceRange;
  use crate::parse::parsed::{Clause, PHClass, PType, Placeholder};
  use crate::sym;

  /// Smoke test: builds a matcher for a pattern with vectorials at both ends
  /// and a nested sequence holding two more vectorials, then prints it.
  /// Nothing is asserted beyond construction and formatting not panicking.
  #[test]
  fn test_scan() {
    let ex = |c: Clause| c.into_expr(SourceRange::mock());
    // Every vectorial in this pattern may be empty; only name and priority vary
    let vec_ph = |name, prio| {
      ex(Clause::Placeh(Placeholder { class: PHClass::Vec { nonzero: false, prio }, name }))
    };
    let pattern = vec![
      vec_ph(i!(str: "::prefix"), 0),
      ex(Clause::Name(sym!(prelude::do))),
      ex(Clause::S(
        PType::Par,
        Rc::new(vec![
          vec_ph(i!(str: "expr"), 0),
          ex(Clause::Name(sym!(prelude::;))),
          vec_ph(i!(str: "rest"), 1),
        ]),
      )),
      vec_ph(i!(str: "::suffix"), 0),
    ];
    let matcher = mk_any(&pattern);
    println!("{matcher}");
  }
}

View File

@@ -0,0 +1,21 @@
//! Optimized form of macro pattern that can be quickly tested against the AST.
//!
//! # Construction
//!
//! The pattern is converted into a hierarchy of plain, scan and middle
//! matchers:
//! - plain: accepts any sequence, or any non-empty sequence
//! - scan: a single scalar pattern moves LTR or RTL, with submatchers on
//!   either side
//! - middle: two scalar patterns walk over all combinations of matches
//!   while getting progressively closer to each other
//!
//! # Application
//!
//! walk over the current matcher's valid options and poll the submatchers
//! for each of them
mod any_match;
mod build;
mod scal_match;
pub mod shared;
mod vec_match;

View File

@@ -0,0 +1,47 @@
use super::any_match::any_match;
use super::shared::ScalMatcher;
use crate::name::Sym;
use crate::parse::parsed::Clause;
use crate::rule::matcher::RuleExpr;
use crate::rule::state::{State, StateEntry};
/// Match a single expression against a scalar matcher.
///
/// Returns the bindings produced by the match, or `None` if the expression
/// does not fit. A matched name only contributes its location to the state
/// when `save_loc` returns true for it.
#[must_use]
pub fn scal_match<'a>(
  matcher: &ScalMatcher,
  expr: &'a RuleExpr,
  save_loc: &impl Fn(Sym) -> bool,
) -> Option<State<'a>> {
  match (matcher, &expr.value) {
    (ScalMatcher::Atom(a1), Clause::Atom(a2)) if a1.run().0.parser_eq(&*a2.run().0) =>
      Some(State::default()),
    (ScalMatcher::Name(n1), Clause::Name(n2)) if n1 == n2 => {
      let state = if save_loc(n1.clone()) {
        State::from_name(n1.clone(), expr.range.clone())
      } else {
        State::default()
      };
      Some(state)
    },
    // A name-only placeholder accepts nothing but names
    (ScalMatcher::Placeh { key, name_only: true }, Clause::Name(n)) => {
      let entry = StateEntry::Name(n, &expr.range);
      Some(State::from_ph(key.clone(), entry))
    },
    // An unrestricted scalar placeholder accepts any single expression
    (ScalMatcher::Placeh { key, name_only: false }, _) => {
      let entry = StateEntry::Scalar(expr);
      Some(State::from_ph(key.clone(), entry))
    },
    (ScalMatcher::S(c1, b_mat), Clause::S(c2, body)) if c1 == c2 =>
      any_match(b_mat, &body[..], save_loc),
    (ScalMatcher::Lambda(arg_mat, b_mat), Clause::Lambda(arg, body)) => {
      let arg_state = any_match(arg_mat, arg, save_loc)?;
      let body_state = any_match(b_mat, body, save_loc)?;
      Some(arg_state.combine(body_state))
    },
    _ => None,
  }
}
/// Match a sequence element-by-element against a list of scalar matchers.
///
/// The sequence must have exactly as many items as there are matchers. The
/// states of the individual matches are combined left to right; the first
/// failing element aborts the match.
#[must_use]
pub fn scalv_match<'a>(
  matchers: &[ScalMatcher],
  seq: &'a [RuleExpr],
  save_loc: &impl Fn(Sym) -> bool,
) -> Option<State<'a>> {
  if seq.len() != matchers.len() {
    return None;
  }
  matchers.iter().zip(seq.iter()).try_fold(State::default(), |state, (matcher, expr)| {
    let matched = scal_match(matcher, expr, save_loc)?;
    Some(state.combine(matched))
  })
}

View File

@@ -0,0 +1,141 @@
//! Datastructures for cached pattern
use std::fmt;
use std::rc::Rc;
use intern_all::Tok;
use itertools::Itertools;
use super::any_match::any_match;
use super::build::mk_any;
use crate::foreign::atom::AtomGenerator;
use crate::name::Sym;
use crate::parse::parsed::PType;
use crate::rule::matcher::{Matcher, RuleExpr};
use crate::rule::state::State;
use crate::utils::side::Side;
/// Matcher for exactly one expression
pub(super) enum ScalMatcher {
/// Matches an atom; equality is decided by the atom's `parser_eq`
Atom(AtomGenerator),
/// Matches this exact name
Name(Sym),
/// Matches a sequence clause of the same [PType] whose contents satisfy
/// the inner matcher
S(PType, Box<AnyMatcher>),
/// Matches a lambda; the first matcher applies to the argument and the
/// second to the body
Lambda(Box<AnyMatcher>, Box<AnyMatcher>),
/// Scalar placeholder bound under `key`. With `name_only` set it accepts
/// only names, otherwise it accepts any single expression
Placeh { key: Tok<String>, name_only: bool },
}
/// Matcher for a section of a pattern that starts and ends with a vectorial
/// placeholder
pub(super) enum VecMatcher {
/// A single vectorial placeholder that captures the whole sequence it is
/// given. With `nonzero` set it rejects the empty sequence
Placeh {
key: Tok<String>,
nonzero: bool,
},
/// A scalar separator with a vectorial matcher on each side. Candidate
/// separator positions are tried one after the other and the first position
/// where all three parts match wins
Scan {
/// Matches everything left of the separator
left: Box<VecMatcher>,
/// Matches the separator itself
sep: Vec<ScalMatcher>,
/// Matches everything right of the separator
right: Box<VecMatcher>,
/// The separator traverses the sequence towards this side
direction: Side,
},
/// A vectorial placeholder enclosed by two separators, with further
/// vectorial matchers outside the separators
Middle {
/// Matches the left outer region
left: Box<VecMatcher>,
/// Matches the left separator
left_sep: Vec<ScalMatcher>,
/// Matches the middle - can only ever be a plain placeholder
mid: Box<VecMatcher>,
/// Matches the right separator
right_sep: Vec<ScalMatcher>,
/// Matches the right outer region
right: Box<VecMatcher>,
/// Order of significance for ranking equally good candidate matches based
/// on the length of matches on either side.
///
/// Vectorial keys that appear on either side, in priority order
key_order: Vec<Tok<String>>,
},
}
/// Matcher for a complete sequence
pub(super) enum AnyMatcher {
/// The pattern has no vectorial placeholders; the sequence must have the
/// same length as this list and match it item by item
Scalar(Vec<ScalMatcher>),
/// The pattern has at least one vectorial placeholder. `left` and `right`
/// match the fixed-length scalar edges, `mid` matches everything in between
Vec { left: Vec<ScalMatcher>, mid: VecMatcher, right: Vec<ScalMatcher> },
}
impl Matcher for AnyMatcher {
  /// Compile the pattern into a matcher tree with [mk_any]
  fn new(pattern: Rc<Vec<RuleExpr>>) -> Self {
    let exprs: &[RuleExpr] = pattern.as_slice();
    mk_any(exprs)
  }
  /// Run the matcher tree on `source` with [any_match]
  fn apply<'a>(
    &self,
    source: &'a [RuleExpr],
    save_loc: &impl Fn(Sym) -> bool,
  ) -> Option<State<'a>> {
    any_match(self, source, save_loc)
  }
}
// ################ Display ################
impl fmt::Display for ScalMatcher {
  /// Prints the matcher in a pattern-like notation: `$key` for a scalar
  /// placeholder, `$_key` for a name-only placeholder, and nested matchers
  /// inside their brackets or after the lambda backslash.
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Self::Placeh { key, name_only: true } => write!(f, "$_{key}"),
      Self::Placeh { key, name_only: false } => write!(f, "${key}"),
      Self::Name(n) => write!(f, "{n}"),
      Self::Atom(a) => write!(f, "{a:?}"),
      Self::Lambda(arg, body) => write!(f, "\\{arg}.{body}"),
      Self::S(t, body) => write!(f, "{}{body}{}", t.l(), t.r()),
    }
  }
}
impl fmt::Display for VecMatcher {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Placeh { key, nonzero: true } => write!(f, "...${key}"),
Self::Placeh { key, nonzero: false } => write!(f, "..${key}"),
Self::Scan { left, sep, right, direction } => {
let arrow = if direction == &Side::Left { "<==" } else { "==>" };
write!(f, "Scan{{{left} {arrow} {} {arrow} {right}}}", sep.iter().join(" "))
},
Self::Middle { left, left_sep, mid, right_sep, right, .. } => {
let left_sep_s = left_sep.iter().join(" ");
let right_sep_s = right_sep.iter().join(" ");
write!(f, "Middle{{{left}|{left_sep_s}|{mid}|{right_sep_s}|{right}}}")
},
}
}
}
impl fmt::Display for AnyMatcher {
  /// Scalar matchers print as `(a b c)`, vectorial ones as
  /// `[left|mid|right]`.
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    let (left, mid, right) = match self {
      Self::Scalar(s) => return write!(f, "({})", s.iter().join(" ")),
      Self::Vec { left, mid, right } => (left, mid, right),
    };
    let lefts = left.iter().join(" ");
    let rights = right.iter().join(" ");
    write!(f, "[{lefts}|{mid}|{rights}]")
  }
}
// ################ External ################
/// A [Matcher] implementation that builds a priority-order tree of the
/// vectorial placeholders and handles the scalars on leaves.
///
/// This is a public wrapper around the module-private [AnyMatcher], to which
/// construction, matching and formatting are all delegated.
pub struct VectreeMatcher(AnyMatcher);
impl Matcher for VectreeMatcher {
fn new(pattern: Rc<Vec<RuleExpr>>) -> Self { Self(AnyMatcher::new(pattern)) }
fn apply<'a>(
&self,
source: &'a [RuleExpr],
save_loc: &impl Fn(Sym) -> bool,
) -> Option<State<'a>> {
self.0.apply(source, save_loc)
}
}
impl fmt::Display for VectreeMatcher {
  /// Formats exactly like the wrapped [AnyMatcher].
  ///
  /// The call is fully qualified because this file imports only the `fmt`
  /// module, so `Display::fmt` is not reachable through method syntax.
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&self.0, f) }
}

View File

@@ -0,0 +1,95 @@
use std::cmp::Ordering;
use itertools::Itertools;
use super::scal_match::scalv_match;
use super::shared::VecMatcher;
use crate::name::Sym;
use crate::rule::matcher::RuleExpr;
use crate::rule::state::{State, StateEntry};
/// Match a sequence against a vectorial matcher tree.
///
/// - `Placeh` captures the whole sequence, rejecting an empty one if it is
///   marked `nonzero`.
/// - `Scan` tries every position of its separator in the order given by
///   `direction` and returns the first position where the left side, the
///   separator and the right side all match.
/// - `Middle` collects every valid position pair for its two separators and
///   tries them in descending order of the distance between them. Among pairs
///   with the same distance, the candidate with the longest captures for the
///   keys in `key_order` (compared in that order) is chosen.
///
/// Returns `None` if no arrangement matches.
///
/// NOTE(review): `slice::windows` panics on a window size of zero, so the
/// `Middle` separators are assumed to be non-empty. Rule preparation rejects
/// patterns with neighbouring vectorials, which is presumably what guarantees
/// this; confirm before feeding unvalidated patterns to the matcher.
#[must_use]
pub fn vec_match<'a>(
  matcher: &VecMatcher,
  seq: &'a [RuleExpr],
  save_loc: &impl Fn(Sym) -> bool,
) -> Option<State<'a>> {
  match matcher {
    VecMatcher::Placeh { key, nonzero } => {
      if *nonzero && seq.is_empty() {
        return None;
      }
      Some(State::from_ph(key.clone(), StateEntry::Vec(seq)))
    },
    VecMatcher::Scan { left, sep, right, direction } => {
      if seq.len() < sep.len() {
        return None;
      }
      for lpos in direction.walk(0..=seq.len() - sep.len()) {
        let rpos = lpos + sep.len();
        let state = vec_match(left, &seq[..lpos], save_loc)
          .and_then(|s| Some(s.combine(scalv_match(sep, &seq[lpos..rpos], save_loc)?)))
          .and_then(|s| Some(s.combine(vec_match(right, &seq[rpos..], save_loc)?)));
        if let Some(s) = state {
          return Some(s);
        }
      }
      None
    },
    // XXX predict heap space usage and allocation count
    VecMatcher::Middle { left, left_sep, mid, right_sep, right, key_order } => {
      if seq.len() < left_sep.len() + right_sep.len() {
        return None;
      }
      // Valid locations for the left separator. The tail is excluded because
      // the right separator still has to fit after it.
      let lposv = seq[..seq.len() - right_sep.len()]
        .windows(left_sep.len())
        .enumerate()
        .filter_map(|(i, window)| scalv_match(left_sep, window, save_loc).map(|s| (i, s)))
        .collect::<Vec<_>>();
      // Valid locations for the right separator. The search skips the first
      // `left_sep.len()` items, so the window index is shifted back by that
      // amount to obtain an absolute index into `seq`, which is what all of
      // the slicing below expects.
      let rposv = seq[left_sep.len()..]
        .windows(right_sep.len())
        .enumerate()
        .filter_map(|(i, window)| {
          scalv_match(right_sep, window, save_loc).map(|s| (i + left_sep.len(), s))
        })
        .collect::<Vec<_>>();
      // Valid combinations of locations for the separators: the left
      // separator must end at or before the start of the right one
      let mut pos_pairs = lposv
        .into_iter()
        .cartesian_product(rposv)
        .filter(|((lpos, _), (rpos, _))| lpos + left_sep.len() <= *rpos)
        .map(|((lpos, lstate), (rpos, rstate))| (lpos, rpos, lstate.combine(rstate)))
        .collect::<Vec<_>>();
      // In descending order of size
      pos_pairs.sort_by_key(|(l, r, _)| -((r - l) as i64));
      let eql_clusters = pos_pairs.into_iter().group_by(|(al, ar, _)| ar - al);
      for (_gap_size, cluster) in eql_clusters.into_iter() {
        let best_candidate = cluster
          .into_iter()
          .filter_map(|(lpos, rpos, state)| {
            Some(
              state
                .combine(vec_match(left, &seq[..lpos], save_loc)?)
                .combine(vec_match(mid, &seq[lpos + left_sep.len()..rpos], save_loc)?)
                .combine(vec_match(right, &seq[rpos + right_sep.len()..], save_loc)?),
            )
          })
          // Prefer longer captures for the keys earlier in `key_order`
          .max_by(|a, b| {
            for key in key_order {
              let alen = a.ph_len(key).expect("key_order references scalar or missing");
              let blen = b.ph_len(key).expect("key_order references scalar or missing");
              match alen.cmp(&blen) {
                Ordering::Equal => (),
                any => return any,
              }
            }
            Ordering::Equal
          });
        if let Some(state) = best_candidate {
          return Some(state);
        }
      }
      None
    },
  }
}

View File

@@ -0,0 +1,9 @@
//! Substitution rule processing
pub mod matcher;
pub mod matcher_vectree;
mod prepare_rule;
pub mod repository;
pub mod rule_error;
mod state;
mod update_first_seq;
mod vec_attrs;

View File

@@ -0,0 +1,107 @@
use hashbrown::HashMap;
use intern_all::{i, Tok};
use itertools::Itertools;
use super::matcher::RuleExpr;
use super::rule_error::RuleError;
use super::vec_attrs::vec_attrs;
use crate::parse::parsed::{Clause, PHClass, Placeholder};
use crate::pipeline::project::ProjRule;
/// Ensure that the rule's source begins and ends with a vectorial without
/// changing its meaning
///
/// If the pattern does not already start (or end) with a vectorial
/// placeholder, a generated, possibly empty, priority-0 vectorial is added on
/// that side of the pattern, and the same placeholder is added on the same
/// side of the template so that whatever it captures is emitted unchanged.
/// The generated prefix gets a zero-width range at the start of the first
/// pattern element and the generated suffix a zero-width range at the end of
/// the last one.
///
/// # Panics
///
/// Panics if the pattern is empty.
#[must_use]
fn pad(rule: ProjRule) -> ProjRule {
  let prefix_name = i!(str: "__gen__orchid__rule__prefix");
  let suffix_name = i!(str: "__gen__orchid__rule__suffix");
  let class: PHClass = PHClass::Vec { nonzero: false, prio: 0 };
  let ProjRule { comments, pattern, prio, template } = rule;
  let rule_head = pattern.first().expect("Pattern can never be empty!");
  let rule_tail = pattern.last().expect("Pattern can never be empty!");
  let prefix = vec_attrs(rule_head).is_none().then(|| {
    Clause::Placeh(Placeholder { name: prefix_name, class })
      .into_expr(rule_head.range.map_range(|r| r.start..r.start))
  });
  let suffix = vec_attrs(rule_tail).is_none().then(|| {
    // The suffix follows the last element, so it is anchored at its end
    Clause::Placeh(Placeholder { name: suffix_name, class })
      .into_expr(rule_tail.range.map_range(|r| r.end..r.end))
  });
  let pattern = prefix.iter().cloned().chain(pattern).chain(suffix.clone()).collect();
  let template = prefix.into_iter().chain(template).chain(suffix).collect();
  ProjRule { comments, prio, pattern, template }
}
/// Placeholder class with the priority stripped, used to compare how a key is
/// used in the pattern and in the template of a rule
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum PHType {
/// Stands for a single expression
Scalar,
/// Stands for a single name
Name,
/// Stands for a sequence of expressions, which must be non-empty if
/// `nonzero` is set
Vec { nonzero: bool },
}
impl From<PHClass> for PHType {
  /// Drop the priority of vectorial classes; other classes map one-to-one
  fn from(value: PHClass) -> Self {
    match value {
      PHClass::Vec { nonzero, .. } => Self::Vec { nonzero },
      PHClass::Name => Self::Name,
      PHClass::Scalar => Self::Scalar,
    }
  }
}
fn check_rec_expr(
expr: &RuleExpr,
types: &mut HashMap<Tok<String>, PHType>,
in_template: bool,
) -> Result<(), RuleError> {
match &expr.value {
Clause::Name(_) | Clause::Atom(_) => Ok(()),
Clause::Placeh(Placeholder { name, class }) => {
let typ = (*class).into();
// in a template, the type must be known and identical
// outside template (in pattern) the type must be unknown
if let Some(known) = types.insert(name.clone(), typ) {
if !in_template {
Err(RuleError::Multiple(name.clone()))
} else if known != typ {
Err(RuleError::ArityMismatch(name.clone()))
} else {
Ok(())
}
} else if in_template {
Err(RuleError::Missing(name.clone()))
} else {
Ok(())
}
},
Clause::Lambda(arg, body) => {
check_rec_exprv(arg, types, in_template)?;
check_rec_exprv(body, types, in_template)
},
Clause::S(_, body) => check_rec_exprv(body, types, in_template),
}
}
/// Validate every expression of a sequence with [check_rec_expr]. In a
/// pattern, additionally reject two vectorial placeholders that directly
/// follow each other.
fn check_rec_exprv(
  exprv: &[RuleExpr],
  types: &mut HashMap<Tok<String>, PHType>,
  in_template: bool,
) -> Result<(), RuleError> {
  let mut items = exprv.iter().peekable();
  while let Some(current) = items.next() {
    check_rec_expr(current, types, in_template)?;
    if in_template {
      continue;
    }
    // in a pattern vectorials cannot follow each other
    if let Some(next) = items.peek() {
      if let (Some(ld), Some(rd)) = (vec_attrs(current), vec_attrs(next)) {
        return Err(RuleError::VecNeighbors(ld.0, rd.0));
      }
    }
  }
  Ok(())
}
/// Validate a rule and bring it into the form the matcher expects.
///
/// The pattern is checked first so that the placeholder types it declares are
/// known when the template is checked. A valid rule is then padded with
/// [pad].
pub fn prepare_rule(rule: ProjRule) -> Result<ProjRule, RuleError> {
  let mut types = HashMap::new();
  // Dimension check, then padding
  check_rec_exprv(&rule.pattern, &mut types, false)
    .and_then(|()| check_rec_exprv(&rule.template, &mut types, true))
    .map(|()| pad(rule))
}

View File

@@ -0,0 +1,158 @@
//! Collects, prioritizes and executes rules.
use std::fmt;
use std::rc::Rc;
use hashbrown::HashSet;
use itertools::Itertools;
use ordered_float::NotNan;
use super::matcher::{Matcher, RuleExpr};
use super::matcher_vectree::shared::VectreeMatcher;
use super::prepare_rule::prepare_rule;
use super::state::apply_exprv;
use super::update_first_seq;
use crate::error::Reporter;
use crate::name::Sym;
use crate::parse::numeric::print_nat16;
use crate::pipeline::project::ProjRule;
/// A prepared rule together with the data derived from it when the
/// repository is built
#[derive(Debug)]
pub(super) struct CachedRule<M: Matcher> {
/// Matcher built from `pattern`
matcher: M,
/// Pattern of the rule as returned by rule preparation
pattern: Vec<RuleExpr>,
/// Names collected from the pattern. A rule is only attempted on code whose
/// own collected names include all of these
pat_glossary: HashSet<Sym>,
/// Template of the rule as returned by rule preparation
template: Vec<RuleExpr>,
/// Names that occur in both the pattern and the template; the matcher is
/// asked to record the location of these when they match
save_location: HashSet<Sym>,
}
impl<M: fmt::Display + Matcher> fmt::Display for CachedRule<M> {
  /// Prints the pattern, the matcher built for it and the template, with
  /// the expressions of a sequence separated by spaces.
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    let patterns = self.pattern.iter().join(" ");
    let templates = self.template.iter().map(|e| e.to_string()).join(" ");
    write!(f, "{patterns} is matched by {} and generates {}", self.matcher, templates)
  }
}
/// Substitution rule scheduler
///
/// Manages a priority queue of rules and offers functions to apply them. The
/// rules are stored in an optimized structure but the repository is generic
/// over the implementation of this optimized form.
///
/// If you don't know what to put in the generic parameter, use [Repo]
pub struct Repository<M: Matcher> {
/// Prepared rules paired with their priority, sorted so that the highest
/// priority comes first
cache: Vec<(CachedRule<M>, NotNan<f64>)>,
}
impl<M: Matcher> Repository<M> {
/// Build a new repository to hold the given set of rules
pub fn new(mut rules: Vec<ProjRule>, reporter: &Reporter) -> Self {
rules.sort_by_key(|r| -r.prio);
let cache = rules
.into_iter()
.filter_map(|r| {
let ProjRule { pattern, prio, template, comments: _ } = prepare_rule(r.clone())
.inspect_err(|e| reporter.report(e.clone().into_project(&r)))
.ok()?;
let mut pat_glossary = HashSet::new();
pat_glossary.extend(pattern.iter().flat_map(|e| e.value.collect_names().into_iter()));
let mut tpl_glossary = HashSet::new();
tpl_glossary.extend(template.iter().flat_map(|e| e.value.collect_names().into_iter()));
let save_location = pat_glossary.intersection(&tpl_glossary).cloned().collect();
let matcher = M::new(Rc::new(pattern.clone()));
let prep = CachedRule { matcher, pattern, template, pat_glossary, save_location };
Some((prep, prio))
})
.collect::<Vec<_>>();
Self { cache }
}
/// Attempt to run each rule in priority order once
#[must_use]
pub fn step(&self, code: &RuleExpr) -> Option<RuleExpr> {
let glossary = code.value.collect_names();
for (rule, _) in self.cache.iter() {
if !rule.pat_glossary.is_subset(&glossary) {
continue;
}
let product = update_first_seq::expr(code, &mut |exprv| {
let save_loc = |n| rule.save_location.contains(&n);
let state = rule.matcher.apply(exprv.as_slice(), &save_loc)?;
let result = apply_exprv(&rule.template, &state);
Some(Rc::new(result))
});
if let Some(newcode) = product {
return Some(newcode);
}
}
None
}
/// Keep running the matching rule with the highest priority until no
/// rules match. WARNING: this function might not terminate
#[must_use]
pub fn pass(&self, code: &RuleExpr) -> Option<RuleExpr> {
if let Some(mut processed) = self.step(code) {
while let Some(out) = self.step(&processed) {
processed = out
}
Some(processed)
} else {
None
}
}
/// Attempt to run each rule in priority order `limit` times. Returns
/// the final tree and the number of iterations left to the limit.
#[must_use]
pub fn long_step(&self, code: &RuleExpr, mut limit: usize) -> (RuleExpr, usize) {
if limit == 0 {
return (code.clone(), 0);
}
if let Some(mut processed) = self.step(code) {
limit -= 1;
if limit == 0 {
return (processed, 0);
}
while let Some(out) = self.step(&processed) {
limit -= 1;
if limit == 0 {
return (out, 0);
}
processed = out;
}
(processed, limit)
} else {
(code.clone(), limit)
}
}
}
impl<M: fmt::Debug + Matcher> fmt::Debug for Repository<M> {
  /// Writes the debug form of every cached rule entry on its own line
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    self.cache.iter().try_for_each(|rule| writeln!(f, "{rule:?}"))
  }
}
impl<M: fmt::Display + Matcher> fmt::Display for Repository<M> {
  /// Lists every rule with its priority and the names its pattern depends
  /// on, in the order the rules are attempted.
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    writeln!(f, "Repository[")?;
    self.cache.iter().try_for_each(|(rule, p)| {
      let prio = print_nat16(*p);
      let deps = rule.pat_glossary.iter().join(", ");
      writeln!(f, " priority: {prio}\tdependencies: [{deps}]")?;
      writeln!(f, " {rule}")
    })?;
    write!(f, "]")
  }
}
/// Repository with the default matcher implementation, [VectreeMatcher]
pub type Repo = Repository<VectreeMatcher>;

View File

@@ -0,0 +1,203 @@
//! Error conditions encountered by the rule processor
use std::fmt;
use hashbrown::HashSet;
use intern_all::Tok;
use crate::error::{ErrorPosition, ProjectError, ProjectErrorObj};
use crate::location::{CodeOrigin, SourceRange};
use crate::parse::parsed::{search_all_slcs, Clause, PHClass, Placeholder};
use crate::pipeline::project::ProjRule;
/// Various reasons why a substitution rule may be invalid
///
/// Every variant carries the placeholder key or keys involved.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RuleError {
/// A key is present in the template but not the pattern
Missing(Tok<String>),
/// A key uses a different arity in the template and in the pattern
ArityMismatch(Tok<String>),
/// Multiple occurrences of a placeholder in a pattern
Multiple(Tok<String>),
/// Two vectorial placeholders are next to each other
VecNeighbors(Tok<String>, Tok<String>),
}
impl RuleError {
  /// Convert into a unified error trait object shared by all Orchid errors
  ///
  /// `rule` is the rule the error was found in; the concrete error types
  /// search it for the source locations to report.
  #[must_use]
  pub fn into_project(self, rule: &ProjRule) -> ProjectErrorObj {
    match self {
      Self::VecNeighbors(n1, n2) => VecNeighbors::new(rule, n1, n2).pack(),
      Self::ArityMismatch(name) => ArityMismatch::new(rule, name).pack(),
      Self::Multiple(name) => Multiple::new(rule, name).pack(),
      Self::Missing(name) => Missing::new(rule, name).pack(),
    }
  }
}
impl fmt::Display for RuleError {
  /// One-line description of the problem, naming the affected key or keys
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Self::VecNeighbors(left, right) =>
        write!(f, "vectorials {left} and {right} are next to each other"),
      Self::Multiple(key) => write!(f, "Key {key} appears multiple times in match pattern"),
      Self::ArityMismatch(key) =>
        write!(f, "Key {key} used inconsistently with and without ellipsis"),
      Self::Missing(key) => write!(f, "Key {key} not in match pattern"),
    }
  }
}
/// A key is present in the template but not the pattern of a rule
#[derive(Debug)]
struct Missing {
  /// Source ranges of every occurrence of the key in the template
  locations: HashSet<SourceRange>,
  /// The key that has no counterpart in the pattern
  name: Tok<String>,
}
impl Missing {
  /// Collect the location of every placeholder called `name` in the
  /// template of `rule`
  #[must_use]
  pub fn new(rule: &ProjRule, name: Tok<String>) -> Self {
    let mut locations = HashSet::new();
    for expr in rule.template.iter() {
      expr.search_all(&mut |e| {
        match &e.value {
          Clause::Placeh(ph) if ph.name == name => {
            locations.insert(e.range.clone());
          },
          _ => (),
        }
        // Never stop early: every occurrence is wanted
        None::<()>
      });
    }
    Self { locations, name }
  }
}
impl ProjectError for Missing {
  const DESCRIPTION: &'static str = "A key appears in the template but not the pattern of a rule";
  fn message(&self) -> String {
    format!("The key {} appears in the template but not the pattern of this rule", self.name)
  }
  fn positions(&self) -> impl IntoIterator<Item = ErrorPosition> {
    self.locations.iter().cloned().map(|range| CodeOrigin::Source(range).into())
  }
}
/// A key is present multiple times in the pattern of a rule
#[derive(Debug)]
struct Multiple {
  /// Source ranges of every occurrence of the key in the pattern
  locations: HashSet<SourceRange>,
  /// The key that is bound more than once
  name: Tok<String>,
}
impl Multiple {
  /// Collect the location of every placeholder called `name` in the pattern
  /// of `rule`. The pattern is searched, not the template, because this
  /// error is only raised for duplicate keys within a pattern.
  #[must_use]
  pub fn new(rule: &ProjRule, name: Tok<String>) -> Self {
    let mut locations = HashSet::new();
    for expr in rule.pattern.iter() {
      expr.search_all(&mut |e| {
        if let Clause::Placeh(ph) = &e.value {
          if ph.name == name {
            locations.insert(e.range.clone());
          }
        }
        // Never stop early: every occurrence is wanted
        None::<()>
      });
    }
    Self { locations, name }
  }
}
impl ProjectError for Multiple {
  const DESCRIPTION: &'static str = "A key appears multiple times in the pattern of a rule";
  fn message(&self) -> String {
    format!("The key {} appears multiple times in this pattern", self.name)
  }
  fn positions(&self) -> impl IntoIterator<Item = ErrorPosition> {
    self.locations.iter().map(|range| CodeOrigin::Source(range.clone()).into())
  }
}
/// A key is used with different arities in the pattern and the template of a
/// rule
#[derive(Debug)]
struct ArityMismatch {
  /// Source range and placeholder class of every occurrence of the key in
  /// the pattern and the template
  locations: HashSet<(SourceRange, PHClass)>,
  /// The key that is used inconsistently
  name: Tok<String>,
}
impl ArityMismatch {
  /// Collect the location and class of every placeholder called `name` in
  /// `rule`. Both the pattern and the template are searched so that the
  /// conflicting usages can be shown side by side.
  #[must_use]
  pub fn new(rule: &ProjRule, name: Tok<String>) -> Self {
    let mut locations = HashSet::new();
    for expr in rule.pattern.iter().chain(rule.template.iter()) {
      expr.search_all(&mut |e| {
        if let Clause::Placeh(ph) = &e.value {
          if ph.name == name {
            locations.insert((e.range.clone(), ph.class));
          }
        }
        // Never stop early: every occurrence is wanted
        None::<()>
      });
    }
    Self { locations, name }
  }
}
impl ProjectError for ArityMismatch {
  const DESCRIPTION: &'static str = "A key appears with different arities in a rule";
  fn message(&self) -> String {
    format!("The key {} appears multiple times with different arities in this rule", self.name)
  }
  fn positions(&self) -> impl IntoIterator<Item = ErrorPosition> {
    self.locations.iter().map(|(origin, class)| {
      // Spell out what each occurrence stands for so the conflict is visible
      let represents = match class {
        PHClass::Scalar => "one clause",
        PHClass::Name => "one name",
        PHClass::Vec { nonzero: true, .. } => "one or more clauses",
        PHClass::Vec { nonzero: false, .. } => "any number of clauses",
      };
      ErrorPosition {
        origin: CodeOrigin::Source(origin.clone()),
        message: Some(format!("This instance represents {represents}")),
      }
    })
  }
}
/// Two vectorial placeholders appear next to each other
#[derive(Debug)]
struct VecNeighbors {
  /// Source ranges of both placeholders of every offending pair in the
  /// pattern
  locations: HashSet<SourceRange>,
  /// Key of the left placeholder
  n1: Tok<String>,
  /// Key of the right placeholder
  n2: Tok<String>,
}
impl VecNeighbors {
  /// Collect the locations where a vectorial called `n1` is directly
  /// followed by a vectorial called `n2` in any sequence of the pattern of
  /// `rule`. The pattern is searched, not the template, because neighbouring
  /// vectorials are only an error in patterns.
  #[must_use]
  pub fn new(rule: &ProjRule, n1: Tok<String>, n2: Tok<String>) -> Self {
    let mut locations = HashSet::new();
    search_all_slcs(&rule.pattern[..], &mut |ev| {
      for pair in ev.windows(2) {
        let (a, b) = (&pair[0], &pair[1]);
        let a_vec = matches!(&a.value, Clause::Placeh(
          Placeholder{ class: PHClass::Vec { .. }, name }
        ) if name == &n1);
        let b_vec = matches!(&b.value, Clause::Placeh(
          Placeholder{ class: PHClass::Vec { .. }, name }
        ) if name == &n2);
        if a_vec && b_vec {
          locations.insert(a.range.clone());
          locations.insert(b.range.clone());
        }
      }
      // Never stop early: every offending pair is wanted
      None::<()>
    });
    Self { locations, n1, n2 }
  }
}
impl ProjectError for VecNeighbors {
  const DESCRIPTION: &'static str = "Two vectorial placeholders appear next to each other";
  fn message(&self) -> String {
    format!("The keys {} and {} appear next to each other with a vectorial arity", self.n1, self.n2)
  }
  fn positions(&self) -> impl IntoIterator<Item = ErrorPosition> {
    self.locations.iter().map(|range| CodeOrigin::Source(range.clone()).into())
  }
}

View File

@@ -0,0 +1,89 @@
use std::rc::Rc;
use hashbrown::HashMap;
use intern_all::Tok;
use super::matcher::RuleExpr;
use crate::location::SourceRange;
use crate::name::Sym;
use crate::parse::parsed::{Clause, Expr, PHClass, Placeholder};
use crate::utils::join::join_maps;
use crate::utils::unwrap_or::unwrap_or;
/// A value bound to a placeholder by a match. Every variant borrows from the
/// matched code.
#[derive(Clone, Copy, Debug)]
pub enum StateEntry<'a> {
/// Subsequence captured by a vectorial placeholder
Vec(&'a [RuleExpr]),
/// Single expression captured by a scalar placeholder
Scalar(&'a RuleExpr),
/// Name captured by a name-only placeholder, along with the source range of
/// the expression it was found in
Name(&'a Sym, &'a SourceRange),
}
/// Everything a successful match captured
#[derive(Clone)]
pub struct State<'a> {
/// Value bound to each placeholder key
placeholders: HashMap<Tok<String>, StateEntry<'a>>,
/// Source ranges at which names of the pattern were matched; only recorded
/// for names the matcher was asked to save
name_locations: HashMap<Sym, Vec<SourceRange>>,
}
impl<'a> State<'a> {
  /// State that binds a single placeholder and records no name locations
  pub fn from_ph(key: Tok<String>, entry: StateEntry<'a>) -> Self {
    let mut placeholders = HashMap::new();
    placeholders.insert(key, entry);
    Self { placeholders, name_locations: HashMap::new() }
  }
  /// Merge two states. If both bind the same placeholder key, the binding
  /// from `s` is kept. Locations recorded for the same name in both states
  /// are concatenated, those of `self` first.
  pub fn combine(self, s: Self) -> Self {
    let Self { mut placeholders, name_locations } = self;
    placeholders.extend(s.placeholders);
    let name_locations = join_maps(name_locations, s.name_locations, |_, l, r| {
      l.into_iter().chain(r).collect()
    });
    Self { placeholders, name_locations }
  }
  /// Length of the sequence bound to `key`, or `None` if the key is unbound
  /// or bound to something other than a sequence
  pub fn ph_len(&self, key: &Tok<String>) -> Option<usize> {
    if let StateEntry::Vec(slc) = self.placeholders.get(key)? {
      Some(slc.len())
    } else {
      None
    }
  }
  /// State that records a single location for `name` and binds no
  /// placeholders
  pub fn from_name(name: Sym, location: SourceRange) -> Self {
    let mut name_locations = HashMap::new();
    name_locations.insert(name, vec![location]);
    Self { name_locations, placeholders: HashMap::new() }
  }
}
impl Default for State<'static> {
  /// The empty state: no placeholder bindings and no name locations
  fn default() -> Self {
    let (placeholders, name_locations) = (HashMap::new(), HashMap::new());
    Self { placeholders, name_locations }
  }
}
/// Instantiate a template sequence with the bindings in `state`. Each
/// template expression may expand to any number of expressions; the results
/// are concatenated in order.
#[must_use]
pub fn apply_exprv(template: &[RuleExpr], state: &State) -> Vec<RuleExpr> {
  let mut output = Vec::new();
  for expr in template {
    output.extend(apply_expr(expr, state));
  }
  output
}
/// Instantiate a single template expression with the bindings in `state`.
///
/// Placeholders are replaced by whatever they are bound to, which for
/// vectorials can be any number of expressions. Names take over the first
/// location recorded for them in the state, if any. Sequences and lambdas are
/// rebuilt with their contents instantiated recursively.
///
/// # Panics
///
/// Panics if a placeholder has no binding in `state`, or if the binding is
/// of a different kind than the placeholder.
#[must_use]
pub fn apply_expr(template: &RuleExpr, state: &State) -> Vec<RuleExpr> {
  let Expr { range, value } = template;
  // Everything except a placeholder yields exactly one expression
  let single = match value {
    Clause::Placeh(Placeholder { name, class }) => {
      let Some(&entry) = state.placeholders.get(name) else {
        panic!("Placeholder does not have a value in state")
      };
      return match (class, entry) {
        (PHClass::Vec { .. }, StateEntry::Vec(chunk)) => chunk.to_vec(),
        (PHClass::Scalar, StateEntry::Scalar(item)) => vec![item.clone()],
        (PHClass::Name, StateEntry::Name(n, r)) => {
          vec![RuleExpr { value: Clause::Name(n.clone()), range: r.clone() }]
        },
        _ => panic!("Type mismatch between template and state"),
      };
    },
    Clause::Atom(_) => template.clone(),
    Clause::Name(n) => match state.name_locations.get(n) {
      Some(locs) => Expr { value: value.clone(), range: locs[0].clone() },
      None => template.clone(),
    },
    Clause::S(c, body) => {
      let new_body = apply_exprv(body.as_slice(), state);
      Expr { range: range.clone(), value: Clause::S(*c, Rc::new(new_body)) }
    },
    Clause::Lambda(arg, body) => {
      let new_arg = apply_exprv(arg, state);
      let new_body = apply_exprv(&body[..], state);
      Expr { range: range.clone(), value: Clause::Lambda(Rc::new(new_arg), Rc::new(new_body)) }
    },
  };
  vec![single]
}

View File

@@ -0,0 +1,60 @@
use std::iter;
use std::rc::Rc;
use super::matcher::RuleExpr;
use crate::parse::parsed::{Clause, Expr};
/// Traverse the tree, calling pred on every sibling list until it returns
/// some vec, then return the tree with that sibling list replaced by the
/// vec. Returns `None` if pred never returned some.
///
/// The list itself is offered to pred first; its elements are only descended
/// into if pred declines it.
#[must_use]
pub fn exprv<F: FnMut(Rc<Vec<RuleExpr>>) -> Option<Rc<Vec<RuleExpr>>>>(
  input: Rc<Vec<RuleExpr>>,
  pred: &mut F,
) -> Option<Rc<Vec<RuleExpr>>> {
  pred(input.clone()).or_else(|| {
    let updated = replace_first(input.as_ref(), |ex| expr(ex, pred))?;
    Some(Rc::new(updated.collect()))
  })
}
/// Apply [clause] to the value of an expression, keeping its range. Returns
/// `None` if nothing inside the expression was replaced.
#[must_use]
pub fn expr<F: FnMut(Rc<Vec<RuleExpr>>) -> Option<Rc<Vec<RuleExpr>>>>(
  input: &RuleExpr,
  pred: &mut F,
) -> Option<RuleExpr> {
  let value = clause(&input.value, pred)?;
  Some(Expr { value, range: input.range.clone() })
}
/// Search the sibling lists nested in a clause with [exprv] and return the
/// clause with the first replacement applied. Atoms, placeholders and names
/// contain no sibling lists and always yield `None`.
#[must_use]
pub fn clause<F: FnMut(Rc<Vec<RuleExpr>>) -> Option<Rc<Vec<RuleExpr>>>>(
  c: &Clause,
  pred: &mut F,
) -> Option<Clause> {
  match c {
    Clause::Atom(_) | Clause::Placeh { .. } | Clause::Name { .. } => None,
    Clause::S(c, body) => exprv(body.clone(), pred).map(|new_body| Clause::S(*c, new_body)),
    // The argument is searched first; the body only if the argument is left
    // unchanged
    Clause::Lambda(arg, body) => match exprv(arg.clone(), pred) {
      Some(new_arg) => Some(Clause::Lambda(new_arg, body.clone())),
      None => {
        let new_body = exprv(body.clone(), pred)?;
        Some(Clause::Lambda(arg.clone(), new_body))
      },
    },
  }
}
/// Iterate over a sequence with the first element updated for which the
/// function returns Some(), but only if there is such an element.
///
/// `f` is called on the elements in order and no further once it has
/// returned a replacement. If it never does, the result is `None`.
pub fn replace_first<T: Clone, F: FnMut(&T) -> Option<T>>(
  slice: &[T],
  mut f: F,
) -> Option<impl Iterator<Item = T> + '_> {
  let (idx, new) =
    slice.iter().enumerate().find_map(|(i, item)| f(item).map(|new| (i, new)))?;
  let (before, rest) = slice.split_at(idx);
  // `rest` starts with the element being replaced
  let after = &rest[1..];
  Some(before.iter().cloned().chain(iter::once(new)).chain(after.iter().cloned()))
}

View File

@@ -0,0 +1,15 @@
use intern_all::Tok;
use super::matcher::RuleExpr;
use crate::parse::parsed::{Clause, PHClass, Placeholder};
/// Returns the name, priority and nonzero of the expression if it is
/// a vectorial placeholder
///
/// The clause is inspected by reference and only the name is cloned.
#[must_use]
pub fn vec_attrs(expr: &RuleExpr) -> Option<(Tok<String>, usize, bool)> {
  match &expr.value {
    Clause::Placeh(Placeholder { class: PHClass::Vec { prio, nonzero }, name }) =>
      Some((name.clone(), *prio, *nonzero)),
    _ => None,
  }
}