Public API and docs

This commit is contained in:
2023-05-26 15:23:15 +01:00
parent 3c1a6e2be2
commit fdf18e6ff8
99 changed files with 503 additions and 406 deletions

View File

@@ -0,0 +1,25 @@
use super::scal_match::scalv_match;
use super::shared::AnyMatcher;
use super::vec_match::vec_match;
use crate::ast::Expr;
use crate::rule::state::State;
/// Apply an [AnyMatcher] to a sequence of expressions.
///
/// A purely scalar matcher is delegated to `scalv_match`. A matcher with a
/// vectorial core checks its fixed-size scalar prefix and suffix against both
/// ends of the sequence and hands whatever lies between them to `vec_match`.
/// Returns `None` as soon as any part fails to match.
pub fn any_match<'a>(
  matcher: &AnyMatcher,
  seq: &'a [Expr],
) -> Option<State<'a>> {
  let (left, mid, right) = match matcher {
    AnyMatcher::Scalar(scalv) => return scalv_match(scalv, seq),
    AnyMatcher::Vec { left, mid, right } => (left, mid, right),
  };
  // The scalar ends have a fixed size, a shorter sequence can never match
  if left.len() + right.len() > seq.len() {
    return None;
  }
  let (head, rest) = seq.split_at(left.len());
  let (body, tail) = rest.split_at(rest.len() - right.len());
  // Both cheap fixed-size ends are checked before the vectorial middle
  let mut state = scalv_match(left, head)?;
  state.extend(scalv_match(right, tail)?);
  state.extend(vec_match(mid, body)?);
  Some(state)
}

View File

@@ -0,0 +1,164 @@
use itertools::Itertools;
use super::shared::{AnyMatcher, ScalMatcher, VecMatcher};
use crate::ast::{Clause, Expr, PHClass, Placeholder};
use crate::interner::Tok;
use crate::rule::vec_attrs::vec_attrs;
use crate::utils::Side;
/// The expressions left of the selected vectorial placeholder, the
/// attributes `vec_attrs` reported for that placeholder, and the expressions
/// right of it
pub type MaxVecSplit<'a> = (&'a [Expr], (Tok<String>, u64, bool), &'a [Expr]);

/// Derive the details of the central vectorial and the two sides from a
/// slice of Expr's
///
/// The central vectorial is the element with the greatest second attribute;
/// anything that is not a vectorial ranks below every vectorial. Returns
/// `None` for an empty slice or one without any vectorial placeholder.
fn split_at_max_vec(pattern: &[Expr]) -> Option<MaxVecSplit> {
  let pivot = pattern.iter().position_max_by_key(|expr| match vec_attrs(expr) {
    Some(attrs) => attrs.1 as i64,
    None => -1,
  })?;
  let (left, not_left) = pattern.split_at(pivot);
  let (placeh, right) = not_left
    .split_first()
    .expect("The index of the greatest element must be less than the length");
  // If even the greatest element is not a vectorial then there are none
  let attrs = vec_attrs(placeh)?;
  Some((left, attrs, right))
}
/// Count the items the iterator yields before the first one for which
/// `vec_attrs` returns a value
fn scal_cnt<'a>(iter: impl Iterator<Item = &'a Expr>) -> usize {
  let mut count = 0;
  for expr in iter {
    if vec_attrs(expr).is_some() {
      break;
    }
    count += 1;
  }
  count
}
/// Build the matcher for a whole pattern.
///
/// A pattern without vectorial placeholders becomes a plain list of scalar
/// matchers. Otherwise the leading and trailing scalars are split off and the
/// part between them, which starts and ends with a vectorial, is turned into
/// a [VecMatcher].
pub fn mk_any(pattern: &[Expr]) -> AnyMatcher {
  let leading = scal_cnt(pattern.iter());
  if leading >= pattern.len() {
    // Every element is scalar
    return AnyMatcher::Scalar(mk_scalv(pattern));
  }
  // At least one vectorial exists so this cannot overlap with `leading`
  let trailing = scal_cnt(pattern.iter().rev());
  let (left, rest) = pattern.split_at(leading);
  let (mid, right) = rest.split_at(rest.len() - trailing);
  let left = mk_scalv(left);
  let mid = mk_vec(mid);
  let right = mk_scalv(right);
  AnyMatcher::Vec { left, mid, right }
}
/// Pattern MUST NOT contain vectorial placeholders
fn mk_scalv(pattern: &[Expr]) -> Vec<ScalMatcher> {
pattern.iter().map(mk_scalar).collect()
}
/// Build a [VecMatcher] tree for a slice of the pattern.
///
/// Pattern MUST start and end with a vectorial placeholder
///
/// The slice is split at the vectorial selected by `split_at_max_vec`. That
/// placeholder becomes a plain `Placeh` matcher, the scalars directly next to
/// it become separators, and the outer remainders are built recursively.
///
/// # Panics
///
/// Panics if the pattern contains no vectorial placeholder. Debug builds
/// additionally verify that the pattern is not empty and that its first and
/// last elements are vectorials.
fn mk_vec(pattern: &[Expr]) -> VecMatcher {
  debug_assert!(!pattern.is_empty(), "pattern cannot be empty");
  // `and_then` rather than `map`: mapping would wrap the result of
  // `vec_attrs` in another `Some` which is set for any non-empty pattern
  debug_assert!(
    pattern.first().and_then(vec_attrs).is_some(),
    "pattern must start with a vectorial"
  );
  debug_assert!(
    pattern.last().and_then(vec_attrs).is_some(),
    "pattern must end with a vectorial"
  );
  let (left, (key, _prio, nonzero), right) = split_at_max_vec(pattern)
    .expect("pattern must have vectorial placeholders at least at either end");
  // Scalars directly right of the central vectorial form the right separator
  let r_sep_size = scal_cnt(right.iter());
  let (r_sep, r_side) = right.split_at(r_sep_size);
  // Scalars directly left of the central vectorial form the left separator
  let l_sep_size = scal_cnt(left.iter().rev());
  let (l_side, l_sep) = left.split_at(left.len() - l_sep_size);
  let main = VecMatcher::Placeh { key, nonzero };
  match (left, right) {
    // The pattern is a single vectorial
    (&[], &[]) => main,
    // The central vectorial is the first element
    (&[], _) => VecMatcher::Scan {
      direction: Side::Left,
      left: Box::new(main),
      sep: mk_scalv(r_sep),
      right: Box::new(mk_vec(r_side)),
    },
    // The central vectorial is the last element
    (_, &[]) => VecMatcher::Scan {
      direction: Side::Right,
      left: Box::new(mk_vec(l_side)),
      sep: mk_scalv(l_sep),
      right: Box::new(main),
    },
    // The central vectorial has other vectorials on both sides
    (..) => {
      let mut key_order = l_side
        .iter()
        .chain(r_side.iter())
        .filter_map(vec_attrs)
        .collect::<Vec<_>>();
      // Stable sort, highest priority first
      key_order.sort_by_key(|(_, prio, _)| std::cmp::Reverse(*prio));
      VecMatcher::Middle {
        left: Box::new(mk_vec(l_side)),
        left_sep: mk_scalv(l_sep),
        mid: Box::new(main),
        right_sep: mk_scalv(r_sep),
        right: Box::new(mk_vec(r_side)),
        key_order: key_order.into_iter().map(|(n, ..)| n).collect(),
      }
    },
  }
}
/// Build the matcher for a single pattern element, recursing into the
/// bodies of parenthesized sequences and lambdas.
///
/// Pattern MUST NOT be a vectorial placeholder
fn mk_scalar(pattern: &Expr) -> ScalMatcher {
  match &pattern.value {
    Clause::Placeh(ph) => {
      debug_assert!(
        !matches!(ph.class, PHClass::Vec { .. }),
        "Scalar matcher cannot be built from vector pattern"
      );
      ScalMatcher::Placeh(ph.name)
    },
    Clause::Name(n) => ScalMatcher::Name(*n),
    Clause::P(p) => ScalMatcher::P(p.clone()),
    Clause::Lambda(arg, body) => {
      let arg_matcher = Box::new(mk_scalar(arg));
      let body_matcher = Box::new(mk_any(body));
      ScalMatcher::Lambda(arg_matcher, body_matcher)
    },
    Clause::S(c, body) => {
      let body_matcher = Box::new(mk_any(body));
      ScalMatcher::S(*c, body_matcher)
    },
  }
}
#[cfg(test)]
mod test {
  use std::rc::Rc;

  use super::mk_any;
  use crate::ast::{Clause, PHClass, Placeholder};
  use crate::interner::{InternedDisplay, Interner};

  /// Smoke test: builds the matcher for
  /// `..$::prefix prelude::do (..$expr prelude::; ..$rest:1) ..$::suffix`
  /// and prints it. Nothing is asserted about the output.
  #[test]
  fn test_scan() {
    let i = Interner::new();
    // Vectorial placeholder that may match an empty sequence
    let vectorial = |name: &str, prio| {
      Clause::Placeh(Placeholder {
        class: PHClass::Vec { nonzero: false, prio },
        name: i.i(name),
      })
      .into_expr()
    };
    // Name with two segments, the first of which is `prelude`
    let prelude_name = |last: &str| {
      Clause::Name(i.i(&[i.i("prelude"), i.i(last)][..])).into_expr()
    };
    let pattern = vec![
      vectorial("::prefix", 0),
      prelude_name("do"),
      Clause::S(
        '(',
        Rc::new(vec![
          vectorial("expr", 0),
          prelude_name(";"),
          vectorial("rest", 1),
        ]),
      )
      .into_expr(),
      vectorial("::suffix", 0),
    ];
    let matcher = mk_any(&pattern);
    println!("{}", matcher.bundle(&i));
  }
}

View File

@@ -0,0 +1,20 @@
// Construction:
// convert pattern into hierarchy of plain, scan, middle
// - plain: accept any sequence or any non-empty sequence
// - scan: a single scalar pattern moves LTR or RTL, submatchers on either
// side
// - middle: two scalar patterns walk over all permutations of matches
// while getting progressively closer to each other
//
// Application:
// walk over the current matcher's valid options and poll the submatchers
// for each of them
mod any_match;
mod build;
mod scal_match;
mod shared;
mod vec_match;
// pub use build::mk_matcher;
pub use shared::VectreeMatcher;

View File

@@ -0,0 +1,38 @@
use super::any_match::any_match;
use super::shared::ScalMatcher;
use crate::ast::{Clause, Expr};
use crate::rule::state::{State, StateEntry};
/// Match a single expression against a scalar matcher.
///
/// Primitives and names match by equality and bind nothing, a placeholder
/// matches any expression and binds it to its key, while parenthesized
/// sequences and lambdas match recursively. Returns `None` on a mismatch.
pub fn scal_match<'a>(
  matcher: &ScalMatcher,
  expr: &'a Expr,
) -> Option<State<'a>> {
  match matcher {
    ScalMatcher::Placeh(key) =>
      Some(State::from([(*key, StateEntry::Scalar(expr))])),
    ScalMatcher::P(p1) => match &expr.value {
      Clause::P(p2) if p1 == p2 => Some(State::new()),
      _ => None,
    },
    ScalMatcher::Name(n1) => match &expr.value {
      Clause::Name(n2) if n1 == n2 => Some(State::new()),
      _ => None,
    },
    ScalMatcher::S(c1, b_mat) => match &expr.value {
      // The bracket type has to agree before the body is looked at
      Clause::S(c2, body) if c1 == c2 => any_match(b_mat, &body[..]),
      _ => None,
    },
    ScalMatcher::Lambda(arg_mat, b_mat) => match &expr.value {
      Clause::Lambda(arg, body) => {
        let mut state = scal_match(arg_mat, arg)?;
        state.extend(any_match(b_mat, body)?);
        Some(state)
      },
      _ => None,
    },
  }
}
/// Match a sequence of expressions against an equally long sequence of
/// scalar matchers, pairing them up by position.
///
/// Returns `None` if the lengths differ or any pair fails to match,
/// otherwise the combined bindings of all pairs.
pub fn scalv_match<'a>(
  matchers: &[ScalMatcher],
  seq: &'a [Expr],
) -> Option<State<'a>> {
  if matchers.len() != seq.len() {
    return None;
  }
  matchers.iter().zip(seq).try_fold(State::new(), |mut acc, (matcher, expr)| {
    acc.extend(scal_match(matcher, expr)?);
    Some(acc)
  })
}

View File

@@ -0,0 +1,202 @@
use std::fmt::Write;
use std::rc::Rc;
use super::any_match::any_match;
use super::build::mk_any;
use crate::ast::Expr;
use crate::interner::{InternedDisplay, Interner, Sym, Tok};
use crate::representations::Primitive;
use crate::rule::matcher::Matcher;
use crate::rule::state::State;
use crate::utils::{sym2string, unwrap_or, Side};
/// Matcher for exactly one expression
pub enum ScalMatcher {
/// Matches a primitive that is equal to the stored one
P(Primitive),
/// Matches a name that is equal to the stored one
Name(Sym),
/// Matches a bracketed sequence with the same bracket character whose
/// body satisfies the inner matcher
S(char, Box<AnyMatcher>),
/// Matches a lambda; the first matcher is applied to its argument and the
/// second to its body
Lambda(Box<ScalMatcher>, Box<AnyMatcher>),
/// Matches any single expression and binds it to this key
Placeh(Tok<String>),
}
/// Matcher for a sequence of expressions whose pattern starts and ends
/// with a vectorial placeholder
pub enum VecMatcher {
/// Binds the whole sequence to a single vectorial placeholder
Placeh {
/// Name the matched sequence is bound to
key: Tok<String>,
/// If set, an empty sequence is rejected
nonzero: bool,
},
/// Tries every position of a single separator and matches the parts on
/// either side of it with the submatchers; the first position where all
/// three succeed wins
Scan {
/// Matches everything before the separator
left: Box<VecMatcher>,
/// Matches the separator
sep: Vec<ScalMatcher>,
/// Matches everything after the separator
right: Box<VecMatcher>,
/// The separator traverses the sequence towards this side
direction: Side,
},
/// Tries pairs of positions for two separators, preferring pairs that are
/// further apart
Middle {
/// Matches the left outer region
left: Box<VecMatcher>,
/// Matches the left separator
left_sep: Vec<ScalMatcher>,
/// Matches the middle - can only ever be a plain placeholder
mid: Box<VecMatcher>,
/// Matches the right separator
right_sep: Vec<ScalMatcher>,
/// Matches the right outer region
right: Box<VecMatcher>,
/// Order of significance for sorting equally good solutions based on
/// the length of matches on either side.
///
/// Vectorial keys that appear on either side, in priority order
key_order: Vec<Tok<String>>,
},
}
/// Matcher for a complete pattern
pub enum AnyMatcher {
/// The pattern has no vectorial placeholders; the sequence must have
/// exactly as many items as there are matchers
Scalar(Vec<ScalMatcher>),
/// Scalar matchers for the fixed-size `left` and `right` ends of the
/// sequence and a vectorial matcher for whatever lies between them
Vec { left: Vec<ScalMatcher>, mid: VecMatcher, right: Vec<ScalMatcher> },
}
impl Matcher for AnyMatcher {
  /// Build the matcher tree for the pattern with `mk_any`
  fn new(pattern: Rc<Vec<Expr>>) -> Self {
    let pattern: &[Expr] = &pattern;
    mk_any(pattern)
  }

  /// Match `source` against the tree with `any_match`
  fn apply<'a>(&self, source: &'a [Expr]) -> Option<State<'a>> {
    any_match(self, source)
  }
}
// ################ InternedDisplay ################
fn disp_scalv(
scalv: &[ScalMatcher],
f: &mut std::fmt::Formatter<'_>,
i: &Interner,
) -> std::fmt::Result {
let (head, tail) = unwrap_or!(scalv.split_first(); return Ok(()));
head.fmt_i(f, i)?;
for s in tail.iter() {
write!(f, " ")?;
s.fmt_i(f, i)?;
}
Ok(())
}
impl InternedDisplay for ScalMatcher {
  /// Print the matcher in a notation that resembles the pattern it was
  /// built from: `$key` for placeholders, `\arg.body` for lambdas and the
  /// body between a pair of brackets for bracketed sequences.
  fn fmt_i(
    &self,
    f: &mut std::fmt::Formatter<'_>,
    i: &Interner,
  ) -> std::fmt::Result {
    match self {
      Self::Name(sym) => write!(f, "{}", sym2string(*sym, i)),
      Self::Placeh(key) => write!(f, "${}", i.r(*key)),
      Self::P(prim) => write!(f, "{:?}", prim),
      Self::Lambda(arg, body) => {
        write!(f, "\\")?;
        arg.fmt_i(f, i)?;
        write!(f, ".")?;
        body.fmt_i(f, i)
      },
      Self::S(open, body) => {
        f.write_char(*open)?;
        body.fmt_i(f, i)?;
        // Only these three bracket types are expected here
        let close = match open {
          '(' => ')',
          '[' => ']',
          '{' => '}',
          _ => unreachable!(),
        };
        f.write_char(close)
      },
    }
  }
}
impl InternedDisplay for VecMatcher {
  /// Print the matcher tree. Placeholders are shown as `..$key` or
  /// `...$key` if they are nonzero, scans as `Scan{left <arrow> sep <arrow>
  /// right}` with the arrow pointing in the scan direction, and middle
  /// matchers as their five parts separated by `|` inside `Middle{}`.
  fn fmt_i(
    &self,
    f: &mut std::fmt::Formatter<'_>,
    i: &Interner,
  ) -> std::fmt::Result {
    match self {
      Self::Placeh { key, nonzero } => {
        // A nonzero placeholder gets a third dot
        let dots = if *nonzero { "..." } else { ".." };
        write!(f, "{}${}", dots, i.r(*key))
      },
      Self::Scan { left, sep, right, direction } => {
        let arrow = match direction {
          Side::Right => " ==> ",
          Side::Left => " <== ",
        };
        f.write_str("Scan{")?;
        left.fmt_i(f, i)?;
        write!(f, "{}", arrow)?;
        disp_scalv(sep, f, i)?;
        write!(f, "{}", arrow)?;
        right.fmt_i(f, i)?;
        f.write_char('}')
      },
      Self::Middle { left, left_sep, mid, right_sep, right, .. } => {
        f.write_str("Middle{")?;
        left.fmt_i(f, i)?;
        f.write_char('|')?;
        disp_scalv(left_sep, f, i)?;
        f.write_char('|')?;
        mid.fmt_i(f, i)?;
        f.write_char('|')?;
        disp_scalv(right_sep, f, i)?;
        f.write_char('|')?;
        right.fmt_i(f, i)?;
        f.write_char('}')
      },
    }
  }
}
impl InternedDisplay for AnyMatcher {
  /// Print a scalar matcher list in parentheses, and a matcher with a
  /// vectorial core as `[left|mid|right]`.
  fn fmt_i(
    &self,
    f: &mut std::fmt::Formatter<'_>,
    i: &Interner,
  ) -> std::fmt::Result {
    match self {
      Self::Vec { left, mid, right } => {
        f.write_char('[')?;
        disp_scalv(left, f, i)?;
        f.write_char('|')?;
        mid.fmt_i(f, i)?;
        f.write_char('|')?;
        disp_scalv(right, f, i)?;
        f.write_char(']')
      },
      Self::Scalar(s) => {
        f.write_char('(')?;
        disp_scalv(s, f, i)?;
        f.write_char(')')
      },
    }
  }
}
// ################ External ################
/// A [Matcher] implementation that builds a priority-order tree of the
/// vectorial placeholders and handles the scalars on leaves.
///
/// This is a wrapper around the [AnyMatcher] built for the whole pattern;
/// the inner matcher is kept private.
pub struct VectreeMatcher(AnyMatcher);
impl Matcher for VectreeMatcher {
fn new(pattern: Rc<Vec<Expr>>) -> Self {
Self(AnyMatcher::new(pattern))
}
fn apply<'a>(&self, source: &'a [Expr]) -> Option<State<'a>> {
self.0.apply(source)
}
}
impl InternedDisplay for VectreeMatcher {
  /// Print the inner [AnyMatcher]
  fn fmt_i(
    &self,
    f: &mut std::fmt::Formatter<'_>,
    i: &Interner,
  ) -> std::fmt::Result {
    let Self(inner) = self;
    inner.fmt_i(f, i)
  }
}

View File

@@ -0,0 +1,102 @@
use std::cmp::Ordering;
use itertools::Itertools;
use super::scal_match::scalv_match;
use super::shared::VecMatcher;
use crate::ast::Expr;
use crate::rule::state::{State, StateEntry};
use crate::utils::unwrap_or;
/// Match a sequence of expressions against a vectorial matcher tree.
///
/// - `Placeh` binds the whole sequence to its key, rejecting an empty
///   sequence if the placeholder is nonzero.
/// - `Scan` tries the separator at every position in the order dictated by
///   its direction and returns the first position where the separator and
///   both sides match.
/// - `Middle` collects every position where the left and the right separator
///   match, and tries the non-overlapping pairs starting with those that are
///   furthest apart. Among the successful candidates with the same distance
///   the one that binds longer sequences to the keys in `key_order` wins.
///
/// Returns `None` if no arrangement matches.
///
/// # Panics
///
/// Panics if a key in `key_order` is not bound to a vectorial in one of the
/// candidate states of a `Middle` matcher.
pub fn vec_match<'a>(
  matcher: &VecMatcher,
  seq: &'a [Expr],
) -> Option<State<'a>> {
  match matcher {
    VecMatcher::Placeh { key, nonzero } => {
      if *nonzero && seq.is_empty() {
        return None;
      }
      Some(State::from([(*key, StateEntry::Vec(seq))]))
    },
    VecMatcher::Scan { left, sep, right, direction } => {
      if seq.len() < sep.len() {
        return None;
      }
      for lpos in direction.walk(0..=seq.len() - sep.len()) {
        let rpos = lpos + sep.len();
        let mut state = unwrap_or!(vec_match(left, &seq[..lpos]); continue);
        state.extend(unwrap_or!(scalv_match(sep, &seq[lpos..rpos]); continue));
        state.extend(unwrap_or!(vec_match(right, &seq[rpos..]); continue));
        return Some(state);
      }
      None
    },
    // XXX predict heap space usage and allocation count
    VecMatcher::Middle { left, left_sep, mid, right_sep, right, key_order } => {
      if seq.len() < left_sep.len() + right_sep.len() {
        return None;
      }
      // NOTE(review): `windows` panics on a size of zero, so this relies on
      // both separators being non-empty - confirm that the builder is never
      // given adjacent vectorials
      // Valid locations for the left separator
      let lposv = seq[..seq.len() - right_sep.len()]
        .windows(left_sep.len())
        .enumerate()
        .filter_map(|(i, window)| scalv_match(left_sep, window).map(|s| (i, s)))
        .collect::<Vec<_>>();
      // Valid locations for the right separator. The windows are taken from
      // a subslice that skips the first `left_sep.len()` items, so the
      // window index is shifted back to obtain an index into `seq`
      let rposv = seq[left_sep.len()..]
        .windows(right_sep.len())
        .enumerate()
        .filter_map(|(i, window)| {
          scalv_match(right_sep, window).map(|s| (i + left_sep.len(), s))
        })
        .collect::<Vec<_>>();
      // Valid combinations of locations for the separators
      let mut pos_pairs = lposv
        .into_iter()
        .cartesian_product(rposv.into_iter())
        .filter(|((lpos, _), (rpos, _))| lpos + left_sep.len() <= *rpos)
        .map(|((lpos, mut lstate), (rpos, rstate))| {
          lstate.extend(rstate);
          (lpos, rpos, lstate)
        })
        .collect::<Vec<_>>();
      // In descending order of size
      pos_pairs.sort_by_key(|(l, r, _)| std::cmp::Reverse(r - l));
      let eql_clusters = pos_pairs.into_iter().group_by(|(al, ar, _)| ar - al);
      for (_gap_size, cluster) in eql_clusters.into_iter() {
        let best_candidate = cluster
          .into_iter()
          .filter_map(|(lpos, rpos, mut state)| {
            state.extend(vec_match(left, &seq[..lpos])?);
            state.extend(vec_match(mid, &seq[lpos + left_sep.len()..rpos])?);
            state.extend(vec_match(right, &seq[rpos + right_sep.len()..])?);
            Some(state)
          })
          .max_by(|a, b| {
            // The first key that is bound to sequences of different length
            // in the two candidates decides
            for key in key_order {
              let aslc = if let Some(StateEntry::Vec(s)) = a.get(key) {
                s
              } else {
                panic!("key_order references scalar or missing")
              };
              let bslc = if let Some(StateEntry::Vec(s)) = b.get(key) {
                s
              } else {
                panic!("key_order references scalar or missing")
              };
              match aslc.len().cmp(&bslc.len()) {
                Ordering::Equal => (),
                any => return any,
              }
            }
            Ordering::Equal
          });
        if let Some(state) = best_candidate {
          return Some(state);
        }
      }
      None
    },
  }
}