Files
orchid/orchid-std/src/macros/resolve.rs
2026-03-27 23:50:58 +01:00

284 lines
10 KiB
Rust

use std::collections::VecDeque;
use std::ops::{Add, Range};
use async_fn_stream::stream;
use futures::{FutureExt, StreamExt, stream};
use hashbrown::{HashMap, HashSet};
use itertools::Itertools;
use orchid_base::{NameLike, Paren, Pos, Sym, VPath, fmt, is, log, mk_errv};
use orchid_extension::gen_expr::{GExpr, arg, bot, call, call_v, dyn_lambda, new_atom};
use orchid_extension::{ReflMemKind, TAtom, ToExpr, exec, refl};
use subslice_offset::SubsliceOffset;
use substack::Substack;
use crate::macros::macro_value::{Macro, Rule};
use crate::macros::mactree::MacTreeSeq;
use crate::macros::rule::state::{MatchState, StateEntry};
use crate::{MacTok, MacTree};
/// Macro-expand `val` and lower the result into a [`GExpr`].
///
/// For each name in the glossary of `val`, a sibling constant called
/// `__macro__<last segment>` is looked up through reflection. Constants that
/// evaluate to a [`Macro`] atom are collected, keeping one entry per canonical
/// name. Each macro is then reduced to the rules whose pattern glossary is a
/// subset of the glossary of `val`; macros with no remaining rule are dropped.
/// Macros without a priority go to the exclusive list, the others are kept
/// ordered by descending priority. The tree is finally resolved with
/// [`resolve_one`].
pub async fn resolve(val: MacTree) -> GExpr {
    exec(async move |mut h| {
        writeln!(log("debug"), "Macro-resolving {}", fmt(&val).await).await;
        let root = refl();
        // Every macro reachable from a name in `val`, keyed by canonical name
        let mut macros = HashMap::new();
        for name in val.glossary() {
            let (foot, body) = name.split_last_seg();
            let const_name = VPath::new(body.iter().cloned())
                .name_with_suffix(is(&format!("__macro__{foot}")).await)
                .to_sym()
                .await;
            // Only constants can hold a macro; anything else (or a failed lookup) is
            // skipped.
            let Ok(ReflMemKind::Const) = root.get_by_path(&const_name).await.map(|m| m.kind()) else {
                continue;
            };
            let Ok(atom) = h.exec::<TAtom<Macro>>(const_name).await else { continue };
            let owned = atom.own().await;
            macros.entry(owned.0.canonical_name.clone()).or_insert(owned);
        }
        let mut exclusive = Vec::new();
        // `prios` mirrors `priod` index by index, so that the insertion point can be
        // found from the priorities alone.
        let mut prios = Vec::<u64>::new();
        let mut priod = Vec::<FilteredMacroRecord>::new();
        for mac in macros.values() {
            // Indices of the rules that only mention names present in `val`
            let rules: Vec<usize> = (mac.0.rules.iter().enumerate())
                .filter(|(_, rule)| rule.pattern.glossary.is_subset(val.glossary()))
                .map(|(rule_i, _)| rule_i)
                .collect();
            if rules.is_empty() {
                continue;
            }
            let record = FilteredMacroRecord { mac, rules };
            let Some(prio) = mac.0.prio else {
                exclusive.push(record);
                continue;
            };
            // Insert after every strictly greater priority: descending order
            let slot = prios.partition_point(|p| *p > prio);
            prios.insert(slot, prio);
            priod.insert(slot, record);
        }
        let mut rctx = ResolveCtx { exclusive, priod };
        let gex = resolve_one(&mut rctx, Substack::Bottom, &val).await;
        writeln!(
            log("debug"),
            "Macro-resolution over {}\nreturned {}",
            fmt(&val).await,
            fmt(&gex).await
        )
        .await;
        gex
    })
    .await
}
/// Rules belonging to one macro that passed a particular filter.
///
/// `resolve` builds one of these per macro, keeping only the rules whose
/// pattern glossary is a subset of the glossary of the tree being resolved.
pub struct FilteredMacroRecord<'a> {
/// The macro that owns the rules listed in `rules`
mac: &'a Macro,
/// The rules in increasing order of index. Each entry is an index into the
/// rule list of `mac`
rules: Vec<usize>,
}
/// The macros that are candidates while resolving one tree, split by whether
/// they declare a priority.
struct ResolveCtx<'a> {
/// Macros without a priority. If these overlap, that's a compile-time error
pub exclusive: Vec<FilteredMacroRecord<'a>>,
/// Macros with a priority, stored in order of descending priority. If these
/// overlap, the priorities decide the order. In case of a tie, the
/// order is unspecified
pub priod: Vec<FilteredMacroRecord<'a>>,
}
/// Lower a single macro tree node into a [`GExpr`].
///
/// `arg_stk` holds the names bound by the enclosing lambdas. A name found on
/// it becomes an argument reference, any other name is emitted through
/// `to_gen`. Lambdas and round-parenthesized groups are resolved recursively
/// with [`resolve_seq`]; any other kind of group yields an error expression.
///
/// # Panics
///
/// Panics if the tree contains a placeholder or a slot, which are not allowed
/// in value trees.
async fn resolve_one(
    ctx: &mut ResolveCtx<'_>,
    arg_stk: Substack<'_, Sym>,
    value: &MacTree,
) -> GExpr {
    match value.tok() {
        MacTok::Ph(_) | MacTok::Slot => panic!("Forbidden element in value mactree"),
        MacTok::Bottom(err) => bot(err.clone()),
        MacTok::Value(v) => v.clone().to_gen().await,
        MacTok::Name(n) => {
            // Free names are passed through unchanged
            let Some(de_bruijn) = arg_stk.iter().position(|bound| bound == n) else {
                return n.clone().to_gen().await;
            };
            // Convert the position found on the stack into the index that was
            // assigned to the binding lambda (the stack length at the time it
            // was pushed).
            let arg_idx = arg_stk.len() - 1 - de_bruijn;
            arg(arg_idx.try_into().unwrap())
        },
        MacTok::Lambda(param, body) => {
            let MacTok::Name(name) = &*param.tok else {
                return bot(mk_errv(
                    is("Syntax error after macros").await,
                    "This token ends up as a binding, consider replacing it with a name",
                    [param.pos()],
                ));
            };
            // The new argument is identified by the stack length before the push
            let arg_pos = arg_stk.len() as u64;
            let inner_stk = arg_stk.push(name.clone());
            let body_expr = resolve_seq(ctx, inner_stk, body.clone(), value.pos()).await;
            dyn_lambda(arg_pos, body_expr).await
        },
        MacTok::S(Paren::Round, body) => resolve_seq(ctx, arg_stk, body.clone(), value.pos()).await,
        MacTok::S(..) => {
            let title = is("Leftover [] or {} not matched by macro").await;
            let message = format!("{} was not matched by any macro", fmt(value).await);
            bot(mk_errv(title, message, [value.pos()]))
        },
    }
}
/// Matches of exclusive macros within one sequence. Each entry holds the range
/// of items covered by the match, the macro and the rule that matched, and the
/// state captured by the matcher.
type XMatches<'a> = Vec<(Range<usize>, &'a Macro, &'a Rule, MatchState<'a>)>;
/// Find the subsection of the slice that satisfies both the lower and upper
/// limit.
///
/// Both predicates return `true` for an element that is *within* the
/// respective limit. The result is the index range of the first contiguous run
/// of elements for which both hold; if there is no such element, an empty
/// range positioned at the end of the slice is returned.
///
/// The slice is scanned linearly instead of being bisected, so the result does
/// not depend on which of the two predicates holds for the leading elements
/// and which for the trailing ones. The elements that satisfy both are only
/// expected to be adjacent to each other.
fn subsection<T>(
    slice: &[T],
    mut lower_limit: impl FnMut(&T) -> bool,
    mut upper_limit: impl FnMut(&T) -> bool,
) -> Range<usize> {
    let mut within = |t: &T| lower_limit(t) && upper_limit(t);
    let start = slice.iter().position(|t| within(t)).unwrap_or(slice.len());
    let len = slice[start..].iter().take_while(|t| within(*t)).count();
    start..start + len
}
/// Resolve a sequence of macro trees into a single [`GExpr`].
///
/// The steps are:
/// 1. An empty sequence is reported as an error located at `fallback_pos`.
/// 2. For every exclusive macro, the first matching rule is matched against
///    the original items. Matches contained in a larger match are dropped,
///    matches covering the same range are kept side by side, and the rest are
///    kept sorted by position.
/// 3. If any remaining exclusive matches overlap, an error expression listing
///    each group of overlapping matches is returned. Otherwise every match is
///    replaced by a call to the body of its rule.
/// 4. The rules of the prioritized macros are tried in the order they are
///    stored in `ctx`, each match being replaced immediately.
/// 5. The remaining items are resolved with [`resolve_one`] and folded into a
///    left-associative chain of calls.
async fn resolve_seq(
    ctx: &mut ResolveCtx<'_>,
    arg_stk: Substack<'_, Sym>,
    val: MacTreeSeq,
    fallback_pos: Pos,
) -> GExpr {
    if val.items.is_empty() {
        return bot(mk_errv(
            is("Empty sequence").await,
            "() or (\\arg ) left after macro execution. \
            This is usually caused by an incomplete call to a macro with bad error detection",
            [fallback_pos],
        ));
    }
    // A sorted collection of overlapping but non-nested matches to exclusive
    // macros
    let mut x_matches: XMatches = Vec::new();
    let top_glossary = val.top_glossary.clone();
    let mut new_val = val.items.to_vec();
    'x_macros: for x in &ctx.exclusive {
        let mut rules_iter = x.rules.iter();
        // The first rule of this macro that matches; macros without a matching rule
        // are skipped.
        let ((before, state, after), rule) = 'rules: loop {
            let Some(ridx) = rules_iter.next() else { continue 'x_macros };
            let rule = &x.mac.0.rules[*ridx];
            if rule.pattern.top_glossary.is_subset(&top_glossary)
                && let Some(record) = rule.matcher.apply(&val.items[..], &|_| true).await
            {
                break 'rules (record, rule);
            };
        };
        let new_r = (before.len()..new_val.len() - after.len(), x.mac, rule, state);
        // elements that overlap with us
        let overlap =
            subsection(&x_matches[..], |r| new_r.0.start < r.0.end, |r| r.0.start < new_r.0.end);
        let overlapping = &x_matches[overlap];
        // elements that fully contain us
        let geq_range =
            subsection(overlapping, |r| r.0.start <= new_r.0.start, |r| new_r.0.end <= r.0.end);
        let geq = &overlapping[geq_range];
        // if any of these is equal to us, all of them must be, otherwise the larger
        // ranges would have overridden the smaller ones
        if let Some(example) = geq.first() {
            // if they are equal to us, record the conflict.
            if example.0 == new_r.0 {
                let idx = (x_matches.subslice_offset(geq))
                    .expect("this slice is statically derived from x_matches");
                x_matches.insert(idx, new_r);
            }
            // either way, we matched so no further rules can run.
            continue 'x_macros;
        }
        // elements we fully contain. Equal ranges have been handled above
        let lt_range =
            subsection(overlapping, |r| new_r.0.start <= r.0.start, |r| r.0.end <= new_r.0.end);
        let lt = &overlapping[lt_range.clone()];
        if lt.is_empty() {
            // nothing to replace, insert at the position that keeps the list sorted
            let i = x_matches.partition_point(|r| r.0.start < new_r.0.start);
            x_matches.insert(i, new_r);
        } else {
            // Replace exactly the contained matches. The offset has to be that of `lt`
            // and not of `overlapping`, because a partially overlapping match that
            // starts before us may precede the contained ones.
            let lt_start =
                x_matches.subslice_offset(lt).expect("Slice statically derived from x_matches");
            x_matches.splice(lt_start..lt_start + lt_range.len(), [new_r]);
        }
    }
    // apply exclusive matches
    if !x_matches.is_empty() {
        // ranges of indices into x_matches which setwise conflict with each other.
        // Pairwise conflict reporting is excess noise, but a single conflict error
        // doesn't reveal where within the parenthesized block to look, so it's easiest
        // to group them setwise even if these sets may associate macros which don't
        // directly conflict.
        let conflict_sets = (0..x_matches.len()).map(|x| x..x + 1).coalesce(|lran, rran| {
            // each index was mapped to a range that contains only itself. Now we check if
            // the last match in the first range overlaps the first match in the second
            // range, and combine them if this is the case. The ranges are half-open, so
            // the last match of the left range is at `lran.end - 1`.
            if x_matches[rran.start].0.start < x_matches[lran.end - 1].0.end {
                Ok(lran.start..rran.end)
            } else {
                Err((lran, rran))
            }
        });
        let mac_conflict_tk = is("Macro conflict").await;
        let error = conflict_sets
            .filter(|r| 1 < r.len())
            .map(|set| {
                mk_errv(
                    mac_conflict_tk.clone(),
                    "Multiple partially overlapping syntax elements detected. \n\
                    Try parenthesizing whichever side is supposed to be the subexpression.",
                    x_matches[set].iter().flat_map(|rec| rec.3.names()).flat_map(|name| name.1).cloned(),
                )
            })
            .reduce(|l, r| l + r);
        if let Some(error) = error {
            return bot(error);
        }
        // no conflicts, apply all exclusive matches
        for (range, mac, rule, state) in x_matches.into_iter().rev() {
            // backwards so that the non-overlapping ranges remain valid
            let pos = (state.names().flat_map(|r| r.1).cloned().reduce(Pos::add))
                .expect("All macro rules must contain at least one locally defined name");
            let subex = mk_body_call(mac, rule, &state, pos.clone()).await.to_expr().await;
            new_val.splice(range, [MacTok::Value(subex).at(pos)]);
        }
    };
    // Does this glossary refresh actually pay off?
    let top_glossary = (new_val.iter())
        .flat_map(|t| if let MacTok::Name(t) = t.tok() { Some(t.clone()) } else { None })
        .collect::<HashSet<_>>();
    for FilteredMacroRecord { mac, rules } in &ctx.priod {
        for ridx in rules {
            let rule = &mac.0.rules[*ridx];
            if !rule.pattern.top_glossary.is_subset(&top_glossary) {
                continue;
            }
            let Some((pre, state, suf)) = rule.matcher.apply(&new_val, &|_| true).await else { continue };
            let range = pre.len()..new_val.len() - suf.len();
            let pos = (state.names().flat_map(|pair| pair.1).cloned().reduce(Pos::add))
                .expect("All macro rules must contain at least one locally defined name");
            let subex = mk_body_call(mac, rule, &state, pos.clone()).await.to_expr().await;
            // the match state borrows `new_val`, release it before editing the items
            std::mem::drop(state);
            new_val.splice(range, [MacTok::Value(subex).at(pos)]);
        }
    }
    let mut exprs = stream(async |mut h| {
        for mt in new_val {
            h.emit(resolve_one(ctx, arg_stk.clone(), &mt).await).await
        }
    })
    .collect::<VecDeque<_>>()
    .boxed_local()
    .await;
    let first = exprs.pop_front().expect(
        "We checked first that it isn't empty, and named macros get replaced with their results",
    );
    // apply the first expression to the rest, one argument at a time
    stream::iter(exprs).fold(first, async |f, x| call(f, x).await).await
}
/// Build the expression that invokes the body of `rule` with the values
/// captured in `state`.
///
/// One argument is produced for each placeholder name of the rule, in the
/// order of `rule.ph_names`: scalar captures are passed as a single tree atom,
/// vector captures are wrapped in a round-parenthesized group first. The body
/// is addressed by suffixing the macro's module path with `rule.body`, and the
/// resulting call is located at `pos`.
///
/// # Panics
///
/// Panics if `state` has no entry for one of the rule's placeholder names.
async fn mk_body_call(mac: &Macro, rule: &Rule, state: &MatchState<'_>, pos: Pos) -> GExpr {
    let call_args = (rule.ph_names.iter())
        .map(|name| match state.get(name).expect("Missing state entry for placeholder") {
            StateEntry::Scalar(scal) => new_atom((**scal).clone()),
            StateEntry::Vec(vec) =>
                new_atom(MacTok::S(Paren::Round, MacTreeSeq::new(vec.iter().cloned())).at(Pos::None)),
        })
        .collect::<Vec<_>>();
    // `pos` is owned and not needed afterwards, so it is moved instead of cloned
    call_v(mac.0.module.suffix([rule.body.clone()]).await, call_args).await.at(pos)
}