partway through fixes, macro system needs redesign

This commit is contained in:
2026-04-08 18:02:20 +02:00
parent 0909524dee
commit 9b4c7fa7d7
76 changed files with 1391 additions and 1065 deletions

View File

@@ -1,71 +1,146 @@
use std::borrow::Cow;
use std::collections::VecDeque;
use std::ops::{Add, Range};
use std::rc::Rc;
use async_fn_stream::stream;
use futures::{FutureExt, StreamExt, stream};
use hashbrown::{HashMap, HashSet};
use itertools::Itertools;
use never::Never;
use orchid_base::{NameLike, Paren, Pos, Sym, VPath, fmt, is, log, mk_errv};
use orchid_extension::gen_expr::{GExpr, arg, bot, call, call_v, dyn_lambda, new_atom};
use orchid_extension::{ReflMemKind, TAtom, ToExpr, exec, refl};
use orchid_extension::gen_expr::{GExpr, GExprKind, bot, call, call_v, new_atom};
use orchid_extension::{
Atomic, ExecHandle, OwnedAtom, OwnedVariant, ReflMemKind, TAtom, ToExpr, ToExprFuture, exec, refl,
};
use subslice_offset::SubsliceOffset;
use substack::Substack;
use crate::macros::macro_value::{Macro, Rule};
use crate::macros::mactree::MacTreeSeq;
use crate::macros::postmac::{PostMac, PostMacAtom};
use crate::macros::rule::state::{MatchState, StateEntry};
use crate::{MacTok, MacTree};
pub async fn resolve(val: MacTree) -> GExpr {
exec(async move |mut h| {
writeln!(log("debug"), "Macro-resolving {}", fmt(&val).await).await;
let root = refl();
let mut macros = HashMap::new();
for n in val.glossary() {
let (foot, body) = n.split_last_seg();
let new_name = VPath::new(body.iter().cloned())
.name_with_suffix(is(&format!("__macro__{foot}")).await)
.to_sym()
.await;
if let Ok(ReflMemKind::Const) = root.get_by_path(&new_name).await.map(|m| m.kind()) {
let Ok(mac) = h.exec::<TAtom<Macro>>(new_name).await else { continue };
let mac = mac.own().await;
macros.entry(mac.0.canonical_name.clone()).or_insert(mac);
/// One node of the linked list of argument names that backs [ArgStack].
pub enum ArgStackKind {
/// End of the list; no further names are bound below this point.
End,
/// A bound argument name, followed by the rest of the stack beneath it.
Cons(Sym, ArgStack),
}
/// Stack of the argument names bound by the lambdas enclosing the tree that is
/// being resolved. `resolve_one` pushes a name when it descends into a lambda
/// and walks the stack to recognise names that refer to bound arguments.
///
/// The nodes live behind an [Rc], so cloning copies the handle and the cached
/// length rather than the nodes themselves.
#[derive(Clone)]
pub struct ArgStack {
/// Topmost node; [ArgStackKind::End] when no names are bound.
kind: Rc<ArgStackKind>,
/// Number of names on the stack: 0 for [ArgStack::end], one more than the
/// parent's for every name pushed.
len: usize,
}
impl ArgStack {
    /// Creates the empty stack, on which no argument names are bound.
    pub fn end() -> Self {
        let kind = Rc::new(ArgStackKind::End);
        Self { kind, len: 0 }
    }
}
impl Default for ArgStack {
fn default() -> Self { Self::end() }
}
// ArgStack is made an atom so that it can be handed to macro bodies as a call
// argument; `mk_body_call` wraps it with `new_atom` for this purpose.
impl Atomic for ArgStack {
// The atom carries no data payload of its own, hence the unit type.
// NOTE(review): presumably `Data` is the part of an atom that is visible
// outside the owning extension - confirm against orchid_extension.
type Data = ();
type Variant = OwnedVariant;
}
impl OwnedAtom for ArgStack {
    // NOTE(review): `Never` presumably signals that this atom holds no
    // references to other expressions - confirm against orchid_extension.
    type Refs = Never;

    /// Produces the unit value that serves as this atom's `Data`.
    async fn val(&self) -> Cow<'_, Self::Data> {
        Cow::Owned(())
    }
}
/// # TODO
///
/// convert macro system to return MacTree or otherwise bring it up to
/// speed with the new [ToExpr] / [GExpr] division
///
/// Idea: MacTree needs to be passed wherever the meaning of an expression can
/// change depending on where in the tree it is bound
///
/// Idea: lowering MacTree to ToExpr implementors is possible by just knowing
/// what names are bound, not their values, but lowering it to GExpr is not.
///
/// Problem: The required information is stackbound, so recursive macro matching
/// needs to be a single coroutine. Even when it forks out to Orchid, recursive
/// calls need to point back to this coroutine. Being a coroutine, this
/// recursion can overflow the Rust stack.
///
/// Limits:
///
/// - The concrete MacTree being generated sometimes depends on recursive macro
/// calls which need to complete before we return a MacTree
/// - Any placeholders representing expressions must be recursed over before
/// returning in a MacTree
/// - Exactly one of these things must be done on a subtree
///
/// Takeaways:
///
/// - Resolution should not lower to GExpr
/// - Consider separate types MacTree vs resolved tree
/// - MacTree can be built for the purpose of passing into recur
/// - Resolved tree can be built for the purpose of returning
/// - cannot contain [...], {...}, (), ( ... \. )
/// - is pretty much GExpr with sym / dynamic arg binding instead of
/// numbered. Can this be a wrapper type over ToExpr instead?
/// - In order to move recursive state off the stack, we need a loophole
/// for lambdas
/// - Ensures that resolution only happens exactly once which is important
/// because double resolve can produce bugs that are difficult to catch
/// - Macros may return ResolvedTree but they can also return a datastructure
/// containing MacTree
/// - Macros may never lower ResolvedTree to GExpr directly because it may
/// refer to bound arguments by name
/// - Macros returning datastructures can only ever be called as logic while
/// those returning ResolvedTree can only ever be inlined
/// - this is a type system concern so addressing it here is unnecessary
///
/// Problems:
/// - ToExpr are not usually copiable by default
/// - plain-orchid macros should be able to annotate data-to-return and
/// data-to-resolve with the same tick symbol to limit conceptual complexity
/// - the case where a macro deliberately wants to bind a name explicitly within
/// a subexpression is tricky
///
/// The best option probably remains for resolve to process and return MacTree,
/// and for there to be a separate "lower" function. Nothing as yet suggests,
/// however, that macros can't be allowed to return different types.
pub async fn resolve(h: &mut ExecHandle<'_>, val: MacTree, arg_stk: ArgStack) -> PostMacAtom {
writeln!(log("debug"), "Macro-resolving {}", fmt(&val).await).await;
let root = refl();
let mut macros = HashMap::new();
for n in val.glossary() {
let (foot, body) = n.split_last_seg();
let new_name = VPath::new(body.iter().cloned())
.name_with_suffix(is(&format!("__macro__{foot}")).await)
.to_sym()
.await;
if let Ok(ReflMemKind::Const) = root.get_by_path(&new_name).await.map(|m| m.kind()) {
let Ok(mac) = h.exec::<TAtom<Macro>>(new_name).await else { continue };
let mac = mac.own().await;
macros.entry(mac.0.canonical_name.clone()).or_insert(mac);
}
}
let mut exclusive = Vec::new();
let mut prios = Vec::<u64>::new();
let mut priod = Vec::<FilteredMacroRecord>::new();
for (_, mac) in macros.iter() {
let mut record = FilteredMacroRecord { mac, rules: Vec::new() };
for (rule_i, rule) in mac.0.rules.iter().enumerate() {
if rule.pattern.glossary.is_subset(val.glossary()) {
record.rules.push(rule_i);
}
}
let mut exclusive = Vec::new();
let mut prios = Vec::<u64>::new();
let mut priod = Vec::<FilteredMacroRecord>::new();
for (_, mac) in macros.iter() {
let mut record = FilteredMacroRecord { mac, rules: Vec::new() };
for (rule_i, rule) in mac.0.rules.iter().enumerate() {
if rule.pattern.glossary.is_subset(val.glossary()) {
record.rules.push(rule_i);
}
}
if !record.rules.is_empty() {
match mac.0.prio {
None => exclusive.push(record),
Some(prio) => {
let i = prios.partition_point(|p| *p > prio);
prios.insert(i, prio);
priod.insert(i, record);
},
}
if !record.rules.is_empty() {
match mac.0.prio {
None => exclusive.push(record),
Some(prio) => {
let i = prios.partition_point(|p| *p > prio);
prios.insert(i, prio);
priod.insert(i, record);
},
}
}
let mut rctx = ResolveCtx { exclusive, priod };
let gex = resolve_one(&mut rctx, Substack::Bottom, &val).await;
writeln!(
log("debug"),
"Macro-resolution over {}\nreturned {}",
fmt(&val).await,
fmt(&gex).await
)
.await;
gex
})
.await
}
let mut rctx = ResolveCtx { exclusive, priod };
let gex = resolve_one(&mut rctx, arg_stk, &val).await;
writeln!(log("debug"), "Macro-resolution over {}", fmt(&val).await).await;
gex
}
/// Rules belonging to one macro that passed a particular filter
@@ -83,37 +158,54 @@ struct ResolveCtx<'a> {
pub priod: Vec<FilteredMacroRecord<'a>>,
}
async fn resolve_one(
ctx: &mut ResolveCtx<'_>,
arg_stk: Substack<'_, Sym>,
value: &MacTree,
) -> GExpr {
async fn resolve_one(ctx: &mut ResolveCtx<'_>, arg_stk: ArgStack, value: &MacTree) -> PostMacAtom {
eprintln!("Resolving unit {}", fmt(value).await);
match value.tok() {
MacTok::Ph(_) | MacTok::Slot => panic!("Forbidden element in value mactree"),
MacTok::Bottom(err) => bot(err.clone()),
MacTok::Value(v) => v.clone().to_gen().await,
MacTok::Name(n) => match arg_stk.iter().position(|arg| arg == n) {
Some(de_bruijn) => arg((arg_stk.len() - 1 - de_bruijn).try_into().unwrap()),
None => n.clone().to_gen().await,
MacTok::Bottom(err) => PostMac::new(bot(err.clone())).atom(),
MacTok::Value(v) => {
eprintln!("Found value {}", fmt(v).await);
PostMac::new(v.clone()).atom()
},
MacTok::Name(n) => {
eprintln!("Looking for {n} among [");
let mut cur = &arg_stk;
let mut counter = 0;
while let ArgStackKind::Cons(name, next) = &*cur.kind {
cur = next;
counter += 1;
eprintln!("{name}, ");
if name == n {
return PostMac::new(GExprKind::Arg(counter).at(value.pos())).atom();
}
}
PostMac::new(n.clone()).atom()
},
MacTok::Lambda(arg, body) => {
eprintln!("Found lambda \\{} {}", fmt(arg).await, fmt(body).await);
let MacTok::Name(name) = &*arg.tok else {
return bot(mk_errv(
return PostMac::new(bot(mk_errv(
is("Syntax error after macros").await,
"This token ends up as a binding, consider replacing it with a name",
[arg.pos()],
));
)))
.atom();
};
let arg_pos = arg_stk.len() as u64;
let arg_stk = arg_stk.push(name.clone());
dyn_lambda(arg_pos, resolve_seq(ctx, arg_stk, body.clone(), value.pos()).await).await
let arg_stk =
ArgStack { len: arg_stk.len + 1, kind: Rc::new(ArgStackKind::Cons(name.clone(), arg_stk)) };
let body = resolve_seq(ctx, arg_stk, body.clone(), value.pos()).await;
let body2 = body.clone();
let pos = value.pos();
PostMac::with(async |cx| GExprKind::Lambda(Box::new(cx.ex(body).to_gen().await)).at(pos))
.atom()
},
MacTok::S(Paren::Round, body) => resolve_seq(ctx, arg_stk, body.clone(), value.pos()).await,
MacTok::S(..) => bot(mk_errv(
MacTok::S(..) => PostMac::new(bot(mk_errv(
is("Leftover [] or {} not matched by macro").await,
format!("{} was not matched by any macro", fmt(value).await),
[value.pos()],
)),
)))
.atom(),
}
}
@@ -133,17 +225,18 @@ fn subsection<T>(
async fn resolve_seq(
ctx: &mut ResolveCtx<'_>,
arg_stk: Substack<'_, Sym>,
arg_stk: ArgStack,
val: MacTreeSeq,
fallback_pos: Pos,
) -> GExpr {
) -> PostMacAtom {
if val.items.is_empty() {
return bot(mk_errv(
return PostMac::new(bot(mk_errv(
is("Empty sequence").await,
"() or (\\arg ) left after macro execution. \
This is usually caused by an incomplete call to a macro with bad error detection",
[fallback_pos],
));
)))
.atom();
}
// A sorted collection of overlapping but non-nested matches to exclusive
// macros
@@ -226,14 +319,15 @@ async fn resolve_seq(
})
.reduce(|l, r| l + r);
if let Some(error) = error {
return bot(error);
return PostMac::new(bot(error)).atom();
}
// no conflicts, apply all exclusive matches
for (range, mac, rule, state) in x_matches.into_iter().rev() {
// backwards so that the non-overlapping ranges remain valid
let pos = (state.names().flat_map(|r| r.1).cloned().reduce(Pos::add))
.expect("All macro rules must contain at least one locally defined name");
let subex = mk_body_call(mac, rule, &state, pos.clone()).await.to_expr().await;
let subex =
mk_body_call(mac, rule, &state, pos.clone(), arg_stk.clone()).await.to_expr().await;
new_val.splice(range, [MacTok::Value(subex).at(pos)]);
}
};
@@ -251,7 +345,8 @@ async fn resolve_seq(
let range = pre.len()..new_val.len() - suf.len();
let pos = (state.names().flat_map(|pair| pair.1).cloned().reduce(Pos::add))
.expect("All macro rules must contain at least one locally defined name");
let subex = mk_body_call(mac, rule, &state, pos.clone()).await.to_expr().await;
let subex =
mk_body_call(mac, rule, &state, pos.clone(), arg_stk.clone()).await.to_expr().await;
std::mem::drop(state);
new_val.splice(range, [MacTok::Value(subex).at(pos)]);
}
@@ -267,11 +362,21 @@ async fn resolve_seq(
let first = exprs.pop_front().expect(
"We checked first that it isn't empty, and named macros get replaced with their results",
);
stream::iter(exprs).fold(first, async |f, x| call(f, x).await).await
PostMac::with(async move |cx| {
stream::iter(exprs).fold(cx.ex(first), async |f, x| call(f, cx.ex(x)).await).await
})
.await
.atom()
}
async fn mk_body_call(mac: &Macro, rule: &Rule, state: &MatchState<'_>, pos: Pos) -> GExpr {
let mut call_args = vec![];
async fn mk_body_call(
mac: &Macro,
rule: &Rule,
state: &MatchState<'_>,
pos: Pos,
arg_stk: ArgStack,
) -> GExpr {
let mut call_args = vec![new_atom(arg_stk).at(Pos::None)];
for name in rule.ph_names.iter() {
call_args.push(match state.get(name).expect("Missing state entry for placeholder") {
StateEntry::Scalar(scal) => new_atom((**scal).clone()),