orchid/orchid-std/src/macros/resolve.rs

use std::borrow::Cow;
use std::collections::VecDeque;
use std::ops::{Add, Range};
use std::rc::Rc;

use async_fn_stream::stream;
use futures::{FutureExt, StreamExt, stream};
use hashbrown::{HashMap, HashSet};
use itertools::Itertools;
use never::Never;
use orchid_base::{NameLike, Paren, Pos, Sym, VPath, fmt, is, log, mk_errv};
use orchid_extension::gen_expr::{GExpr, GExprKind, bot, call, call_v, new_atom};
use orchid_extension::{
	Atomic, ExecHandle, OwnedAtom, OwnedVariant, ReflMemKind, TAtom, ToExpr, ToExprFuture, exec, refl,
};
use subslice_offset::SubsliceOffset;

use crate::macros::macro_value::{Macro, Rule};
use crate::macros::mactree::MacTreeSeq;
use crate::macros::postmac::{PostMac, PostMacAtom};
use crate::macros::rule::state::{MatchState, StateEntry};
use crate::{MacTok, MacTree};

/// A single node of the linked list behind [ArgStack].
pub enum ArgStackKind {
	/// Bottom of the stack; no further names are bound below this node
	End,
	/// A bound name, followed by the stack as it was before the name was pushed
	Cons(Sym, ArgStack),
}
/// Stack of the lambda argument names that are in scope at a given point of
/// macro resolution. The innermost binding is at the head of the list.
///
/// The nodes are reference-counted, so cloning a stack only copies the [Rc]
/// handle and the length.
#[derive(Clone)]
pub struct ArgStack {
	/// Head node of the list
	kind: Rc<ArgStackKind>,
	/// Number of names on the stack; 0 for [ArgStack::end]
	len: usize,
}
impl ArgStack {
	/// Create an empty stack that binds no names
	pub fn end() -> Self {
		let kind = Rc::new(ArgStackKind::End);
		Self { kind, len: 0 }
	}
}
impl Default for ArgStack {
	fn default() -> Self { Self::end() }
}
/// Lets an [ArgStack] be wrapped in an atom; `mk_body_call` passes it to macro
/// bodies this way. The atom carries no data payload (`Data = ()`).
impl Atomic for ArgStack {
	type Data = ();
	type Variant = OwnedVariant;
}
impl OwnedAtom for ArgStack {
	// NOTE(review): presumably `Never` declares that this atom holds no
	// references to other expressions - confirm against the OwnedAtom docs
	type Refs = Never;
	/// The data payload is the unit value, so there is nothing to borrow
	async fn val(&self) -> Cow<'_, Self::Data> { Cow::Owned(()) }
}

/// Entry point of macro resolution. Collects every macro whose keywords occur
/// in `val`, keeps the rules whose glossary is covered by the glossary of
/// `val`, and resolves the tree with them.
///
/// For each name in the glossary of `val`, a constant with the same path but
/// the last segment prefixed with `__macro__` is looked up; if it exists and
/// evaluates to a [Macro], it participates in resolution. Macros are
/// deduplicated by their canonical name. Macros without a priority are treated
/// as exclusive, the rest are ordered by descending priority.
///
/// # TODO
///
/// convert macro system to return MacTree or otherwise bring it up to
/// speed with the new [ToExpr] / [GExpr] division
///
/// Idea: MacTree needs to be passed wherever the meaning of an expression can
/// change depending on where in the tree it is bound
///
/// Idea: lowering MacTree to ToExpr implementors is possible by just knowing
/// what names are bound, not their values, but lowering it to GExpr is not.
///
/// Problem: The required information is stackbound, so recursive macro matching
/// needs to be a single coroutine. Even when it forks out to Orchid, recursive
/// calls need to point back to this coroutine. Being a coroutine, this
/// recursion can overflow the Rust stack.
///
/// Limits:
///
/// - The concrete MacTree being generated sometimes depends on recursive macro
///   calls which need to complete before we return a MacTree
/// - Any placeholders representing expressions must be recursed over before
///   returning in a MacTree
/// - Exactly one of these things must be done on a subtree
///
/// Takeaways:
///
/// - Resolution should not lower to GExpr
/// - Consider separate types MacTree vs resolved tree
///   - MacTree can be built for the purpose of passing into recur
///   - Resolved tree can be built for the purpose of returning
///     - cannot contain [...], {...}, (), ( ... \. )
///     - is pretty much GExpr with sym / dynamic arg binding instead of
///       numbered. Can this be a wrapper type over ToExpr instead?
///       - In order to move recursive state off the stack, we need a loophole
///         for lambdas
///   - Ensures that resolution only happens exactly once which is important
///     because double resolve can produce bugs that are difficult to catch
///   - Macros may return ResolvedTree but they can also return a datastructure
///     containing MacTree
///     - Macros may never lower ResolvedTree to GExpr directly because it may
///       refer to bound arguments by name
///     - Macros returning datastructures can only ever be called as logic while
///       those returning ResolvedTree can only ever be inlined
///       - this is a type system concern so addressing it here is unnecessary
///
/// Problems:
/// - ToExpr are not usually copyable by default
/// - plain-orchid macros should be able to annotate data-to-return and
///   data-to-resolve with the same tick symbol to limit conceptual complexity,
/// - the case where a macro deliberately wants to bind a name explicitly within
///   a subexpression is tricky
///
/// The best option probably remains for resolve to process and return MacTree,
/// and for there to be a separate "lower" function. Nothing as yet suggests
/// however that macros can't be allowed to return different types
pub async fn resolve(h: &mut ExecHandle<'_>, val: MacTree, arg_stk: ArgStack) -> PostMacAtom {
	writeln!(log("debug"), "Macro-resolving {}", fmt(&val).await).await;
	let root = refl();
	// Every macro reachable through a keyword in the tree, keyed by canonical name
	// so that a macro found through several of its keywords is only kept once
	let mut macros = HashMap::new();
	for keyword in val.glossary() {
		let (foot, body) = keyword.split_last_seg();
		let new_name = VPath::new(body.iter().cloned())
			.name_with_suffix(is(&format!("__macro__{foot}")).await)
			.to_sym()
			.await;
		if let Ok(ReflMemKind::Const) = root.get_by_path(&new_name).await.map(|m| m.kind()) {
			// a constant of this name that isn't a macro is simply not our concern
			let Ok(mac_atom) = h.exec::<TAtom<Macro>>(new_name).await else { continue };
			let owned = mac_atom.own().await;
			macros.entry(owned.0.canonical_name.clone()).or_insert(owned);
		}
	}
	let mut exclusive = Vec::new();
	// priorities of the records in `priod`, kept in the same (descending) order
	let mut prios = Vec::<u64>::new();
	let mut priod = Vec::<FilteredMacroRecord>::new();
	for mac in macros.values() {
		// indices of the rules that could possibly match somewhere in this tree
		let rules = (mac.0.rules.iter().enumerate())
			.filter(|(_, rule)| rule.pattern.glossary.is_subset(val.glossary()))
			.map(|(rule_i, _)| rule_i)
			.collect::<Vec<_>>();
		if rules.is_empty() {
			continue;
		}
		let record = FilteredMacroRecord { mac, rules };
		match mac.0.prio {
			Some(prio) => {
				let i = prios.partition_point(|p| prio < *p);
				prios.insert(i, prio);
				priod.insert(i, record);
			},
			None => exclusive.push(record),
		}
	}
	let mut rctx = ResolveCtx { exclusive, priod };
	let resolved = resolve_one(&mut rctx, arg_stk, &val).await;
	writeln!(log("debug"), "Macro-resolution over {}", fmt(&val).await).await;
	resolved
}

/// Rules belonging to one macro that passed a particular filter
pub struct FilteredMacroRecord<'a> {
	/// The macro the rules belong to
	mac: &'a Macro,
	/// The rules in increasing order of index. Each entry is an index into the
	/// rule list of `mac`
	rules: Vec<usize>,
}

/// The macros that take part in a single call to [resolve], already filtered
/// down to the rules that may match
struct ResolveCtx<'a> {
	/// Macros without a priority. If these overlap, that's a compile-time error
	pub exclusive: Vec<FilteredMacroRecord<'a>>,
	/// Macros with a priority, in descending order of priority. If these overlap,
	/// the priorities decide the order. In case of a tie, the order is
	/// unspecified
	pub priod: Vec<FilteredMacroRecord<'a>>,
}

async fn resolve_one(ctx: &mut ResolveCtx<'_>, arg_stk: ArgStack, value: &MacTree) -> PostMacAtom {
	eprintln!("Resolving unit {}", fmt(value).await);
	match value.tok() {
		MacTok::Ph(_) | MacTok::Slot => panic!("Forbidden element in value mactree"),
		MacTok::Bottom(err) => PostMac::new(bot(err.clone())).atom(),
		MacTok::Value(v) => {
			eprintln!("Found value {}", fmt(v).await);
			PostMac::new(v.clone()).atom()
		},
		MacTok::Name(n) => {
			eprintln!("Looking for {n} among [");
			let mut cur = &arg_stk;
			let mut counter = 0;
			while let ArgStackKind::Cons(name, next) = &*cur.kind {
				cur = next;
				counter += 1;
				eprintln!("{name}, ");
				if name == n {
					return PostMac::new(GExprKind::Arg(counter).at(value.pos())).atom();
				}
			}
			PostMac::new(n.clone()).atom()
		},
		MacTok::Lambda(arg, body) => {
			eprintln!("Found lambda \\{}   {}", fmt(arg).await, fmt(body).await);
			let MacTok::Name(name) = &*arg.tok else {
				return PostMac::new(bot(mk_errv(
					is("Syntax error after macros").await,
					"This token ends up as a binding, consider replacing it with a name",
					[arg.pos()],
				)))
				.atom();
			};
			let arg_stk =
				ArgStack { len: arg_stk.len + 1, kind: Rc::new(ArgStackKind::Cons(name.clone(), arg_stk)) };
			let body = resolve_seq(ctx, arg_stk, body.clone(), value.pos()).await;
			let body2 = body.clone();
			let pos = value.pos();
			PostMac::with(async |cx| GExprKind::Lambda(Box::new(cx.ex(body).to_gen().await)).at(pos))
				.atom()
		},
		MacTok::S(Paren::Round, body) => resolve_seq(ctx, arg_stk, body.clone(), value.pos()).await,
		MacTok::S(..) => PostMac::new(bot(mk_errv(
			is("Leftover [] or {} not matched by macro").await,
			format!("{} was not matched by any macro", fmt(value).await),
			[value.pos()],
		)))
		.atom(),
	}
}

/// Matches of exclusive macros within one sequence. Each entry holds the range
/// of token indices the match covers, the macro and the rule that matched, and
/// the placeholder bindings produced by the matcher.
type XMatches<'a> = Vec<(Range<usize>, &'a Macro, &'a Rule, MatchState<'a>)>;

/// Find the subsection of the slice that satisfies both the lower and upper
/// limit.
///
/// The slice must be ordered such that the elements failing `lower_limit` (if
/// any) form a prefix, and among the remaining elements the ones satisfying
/// `upper_limit` come first. In other words, `lower_limit` may only switch from
/// false to true along the slice, and `upper_limit` may only switch from true
/// to false. If this doesn't hold, the result is unspecified but in bounds.
///
/// Returns the index range of the elements for which both predicates hold. The
/// range is empty if there are no such elements or if the slice is empty.
fn subsection<T>(
	slice: &[T],
	mut lower_limit: impl FnMut(&T) -> bool,
	upper_limit: impl FnMut(&T) -> bool,
) -> Range<usize> {
	// partition_point counts the leading elements for which the predicate holds,
	// so the elements to skip are the ones still below the lower limit
	let start = slice.partition_point(|t| !lower_limit(t));
	// everything from here on satisfies the lower limit, so the section lasts
	// for as long as the upper limit is satisfied too
	let len = slice[start..].partition_point(upper_limit);
	start..start + len
}

/// Resolve a sequence of tokens, such as the contents of a pair of round
/// parentheses or the body of a lambda.
///
/// 1. Each exclusive macro is matched against the sequence. Matches that are
///    nested inside another match are discarded, matches that partially overlap
///    or cover the same range are reported as a conflict.
/// 2. If there is no conflict, the matched ranges are replaced by a call to the
///    body of the rule.
/// 3. Prioritized macros are applied to the result in order of priority.
/// 4. The remaining tokens are resolved one by one with [resolve_one], and
///    folded into a left-associative chain of function calls.
///
/// `fallback_pos` is the position reported if the sequence is empty.
async fn resolve_seq(
	ctx: &mut ResolveCtx<'_>,
	arg_stk: ArgStack,
	val: MacTreeSeq,
	fallback_pos: Pos,
) -> PostMacAtom {
	if val.items.is_empty() {
		return PostMac::new(bot(mk_errv(
			is("Empty sequence").await,
			"() or (\\arg ) left after macro execution. \
			This is usually caused by an incomplete call to a macro with bad error detection",
			[fallback_pos],
		)))
		.atom();
	}
	// A sorted collection of overlapping but non-nested matches to exclusive
	// macros. Because no match is nested in another, the list is ordered by both
	// the start and the end of the ranges.
	let mut x_matches: XMatches = Vec::new();
	let top_glossary = val.top_glossary.clone();
	let mut new_val = val.items.to_vec();
	'x_macros: for x in &ctx.exclusive {
		let mut rules_iter = x.rules.iter();
		// the first rule of the macro that matches wins
		let ((before, state, after), rule) = 'rules: loop {
			let Some(ridx) = rules_iter.next() else { continue 'x_macros };
			let rule = &x.mac.0.rules[*ridx];
			if rule.pattern.top_glossary.is_subset(&top_glossary)
				&& let Some(record) = rule.matcher.apply(&val.items[..], &|_| true).await
			{
				break 'rules (record, rule);
			};
		};
		let new_r = (before.len()..new_val.len() - after.len(), x.mac, rule, state);
		// elements that overlap with us
		let overlap =
			subsection(&x_matches[..], |r| new_r.0.start < r.0.end, |r| r.0.start < new_r.0.end);
		let overlapping = &x_matches[overlap];
		// elements that fully contain us. Along the sorted list the end-based
		// condition switches from false to true and the start-based one from true
		// to false, so the former has to be the lower limit.
		let geq_range =
			subsection(overlapping, |r| new_r.0.end <= r.0.end, |r| r.0.start <= new_r.0.start);
		let geq = &overlapping[geq_range];
		// if any of these is equal to us, all of them must be, otherwise the larger
		// ranges would have overridden the smaller ones
		if let Some(example) = geq.first() {
			// if they are equal to us, record the conflict.
			if example.0 == new_r.0 {
				let idx = (x_matches.subslice_offset(geq))
					.expect("this slice is statically derived from x_matches");
				x_matches.insert(idx, new_r);
			}
			// either way, we matched so no further rules can run.
			continue 'x_macros;
		}
		// elements we fully contain. Equal ranges have been handled above
		let lt_range =
			subsection(overlapping, |r| new_r.0.start <= r.0.start, |r| r.0.end <= new_r.0.end);
		let lt = &overlapping[lt_range.clone()];
		if lt.is_empty() {
			// an empty range
			let i = x_matches.partition_point(|r| r.0.start < new_r.0.start);
			x_matches.insert(i, new_r);
		} else {
			// Replace exactly the matches we contain. The offset must be that of `lt`
			// and not of `overlapping`, which may begin with a match that only
			// partially overlaps us and therefore has to stay to be reported.
			let lt_start =
				x_matches.subslice_offset(lt).expect("Slice statically derived from x_matches");
			x_matches.splice(lt_start..lt_start + lt_range.len(), [new_r]);
		}
	}
	// apply exclusive matches
	if !x_matches.is_empty() {
		// ranges of indices into x_matches which setwise conflict with each other.
		// Pairwise conflict reporting is excess noise, but a single conflict error
		// doesn't reveal where within the parenthesized block to look, so it's easiest
		// to group them setwise even if these sets may associate macros which don't
		// directly conflict.
		let conflict_sets = (0..x_matches.len()).map(|x| x..x + 1).coalesce(|lran, rran| {
			// each index was mapped to a range that contains only itself. Now we check if
			// the last match in the first range overlaps the first match in the second
			// range, and combine them if this is the case. The ranges are half-open, so
			// the last match of the first range is at `end - 1`.
			if x_matches[rran.start].0.start < x_matches[lran.end - 1].0.end {
				Ok(lran.start..rran.end)
			} else {
				Err((lran, rran))
			}
		});
		let mac_conflict_tk = is("Macro conflict").await;
		let error = conflict_sets
			.filter(|r| 1 < r.len())
			.map(|set| {
				mk_errv(
					mac_conflict_tk.clone(),
					"Multiple partially overlapping syntax elements detected. \n\
							Try parenthesizing whichever side is supposed to be the subexpression.",
					x_matches[set].iter().flat_map(|rec| rec.3.names()).flat_map(|name| name.1).cloned(),
				)
			})
			.reduce(|l, r| l + r);
		if let Some(error) = error {
			return PostMac::new(bot(error)).atom();
		}
		// no conflicts, apply all exclusive matches
		for (range, mac, rule, state) in x_matches.into_iter().rev() {
			// backwards so that the non-overlapping ranges remain valid
			let pos = (state.names().flat_map(|r| r.1).cloned().reduce(Pos::add))
				.expect("All macro rules must contain at least one locally defined name");
			let subex =
				mk_body_call(mac, rule, &state, pos.clone(), arg_stk.clone()).await.to_expr().await;
			new_val.splice(range, [MacTok::Value(subex).at(pos)]);
		}
	};
	// Does this glossary refresh actually pay off?
	let top_glossary = (new_val.iter())
		.flat_map(|t| if let MacTok::Name(t) = t.tok() { Some(t.clone()) } else { None })
		.collect::<HashSet<_>>();
	for FilteredMacroRecord { mac, rules } in &ctx.priod {
		for ridx in rules {
			let rule = &mac.0.rules[*ridx];
			if !rule.pattern.top_glossary.is_subset(&top_glossary) {
				continue;
			}
			let Some((pre, state, suf)) = rule.matcher.apply(&new_val, &|_| true).await else { continue };
			let range = pre.len()..new_val.len() - suf.len();
			let pos = (state.names().flat_map(|pair| pair.1).cloned().reduce(Pos::add))
				.expect("All macro rules must contain at least one locally defined name");
			let subex =
				mk_body_call(mac, rule, &state, pos.clone(), arg_stk.clone()).await.to_expr().await;
			// the state borrows from new_val, so it has to go before we can edit it
			std::mem::drop(state);
			new_val.splice(range, [MacTok::Value(subex).at(pos)]);
		}
	}
	// resolve whatever the macros left behind one token at a time
	let mut exprs = stream(async |mut h| {
		for mt in new_val {
			h.emit(resolve_one(ctx, arg_stk.clone(), &mt).await).await
		}
	})
	.collect::<VecDeque<_>>()
	.boxed_local()
	.await;
	let first = exprs.pop_front().expect(
		"We checked first that it isn't empty, and named macros get replaced with their results",
	);
	// the first item is the function, the rest are its arguments in order
	PostMac::with(async move |cx| {
		stream::iter(exprs).fold(cx.ex(first), async |f, x| call(f, cx.ex(x)).await).await
	})
	.await
	.atom()
}

/// Build the expression that calls the body of `rule` with the results of a
/// successful match.
///
/// The first argument of the call is the argument stack wrapped in an atom,
/// followed by one atom for each placeholder of the rule in the order of
/// `rule.ph_names`. Scalar placeholders are passed as the matched tree, vectorial
/// placeholders are wrapped in a round parenthesized sequence. The body is
/// looked up by name in the module of the macro, and the call is located at
/// `pos`.
///
/// # Panics
///
/// Panics if `state` has no entry for one of the placeholders of the rule.
async fn mk_body_call(
	mac: &Macro,
	rule: &Rule,
	state: &MatchState<'_>,
	pos: Pos,
	arg_stk: ArgStack,
) -> GExpr {
	let placeholder_args = rule.ph_names.iter().map(|ph_name| {
		let entry = state.get(ph_name).expect("Missing state entry for placeholder");
		match entry {
			StateEntry::Vec(items) => {
				let seq = MacTreeSeq::new(items.iter().cloned());
				new_atom(MacTok::S(Paren::Round, seq).at(Pos::None))
			},
			StateEntry::Scalar(tree) => new_atom((**tree).clone()),
		}
	});
	let mut call_args = vec![new_atom(arg_stk).at(Pos::None)];
	call_args.extend(placeholder_args);
	let body_name = mac.0.module.suffix([rule.body.clone()]).await;
	call_v(body_name, call_args).await.at(pos)
}