Files
orchid/orchid-base/src/tree.rs
Lawrence Bethlenfalvy 09cfcb1839 partway towards commands
I got very confused and started mucking about with "spawn" when in fact all I needed was the "inline" extension type in orcx that allows the interpreter to expose custom constants.
2026-03-13 16:48:42 +01:00

337 lines
12 KiB
Rust

use std::fmt::{self, Debug, Display};
use std::future::Future;
use std::marker::PhantomData;
use std::rc::Rc;
use async_fn_stream::stream;
use futures::future::join_all;
use futures::{FutureExt, StreamExt};
use itertools::Itertools;
use never::Never;
use orchid_api_traits::Coding;
use trait_set::trait_set;
use crate::{
FmtCtx, FmtUnit, Format, IStr, OrcErrv, Pos, Snippet, SrcRange, Sym, VName, VPath, Variants, api,
es, match_mapping, tl_cache,
};
/// The 3 types of parentheses Orchid's lexer recognizes as intrinsic groups in
/// the S-tree. This is an alias of [api::Paren], so the in-memory tree and
/// the API representation use the same type.
pub type Paren = api::Paren;
/// Helper table with the different kinds of parentheses recognized by the
/// language. Each entry is `(opening character, closing character, variant)`.
pub const PARENS: &[(char, char, Paren)] =
&[('(', ')', Paren::Round), ('[', ']', Paren::Square), ('{', '}', Paren::Curly)];
/// Extension interface for embedded expressions and expression construction
/// commands inside token trees. `ApiEquiv` is the API-side type that an
/// implementor is converted to and from.
pub trait TokenVariant<ApiEquiv: Clone + Debug + Coding>: Format + Clone + fmt::Debug {
/// Additional arguments to the deserializer. If deserialization of a token
/// type is impossible, set this to a sentinel unit type that describes why.
/// If you set this to [Never], your token tree type can never be
/// deserialized.
type FromApiCtx<'a>;
/// Additional arguments to the serializer. If serialization of a token type
/// is forbidden, set this to a sentinel unit type that describes how to avoid
/// it.
/// If you set this to [Never], your token tree type can never be serialized.
type ToApiCtx<'a>;
/// Deserializer: builds `Self` from its API equivalent `api`, with mutable
/// access to the deserializer context `ctx`. `pos` is a source range supplied
/// by the caller; [TokTree::from_api] passes the range of the token that is
/// being decoded.
#[must_use]
fn from_api(
api: ApiEquiv,
ctx: &mut Self::FromApiCtx<'_>,
pos: SrcRange,
) -> impl Future<Output = Self>;
/// Serializer: consumes `self` and produces its API equivalent, with mutable
/// access to the serializer context `ctx`.
#[must_use]
fn into_api(self, ctx: &mut Self::ToApiCtx<'_>) -> impl Future<Output = ApiEquiv>;
}
/// [Never] as a token variant: a placeholder for a variant that cannot occur.
/// Both context types are `()`.
impl<T: Clone + Debug + Coding> TokenVariant<T> for Never {
  type FromApiCtx<'a> = ();
  type ToApiCtx<'a> = ();

  /// Always panics, because there is no value of [Never] that could be
  /// returned.
  async fn from_api(_api: T, _ctx: &mut Self::FromApiCtx<'_>, _pos: SrcRange) -> Self {
    panic!("Cannot deserialize Never")
  }

  /// Cannot actually be called: it takes a [Never] by value, and the empty
  /// match proves to the compiler that no such value exists.
  async fn into_api(self, _ctx: &mut Self::ToApiCtx<'_>) -> T {
    match self {}
  }
}
trait_set! {
/// Representation of the [api::Token::Handle] variant: any [TokenVariant]
/// whose API equivalent is [api::ExprTicket]
pub trait ExprRepr = TokenVariant<api::ExprTicket>;
/// Representation of the [api::Token::NewExpr] variant: any [TokenVariant]
/// whose API equivalent is [api::Expression]
pub trait ExtraTok = TokenVariant<api::Expression>;
}
trait_set! {
/// The recursion callback that [TokTree::recur] hands to its visitor. Calling
/// it on a token tree applies the visitor to the token trees nested directly
/// inside it and returns the rebuilt tree.
pub trait RecurCB<H: ExprRepr, X: ExtraTok> = Fn(TokTree<H, X>) -> TokTree<H, X>;
}
/// An atom that can be passed through the API boundary as part of an
/// expression. In particular, atoms created by extensions use this form.
pub trait AtomRepr: Clone + Format {
/// Context that [AtomRepr::from_api] receives by mutable reference. It may be
/// unsized.
type Ctx: ?Sized;
/// Build the atom from its API representation `api`, the position `pos`
/// supplied by the caller, and the context `ctx`.
#[must_use]
fn from_api(api: &api::Atom, pos: Pos, ctx: &mut Self::Ctx) -> impl Future<Output = Self>;
/// Produce the API representation of this atom. The returned future borrows
/// `self`.
#[must_use]
fn to_api(&self) -> impl Future<Output = orchid_api::Atom> + '_;
}
/// [Never] as an atom representation. Both the atom and its context are
/// [Never], so neither method can ever be reached with a real value; the empty
/// matches make that explicit to the compiler.
impl AtomRepr for Never {
  type Ctx = Never;

  async fn from_api(_atom: &api::Atom, _pos: Pos, ctx: &mut Self::Ctx) -> Self {
    match *ctx {}
  }

  async fn to_api(&self) -> orchid_api::Atom {
    match *self {}
  }
}
/// An [api::TreeTicket] tagged with a lifetime. The lifetime is carried only
/// by a [PhantomData] marker; no reference is actually stored.
// NOTE(review): presumably the lifetime bounds how long the ticket is valid —
// confirm against the code that constructs and consumes handles.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct TokHandle<'a>(api::TreeTicket, PhantomData<&'a ()>);
impl TokHandle<'static> {
pub fn new(tt: api::TreeTicket) -> Self { TokHandle(tt, PhantomData) }
}
impl TokHandle<'_> {
pub fn ticket(self) -> api::TreeTicket { self.0 }
}
impl Display for TokHandle<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Handle({})", self.0.0) }
}
/// Lexer output: a [Token] together with the source range it was read from.
/// `H` is the representation of expression handles and `X` the representation
/// of newly created expressions.
#[derive(Clone, Debug)]
pub struct TokTree<H: ExprRepr, X: ExtraTok> {
/// The token itself; nested token trees live inside its variants
pub tok: Token<H, X>,
/// The protocol has a Range<u32> because these are always transmitted in the
/// context of a given snippet, but internal logic and error reporting is
/// easier if the in-memory representation also includes the snippet path.
pub sr: SrcRange,
}
impl<H: ExprRepr, X: ExtraTok> TokTree<H, X> {
  /// Visit all tokens, modify them at will, and optionally recurse into them by
  /// calling the callback passed to your callback.
  ///
  /// `f` receives this tree and a [RecurCB]. The callback rebuilds whatever
  /// tree it is given, running `recur` with the same `f` on each token tree
  /// nested directly inside it; tokens without nested trees pass through
  /// unchanged.
  pub fn recur(self, f: &impl Fn(Self, &dyn RecurCB<H, X>) -> Self) -> Self {
    f(self, &|TokTree { sr, tok }| {
      let tok = match tok {
        // Variants that contain further token trees
        Token::S(p, b) => Token::S(p, b.into_iter().map(|tt| tt.recur(f)).collect_vec()),
        Token::NS(n, b) => Token::NS(n, Box::new(b.recur(f))),
        Token::LambdaHead(arg) => Token::LambdaHead(Box::new(arg.recur(f))),
        // Variants with nothing to descend into
        leaf @ (Token::BR
        | Token::Bottom(_)
        | Token::Comment(_)
        | Token::Name(_)
        | Token::Handle(_)
        | Token::NewExpr(_)) => leaf,
      };
      TokTree { sr, tok }
    })
  }

  /// Decode a token tree received through the API. `hctx` and `xctx` are the
  /// deserializer contexts of the handle and new-expression representations,
  /// and `src` is combined with the transmitted range to form the [SrcRange]
  /// of the result.
  pub async fn from_api(
    tt: api::TokenTree,
    hctx: &mut H::FromApiCtx<'_>,
    xctx: &mut X::FromApiCtx<'_>,
    src: &Sym,
  ) -> Self {
    let pos = SrcRange::new(tt.range, src);
    // Recursive calls are boxed because an async fn cannot contain its own
    // future inline
    let tok = match_mapping!(tt.token, api::Token => Token::<H, X> {
      BR,
      NS(n => es(n).await,
        b => Box::new(Self::from_api(*b, hctx, xctx, src).boxed_local().await)),
      Bottom(e => OrcErrv::from_api(e).await),
      LambdaHead(arg => Box::new(Self::from_api(*arg, hctx, xctx, src).boxed_local().await)),
      Name(n => es(n).await),
      S(par, b => ttv_from_api(b, hctx, xctx, src).await),
      Comment(c => es(c).await),
      NewExpr(expr => X::from_api(expr, xctx, pos.clone()).await),
      Handle(tk => H::from_api(tk, hctx, pos.clone()).await)
    });
    TokTree { tok, sr: pos }
  }

  /// Encode this token tree for sending through the API. Only the numeric
  /// range of the source range is transmitted. `hctx` and `xctx` are the
  /// serializer contexts of the handle and new-expression representations.
  pub async fn into_api(
    self,
    hctx: &mut H::ToApiCtx<'_>,
    xctx: &mut X::ToApiCtx<'_>,
  ) -> api::TokenTree {
    let token = match_mapping!(self.tok, Token => api::Token {
      BR,
      NS(n.to_api(), b => Box::new(b.into_api(hctx, xctx).boxed_local().await)),
      Bottom(e.to_api()),
      Comment(c.to_api()),
      LambdaHead(arg => Box::new(arg.into_api(hctx, xctx).boxed_local().await)),
      Name(nn.to_api()),
      S(p, b => ttv_into_api(b, hctx, xctx).boxed_local().await),
      Handle(hand.into_api(hctx).await),
      NewExpr(expr.into_api(xctx).await),
    });
    let range = self.sr.range.clone();
    api::TokenTree { range, token }
  }

  /// Whether the token is a name equal to `tk`. See [Token::is_kw]
  pub fn is_kw(&self, tk: IStr) -> bool { self.tok.is_kw(tk) }

  /// The name held by this token if it is a [Token::Name]
  pub fn as_name(&self) -> Option<IStr> {
    match &self.tok {
      Token::Name(n) => Some(n.clone()),
      _ => None,
    }
  }

  /// Read a chain of [Token::NS] prefixes ending in a [Token::Name] as a
  /// single multi-segment name. If the chain ends in any other token, that
  /// token tree is returned as the error.
  pub fn as_multiname(&self) -> Result<VName, &TokTree<H, X>> {
    let mut prefix = VPath::new([]);
    let mut node = self;
    // Peel off namespace prefixes until something else is found
    while let Token::NS(seg, inner) = &node.tok {
      prefix = prefix.suffix([seg.clone()]);
      node = inner;
    }
    match &node.tok {
      Token::Name(last) => Ok(prefix.name_with_suffix(last.clone())),
      _ => Err(node),
    }
  }

  /// The contents of this token as a [Snippet] if it is a [Token::S] with the
  /// parenthesis type `par`
  pub fn as_s(&self, par: Paren) -> Option<Snippet<'_, H, X>> {
    let slc = self.tok.as_s(par)?;
    Some(Snippet::new(self, slc))
  }

  /// The argument of this token if it is a [Token::LambdaHead]
  pub fn as_lambda(&self) -> Option<&Self> {
    if let Token::LambdaHead(arg) = &self.tok { Some(&**arg) } else { None }
  }

  /// Whether this token is a comment or a line break
  pub fn is_fluff(&self) -> bool {
    match self.tok {
      Token::Comment(_) | Token::BR => true,
      _ => false,
    }
  }

  /// Build a round-parenthesized lambda from its argument and body. The
  /// [Token::LambdaHead] gets the range of `arg`, and the enclosing group
  /// spans from the start of `arg` to the end of the last body token.
  ///
  /// # Panics
  ///
  /// If `body` is empty
  pub fn lambda(arg: Self, mut body: Vec<Self>) -> Self {
    let body_end = body.last().expect("Lambda with empty body!").sr.range.end;
    let head_sr = arg.sr();
    let mut group_sr = head_sr.clone();
    group_sr.range.end = body_end;
    let head = Token::LambdaHead(Box::new(arg)).at(head_sr);
    body.insert(0, head);
    Token::S(Paren::Round, body).at(group_sr)
  }

  /// A copy of the source range of this token
  pub fn sr(&self) -> SrcRange { self.sr.clone() }
}
impl<H: ExprRepr, X: ExtraTok> Format for TokTree<H, X> {
async fn print<'a>(&'a self, c: &'a (impl FmtCtx + ?Sized + 'a)) -> FmtUnit {
self.tok.print(c).await
}
}
/// Receive a token sequence from API. Every element is decoded with
/// [TokTree::from_api], one after another and in the original order, sharing
/// the contexts `hctx` and `xctx` and the source `src`.
pub async fn ttv_from_api<H: ExprRepr, X: ExtraTok>(
  tokv: impl IntoIterator<Item = api::TokenTree>,
  hctx: &mut H::FromApiCtx<'_>,
  xctx: &mut X::FromApiCtx<'_>,
  src: &Sym,
) -> Vec<TokTree<H, X>> {
  let decoded = stream(async |mut sink| {
    for api_tt in tokv {
      // Boxed because decoding a token tree can call back into this function
      let tt = TokTree::<H, X>::from_api(api_tt, hctx, xctx, src).boxed_local().await;
      sink.emit(tt).await
    }
  });
  decoded.collect().await
}
/// Encode a token sequence for sending. Every element is encoded with
/// [TokTree::into_api], one after another and in the original order, sharing
/// the contexts `hctx` and `xctx`.
pub async fn ttv_into_api<H: ExprRepr, X: ExtraTok>(
  tokv: impl IntoIterator<Item = TokTree<H, X>>,
  hctx: &mut H::ToApiCtx<'_>,
  xctx: &mut X::ToApiCtx<'_>,
) -> Vec<api::TokenTree> {
  let encoded = stream(async |mut sink| {
    for tt in tokv {
      let api_tt = tt.into_api(hctx, xctx).await;
      sink.emit(api_tt).await
    }
  });
  encoded.collect().await
}
/// Enclose the tokens in `()` if there is more than one. A single token is
/// returned as-is; a group gets the range reported by [ttv_range] for the
/// sequence.
///
/// # Panics
///
/// If `items` yields no tokens
pub fn wrap_tokv<H: ExprRepr, X: ExtraTok>(
  items: impl IntoIterator<Item = TokTree<H, X>>,
) -> TokTree<H, X> {
  let toks = items.into_iter().collect_vec();
  if toks.is_empty() {
    panic!("A tokv with no elements is illegal")
  }
  if toks.len() == 1 {
    return toks.into_iter().next().unwrap();
  }
  let sr = ttv_range(&toks).expect("empty handled above");
  Token::S(api::Paren::Round, toks).at(sr)
}
/// Lexer output variant. `H` is the representation of expression handles and
/// `X` the representation of newly created expressions.
#[derive(Clone, Debug)]
pub enum Token<H: ExprRepr, X: ExtraTok> {
/// Information about the code addressed to the human reader or dev tooling
/// It has no effect on the behaviour of the program unless it's explicitly
/// read via reflection
Comment(IStr),
/// The part of a lambda between `\` and `.` enclosing the argument. The body
/// stretches to the end of the enclosing parens or the end of the const line
LambdaHead(Box<TokTree<H, X>>),
/// A binding, operator, or a segment of a namespaced::name
Name(IStr),
/// A namespace prefix, like `my_ns::` followed by a token
NS(IStr, Box<TokTree<H, X>>),
/// A line break
BR,
/// `()`, `[]`, or `{}` together with the tokens enclosed by them
S(Paren, Vec<TokTree<H, X>>),
/// A newly instantiated expression
NewExpr(X),
/// An existing expr from a nested lexer
Handle(H),
/// A grammar error emitted by a lexer plugin if it was possible to continue
/// reading. Parsers should treat it as an atom unless it prevents parsing,
/// in which case both this and a relevant error should be returned.
Bottom(OrcErrv),
}
impl<H: ExprRepr, X: ExtraTok> Token<H, X> {
  /// Attach a source range to this token, producing a [TokTree]
  pub fn at(self, sr: SrcRange) -> TokTree<H, X> { TokTree { tok: self, sr } }

  /// Whether this token is a [Token::Name] equal to `tk`
  pub fn is_kw(&self, tk: IStr) -> bool {
    match self {
      Token::Name(n) => *n == tk,
      _ => false,
    }
  }

  /// The tokens enclosed by this token if it is a [Token::S] whose
  /// parenthesis type is `par`
  pub fn as_s(&self, par: Paren) -> Option<&[TokTree<H, X>]> {
    let Self::S(p, b) = self else { return None };
    (*p == par).then_some(b.as_slice())
  }
}
/// Pretty-printing of a single token. Nested token trees are printed
/// recursively; those calls are boxed because an async fn cannot contain its
/// own future inline.
impl<H: ExprRepr, X: ExtraTok> Format for Token<H, X> {
  async fn print<'a>(&'a self, c: &'a (impl FmtCtx + ?Sized + 'a)) -> FmtUnit {
    match self {
      Self::BR => "\n".to_string().into(),
      // When `one()` yields an error it is printed inline (presumably this is
      // the single-error case); otherwise the whole error vector is printed on
      // the following lines. Both forms use the same `Bottom(` label.
      Self::Bottom(err) => match err.one() {
        Some(err) => format!("Bottom({err}) ").into(),
        None => format!("Bottom(\n{}) ", indent(&err.to_string())).into(),
      },
      Self::Comment(c) => format!("--[{c}]--").into(),
      Self::LambdaHead(arg) =>
        tl_cache!(Rc<Variants>: Rc::new(Variants::default().bounded("\\{0b}.")))
          .units([arg.print(c).boxed_local().await]),
      Self::NS(n, b) => tl_cache!(Rc<Variants>: Rc::new(Variants::default().bounded("{0}::{1l}")))
        .units([n.to_string().into(), b.print(c).boxed_local().await]),
      Self::Name(n) => format!("{n}").into(),
      // The bracket characters depend on the parenthesis type; the contents are
      // printed as a token sequence
      Self::S(p, b) => FmtUnit::new(
        match *p {
          Paren::Round => tl_cache!(Rc<Variants>: Rc::new(Variants::default().bounded("({0b})"))),
          Paren::Curly => tl_cache!(Rc<Variants>: Rc::new(Variants::default().bounded("{{{0b}}}"))),
          Paren::Square => tl_cache!(Rc<Variants>: Rc::new(Variants::default().bounded("[{0b}]"))),
        },
        [ttv_fmt(b, c).await],
      ),
      Self::Handle(h) => h.print(c).await,
      Self::NewExpr(ex) => ex.print(c).await,
    }
  }
}
/// Find the location that best describes a sequence of tokens if the sequence
/// isn't empty. The result takes its path and start from the first token and
/// its end from the last token; an empty sequence gives `None`.
pub fn ttv_range<'a>(ttv: &[TokTree<impl ExprRepr + 'a, impl ExtraTok + 'a>]) -> Option<SrcRange> {
  let (head, tail) = (ttv.first()?, ttv.last()?);
  let range = head.sr.range.start..tail.sr.range.end;
  Some(SrcRange { path: head.sr.path(), range })
}
/// Pretty-print a token sequence. Every token is printed with the context `c`
/// and the resulting units are combined into one sequence unit.
// NOTE(review): the arguments to `FmtUnit::sequence` presumably mean empty
// delimiters and a single-space separator — confirm against its definition.
pub async fn ttv_fmt<'a: 'b, 'b>(
  ttv: impl IntoIterator<Item = &'b TokTree<impl ExprRepr + 'a, impl ExtraTok + 'a>>,
  c: &(impl FmtCtx + ?Sized),
) -> FmtUnit {
  let units = join_all(ttv.into_iter().map(|tt| tt.print(c))).await;
  FmtUnit::sequence("", " ", "", true, units)
}
/// Indent the continuation lines of a string: padding is inserted after every
/// line break, so the first line is left as it is.
pub fn indent(s: &str) -> String {
  let lines: Vec<&str> = s.split('\n').collect();
  lines.join("\n ")
}