Custom lexers can now terminate operators
Some checks failed
Rust / build (push) Has been cancelled

New constraint: custom lexer output is dropped whenever it is used to terminate an operator nested inside another custom lexer, because the recursive call has to return exactly one lexeme.
This commit is contained in:
2026-01-25 17:52:18 +01:00
parent b9f1bb74d7
commit c461f82de1
17 changed files with 333 additions and 142 deletions

View File

@@ -28,6 +28,6 @@ pub mod reqnot;
pub mod sequence;
pub mod side;
pub mod stash;
mod tl_cache;
pub mod tl_cache;
pub mod tokens;
pub mod tree;

View File

@@ -198,15 +198,11 @@ impl Sym {
let items = v.into_iter().collect_vec();
Self::from_tok(iv(&items).await)
}
/// Read a `::` separated namespaced name.
/// Read a `::` separated namespaced name. Do not use this for statically
/// known names; use the cached [sym] macro instead.
pub async fn parse(s: &str) -> Result<Self, EmptyNameError> {
Ok(Sym(iv(&VName::parse(s).await?.into_vec()).await))
}
/// Read a `::` separated namespaced name from a static string.
pub async fn literal(s: &'static str) -> Self {
assert!(!s.is_empty(), "Literal cannot be empty");
Self::parse(s).await.unwrap()
}
/// Assert that a token isn't empty, and wrap it in a [Sym]
pub fn from_tok(t: IStrv) -> Result<Self, EmptyNameError> {
if t.is_empty() { Err(EmptyNameError) } else { Ok(Self(t)) }
@@ -290,20 +286,28 @@ impl NameLike for VName {}
/// Create a [Sym] literal.
///
/// Both the name and its components will be cached in a thread-local static so
/// The name and its components will be cached in a thread-local static so
/// that subsequent executions of the expression only incur an Arc-clone for
/// cloning the token.
#[macro_export]
macro_rules! sym {
// Main arm: takes one or more `::`-separated token trees, interns each
// segment with `is`, joins them with `iv`, and unwraps the result of
// `Sym::from_tok`. The expansion contains `.await`, so it has to be used
// inside an async context.
($seg1:tt $( :: $seg:tt)*) => {
$crate::name::Sym::from_tok(
$crate::interner::iv(&[
$crate::interner::is(stringify!($seg1)).await
$( , $crate::interner::is(stringify!($seg)).await )*
])
.await
).unwrap()
// The construction is handed to the `async` form of `tl_cache!`, with each
// segment converted to a string through the `@SEG` helper arms below.
$crate::tl_cache!(async $crate::name::Sym : {
$crate::name::Sym::from_tok(
$crate::interner::iv(&[
$crate::interner::is($crate::sym!(@SEG $seg1)).await
$( , $crate::interner::is($crate::sym!(@SEG $seg)).await )*
])
.await
).unwrap()
})
};
// Segment helper for a bracketed segment: only the tokens inside the square
// brackets are stringified, which lets non-identifier names be written as a
// single token tree (the test in this file expects `[|>]` to yield `|>`).
(@SEG [ $($data:tt)* ]) => {
stringify!($($data)*)
};
// Segment helper for any other single token tree: stringified unchanged.
(@SEG $data:tt) => {
stringify!($data)
};
}
/// Create a [VName] literal.
@@ -312,10 +316,12 @@ macro_rules! sym {
#[macro_export]
macro_rules! vname {
// Takes one or more `::`-separated token trees, interns each one with `is`,
// and unwraps the result of `VName::new`. The expansion contains `.await`,
// so it has to be used inside an async context.
//
// NOTE(review): segments are passed straight to `stringify!` here, whereas
// `sym!` routes them through its `@SEG` helper. A bracketed segment such as
// `[|>]` would therefore keep its brackets in `vname!` — confirm whether
// that difference is intended.
($seg1:tt $( :: $seg:tt)*) => {
$crate::name::VName::new([
$crate::interner::is(stringify!($seg1)).await
$( , $crate::interner::is(stringify!($seg)).await )*
]).unwrap()
// The construction is handed to the `async` form of `tl_cache!`.
$crate::tl_cache!(async $crate::name::VName : {
$crate::name::VName::new([
$crate::interner::is(stringify!($seg1)).await
$( , $crate::interner::is(stringify!($seg)).await )*
]).unwrap()
})
};
}
@@ -325,10 +331,12 @@ macro_rules! vname {
#[macro_export]
macro_rules! vpath {
($seg1:tt $( :: $seg:tt)*) => {
$crate::name::VPath(vec![
$crate::interner::is(stringify!($seg1)).await
$( , $crate::interner::is(stringify!($seg)).await )*
])
$crate::tl_cache!(async $crate::name::VPath : {
$crate::name::VPath(vec![
$crate::interner::is(stringify!($seg1)).await
$( , $crate::interner::is(stringify!($seg)).await )*
])
})
};
() => {
$crate::name::VPath(vec![])
@@ -339,30 +347,43 @@ macro_rules! vpath {
pub mod test {
use std::borrow::Borrow;
use orchid_api_traits::spin_on;
use super::{NameLike, Sym, VName};
use crate::interner::{IStr, is};
use crate::interner::local_interner::local_interner;
use crate::interner::{IStr, is, with_interner};
use crate::name::VPath;
pub async fn recur() {
let myname = vname!(foo::bar);
let _borrowed_slice: &[IStr] = myname.borrow();
let _deref_pathslice: &[IStr] = &myname;
let _as_slice_out: &[IStr] = myname.as_slice();
#[test]
pub fn recur() {
spin_on(with_interner(local_interner(), async {
let myname = vname!(foo::bar);
let _borrowed_slice: &[IStr] = myname.borrow();
let _deref_pathslice: &[IStr] = &myname;
let _as_slice_out: &[IStr] = myname.as_slice();
}))
}
/// Tests that literals are correctly interned as equal
pub async fn literals() {
assert_eq!(
sym!(foo::bar::baz),
Sym::new([is("foo").await, is("bar").await, is("baz").await]).await.unwrap()
);
assert_eq!(
vname!(foo::bar::baz),
VName::new([is("foo").await, is("bar").await, is("baz").await]).unwrap()
);
assert_eq!(
vpath!(foo::bar::baz),
VPath::new([is("foo").await, is("bar").await, is("baz").await])
);
#[test]
pub fn literals() {
spin_on(with_interner(local_interner(), async {
assert_eq!(
sym!(foo::bar::baz),
Sym::new([is("foo").await, is("bar").await, is("baz").await]).await.unwrap()
);
assert_eq!(
sym!(foo::bar::[|>]),
Sym::new([is("foo").await, is("bar").await, is("|>").await]).await.unwrap()
);
assert_eq!(
vname!(foo::bar::baz),
VName::new([is("foo").await, is("bar").await, is("baz").await]).unwrap()
);
assert_eq!(
{ vpath!(foo::bar::baz) },
VPath::new([is("foo").await, is("bar").await, is("baz").await])
);
}))
}
}

View File

@@ -16,7 +16,7 @@ use crate::tree::{ExprRepr, ExtraTok, Paren, TokTree, Token, ttv_fmt, ttv_range}
/// Whether `c` may begin a name: an underscore or any alphabetic character.
pub fn name_start(c: char) -> bool {
  match c {
    '_' => true,
    other => other.is_alphabetic(),
  }
}
/// Whether `c` may appear inside a name: everything accepted by
/// [name_start], plus numeric characters.
pub fn name_char(c: char) -> bool {
  if name_start(c) {
    return true;
  }
  c.is_numeric()
}
pub fn op_char(c: char) -> bool { !name_char(c) && !c.is_whitespace() && !"()[]{}\\".contains(c) }
/// Whether `c` may be part of an operator: it must not be a name character,
/// must not be whitespace covered by [unrep_space], and must not be a bracket
/// or a backslash.
///
/// Since [unrep_space] does not cover `\r` and `\n`, those two characters are
/// accepted by this check.
pub fn op_char(c: char) -> bool {
  const EXCLUDED: [char; 7] = ['(', ')', '[', ']', '{', '}', '\\'];
  !(name_char(c) || unrep_space(c) || EXCLUDED.contains(&c))
}
/// Whether `c` is whitespace other than a carriage return or a line feed.
pub fn unrep_space(c: char) -> bool {
  match c {
    '\r' | '\n' => false,
    other => other.is_whitespace(),
  }
}
/// A cheaply copiable subsection of a document that holds onto context data and

View File

@@ -6,4 +6,18 @@ macro_rules! tl_cache {
}
V.with(|v| v.clone())
}};
// Async-friendly arm: caches the value of `$expr` in a thread-local
// `OnceCell<$ty>`. `$expr` is evaluated inline at the call site rather than
// inside a `V.with` closure, so it may contain `.await` (as the `sym!`
// expansion does). `$ty` must be `Clone`, since every access hands out a clone.
(async $ty:ty : $expr:expr) => {{
type CellType = std::cell::OnceCell<$ty>;
thread_local! {
static V: CellType = std::cell::OnceCell::default();
}
// Clone the cached value out of the cell so that the thread-local borrow
// has ended before `$expr` gets a chance to run.
match V.with(|cell: &CellType| cell.get().cloned()) {
// `as $ty` is a same-type cast, presumably there to pin the result type.
Some(val) => val as $ty,
None => {
let val = $expr;
// `set` returns `Err` when the cell has been filled in the meantime; that
// result is deliberately ignored and the freshly computed value is returned.
let _ = V.with(|cell: &CellType| cell.set(val.clone()));
val as $ty
},
}
}};
}