forked from Orchid/orchid
New constraint: custom lexer output is dropped whenever it is used to terminate an operator nested inside another custom lexer, because the recursive call has to return exactly one lexeme
390 lines
13 KiB
Rust
390 lines
13 KiB
Rust
//! Various datatypes that all represent namespaced names.
|
|
|
|
use std::borrow::Borrow;
|
|
use std::hash::Hash;
|
|
use std::num::{NonZeroU64, NonZeroUsize};
|
|
use std::ops::{Deref, Index};
|
|
use std::path::Path;
|
|
use std::{fmt, vec};
|
|
|
|
use futures::future::{OptionFuture, join_all};
|
|
use itertools::Itertools;
|
|
use trait_set::trait_set;
|
|
|
|
use crate::api;
|
|
use crate::interner::{IStr, IStrv, es, ev, is, iv};
|
|
|
|
trait_set! {
    /// Bundle of bounds shared by every iterator over name segments: it yields
    /// [IStr] items, can be walked from either end, and reports its exact length
    pub trait NameIter = Iterator<Item = IStr> + ExactSizeIterator + DoubleEndedIterator;
}
|
|
|
|
/// A token path which may be empty. [VName] is the non-empty version
|
|
#[derive(Clone, Default, Hash, PartialEq, Eq)]
|
|
pub struct VPath(Vec<IStr>);
|
|
impl VPath {
|
|
/// Collect segments into a vector
|
|
pub fn new(items: impl IntoIterator<Item = IStr>) -> Self { Self(items.into_iter().collect()) }
|
|
/// Number of path segments
|
|
pub fn len(&self) -> usize { self.0.len() }
|
|
/// Whether there are any path segments. In other words, whether this is a
|
|
/// valid name
|
|
pub fn is_empty(&self) -> bool { self.len() == 0 }
|
|
/// Prepend some tokens to the path
|
|
pub fn prefix(self, items: impl IntoIterator<Item = IStr>) -> Self {
|
|
Self(items.into_iter().chain(self.0).collect())
|
|
}
|
|
/// Append some tokens to the path
|
|
pub fn suffix(self, items: impl IntoIterator<Item = IStr>) -> Self {
|
|
Self(self.0.into_iter().chain(items).collect())
|
|
}
|
|
/// Partition the string by `::` namespace separators
|
|
pub async fn parse(s: &str) -> Self {
|
|
Self(if s.is_empty() { vec![] } else { join_all(s.split("::").map(is)).await })
|
|
}
|
|
/// Walk over the segments
|
|
pub fn str_iter(&self) -> impl Iterator<Item = &'_ str> { Box::new(self.0.iter().map(|s| &**s)) }
|
|
/// Try to convert into non-empty version
|
|
pub fn into_name(self) -> Result<VName, EmptyNameError> { VName::new(self.0) }
|
|
/// Add a token to the path. Since now we know that it can't be empty, turn it
|
|
/// into a name.
|
|
pub fn name_with_suffix(self, name: IStr) -> VName {
|
|
VName(self.into_iter().chain([name]).collect())
|
|
}
|
|
/// Add a token to the beginning of the. Since now we know that it can't be
|
|
/// empty, turn it into a name.
|
|
pub fn name_with_prefix(self, name: IStr) -> VName {
|
|
VName([name].into_iter().chain(self).collect())
|
|
}
|
|
|
|
/// Convert a fs path to a vpath
|
|
pub async fn from_path(path: &Path, ext: &str) -> Option<(Self, bool)> {
|
|
async fn to_vpath(p: &Path) -> Option<VPath> {
|
|
let tok_opt_v = join_all(p.iter().map(|c| OptionFuture::from(c.to_str().map(is)))).await;
|
|
tok_opt_v.into_iter().collect::<Option<_>>().map(VPath)
|
|
}
|
|
match path.extension().map(|s| s.to_str()) {
|
|
Some(Some(s)) if s == ext => Some((to_vpath(&path.with_extension("")).await?, true)),
|
|
None => Some((to_vpath(path).await?, false)),
|
|
Some(_) => None,
|
|
}
|
|
}
|
|
}
|
|
impl fmt::Debug for VPath {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "VName({self})") }
|
|
}
|
|
impl fmt::Display for VPath {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "{}", self.str_iter().join("::"))
|
|
}
|
|
}
|
|
/// Allows `collect()`-ing segments straight into a path
impl FromIterator<IStr> for VPath {
    fn from_iter<T: IntoIterator<Item = IStr>>(iter: T) -> Self { Self::new(iter) }
}
|
|
impl IntoIterator for VPath {
|
|
type Item = IStr;
|
|
type IntoIter = vec::IntoIter<Self::Item>;
|
|
fn into_iter(self) -> Self::IntoIter { self.0.into_iter() }
|
|
}
|
|
/// Exposes the segments as a slice
impl Borrow<[IStr]> for VPath {
    fn borrow(&self) -> &[IStr] { self.0.as_slice() }
}
|
|
impl Deref for VPath {
|
|
type Target = [IStr];
|
|
fn deref(&self) -> &Self::Target { self.borrow() }
|
|
}
|
|
|
|
impl<T> Index<T> for VPath
|
|
where [IStr]: Index<T>
|
|
{
|
|
type Output = <[IStr] as Index<T>>::Output;
|
|
|
|
fn index(&self, index: T) -> &Self::Output { &Borrow::<[IStr]>::borrow(self)[index] }
|
|
}
|
|
|
|
/// A mutable representation of a namespaced identifier of at least one segment.
|
|
///
|
|
/// These names may be relative or otherwise partially processed.
|
|
///
|
|
/// See also [Sym] for the immutable representation, and [VPath] for possibly
|
|
/// empty values
|
|
#[derive(Clone, Hash, PartialEq, Eq)]
|
|
pub struct VName(Vec<IStr>);
|
|
impl VName {
|
|
/// Assert that the sequence isn't empty and wrap it in [VName] to represent
|
|
/// this invariant
|
|
pub fn new(items: impl IntoIterator<Item = IStr>) -> Result<Self, EmptyNameError> {
|
|
let data: Vec<_> = items.into_iter().collect();
|
|
if data.is_empty() { Err(EmptyNameError) } else { Ok(Self(data)) }
|
|
}
|
|
pub async fn deintern(name: impl IntoIterator<Item = api::TStr>) -> Result<Self, EmptyNameError> {
|
|
Self::new(join_all(name.into_iter().map(es)).await)
|
|
}
|
|
/// Unwrap the enclosed vector
|
|
pub fn into_vec(self) -> Vec<IStr> { self.0 }
|
|
/// Get a reference to the enclosed vector
|
|
pub fn vec(&self) -> &Vec<IStr> { &self.0 }
|
|
/// Mutable access to the underlying vector. To ensure correct results, this
|
|
/// must never be empty.
|
|
pub fn vec_mut(&mut self) -> &mut Vec<IStr> { &mut self.0 }
|
|
/// Intern the name and return a [Sym]
|
|
pub async fn to_sym(&self) -> Sym { Sym(iv(&self.0[..]).await) }
|
|
/// If this name has only one segment, return it
|
|
pub fn as_root(&self) -> Option<IStr> { self.0.iter().exactly_one().ok().cloned() }
|
|
/// Prepend the segments to this name
|
|
#[must_use = "This is a pure function"]
|
|
pub fn prefix(self, items: impl IntoIterator<Item = IStr>) -> Self {
|
|
Self(items.into_iter().chain(self.0).collect())
|
|
}
|
|
/// Append the segments to this name
|
|
#[must_use = "This is a pure function"]
|
|
pub fn suffix(self, items: impl IntoIterator<Item = IStr>) -> Self {
|
|
Self(self.0.into_iter().chain(items).collect())
|
|
}
|
|
/// Read a `::` separated namespaced name
|
|
pub async fn parse(s: &str) -> Result<Self, EmptyNameError> { Self::new(VPath::parse(s).await) }
|
|
pub async fn literal(s: &'static str) -> Self { Self::parse(s).await.expect("empty literal !?") }
|
|
/// Obtain an iterator over the segments of the name
|
|
pub fn iter(&self) -> impl Iterator<Item = IStr> + '_ { self.0.iter().cloned() }
|
|
}
|
|
impl fmt::Debug for VName {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "VName({self})") }
|
|
}
|
|
impl fmt::Display for VName {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "{}", self.str_iter().join("::"))
|
|
}
|
|
}
|
|
impl IntoIterator for VName {
|
|
type Item = IStr;
|
|
type IntoIter = vec::IntoIter<Self::Item>;
|
|
fn into_iter(self) -> Self::IntoIter { self.0.into_iter() }
|
|
}
|
|
impl<T> Index<T> for VName
|
|
where [IStr]: Index<T>
|
|
{
|
|
type Output = <[IStr] as Index<T>>::Output;
|
|
|
|
fn index(&self, index: T) -> &Self::Output { &self.deref()[index] }
|
|
}
|
|
/// Exposes the segments as a slice
impl Borrow<[IStr]> for VName {
    fn borrow(&self) -> &[IStr] { self.0.as_slice() }
}
|
|
impl Deref for VName {
|
|
type Target = [IStr];
|
|
fn deref(&self) -> &Self::Target { self.borrow() }
|
|
}
|
|
|
|
/// Error produced when a non-empty name [VName] or [Sym] is constructed with an
/// empty sequence
#[derive(Debug, Copy, Clone, Default, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct EmptyNameError;
/// Human-readable message so the error can be shown to users
impl fmt::Display for EmptyNameError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("a name must have at least one segment")
    }
}
/// Lets the error be propagated with `?` into `Box<dyn Error>` and similar
/// type-erased error containers
impl std::error::Error for EmptyNameError {}
|
|
impl TryFrom<&[IStr]> for VName {
|
|
type Error = EmptyNameError;
|
|
fn try_from(value: &[IStr]) -> Result<Self, Self::Error> { Self::new(value.iter().cloned()) }
|
|
}
|
|
|
|
/// An interned representation of a namespaced identifier.
|
|
///
|
|
/// These names are always absolute.
|
|
///
|
|
/// See also [VName]
|
|
#[derive(Clone, Hash, PartialEq, Eq)]
|
|
pub struct Sym(IStrv);
|
|
impl Sym {
|
|
/// Assert that the sequence isn't empty, intern it and wrap it in a [Sym] to
|
|
/// represent this invariant
|
|
pub async fn new(v: impl IntoIterator<Item = IStr>) -> Result<Self, EmptyNameError> {
|
|
let items = v.into_iter().collect_vec();
|
|
Self::from_tok(iv(&items).await)
|
|
}
|
|
/// Read a `::` separated namespaced name. Do not use this for statically
|
|
/// known names, use the [sym] macro instead which is cached.
|
|
pub async fn parse(s: &str) -> Result<Self, EmptyNameError> {
|
|
Ok(Sym(iv(&VName::parse(s).await?.into_vec()).await))
|
|
}
|
|
/// Assert that a token isn't empty, and wrap it in a [Sym]
|
|
pub fn from_tok(t: IStrv) -> Result<Self, EmptyNameError> {
|
|
if t.is_empty() { Err(EmptyNameError) } else { Ok(Self(t)) }
|
|
}
|
|
/// Grab the interner token
|
|
pub fn tok(&self) -> IStrv { self.0.clone() }
|
|
/// Get a number unique to this name suitable for arbitrary ordering.
|
|
pub fn id(&self) -> NonZeroU64 { self.0.to_api().0 }
|
|
/// Extern the sym for editing
|
|
pub fn to_vname(&self) -> VName { VName(self[..].to_vec()) }
|
|
pub async fn from_api(marker: api::TStrv) -> Sym {
|
|
Self::from_tok(ev(marker).await).expect("Empty sequence found for serialized Sym")
|
|
}
|
|
pub fn to_api(&self) -> api::TStrv { self.tok().to_api() }
|
|
pub async fn suffix(&self, tokv: impl IntoIterator<Item = IStr>) -> Sym {
|
|
Self::new(self.0.iter().cloned().chain(tokv)).await.unwrap()
|
|
}
|
|
}
|
|
impl fmt::Debug for Sym {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Sym({self})") }
|
|
}
|
|
impl fmt::Display for Sym {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
write!(f, "{}", self.str_iter().join("::"))
|
|
}
|
|
}
|
|
impl<T> Index<T> for Sym
|
|
where [IStr]: Index<T>
|
|
{
|
|
type Output = <[IStr] as Index<T>>::Output;
|
|
|
|
fn index(&self, index: T) -> &Self::Output { &self.deref()[index] }
|
|
}
|
|
/// Exposes the interned segments as a slice
impl Borrow<[IStr]> for Sym {
    fn borrow(&self) -> &[IStr] {
        let segments: &[IStr] = &self.0[..];
        segments
    }
}
|
|
impl Deref for Sym {
|
|
type Target = [IStr];
|
|
fn deref(&self) -> &Self::Target { self.borrow() }
|
|
}
|
|
|
|
/// An abstraction over tokenized vs non-tokenized names so that they can be
|
|
/// handled together in datastructures. The names can never be empty
|
|
#[allow(clippy::len_without_is_empty)] // never empty
|
|
pub trait NameLike:
|
|
'static + Clone + Eq + Hash + fmt::Debug + fmt::Display + Borrow<[IStr]>
|
|
{
|
|
/// Convert into held slice
|
|
fn as_slice(&self) -> &[IStr] { Borrow::<[IStr]>::borrow(self) }
|
|
/// Get iterator over tokens
|
|
fn segs(&self) -> impl NameIter + '_ { self.as_slice().iter().cloned() }
|
|
/// Get iterator over string segments
|
|
fn str_iter(&self) -> impl Iterator<Item = &'_ str> + '_ { self.as_slice().iter().map(|t| &**t) }
|
|
/// Fully resolve the name for printing
|
|
#[must_use]
|
|
fn to_strv(&self) -> Vec<String> { self.segs().map(|s| s.to_string()).collect() }
|
|
/// Format the name as an approximate filename
|
|
fn as_src_path(&self) -> String { format!("{}.orc", self.segs().join("/")) }
|
|
/// Return the number of segments in the name
|
|
fn len_nz(&self) -> NonZeroUsize {
|
|
NonZeroUsize::try_from(self.segs().count()).expect("NameLike never empty")
|
|
}
|
|
/// Like slice's `split_first` except we know that it always returns Some
|
|
fn split_first_seg(&self) -> (IStr, &[IStr]) {
|
|
let (foot, torso) = self.as_slice().split_last().expect("NameLike never empty");
|
|
(foot.clone(), torso)
|
|
}
|
|
/// Like slice's `split_last` except we know that it always returns Some
|
|
fn split_last_seg(&self) -> (IStr, &[IStr]) {
|
|
let (foot, torso) = self.as_slice().split_last().expect("NameLike never empty");
|
|
(foot.clone(), torso)
|
|
}
|
|
/// Get the first element
|
|
fn first_seg(&self) -> IStr { self.split_first_seg().0 }
|
|
/// Get the last element
|
|
fn last_seg(&self) -> IStr { self.split_last_seg().0 }
|
|
}
|
|
|
|
impl NameLike for Sym {}
|
|
impl NameLike for VName {}
|
|
|
|
/// Create a [Sym] literal.
///
/// The name and its components will be cached in a thread-local static so
/// that subsequent executions of the expression only incur an Arc-clone for
/// cloning the token.
///
/// Segments are separated by `::`. A segment wrapped in square brackets is
/// stringified without the brackets, which allows segments that are not a
/// single token tree, e.g. `sym!(foo::bar::[|>])`.
#[macro_export]
macro_rules! sym {
    ($head:tt $( :: $tail:tt)*) => {
        $crate::tl_cache!(async $crate::name::Sym : {
            $crate::name::Sym::from_tok(
                $crate::interner::iv(&[
                    $crate::interner::is($crate::sym!(@SEG $head)).await
                    $( , $crate::interner::is($crate::sym!(@SEG $tail)).await )*
                ])
                .await
            ).unwrap()
        })
    };
    // Bracketed segment: stringify the contents only
    (@SEG [ $($inner:tt)* ]) => {
        stringify!($($inner)*)
    };
    // Plain segment: stringify the token as it is
    (@SEG $plain:tt) => {
        stringify!($plain)
    };
}
|
|
|
|
/// Create a [VName] literal.
///
/// The components are interned much like in [sym]. Each `::`-separated token
/// is stringified as written and becomes one segment.
#[macro_export]
macro_rules! vname {
    ($head:tt $( :: $tail:tt)*) => {
        $crate::tl_cache!(async $crate::name::VName : {
            $crate::name::VName::new([
                $crate::interner::is(stringify!($head)).await
                $( , $crate::interner::is(stringify!($tail)).await )*
            ]).unwrap()
        })
    };
}
|
|
|
|
/// Create a [VPath] literal.
///
/// The components are interned much like in [sym]. Invoked without arguments
/// it produces the empty path.
#[macro_export]
macro_rules! vpath {
    ($seg1:tt $( :: $seg:tt)*) => {
        $crate::tl_cache!(async $crate::name::VPath : {
            // Built through the public constructor: the tuple field of `VPath`
            // is private, so the tuple constructor is not nameable at
            // expansion sites outside `crate::name`
            $crate::name::VPath::new([
                $crate::interner::is(stringify!($seg1)).await
                $( , $crate::interner::is(stringify!($seg)).await )*
            ])
        })
    };
    () => {
        <$crate::name::VPath as ::core::default::Default>::default()
    }
}
|
|
|
|
#[cfg(test)]
pub mod test {
    use std::borrow::Borrow;

    use orchid_api_traits::spin_on;

    use super::{NameLike, Sym, VName};
    use crate::interner::local_interner::local_interner;
    use crate::interner::{IStr, is, with_interner};
    use crate::name::VPath;

    /// Checks that a [VName] can be viewed as a segment slice through
    /// `Borrow`, through deref coercion and through [NameLike::as_slice]
    #[test]
    pub fn recur() {
        spin_on(with_interner(local_interner(), async {
            let name = vname!(foo::bar);
            let _via_borrow: &[IStr] = name.borrow();
            let _via_deref: &[IStr] = &name;
            let _via_namelike: &[IStr] = name.as_slice();
        }))
    }

    /// Tests that literals are correctly interned as equal
    #[test]
    pub fn literals() {
        spin_on(with_interner(local_interner(), async {
            // Each literal is evaluated before the value it is compared to
            let sym_lit = sym!(foo::bar::baz);
            let sym_built = Sym::new([is("foo").await, is("bar").await, is("baz").await]).await.unwrap();
            assert_eq!(sym_lit, sym_built);

            let op_lit = sym!(foo::bar::[|>]);
            let op_built = Sym::new([is("foo").await, is("bar").await, is("|>").await]).await.unwrap();
            assert_eq!(op_lit, op_built);

            let vname_lit = vname!(foo::bar::baz);
            let vname_built = VName::new([is("foo").await, is("bar").await, is("baz").await]).unwrap();
            assert_eq!(vname_lit, vname_built);

            let vpath_lit = { vpath!(foo::bar::baz) };
            let vpath_built = VPath::new([is("foo").await, is("bar").await, is("baz").await]);
            assert_eq!(vpath_lit, vpath_built);
        }))
    }
}
|