Files
orchid/orchid-base/src/interner.rs
Lawrence Bethlenfalvy 32d6237dc5 task_local context over context objects
- interner impls logically separate from API in orchid-base (default host interner still in base for testing)
- error reporting, logging, and a variety of other features passed down via context in extension, not yet in host to maintain library-ish profile, should consider options
- no global spawn mechanic, the host has a spawn function but extensions only get a stash for enqueuing async work in sync callbacks which is then explicitly, manually, and with strict order popped and awaited
- still deadlocks nondeterministically for some ungodly reason
2026-01-01 14:54:29 +00:00

383 lines
12 KiB
Rust

use std::fmt::{Debug, Display};
use std::future::Future;
use std::hash::Hash;
use std::ops::Deref;
use std::rc::Rc;
use std::{fmt, hash};
use futures::future::LocalBoxFuture;
use task_local::task_local;
use crate::api;
/// Backing object behind an [IStr]. Gives access to the text as a `&str`
/// (through [AsRef]) or as a reference-counted `String`.
pub trait IStrHandle: AsRef<str> {
/// Returns the text as an `Rc<String>`.
fn rc(&self) -> Rc<String>;
}
/// Backing object behind an [IStrv]. Gives access to the segments as a
/// `&[IStr]` (through [AsRef]) or as a reference-counted `Vec<IStr>`.
pub trait IStrvHandle: AsRef<[IStr]> {
/// Returns the segments as an `Rc<Vec<IStr>>`.
fn rc(&self) -> Rc<Vec<IStr>>;
}
/// Interned string: an API token paired with the handle that owns the text.
///
/// Comparison and hashing only look at the token; the handle is what gives
/// access to the characters.
#[derive(Clone)]
pub struct IStr(pub api::TStr, pub Rc<dyn IStrHandle>);
impl IStr {
    /// Obtain a unique ID for this interned data.
    ///
    /// NOTICE: the ID is only guaranteed to stay the same for a given value
    /// while at least one interned instance of that value is alive. Once a
    /// value is no longer interned, the interner is free to forget about it.
    pub fn to_api(&self) -> api::TStr {
        let Self(token, _) = self;
        *token
    }
    /// Obtain the text as a reference-counted `String` from the handle.
    pub fn rc(&self) -> Rc<String> {
        let Self(_, handle) = self;
        handle.rc()
    }
}
/// An [IStr] dereferences to the text owned by its handle.
impl Deref for IStr {
    type Target = str;
    fn deref(&self) -> &Self::Target {
        // First step out of the `Rc`, then ask the handle for its `str` view
        let handle = &*self.1;
        AsRef::<str>::as_ref(handle)
    }
}
impl Eq for IStr {}
impl PartialEq for IStr {
fn eq(&self, other: &Self) -> bool { self.0 == other.0 }
}
impl Hash for IStr {
fn hash<H: hash::Hasher>(&self, state: &mut H) { self.0.hash(state) }
}
/// Prints the interned text verbatim.
impl Display for IStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self;
        f.write_str(text)
    }
}
/// Prints the interned text wrapped as `IStr(...)`.
impl Debug for IStr {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The closing parenthesis was missing from the format string
        write!(f, "IStr({self})")
    }
}
/// Interned sequence of interned strings: an API token paired with the handle
/// that owns the segments.
///
/// Comparison and hashing only look at the token; the handle is what gives
/// access to the segments.
#[derive(Clone)]
pub struct IStrv(pub api::TStrv, pub Rc<dyn IStrvHandle>);
impl IStrv {
    /// Obtain a unique ID for this interned data.
    ///
    /// NOTICE: the ID is only guaranteed to stay the same for a given value
    /// while at least one interned instance of that value is alive. Once a
    /// value is no longer interned, the interner is free to forget about it.
    pub fn to_api(&self) -> api::TStrv {
        let Self(token, _) = self;
        *token
    }
    /// Obtain the segments as a reference-counted `Vec` from the handle.
    pub fn rc(&self) -> Rc<Vec<IStr>> {
        let Self(_, handle) = self;
        handle.rc()
    }
}
/// An [IStrv] dereferences to the slice of segments owned by its handle.
impl Deref for IStrv {
    type Target = [IStr];
    fn deref(&self) -> &Self::Target {
        // First step out of the `Rc`, then ask the handle for its slice view
        let handle = &*self.1;
        AsRef::<[IStr]>::as_ref(handle)
    }
}
impl Eq for IStrv {}
impl PartialEq for IStrv {
fn eq(&self, other: &Self) -> bool { self.0 == other.0 }
}
/// Hashes the value wrapped by the token, in agreement with the token-based
/// equality.
impl Hash for IStrv {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        let raw_id = &self.0.0;
        Hash::hash(raw_id, state)
    }
}
impl Display for IStrv {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut iter = self.deref().iter();
match iter.next() {
None => return Ok(()),
Some(s) => write!(f, "{s}")?,
}
for s in iter {
write!(f, "::{s}")?
}
Ok(())
}
}
impl Debug for IStrv {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "IStrv({self})") }
}
/// Interning service that converts between values and their API tokens.
/// Every method hands back its result through a boxed, non-`Send` future.
pub trait InternerSrv {
/// Intern a string, yielding its [IStr]
fn is<'a>(&'a self, v: &'a str) -> LocalBoxFuture<'a, IStr>;
/// Resolve a string token to its [IStr]
fn es(&self, t: api::TStr) -> LocalBoxFuture<'_, IStr>;
/// Intern a sequence of interned strings, yielding its [IStrv]
fn iv<'a>(&'a self, v: &'a [IStr]) -> LocalBoxFuture<'a, IStrv>;
/// Resolve a string-sequence token to its [IStrv]
fn ev(&self, t: api::TStrv) -> LocalBoxFuture<'_, IStrv>;
}
task_local! {
// Interner used by the free functions `is`, `iv`, `es` and `ev`. It is put
// in scope for the duration of a future by `with_interner` and read back by
// `get_interner`.
static INTERNER: Rc<dyn InternerSrv>;
}
/// Drives `fut` to completion with `val` installed as the task-local interner
/// and returns the future's output.
pub async fn with_interner<F: Future>(val: Rc<dyn InternerSrv>, fut: F) -> F::Output {
    let scoped = INTERNER.scope(val, fut);
    scoped.await
}
/// Fetches the task-local interner.
///
/// # Panics
///
/// Panics if the task-local cannot be accessed, which is the case when no
/// interner has been installed with [with_interner].
fn get_interner() -> Rc<dyn InternerSrv> {
    let current = INTERNER.try_with(Rc::clone);
    current.expect("Interner not initialized")
}
/// Intern a string through the task-local interner.
///
/// Panics if no interner is in scope.
pub async fn is(v: &str) -> IStr {
    let interner = get_interner();
    interner.is(v).await
}
/// Intern a sequence of interned strings through the task-local interner.
///
/// Panics if no interner is in scope.
pub async fn iv(v: &[IStr]) -> IStrv {
    let interner = get_interner();
    interner.iv(v).await
}
/// Resolve a string token through the task-local interner.
///
/// Panics if no interner is in scope.
pub async fn es(v: api::TStr) -> IStr {
    let interner = get_interner();
    interner.es(v).await
}
/// Resolve a string-sequence token through the task-local interner.
///
/// Panics if no interner is in scope.
pub async fn ev(v: api::TStrv) -> IStrv {
    let interner = get_interner();
    interner.ev(v).await
}
pub mod local_interner {
use std::borrow::Borrow;
use std::cell::RefCell;
use std::fmt::Debug;
use std::future;
use std::hash::{BuildHasher, Hash};
use std::num::NonZeroU64;
use std::rc::{Rc, Weak};
use futures::future::LocalBoxFuture;
use hashbrown::hash_table::{Entry, OccupiedEntry, VacantEntry};
use hashbrown::{DefaultHashBuilder, HashTable};
use orchid_api_traits::Coding;
use super::{IStr, IStrHandle, IStrv, IStrvHandle, InternerSrv};
use crate::api;
/// Associated types and methods for parallel concepts between scalar and
/// vector interning. Implemented in this module by [StrBranch] for strings
/// and by [StrvBranch] for sequences of interned strings.
pub trait InternableCard: 'static + Sized + Default + Debug {
/// API representation of an interner key
type Token: Clone + Copy + Debug + Hash + Eq + PartialOrd + Ord + Coding + 'static;
/// Owned version of interned value physically held by `'static` interner
/// and token
type Data: 'static + Borrow<Self::Borrow> + Eq + Hash + Debug;
/// Borrowed version of interned value placed in intern queries to avoid a
/// copy
type Borrow: ToOwned<Owned = Self::Data> + ?Sized + Eq + Hash + Debug;
/// Smart object handed out by the interner for storage and comparison in
/// third party code. [IStr] or [IStrv]
type Interned: Clone + Debug;
/// Create smart object from token for fast comparison and a handle for
/// everything else incl. virtual drop
fn new_interned(token: Self::Token, handle: Rc<Handle<Self>>) -> Self::Interned;
}
/// [InternableCard] for single strings: owned as `String`, queried as `str`,
/// keyed by [api::TStr] and handed out as [IStr]
#[derive(Default, Debug)]
pub struct StrBranch;
impl InternableCard for StrBranch {
type Data = String;
type Token = api::TStr;
type Borrow = str;
type Interned = IStr;
// The concrete handle becomes the trait object stored inside the IStr
fn new_interned(t: Self::Token, h: Rc<Handle<Self>>) -> Self::Interned { IStr(t, h) }
}
/// [InternableCard] for sequences of interned strings: owned as `Vec<IStr>`,
/// queried as `[IStr]`, keyed by [api::TStrv] and handed out as [IStrv]
#[derive(Default, Debug)]
pub struct StrvBranch;
impl InternableCard for StrvBranch {
type Data = Vec<IStr>;
type Token = api::TStrv;
type Borrow = [IStr];
type Interned = IStrv;
// The concrete handle becomes the trait object stored inside the IStrv
fn new_interned(t: Self::Token, h: Rc<Handle<Self>>) -> Self::Interned { IStrv(t, h) }
}
/// An interned value together with the key it is interned under
#[derive(Debug)]
struct Data<B: InternableCard> {
    /// Key of the value
    token: B::Token,
    /// The value itself behind a reference count
    data: Rc<B::Data>,
}
// Written by hand: copies the token and bumps the reference count, so neither
// `B` nor `B::Data` has to be `Clone`.
impl<B: InternableCard> Clone for Data<B> {
    fn clone(&self) -> Self {
        let Self { token, data } = self;
        Self { token: *token, data: Rc::clone(data) }
    }
}
/// Implementor for the trait objects held by [IStr] and [IStrv]
pub struct Handle<B: InternableCard> {
/// Token and value this handle stands for
data: Data<B>,
/// Tables of the interner that created this handle. The reference is weak;
/// when the tables are already gone, the [Drop] handler does nothing.
parent: Weak<RefCell<IntData<B>>>,
}
/// String handles expose their text as a shared `Rc<String>`...
impl IStrHandle for Handle<StrBranch> {
    fn rc(&self) -> Rc<String> { Rc::clone(&self.data.data) }
}
/// ...and as a plain string slice
impl AsRef<str> for Handle<StrBranch> {
    fn as_ref(&self) -> &str { self.data.data.as_str() }
}
/// Sequence handles expose their segments as a shared `Rc<Vec<IStr>>`...
impl IStrvHandle for Handle<StrvBranch> {
    fn rc(&self) -> Rc<Vec<IStr>> { Rc::clone(&self.data.data) }
}
/// ...and as a plain slice
impl AsRef<[IStr]> for Handle<StrvBranch> {
    fn as_ref(&self) -> &[IStr] { self.data.data.as_slice() }
}
/// When a handle is dropped, its records are removed from both tables of the
/// interner that created it.
impl<B: InternableCard> Drop for Handle<B> {
    fn drop(&mut self) {
        // If the interner is already gone there is nothing to unregister from
        let Some(parent) = self.parent.upgrade() else { return };
        let mut tables = parent.borrow_mut();
        // Value-indexed record
        if let Entry::Occupied(by_data) = tables.entry_by_data(self.data.data.as_ref().borrow()) {
            by_data.remove();
        }
        // Token-indexed record
        if let Entry::Occupied(by_tok) = tables.entry_by_tok(self.data.token) {
            by_tok.remove();
        }
    }
}
/// Information retained about an interned token indexed both by key and
/// value. One record per interned value is stored in each of the two tables.
struct Rec<B: InternableCard> {
/// This reference is weak, but the [Drop] handler of [Handle] removes the
/// handle's [Rec]s from both tables of the interner, so a record that can
/// still be found is expected to have a live handle.
handle: Weak<Handle<B>>,
/// Keys for indexing from either table
data: Data<B>,
}
/// Turn an occupied interner entry into the smart object handed out to
/// callers. The matching write operation is [insert].
///
/// Panics if the handle referenced by the record has already been dropped.
fn read<B: InternableCard>(entry: OccupiedEntry<'_, Rec<B>>) -> B::Interned {
    let rec = entry.get();
    let handle = rec.handle.upgrade().expect("Found entry but handle already dropped");
    B::new_interned(rec.data.token, handle)
}
/// Fill a vacant interner entry with a record describing `handle`. Only a
/// weak reference to the handle is stored. The matching read operation is
/// [read].
fn insert<B: InternableCard>(entry: VacantEntry<'_, Rec<B>>, handle: Rc<Handle<B>>) {
    let rec = Rec { handle: Rc::downgrade(&handle), data: handle.data.clone() };
    entry.insert(rec);
}
/// The two lookup tables of an interner and the hash builder they share
#[derive(Default)]
struct IntData<B: InternableCard> {
    /// Records indexed by token
    by_tok: HashTable<Rec<B>>,
    /// Records indexed by value
    by_data: HashTable<Rec<B>>,
    /// Hash builder used for both tables
    hasher: DefaultHashBuilder,
}
impl<B: InternableCard> IntData<B> {
    /// Entry of the value-indexed table that `query` belongs to
    fn entry_by_data(&mut self, query: &B::Borrow) -> Entry<'_, Rec<B>> {
        let Self { by_data, hasher, .. } = self;
        let hasher = &*hasher;
        let hash = hasher.hash_one(query);
        by_data.entry(
            hash,
            // Stored values are compared and hashed in their borrowed form
            |rec| <B::Data as Borrow<B::Borrow>>::borrow(&rec.data.data) == query,
            |rec| hasher.hash_one(<B::Data as Borrow<B::Borrow>>::borrow(&rec.data.data)),
        )
    }
    /// Entry of the token-indexed table that `token` belongs to
    fn entry_by_tok(&mut self, token: B::Token) -> Entry<'_, Rec<B>> {
        let Self { by_tok, hasher, .. } = self;
        let hasher = &*hasher;
        let hash = hasher.hash_one(token);
        by_tok.entry(hash, |rec| rec.data.token == token, |rec| hasher.hash_one(rec.data.token))
    }
}
/// Failed lookup by value. It can be recovered by supplying the token once it
/// has been found elsewhere.
pub struct InternError<'a, B: InternableCard> {
    /// Interner in which the lookup failed
    int: &'a Int<B>,
    /// Value that was looked up
    query: &'a B::Borrow,
}
impl<B: InternableCard> InternError<'_, B> {
    /// Register the queried value under `token`. If the value has been
    /// interned in the meantime, the existing instance is returned and
    /// `token` is discarded.
    ///
    /// Panics if the value is missing but `token` is already in use.
    pub fn set_if_empty(self, token: B::Token) -> B::Interned {
        let mut tables = self.int.0.borrow_mut();
        let slot = match tables.entry_by_data(self.query) {
            Entry::Occupied(found) => return read(found),
            Entry::Vacant(slot) => slot,
        };
        let handle = self.int.mk_handle(Data { token, data: Rc::new(self.query.to_owned()) });
        insert(slot, handle.clone());
        // The same handle is registered in the token-indexed table as well
        match tables.entry_by_tok(token) {
            Entry::Vacant(tok_slot) => insert(tok_slot, handle.clone()),
            Entry::Occupied(_) => panic!("Data and key tables out of sync"),
        }
        B::new_interned(token, handle)
    }
}
/// Prints the error as `InternError(<query>)`.
impl<B: InternableCard> Debug for InternError<'_, B> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The label used to be "InternEntry", which does not match the type name
        f.debug_tuple("InternError").field(&self.query).finish()
    }
}
/// Failed lookup by token. It can be recovered by supplying the value once it
/// has been found elsewhere.
pub struct ExternError<'a, B: InternableCard> {
    /// Interner in which the lookup failed
    int: &'a Int<B>,
    /// Token that was looked up
    token: B::Token,
}
impl<B: InternableCard> ExternError<'_, B> {
    /// Register `data` under the queried token. If the token has been
    /// registered in the meantime, the existing instance is returned and
    /// `data` is discarded.
    ///
    /// Panics if the token is missing but `data` is already interned.
    pub fn set_if_empty(&self, data: Rc<B::Data>) -> B::Interned {
        let mut tables = self.int.0.borrow_mut();
        let slot = match tables.entry_by_tok(self.token) {
            Entry::Occupied(found) => return read(found),
            Entry::Vacant(slot) => slot,
        };
        let handle = self.int.mk_handle(Data { token: self.token, data: data.clone() });
        insert(slot, handle.clone());
        // The same handle is registered in the value-indexed table as well
        match tables.entry_by_data(data.as_ref().borrow()) {
            Entry::Vacant(data_slot) => insert(data_slot, handle.clone()),
            Entry::Occupied(_) => panic!("Data and key tables out of sync"),
        }
        B::new_interned(self.token, handle)
    }
}
/// Prints the error as `ExternError(<token>)`.
impl<B: InternableCard> Debug for ExternError<'_, B> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The label used to be "ExternEntry", which does not match the type name
        f.debug_tuple("ExternError").field(&self.token).finish()
    }
}
/// Interner for one kind of value (see [InternableCard]). Owns the two lookup
/// tables; the handles it creates only hold weak references back to them.
#[derive(Default)]
pub struct Int<B: InternableCard>(Rc<RefCell<IntData<B>>>);
impl<B: InternableCard> Int<B> {
    /// Wrap `data` in a new handle that points back at this interner's tables
    fn mk_handle(&self, data: Data<B>) -> Rc<Handle<B>> {
        // `data` is already owned and `Rc::downgrade` only needs a reference,
        // so neither has to be cloned
        Rc::new(Handle { data, parent: Rc::downgrade(&self.0) })
    }
    /// Look up by value, or yield to figure out its ID from elsewhere
    pub fn i<'a>(&'a self, query: &'a B::Borrow) -> Result<B::Interned, InternError<'a, B>> {
        if let Entry::Occupied(val) = self.0.borrow_mut().entry_by_data(query) {
            return Ok(read(val));
        }
        Err(InternError { int: self, query })
    }
    /// Look up by key or yield to figure out its value from elsewhere
    pub fn e(&self, token: B::Token) -> Result<B::Interned, ExternError<'_, B>> {
        if let Entry::Occupied(ent) = self.0.borrow_mut().entry_by_tok(token) {
            return Ok(read(ent));
        }
        Err(ExternError { int: self, token })
    }
}
thread_local! {
    /// Most recently issued ID on this thread; zero until the first ID is issued
    static NEXT_ID: RefCell<u64> = RefCell::new(0);
}
/// Advance the thread-local counter and pass the new, non-zero ID to `fun`.
/// The counter is no longer borrowed by the time `fun` runs.
fn with_new_id<T>(fun: impl FnOnce(NonZeroU64) -> T) -> T {
    let raw = NEXT_ID.with_borrow_mut(|counter| {
        *counter += 1;
        *counter
    });
    let id = NonZeroU64::new(raw).unwrap();
    fun(id)
}
/// [InternerSrv] implementation that resolves everything from its own tables
/// and generates tokens itself
#[derive(Default)]
struct LocalInterner {
/// Interned strings
str: Int<StrBranch>,
/// Interned sequences of interned strings
strv: Int<StrvBranch>,
}
/// Every operation completes immediately; the returned futures are already
/// resolved.
impl InternerSrv for LocalInterner {
    /// Return the existing [IStr] for `v`, or intern it under a newly
    /// generated token
    fn is<'a>(&'a self, v: &'a str) -> LocalBoxFuture<'a, IStr> {
        let interned = match self.str.i(v) {
            Ok(found) => found,
            Err(missing) => with_new_id(|id| missing.set_if_empty(api::TStr(id))),
        };
        Box::pin(future::ready(interned))
    }
    /// Resolve a string token. Panics if the token is not known to this
    /// interner.
    fn es(&self, t: api::TStr) -> LocalBoxFuture<'_, IStr> {
        Box::pin(future::ready(self.str.e(t).expect("Unrecognized token cannot be externed")))
    }
    /// Return the existing [IStrv] for `v`, or intern it under a newly
    /// generated token
    fn iv<'a>(&'a self, v: &'a [IStr]) -> LocalBoxFuture<'a, IStrv> {
        let interned = match self.strv.i(v) {
            Ok(found) => found,
            Err(missing) => with_new_id(|id| missing.set_if_empty(api::TStrv(id))),
        };
        Box::pin(future::ready(interned))
    }
    /// Resolve a string-sequence token. Panics if the token is not known to
    /// this interner.
    // Spelled `api::TStrv` like the trait declaration and the sibling methods
    fn ev(&self, t: api::TStrv) -> LocalBoxFuture<'_, IStrv> {
        Box::pin(future::ready(self.strv.e(t).expect("Unrecognized token cannot be externed")))
    }
}
/// Creates a basic thread-local interner for testing and root role.
pub fn local_interner() -> Rc<dyn InternerSrv> { Rc::<LocalInterner>::default() }
}