use std::borrow::Borrow; use std::hash::BuildHasher as _; use std::num::NonZeroU64; use std::ops::{Deref, DerefMut}; use std::sync::{atomic, Arc, Mutex, MutexGuard}; use std::{fmt, hash, mem}; use hashbrown::{HashMap, HashSet}; use itertools::Itertools as _; use orchid_api_traits::{Decode, Encode, Request}; use crate::api; use orchid_api_traits::{ApiEquiv, FromApi, ToApi}; use crate::reqnot::{DynRequester, Requester}; /// Clippy crashes while verifying `Tok: Sized` without this and I cba to create /// a minimal example #[derive(Clone)] struct ForceSized(T); #[derive(Clone)] pub struct Tok { data: Arc, marker: ForceSized, } impl Tok { pub fn new(data: Arc, marker: T::Marker) -> Self { Self { data, marker: ForceSized(marker) } } pub fn marker(&self) -> T::Marker { self.marker.0 } pub fn arc(&self) -> Arc { self.data.clone() } } impl Deref for Tok { type Target = T; fn deref(&self) -> &Self::Target { self.data.as_ref() } } impl Ord for Tok { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.marker().cmp(&other.marker()) } } impl PartialOrd for Tok { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Eq for Tok {} impl PartialEq for Tok { fn eq(&self, other: &Self) -> bool { self.cmp(other).is_eq() } } impl hash::Hash for Tok { fn hash(&self, state: &mut H) { self.marker().hash(state) } } impl fmt::Display for Tok { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", &*self.data) } } impl fmt::Debug for Tok { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!(f, "Token({} -> {:?})", self.marker().get_id(), self.data.as_ref()) } } impl Encode for Tok { fn encode(&self, write: &mut W) { self.data.encode(write) } } impl Decode for Tok { fn decode(read: &mut R) -> Self { intern(&T::decode(read)) } } pub trait Interned: Eq + hash::Hash + Clone + fmt::Debug + Internable { type Marker: InternMarker + Sized; fn intern( self: Arc, req: &(impl DynRequester + ?Sized), ) -> Self::Marker; fn bimap(interner: &mut TypedInterners) -> &mut Bimap; } pub trait Internable: fmt::Debug { type Interned: Interned; fn get_owned(&self) -> Arc; } pub trait InternMarker: Copy + PartialEq + Eq + PartialOrd + Ord + hash::Hash + Sized { type Interned: Interned; fn resolve( self, req: &(impl DynRequester + ?Sized), ) -> Tok; fn get_id(self) -> NonZeroU64; fn from_id(id: NonZeroU64) -> Self; } impl Interned for String { type Marker = api::TStr; fn intern( self: Arc, req: &(impl DynRequester + ?Sized), ) -> Self::Marker { req.request(api::InternStr(self)) } fn bimap(interners: &mut TypedInterners) -> &mut Bimap { &mut interners.strings } } impl InternMarker for api::TStr { type Interned = String; fn resolve( self, req: &(impl DynRequester + ?Sized), ) -> Tok { Tok::new(req.request(api::ExternStr(self)), self) } fn get_id(self) -> NonZeroU64 { self.0 } fn from_id(id: NonZeroU64) -> Self { Self(id) } } impl Internable for str { type Interned = String; fn get_owned(&self) -> Arc { Arc::new(self.to_string()) } } impl Internable for String { type Interned = String; fn get_owned(&self) -> Arc { Arc::new(self.to_string()) } } impl ApiEquiv for Tok { type Api = api::TStr; } impl ToApi for Tok { type Ctx = (); fn to_api(&self, _: &mut Self::Ctx) -> Self::Api { self.marker() } } impl FromApi for Tok { type Ctx = (); fn from_api(api: &Self::Api, _: &mut Self::Ctx) -> Self { deintern(*api) } } impl Interned for Vec> { type Marker = api::TStrv; fn intern( self: Arc, req: &(impl DynRequester + ?Sized), ) -> Self::Marker { req.request(api::InternStrv(Arc::new(self.iter().map(|t| t.marker()).collect()))) } fn bimap(interners: &mut TypedInterners) -> &mut Bimap { &mut interners.vecs } } impl InternMarker for api::TStrv { type Interned = Vec>; fn resolve( self, req: &(impl DynRequester + ?Sized), ) -> Tok { let data = Arc::new(req.request(api::ExternStrv(self)).iter().map(|m| deintern(*m)).collect_vec()); Tok::new(data, self) } fn get_id(self) -> NonZeroU64 { self.0 } fn from_id(id: NonZeroU64) -> Self { Self(id) } } impl Internable for [Tok] { type Interned = Vec>; fn get_owned(&self) -> Arc { Arc::new(self.to_vec()) } } impl Internable for Vec> { type Interned = Vec>; fn get_owned(&self) -> Arc { Arc::new(self.to_vec()) } } impl Internable for Vec { type Interned = Vec>; fn get_owned(&self) -> Arc { Arc::new(self.iter().map(|ts| deintern(*ts)).collect()) } } impl Internable for [api::TStr] { type Interned = Vec>; fn get_owned(&self) -> Arc { Arc::new(self.iter().map(|ts| deintern(*ts)).collect()) } } impl ApiEquiv for Tok>> { type Api = api::TStrv; } impl ToApi for Tok>> { type Ctx = (); fn to_api(&self, _: &mut Self::Ctx) -> Self::Api { self.marker() } } impl FromApi for Tok>> { type Ctx = (); fn from_api(api: &Self::Api, _: &mut Self::Ctx) -> Self { deintern(*api) } } /// The number of references held to any token by the interner. const BASE_RC: usize = 3; #[test] fn base_rc_correct() { let tok = Tok::new(Arc::new("foo".to_string()), api::TStr(1.try_into().unwrap())); let mut bimap = Bimap::default(); bimap.insert(tok.clone()); assert_eq!(Arc::strong_count(&tok.data), BASE_RC + 1, "the bimap plus the current instance"); } pub struct Bimap { intern: HashMap, Tok>, by_id: HashMap>, } impl Bimap { pub fn insert(&mut self, token: Tok) { self.intern.insert(token.data.clone(), token.clone()); self.by_id.insert(token.marker(), token); } pub fn by_marker(&self, marker: T::Marker) -> Option> { self.by_id.get(&marker).cloned() } pub fn by_value(&self, q: &Q) -> Option> where T: Borrow { (self.intern.raw_entry()) .from_hash(self.intern.hasher().hash_one(q), |k| k.as_ref().borrow() == q) .map(|p| p.1.clone()) } pub fn sweep_replica(&mut self) -> Vec { (self.intern) .extract_if(|k, _| Arc::strong_count(k) == BASE_RC) .map(|(_, v)| { self.by_id.remove(&v.marker()); v.marker() }) .collect() } pub fn sweep_master(&mut self, retained: HashSet) { self.intern.retain(|k, v| BASE_RC < Arc::strong_count(k) || retained.contains(&v.marker())) } } impl Default for Bimap { fn default() -> Self { Self { by_id: HashMap::new(), intern: HashMap::new() } } } pub trait UpComm { fn up(&self, req: R) -> R::Response; } #[derive(Default)] pub struct TypedInterners { strings: Bimap, vecs: Bimap>>, } #[derive(Default)] pub struct Interner { interners: TypedInterners, master: Option>>, } static ID: atomic::AtomicU64 = atomic::AtomicU64::new(1); static INTERNER: Mutex> = Mutex::new(None); pub fn interner() -> impl DerefMut { struct G(MutexGuard<'static, Option>); impl Deref for G { type Target = Interner; fn deref(&self) -> &Self::Target { self.0.as_ref().expect("Guard pre-initialized") } } impl DerefMut for G { fn deref_mut(&mut self) -> &mut Self::Target { self.0.as_mut().expect("Guard pre-iniitialized") } } let mut g = INTERNER.lock().unwrap(); g.get_or_insert_with(Interner::default); G(g) } pub fn init_replica(req: impl DynRequester + 'static) { let mut g = INTERNER.lock().unwrap(); assert!(g.is_none(), "Attempted to initialize replica interner after first use"); *g = Some(Interner { master: Some(Box::new(req)), interners: TypedInterners { strings: Bimap::default(), vecs: Bimap::default() }, }) } pub fn intern(t: &(impl Internable + ?Sized)) -> Tok { let data = t.get_owned(); let mut g = interner(); let job = format!("{t:?} in {}", if g.master.is_some() { "replica" } else { "master" }); eprintln!("Interning {job}"); let typed = T::bimap(&mut g.interners); if let Some(tok) = typed.by_value(&data) { return tok; } let marker = match &mut g.master { Some(c) => data.clone().intern(&**c), None => T::Marker::from_id(NonZeroU64::new(ID.fetch_add(1, atomic::Ordering::Relaxed)).unwrap()), }; let tok = Tok::new(data, marker); T::bimap(&mut g.interners).insert(tok.clone()); mem::drop(g); eprintln!("Interned {job}"); tok } pub fn deintern(marker: M) -> Tok { let mut g = interner(); if let Some(tok) = M::Interned::bimap(&mut g.interners).by_marker(marker) { return tok; } let master = g.master.as_mut().expect("ID not in local interner and this is master"); let token = marker.resolve(&**master); M::Interned::bimap(&mut g.interners).insert(token.clone()); token } pub fn merge_retained(into: &mut api::Retained, from: &api::Retained) { into.strings = into.strings.iter().chain(&from.strings).copied().unique().collect(); into.vecs = into.vecs.iter().chain(&from.vecs).copied().unique().collect(); } pub fn sweep_replica() -> api::Retained { let mut g = interner(); assert!(g.master.is_some(), "Not a replica"); api::Retained { strings: g.interners.strings.sweep_replica(), vecs: g.interners.vecs.sweep_replica(), } } /// Create a thread-local token instance and copy it. This ensures that the /// interner will only be called the first time the expresion is executed, /// and subsequent calls will just copy the token. Accepts a single static /// expression (i.e. a literal). #[macro_export] macro_rules! intern { ($ty:ty : $expr:expr) => {{ thread_local! { static VALUE: $crate::interner::Tok<<$ty as $crate::interner::Internable>::Interned> = $crate::interner::intern::< <$ty as $crate::interner::Internable>::Interned >($expr as &$ty); } VALUE.with(|v| v.clone()) }}; } pub fn sweep_master(retained: api::Retained) { let mut g = interner(); assert!(g.master.is_none(), "Not master"); g.interners.strings.sweep_master(retained.strings.into_iter().collect()); g.interners.vecs.sweep_master(retained.vecs.into_iter().collect()); } #[cfg(test)] mod test { use std::num::NonZero; use orchid_api_traits::{enc_vec, Decode}; use super::*; use crate::api; #[test] fn test_i() { let _: Tok = intern!(str: "foo"); let _: Tok>> = intern!([Tok]: &[ intern!(str: "bar"), intern!(str: "baz") ]); } #[test] fn test_coding() { let coded = api::TStr(NonZero::new(3u64).unwrap()); let mut enc = &enc_vec(&coded)[..]; api::TStr::decode(&mut enc); assert_eq!(enc, [], "Did not consume all of {enc:?}") } }