forked from Orchid/orchid
Preparation for sharing
- rustfmt - clippy - comments - README
This commit is contained in:
@@ -6,13 +6,14 @@ use crate::interner::Interner;
|
||||
/// A variant of [std::fmt::Display] for objects that contain interned
|
||||
/// strings and therefore can only be stringified in the presence of a
|
||||
/// string interner
|
||||
///
|
||||
///
|
||||
/// The functions defined here are suffixed to distinguish them from
|
||||
/// the ones in Display and ToString respectively, because Rust can't
|
||||
/// identify functions based on arity
|
||||
pub trait InternedDisplay {
|
||||
/// Formats the value using the given formatter and string interner.
|
||||
fn fmt_i(&self,
|
||||
fn fmt_i(
|
||||
&self,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
i: &Interner,
|
||||
) -> std::fmt::Result;
|
||||
@@ -28,26 +29,31 @@ pub trait InternedDisplay {
|
||||
buf
|
||||
}
|
||||
|
||||
fn bundle<'a>(&'a self, interner: &'a Interner)
|
||||
-> DisplayBundle<'a, Self>
|
||||
{
|
||||
fn bundle<'a>(&'a self, interner: &'a Interner) -> DisplayBundle<'a, Self> {
|
||||
DisplayBundle { interner, data: self }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> InternedDisplay for T where T: Display {
|
||||
fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, _i: &Interner) -> std::fmt::Result {
|
||||
<Self as Display>::fmt(&self, f)
|
||||
impl<T> InternedDisplay for T
|
||||
where
|
||||
T: Display,
|
||||
{
|
||||
fn fmt_i(
|
||||
&self,
|
||||
f: &mut std::fmt::Formatter<'_>,
|
||||
_i: &Interner,
|
||||
) -> std::fmt::Result {
|
||||
<Self as Display>::fmt(self, f)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DisplayBundle<'a, T: InternedDisplay + ?Sized> {
|
||||
interner: &'a Interner,
|
||||
data: &'a T
|
||||
data: &'a T,
|
||||
}
|
||||
|
||||
impl<'a, T: InternedDisplay> Display for DisplayBundle<'a, T> {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
self.data.fmt_i(f, self.interner)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,9 +1,21 @@
|
||||
mod display;
|
||||
mod monotype;
|
||||
mod multitype;
|
||||
mod token;
|
||||
mod display;
|
||||
|
||||
pub use display::{DisplayBundle, InternedDisplay};
|
||||
pub use monotype::TypedInterner;
|
||||
pub use multitype::Interner;
|
||||
pub use token::Token;
|
||||
pub use display::{DisplayBundle, InternedDisplay};
|
||||
pub use token::Tok;
|
||||
|
||||
/// A symbol, nsname, nname or namespaced name is a sequence of namespaces
|
||||
/// and an identifier. The [Vec] can never be empty.
|
||||
///
|
||||
/// Throughout different stages of processing, these names can be
|
||||
///
|
||||
/// - local names to be prefixed with the current module
|
||||
/// - imported names starting with a segment
|
||||
/// - ending a single import or
|
||||
/// - defined in one of the glob imported modules
|
||||
/// - absolute names
|
||||
pub type Sym = Tok<Vec<Tok<String>>>;
|
||||
|
||||
@@ -1,50 +1,54 @@
|
||||
use std::num::NonZeroU32;
|
||||
use std::cell::RefCell;
|
||||
use std::borrow::Borrow;
|
||||
use std::hash::{Hash, BuildHasher};
|
||||
use std::cell::RefCell;
|
||||
use std::hash::{BuildHasher, Hash};
|
||||
use std::num::NonZeroU32;
|
||||
|
||||
use hashbrown::HashMap;
|
||||
|
||||
use super::token::Token;
|
||||
use super::token::Tok;
|
||||
|
||||
pub struct TypedInterner<T: 'static + Eq + Hash + Clone>{
|
||||
tokens: RefCell<HashMap<&'static T, Token<T>>>,
|
||||
values: RefCell<Vec<(&'static T, bool)>>
|
||||
/// An interner for any type that implements [Borrow]. This is inspired by
|
||||
/// Lasso but much simpler, in part because not much can be known about the type.
|
||||
pub struct TypedInterner<T: 'static + Eq + Hash + Clone> {
|
||||
tokens: RefCell<HashMap<&'static T, Tok<T>>>,
|
||||
values: RefCell<Vec<(&'static T, bool)>>,
|
||||
}
|
||||
impl<T: Eq + Hash + Clone> TypedInterner<T> {
|
||||
/// Create a fresh interner instance
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
tokens: RefCell::new(HashMap::new()),
|
||||
values: RefCell::new(Vec::new())
|
||||
values: RefCell::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Intern an object, returning a token
|
||||
pub fn i<Q: ?Sized + Eq + Hash + ToOwned<Owned = T>>(&self, q: &Q)
|
||||
-> Token<T> where T: Borrow<Q>
|
||||
pub fn i<Q: ?Sized + Eq + Hash + ToOwned<Owned = T>>(&self, q: &Q) -> Tok<T>
|
||||
where
|
||||
T: Borrow<Q>,
|
||||
{
|
||||
let mut tokens = self.tokens.borrow_mut();
|
||||
let hash = compute_hash(tokens.hasher(), q);
|
||||
let raw_entry = tokens.raw_entry_mut().from_hash(hash, |k| {
|
||||
<T as Borrow<Q>>::borrow(k) == q
|
||||
});
|
||||
let raw_entry = tokens
|
||||
.raw_entry_mut()
|
||||
.from_hash(hash, |k| <T as Borrow<Q>>::borrow(k) == q);
|
||||
let kv = raw_entry.or_insert_with(|| {
|
||||
let mut values = self.values.borrow_mut();
|
||||
let uniq_key: NonZeroU32 = (values.len() as u32 + 1u32)
|
||||
.try_into().expect("can never be zero");
|
||||
let uniq_key: NonZeroU32 =
|
||||
(values.len() as u32 + 1u32).try_into().expect("can never be zero");
|
||||
let keybox = Box::new(q.to_owned());
|
||||
let keyref = Box::leak(keybox);
|
||||
values.push((keyref, true));
|
||||
let token = Token::<T>::from_id(uniq_key);
|
||||
let token = Tok::<T>::from_id(uniq_key);
|
||||
(keyref, token)
|
||||
});
|
||||
*kv.1
|
||||
}
|
||||
|
||||
/// Resolve a token, obtaining an object
|
||||
/// It is illegal to use a token obtained from one interner with another.
|
||||
pub fn r(&self, t: Token<T>) -> &T {
|
||||
/// It is illegal to use a token obtained from one interner with
|
||||
/// another.
|
||||
pub fn r(&self, t: Tok<T>) -> &T {
|
||||
let values = self.values.borrow();
|
||||
let key = t.into_usize() - 1;
|
||||
values[key].0
|
||||
@@ -52,17 +56,20 @@ impl<T: Eq + Hash + Clone> TypedInterner<T> {
|
||||
|
||||
/// Intern a static reference without allocating the data on the heap
|
||||
#[allow(unused)]
|
||||
pub fn intern_static(&self, tref: &'static T) -> Token<T> {
|
||||
pub fn intern_static(&self, tref: &'static T) -> Tok<T> {
|
||||
let mut tokens = self.tokens.borrow_mut();
|
||||
let token = *tokens.raw_entry_mut().from_key(tref)
|
||||
.or_insert_with(|| {
|
||||
let mut values = self.values.borrow_mut();
|
||||
let uniq_key: NonZeroU32 = (values.len() as u32 + 1u32)
|
||||
.try_into().expect("can never be zero");
|
||||
values.push((tref, false));
|
||||
let token = Token::<T>::from_id(uniq_key);
|
||||
(tref, token)
|
||||
}).1;
|
||||
let token = *tokens
|
||||
.raw_entry_mut()
|
||||
.from_key(tref)
|
||||
.or_insert_with(|| {
|
||||
let mut values = self.values.borrow_mut();
|
||||
let uniq_key: NonZeroU32 =
|
||||
(values.len() as u32 + 1u32).try_into().expect("can never be zero");
|
||||
values.push((tref, false));
|
||||
let token = Tok::<T>::from_id(uniq_key);
|
||||
(tref, token)
|
||||
})
|
||||
.1;
|
||||
token
|
||||
}
|
||||
}
|
||||
@@ -74,10 +81,10 @@ impl<T: Eq + Hash + Clone> Drop for TypedInterner<T> {
|
||||
// which negates the need for unsafe here
|
||||
let mut values = self.values.borrow_mut();
|
||||
for (item, owned) in values.drain(..) {
|
||||
if !owned {continue}
|
||||
unsafe {
|
||||
Box::from_raw((item as *const T).cast_mut())
|
||||
};
|
||||
if !owned {
|
||||
continue;
|
||||
}
|
||||
unsafe { Box::from_raw((item as *const T).cast_mut()) };
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -85,10 +92,10 @@ impl<T: Eq + Hash + Clone> Drop for TypedInterner<T> {
|
||||
/// Helper function to compute hashes outside a hashmap
|
||||
fn compute_hash(
|
||||
hash_builder: &impl BuildHasher,
|
||||
key: &(impl Hash + ?Sized)
|
||||
key: &(impl Hash + ?Sized),
|
||||
) -> u64 {
|
||||
use core::hash::Hasher;
|
||||
let mut state = hash_builder.build_hasher();
|
||||
key.hash(&mut state);
|
||||
state.finish()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,14 +1,17 @@
|
||||
use std::any::{Any, TypeId};
|
||||
use std::borrow::Borrow;
|
||||
use std::cell::{RefCell, RefMut};
|
||||
use std::any::{TypeId, Any};
|
||||
use std::hash::Hash;
|
||||
use std::rc::Rc;
|
||||
|
||||
use hashbrown::HashMap;
|
||||
|
||||
use super::monotype::TypedInterner;
|
||||
use super::token::Token;
|
||||
use super::token::Tok;
|
||||
|
||||
/// A collection of interners based on their type. Allows interning any object
|
||||
/// that implements [ToOwned]. Objects of the same type are stored together in a
|
||||
/// [TypedInterner].
|
||||
pub struct Interner {
|
||||
interners: RefCell<HashMap<TypeId, Rc<dyn Any>>>,
|
||||
}
|
||||
@@ -17,56 +20,59 @@ impl Interner {
|
||||
Self { interners: RefCell::new(HashMap::new()) }
|
||||
}
|
||||
|
||||
pub fn i<Q: ?Sized + Eq + Hash + ToOwned>(&self, q: &Q)
|
||||
-> Token<Q::Owned>
|
||||
where Q::Owned: 'static + Eq + Hash + Clone + Borrow<Q>
|
||||
pub fn i<Q: ?Sized + Eq + Hash + ToOwned>(&self, q: &Q) -> Tok<Q::Owned>
|
||||
where
|
||||
Q::Owned: 'static + Eq + Hash + Clone + Borrow<Q>,
|
||||
{
|
||||
let mut interners = self.interners.borrow_mut();
|
||||
let interner = get_interner(&mut interners);
|
||||
interner.i(q)
|
||||
}
|
||||
|
||||
pub fn r<T: 'static + Eq + Hash + Clone>(&self, t: Token<T>) -> &T {
|
||||
pub fn r<T: 'static + Eq + Hash + Clone>(&self, t: Tok<T>) -> &T {
|
||||
let mut interners = self.interners.borrow_mut();
|
||||
let interner = get_interner(&mut interners);
|
||||
// TODO: figure this out
|
||||
unsafe{ (interner.r(t) as *const T).as_ref().unwrap() }
|
||||
unsafe { (interner.r(t) as *const T).as_ref().unwrap() }
|
||||
}
|
||||
|
||||
/// Fully resolve a token for a vector of tokens into a vector of cloned values
|
||||
/// TODO: make this generic over containers
|
||||
pub fn extern_vec<T: 'static + Eq + Hash + Clone>(&self,
|
||||
t: Token<Vec<Token<T>>>
|
||||
pub fn extern_vec<T: 'static + Eq + Hash + Clone>(
|
||||
&self,
|
||||
t: Tok<Vec<Tok<T>>>,
|
||||
) -> Vec<T> {
|
||||
let mut interners = self.interners.borrow_mut();
|
||||
let v_int = get_interner(&mut interners);
|
||||
let t_int = get_interner(&mut interners);
|
||||
let v = v_int.r(t);
|
||||
v.iter()
|
||||
.map(|t| t_int.r(*t))
|
||||
.cloned()
|
||||
.collect()
|
||||
v.iter().map(|t| t_int.r(*t)).cloned().collect()
|
||||
}
|
||||
|
||||
pub fn extern_all<T: 'static + Eq + Hash + Clone>(&self,
|
||||
s: &[Token<T>]
|
||||
pub fn extern_all<T: 'static + Eq + Hash + Clone>(
|
||||
&self,
|
||||
s: &[Tok<T>],
|
||||
) -> Vec<T> {
|
||||
s.iter()
|
||||
.map(|t| self.r(*t))
|
||||
.cloned()
|
||||
.collect()
|
||||
s.iter().map(|t| self.r(*t)).cloned().collect()
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Interner {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or create an interner for a given type.
|
||||
fn get_interner<T: 'static + Eq + Hash + Clone>(
|
||||
interners: &mut RefMut<HashMap<TypeId, Rc<dyn Any>>>
|
||||
interners: &mut RefMut<HashMap<TypeId, Rc<dyn Any>>>,
|
||||
) -> Rc<TypedInterner<T>> {
|
||||
let boxed = interners.raw_entry_mut().from_key(&TypeId::of::<T>())
|
||||
.or_insert_with(|| (
|
||||
TypeId::of::<T>(),
|
||||
Rc::new(TypedInterner::<T>::new())
|
||||
)).1.clone();
|
||||
let boxed = interners
|
||||
.raw_entry_mut()
|
||||
.from_key(&TypeId::of::<T>())
|
||||
.or_insert_with(|| (TypeId::of::<T>(), Rc::new(TypedInterner::<T>::new())))
|
||||
.1
|
||||
.clone();
|
||||
boxed.downcast().expect("the typeid is supposed to protect from this")
|
||||
}
|
||||
|
||||
@@ -94,8 +100,9 @@ mod test {
|
||||
#[allow(unused)]
|
||||
pub fn test_str_slice() {
|
||||
let interner = Interner::new();
|
||||
let key1 = interner.i(&vec!["a".to_string(), "b".to_string(), "c".to_string()]);
|
||||
let key1 =
|
||||
interner.i(&vec!["a".to_string(), "b".to_string(), "c".to_string()]);
|
||||
let key2 = interner.i(&["a", "b", "c"][..]);
|
||||
// assert_eq!(key1, key2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,14 +1,18 @@
|
||||
use std::{num::NonZeroU32, marker::PhantomData};
|
||||
use std::cmp::PartialEq;
|
||||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::marker::PhantomData;
|
||||
use std::num::NonZeroU32;
|
||||
|
||||
use std::cmp::PartialEq;
|
||||
|
||||
pub struct Token<T>{
|
||||
/// A number representing an object of type `T` stored in some interner. It is a
|
||||
/// logic error to compare tokens obtained from different interners, or to use a
|
||||
/// token with an interner other than the one that created it, but this is
|
||||
/// currently not enforced.
|
||||
pub struct Tok<T> {
|
||||
id: NonZeroU32,
|
||||
phantom_data: PhantomData<T>
|
||||
phantom_data: PhantomData<T>,
|
||||
}
|
||||
impl<T> Token<T> {
|
||||
impl<T> Tok<T> {
|
||||
pub fn from_id(id: NonZeroU32) -> Self {
|
||||
Self { id, phantom_data: PhantomData }
|
||||
}
|
||||
@@ -21,37 +25,39 @@ impl<T> Token<T> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Debug for Token<T> {
|
||||
impl<T> Debug for Tok<T> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "Token({})", self.id)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Copy for Token<T> {}
|
||||
impl<T> Clone for Token<T> {
|
||||
impl<T> Copy for Tok<T> {}
|
||||
impl<T> Clone for Tok<T> {
|
||||
fn clone(&self) -> Self {
|
||||
Self{ id: self.id, phantom_data: PhantomData }
|
||||
Self { id: self.id, phantom_data: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Eq for Token<T> {}
|
||||
impl<T> PartialEq for Token<T> {
|
||||
fn eq(&self, other: &Self) -> bool { self.id == other.id }
|
||||
impl<T> Eq for Tok<T> {}
|
||||
impl<T> PartialEq for Tok<T> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.id == other.id
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Ord for Token<T> {
|
||||
impl<T> Ord for Tok<T> {
|
||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||
self.id.cmp(&other.id)
|
||||
}
|
||||
}
|
||||
impl<T> PartialOrd for Token<T> {
|
||||
impl<T> PartialOrd for Tok<T> {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
|
||||
Some(self.cmp(&other))
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Hash for Token<T> {
|
||||
impl<T> Hash for Tok<T> {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
state.write_u32(self.id.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user