bug fixes and performance improvements

This commit is contained in:
2023-05-07 22:35:38 +01:00
parent f3ce910f66
commit a604e40bad
167 changed files with 5965 additions and 4229 deletions

53
src/interner/display.rs Normal file
View File

@@ -0,0 +1,53 @@
use core::fmt::Formatter;
use std::fmt::Display;
use crate::interner::Interner;
/// A variant of [std::fmt::Display] for objects that contain interned
/// strings and therefore can only be stringified in the presence of a
/// string interner
///
/// The functions defined here are suffixed to distinguish them from
/// the ones in Display and ToString respectively, because Rust can't
/// identify functions based on arity
pub trait InternedDisplay {
/// formats the value using the given formatter and string interner
fn fmt_i(&self,
f: &mut std::fmt::Formatter<'_>,
i: &Interner,
) -> std::fmt::Result;
/// Converts the value to a string to be displayed
fn to_string_i(&self, i: &Interner) -> String {
// Copied from <https://doc.rust-lang.org/src/alloc/string.rs.html#2526>
let mut buf = String::new();
let mut formatter = Formatter::new(&mut buf);
// Bypass format_args!() to avoid write_str with zero-length strs
Self::fmt_i(self, &mut formatter, i)
.expect("a Display implementation returned an error unexpectedly");
buf
}
fn bundle<'a>(&'a self, interner: &'a Interner)
-> DisplayBundle<'a, Self>
{
DisplayBundle { interner, data: self }
}
}
/// Everything that is already [Display] is trivially [InternedDisplay]:
/// the interner is simply ignored.
impl<T> InternedDisplay for T where T: Display {
  fn fmt_i(&self,
    f: &mut std::fmt::Formatter<'_>,
    _i: &Interner,
  ) -> std::fmt::Result {
    Display::fmt(self, f)
  }
}
/// A borrowed value together with the interner needed to display it.
/// Formatting the bundle forwards both to the value's `fmt_i`.
pub struct DisplayBundle<'a, T: InternedDisplay + ?Sized> {
/// The interner handed to `fmt_i` when the bundle is formatted
interner: &'a Interner,
/// The value to be formatted
data: &'a T
}
/// Formats the bundled value by calling its `fmt_i` with the bundled
/// interner.
///
/// `T` may be unsized, matching the bound on the struct itself.
impl<'a, T: InternedDisplay + ?Sized> Display for DisplayBundle<'a, T> {
  fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
    T::fmt_i(self.data, f, self.interner)
  }
}

9
src/interner/mod.rs Normal file
View File

@@ -0,0 +1,9 @@
mod monotype;
mod multitype;
mod token;
mod display;
pub use monotype::TypedInterner;
pub use multitype::Interner;
pub use token::Token;
pub use display::{DisplayBundle, InternedDisplay};

120
src/interner/monotype.rs Normal file
View File

@@ -0,0 +1,120 @@
use std::num::NonZeroU32;
use std::cell::RefCell;
use std::borrow::Borrow;
use std::hash::{Hash, BuildHasher};
use hashbrown::HashMap;
use super::token::Token;
/// An interner for values of a single type `T`. Each distinct value is
/// stored once and represented by a [Token].
pub struct TypedInterner<T: 'static + Eq + Hash + Clone>{
/// Maps every interned value to the token that was issued for it
tokens: RefCell<HashMap<&'static T, Token<T>>>,
/// Interned values in insertion order; the value of the token with ID `n`
/// is at index `n - 1`. The flag is `true` when the reference was leaked
/// from a `Box` by this interner (and is freed again in `Drop`), `false`
/// when it was passed in as an existing static reference.
values: RefCell<Vec<(&'static T, bool)>>
}
impl<T: Eq + Hash + Clone> TypedInterner<T> {
  /// Create a fresh interner instance
  pub fn new() -> Self {
    Self {
      tokens: RefCell::new(HashMap::new()),
      values: RefCell::new(Vec::new())
    }
  }

  /// Compute the ID for the next token, given how many values are stored.
  ///
  /// IDs are one-based so that they fit in a `NonZeroU32`.
  ///
  /// # Panics
  ///
  /// Panics if the new ID does not fit in 32 bits. A plain `as u32` cast
  /// would silently truncate here and hand out duplicate IDs.
  fn next_id(stored: usize) -> NonZeroU32 {
    stored.checked_add(1)
      .and_then(|n| u32::try_from(n).ok())
      .and_then(NonZeroU32::new)
      .expect("interner ran out of 32-bit token IDs")
  }

  /// Intern an object, returning a token
  ///
  /// If an equal value has been interned before, its existing token is
  /// returned and `q` is not cloned. Otherwise `q` is converted to an
  /// owned `T`, stored for the lifetime of the interner, and a new token
  /// is issued.
  pub fn i<Q: ?Sized + Eq + Hash + ToOwned<Owned = T>>(&self, q: &Q)
  -> Token<T> where T: Borrow<Q>
  {
    let mut tokens = self.tokens.borrow_mut();
    // The lookup is done by hash + equality on the borrowed form so that
    // no owned `T` has to be built just to check for presence.
    let hash = compute_hash(tokens.hasher(), q);
    let raw_entry = tokens.raw_entry_mut().from_hash(hash, |k| {
      <T as Borrow<Q>>::borrow(k) == q
    });
    let (_, token) = raw_entry.or_insert_with(|| {
      let mut values = self.values.borrow_mut();
      let token = Token::<T>::from_id(Self::next_id(values.len()));
      // Leaked so the map key and the value list can share one reference;
      // the `true` flag tells `Drop` to reclaim the allocation.
      let keyref: &'static T = Box::leak(Box::new(q.to_owned()));
      values.push((keyref, true));
      (keyref, token)
    });
    *token
  }

  /// Resolve a token, obtaining an object
  /// It is illegal to use a token obtained from one interner with another.
  ///
  /// # Panics
  ///
  /// Panics if the token's ID is larger than the number of values stored
  /// in this interner.
  pub fn r(&self, t: Token<T>) -> &T {
    let values = self.values.borrow();
    // Token IDs are one-based, the value list is zero-based.
    let index = t.into_usize() - 1;
    values.get(index)
      .expect("token was not issued by this interner")
      .0
  }

  /// Intern a static reference without allocating the data on the heap
  ///
  /// If an equal value is already interned, the existing token is
  /// returned and `tref` itself is not stored.
  // NOTE(review): the lint is silenced presumably because nothing calls
  // this yet; confirm before removing the attribute.
  #[allow(unused)]
  pub fn intern_static(&self, tref: &'static T) -> Token<T> {
    let mut tokens = self.tokens.borrow_mut();
    let (_, token) = tokens.raw_entry_mut().from_key(tref)
      .or_insert_with(|| {
        let mut values = self.values.borrow_mut();
        let token = Token::<T>::from_id(Self::next_id(values.len()));
        // `false`: the data is not ours to free.
        values.push((tref, false));
        (tref, token)
      });
    *token
  }
}
// impl<T: Eq + Hash + Clone> TypedInterner<Vec<T>> {
// pub fn iv<Q>(&self, qs: &[Q]) -> Token<Vec<T>>
// where
// Q: Eq + Hash + ToOwned<Owned = T>,
// T: Borrow<Q>
// {
// let mut tokens = self.tokens.borrow_mut();
// let hash = compute_hash(tokens.hasher(), qs);
// let raw_entry = tokens.raw_entry_mut().from_hash(hash, |k| {
// k.iter().zip(qs.iter()).all(|(t, q)| t.borrow() == q)
// });
// let kv = raw_entry.or_insert_with(|| {
// let mut values = self.values.borrow_mut();
// let uniq_key: NonZeroU32 = (values.len() as u32 + 1u32)
// .try_into().expect("can never be zero");
// let tv = qs.iter().map(Q::to_owned).collect::<Vec<_>>();
// let keybox = Box::new(tv);
// let keyref = Box::leak(keybox);
// values.push((keyref, true));
// let token = Token::<Vec<T>>::from_id(uniq_key);
// (keyref, token)
// });
// *kv.1
// }
// }
impl<T: Eq + Hash + Clone> Drop for TypedInterner<T> {
  /// Frees every value that was heap-allocated and leaked by `i`.
  /// Values registered through `intern_static` are left alone.
  fn drop(&mut self) {
    // `&mut self` proves exclusive access, so the cells can be accessed
    // directly without a runtime borrow check that could panic in a
    // destructor.
    //
    // The token map is keyed by the very references that are freed below;
    // empty it first so it never holds a dangling key.
    self.tokens.get_mut().clear();
    // FIXME: with the new hashmap logic we can actually store Rc-s
    // which negates the need for unsafe here
    for (item, owned) in self.values.get_mut().drain(..) {
      if !owned { continue }
      // SAFETY: entries flagged as owned are only created in `i`, where
      // the reference comes from `Box::leak` and is pushed exactly once,
      // so rebuilding the box here frees each allocation exactly once.
      // The only other copies of the reference were the map keys cleared
      // above and borrows tied to `&self`, which cannot outlive this call.
      drop(unsafe { Box::from_raw((item as *const T).cast_mut()) });
    }
  }
}
/// Hashes `key` with a fresh hasher obtained from `hash_builder` and
/// returns the finished 64-bit digest, for use with hash-based lookups
/// that take a precomputed hash.
fn compute_hash(
  hash_builder: &impl BuildHasher,
  key: &(impl Hash + ?Sized)
) -> u64 {
  let mut hasher = BuildHasher::build_hasher(hash_builder);
  Hash::hash(key, &mut hasher);
  core::hash::Hasher::finish(&hasher)
}

102
src/interner/multitype.rs Normal file
View File

@@ -0,0 +1,102 @@
use std::borrow::Borrow;
use std::cell::{RefCell, RefMut};
use std::any::{TypeId, Any};
use std::hash::Hash;
use std::rc::Rc;
use hashbrown::HashMap;
use super::monotype::TypedInterner;
use super::token::Token;
/// An interner for values of arbitrary types, which keeps one
/// [TypedInterner] per value type.
pub struct Interner {
/// Type-erased [TypedInterner] instances, keyed by the `TypeId` of the
/// value type they intern
interners: RefCell<HashMap<TypeId, Rc<dyn Any>>>,
}
impl Interner {
pub fn new() -> Self {
Self { interners: RefCell::new(HashMap::new()) }
}
pub fn i<Q: ?Sized>(&self, q: &Q) -> Token<Q::Owned>
where Q: Eq + Hash + ToOwned,
Q::Owned: 'static + Eq + Hash + Clone,
Q::Owned: Borrow<Q>
{
let mut interners = self.interners.borrow_mut();
let interner = get_interner(&mut interners);
interner.i(q)
}
pub fn r<T: 'static + Eq + Hash + Clone>(&self, t: Token<T>) -> &T {
let mut interners = self.interners.borrow_mut();
let interner = get_interner(&mut interners);
// TODO: figure this out
unsafe{ (interner.r(t) as *const T).as_ref().unwrap() }
}
/// Fully resolve
/// TODO: make this generic over containers
pub fn extern_vec<T: 'static + Eq + Hash + Clone>(&self,
t: Token<Vec<Token<T>>>
) -> Vec<T> {
let mut interners = self.interners.borrow_mut();
let v_int = get_interner(&mut interners);
let t_int = get_interner(&mut interners);
let v = v_int.r(t);
v.iter()
.map(|t| t_int.r(*t))
.cloned()
.collect()
}
pub fn extern_all<T: 'static + Eq + Hash + Clone>(&self,
s: &[Token<T>]
) -> Vec<T> {
s.iter()
.map(|t| self.r(*t))
.cloned()
.collect()
}
}
/// Get or create an interner for a given type.
fn get_interner<T: 'static + Eq + Hash + Clone>(
interners: &mut RefMut<HashMap<TypeId, Rc<dyn Any>>>
) -> Rc<TypedInterner<T>> {
let boxed = interners.raw_entry_mut().from_key(&TypeId::of::<T>())
.or_insert_with(|| (
TypeId::of::<T>(),
Rc::new(TypedInterner::<T>::new())
)).1.clone();
boxed.downcast().expect("the typeid is supposed to protect from this")
}
#[cfg(test)]
mod test {
  use super::*;

  /// A `str` and an equal `String` intern to the same token
  #[test]
  pub fn test_string() {
    let interner = Interner::new();
    let from_str = interner.i("foo");
    let owned = "foo".to_string();
    let from_string = interner.i(&owned);
    assert_eq!(from_str, from_string)
  }

  /// A `Vec` and an equal slice intern to the same token
  #[test]
  pub fn test_slice() {
    let interner = Interner::new();
    let from_vec = interner.i(&vec![1, 2, 3]);
    let from_slice = interner.i(&[1, 2, 3][..]);
    assert_eq!(from_vec, from_slice);
  }

  // Disabled: the two keys have different token types (`Vec<String>` vs
  // `Vec<&str>`), so the assertion below does not type-check.
  // #[test]
  #[allow(unused)]
  pub fn test_str_slice() {
    let interner = Interner::new();
    let owned = vec!["a".to_string(), "b".to_string(), "c".to_string()];
    let key1 = interner.i(&owned);
    let key2 = interner.i(&["a", "b", "c"][..]);
    // assert_eq!(key1, key2);
  }
}

57
src/interner/token.rs Normal file
View File

@@ -0,0 +1,57 @@
use std::{num::NonZeroU32, marker::PhantomData};
use std::fmt::Debug;
use std::hash::Hash;
use std::cmp::PartialEq;
/// A lightweight handle for an interned value of type `T`.
///
/// A token only stores a numeric ID; `T` is recorded at the type level so
/// that tokens for different value types cannot be mixed up. All trait
/// implementations below are written by hand so that they do not place
/// any bounds on `T`.
pub struct Token<T>{
  id: NonZeroU32,
  phantom_data: PhantomData<T>
}

impl<T> Token<T> {
  /// Wrap a raw ID in a token
  pub fn from_id(id: NonZeroU32) -> Self {
    Self { id, phantom_data: PhantomData }
  }

  /// The raw ID of the token
  pub fn into_id(self) -> NonZeroU32 {
    self.id
  }

  /// The raw ID of the token as a `usize`; never zero
  pub fn into_usize(self) -> usize {
    self.id.get() as usize
  }
}

/// Prints as `Token(<id>)`
impl<T> Debug for Token<T> {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    write!(f, "Token({})", self.id)
  }
}

impl<T> Copy for Token<T> {}

impl<T> Clone for Token<T> {
  fn clone(&self) -> Self {
    // Canonical `Clone` for a `Copy` type
    *self
  }
}

impl<T> Eq for Token<T> {}

/// Tokens are equal when their IDs are equal
impl<T> PartialEq for Token<T> {
  fn eq(&self, other: &Self) -> bool { self.id == other.id }
}

/// Tokens are ordered by their IDs
impl<T> Ord for Token<T> {
  fn cmp(&self, other: &Self) -> std::cmp::Ordering {
    self.id.cmp(&other.id)
  }
}

impl<T> PartialOrd for Token<T> {
  fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
    Some(self.cmp(other))
  }
}

/// Feeds the ID to the hasher as a single `u32`
impl<T> Hash for Token<T> {
  fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
    state.write_u32(self.id.get())
  }
}