use std::sync::{Mutex, Arc};
use std::num::NonZeroU32;
use std::hash::Hash;

use lasso::{Rodeo, Spur, Key};
use base64::{engine::general_purpose::STANDARD_NO_PAD as BASE64, Engine};

// This module relies on the nightly `generic_const_exprs` and `array_chunks`
// features being enabled at the crate root.

/// A token representing an interned string or sequence in an interner.
///
/// `RANK` tracks nesting depth: a rank 0 token stands for a string, and a rank
/// N + 1 token stands for an interned sequence of rank N tokens.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
pub struct Token<const RANK: u8>(pub Spur);

/// An interner that can intern strings, as well as sequences of things it
/// interned, as long as they all have the same rank
pub trait Interner: Sync {
  fn str2tok(&self, str: &str) -> Token<0>;
  fn tok2str(&self, token: Token<0>) -> String;
  fn slc2tok<const RANK: u8>(&self, slice: &[Token<RANK>]) -> Token<{RANK + 1}>;
  fn tok2slc<const RANK: u8>(&self, token: Token<RANK>) -> Vec<Token<{RANK - 1}>>;
  fn tok2strv(&self, token: Token<1>) -> Vec<String> {
    self.tok2slc(token).into_iter().map(|t| self.tok2str(t)).collect()
  }
  fn tokv2strv(&self, slice: &[Token<0>]) -> Vec<String> {
    slice.iter().map(|t| self.tok2str(*t)).collect()
  }
  /// Get the first token of a sequence
  fn head<const RANK: u8>(&self, token: Token<RANK>) -> Token<{RANK - 1}>;
  /// Returns the length of a sequence
  fn len<const RANK: u8>(&self, token: Token<RANK>) -> usize
  where Token<{RANK - 1}>: Clone;
  /// Returns the length of the longest identical prefix of the two sequences
  fn coprefix<const RANK: u8>(&self, a: Token<RANK>, b: Token<RANK>) -> usize
  where Token<{RANK - 1}>: Clone;
}

/// Encode a sequence of tokens as base64 over the little-endian bytes of their keys
fn serialize_seq<const RANK: u8>(seq: &[Token<RANK>]) -> String {
  let data: Vec<u8> = seq.iter()
    .flat_map(|t| u32::from(t.0.into_inner()).to_le_bytes())
    .collect();
  BASE64.encode(data)
}

/// Decode a base64 string produced by [serialize_seq] back into tokens
fn deserialize_seq<const RANK: u8>(string: &str) -> Vec<Token<RANK>> {
  let data = BASE64.decode(string)
    .expect("String is not valid base64");
  assert!(data.len() % 4 == 0, "Tokens should serialize to 4 bytes each");
  data.array_chunks::<4>().map(|bytes| {
    let nz32 = NonZeroU32::new(u32::from_le_bytes(*bytes))
      .expect("Token representation should never be zero");
    // `Spur::into_inner` returns the key's index plus one (so the value can be
    // non-zero); undo that offset before going back through `try_from_usize`.
    Token(Spur::try_from_usize(u32::from(nz32) as usize - 1).unwrap())
  }).collect()
}
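
// A minimal sanity-check sketch for the helpers above, assuming the 4-byte
// little-endian layout that `serialize_seq` emits; the module name, test name
// and key indices below are arbitrary illustrative values.
#[cfg(test)]
mod serialize_tests {
  use super::*;

  #[test]
  fn roundtrip() {
    let seq: Vec<Token<0>> = [3usize, 7, 42].iter()
      .map(|&i| Token(Spur::try_from_usize(i).unwrap()))
      .collect();
    let encoded = serialize_seq(&seq);
    let decoded: Vec<Token<0>> = deserialize_seq(&encoded);
    assert_eq!(seq, decoded);
  }
}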
/// An interner that delegates the actual work to Lasso
#[derive(Clone)]
pub struct LassoInterner {
  strings: Arc<Mutex<Rodeo>>,
  slices: Arc<Mutex<Rodeo>>,
}

impl LassoInterner {
  /// Create an empty interner. Called to create the singleton.
  fn new() -> Self {
    Self {
      slices: Arc::new(Mutex::new(Rodeo::new())),
      strings: Arc::new(Mutex::new(Rodeo::new())),
    }
  }
}

impl Interner for LassoInterner {
  fn str2tok(&self, str: &str) -> Token<0> {
    let mut strings = self.strings.lock().unwrap();
    let key = strings.get_or_intern(str);
    Token(key)
  }

  fn tok2str(&self, token: Token<0>) -> String {
    let key = token.0;
    let strings = self.strings.lock().unwrap();
    strings.resolve(&key).to_string()
  }

  fn slc2tok<const RANK: u8>(&self, slice: &[Token<RANK>]) -> Token<{RANK + 1}> {
    let data = serialize_seq(slice);
    let mut slices = self.slices.lock().unwrap();
    let key = slices.get_or_intern(data);
    Token(key)
  }

  fn tok2slc<const RANK: u8>(&self, token: Token<RANK>) -> Vec<Token<{RANK - 1}>> {
    let key = token.0;
    let slices = self.slices.lock().unwrap();
    let string = slices.resolve(&key);
    deserialize_seq(string)
  }

  fn head<const RANK: u8>(&self, token: Token<RANK>) -> Token<{RANK - 1}> {
    let key = token.0;
    let slices = self.slices.lock().unwrap();
    let string = slices.resolve(&key);
    // Base64 packs 3 bytes into 4 characters, so a single 4-byte token does not
    // end on a character boundary; decode the whole sequence and take its head.
    deserialize_seq(string)[0]
  }

  fn len<const RANK: u8>(&self, token: Token<RANK>) -> usize
  where Token<{RANK - 1}>: Clone {
    let key = token.0;
    let slices = self.slices.lock().unwrap();
    let string = slices.resolve(&key);
    let data = BASE64.decode(string).expect("Interned slices are valid base64");
    assert!(data.len() % 4 == 0, "Tokens should serialize to 4 bytes each");
    data.len() / 4
  }

  fn coprefix<const RANK: u8>(&self, a: Token<RANK>, b: Token<RANK>) -> usize
  where Token<{RANK - 1}>: Clone {
    let keya = a.0;
    let keyb = b.0;
    let slices = self.slices.lock().unwrap();
    let da = BASE64.decode(slices.resolve(&keya)).expect("Interned slices are valid base64");
    let db = BASE64.decode(slices.resolve(&keyb)).expect("Interned slices are valid base64");
    // Compare whole 4-byte tokens rather than encoded characters, so that a
    // partial match inside a token does not count towards the shared prefix.
    da.chunks_exact(4)
      .zip(db.chunks_exact(4))
      .take_while(|(a, b)| a == b)
      .count()
  }
}

/// Create an interner that inherits the singleton's data, and
/// block all future interaction with the singleton.
///
/// DO NOT call within [dynamic] or elsewhere pre-main
pub fn mk_interner() -> impl Interner {
  LassoInterner::new()
}

pub trait StringLike: Clone + Eq + Hash {
  fn into_str<I: Interner>(self, i: &I) -> String;
  fn into_tok<I: Interner>(self, i: &I) -> Token<0>;
}

impl StringLike for String {
  fn into_str<I: Interner>(self, _i: &I) -> String { self }
  fn into_tok<I: Interner>(self, i: &I) -> Token<0> { i.str2tok(&self) }
}

impl StringLike for Token<0> {
  fn into_str<I: Interner>(self, i: &I) -> String { i.tok2str(self) }
  fn into_tok<I: Interner>(self, _i: &I) -> Token<0> { self }
}

pub trait StringVLike: Clone + Eq + Hash {
  fn into_strv<I: Interner>(self, i: &I) -> Vec<String>;
  fn into_tok<I: Interner>(self, i: &I) -> Token<1>;
  fn into_tokv<I: Interner>(self, i: &I) -> Vec<Token<0>>;
}

impl StringVLike for Vec<String> {
  fn into_strv<I: Interner>(self, _i: &I) -> Vec<String> { self }
  fn into_tok<I: Interner>(self, i: &I) -> Token<1> {
    let tokv = self.into_iter()
      .map(|s| i.str2tok(&s))
      .collect::<Vec<_>>();
    i.slc2tok(&tokv)
  }
  fn into_tokv<I: Interner>(self, i: &I) -> Vec<Token<0>> {
    self.into_iter()
      .map(|s| i.str2tok(&s))
      .collect()
  }
}

impl StringVLike for Vec<Token<0>> {
  fn into_strv<I: Interner>(self, i: &I) -> Vec<String> { i.tokv2strv(&self) }
  fn into_tok<I: Interner>(self, i: &I) -> Token<1> { i.slc2tok(&self) }
  fn into_tokv<I: Interner>(self, _i: &I) -> Vec<Token<0>> { self }
}

impl StringVLike for Token<1> {
  fn into_strv<I: Interner>(self, i: &I) -> Vec<String> { i.tok2strv(self) }
  fn into_tok<I: Interner>(self, _i: &I) -> Token<1> { self }
  fn into_tokv<I: Interner>(self, i: &I) -> Vec<Token<0>> { i.tok2slc(self) }
}
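
// A small usage sketch: it builds a throwaway interner with `mk_interner` and
// exercises the string and sequence round trips together with `head`, `len`
// and `coprefix`; the module and test names are illustrative only.
#[cfg(test)]
mod tests {
  use super::*;

  #[test]
  fn intern_roundtrips() {
    let i = mk_interner();

    // Rank-0 tokens: equal strings intern to equal tokens.
    let foo = i.str2tok("foo");
    assert_eq!(foo, i.str2tok("foo"));
    assert_eq!(i.tok2str(foo), "foo");

    // Rank-1 tokens: a sequence of rank-0 tokens interns to a single token.
    let strv = vec!["foo".to_string(), "bar".to_string()];
    let seq = strv.clone().into_tok(&i);
    assert_eq!(i.tok2strv(seq), strv);
    assert_eq!(i.len(seq), 2);
    assert_eq!(i.head(seq), foo);

    // The shared prefix is measured in whole tokens.
    let other = vec!["foo".to_string(), "baz".to_string()].into_tok(&i);
    assert_eq!(i.coprefix(seq, other), 1);
  }
}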