backup commit

2022-10-24 03:16:04 +01:00
parent fbbd6ed256
commit 778c87db77
43 changed files with 1156 additions and 174 deletions
--- a/src/utils/bfs.rs
+++ b/src/utils/bfs.rs
@@ -0,0 +1,113 @@
+use std::collections::{VecDeque, HashSet};
+use std::iter;
+use std::hash::Hash;
+
+use crate::unwrap_or;
+use crate::utils::BoxedIter;
+
+/// Breadth-first traversal carried out in two lazy stages.
+///
+/// A node that has just been yielded is not expanded right away. It is parked in a second queue
+/// and its neighbors are only enumerated - one parked node at a time - once the queue of pending
+/// children has run dry. For generated graphs this postpones allocating the children until they
+/// are actually needed, at the price of some extra bookkeeping on every step.
+///
+/// `init` is the starting node and is always the first item returned. `neighbors` lists the
+/// nodes directly reachable from a node; it may return duplicates or already visited nodes, these
+/// are filtered out here.
+///
+/// # Performance
+/// `T` is cloned twice for each returned value.
+pub fn bfs<T, F, I>(init: T, neighbors: F)
+-> impl Iterator<Item = T>
+where T: Eq + Hash + Clone + std::fmt::Debug,
+    F: Fn(T) -> I, I: Iterator<Item = T>
+{
+    // Nodes that have already been yielded
+    let mut seen: HashSet<T> = HashSet::new();
+    // Children that were enumerated but not yet yielded
+    let mut pending: VecDeque<T> = VecDeque::new();
+    pending.push_back(init);
+    // Yielded nodes whose neighbors have not been enumerated yet
+    let mut parked: VecDeque<T> = VecDeque::new();
+    iter::from_fn(move || {
+        // Stage one: take the first pending child that was not yielded in the meantime
+        let mut found = None;
+        while let Some(candidate) = pending.pop_front() {
+            if !seen.contains(&candidate) {
+                found = Some(candidate);
+                break
+            }
+        }
+        // Stage two: expand parked nodes until one of them produces an unseen neighbor
+        while found.is_none() {
+            let parent = unwrap_or!(parked.pop_front(); break);
+            let mut fresh = neighbors(parent).filter(|t| !seen.contains(t));
+            if let Some(first) = fresh.next() {
+                // The rest may still contain duplicates, stage one weeds those out later
+                pending.extend(fresh);
+                found = Some(first);
+            }
+        }
+        let node = found?;
+        seen.insert(node.clone());
+        parked.push_back(node.clone());
+        Some(node)
+    })
+}
+
+/// Depth-limited variant of [bfs]
+///
+/// Meant for walking infinite graphs of unknown breadth in a way that keeps the recursion depth
+/// independent of the number of nodes. Nodes that are exactly `limit` steps away from `init` are
+/// still returned, but their neighbors are never enumerated. If predictable runtime matters more
+/// than predictable depth, combine [bfs] with [std::iter::Iterator::take] instead.
+pub fn bfs_upto<'a, T: 'a, F: 'a, I: 'a>(init: T, neighbors: F, limit: usize)
+-> impl Iterator<Item = T> + 'a
+where T: Eq + Hash + Clone + std::fmt::Debug,
+    F: Fn(T) -> I, I: Iterator<Item = T>
+{
+    /// Pairs a node with its distance from the start while leaving the distance out of equality
+    /// and hashing. BFS reaches nodes in increasing distance order, so a node that is met for
+    /// the second time can never replace the earlier, closer copy of itself. This would not hold
+    /// for Dijkstra's algorithm, which can be thought of as a "weighted BFS".
+    #[derive(Eq, Clone, Debug)]
+    struct Wrap<U>(usize, U);
+    impl<U: PartialEq> PartialEq for Wrap<U> {
+        fn eq(&self, other: &Self) -> bool { self.1 == other.1 }
+    }
+    impl<U: Hash> Hash for Wrap<U> {
+        fn hash<H: std::hash::Hasher>(&self, state: &mut H) { self.1.hash(state) }
+    }
+    let walk = bfs(Wrap(0, init), move |Wrap(depth, node)| -> BoxedIter<Wrap<T>> {
+        // The two branches have different iterator types, hence the boxing
+        if depth < limit {
+            let below = depth + 1;
+            Box::new(neighbors(node).map(move |child| Wrap(below, child)))
+        } else {
+            Box::new(iter::empty())
+        }
+    });
+    // Strip the bookkeeping before handing the nodes to the caller
+    walk.map(|wrapped| wrapped.1)
+}
+
+#[cfg(test)]
+mod tests {
+    use itertools::Itertools;
+
+    use super::*;
+
+    /// Adjacency list; `graph[n]` holds the indices of the nodes reachable from `n`
+    type Graph = Vec<Vec<usize>>;
+    /// Neighbor function for [bfs] over an adjacency list
+    fn neighbors(graph: &Graph, pt: usize) -> impl Iterator<Item = usize> + '_ {
+        graph[pt].iter().copied()
+    }
+    /// Turn a 0/1 neighborhood matrix into an adjacency list
+    fn from_neighborhood_matrix(matrix: Vec<Vec<usize>>) -> Graph {
+        matrix.into_iter().map(|v| {
+            v.into_iter().enumerate().filter_map(|(i, ent)| {
+                if ent > 1 {panic!("Neighborhood matrices must contain binary values")}
+                else if ent == 1 {Some(i)}
+                else {None}
+            }).collect()
+        }).collect()
+    }
+
+    /// Walks the edges of a cube and checks the exact visiting order
+    #[test]
+    fn test_square() {
+        let simple_graph = from_neighborhood_matrix(vec![
+            vec![0,1,0,1,1,0,0,0],
+            vec![1,0,1,0,0,1,0,0],
+            vec![0,1,0,1,0,0,1,0],
+            vec![1,0,1,0,0,0,0,1],
+            vec![1,0,0,0,0,1,0,1],
+            vec![0,1,0,0,1,0,1,0],
+            vec![0,0,1,0,0,1,0,1],
+            vec![0,0,0,1,1,0,1,0],
+        ]);
+        let scan = bfs(0, |n| neighbors(&simple_graph, n)).collect_vec();
+        assert_eq!(scan, vec![0, 1, 3, 4, 2, 5, 7, 6])
+    }
+    /// Walks an infinite generated graph; previously this only printed the result
+    #[test]
+    fn test_stringbuilder() {
+        let scan = bfs("".to_string(), |s| {
+                vec![s.clone()+";", s.clone()+"a", s+"aaa"].into_iter()
+        }).take(30).collect_vec();
+        println!("{scan:?}");
+        // The graph is infinite so `take` must be able to fill its quota
+        assert_eq!(scan.len(), 30);
+        // The start node comes first, followed by its neighbors in the order they were listed
+        let head = scan.iter().take(4).map(String::as_str).collect_vec();
+        assert_eq!(head, vec!["", ";", "a", "aaa"]);
+        // "a" + "aaa" and "aaa" + "a" collide, but no node may be reported twice
+        let distinct: std::collections::HashSet<&String> = scan.iter().collect();
+        assert_eq!(distinct.len(), scan.len());
+    }
+}
--- a/src/utils/for_loop.rs
+++ b/src/utils/for_loop.rs
@@ -0,0 +1,91 @@
+/// Imitates a regular for loop with an exit clause using Rust's `loop` keyword.
+/// This macro brings the break value to all existing Rust loops, by allowing you to specify
+/// an exit expression in case the loop was broken by the condition and not an explicit `break`.
+///
+/// Since the exit expression can also be a block, this also allows you to execute other code when
+/// the condition fails. This can also be used to re-enter the loop with an explicit `continue`
+/// statement.
+///
+/// The macro also adds support for classic for loops familiar to everyone since C, except with
+/// the addition of an exit statement these too can be turned into expressions.
+///
+/// # Forms
+///
+/// - `xloop!(for PAT in ITER; BODY; EXIT)`
+/// - `xloop!(let PAT = EXPR; BODY; EXIT)` - `while let`, runs until the pattern stops matching
+/// - `xloop!(while COND; BODY; EXIT)` - runs `BODY` for as long as `COND` holds
+/// - `xloop!(INIT; COND; STEP; BODY; EXIT)` - C-style loop
+///
+/// In all cases `; EXIT` is optional and defaults to `()`.
+///
+/// # Examples
+///
+/// ```ignore
+/// xloop!(for i in 0..10; {
+///     connection.try_connect();
+///     if connection.ready() {
+///         break Some(connection)
+///     }
+/// }; None)
+/// ```
+///
+/// While loop with reentry. This is a very convoluted example but displays the idea quite clearly.
+///
+/// ```ignore
+/// xloop!(while socket.is_open(); {
+///     let (data, is_end) = socket.read();
+///     all_data.append(data);
+///     if is_end { break Ok(all_data) }
+/// }; {
+///     if let Ok(new_sock) = open_socket(socket.position()) {
+///         new_sock.set_position(socket.position());
+///         socket = new_sock;
+///         continue
+///     } else {
+///         Err(DownloadError::ConnectionLost)
+///     }
+/// })
+/// ```
+///
+/// CUDA algorithm for O(log n) summation using a C loop
+///
+/// ```ignore
+/// xloop!(let mut leap = 1; own_id*2 + leap < batch_size; leap *= 2; {
+///     batch[own_id*2] += batch[own_id*2 + leap]
+/// })
+/// ```
+///
+/// The above loop isn't used as an expression, but an exit expression - or block - can be added
+/// to these as well just like the others.
+///
+/// Note that in the C-style form the step is appended to the body, so unlike in C an explicit
+/// `continue` in the body skips the step as well.
+///
+/// **todo** find a valid use case for While let for a demo
+#[macro_export]
+macro_rules! xloop {
+    (for $p:pat in $it:expr; $body:stmt) => {
+        xloop!(for $p in $it; $body; ())
+    };
+    (for $p:pat in $it:expr; $body:stmt; $exit:stmt) => {
+        {
+            // Desugar into the `while let` form over an explicit iterator
+            let mut __xloop__ = $it.into_iter();
+            xloop!(let Some($p) = __xloop__.next(); $body; $exit)
+        }
+    };
+    (let $p:pat = $e:expr; $body:stmt) => {
+        xloop!(let $p = $e; $body; ())
+    };
+    (let $p:pat = $e:expr; $body:stmt; $exit:stmt) => {
+        {
+            loop {
+                if let $p = $e { $body }
+                else { break { $exit } }
+            }
+        }
+    };
+    (while $cond:expr; $body:stmt) => {
+        // The `while` keyword has to be repeated or no arm matches the recursive call
+        xloop!(while $cond; $body; ())
+    };
+    (while $cond:expr; $body:stmt; $exit:stmt) => {
+        {
+            loop {
+                // Keep looping for as long as the condition holds, leave through the exit
+                // clause once it fails
+                if $cond { $body }
+                else { break { $exit } }
+            }
+        }
+    };
+    ($init:stmt; $cond:expr; $step:stmt; $body:stmt) => {
+        xloop!($init; $cond; $step; $body; ())
+    };
+    ($init:stmt; $cond:expr; $step:stmt; $body:stmt; $exit:stmt) => {
+        { $init; xloop!(while $cond; { $body; $step }; $exit) }
+    };
+}
--- a/src/utils/iter.rs
+++ b/src/utils/iter.rs
@@ -33,4 +33,4 @@ where
 pub fn into_boxed_iter<'a, T: 'a>(t: T) -> BoxedIter<'a, <T as IntoIterator>::Item>
 where T: IntoIterator {
    Box::new(t.into_iter())
-}
+}
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -2,8 +2,13 @@ mod cache;
 mod substack;
 mod side;
 mod merge_sorted;
-mod unwrap_or_continue;
+mod unwrap_or;
 pub mod iter;
+mod bfs;
+mod unless_let;
+mod string_from_charset;
+mod for_loop;
+mod protomap;

 pub use cache::Cache;
 use mappable_rc::Mrc;
@@ -11,6 +16,7 @@ pub use substack::Stackframe;
 pub use side::Side;
 pub use merge_sorted::merge_sorted;
 pub use iter::BoxedIter;
+pub use string_from_charset::string_from_charset;

 pub fn mrc_derive<T: ?Sized, P, U: ?Sized>(m: &Mrc<T>, p: P) -> Mrc<U>
 where P: for<'a> FnOnce(&'a T) -> &'a U {
@@ -37,3 +43,31 @@ pub fn mrc_derive_slice<T>(mv: &Mrc<Vec<T>>) -> Mrc<[T]> {
 pub fn one_mrc_slice<T>(t: T) -> Mrc<[T]> {
    Mrc::map(Mrc::new([t; 1]), |v| v.as_slice())
 }
+
+/// Iterate over a shared slice, yielding each element as its own `Mrc` produced by
+/// [mrc_derive] from the slice handle
+pub fn mrc_to_iter<T>(ms: Mrc<[T]>) -> impl Iterator<Item = Mrc<T>> {
+    // The range is built before `ms` is moved into the closure
+    (0..ms.len()).map(move |idx| mrc_derive(&ms, |slice| &slice[idx]))
+}
+
+/// Flatten a doubly wrapped `Mrc` by cloning the inner handle
+pub fn mrc_unnest<T>(m: &Mrc<Mrc<T>>) -> Mrc<T> {
+    let inner: &Mrc<T> = m.as_ref();
+    Mrc::clone(inner)
+}
+
+/// Narrow a shared slice down to its only element.
+///
+/// Returns `Err(())` unless the slice holds exactly one element.
+pub fn mrc_slice_to_only<T>(m: Mrc<[T]>) -> Result<Mrc<T>, ()> {
+    let narrowed = Mrc::try_map(m, |slice| match slice {
+        [only] => Some(only),
+        _ => None
+    });
+    // The error side of `try_map` is discarded, callers only learn that the length was wrong
+    narrowed.map_err(|_| ())
+}
+
+/// Narrow a shared slice down to at most one element.
+///
+/// Returns `Ok(None)` for an empty slice, `Ok(Some(_))` for a single element and `Err(())` if
+/// the slice holds more than one element.
+pub fn mrc_slice_to_only_option<T>(m: Mrc<[T]>) -> Result<Option<Mrc<T>>, ()> {
+    match m.len() {
+        // `first` is `None` exactly when the slice is empty
+        0 | 1 => Ok(Mrc::try_map(m, |slice| slice.first()).ok()),
+        _ => Err(())
+    }
+}
--- a/src/utils/protomap.rs
+++ b/src/utils/protomap.rs
@@ -0,0 +1,152 @@
+use std::{iter, ops::{Index, Add}, borrow::Borrow};
+
+use smallvec::SmallVec;
+
+// Number of entries a map stores inline in its `SmallVec` before it spills
+const INLINE_ENTRIES: usize = 2;
+
+/// Linked-array-list of key-value pairs.
+/// Lookup and modification is O(n + cachemiss * n / m)
+/// Can be extended by reference in O(m) < O(n)
+///
+/// Each map owns a short list of entries and may borrow another map as its prototype. Lookups
+/// check the local entries first and fall back to the prototype if the key isn't found locally.
+pub struct ProtoMap<'a, K, V> {
+    // Local entries. A `None` value marks the key as deleted and hides any value the prototype
+    // chain may hold for it
+    entries: SmallVec<[(K, Option<V>); INLINE_ENTRIES]>,
+    // Map consulted when a key has no local entry
+    prototype: Option<&'a ProtoMap<'a, K, V>>
+}
+
+impl<'a, K, V> ProtoMap<'a, K, V> {
+    /// Create an empty map without a prototype
+    pub fn new() -> Self {
+        Self {
+            entries: SmallVec::new(),
+            prototype: None
+        }
+    }
+
+    /// Mutable reference to entry without checking proto in O(m)
+    ///
+    /// Returns the position of the entry along with its key and value slot.
+    fn local_entry_mut<'b, Q: ?Sized>(&'b mut self, query: &Q)
+    -> Option<(usize, &'b mut K, &'b mut Option<V>)>
+    where K: Borrow<Q>, Q: Eq
+    {
+        self.entries.iter_mut().enumerate().find_map(|(i, (k, v))| {
+            if query.eq((*k).borrow()) { Some((i, k, v)) } else { None }
+        })
+    }
+
+    /// Entry without checking proto in O(m)
+    ///
+    /// Returns the position of the entry along with its key and value slot.
+    fn local_entry<'b, Q: ?Sized>(&'b self, query: &Q)
+    -> Option<(usize, &'b K, &'b Option<V>)>
+    where K: Borrow<Q>, Q: Eq
+    {
+        self.entries.iter().enumerate().find_map(|(i, (k, v))| {
+            if query.eq((*k).borrow()) { Some((i, k, v)) } else { None }
+        })
+    }
+
+    /// Find entry in prototype chain in O(n)
+    ///
+    /// A local entry always wins, even if it is a deletion marker, in which case the result is
+    /// `None` regardless of what the prototype chain holds.
+    pub fn get<'b, Q: ?Sized>(&'b self, query: &Q) -> Option<&'b V>
+    where K: Borrow<Q>, Q: Eq
+    {
+        if let Some((_, _, v)) = self.local_entry(query) {
+            v.as_ref()
+        } else {
+            self.prototype?.get(query)
+        }
+    }
+
+    /// Record a value for the given key in O(m)
+    ///
+    /// Only the local entries are touched, the prototype is never modified.
+    pub fn set(&mut self, key: &K, value: V) where K: Eq + Clone {
+        if let Some((_, _, v)) = self.local_entry_mut(key) {
+            *v = Some(value);
+        } else {
+            self.entries.push((key.clone(), Some(value)))
+        }
+    }
+
+    /// Delete in a memory-efficient way in O(n)
+    ///
+    /// A deletion marker is only kept if the prototype chain actually holds a value for the key.
+    pub fn delete_small(&mut self, key: &K) where K: Eq + Clone {
+        let exists_up = self.prototype.and_then(|p| p.get(key)).is_some();
+        let local_entry = self.local_entry_mut(key);
+        match (exists_up, local_entry) {
+            (false, None) => (), // nothing to do
+            (false, Some((i, _, _))) => { self.entries.remove(i); }, // forget locally
+            (true, Some((_, _, v))) => *v = None, // update local override to cover
+            (true, None) => self.entries.push((key.clone(), None)), // create new
+        }
+    }
+
+    /// Delete in O(m) without checking the prototype chain
+    /// May produce unnecessary cover over previously unknown key
+    pub fn delete_fast(&mut self, key: &K) where K: Eq + Clone {
+        if let Some((_, _, v)) = self.local_entry_mut(key) {
+            *v = None
+        } else {
+            self.entries.push((key.clone(), None))
+        }
+    }
+
+    /// Iterate over the entries defined herein and on the prototype chain
+    ///
+    /// Local entries come first, followed by those of each prototype in turn. Note that this
+    /// will visit a key multiple times if it is defined on more than one level, and that deletion
+    /// markers are reported as entries with a `None` value.
+    pub fn iter(&self) -> impl Iterator<Item = &(K, Option<V>)> {
+        let mut level = Some(self);
+        iter::from_fn(move || {
+            // Only stop once the chain itself has run out. Looking up the next prototype before
+            // yielding the current entries would drop the last map in the chain
+            let map = level?;
+            level = map.prototype;
+            Some(map.entries.iter())
+        }).flatten()
+    }
+
+    /// Visit the keys in the order of [ProtoMap::iter]. Keys are not deduplicated, and keys
+    /// that are only present as deletion markers are included as well
+    pub fn keys(&self) -> impl Iterator<Item = &K> {
+        self.iter().map(|(k, _)| k)
+    }
+
+    /// Visit the values in the order of [ProtoMap::iter]. Deletion markers are skipped, but
+    /// values that are overridden or deleted on a level closer to this map are still returned
+    pub fn values(&self) -> impl Iterator<Item = &V> {
+        self.iter().filter_map(|(_, v)| v.as_ref())
+    }
+
+    /// Update the prototype, and correspondingly the lifetime of the map
+    ///
+    /// The local entries are kept, any previous prototype is replaced.
+    pub fn set_proto<'b>(self, proto: &'b ProtoMap<'b, K, V>) -> ProtoMap<'b, K, V> {
+        ProtoMap {
+            entries: self.entries,
+            prototype: Some(proto)
+        }
+    }
+}
+
+/// Build a map without a prototype from any sequence of key-value pairs
+impl<T, K, V> From<T> for ProtoMap<'_, K, V> where T: IntoIterator<Item = (K, V)> {
+    fn from(value: T) -> Self {
+        // Every pair becomes a live entry, deletion markers can't be created this way
+        let entries = value.into_iter().map(|(key, val)| (key, Some(val))).collect();
+        Self { entries, prototype: None }
+    }
+}
+
+/// Indexing by key, backed by [ProtoMap::get]
+///
+/// # Panics
+/// If `get` finds no value for the key.
+impl<Q: ?Sized, K, V> Index<&Q> for ProtoMap<'_, K, V> where K: Borrow<Q>, Q: Eq {
+    type Output = V;
+    fn index(&self, index: &Q) -> &Self::Output {
+        self.get(index).unwrap_or_else(|| panic!("Index not found in map"))
+    }
+}
+
+/// Cloning copies the local entries only, the clone keeps borrowing the same prototype
+impl<K: Clone, V: Clone> Clone for ProtoMap<'_, K, V> {
+    fn clone(&self) -> Self {
+        let Self { entries, prototype } = self;
+        Self { entries: entries.clone(), prototype: *prototype }
+    }
+}
+
+/// `&map + (key, value)` creates a new map holding just the given pair, with `map` as its
+/// prototype. The original map is left untouched
+impl<'a, K: 'a, V: 'a> Add<(K, V)> for &'a ProtoMap<'a, K, V> {
+    type Output = ProtoMap<'a, K, V>;
+    fn add(self, rhs: (K, V)) -> Self::Output {
+        let extension = ProtoMap::from(iter::once(rhs));
+        extension.set_proto(self)
+    }
+}
+
+/// Build a `ProtoMap` without a prototype from a comma-separated list of `(key, value)` tuples.
+/// A trailing comma is accepted.
+///
+/// `ProtoMap` has to be in scope at the call site.
+#[macro_export]
+macro_rules! protomap {
+    ($($ent:expr),* $(,)?) => {
+        // Fragment specifiers only belong in the matcher; repeating `:expr` here would emit
+        // stray tokens into the array literal
+        ProtoMap::from([$($ent),*])
+    };
+}
--- a/src/utils/string_from_charset.rs
+++ b/src/utils/string_from_charset.rs
@@ -0,0 +1,14 @@
+/// Recursive worker for [string_from_charset]. `val` is one-based: with the charset `"abc"` the
+/// values 1, 2, 3 map to `a`, `b`, `c`, 4 maps to `aa` and so on.
+fn string_from_charset_rec(val: usize, digits: &str) -> String {
+    // Digits are looked up by character, so the radix has to be counted in characters too
+    let radix = digits.chars().count();
+    assert!(radix > 0, "Cannot build a string from an empty digit set");
+    // Zero-based position of the last digit, and what remains for the digits before it. There is
+    // no zero digit in this numbering, which is why the one is subtracted before dividing
+    let last = (val - 1) % radix;
+    let rest = (val - 1) / radix;
+    let mut prefix = if rest > 0 {
+        string_from_charset_rec(rest, digits)
+    } else {String::new()};
+    prefix.push(digits.chars().nth(last).expect("digit index is reduced modulo the radix"));
+    prefix
+}
+
+/// Convert a number to a unique string made up of the characters in `digits`.
+///
+/// The strings are enumerated by length first and by the order of `digits` second, so with the
+/// charset `"abc"` the values 0, 1, 2, 3, 4 produce `a`, `b`, `c`, `aa`, `ab`.
+///
+/// # Panics
+/// If `digits` is empty, or if `val` is `usize::MAX`.
+pub fn string_from_charset(val: usize, digits: &str) -> String {
+    string_from_charset_rec(val + 1, digits)
+}
--- a/src/utils/unless_let.rs
+++ b/src/utils/unless_let.rs
@@ -0,0 +1,6 @@
+/// Inverse of `if let`: evaluates the fallback if the value does NOT match the pattern.
+///
+/// ```ignore
+/// unless_let!(Some(_) = lookup(key); { return Err(MissingKey) });
+/// ```
+///
+/// A macro has to expand to a complete expression, so the fallback is passed as a second
+/// argument rather than written after the invocation. Bindings made by the pattern are not
+/// available anywhere, the matching branch is empty.
+#[macro_export]
+macro_rules! unless_let {
+    ($m:pat_param = $expr:expr; $fail:expr) => {
+        if let $m = $expr {} else { $fail }
+    }
+}
--- a/src/utils/unwrap_or.rs
+++ b/src/utils/unwrap_or.rs
@@ -0,0 +1,6 @@
+/// Unwrap an `Option`, or evaluate the fallback expression if it is `None`.
+///
+/// The fallback is expanded in place, so it may also be a diverging expression such as
+/// `continue`, `break None` or `return`.
+#[macro_export]
+macro_rules! unwrap_or {
+    ($m:expr; $fail:expr) => {
+        match $m {
+            Some(res) => res,
+            None => $fail
+        }
+    }
+}
--- a/src/utils/unwrap_or_continue.rs
+++ b/src/utils/unwrap_or_continue.rs
@@ -1,6 +0,0 @@
-#[macro_export]
-macro_rules! unwrap_or_continue {
-    ($m:expr) => {
-        { if let Some(res) = ($m) {res} else {continue} }
-    }
-}