From f3ce910f664426712e7e315df0e0c5a6c646b5ee Mon Sep 17 00:00:00 2001 From: Lawrence Bethlenfalvy Date: Tue, 21 Mar 2023 19:36:40 +0000 Subject: [PATCH] Transfer commit --- .vscode/settings.json | 2 + Cargo.lock | 225 +++++++++++---------- Cargo.toml | 10 +- examples/lite/main.orc | 9 +- notes/papers/report/oss.md | 49 +++++ notes/papers/report/scratchpad.md | 47 ++++- notes/papers/report/spec/01-parsing.md | 101 ++++++++++ notes/papers/report/spec/02-macros.md | 45 +++++ notes/papers/report/spec/03-runtime.md | 32 +++ orchid.code-workspace | 11 +- src/external/bool/mod.rs | 4 +- src/external/conv/mod.rs | 4 +- src/external/cpsio/mod.rs | 4 +- src/external/num/mod.rs | 4 +- src/external/num/numeric.rs | 54 +++-- src/external/str/mod.rs | 4 +- src/foreign.rs | 42 ++-- src/main.rs | 27 ++- src/parse/expression.rs | 141 +++++++------ src/parse/import.rs | 62 +++--- src/parse/mod.rs | 4 - src/parse/parse.rs | 27 ++- src/parse/sourcefile.rs | 154 +++------------ src/project/loading/ext_loader.rs | 34 +++- src/project/loading/extlib_loader.rs | 34 ++++ src/project/loading/file_loader.rs | 22 ++- src/project/loading/fnlib_loader.rs | 23 --- src/project/loading/map_loader.rs | 2 +- src/project/loading/mod.rs | 15 +- src/project/loading/overlay_loader.rs | 19 -- src/project/loading/string_loader.rs | 2 +- src/project/mod.rs | 4 +- src/project/name_resolver.rs | 100 +++++----- src/project/prefix.rs | 35 ++-- src/project/rule_collector.rs | 190 +++++++++--------- src/representations/ast.rs | 229 ++++++++++++++-------- src/representations/ast_to_postmacro.rs | 112 +++++++---- src/representations/get_name.rs | 10 - src/representations/mod.rs | 2 +- src/representations/postmacro.rs | 16 -- src/representations/sourcefile.rs | 76 +++++++ src/rule/executor/execute.rs | 79 ++++++-- src/rule/executor/slice_matcher.rs | 29 +-- src/rule/executor/state.rs | 34 ++-- src/rule/executor/update_first_seq_rec.rs | 51 ++--- src/utils/bfs.rs | 2 + src/utils/cache.rs | 79 ++------ src/utils/interned_display.rs | 19 ++ src/utils/interner.rs | 27 --- src/utils/iter.rs | 3 +- src/utils/merge_sorted.rs | 27 --- src/utils/mod.rs | 8 +- src/utils/product2.rs | 2 + src/utils/protomap.rs | 16 +- src/utils/replace_first.rs | 5 +- src/utils/side.rs | 9 +- src/utils/string_from_charset.rs | 2 + src/utils/substack.rs | 17 +- src/utils/translate.rs | 7 + src/utils/unless_let.rs | 6 - src/utils/unwrap_or.rs | 3 + src/utils/visitor.rs | 18 -- src/utils/{for_loop.rs => xloop.rs} | 3 +- 63 files changed, 1410 insertions(+), 1023 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 notes/papers/report/oss.md create mode 100644 notes/papers/report/spec/01-parsing.md create mode 100644 notes/papers/report/spec/02-macros.md create mode 100644 notes/papers/report/spec/03-runtime.md create mode 100644 src/project/loading/extlib_loader.rs delete mode 100644 src/project/loading/fnlib_loader.rs delete mode 100644 src/project/loading/overlay_loader.rs delete mode 100644 src/representations/get_name.rs create mode 100644 src/representations/sourcefile.rs create mode 100644 src/utils/interned_display.rs delete mode 100644 src/utils/interner.rs delete mode 100644 src/utils/merge_sorted.rs delete mode 100644 src/utils/unless_let.rs delete mode 100644 src/utils/visitor.rs rename src/utils/{for_loop.rs => xloop.rs} (96%) diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..7a73a41 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,2 @@ +{ +} \ No newline at end of file diff --git 
a/Cargo.lock b/Cargo.lock index 6488968..6999f9e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,15 +2,6 @@ # It is not intended for manual editing. version = 3 -[[package]] -name = "ahash" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" -dependencies = [ - "const-random", -] - [[package]] name = "ahash" version = "0.7.6" @@ -22,6 +13,17 @@ dependencies = [ "version_check", ] +[[package]] +name = "ahash" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c99f64d1e06488f620f932677e24bc6e2897582980441ae90a671415bd7ec2f" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", +] + [[package]] name = "autocfg" version = "1.1.0" @@ -29,16 +31,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] -name = "bitvec" -version = "1.0.1" +name = "cc" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" -dependencies = [ - "funty", - "radium", - "tap", - "wyz", -] +checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" [[package]] name = "cfg-if" @@ -48,50 +44,22 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "0.8.0" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" +checksum = "23170228b96236b5a7299057ac284a321457700bc8c41a4476052f0f4ba5349d" dependencies = [ - "ahash 0.3.8", + "hashbrown 0.12.3", + "stacker", ] [[package]] -name = "const-random" -version = "0.1.13" +name = "dashmap" +version = "4.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" +checksum = "e77a43b28d0668df09411cb0bc9a8c2adc40f9a048afe863e05fd43251e8e39c" dependencies = [ - "const-random-macro", - "proc-macro-hack", -] - -[[package]] -name = "const-random-macro" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" -dependencies = [ - "getrandom", - "lazy_static", - "proc-macro-hack", - "tiny-keccak", -] - -[[package]] -name = "crunchy" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" - -[[package]] -name = "derivative" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" -dependencies = [ - "proc-macro2", - "quote", - "syn", + "cfg-if", + "num_cpus", ] [[package]] @@ -106,12 +74,6 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" -[[package]] -name = "funty" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" - [[package]] name = "getrandom" version = "0.2.6" @@ -125,18 +87,39 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.12.1" +version = "0.11.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" dependencies = [ "ahash 0.7.6", ] [[package]] -name = "implicit-clone" -version = "0.3.5" +name = "hashbrown" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40fc102e70475c320b185cd18c1e48bba2d7210b63970a4d581ef903e4368ef7" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +dependencies = [ + "ahash 0.7.6", +] + +[[package]] +name = "hashbrown" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" +dependencies = [ + "ahash 0.8.3", +] + +[[package]] +name = "hermit-abi" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee512640fe35acbfb4bb779db6f0d80704c2cacfa2e39b601ef3e3f47d1ae4c7" +dependencies = [ + "libc", +] [[package]] name = "itertools" @@ -148,10 +131,14 @@ dependencies = [ ] [[package]] -name = "lazy_static" -version = "1.4.0" +name = "lasso" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" +checksum = "aeb7b21a526375c5ca55f1a6dfd4e1fad9fa4edd750f530252a718a44b2608f0" +dependencies = [ + "dashmap", + "hashbrown 0.11.2", +] [[package]] name = "libc" @@ -175,26 +162,32 @@ dependencies = [ ] [[package]] -name = "once_cell" -version = "1.12.0" +name = "num_cpus" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7709cef83f0c1f58f666e746a08b21e0085f7440fa6a29cc194d68aac97a4225" +checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "orchid" version = "0.1.0" dependencies = [ - "bitvec", "chumsky", - "derivative", "dyn-clone", - "hashbrown", - "implicit-clone", + "hashbrown 0.13.2", "itertools", - "lazy_static", + "lasso", "mappable-rc", "ordered-float", - "paste", "smallvec", "thiserror", ] @@ -208,18 +201,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "paste" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d01a5bd0424d00070b0098dd17ebca6f961a959dead1dbcbbbc1d1cd8d3deeba" - -[[package]] -name = "proc-macro-hack" -version = "0.5.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - [[package]] name = "proc-macro2" version = "1.0.39" @@ -229,6 +210,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "psm" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +dependencies = [ + "cc", +] + [[package]] name = "quote" version = "1.0.18" @@ -238,18 +228,25 @@ dependencies = [ "proc-macro2", ] -[[package]] -name = "radium" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" - [[package]] name = "smallvec" version = 
"1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" +[[package]] +name = "stacker" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c886bd4480155fd3ef527d45e9ac8dd7118a898a46530b7b94c3e21866259fce" +dependencies = [ + "cc", + "cfg-if", + "libc", + "psm", + "winapi", +] + [[package]] name = "syn" version = "1.0.95" @@ -261,12 +258,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "thiserror" version = "1.0.31" @@ -287,15 +278,6 @@ dependencies = [ "syn", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "unicode-ident" version = "1.0.0" @@ -315,10 +297,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] -name = "wyz" -version = "0.5.1" +name = "winapi" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ - "tap", + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", ] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index b151ecd..8767cba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,15 +7,11 @@ edition = "2021" [dependencies] thiserror = "1.0" -chumsky = "0.8" -derivative = "2.2" -hashbrown = "0.12" +chumsky = "0.9.2" +hashbrown = "0.13.2" mappable-rc = "0.1" ordered-float = "3.0" itertools = "0.10" smallvec = { version = "1.10.0", features = ['const_generics'] } -lazy_static = "1.4.0" -implicit-clone = "0.3.5" -bitvec = "1.0.1" dyn-clone = "1.0.11" -paste = "1.0.11" +lasso = { version = "0.6.0", features = ['multi-threaded'] } diff --git a/examples/lite/main.orc b/examples/lite/main.orc index 81a571b..36edd07 100644 --- a/examples/lite/main.orc +++ b/examples/lite/main.orc @@ -1,3 +1,4 @@ +import prelude::* import std::conv::(parse_float, to_string) import std::cpsio::(readline, print) import std::str::(concatenate) @@ -10,10 +11,10 @@ export main := do{ cps data = readline; let b = parse_float data; let result = ( - if op = "+" then a + b - else if op = "-" then a - b - else if op = "*" then a * b - else if op = "/" then a / b + if op == "+" then a + b + else if op == "-" then a - b + else if op == "*" then a * b + else if op == "/" then a / b else "Unsupported operation" -- dynamically typed shenanigans ); cps print (to_string result ++ "\n"); diff --git a/notes/papers/report/oss.md b/notes/papers/report/oss.md new file mode 100644 index 0000000..c7d6500 --- /dev/null +++ b/notes/papers/report/oss.md @@ 
-0,0 +1,49 @@
+# List of open-source packages I used
+
+## [thiserror](https://github.com/dtolnay/thiserror)
+
+_License: Apache 2.0 or MIT_
+
+Helps derive `Error` for aggregate errors, although I eventually stopped trying to do so as it was simpler to just treat error types as bags of data about the failure.
+
+## [chumsky](https://github.com/zesterer/chumsky)
+
+_License: MIT_
+
+A fantastic parser combinator library that allowed me to specify things like the nuanced conditions under which a float token can be promoted to a uint token in a declarative way. In hindsight, the passes after tokenization could have been written by hand; tokenized Orchid is not that hard to parse into an AST, and doing so would probably have made some tasks, such as allowing `.` (dot) as a token, considerably easier.
+
+## [hashbrown](https://github.com/rust-lang/hashbrown)
+
+_License: Apache 2.0 or MIT_
+
+Google's swisstable. Almost perfectly identical to `HashMap` in std, with a couple of additional APIs. I use it for the raw entry API, which the generic processing-step cache requires to avoid unnecessary clones of potentially very large trees.
+
+## [mappable-rc](https://github.com/JakobDegen/mappable-rc)
+
+_License: Apache 2.0 or MIT_
+
+A refcounting pointer which can be updated to dereference to some part of the value it holds, similarly to C++'s `shared_ptr`. Using this crate was ultimately a mistake on my part: in the early stages of development (early stages of my Rust journey) I wanted to store arbitrary subsections of an expression during macro execution without dealing with lifetimes. Removing all uses of this crate and just dealing with lifetimes instead is on the roadmap.
+
+## [ordered-float](https://github.com/reem/rust-ordered-float)
+
+_License: MIT_
+
+A wrapper around floating point numbers that removes `NaN` from the set of possible values, promoting `<` and `>` to total orderings and `==` to an equivalence relation. Orchid does not have `NaN` because it's a silent error. All operations that would produce `NaN` either abort or indicate the failure in their return type.
+
+## [itertools](https://github.com/rust-itertools/itertools)
+
+_License: Apache 2.0 or MIT_
+
+A utility crate; I use it everywhere.
+
+## [smallvec](https://github.com/servo/references-smallvec)
+
+_License: Apache 2.0 or MIT_
+
+Small vector optimization: space for a statically known number of elements is allocated on the stack to save heap allocations. This is a gamble since the stack space is wasted if the data does spill to the heap, but it can improve performance massively in hot paths.
+
+## [dyn-clone](https://github.com/dtolnay/dyn-clone)
+
+_License: Apache 2.0 or MIT_
+
+All expressions in Orchid are clonable, and to allow for optimizations, Atoms have control over their own cloning logic, so this object-safe version of `Clone` is used.
diff --git a/notes/papers/report/scratchpad.md b/notes/papers/report/scratchpad.md index 747520c..f8fed46 100644 --- a/notes/papers/report/scratchpad.md +++ b/notes/papers/report/scratchpad.md @@ -45,10 +45,10 @@ $1 [ 0 ] a equals a < $1 ] b 0 Some global rules are also needed, also instantiated for all possible characters in the templated positions ``` -$1 $2 < equals $2 < $1 unless $1 is | -| $1 < equals $1 | > -> $1 $2 equals $1 > $2 unless $2 is ] -> $1 ] equals [ $1 ] +$1 $2 < equals $2 < $1 unless $1 is | +| $1 < equals $1 | > +> $1 $2 equals $1 > $2 unless $2 is ] +> $1 ] equals [ $1 ] ``` What I really appreciate in this proof is how visual it is; based on this, it's easy to imagine how one would go about encoding a pushdown automaton, lambda calculus or other interesting tree-walking procedures. This is exactly why I based my preprocessor on this system. @@ -57,10 +57,41 @@ What I really appreciate in this proof is how visual it is; based on this, it's I found two major problems with C and Rust macros which vastly limit their potential. They're relatively closed systems, and prone to aliasing. Every other item in Rust follows a rigorous namespacing scheme, but the macros break this seal, I presume the reason is that macro execution happens before namespace resolution. -Orchid's macros - substitution rules - operate on namespaced tokens. This means that users can safely give their macros short and intuitive names, but it also means that the macros can hook into each other. Consider for example the following hypothetical example. - -a widely known module implements a unique way of transforming iterators using an SQL-like syntax. +Orchid's macros - substitution rules - operate on namespaced tokens. This means that users can safely give their macros short and intuitive names, but it also means that the macros can hook into each other. Consider for example the following example, which is a slightly modified version of a +real rule included in the prelude: +in _procedural.or_ ```orchid -select ...$collist from ...$ +export do { ...$statement ; ...$rest:1 } =10_001=> ( + statement (...$statement) do { ...$rest } +) +export do { ...$return } =10_000=> (...$return) +export statement (let $_name = ...$value) ...$next =10_000=> ( + (\$_name. ...$next) (...$value) +) ``` + +in _cpsio.or_ +```orchid +import procedural::statement + +export statement (cps $_name = ...$operation) ...$next =10_001=> ( + (...$operation) \$_name. ...$next +) +export statement (cps ...$operation) ...$next =10_000=> ( + (...$operation) (...$next) +) +``` + +in _main.or_ +```orchid +import procedural::(do, let, ;) +import cpsio::cps + +export main := do{ + cps data = readline; + let a = parse_float data * 2; + cps print (data ++ " doubled is " ++ stringify a) +} +``` + diff --git a/notes/papers/report/spec/01-parsing.md b/notes/papers/report/spec/01-parsing.md new file mode 100644 index 0000000..2890ab8 --- /dev/null +++ b/notes/papers/report/spec/01-parsing.md @@ -0,0 +1,101 @@ +# Parsing + +Orchid expressions are similar in nature to lambda calculus or haskell, except whitespace is mostly irrelevant. + +## Names + +`name` and `ns_name` tokens appear all over the place in this spec. They represent operators, function names, arguments, modules. A `name` is + +1. the universally recognized operators `,`, `.`, `..` and `...` (comma and single, double and triple dot) +2. any C identifier +3. any sequence of name-safe characters starting with a character that cannot begin a C identifier. 
A name-safe character is any non-whitespace Unicode character other than
+
+  - digits
+  - the namespace separator `:`,
+  - the parametric expression starters `\` and `@`,
+  - the string and char delimiters `"` and `'`,
+  - the various brackets `(`, `)`, `[`, `]`, `{` and `}`,
+  - `,`, `.` and `$`
+
+  This means that, in the absence of a known list of names, `!important!` is a single name but `important!` is two names, as a name that starts as a C identifier cannot contain special characters. It also means that using non-English characters in Orchid variables is a really bad idea. This is intentional: identifiers that need to be repeated verbatim should only contain characters that appear on all Latin keyboards.
+
+There are also reserved words that cannot be used as names: `export` and `import`.
+
+A `ns_name` is a sequence of one or more `name` tokens separated by the namespace separator `::`.
+
+All tokens that do not contain `::` in the code may be `name` or `ns_name` depending on their context.
+
+## Clauses
+
+Clauses are the building blocks of Orchid's syntax. They belong to one of the following categories:
+
+- S-expressions are a parenthesized sequence of space-delimited `clause`s. All three types of brackets `()`, `[]` and `{}` are supported.
+- Lambdas start with `\<name>.`, followed by a sequence of `clause`s, where `<name>` is a single `name` or `$_` followed by a C identifier. This is a greedy pattern that ends at the end of an enclosing S-expression, or the end of input.
+- Numbers can be in decimal, binary with the `0b` prefix, hexadecimal with the `0x` prefix, or octal with the `0` prefix. All bases support the decimal point, exponential notation, or both. The exponent is prefixed with `p`, always written in decimal, may be negative, and represents a power of the base rather than a power of 10. For example, `0xf0.4p-2` is `0xf04 / 16 ^ 3` or ~0.9385.
+- Strings are delimited with `"`, support `\` escapes and four-digit Unicode escapes of the form `\uXXXX`. They may contain line breaks.
+- Chars are a single character or escape, as described for strings above, delimited by `'`.
+- Placeholders come in three styles: `$name`, `..$name` and `...$name`, where the latter two may also carry a growth priority as in `..$name:p` and `...$name:p`. The name is always a C identifier; p is an integer growth priority.
+- Names are a single `ns_name`.
+
+## Files
+
+Files are separated into lines. A line is delimited by newlines and only contains newlines within brackets. A line may be an import, a rule, an exported rule, or an explicit export.
+
+### Rules
+
+Rules have the following form:
+
+```
+pattern =priority=> template
+```
+
+The pattern is able to define new operators implicitly by referencing them, so all tokens must be delimited by spaces. The template is inserted in place of the pattern without parentheses, so unless it's meant to be part of a pattern matched by another rule which expects a particular parenthesization, the output should be wrapped in parentheses whenever more than one token is produced.
+
+A shorthand syntax is available for functions:
+
+```
+name := value
+```
+
+`name` in this case must be a single `name`. The value is automatically parenthesized, and the priority of these rules is always zero.
+
+### Explicit exports and exported rules
+
+An explicit export consists of `export :: ( <names> )` where `<names>` is a comma-separated list of `name`s.
+
+An exported rule consists of the keyword `export` followed by a regular rule. It counts both as a rule and as an export of all the `name`s within the pattern.
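To make these forms concrete, here is a short illustrative snippet. The first rule appears verbatim in the prelude embedded in `src/main.rs` of this patch; the `pi` constant and its explicit export are hypothetical, added only to demonstrate the `:=` shorthand and the explicit export form described above.

```
-- an exported rule; the pattern implicitly introduces the + operator
export (...$a + ...$b:1) =1001=> (add (...$a) (...$b))

-- function shorthand, equivalent to a priority-0 rule with the value parenthesized
pi := 3.14159

-- explicit export of a name not introduced by an exported rule
export :: (pi)
```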
+
+### Imports
+
+An import is a line starting with the keyword `import`, followed by a tree of imported names.
+
+```
+import_tree = name
+            | name :: import_tree
+            | name :: *
+            | ( import_tree [, import_tree]+ )
+```
+
+Some examples of valid imports:
+
+```
+import std::cpsio
+import std::(conv::parse_float, cpsio, str::*)
+import std
+```
+
+Some examples of invalid imports:
+
+```
+import std::()
+import std::cpsio::(print, *)
+import std::(cpsio)
+```
+
+> **info**
+>
+> While none of these are guaranteed to work currently, there's little reason they would have to be invalid, so future specifications may allow them.
+
+An import can be normalized into a list of independent imports, each ending either with a `*` (a wildcard import) or with a `name`. Wildcard imports are normalized to imports for all the `name`s exported from the parent module. Every Name clause in the file that starts with the final `name` of one of these imports is prefixed with the full import path. The rest of the Name clauses are prefixed with the full path of the current module.
+
+Reference cycles between Orchid modules are never allowed, so the fact that a module's exports depend on its imports, and a wildcard import's value depends on the referenced module's exports, does not introduce a risk of circular dependencies; it only constrains the order in which files are processed.
diff --git a/notes/papers/report/spec/02-macros.md b/notes/papers/report/spec/02-macros.md
new file mode 100644
index 0000000..cdbde43
--- /dev/null
+++ b/notes/papers/report/spec/02-macros.md
@@ -0,0 +1,45 @@
+# Macros
+
+After parsing, what remains is a set of macro rules, each with a pattern, priority and template. Modules aren't tracked at this stage; their purpose was to namespace the tokens within the rules.
+
+By employing custom import logic, it's also possible to add rules bypassing the parser. Starting with the macro phase, `clause`s may also be `atom`s or `externfn`s. The role of these is detailed in the [[03-runtime]] section.
+
+Macros are executed in reverse priority order: each macro is checked against each subsection of each clause sequence. When a match is found, the substitution is performed and all macros are executed again.
+
+## Placeholders
+
+Placeholders fall into two categories:
+
+- scalar placeholders
+  - `$name` matches exactly one clause
+  - `$_name` matches exactly one Name clause
+- vectorial placeholders
+  - `..$name` matches zero or more clauses
+  - `...$name` matches one or more clauses
+
+`$_name` is uniquely valid in the position of an argument name within a lambda.
+
+Vectorial placeholders may also have a positive decimal integer growth priority specified after the name, separated with a `:` like so: `...$cond:2`. If it isn't specified, the growth priority defaults to 0.
+
+The template may only include placeholders referenced in the pattern. All occurrences of a placeholder within a rule must match the same things.
+
+## Execution
+
+Each clause in the pattern matches clauses as follows:
+
+- A Name matches a Name with the same full path.
+- A Lambda matches a lambda with a matching argument name and a matching body. If the argument name in the pattern is a name-placeholder (as in `\$_phname.`), the argument name in the source is treated as a module-local Name clause.
+- Parenthesized expressions match each other if the contained sequences match and both use the same kind of parentheses.
+- Placeholders' matched sets are as listed in [Placeholders].
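As an illustration of the placeholder forms and the matching rules above, here is the `do` block rule from the prelude embedded in `src/main.rs` of this patch, followed by a hypothetical clause sequence showing how the two vectorial placeholders divide it:

```
export do { ...$statement ; ...$rest:1 } =10_001=> (
  statement (...$statement) do { ...$rest }
)

-- matched against the (illustrative) sequence
do { let a = parse_float data; let b = a * 2; to_string b }
-- `...$rest:1` has the higher growth priority, so it is grown first and
-- captures `let b = a * 2; to_string b`, while `...$statement` matches
-- `let a = parse_float data`; repeated application peels off one statement
-- per pass until the lower-priority `do { ...$return }` rule takes over.
```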
+
+If a pattern contains the same placeholder name more than once, any match where the occurrences don't capture perfectly identical clauses, names or clause sequences is discarded.
+
+### Order of preference
+
+The growth order of vectorial placeholders is
+
+- outside before inside parentheses,
+- descending growth priority,
+- left-to-right by occurrence in the pattern.
+
+If a pattern matches a sequence in more than one way, whichever match allocates more clauses to the first vectorial placeholder in growth order is preferred.
diff --git a/notes/papers/report/spec/03-runtime.md b/notes/papers/report/spec/03-runtime.md
new file mode 100644
index 0000000..35d5b5f
--- /dev/null
+++ b/notes/papers/report/spec/03-runtime.md
@@ -0,0 +1,32 @@
+# Runtime
+
+Orchid is evaluated lazily. This means that everything operates on unevaluated expressions. This has the advantage that unused values never need to be computed, but it also introduces a great deal of complexity in interoperability.
+
+## Execution mode
+
+The executor supports step-by-step execution, multiple steps at once, and running an expression to completion. Once an Orchid program reaches a nonreducible state, it is either an external item, a literal, or a lambda function.
+
+## External API
+
+In order to do anything useful, Orchid provides an API for defining clauses that have additional behaviour implemented in Rust. Basic arithmetic is defined using these.
+
+### Atomic
+
+Atomics are opaque units of foreign data, with the following operations:
+
+- functions for the same three execution modes the language itself supports
+- downcasting to a concrete type
+
+Atomics can be used to represent processes. Given enough processing cycles, these return a different clause.
+
+They can also be used to wrap data addressed to other external code. This category of atomics reports nonreducible at all times, and relies on the downcasting API to interact with ExternFns.
+
+It's possible to use a combination of these for conditional optimizations - for instance, to recognize chains of processes that can be more efficiently expressed as a single task.
+
+### ExternFn
+
+External functions can be combined with another clause to form a new clause. Most of the time, this new clause would be an Atomic which forwards processing to the arguments until they can't be normalized any further, at which point it either returns an ExternFn to take another argument or executes the operation associated with the function and returns.
+
+Because this combination of operations is so common, several macros are provided to streamline it.
+
+Sometimes, e.g. when encoding effectful functions in continuation-passing style, an ExternFn returns its argument without modification. It is always a logic error to run expressions outside a run call, or to expect an expression to be of any particular shape without first ensuring that run reported it nonreducible.
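As a concrete sketch of this continuation-passing pattern, here is the `cps` statement rule quoted in the scratchpad notes of this patch, together with a single rewriting step; what `readline` does with the continuation is an assumption based on its use in `examples/lite/main.orc`.

```
export statement (cps $_name = ...$operation) ...$next =10_001=> (
  (...$operation) \$_name. ...$next
)

-- one rewriting step:
statement (cps data = readline) do { cps print (data ++ "\n") }
-- becomes
(readline) \data. do { cps print (data ++ "\n") }
-- `readline` is backed by an ExternFn; applied to the continuation
-- `\data. ...` it can perform the read and pass the result in as `data`.
```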
diff --git a/orchid.code-workspace b/orchid.code-workspace index a4ebcde..0472125 100644 --- a/orchid.code-workspace +++ b/orchid.code-workspace @@ -10,12 +10,19 @@ "editor.unicodeHighlight.invisibleCharacters": false, "diffEditor.ignoreTrimWhitespace": false, "editor.wordWrap": "bounded", - "editor.wordWrapColumn": 100, + "editor.wordWrapColumn": 80, "editor.quickSuggestions": { "comments": "off", "strings": "off", "other": "off" - } + }, + "editor.lineNumbers": "off", + "editor.glyphMargin": false, + "editor.rulers": [], + "editor.guides.indentation": false, + }, + "[rust]": { + "editor.rulers": [74] } }, "extensions": { diff --git a/src/external/bool/mod.rs b/src/external/bool/mod.rs index c3dd66c..6d4b389 100644 --- a/src/external/bool/mod.rs +++ b/src/external/bool/mod.rs @@ -3,10 +3,10 @@ mod boolean; mod ifthenelse; pub use boolean::Boolean; -use crate::project::{Loader, fnlib_loader}; +use crate::project::{Loader, extlib_loader}; pub fn bool() -> impl Loader { - fnlib_loader(vec![ + extlib_loader(vec![ ("ifthenelse", Box::new(ifthenelse::IfThenElse1)), ("equals", Box::new(equals::Equals2)) ]) diff --git a/src/external/conv/mod.rs b/src/external/conv/mod.rs index 17f1d62..34759ee 100644 --- a/src/external/conv/mod.rs +++ b/src/external/conv/mod.rs @@ -1,11 +1,11 @@ -use crate::project::{fnlib_loader, Loader}; +use crate::project::{extlib_loader, Loader}; mod to_string; mod parse_float; mod parse_uint; pub fn conv() -> impl Loader { - fnlib_loader(vec![ + extlib_loader(vec![ ("parse_float", Box::new(parse_float::ParseFloat1)), ("parse_uint", Box::new(parse_uint::ParseUint1)), ("to_string", Box::new(to_string::ToString1)) diff --git a/src/external/cpsio/mod.rs b/src/external/cpsio/mod.rs index bea5414..c5ceff3 100644 --- a/src/external/cpsio/mod.rs +++ b/src/external/cpsio/mod.rs @@ -1,10 +1,10 @@ -use crate::project::{Loader, fnlib_loader}; +use crate::project::{Loader, extlib_loader}; mod print; mod readline; pub fn cpsio() -> impl Loader { - fnlib_loader(vec![ + extlib_loader(vec![ ("print", Box::new(print::Print2)), ("readline", Box::new(readline::Readln2)) ]) diff --git a/src/external/num/mod.rs b/src/external/num/mod.rs index dbe565b..ef7b267 100644 --- a/src/external/num/mod.rs +++ b/src/external/num/mod.rs @@ -2,10 +2,10 @@ mod numeric; pub mod operators; pub use numeric::Numeric; -use crate::project::{fnlib_loader, Loader}; +use crate::project::{extlib_loader, Loader}; pub fn num() -> impl Loader { - fnlib_loader(vec![ + extlib_loader(vec![ ("add", Box::new(operators::add::Add2)), ("subtract", Box::new(operators::subtract::Subtract2)), ("multiply", Box::new(operators::multiply::Multiply2)), diff --git a/src/external/num/numeric.rs b/src/external/num/numeric.rs index d4ea6b4..20e8698 100644 --- a/src/external/num/numeric.rs +++ b/src/external/num/numeric.rs @@ -14,15 +14,29 @@ pub enum Numeric { Num(NotNan) } +impl Numeric { + /// Wrap a f64 in a Numeric + /// + /// # Panics + /// + /// if the value is NaN or Infinity.try_into() + fn num(value: T) -> Self where T: Into { + let f = value.into(); + assert!(f.is_finite(), "unrepresentable number"); + NotNan::try_from(f).map(Self::Num).expect("not a number") + } +} + impl Add for Numeric { type Output = Numeric; fn add(self, rhs: Self) -> Self::Output { match (self, rhs) { (Numeric::Uint(a), Numeric::Uint(b)) => Numeric::Uint(a + b), - (Numeric::Num(a), Numeric::Num(b)) => Numeric::Num(a + b), - (Numeric::Uint(a), Numeric::Num(b)) | (Numeric::Num(b), Numeric::Uint(a)) - => Numeric::Num(NotNan::new(a as f64).unwrap() + b) + 
(Numeric::Num(a), Numeric::Num(b)) => Numeric::num(a + b), + (Numeric::Uint(a), Numeric::Num(b)) | + (Numeric::Num(b), Numeric::Uint(a)) + => Numeric::num::(a as f64 + *b) } } } @@ -34,10 +48,10 @@ impl Sub for Numeric { match (self, rhs) { (Numeric::Uint(a), Numeric::Uint(b)) if b < a => Numeric::Uint(a - b), (Numeric::Uint(a), Numeric::Uint(b)) - => Numeric::Num(NotNan::new(a as f64 - b as f64).unwrap()), - (Numeric::Num(a), Numeric::Num(b)) => Numeric::Num(a - b), - (Numeric::Uint(a), Numeric::Num(b)) | (Numeric::Num(b), Numeric::Uint(a)) - => Numeric::Num(NotNan::new(a as f64).unwrap() - b) + => Numeric::num(a as f64 - b as f64), + (Numeric::Num(a), Numeric::Num(b)) => Numeric::num(a - b), + (Numeric::Uint(a), Numeric::Num(b)) => Numeric::num(a as f64 - *b), + (Numeric::Num(a), Numeric::Uint(b)) => Numeric::num(*a - b as f64) } } } @@ -48,8 +62,9 @@ impl Mul for Numeric { fn mul(self, rhs: Self) -> Self::Output { match (self, rhs) { (Numeric::Uint(a), Numeric::Uint(b)) => Numeric::Uint(a * b), - (Numeric::Num(a), Numeric::Num(b)) => Numeric::Num(a * b), - (Numeric::Uint(a), Numeric::Num(b)) | (Numeric::Num(b), Numeric::Uint(a)) + (Numeric::Num(a), Numeric::Num(b)) => Numeric::num(a * b), + (Numeric::Uint(a), Numeric::Num(b)) | + (Numeric::Num(b), Numeric::Uint(a)) => Numeric::Num(NotNan::new(a as f64).unwrap() * b) } } @@ -59,9 +74,9 @@ impl Div for Numeric { type Output = Numeric; fn div(self, rhs: Self) -> Self::Output { - let a = match self { Numeric::Uint(i) => i as f64, Numeric::Num(f) => *f }; - let b = match rhs { Numeric::Uint(i) => i as f64, Numeric::Num(f) => *f }; - Numeric::Num(NotNan::new(a / b).unwrap()) + let a: f64 = self.into(); + let b: f64 = rhs.into(); + Numeric::num(a / b) } } @@ -71,9 +86,9 @@ impl Rem for Numeric { fn rem(self, rhs: Self) -> Self::Output { match (self, rhs) { (Numeric::Uint(a), Numeric::Uint(b)) => Numeric::Uint(a % b), - (Numeric::Num(a), Numeric::Num(b)) => Numeric::Num(a % b), - (Numeric::Uint(a), Numeric::Num(b)) | (Numeric::Num(b), Numeric::Uint(a)) - => Numeric::Num(NotNan::new(a as f64).unwrap() % b) + (Numeric::Num(a), Numeric::Num(b)) => Numeric::num(a % b), + (Numeric::Uint(a), Numeric::Num(b)) => Numeric::num(a as f64 % *b), + (Numeric::Num(a), Numeric::Uint(b)) => Numeric::num(*a % b as f64) } } } @@ -108,4 +123,13 @@ impl From for String { Numeric::Num(n) => n.to_string() } } +} + +impl Into for Numeric { + fn into(self) -> f64 { + match self { + Numeric::Num(n) => *n, + Numeric::Uint(i) => i as f64 + } + } } \ No newline at end of file diff --git a/src/external/str/mod.rs b/src/external/str/mod.rs index 149d126..7065648 100644 --- a/src/external/str/mod.rs +++ b/src/external/str/mod.rs @@ -2,10 +2,10 @@ mod concatenate; mod cls2str; mod char_at; pub use cls2str::cls2str; -use crate::project::{Loader, fnlib_loader}; +use crate::project::{Loader, extlib_loader}; pub fn str() -> impl Loader { - fnlib_loader(vec![ + extlib_loader(vec![ ("concatenate", Box::new(concatenate::Concatenate2)) ]) } \ No newline at end of file diff --git a/src/foreign.rs b/src/foreign.rs index cb6c03e..5c9e4a2 100644 --- a/src/foreign.rs +++ b/src/foreign.rs @@ -5,20 +5,26 @@ use std::rc::Rc; use dyn_clone::DynClone; -use crate::representations::interpreted::{Clause, RuntimeError, InternalError}; +use crate::representations::interpreted::{ + Clause, RuntimeError, InternalError +}; pub trait ExternError: Display { - fn into_extern(self) -> Rc where Self: 'static + Sized { + fn into_extern(self) -> Rc + where Self: 'static + Sized { Rc::new(self) } } -/// 
Represents an externally defined function from the perspective of the executor -/// Since Orchid lacks basic numerical operations, these are also external functions. +/// Represents an externally defined function from the perspective of +/// the executor. Since Orchid lacks basic numerical operations, +/// these are also external functions. pub trait ExternFn: DynClone { fn name(&self) -> &str; fn apply(&self, arg: Clause) -> Result>; - fn hash(&self, state: &mut dyn std::hash::Hasher) { state.write_str(self.name()) } + fn hash(&self, state: &mut dyn std::hash::Hasher) { + state.write_str(self.name()) + } } impl Eq for dyn ExternFn {} @@ -26,7 +32,9 @@ impl PartialEq for dyn ExternFn { fn eq(&self, other: &Self) -> bool { self.name() == other.name() } } impl Hash for dyn ExternFn { - fn hash(&self, state: &mut H) { self.name().hash(state) } + fn hash(&self, state: &mut H) { + self.name().hash(state) + } } impl Debug for dyn ExternFn { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -41,27 +49,31 @@ pub trait Atomic: Any + Debug + DynClone where Self: 'static { fn run_once(&self) -> Result; fn run_n_times(&self, n: usize) -> Result<(Clause, usize), RuntimeError>; fn run_to_completion(&self) -> Result; - fn typestr(&self) -> &str { "clause" } } -/// Represents a black box unit of code with its own normalization steps. Typically [ExternFn] -/// will produce an [Atom] when applied to a [Clause], this [Atom] will then forward `run_*` calls -/// to the argument until it yields [InternalError::NonReducible] at which point the [Atom] will -/// validate and process the argument, returning a different [Atom] intended for processing by -/// external code, a new [ExternFn] to capture an additional argument, or an Orchid expression +/// Represents a black box unit of code with its own normalization steps. +/// Typically [ExternFn] will produce an [Atom] when applied to a [Clause], +/// this [Atom] will then forward `run_*` calls to the argument until it +/// yields [InternalError::NonReducible] at which point the [Atom] will +/// validate and process the argument, returning a different [Atom] +/// intended for processing by external code, a new [ExternFn] to capture +/// an additional argument, or an Orchid expression /// to pass control back to the interpreter. 
pub struct Atom(pub Box); impl Atom { pub fn new(data: T) -> Self { Self(Box::new(data) as Box) } - pub fn data(&self) -> &dyn Atomic { self.0.as_ref() as &dyn Atomic } + pub fn data(&self) -> &dyn Atomic { + self.0.as_ref() as &dyn Atomic + } pub fn try_cast(&self) -> Result<&T, ()> { self.data().as_any().downcast_ref().ok_or(()) } pub fn is(&self) -> bool { self.data().as_any().is::() } pub fn cast(&self) -> &T { - self.data().as_any().downcast_ref().expect("Type mismatch on Atom::cast") + self.data().as_any().downcast_ref() + .expect("Type mismatch on Atom::cast") } } @@ -78,7 +90,7 @@ impl Hash for Atom { } impl Debug for Atom { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "##ATOM[{:?}]:{:?}##", self.data(), self.data().typestr()) + write!(f, "##ATOM[{:?}]##", self.data()) } } impl Eq for Atom {} diff --git a/src/main.rs b/src/main.rs index 21dbb73..6799495 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,4 @@ #![feature(specialization)] -#![feature(core_intrinsics)] #![feature(adt_const_params)] #![feature(generic_const_exprs)] #![feature(generators, generator_trait)] @@ -9,7 +8,6 @@ #![feature(hasher_prefixfree_extras)] #![feature(closure_lifetime_binder)] #![feature(generic_arg_infer)] - use std::{env::current_dir, collections::HashMap}; // mod executor; @@ -22,6 +20,7 @@ mod scheduler; pub(crate) mod foreign; mod external; mod foreign_macros; +use lasso::Rodeo; pub use representations::ast; use ast::{Expr, Clause}; // use representations::typed as t; @@ -54,13 +53,13 @@ export (...$a - ...$b:1) =1001=> (subtract (...$a) (...$b)) export (...$a * ...$b) =1000=> (multiply (...$a) (...$b)) export (...$a % ...$b:1) =1000=> (remainder (...$a) (...$b)) export (...$a / ...$b:1) =1000=> (divide (...$a) (...$b)) -export (...$a = ...$b) =1002=> (equals (...$a) (...$b)) +export (...$a == ...$b) =1002=> (equals (...$a) (...$b)) export (...$a ++ ...$b) =1003=> (concatenate (...$a) (...$b)) export do { ...$statement ; ...$rest:1 } =10_001=> ( statement (...$statement) do { ...$rest } ) -export do { ...$statement } =10_000=> (...$statement) +export do { ...$return } =10_000=> (...$return) export statement (let $_name = ...$value) ...$next =10_000=> ( (\$_name. 
...$next) (...$value) @@ -86,11 +85,15 @@ fn initial_tree() -> Mrc<[Expr]> { #[allow(unused)] fn load_project() { - let collect_rules = rule_collector(map_loader(HashMap::from([ - ("std", std().boxed()), - ("prelude", string_loader(PRELUDE).boxed()), - ("mod", file_loader(current_dir().expect("Missing CWD!")).boxed()) - ]))); + let mut rodeo = Rodeo::default(); + let collect_rules = rule_collector( + rodeo, + map_loader(HashMap::from([ + ("std", std().boxed()), + ("prelude", string_loader(PRELUDE).boxed()), + ("mod", file_loader(current_dir().expect("Missing CWD!")).boxed()) + ])) + ); let rules = match collect_rules.try_find(&literal(&["mod", "main"])) { Ok(rules) => rules, Err(err) => if let ModuleError::Syntax(pe) = err { @@ -124,11 +127,5 @@ fn load_project() { } fn main() { - // lambda_notation_debug(); load_project(); - // let mut std = std(); - // match std.load(&["parse_float"]) { - // Ok(_) => println!("wtf"), - // Err(e) => panic!("{:?}", e) - // } } diff --git a/src/parse/expression.rs b/src/parse/expression.rs index 1d52544..4e9c94b 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -1,9 +1,10 @@ +use std::rc::Rc; + use chumsky::{self, prelude::*, Parser}; -use mappable_rc::Mrc; +use lasso::Spur; use crate::enum_parser; use crate::representations::Primitive; use crate::representations::{Literal, ast::{Clause, Expr}}; -use crate::utils::to_mrc_slice; use super::lexer::Lexeme; @@ -12,18 +13,22 @@ fn sexpr_parser

( expr: P ) -> impl Parser> + Clone where P: Parser> + Clone { - Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, to_mrc_slice(b))) + Lexeme::paren_parser(expr.repeated()) + .map(|(del, b)| Clause::S(del, Rc::new(b))) } -/// Parses `\name.body` or `\name:type.body` where name is any valid name and type and body are -/// both expressions. Comments are allowed and ignored everywhere in between the tokens -fn lambda_parser

( - expr: P -) -> impl Parser> + Clone -where P: Parser> + Clone { +/// Parses `\name.body` or `\name:type.body` where name is any valid name +/// and type and body are both expressions. Comments are allowed +/// and ignored everywhere in between the tokens +fn lambda_parser<'a, P, F>( + expr: P, intern: &'a F +) -> impl Parser> + Clone + 'a +where + P: Parser> + Clone + 'a, + F: Fn(&str) -> Spur + 'a { just(Lexeme::BS) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .ignore_then(enum_parser!(Lexeme::Name)) + .ignore_then(namelike_parser(intern)) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) .then( just(Lexeme::Type) @@ -35,20 +40,21 @@ where P: Parser> + Clone { .then_ignore(just(Lexeme::name("."))) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) .then(expr.repeated().at_least(1)) - .map(|((name, typ), body): ((String, Vec), Vec)| { - // for ent in &mut body { ent.bind_parameter(&name) }; - Clause::Lambda(name, to_mrc_slice(typ), to_mrc_slice(body)) + .map(|((name, typ), body): ((Clause, Vec), Vec)| { + Clause::Lambda(Rc::new(name), Rc::new(typ), Rc::new(body)) }) } /// see [lambda_parser] but `@` instead of `\` and the name is optional -fn auto_parser

( - expr: P -) -> impl Parser> + Clone -where P: Parser> + Clone { +fn auto_parser<'a, P, F>( + expr: P, intern: &'a F +) -> impl Parser> + Clone + 'a +where + P: Parser> + Clone + 'a, + F: Fn(&str) -> Spur + 'a { just(Lexeme::At) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .ignore_then(enum_parser!(Lexeme::Name).or_not()) + .ignore_then(namelike_parser(intern).or_not()) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) .then( just(Lexeme::Type) @@ -60,23 +66,27 @@ where P: Parser> + Clone { .then_ignore(just(Lexeme::name("."))) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) .then(expr.repeated().at_least(1)) - .try_map(|((name, typ), body): ((Option, Vec), Vec), s| { + .try_map(|((name, typ), body): ((Option, Vec), Vec), s| { if name.is_none() && typ.is_empty() { Err(Simple::custom(s, "Auto without name or type has no effect")) - } else { - Ok(Clause::Auto(name, to_mrc_slice(typ), to_mrc_slice(body))) + } else { + Ok(Clause::Auto(name.map(Rc::new), Rc::new(typ), Rc::new(body))) } }) } /// Parses a sequence of names separated by ::
/// Comments are allowed and ignored in between -fn name_parser() -> impl Parser, Error = Simple> + Clone { - enum_parser!(Lexeme::Name).separated_by( - enum_parser!(Lexeme::Comment).repeated() - .then(just(Lexeme::NS)) - .then(enum_parser!(Lexeme::Comment).repeated()) - ).at_least(1) +pub fn ns_name_parser<'a, F>(intern: &'a F) +-> impl Parser, Error = Simple> + Clone + 'a +where F: Fn(&str) -> Spur + 'a { + enum_parser!(Lexeme::Name) + .map(|s| intern(&s)) + .separated_by( + enum_parser!(Lexeme::Comment).repeated() + .then(just(Lexeme::NS)) + .then(enum_parser!(Lexeme::Comment).repeated()) + ).at_least(1) } /// Parse any legal argument name starting with a `$` @@ -87,42 +97,59 @@ fn placeholder_parser() -> impl Parser> + }) } +pub fn namelike_parser<'a, F>(intern: &'a F) +-> impl Parser> + Clone + 'a +where F: Fn(&str) -> Spur + 'a { + choice(( + just(Lexeme::name("...")).to(true) + .or(just(Lexeme::name("..")).to(false)) + .then(placeholder_parser()) + .then( + just(Lexeme::Type) + .ignore_then(enum_parser!(Lexeme::Uint)) + .or_not().map(Option::unwrap_or_default) + ) + .map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some(( + prio.try_into().unwrap(), + nonzero + ))}), + ns_name_parser(intern) + .map(|qualified| Clause::Name(Rc::new(qualified))), + )) +} + +pub fn clause_parser<'a, P, F>( + expr: P, intern: &'a F +) -> impl Parser> + Clone + 'a +where + P: Parser> + Clone + 'a, + F: Fn(&str) -> Spur + 'a { + enum_parser!(Lexeme::Comment).repeated() + .ignore_then(choice(( + enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str) + .map(Primitive::Literal).map(Clause::P), + placeholder_parser().map(|key| Clause::Placeh{key, vec: None}), + namelike_parser(intern), + sexpr_parser(expr.clone()), + lambda_parser(expr.clone(), intern), + auto_parser(expr.clone(), intern), + just(Lexeme::At).ignore_then(expr.clone()).map(|arg| { + Clause::Explicit(Rc::new(arg)) + }) + ))).then_ignore(enum_parser!(Lexeme::Comment).repeated()) +} + /// Parse an expression -pub fn xpr_parser() -> impl Parser> { +pub fn xpr_parser<'a, F>(intern: &'a F) +-> impl Parser> + 'a +where F: Fn(&str) -> Spur + 'a { recursive(|expr| { - let clause = - enum_parser!(Lexeme::Comment).repeated() - .ignore_then(choice(( - enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str).map(Primitive::Literal).map(Clause::P), - placeholder_parser().map(|key| Clause::Placeh{key, vec: None}), - just(Lexeme::name("...")).to(true) - .or(just(Lexeme::name("..")).to(false)) - .then(placeholder_parser()) - .then( - just(Lexeme::Type) - .ignore_then(enum_parser!(Lexeme::Uint)) - .or_not().map(Option::unwrap_or_default) - ) - .map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some(( - prio.try_into().unwrap(), - nonzero - ))}), - name_parser().map(|qualified| Clause::Name { - local: if qualified.len() == 1 {Some(qualified[0].clone())} else {None}, - qualified: to_mrc_slice(qualified) - }), - sexpr_parser(expr.clone()), - lambda_parser(expr.clone()), - auto_parser(expr.clone()), - just(Lexeme::At).ignore_then(expr.clone()).map(|arg| { - Clause::Explicit(Mrc::new(arg)) - }) - ))).then_ignore(enum_parser!(Lexeme::Comment).repeated()); + let clause = clause_parser(expr, intern); clause.clone().then( just(Lexeme::Type) .ignore_then(clause.clone()) .repeated() ) - .map(|(val, typ)| Expr(val, to_mrc_slice(typ))) + .map(|(val, typ)| Expr(val, Rc::new(typ))) }).labelled("Expression") } diff --git a/src/parse/import.rs b/src/parse/import.rs index ef3cd79..5f49a29 100644 --- a/src/parse/import.rs +++ b/src/parse/import.rs @@ -1,34 +1,33 @@ +use 
std::rc::Rc; + use chumsky::{Parser, prelude::*}; use itertools::Itertools; -use mappable_rc::Mrc; +use lasso::Spur; +use crate::representations::sourcefile::Import; use crate::utils::iter::{box_once, box_flatten, into_boxed_iter, BoxedIterIter}; -use crate::utils::{to_mrc_slice, mrc_derive}; use crate::{enum_parser, box_chain}; use super::lexer::Lexeme; -#[derive(Debug, Clone)] -pub struct Import { - pub path: Mrc<[String]>, - /// If name is None, this is a wildcard import - pub name: Option -} - /// initialize a BoxedIter> with a single element. -fn init_table(name: String) -> BoxedIterIter<'static, String> { +fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> { // I'm not at all confident that this is a good approach. box_once(box_once(name)) } /// Parse an import command -/// Syntax is same as Rust's `use` except the verb is import, no trailing semi -/// and the delimiters are plain parentheses. Namespaces should preferably contain -/// crossplatform filename-legal characters but the symbols are explicitly allowed -/// to go wild. There's a blacklist in [name] -pub fn import_parser() -> impl Parser, Error = Simple> { - // TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad. - recursive(|expr: Recursive, Simple>| { - enum_parser!(Lexeme::Name) +/// Syntax is same as Rust's `use` except the verb is import, no trailing +/// semi and the delimiters are plain parentheses. Namespaces should +/// preferably contain crossplatform filename-legal characters but the +/// symbols are explicitly allowed to go wild. +/// There's a blacklist in [name] +pub fn import_parser<'a, F>(intern: &'a F) +-> impl Parser, Error = Simple> + 'a +where F: Fn(&str) -> Spur + 'a { + let globstar = intern("*"); + // TODO: this algorithm isn't cache friendly and copies a lot + recursive(move |expr:Recursive, Simple>| { + enum_parser!(Lexeme::Name).map(|s| intern(s.as_str())) .separated_by(just(Lexeme::NS)) .then( just(Lexeme::NS) @@ -39,15 +38,17 @@ pub fn import_parser() -> impl Parser, Error = Simple, Option>)| -> BoxedIterIter { + .map(|(name, opt_post): (Vec, Option>)| + -> BoxedIterIter { if let Some(post) = opt_post { Box::new(post.map(move |el| { box_chain!(name.clone().into_iter(), el) @@ -56,14 +57,17 @@ pub fn import_parser() -> impl Parser, Error = Simple Some(Import { path: path_prefix, name: None }), - name => Some(Import { path: path_prefix, name: Some(name.to_owned()) }) - } + let mut path = namespaces.collect_vec(); + let name = path.pop()?; + Some(Import { + path: Rc::new(path), + name: { + if name == globstar { None } + else { Some(name.to_owned()) } + } + }) }).collect() }).labelled("import") } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 63cd587..6a112e8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -9,12 +9,8 @@ mod import; mod enum_parser; mod parse; -pub use sourcefile::FileEntry; pub use sourcefile::line_parser; -pub use sourcefile::imports; -pub use sourcefile::exported_names; pub use lexer::{lexer, Lexeme, Entry as LexerEntry}; pub use name::is_op; pub use parse::{parse, reparse, ParseError}; -pub use import::Import; pub use number::{float_parser, int_parser}; \ No newline at end of file diff --git a/src/parse/parse.rs b/src/parse/parse.rs index ecc31c1..328ebda 100644 --- a/src/parse/parse.rs +++ b/src/parse/parse.rs @@ -2,11 +2,12 @@ use std::{ops::Range, fmt::Debug}; use chumsky::{prelude::{Simple, end}, Stream, Parser}; use itertools::Itertools; +use lasso::Spur; use thiserror::Error; -use crate::{ast::Rule, 
parse::{lexer::LexedText, sourcefile::split_lines}}; +use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}, representations::sourcefile::FileEntry}; -use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry}; +use super::{Lexeme, lexer, line_parser, LexerEntry}; #[derive(Error, Debug, Clone)] @@ -17,14 +18,19 @@ pub enum ParseError { Ast(Vec>) } -pub fn parse<'a, Op>(ops: &[Op], data: &str) -> Result, ParseError> -where Op: 'a + AsRef + Clone { +pub fn parse<'a, Op, F>( + ops: &[Op], data: &str, intern: &F +) -> Result, ParseError> +where + Op: 'a + AsRef + Clone, + F: Fn(&str) -> Spur +{ let lexie = lexer(ops); let token_batchv = split_lines(data).map(|line| { lexie.parse(line).map_err(ParseError::Lex) }).collect::, _>>()?; println!("Lexed:\n{:?}", LexedText(token_batchv.clone())); - let parsr = line_parser().then_ignore(end()); + let parsr = line_parser(intern).then_ignore(end()); let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| { !v.is_empty() }).map(|v| { @@ -47,10 +53,15 @@ where Op: 'a + AsRef + Clone { else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) } } -pub fn reparse<'a, Op>(ops: &[Op], data: &str, pre: &[FileEntry]) +pub fn reparse<'a, Op, F>( + ops: &[Op], data: &str, pre: &[FileEntry], intern: &F +) -> Result, ParseError> -where Op: 'a + AsRef + Clone { - let result = parse(ops, data)?; +where + Op: 'a + AsRef + Clone, + F: Fn(&str) -> Spur +{ + let result = parse(ops, data, intern)?; Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| { if let FileEntry::Rule(Rule{source, ..}, _) = &mut output { if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor { diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs index 7715fc4..9a9809b 100644 --- a/src/parse/sourcefile.rs +++ b/src/parse/sourcefile.rs @@ -1,164 +1,64 @@ -use std::collections::HashSet; use std::iter; +use std::rc::Rc; -use crate::{enum_parser, box_chain}; -use crate::ast::{Expr, Clause, Rule}; -use crate::utils::{to_mrc_slice, one_mrc_slice}; -use crate::utils::Stackframe; -use crate::utils::iter::box_empty; +use crate::representations::sourcefile::FileEntry; +use crate::enum_parser; +use crate::ast::{Expr, Rule}; -use super::expression::xpr_parser; -use super::import::{self, Import}; +use super::expression::{xpr_parser, ns_name_parser}; use super::import::import_parser; use super::lexer::Lexeme; use chumsky::{Parser, prelude::*}; +use lasso::Spur; use ordered_float::NotNan; -use lazy_static::lazy_static; -/// Anything we might encounter in a file -#[derive(Debug, Clone)] -pub enum FileEntry { - Import(Vec), - Comment(String), - /// The bool indicates whether the rule is exported - whether tokens uniquely defined inside it - /// should be exported - Rule(Rule, bool), - Export(Vec>) -} - -fn visit_all_names_clause_recur<'a, F>( - clause: &'a Clause, - binds: Stackframe, - cb: &mut F -) where F: FnMut(&'a [String]) { - match clause { - Clause::Auto(name, typ, body) => { - for x in typ.iter() { - visit_all_names_expr_recur(x, binds.clone(), cb) - } - let binds_dup = binds.clone(); - let new_binds = if let Some(n) = name { - binds_dup.push(n.to_owned()) - } else { - binds - }; - for x in body.iter() { - visit_all_names_expr_recur(x, new_binds.clone(), cb) - } - }, - Clause::Lambda(name, typ, body) => { - for x in typ.iter() { - visit_all_names_expr_recur(x, binds.clone(), cb) - } - for x in body.iter() { - visit_all_names_expr_recur(x, binds.push(name.to_owned()), cb) - } - }, - Clause::S(_, body) => for x in body.iter() { - 
visit_all_names_expr_recur(x, binds.clone(), cb) - }, - Clause::Name{ local: Some(name), qualified } => { - if binds.iter().all(|x| x != name) { - cb(qualified) - } - } - _ => (), - } -} - -/// Recursively iterate through all "names" in an expression. It also finds a lot of things that -/// aren't names, such as all bound parameters. Generally speaking, this is not a very -/// sophisticated search. -/// -/// TODO: find a way to exclude parameters -fn visit_all_names_expr_recur<'a, F>( - expr: &'a Expr, - binds: Stackframe, - cb: &mut F -) where F: FnMut(&'a [String]) { - let Expr(val, typ) = expr; - visit_all_names_clause_recur(val, binds.clone(), cb); - for typ in typ.as_ref() { - visit_all_names_clause_recur(typ, binds.clone(), cb); - } -} - -/// Collect all names that occur in an expression -fn find_all_names(expr: &Expr) -> HashSet<&[String]> { - let mut ret = HashSet::new(); - visit_all_names_expr_recur(expr, Stackframe::new(String::new()), &mut |n| { - if !n.last().unwrap().starts_with('$') { - ret.insert(n); - } - }); - ret -} - -fn rule_parser() -> impl Parser, NotNan, Vec), Error = Simple> { - xpr_parser().repeated() +fn rule_parser<'a, F>(intern: &'a F) -> impl Parser, NotNan, Vec +), Error = Simple> + 'a +where F: Fn(&str) -> Spur + 'a { + xpr_parser(intern).repeated() .then(enum_parser!(Lexeme::Rule)) - .then(xpr_parser().repeated()) - // .map(|((lhs, prio), rhs)| ) + .then(xpr_parser(intern).repeated()) .map(|((a, b), c)| (a, b, c)) .labelled("Rule") } -pub fn line_parser() -> impl Parser> { +pub fn line_parser<'a, F>(intern: &'a F) +-> impl Parser> + 'a +where F: Fn(&str) -> Spur + 'a { choice(( // In case the usercode wants to parse doc enum_parser!(Lexeme >> FileEntry; Comment), just(Lexeme::Import) - .ignore_then(import_parser().map(FileEntry::Import)) + .ignore_then(import_parser(intern).map(FileEntry::Import)) .then_ignore(enum_parser!(Lexeme::Comment).or_not()), just(Lexeme::Export).map_err_with_span(|e, s| { println!("{:?} could not yield an export", s); e }).ignore_then( just(Lexeme::NS).ignore_then( - enum_parser!(Lexeme::Name).map(|n| vec![n]) + ns_name_parser(intern).map(Rc::new) .separated_by(just(Lexeme::name(","))) .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('('))) ).map(FileEntry::Export) - .or(rule_parser().map(|(source, prio, target)| { + .or(rule_parser(intern).map(|(source, prio, target)| { FileEntry::Rule(Rule { - source: to_mrc_slice(source), + source: Rc::new(source), prio, - target: to_mrc_slice(target) + target: Rc::new(target) }, true) })) ), // This could match almost anything so it has to go last - rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{ - source: to_mrc_slice(source), - prio, - target: to_mrc_slice(target) - }, false)), + rule_parser(intern).map(|(source, prio, target)| { + FileEntry::Rule(Rule{ + source: Rc::new(source), + prio, + target: Rc::new(target) + }, false) + }), )) } -/// Collect all exported names (and a lot of other words) from a file -pub fn exported_names(src: &[FileEntry]) -> HashSet<&[String]> { - src.iter().flat_map(|ent| match ent { - FileEntry::Rule(Rule{source, target, ..}, true) => - box_chain!(source.iter(), target.iter()), - _ => box_empty() - }).flat_map(find_all_names).chain( - src.iter().filter_map(|ent| { - if let FileEntry::Export(names) = ent {Some(names.iter())} else {None} - }).flatten().map(Vec::as_slice) - ).collect() -} - -/// Summarize all imports from a file in a single list of qualified names -pub fn imports<'a, 'b, I>( - src: I -) -> impl Iterator + 'a -where I: Iterator + 'a 
{ - src.filter_map(|ent| match ent { - FileEntry::Import(impv) => Some(impv.iter()), - _ => None - }).flatten() -} - pub fn split_lines(data: &str) -> impl Iterator { let mut source = data.char_indices(); let mut last_slice = 0; diff --git a/src/project/loading/ext_loader.rs b/src/project/loading/ext_loader.rs index e658dfc..06bbbbe 100644 --- a/src/project/loading/ext_loader.rs +++ b/src/project/loading/ext_loader.rs @@ -1,7 +1,33 @@ -use crate::parse::FileEntry; +use lasso::Spur; -use super::{Loader, Loaded}; +use crate::representations::sourcefile::FileEntry; -pub fn ext_loader(data: Vec) -> impl Loader { - move |_: &[&str]| Ok(Loaded::External(data.clone())) +use super::{Loader, Loaded, LoadingError}; + +pub fn ext_loader<'a, T, F>( + data: Vec, + mut submods: Vec<(&'static str, T)>, + intern: &'a F +) -> impl Loader + 'a +where + T: Loader + 'a, + F: Fn(&str) -> Spur { + move |path: &[&str]| { + let (step, rest) = match path.split_first() { + None => return Ok(Loaded::AST( + data.iter().cloned().chain( + submods.iter().map(|(s, _)| FileEntry::LazyModule(intern(s))) + ).collect() + )), + Some(t) => t + }; + if let Some((_, l)) = submods.iter_mut().find(|(s, l)| s == step) { + l.load(rest) + } else { + let errtyp = if rest.is_empty() { + LoadingError::UnknownNode + } else {LoadingError::Missing}; + Err(errtyp(step.to_string())) + } + } } \ No newline at end of file diff --git a/src/project/loading/extlib_loader.rs b/src/project/loading/extlib_loader.rs new file mode 100644 index 0000000..453ab7c --- /dev/null +++ b/src/project/loading/extlib_loader.rs @@ -0,0 +1,34 @@ +use std::rc::Rc; + +use lasso::Spur; +use ordered_float::NotNan; + +use crate::representations::Primitive; +use crate::representations::sourcefile::FileEntry; +use crate::foreign::ExternFn; +use crate::ast::{Rule, Clause}; + +use super::{Loader, ext_loader}; + +pub fn extlib_loader<'a, T, F>( + fns: Vec<(&'static str, Box)>, + submods: Vec<(&'static str, T)>, + intern: &'a F +) -> impl Loader + 'a +where + T: Loader + 'a, + F: Fn(&str) -> Spur + 'a +{ + let entries = ( + fns.into_iter().map(|(name, xfn)| FileEntry::Rule(Rule { + source: Rc::new(vec![ + Clause::Name(Rc::new(vec![intern(name)])).into_expr(), + ]), + prio: NotNan::try_from(0.0f64).unwrap(), + target: Rc::new(vec![ + Clause::P(Primitive::ExternFn(xfn)).into_expr(), + ]) + }, true)) + ).collect(); + ext_loader(entries, submods, intern) +} \ No newline at end of file diff --git a/src/project/loading/file_loader.rs b/src/project/loading/file_loader.rs index 3610b46..ff5be5c 100644 --- a/src/project/loading/file_loader.rs +++ b/src/project/loading/file_loader.rs @@ -1,22 +1,34 @@ use std::fs::read_to_string; use std::path::PathBuf; +use lasso::Spur; + +use crate::representations::sourcefile::FileEntry; + use super::{Loaded, Loader, LoadingError}; -pub fn file_loader(proj: PathBuf) -> impl Loader + 'static { +pub fn file_loader<'a, F>( + proj: PathBuf, + intern: &'a F +) -> impl Loader + 'a +where F: Fn(&str) -> Spur + 'a { move |path: &[&str]| { let dirpath = proj.join(path.join("/")); if dirpath.is_dir() || dirpath.is_symlink() { - return Ok(Loaded::Namespace( + return Ok(Loaded::AST( dirpath.read_dir()? .filter_map(|entr| { let ent = entr.ok()?; let typ = ent.file_type().ok()?; let path = ent.path(); if typ.is_dir() || typ.is_symlink() { - Some(ent.file_name().to_string_lossy().into_owned()) + let name = ent.file_name(); + let spur = intern(name.to_string_lossy().as_ref()); + Some(FileEntry::LazyModule(spur)) } else if typ.is_file() && path.extension()? 
== "orc" { - Some(path.file_stem()?.to_string_lossy().into_owned()) + let name = path.file_stem().expect("extension tested above"); + let spur = intern(name.to_string_lossy().as_ref()); + Some(FileEntry::LazyModule(spur)) } else { None } }) .collect() @@ -24,7 +36,7 @@ pub fn file_loader(proj: PathBuf) -> impl Loader + 'static { } let orcfile = dirpath.with_extension("orc"); if orcfile.is_file() { - read_to_string(orcfile).map(Loaded::Module).map_err(LoadingError::from) + read_to_string(orcfile).map(Loaded::Source).map_err(LoadingError::from) } else { let pathstr = dirpath.to_string_lossy().into_owned(); Err(if dirpath.exists() { LoadingError::UnknownNode(pathstr) } diff --git a/src/project/loading/fnlib_loader.rs b/src/project/loading/fnlib_loader.rs deleted file mode 100644 index ccb5c40..0000000 --- a/src/project/loading/fnlib_loader.rs +++ /dev/null @@ -1,23 +0,0 @@ -use itertools::Itertools; -use ordered_float::NotNan; - -use crate::parse::FileEntry; -use crate::representations::Primitive; -use crate::utils::{one_mrc_slice, mrc_empty_slice}; -use crate::foreign::ExternFn; -use crate::ast::{Rule, Expr, Clause}; - -use super::{Loader, ext_loader}; - -pub fn fnlib_loader(src: Vec<(&'static str, Box)>) -> impl Loader { - let entries = src.into_iter().map(|(name, xfn)| FileEntry::Rule(Rule { - source: one_mrc_slice(Expr(Clause::Name{ - local: Some(name.to_string()), - qualified: one_mrc_slice(name.to_string()) - }, mrc_empty_slice())), - prio: NotNan::try_from(0.0f64).unwrap(), - target: one_mrc_slice(Expr(Clause::P(Primitive::ExternFn(xfn)), mrc_empty_slice())) - }, true)) - .collect_vec(); - ext_loader(entries) -} \ No newline at end of file diff --git a/src/project/loading/map_loader.rs b/src/project/loading/map_loader.rs index 22e9a05..908a577 100644 --- a/src/project/loading/map_loader.rs +++ b/src/project/loading/map_loader.rs @@ -5,7 +5,7 @@ use super::{Loader, LoadingError, Loaded}; pub fn map_loader<'a, T: Loader + 'a>(mut map: HashMap<&'a str, T>) -> impl Loader + 'a { move |path: &[&str]| { let (key, subpath) = if let Some(sf) = path.split_first() {sf} - else {return Ok(Loaded::Module(map.keys().cloned().collect()))}; + else {return Ok(Loaded::Source(map.keys().cloned().collect()))}; let sub = if let Some(sub) = map.get_mut(key.to_string().as_str()) {sub} else {return Err( if subpath.len() == 0 {LoadingError::UnknownNode(path.join("::"))} diff --git a/src/project/loading/mod.rs b/src/project/loading/mod.rs index 8b49960..bd69f13 100644 --- a/src/project/loading/mod.rs +++ b/src/project/loading/mod.rs @@ -2,21 +2,19 @@ mod file_loader; mod ext_loader; mod string_loader; mod map_loader; -mod fnlib_loader; -mod overlay_loader; +mod extlib_loader; mod prefix_loader; pub use file_loader::file_loader; pub use ext_loader::ext_loader; -pub use fnlib_loader::fnlib_loader; +pub use extlib_loader::extlib_loader; pub use string_loader::string_loader; pub use map_loader::map_loader; -pub use overlay_loader::overlay_loader; pub use prefix_loader::prefix_loader; use std::{rc::Rc, io}; -use crate::parse::FileEntry; +use crate::representations::sourcefile::FileEntry; #[derive(Clone, Debug)] pub enum LoadingError { @@ -34,11 +32,10 @@ impl From for LoadingError { } } -#[derive(Debug, Clone)] +#[derive(Clone)] pub enum Loaded { - Module(String), - Namespace(Vec), - External(Vec) + Source(String), + AST(Vec) } pub trait Loader { diff --git a/src/project/loading/overlay_loader.rs b/src/project/loading/overlay_loader.rs deleted file mode 100644 index c4d0458..0000000 --- 
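The loaders in this module (ext_loader, map_loader, file_loader) share one dispatch shape: pop the first path segment, recurse into the matching submodule, and distinguish an unknown final segment from a missing intermediate one. A standalone sketch of that shape over plain std types (Node, LoadErr and load are illustrative stand-ins, not this crate's Loader API):

use std::collections::HashMap;

// Illustrative stand-ins for Loaded::Source / Loaded::AST and LoadingError.
#[derive(Debug)]
enum Node {
    Source(String),
    Dir(HashMap<String, Node>),
}

#[derive(Debug)]
enum LoadErr {
    UnknownNode(String),
    Missing(String),
}

// Walk the path one segment at a time, mirroring the split_first dispatch
// used by ext_loader and map_loader.
fn load<'a>(mut node: &'a Node, path: &[&str]) -> Result<&'a Node, LoadErr> {
    for (i, step) in path.iter().enumerate() {
        match node {
            Node::Dir(children) => {
                node = children.get(*step).ok_or_else(|| {
                    // Unknown leaf vs. a missing intermediate step
                    if i + 1 == path.len() { LoadErr::UnknownNode(step.to_string()) }
                    else { LoadErr::Missing(step.to_string()) }
                })?;
            }
            Node::Source(_) => return Err(LoadErr::UnknownNode(step.to_string())),
        }
    }
    Ok(node)
}

fn main() {
    let tree = Node::Dir(HashMap::from([(
        "prelude".to_string(),
        Node::Source("export (+)".to_string()),
    )]));
    println!("{:?}", load(&tree, &["prelude"]));
    println!("{:?}", load(&tree, &["missing", "thing"]));
}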
a/src/project/loading/overlay_loader.rs +++ /dev/null @@ -1,19 +0,0 @@ -use super::{Loader, LoadingError}; - -pub fn overlay_loader(mut base: impl Loader, mut overlay: impl Loader) -> impl Loader { - move |path: &[&str]| match overlay.load(path) { - ok@Ok(_) => ok, - e@Err(LoadingError::IOErr(_)) => e, - Err(_) => base.load(path) - } -} - -#[macro_export] -macro_rules! overlay_loader { - ($left:expr, $right:expr) => { - overlay_loader($left, $right) - }; - ($left:expr, $mid:expr, $($rest:expr),+) => { - overlay_loader($left, overlay_loader!($mid, $($rest),+)) - }; -} \ No newline at end of file diff --git a/src/project/loading/string_loader.rs b/src/project/loading/string_loader.rs index 423efeb..ad7e8c2 100644 --- a/src/project/loading/string_loader.rs +++ b/src/project/loading/string_loader.rs @@ -1,5 +1,5 @@ use super::{Loader, Loaded}; pub fn string_loader<'a>(data: &'a str) -> impl Loader + 'a { - move |_: &[&str]| Ok(Loaded::Module(data.to_string())) + move |_: &[&str]| Ok(Loaded::Source(data.to_string())) } \ No newline at end of file diff --git a/src/project/mod.rs b/src/project/mod.rs index 4411bc4..96eeb89 100644 --- a/src/project/mod.rs +++ b/src/project/mod.rs @@ -8,7 +8,7 @@ pub use module_error::ModuleError; pub use rule_collector::rule_collector; pub use loading::{ Loader, Loaded, LoadingError, - ext_loader, file_loader, string_loader, map_loader, fnlib_loader, - overlay_loader, prefix_loader + ext_loader, file_loader, string_loader, map_loader, extlib_loader, + prefix_loader }; use crate::ast::Rule; \ No newline at end of file diff --git a/src/project/name_resolver.rs b/src/project/name_resolver.rs index 543a0b1..0b4a458 100644 --- a/src/project/name_resolver.rs +++ b/src/project/name_resolver.rs @@ -1,73 +1,80 @@ use std::collections::HashMap; -use mappable_rc::Mrc; +use std::rc::Rc; +use itertools::Itertools; +use lasso::Spur; use thiserror::Error; -use crate::utils::{Stackframe, to_mrc_slice}; +use crate::utils::Stackframe; use crate::ast::{Expr, Clause}; -type ImportMap = HashMap>; +type ImportMap = HashMap>>; #[derive(Debug, Clone, Error)] pub enum ResolutionError { #[error("Reference cycle at {0:?}")] - Cycle(Vec>), + Cycle(Vec>>), #[error("No module provides {0:?}")] - NoModule(Mrc<[String]>), + NoModule(Rc>), #[error(transparent)] Delegate(#[from] Err) } -type ResolutionResult = Result, ResolutionError>; +type ResolutionResult = Result>, ResolutionError>; -/// Recursively resolves symbols to their original names in expressions while caching every -/// resolution. This makes the resolution process lightning fast and invalidation completely -/// impossible since the intermediate steps of a resolution aren't stored. +/// Recursively resolves symbols to their original names in expressions +/// while caching every resolution. This makes the resolution process +/// lightning fast and invalidation completely impossible since +/// the intermediate steps of a resolution aren't stored. 
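Concretely, resolution keeps following import aliases until a name is locally defined, refusing cycles along the way. A standalone sketch of just that chasing-and-cycle-detection step over a flat alias table, leaving out the per-module import maps and the cache (illustrative names, not this crate's API):

use std::collections::{HashMap, HashSet};

#[derive(Debug)]
enum ResolveErr {
    Cycle(Vec<String>),
}

// Follow aliases until a name has no further alias; report a cycle if a
// name repeats, mirroring the Stackframe check in find_origin_rec.
fn find_origin(
    aliases: &HashMap<String, String>,
    start: &str,
) -> Result<String, ResolveErr> {
    let mut seen = HashSet::new();
    let mut trail = Vec::new();
    let mut current = start.to_string();
    while let Some(next) = aliases.get(&current) {
        if !seen.insert(current.clone()) {
            return Err(ResolveErr::Cycle(trail));
        }
        trail.push(current);
        current = next.clone();
    }
    Ok(current)
}

fn main() {
    let aliases = HashMap::from([
        ("prelude::add".to_string(), "num::add".to_string()),
        ("main::add".to_string(), "prelude::add".to_string()),
    ]);
    println!("{:?}", find_origin(&aliases, "main::add")); // Ok("num::add")

    let cyclic = HashMap::from([
        ("a".to_string(), "b".to_string()),
        ("b".to_string(), "a".to_string()),
    ]);
    println!("{:?}", find_origin(&cyclic, "a")); // Err(Cycle(["a", "b"]))
}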
pub struct NameResolver { - cache: HashMap, ResolutionResult>, - get_modname: FSplit, + cache: HashMap>, ResolutionResult>, + split: FSplit, get_imports: FImps } impl NameResolver where - FSplit: FnMut(Mrc<[String]>) -> Option>, - FImps: FnMut(Mrc<[String]>) -> Result, + FSplit: FnMut(Rc>) -> Option<(Rc>, Rc>)>, + FImps: FnMut(Rc>) -> Result, E: Clone { - pub fn new(get_modname: FSplit, get_imports: FImps) -> Self { + pub fn new(split: FSplit, get_imports: FImps) -> Self { Self { cache: HashMap::new(), - get_modname, + split, get_imports } } + fn split(&self, symbol: Rc>) + -> Result<(Rc>, Rc>), ResolutionError> { + let (path, name) = (self.split)(symbol.clone()) + .ok_or_else(|| ResolutionError::NoModule(symbol.clone()))?; + if name.is_empty() { + panic!("get_modname matched all to module and nothing to name") + } + Ok((path, name)) + } + /// Obtains a symbol's originnal name /// Uses a substack to detect loops fn find_origin_rec( &mut self, - symbol: Mrc<[String]>, - import_path: Stackframe> - ) -> Result, ResolutionError> { + symbol: Rc>, + import_path: Stackframe>> + ) -> Result>, ResolutionError> { if let Some(cached) = self.cache.get(&symbol) { - return cached.as_ref().map_err(|e| e.clone()).map(Mrc::clone) + return cached.clone() } // The imports and path of the referenced file and the local name - let path = (self.get_modname)(Mrc::clone(&symbol)).ok_or_else(|| { - ResolutionError::NoModule(Mrc::clone(&symbol)) - })?; - let name = &symbol[path.len()..]; - if name.is_empty() { - panic!("get_modname matched all to module and nothing to name in {:?}", import_path) - } - let imports = (self.get_imports)(Mrc::clone(&path))?; + let (path, name) = self.split(symbol)?; + let imports = (self.get_imports)(path.clone())?; let result = if let Some(source) = imports.get(&name[0]) { - let new_sym: Vec = source.iter().chain(name.iter()).cloned().collect(); + let new_sym = source.iter().chain(name.iter()).cloned().collect_vec(); if import_path.iter().any(|el| el.as_ref() == new_sym.as_slice()) { - Err(ResolutionError::Cycle(import_path.iter().map(Mrc::clone).collect())) + Err(ResolutionError::Cycle(import_path.iter().cloned().collect())) } else { - self.find_origin_rec(to_mrc_slice(new_sym), import_path.push(Mrc::clone(&symbol))) + self.find_origin_rec(Rc::new(new_sym), import_path.push(symbol.clone())) } } else { Ok(symbol.clone()) // If not imported, it must be locally defined @@ -81,30 +88,27 @@ where } fn process_exprmrcopt_rec(&mut self, - exbo: &Option> - ) -> Result>, ResolutionError> { - exbo.iter().map(|exb| Ok(Mrc::new(self.process_expression_rec(exb.as_ref())?))) + exbo: &Option> + ) -> Result>, ResolutionError> { + exbo.iter().map(|exb| Ok(Rc::new(self.process_expression_rec(exb)?))) .next().transpose() } fn process_clause_rec(&mut self, tok: &Clause) -> Result> { Ok(match tok { - Clause::S(c, exv) => Clause::S(*c, to_mrc_slice( - exv.as_ref().iter().map(|e| self.process_expression_rec(e)) - .collect::, ResolutionError>>()? + Clause::S(c, exv) => Clause::S(*c, Rc::new( + exv.iter().map(|e| self.process_expression_rec(e)) + .collect::>()? )), Clause::Lambda(name, typ, body) => Clause::Lambda(name.clone(), - to_mrc_slice(self.process_exprv_rec(typ.as_ref())?), - to_mrc_slice(self.process_exprv_rec(body.as_ref())?) + Rc::new(self.process_exprv_rec(&typ)?), + Rc::new(self.process_exprv_rec(&body)?) ), Clause::Auto(name, typ, body) => Clause::Auto(name.clone(), - to_mrc_slice(self.process_exprv_rec(typ.as_ref())?), - to_mrc_slice(self.process_exprv_rec(body.as_ref())?) 
+ Rc::new(self.process_exprv_rec(&typ)?), + Rc::new(self.process_exprv_rec(&body)?) ), - Clause::Name{local, qualified} => Clause::Name{ - local: local.clone(), - qualified: self.find_origin(Mrc::clone(qualified))? - }, + Clause::Name(name) => Clause::Name(self.find_origin(name.clone())?), x => x.clone() }) } @@ -112,12 +116,14 @@ where fn process_expression_rec(&mut self, Expr(token, typ): &Expr) -> Result> { Ok(Expr( self.process_clause_rec(token)?, - typ.iter().map(|t| self.process_clause_rec(t)).collect::>()? + Rc::new(typ.iter().map(|t| { + self.process_clause_rec(t) + }).collect::>()?) )) } - pub fn find_origin(&mut self, symbol: Mrc<[String]>) -> Result, ResolutionError> { - self.find_origin_rec(Mrc::clone(&symbol), Stackframe::new(symbol)) + pub fn find_origin(&mut self, symbol: Rc>) -> Result>, ResolutionError> { + self.find_origin_rec(symbol.clone(), Stackframe::new(symbol)) } #[allow(dead_code)] diff --git a/src/project/prefix.rs b/src/project/prefix.rs index d22bb4f..a53c149 100644 --- a/src/project/prefix.rs +++ b/src/project/prefix.rs @@ -1,6 +1,8 @@ -use mappable_rc::Mrc; +use std::rc::Rc; -use crate::{ast::{Expr, Clause}, utils::{collect_to_mrc, to_mrc_slice}}; +use lasso::Spur; + +use crate::ast::{Expr, Clause}; /// Replaces the first element of a name with the matching prefix from a prefix map @@ -8,34 +10,33 @@ use crate::{ast::{Expr, Clause}, utils::{collect_to_mrc, to_mrc_slice}}; /// Called by [#prefix] which handles Typed. fn prefix_clause( expr: &Clause, - namespace: Mrc<[String]> + namespace: &[Spur] ) -> Clause { match expr { - Clause::S(c, v) => Clause::S(*c, - collect_to_mrc(v.iter().map(|e| prefix_expr(e, Mrc::clone(&namespace)))) - ), + Clause::S(c, v) => Clause::S(*c, Rc::new(v.iter().map(|e| { + prefix_expr(e, namespace) + }).collect())), Clause::Auto(name, typ, body) => Clause::Auto( name.clone(), - collect_to_mrc(typ.iter().map(|e| prefix_expr(e, Mrc::clone(&namespace)))), - collect_to_mrc(body.iter().map(|e| prefix_expr(e, Mrc::clone(&namespace)))), + Rc::new(typ.iter().map(|e| prefix_expr(e, namespace)).collect()), + Rc::new(body.iter().map(|e| prefix_expr(e, namespace)).collect()), ), Clause::Lambda(name, typ, body) => Clause::Lambda( name.clone(), - collect_to_mrc(typ.iter().map(|e| prefix_expr(e, Mrc::clone(&namespace)))), - collect_to_mrc(body.iter().map(|e| prefix_expr(e, Mrc::clone(&namespace)))), + Rc::new(typ.iter().map(|e| prefix_expr(e, namespace)).collect()), + Rc::new(body.iter().map(|e| prefix_expr(e, namespace)).collect()), + ), + Clause::Name(name) => Clause::Name( + Rc::new(namespace.iter().chain(name.iter()).cloned().collect()) ), - Clause::Name{local, qualified} => Clause::Name{ - local: local.clone(), - qualified: collect_to_mrc(namespace.iter().chain(qualified.iter()).cloned()) - }, x => x.clone() } } /// Produce an Expr object for any value of Expr -pub fn prefix_expr(Expr(clause, typ): &Expr, namespace: Mrc<[String]>) -> Expr { +pub fn prefix_expr(Expr(clause, typ): &Expr, namespace: &[Spur]) -> Expr { Expr( - prefix_clause(clause, Mrc::clone(&namespace)), - to_mrc_slice(typ.iter().map(|e| prefix_clause(e, Mrc::clone(&namespace))).collect()) + prefix_clause(clause, namespace), + Rc::new(typ.iter().map(|e| prefix_clause(e, namespace)).collect()) ) } diff --git a/src/project/rule_collector.rs b/src/project/rule_collector.rs index 18957b6..e0afbb0 100644 --- a/src/project/rule_collector.rs +++ b/src/project/rule_collector.rs @@ -1,85 +1,92 @@ use std::cell::RefCell; use std::collections::{HashMap, HashSet, VecDeque}; -use 
std::fmt::Debug; use std::rc::Rc; use itertools::Itertools; -use mappable_rc::Mrc; +use lasso::Spur; use crate::ast::Rule; -use crate::parse::{self, FileEntry}; -use crate::utils::{Cache, mrc_derive, to_mrc_slice, one_mrc_slice}; +use crate::parse; +use crate::representations::sourcefile::{FileEntry, exported_names, imports}; +use crate::utils::Cache; use super::name_resolver::NameResolver; use super::module_error::ModuleError; use super::prefix::prefix_expr; use super::loading::{Loaded, Loader, LoadingError}; -use crate::parse::Import; type ParseResult = Result>; -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct Module { pub rules: Vec, - pub exports: Vec, - pub references: HashSet> + pub exports: Vec, + pub references: HashSet>> } pub type RuleCollectionResult = Result, ModuleError>; -pub fn rule_collector( +pub fn rule_collector<'a, F: 'a, G: 'a, H: 'a>( + intern: &'a G, deintern: &'a H, load_mod: F -) -> Cache<'static, Mrc<[String]>, RuleCollectionResult> -where F: Loader +) -> Cache<'static, Rc>, RuleCollectionResult> +where F: Loader, G: Fn(&str) -> Spur, H: Fn(Spur) -> &'a str { let load_mod_rc = RefCell::new(load_mod); // Map paths to a namespace with name list (folder) or module with source text (file) - let loaded = Rc::new(Cache::new(move |path: Mrc<[String]>, _| -> ParseResult { - load_mod_rc.borrow_mut().load(&path.iter().map(|s| s.as_str()).collect_vec()).map_err(ModuleError::Load) - })); + let loaded = Cache::rc(move |path: Rc>, _| -> ParseResult> { + let load_mod = load_mod_rc.borrow_mut(); + let spath = path.iter().cloned().map(deintern).collect_vec(); + load_mod.load(&spath).map(Rc::new).map_err(ModuleError::Load) + }); // Map names to the longest prefix that points to a valid module // At least one segment must be in the prefix, and the prefix must not be the whole name - let modname = Rc::new(Cache::new({ + let modname = Cache::rc({ let loaded = loaded.clone(); - move |symbol: Mrc<[String]>, _| -> Result, Vec>> { + move |symbol: Rc>, _| -> Result>, Rc>>> { let mut errv: Vec> = Vec::new(); let reg_err = |e, errv: &mut Vec>| { errv.push(e); - if symbol.len() == errv.len() { Err(errv.clone()) } + if symbol.len() == errv.len() { Err(Rc::new(errv.clone())) } else { Ok(()) } }; loop { - let path = mrc_derive(&symbol, |s| &s[..s.len() - errv.len() - 1]); - match loaded.try_find(&path) { + // TODO: this should not live on the heap + let path = Rc::new(symbol.iter() + .take(symbol.len() - errv.len() - 1) + .cloned() + .collect_vec()); + match loaded.find(&path).as_ref() { Ok(imports) => match imports.as_ref() { - Loaded::Module(_) | Loaded::External(_) => break Ok(path), - Loaded::Namespace(_) => reg_err(ModuleError::None, &mut errv)? + Loaded::Source(_) | Loaded::AST(_) => break Ok(path), }, - Err(err) => reg_err(err, &mut errv)? + Err(err) => reg_err(err.clone(), &mut errv)? 
} } } - })); + }); // Preliminarily parse a file, substitution rules and imports are valid - let prelude_path = one_mrc_slice("prelude".to_string()); let preparsed = Rc::new(Cache::new({ + // let prelude_path = vec!["prelude".to_string()]; + // let interned_prelude_path = Rc::new( + // prelude_path.iter() + // .map(|s| intern(s.as_str())) + // .collect_vec() + // ); let loaded = loaded.clone(); - move |path: Mrc<[String]>, _| -> ParseResult> { - let loaded = loaded.try_find(&path)?; + move |path: Rc>, _| -> ParseResult> { + let loaded = loaded.find(&path)?; match loaded.as_ref() { - Loaded::Module(source) => { - let mut entv = parse::parse(&[] as &[&str], source.as_str())?; - if !entv.iter().any(|ent| if let FileEntry::Import(imps) = ent { - imps.iter().any(|imp| imp.path.starts_with(&prelude_path)) - } else {false}) && path != prelude_path { - entv.push(FileEntry::Import(vec![Import{ - name: None, path: Mrc::clone(&prelude_path) - }])) - } + Loaded::Source(source) => { + let mut entv = parse::parse(&[] as &[&str], source.as_str(), intern)?; + // if path != interned_prelude_path { + // entv.push(FileEntry::Import(vec![Import{ + // name: None, path: prelude_path + // }])) + // } Ok(entv) } - Loaded::External(ast) => Ok(ast.clone()), - Loaded::Namespace(_) => Err(ModuleError::None), + Loaded::AST(ast) => Ok(ast.clone()), } } })); @@ -87,13 +94,10 @@ where F: Loader let exports = Rc::new(Cache::new({ let loaded = loaded.clone(); let preparsed = preparsed.clone(); - move |path: Mrc<[String]>, _| -> ParseResult> { - let loaded = loaded.try_find(&path)?; - if let Loaded::Namespace(names) = loaded.as_ref() { - return Ok(names.clone()); - } - let preparsed = preparsed.try_find(&path)?; - Ok(parse::exported_names(&preparsed) + move |path: Rc>, _| -> ParseResult> { + let loaded = loaded.find(&path)?; + let preparsed = preparsed.find(&path)?; + Ok(exported_names(&preparsed) .into_iter() .map(|n| n[0].clone()) .collect()) @@ -103,24 +107,26 @@ where F: Loader let imports = Rc::new(Cache::new({ let preparsed = preparsed.clone(); let exports = exports.clone(); - move |path: Mrc<[String]>, _| -> ParseResult>> { - let entv = preparsed.try_find(&path)?; - let import_entries = parse::imports(entv.iter()); - let mut imported_symbols: HashMap> = HashMap::new(); + move |path: Rc>, _| -> ParseResult>>>> { + let entv = preparsed.find(&path)?; + let import_entries = imports(entv.iter()); + let mut imported_symbols = HashMap::>>::new(); for imp in import_entries { - let export = exports.try_find(&imp.path)?; + let export_list = exports.find(&path)?; if let Some(ref name) = imp.name { - if export.contains(name) { - imported_symbols.insert(name.clone(), Mrc::clone(&imp.path)); - } else {panic!("{:?} doesn't export {}", imp.path, name)} + if export_list.contains(name) { + imported_symbols.insert(name.clone(), imp.path.clone()); + } else { + panic!("{:?} doesn't export {}", imp.path, deintern(*name)) + } } else { - for exp in export.as_ref() { - imported_symbols.insert(exp.clone(), Mrc::clone(&imp.path)); + for exp in export_list { + imported_symbols.insert(exp, imp.path.clone()); } } } - println!("Imports for {:?} are {:?}", path.as_ref(), imported_symbols); - Ok(imported_symbols) + // println!("Imports for {:?} are {:?}", path.as_ref(), imported_symbols); + Ok(Rc::new(imported_symbols)) } })); // Final parse, operators are correctly separated @@ -128,69 +134,73 @@ where F: Loader let preparsed = preparsed.clone(); let imports = imports.clone(); let loaded = loaded.clone(); - move |path: Mrc<[String]>, _| -> 
ParseResult> { + move |path: Rc>, _| -> ParseResult> { let imported_ops: Vec = - imports.try_find(&path)? - .keys() - .filter(|s| parse::is_op(s)) - .cloned() - .collect(); - // let parser = file_parser(&prelude, &imported_ops); - let pre = preparsed.try_find(&path)?; - match loaded.try_find(&path)?.as_ref() { - Loaded::Module(source) => Ok(parse::reparse(&imported_ops, source.as_str(), &pre)?), - Loaded::External(ast) => Ok(ast.clone()), - Loaded::Namespace(_) => Err(ModuleError::None) + imports.find(&path)? + .keys() + .map(|s| deintern(*s).to_string()) + .filter(|s| parse::is_op(s)) + .collect(); + let pre = preparsed.find(&path)?; + match loaded.find(&path)?.as_ref() { + Loaded::Source(source) => Ok(parse::reparse( + &imported_ops, source.as_str(), &pre, intern + )?), + Loaded::AST(ast) => Ok(ast.clone()), } } })); - let name_resolver_rc = RefCell::new(NameResolver::new({ + let name_resolver = NameResolver::new({ let modname = modname.clone(); move |path| { - Some(modname.try_find(&path).ok()?.as_ref().clone()) + let modname = modname.find(&path).ok()?; + let symname = Rc::new(path[modname.len()..].to_vec()); + Some((modname, symname)) } }, { let imports = imports.clone(); move |path| { - imports.try_find(&path).map(|f| f.as_ref().clone()) + imports.find(&path).map(|f| f.as_ref().clone()) } - })); + }); // Turn parsed files into a bag of rules and a list of toplevel export names let resolved = Rc::new(Cache::new({ let parsed = parsed.clone(); let exports = exports.clone(); let imports = imports.clone(); - move |path: Mrc<[String]>, _| -> ParseResult { - let mut name_resolver = name_resolver_rc.borrow_mut(); + move |path: Rc>, _| -> ParseResult { let module = Module { - rules: parsed.try_find(&path)? + rules: parsed.find(&path)? .iter() .filter_map(|ent| { if let FileEntry::Rule(Rule{source, prio, target}, _) = ent { Some(Rule { - source: source.iter() - .map(|ex| { - prefix_expr(ex, Mrc::clone(&path)) - }).collect(), - target: target.iter().map(|ex| { - prefix_expr(ex, Mrc::clone(&path)) - }).collect(), + source: Rc::new( + source.iter() + .map(|ex| prefix_expr(ex, &path)) + .collect_vec() + ), + target: Rc::new( + target.iter() + .map(|ex| prefix_expr(ex, &path)) + .collect_vec() + ), prio: *prio, }) } else { None } }) .map(|Rule{ source, target, prio }| Ok(super::Rule { - source: to_mrc_slice(source.iter() + source: Rc::new(source.iter() .map(|ex| name_resolver.process_expression(ex)) .collect::, _>>()?), - target: to_mrc_slice(target.iter() + target: Rc::new(target.iter() .map(|ex| name_resolver.process_expression(ex)) .collect::, _>>()?), prio })) .collect::>>()?, - exports: exports.try_find(&path)?.as_ref().clone(), - references: imports.try_find(&path)? + exports: exports.find(&path)?.clone(), + references: imports.find(&path)? 
.values().cloned().collect() }; Ok(module) @@ -198,14 +208,14 @@ where F: Loader })); Cache::new({ let resolved = resolved.clone(); - move |path: Mrc<[String]>, _| -> ParseResult> { + move |path: Rc>, _| -> ParseResult> { // Breadth-first search - let mut processed: HashSet> = HashSet::new(); + let mut processed: HashSet>> = HashSet::new(); let mut rules: Vec = Vec::new(); - let mut pending: VecDeque> = VecDeque::new(); + let mut pending: VecDeque>> = VecDeque::new(); pending.push_back(path); while let Some(el) = pending.pop_front() { - let resolved = resolved.try_find(&el)?; + let resolved = resolved.find(&el)?; processed.insert(el.clone()); pending.extend( resolved.references.iter() diff --git a/src/representations/ast.rs b/src/representations/ast.rs index a4b192b..91d5d82 100644 --- a/src/representations/ast.rs +++ b/src/representations/ast.rs @@ -1,133 +1,193 @@ -use mappable_rc::Mrc; +use lasso::RodeoResolver; +use lasso::Spur; use itertools::Itertools; use ordered_float::NotNan; -use std::{hash::Hash, intrinsics::likely}; -use std::fmt::Debug; -use crate::utils::mrc_empty_slice; -use crate::utils::one_mrc_slice; +use std::hash::Hash; +use std::rc::Rc; +use crate::utils::InternedDisplay; +use crate::utils::Stackframe; use super::primitive::Primitive; /// An S-expression with a type #[derive(PartialEq, Eq, Hash)] -pub struct Expr(pub Clause, pub Mrc<[Clause]>); +pub struct Expr(pub Clause, pub Rc>); impl Expr { pub fn into_clause(self) -> Clause { - if likely(self.1.len() == 0) { self.0 } - else { Clause::S('(', one_mrc_slice(self)) } + if self.1.len() == 0 { self.0 } + else { Clause::S('(', Rc::new(vec![self])) } + } + + pub fn visit_names(&self, + binds: Stackframe>>, + cb: &mut F + ) where F: FnMut(Rc>) { + let Expr(val, typ) = self; + val.visit_names(binds.clone(), cb); + for typ in typ.as_ref() { + typ.visit_names(binds.clone(), cb); + } } } impl Clone for Expr { fn clone(&self) -> Self { - Self(self.0.clone(), Mrc::clone(&self.1)) + Self(self.0.clone(), self.1.clone()) } } -impl Debug for Expr { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl InternedDisplay for Expr { + fn fmt(&self, + f: &mut std::fmt::Formatter<'_>, + rr: RodeoResolver + ) -> std::fmt::Result { let Expr(val, typ) = self; - write!(f, "{:?}", val)?; + val.fmt(f, rr)?; for typ in typ.as_ref() { - write!(f, ":{:?}", typ)? + write!(f, ":")?; + typ.fmt(f, rr)?; } Ok(()) } } /// An S-expression as read from a source file -#[derive(PartialEq, Eq, Hash)] +#[derive(PartialEq, Eq, Hash, Clone)] pub enum Clause { P(Primitive), /// A c-style name or an operator, eg. `+`, `i`, `foo::bar` - Name{ - local: Option, - qualified: Mrc<[String]> - }, - /// A parenthesized expression, eg. `(print out "hello")`, `[1, 2, 3]`, `{Some(t) => t}` - S(char, Mrc<[Expr]>), - /// An explicit expression associated with the leftmost, outermost [Clause::Auto], eg. `read @Uint` - Explicit(Mrc), + Name(Rc>), + /// A parenthesized exmrc_empty_slice()pression + /// eg. `(print out "hello")`, `[1, 2, 3]`, `{Some(t) => t}` + S(char, Rc>), + /// An explicit expression associated with the leftmost, outermost + /// [Clause::Auto], eg. `read @Uint` + Explicit(Rc), /// A function expression, eg. `\x. x + 1` - Lambda(String, Mrc<[Expr]>, Mrc<[Expr]>), + Lambda(Rc, Rc>, Rc>), /// A parameterized expression with type inference, eg. `@T. T -> T` - Auto(Option, Mrc<[Expr]>, Mrc<[Expr]>), + Auto(Option>, Rc>, Rc>), /// A placeholder for macros, eg. 
`$name`, `...$body`, `...$lhs:1` Placeh{ key: String, /// None => matches one token /// Some((prio, nonzero)) => - /// prio is the sizing priority for the vectorial (higher prio grows first) + /// prio is the sizing priority for the vectorial + /// (higher prio grows first) /// nonzero is whether the vectorial matches 1..n or 0..n tokens vec: Option<(usize, bool)> }, } + impl Clause { - pub fn body(&self) -> Option> { + pub fn body(&self) -> Option>> { match self { Self::Auto(_, _, body) | Self::Lambda(_, _, body) | - Self::S(_, body) => Some(Mrc::clone(body)), + Self::S(_, body) => Some(body.clone()), _ => None } } - pub fn typ(&self) -> Option> { + pub fn typ(&self) -> Option>> { match self { - Self::Auto(_, typ, _) | Self::Lambda(_, typ, _) => Some(Mrc::clone(typ)), + Self::Auto(_, typ, _) | Self::Lambda(_, typ, _) => Some(typ.clone()), _ => None } } pub fn into_expr(self) -> Expr { if let Self::S('(', body) = &self { if body.len() == 1 { body[0].clone() } - else { Expr(self, mrc_empty_slice()) } - } else { Expr(self, mrc_empty_slice()) } + else { Expr(self, Rc::default()) } + } else { Expr(self, Rc::default()) } } - pub fn from_exprv(exprv: Mrc<[Expr]>) -> Option { + pub fn from_exprv(exprv: &[Expr]) -> Option { if exprv.len() == 0 { None } else if exprv.len() == 1 { Some(exprv[0].clone().into_clause()) } - else { Some(Self::S('(', exprv)) } + else { Some(Self::S('(', Rc::new(exprv.to_vec()))) } } -} -impl Clone for Clause { - fn clone(&self) -> Self { + /// Recursively iterate through all "names" in an expression. + /// It also finds a lot of things that aren't names, such as all + /// bound parameters. Generally speaking, this is not a very + /// sophisticated search. + pub fn visit_names(&self, + binds: Stackframe>>, + cb: &mut F + ) where F: FnMut(Rc>) { match self { - Self::S(c, b) => Self::S(*c, Mrc::clone(b)), - Self::Auto(n, t, b) => Self::Auto( - n.clone(), Mrc::clone(t), Mrc::clone(b) - ), - Self::Name { local: l, qualified: q } => Self::Name { - local: l.clone(), qualified: Mrc::clone(q) + Clause::Auto(name, typ, body) => { + for x in typ.iter() { + x.visit_names(binds.clone(), cb) + } + let binds_dup = binds.clone(); + let new_binds = if let Some(rc) = name { + if let Clause::Name(name) = rc.as_ref() { + binds_dup.push(name.clone()) + } else { binds } + } else { binds }; + for x in body.iter() { + x.visit_names(new_binds.clone(), cb) + } }, - Self::Lambda(n, t, b) => Self::Lambda( - n.clone(), Mrc::clone(t), Mrc::clone(b) - ), - Self::Placeh{key, vec} => Self::Placeh{key: key.clone(), vec: *vec}, - Self::P(p) => Self::P(p.clone()), - Self::Explicit(expr) => Self::Explicit(Mrc::clone(expr)) + Clause::Lambda(name, typ, body) => { + for x in typ.iter() { + x.visit_names(binds.clone(), cb) + } + for x in body.iter() { + let new_binds = if let Clause::Name(name) = name.as_ref() { + binds.push(name.clone()) + } else { binds }; + x.visit_names(new_binds, cb) + } + }, + Clause::S(_, body) => for x in body.iter() { + x.visit_names(binds.clone(), cb) + }, + Clause::Name(name) => { + if binds.iter().all(|x| x != name) { + cb(name.clone()) + } + } + _ => (), } } } -fn fmt_expr_seq(it: &mut dyn Iterator, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +fn fmt_expr_seq( + it: &mut dyn Iterator, + f: &mut std::fmt::Formatter<'_>, + rr: RodeoResolver +) -> std::fmt::Result { for item in Itertools::intersperse(it.map(Some), None) { match item { - Some(expr) => write!(f, "{:?}", expr), + Some(expr) => expr.fmt(f, rr), None => f.write_str(" "), }? 
} Ok(()) } -impl Debug for Clause { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +pub fn fmt_name( + name: &Rc>, f: &mut std::fmt::Formatter, rr: RodeoResolver +) -> std::fmt::Result { + for el in itertools::intersperse( + name.iter().map(|s| rr.resolve(s)), + "::" + ) { + write!(f, "{}", el)? + } + Ok(()) +} + +impl InternedDisplay for Clause { + fn fmt(&self, + f: &mut std::fmt::Formatter<'_>, + rr: RodeoResolver + ) -> std::fmt::Result { match self { Self::P(p) => write!(f, "{:?}", p), - Self::Name{local, qualified} => - if let Some(local) = local {write!(f, "{}`{}`", qualified.join("::"), local)} - else {write!(f, "{}", qualified.join("::"))}, + Self::Name(name) => fmt_name(name, f, rr), Self::S(del, items) => { f.write_str(&del.to_string())?; - fmt_expr_seq(&mut items.iter(), f)?; + fmt_expr_seq(&mut items.iter(), f, rr)?; f.write_str(match del { '(' => ")", '[' => "]", '{' => "}", _ => "CLOSING_DELIM" @@ -135,44 +195,49 @@ impl Debug for Clause { }, Self::Lambda(name, argtyp, body) => { f.write_str("\\")?; - f.write_str(name)?; - f.write_str(":")?; fmt_expr_seq(&mut argtyp.iter(), f)?; f.write_str(".")?; - fmt_expr_seq(&mut body.iter(), f) + name.fmt(f, rr)?; + f.write_str(":")?; + fmt_expr_seq(&mut argtyp.iter(), f, rr)?; + f.write_str(".")?; + fmt_expr_seq(&mut body.iter(), f, rr) }, - Self::Auto(name, argtyp, body) => { + Self::Auto(name_opt, argtyp, body) => { f.write_str("@")?; - f.write_str(&name.clone().unwrap_or_default())?; - f.write_str(":")?; fmt_expr_seq(&mut argtyp.iter(), f)?; f.write_str(".")?; - fmt_expr_seq(&mut body.iter(), f) + if let Some(name) = name_opt { name.fmt(f, rr)? } + f.write_str(":")?; + fmt_expr_seq(&mut argtyp.iter(), f, rr)?; + f.write_str(".")?; + fmt_expr_seq(&mut body.iter(), f, rr) }, Self::Placeh{key, vec: None} => write!(f, "${key}"), - Self::Placeh{key, vec: Some((prio, true))} => write!(f, "...${key}:{prio}"), - Self::Placeh{key, vec: Some((prio, false))} => write!(f, "..${key}:{prio}"), - Self::Explicit(expr) => write!(f, "@{:?}", expr.as_ref()) + Self::Placeh{key, vec: Some((prio, true))} => + write!(f, "...${key}:{prio}"), + Self::Placeh{key, vec: Some((prio, false))} => + write!(f, "..${key}:{prio}"), + Self::Explicit(expr) => { + write!(f, "@")?; + expr.fmt(f, rr) + } } } } /// A substitution rule as read from the source -#[derive(PartialEq, Eq, Hash)] +#[derive(Clone, PartialEq, Eq, Hash)] pub struct Rule { - pub source: Mrc<[Expr]>, + pub source: Rc>, pub prio: NotNan, - pub target: Mrc<[Expr]> + pub target: Rc> } -impl Clone for Rule { - fn clone(&self) -> Self { - Self { - source: Mrc::clone(&self.source), - prio: self.prio, - target: Mrc::clone(&self.target) - } - } -} - -impl Debug for Rule { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?} ={}=> {:?}", self.source, self.prio, self.target) +impl InternedDisplay for Rule { + fn fmt(&self, + f: &mut std::fmt::Formatter<'_>, + rr: RodeoResolver + ) -> std::fmt::Result { + for e in self.source.iter() { e.fmt(f, rr)?; write!(f, " ")?; } + write!(f, "={}=>", self.prio)?; + for e in self.target.iter() { write!(f, " ")?; e.fmt(f, rr)?; } + Ok(()) } } \ No newline at end of file diff --git a/src/representations/ast_to_postmacro.rs b/src/representations/ast_to_postmacro.rs index 2b64161..8e05ae2 100644 --- a/src/representations/ast_to_postmacro.rs +++ b/src/representations/ast_to_postmacro.rs @@ -1,5 +1,7 @@ use std::{rc::Rc, fmt::Display}; +use lasso::{Spur, RodeoResolver}; + use crate::utils::Stackframe; use super::{ast, 
postmacro}; @@ -8,27 +10,30 @@ use super::{ast, postmacro}; pub enum Error { /// `()` as a clause is meaningless in lambda calculus EmptyS, - /// Only `(...)` may be converted to typed lambdas. `[...]` and `{...}` left in the code are - /// signs of incomplete macro execution + /// Only `(...)` may be converted to typed lambdas. `[...]` and `{...}` + /// left in the code are signs of incomplete macro execution BadGroup(char), - /// `foo:bar:baz` will be parsed as `(foo:bar):baz`. Explicitly specifying `foo:(bar:baz)` - /// is forbidden and it's also meaningless since `baz` can only ever be the kind of types + /// `foo:bar:baz` will be parsed as `(foo:bar):baz`. Explicitly + /// specifying `foo:(bar:baz)` is forbidden and it's also meaningless + /// since `baz` can only ever be the kind of types ExplicitKindOfType, - /// Name never bound in an enclosing scope - indicates incomplete macro substitution - Unbound(String), - /// Namespaced names can never occur in the code, these are signs of incomplete macro execution - Symbol, - /// Placeholders shouldn't even occur in the code during macro execution. Something is clearly - /// terribly wrong + /// Name never bound in an enclosing scope - indicates incomplete + /// macro substitution + Unbound(Vec), + /// Placeholders shouldn't even occur in the code during macro execution. + /// Something is clearly terribly wrong Placeholder, - /// It's possible to try and transform the clause `(foo:bar)` into a typed clause, - /// however the correct value of this ast clause is a typed expression (included in the error) + /// It's possible to try and transform the clause `(foo:bar)` into a + /// typed clause, however the correct value of this ast clause is a + /// typed expression (included in the error) /// /// [expr] handles this case, so it's only really possible to get this /// error if you're calling [clause] directly ExprToClause(postmacro::Expr), /// @ tokens only ever occur between a function and a parameter - NonInfixAt + NonInfixAt, + /// Arguments can be either [ast::Clause::Name] or [ast::Clause::Placeh] + InvalidArg } impl Display for Error { @@ -37,44 +42,64 @@ impl Display for Error { Error::EmptyS => write!(f, "`()` as a clause is meaningless in lambda calculus"), Error::BadGroup(c) => write!(f, "Only `(...)` may be converted to typed lambdas. `[...]` and `{{...}}` left in the code are signs of incomplete macro execution"), Error::ExplicitKindOfType => write!(f, "`foo:bar:baz` will be parsed as `(foo:bar):baz`. Explicitly specifying `foo:(bar:baz)` is forbidden and meaningless since `baz` can only ever be the kind of types"), - Error::Unbound(name) => write!(f, "Name \"{name}\" never bound in an enclosing scope. This indicates incomplete macro substitution"), - Error::Symbol => write!(f, "Namespaced names not matching any macros found in the code."), + Error::Unbound(name) => { + write!(f, "Name \""); + for el in itertools::intersperse( + name.iter().map(String::as_str), + "::" + ) { write!(f, "{}", el)? } + write!(f, "\" never bound in an enclosing scope. This indicates incomplete macro substitution") + } Error::Placeholder => write!(f, "Placeholders shouldn't even occur in the code during macro execution, this is likely a compiler bug"), Error::ExprToClause(expr) => write!(f, "Attempted to transform the clause (foo:bar) into a typed clause. 
This is likely a compiler bug"), -      Error::NonInfixAt => write!(f, "@ as a token can only ever occur between a generic and a type parameter.") +      Error::NonInfixAt => write!(f, "@ as a token can only ever occur between a generic and a type parameter."), +      Error::InvalidArg => write!(f, "Arguments can be either Name or Placeholder nodes") } } } +#[derive(Clone, Copy)] +struct Init<'a>(&'a RodeoResolver); + /// Try to convert an expression from AST format to typed lambda -pub fn expr(expr: &ast::Expr) -> Result { -  expr_rec(expr, Context::default()) +pub fn expr(expr: &ast::Expr, i: Init) -> Result { +  expr_rec(expr, Context::new(i)) } /// Try and convert a single clause from AST format to typed lambda -pub fn clause(clause: &ast::Clause) -> Result { -  clause_rec(clause, Context::default()) +pub fn clause( +  clause: &ast::Clause, i: Init +) -> Result { +  clause_rec(clause, Context::new(i)) } -/// Try and convert a sequence of expressions from AST format to typed lambda -pub fn exprv(exprv: &[ast::Expr]) -> Result { -  exprv_rec(exprv, Context::default()) +/// Try and convert a sequence of expressions from AST format to +/// typed lambda +pub fn exprv( +  exprv: &[ast::Expr], i: Init +) -> Result { +  exprv_rec(exprv, Context::new(i)) } #[derive(Clone, Copy)] struct Context<'a> { -  names: Stackframe<'a, (&'a str, bool)> +  names: Stackframe<'a, (&'a [Spur], bool)>, +  rr: &'a RodeoResolver } impl<'a> Context<'a> { -  fn w_name<'b>(&'b self, name: &'b str, is_auto: bool) -> Context<'b> where 'a: 'b { -    Context { names: self.names.push((name, is_auto)) } +  fn w_name<'b>(&'b self, +    name: &'b [Spur], +    is_auto: bool +  ) -> Context<'b> where 'a: 'b { +    Context { +      names: self.names.push((name, is_auto)), +      rr: self.rr +    } } -} -impl Default for Context<'static> { -  fn default() -> Self { -    Self { names: Stackframe::new(("", false)) } +  fn new(i: Init) -> Context<'static> { +    Context { names: Stackframe::new((&[], false)), rr: i.0 } } } @@ -138,8 +163,12 @@ fn clause_rec<'a>( if t.len() > 0 {return Err(Error::ExplicitKindOfType)} else {Rc::new(vec![c])} }; -    let body_ctx = if let Some(name) = no { -      ctx.w_name(&&**name, true) +    let body_ctx = if let Some(rc) = no { +      match rc.as_ref() { +        ast::Clause::Name(name) => ctx.w_name(&&**name, true), +        ast::Clause::Placeh { .. } => return Err(Error::Placeholder), +        _ => return Err(Error::InvalidArg) +      } } else {ctx}; let body = exprv_rec(b.as_ref(), body_ctx)?; Ok(postmacro::Clause::Auto(typ, Rc::new(body))) @@ -150,14 +179,22 @@ fn clause_rec<'a>( if t.len() > 0 {return Err(Error::ExplicitKindOfType)} else {Rc::new(vec![c])} }; -    let body_ctx = ctx.w_name(&&**n, false); +    let body_ctx = match n.as_ref() { +      ast::Clause::Name(name) => ctx.w_name(&&**name, false), +      ast::Clause::Placeh { .. } => return Err(Error::Placeholder), +      _ => return Err(Error::InvalidArg) +    }; let body = exprv_rec(b.as_ref(), body_ctx)?; Ok(postmacro::Clause::Lambda(typ, Rc::new(body))) } -    ast::Clause::Name { local: Some(arg), ..
} => { - let (level, (_, is_auto)) = ctx.names.iter().enumerate().find(|(_, (n, _))| n == arg) - .ok_or_else(|| Error::Unbound(arg.clone()))?; - let label = if *is_auto {postmacro::Clause::AutoArg} else {postmacro::Clause::LambdaArg}; + ast::Clause::Name(name) => { + let (level, (_, is_auto)) = ctx.names.iter().enumerate() + .find(|(_, (n, _))| n == &name.as_slice()) + .ok_or_else(|| Error::Unbound( + name.iter().map(|s| ctx.rr.resolve(s).to_string()).collect() + ))?; + let label = if *is_auto {postmacro::Clause::AutoArg} + else {postmacro::Clause::LambdaArg}; Ok(label(level)) } ast::Clause::S(paren, entries) => { @@ -166,7 +203,6 @@ fn clause_rec<'a>( if typ.len() == 0 {Ok(val)} else {Err(Error::ExprToClause(postmacro::Expr(val, typ)))} }, - ast::Clause::Name { local: None, .. } => Err(Error::Symbol), ast::Clause::Placeh { .. } => Err(Error::Placeholder), ast::Clause::Explicit(..) => Err(Error::NonInfixAt) } diff --git a/src/representations/get_name.rs b/src/representations/get_name.rs deleted file mode 100644 index 425c36f..0000000 --- a/src/representations/get_name.rs +++ /dev/null @@ -1,10 +0,0 @@ -use std::sync::atomic::AtomicU64; -use lazy_static::lazy_static; - -lazy_static! { - static ref NEXT_NAME: AtomicU64 = AtomicU64::new(0); -} - -pub fn get_name() -> u64 { - NEXT_NAME.fetch_add(1, std::sync::atomic::Ordering::Relaxed) -} \ No newline at end of file diff --git a/src/representations/mod.rs b/src/representations/mod.rs index fd32ed8..977faad 100644 --- a/src/representations/mod.rs +++ b/src/representations/mod.rs @@ -2,11 +2,11 @@ pub mod ast; // pub mod typed; pub mod literal; pub mod ast_to_postmacro; -pub mod get_name; pub(crate) mod interpreted; mod postmacro; mod primitive; mod path_set; +pub mod sourcefile; pub use path_set::PathSet; pub use primitive::Primitive; pub mod postmacro_to_interpreted; diff --git a/src/representations/postmacro.rs b/src/representations/postmacro.rs index ee046df..691d8b9 100644 --- a/src/representations/postmacro.rs +++ b/src/representations/postmacro.rs @@ -1,8 +1,6 @@ use crate::utils::string_from_charset; use super::primitive::Primitive; -use super::ast_to_postmacro; -use super::ast; use std::fmt::{Debug, Write}; use std::rc::Rc; @@ -103,18 +101,4 @@ impl Debug for Clause { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.deep_fmt(f, 0, Wrap(false, false)) } -} - -impl TryFrom<&ast::Expr> for Expr { - type Error = ast_to_postmacro::Error; - fn try_from(value: &ast::Expr) -> Result { - ast_to_postmacro::expr(value) - } -} - -impl TryFrom<&ast::Clause> for Clause { - type Error = ast_to_postmacro::Error; - fn try_from(value: &ast::Clause) -> Result { - ast_to_postmacro::clause(value) - } } \ No newline at end of file diff --git a/src/representations/sourcefile.rs b/src/representations/sourcefile.rs new file mode 100644 index 0000000..4abdd9b --- /dev/null +++ b/src/representations/sourcefile.rs @@ -0,0 +1,76 @@ +use std::rc::Rc; +use std::collections::HashSet; + +use lasso::Spur; + +use crate::box_chain; +use crate::utils::{Stackframe, iter::box_empty}; +use crate::ast::{Rule, Expr}; + + +#[derive(Debug, Clone)] +pub struct Import { + pub path: Rc>, + /// If name is None, this is a wildcard import + pub name: Option +} + +/// Anything we might encounter in a file +#[derive(Clone)] +pub enum FileEntry { + Import(Vec), + Comment(String), + /// The bool indicates whether the rule is exported, that is, + /// whether tokens uniquely defined inside it should be exported + Rule(Rule, bool), + Export(Vec>>), + LazyModule(Spur) 
+} + +/// Collect all names that occur in an expression +fn find_all_names_expr( + expr: &Expr +) -> HashSet>> { + let mut ret = HashSet::new(); + expr.visit_names( + Stackframe::new(Rc::default()), + &mut |n| { ret.insert(n); } + ); + ret +} + +/// Collect all exported names (and a lot of other words) from a file +pub fn exported_names( + src: &[FileEntry] +) -> HashSet>> { + src.iter().flat_map(|ent| match ent { + FileEntry::Rule(Rule{source, target, ..}, true) => + box_chain!(source.iter(), target.iter()), + _ => box_empty() + }).flat_map(|e| find_all_names_expr(e)) + .chain( + src.iter().filter_map(|ent| { + if let FileEntry::Export(names) = ent { + Some(names.iter()) + } else {None} + }).flatten().cloned() + ).chain( + src.iter().filter_map(|ent| { + if let FileEntry::LazyModule(lm) = ent { + Some(Rc::new(vec![*lm])) + } else {None} + }) + ).collect() +} + +/// Summarize all imports from a file in a single list of qualified names +pub fn imports<'a, 'b, I>( + src: I +) -> impl Iterator + 'a +where I: Iterator + 'a { + src.filter_map(|ent| match ent { + FileEntry::Import(impv) => Some(impv.iter()), + _ => None + }).flatten() +} + diff --git a/src/rule/executor/execute.rs b/src/rule/executor/execute.rs index 1f53704..23657fe 100644 --- a/src/rule/executor/execute.rs +++ b/src/rule/executor/execute.rs @@ -1,10 +1,11 @@ use std::iter; +use std::rc::Rc; use hashbrown::HashMap; use mappable_rc::Mrc; use crate::unwrap_or; -use crate::utils::{to_mrc_slice, one_mrc_slice, mrc_empty_slice, replace_first}; +use crate::utils::{to_mrc_slice, one_mrc_slice, mrc_empty_slice}; use crate::utils::iter::{box_once, into_boxed_iter}; use crate::ast::{Expr, Clause}; use super::slice_matcher::SliceMatcherDnC; @@ -14,7 +15,7 @@ use super::update_first_seq_rec; fn verify_scalar_vec(pattern: &Expr, is_vec: &mut HashMap) -> Result<(), String> { - let verify_clause = |clause: &Clause, is_vec: &mut HashMap| -> Result<(), String> { + let verify_clause = |clause: &Clause, is_vec: &mut HashMap| { match clause { Clause::Placeh{key, vec} => { if let Some(known) = is_vec.get(key) { @@ -23,16 +24,24 @@ fn verify_scalar_vec(pattern: &Expr, is_vec: &mut HashMap) is_vec.insert(key.clone(), vec.is_some()); } } - Clause::Auto(name, typ, body) => { - if let Some(key) = name.as_ref().and_then(|key| key.strip_prefix('$')) { - if is_vec.get(key) == Some(&true) { return Err(key.to_string()) } + Clause::Auto(name_opt, typ, body) => { + if let Some(name) = name_opt.as_ref() { + if let Clause::Placeh { key, vec } = name.as_ref() { + if vec.is_some() || is_vec.get(key) == Some(&true) { + return Err(key.to_string()) + } + is_vec.insert(key.to_owned(), false); + } } typ.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; } Clause::Lambda(name, typ, body) => { - if let Some(key) = name.strip_prefix('$') { - if is_vec.get(key) == Some(&true) { return Err(key.to_string()) } + if let Clause::Placeh { key, vec } = name.as_ref() { + if vec.is_some() || is_vec.get(key) == Some(&true) { + return Err(key.to_string()) + } + is_vec.insert(key.to_owned(), false); } typ.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; @@ -52,33 +61,56 @@ fn verify_scalar_vec(pattern: &Expr, is_vec: &mut HashMap) Ok(()) } - -fn slice_to_vec(src: &mut Mrc<[Expr]>, tgt: &mut Mrc<[Expr]>) { - let prefix_expr = Expr(Clause::Placeh{key: "::prefix".to_string(), vec: Some((0, false))}, to_mrc_slice(vec![])); - let postfix_expr = 
Expr(Clause::Placeh{key: "::postfix".to_string(), vec: Some((0, false))}, to_mrc_slice(vec![])); +/// Ensure that src starts and ends with a vectorial placeholder without +/// modifying the meaning of the substitution rule +fn slice_to_vec(src: &mut Rc>, tgt: &mut Rc>) { + let prefix_expr = Expr(Clause::Placeh{ + key: "::prefix".to_string(), + vec: Some((0, false)) + }, Rc::default()); + let postfix_expr = Expr(Clause::Placeh{ + key: "::postfix".to_string(), + vec: Some((0, false)) + }, Rc::default()); // Prefix or postfix to match the full vector - let head_multi = matches!(src.first().expect("Src can never be empty!").0, Clause::Placeh{vec: Some(_), ..}); - let tail_multi = matches!(src.last().expect("Impossible branch!").0, Clause::Placeh{vec: Some(_), ..}); + let head_multi = matches!( + src.first().expect("Src can never be empty!").0, + Clause::Placeh{vec: Some(_), ..} + ); + let tail_multi = matches!( + src.last().expect("Impossible branch!").0, + Clause::Placeh{vec: Some(_), ..} + ); let prefix_vec = if head_multi {vec![]} else {vec![prefix_expr]}; let postfix_vec = if tail_multi {vec![]} else {vec![postfix_expr]}; - *src = to_mrc_slice(prefix_vec.iter().chain(src.iter()).chain(postfix_vec.iter()).cloned().collect()); - *tgt = to_mrc_slice(prefix_vec.iter().chain(tgt.iter()).chain(postfix_vec.iter()).cloned().collect()); + *src = Rc::new( + prefix_vec.iter() + .chain(src.iter()) + .chain(postfix_vec.iter()) + .cloned().collect() + ); + *tgt = Rc::new( + prefix_vec.iter() + .chain(tgt.iter()) + .chain(postfix_vec.iter()) + .cloned().collect() + ); } /// keep re-probing the input with pred until it stops matching -fn update_all_seqs(input: Mrc<[Expr]>, pred: &mut F) -> Option> -where F: FnMut(Mrc<[Expr]>) -> Option> { +fn update_all_seqs(input: Rc>, pred: &mut F) +-> Option>> +where F: FnMut(Rc>) -> Option>> { let mut tmp = update_first_seq_rec::exprv(input, pred); while let Some(xv) = tmp { - tmp = update_first_seq_rec::exprv(Mrc::clone(&xv), pred); + tmp = update_first_seq_rec::exprv(xv.clone(), pred); if tmp.is_none() {return Some(xv)} } None } -// fn write_clause_rec(state: &State, clause: &Clause) -> - -fn write_expr_rec(state: &State, Expr(tpl_clause, tpl_typ): &Expr) -> Box> { +fn write_expr_rec(state: &State, Expr(tpl_clause, tpl_typ): &Expr) +-> Box> { let out_typ = tpl_typ.iter() .flat_map(|c| write_expr_rec(state, &c.clone().into_expr())) .map(Expr::into_clause) @@ -86,6 +118,11 @@ fn write_expr_rec(state: &State, Expr(tpl_clause, tpl_typ): &Expr) -> Box box_once(Expr(Clause::Auto( name_opt.as_ref().and_then(|name| { + if let Clause::Placeh { key, .. 
} = name { + match &state[key] { + Entry::NameOpt(name) => name.as_ref().map(|s| s.as_ref().to_owned()) + } + } if let Some(state_key) = name.strip_prefix('$') { match &state[state_key] { Entry::NameOpt(name) => name.as_ref().map(|s| s.as_ref().to_owned()), diff --git a/src/rule/executor/slice_matcher.rs b/src/rule/executor/slice_matcher.rs index 2282512..f17a61f 100644 --- a/src/rule/executor/slice_matcher.rs +++ b/src/rule/executor/slice_matcher.rs @@ -11,14 +11,17 @@ use super::State; use super::split_at_max_vec::split_at_max_vec; /// Tuple with custom cloning logic -#[derive(Debug, Eq, PartialEq, Hash)] -pub struct CacheEntry<'a>(Mrc<[Expr]>, &'a SliceMatcherDnC); -impl<'a> Clone for CacheEntry<'a> { - fn clone(&self) -> Self { - let CacheEntry(mrc, matcher) = self; - CacheEntry(Mrc::clone(mrc), matcher) - } -} +// #[derive(Debug, Eq, PartialEq, Hash)] +// pub struct CacheEntry<'a>(Mrc<[Expr]>, &'a SliceMatcherDnC); +// impl<'a> Clone for CacheEntry<'a> { +// fn clone(&self) -> Self { +// let CacheEntry(mrc, matcher) = self; +// CacheEntry(Mrc::clone(mrc), matcher) +// } +// } +// ^^^^ +// This has been removed because the slice-based version needs no custom +// cloning logic. In the next iteration, remove the this altogether. /// Matcher that applies a pattern to a slice via divide-and-conquer @@ -66,8 +69,8 @@ impl SliceMatcherDnC { matches!(self.clause.as_ref(), Clause::Placeh{vec: Some(..), ..}) } /// If clause is a name, the qualified name this can match - pub fn clause_qual_name(&self) -> Option> { - if let Clause::Name { qualified, .. } = self.clause.as_ref() {Some(Mrc::clone(qualified))} else {None} + pub fn clause_qual_name(&self) -> Option>> { + if let Clause::Name(name) = self.clause.as_ref() {Some(name.clone())} else {None} } /// If clause is a Placeh, the key in the state the match will be stored at pub fn state_key(&self) -> Option<&String> { @@ -89,8 +92,8 @@ impl SliceMatcherDnC { /// Enumerate all valid subdivisions based on the reported size constraints of self and /// the two subranges - pub fn valid_subdivisions(&self, - range: Mrc<[Expr]> + pub fn valid_subdivisions<'a>(&'a self, + range: &'a [Expr] ) -> impl Iterator, Mrc<[Expr]>, Mrc<[Expr]>)> { let own_max = unwrap_or!(self.own_max_size(range.len()); return box_empty()); let own_min = self.own_min_size(); @@ -196,7 +199,7 @@ impl SliceMatcherDnC { if !range.is_empty() {None} else {Some(State::new())} }, - Some(m) => cache.try_find(&CacheEntry(range, m)).map(|s| s.as_ref().to_owned()) + Some(m) => cache.find(&CacheEntry(range, m)) } } diff --git a/src/rule/executor/state.rs b/src/rule/executor/state.rs index cafc870..07e3728 100644 --- a/src/rule/executor/state.rs +++ b/src/rule/executor/state.rs @@ -1,15 +1,16 @@ use std::{ops::{Add, Index}, rc::Rc, fmt::Debug}; use hashbrown::HashMap; +use lasso::Spur; use crate::ast::Expr; -#[derive(Debug, PartialEq, Eq)] +#[derive(PartialEq, Eq)] pub enum Entry { Vec(Rc>), Scalar(Rc), - Name(Rc), - NameOpt(Option>) + Name(Rc>), + NameOpt(Option>>) } /// A bucket of indexed expression fragments. Addition may fail if there's a conflict. 
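The "may fail" rule is the same across State's insert_* methods just below: an existing binding must agree exactly with the new one, otherwise the whole match is rejected with None. A standalone sketch of that merge-or-fail rule over plain strings (illustrative types, not this crate's State):

use std::collections::HashMap;

// Insert a binding, failing (None) if the key is already bound to a
// different value -- the same shape as State::insert_name below.
fn insert_binding(
    mut state: HashMap<String, String>,
    key: &str,
    value: &str,
) -> Option<HashMap<String, String>> {
    if let Some(old) = state.get(key) {
        if old.as_str() != value { return None }
    } else {
        state.insert(key.to_string(), value.to_string());
    }
    Some(state)
}

fn main() {
    let s = insert_binding(HashMap::new(), "$x", "foo").unwrap();
    assert!(insert_binding(s.clone(), "$x", "foo").is_some()); // consistent rebind
    assert!(insert_binding(s, "$x", "bar").is_none());         // conflicting bind
}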
@@ -55,33 +56,32 @@ impl State { } Some(self) } - pub fn insert_name(mut self, k: &S1, v: &S2) -> Option + pub fn insert_name(mut self, k: &S1, v: &[Spur]) -> Option where - S1: AsRef + ToString + ?Sized, - S2: AsRef + ToString + ?Sized + S1: AsRef + ToString + ?Sized { if let Some(old) = self.0.get(k.as_ref()) { if let Entry::Name(val) = old { - if val.as_str() != v.as_ref() {return None} + if val.as_ref() != v.as_ref() {return None} } else {return None} } else { - self.0.insert(k.to_string(), Entry::Name(Rc::new(v.to_string()))); + self.0.insert(k.to_string(), Entry::Name(Rc::new(v.to_vec()))); } Some(self) } - pub fn insert_name_opt(mut self, k: &S1, v: Option<&S2>) -> Option - where - S1: AsRef + ToString + ?Sized, - S2: AsRef + ToString + ?Sized + pub fn insert_name_opt(mut self, k: &S1, v: Option<&[Spur]>) + -> Option + where S1: AsRef + ToString + ?Sized { if let Some(old) = self.0.get(k.as_ref()) { if let Entry::NameOpt(val) = old { - if val.as_ref().map(|s| s.as_ref().as_str()) != v.map(|s| s.as_ref()) { + if val.as_ref().map(|s| s.as_ref().as_slice()) != v { return None } } else {return None} } else { - self.0.insert(k.to_string(), Entry::NameOpt(v.map(|s| Rc::new(s.to_string())))); + let data = v.map(|s| Rc::new(s.to_vec())); + self.0.insert(k.to_string(), Entry::NameOpt(data)); } Some(self) } @@ -138,10 +138,4 @@ impl IntoIterator for State { fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } -} - -impl Debug for State { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - } } \ No newline at end of file diff --git a/src/rule/executor/update_first_seq_rec.rs b/src/rule/executor/update_first_seq_rec.rs index 2ef062a..c465376 100644 --- a/src/rule/executor/update_first_seq_rec.rs +++ b/src/rule/executor/update_first_seq_rec.rs @@ -1,53 +1,54 @@ -use mappable_rc::Mrc; +use std::rc::Rc; -use crate::{ast::{Expr, Clause}, utils::{replace_first, to_mrc_slice}}; +use crate::utils::replace_first; +use crate::ast::{Expr, Clause}; -/// Traverse the tree, calling pred on every sibling list until it returns some vec -/// then replace the sibling list with that vec and return true +/// Traverse the tree, calling pred on every sibling list until it returns +/// some vec then replace the sibling list with that vec and return true /// return false if pred never returned some -pub fn exprv(input: Mrc<[Expr]>, pred: &mut F) -> Option> -where F: FnMut(Mrc<[Expr]>) -> Option> { - if let o@Some(_) = pred(Mrc::clone(&input)) {return o} +pub fn exprv(input: Rc>, pred: &mut F) -> Option>> +where F: FnMut(Rc>) -> Option>> { + if let o@Some(_) = pred(input.clone()) {return o} replace_first(input.as_ref(), |ex| expr(ex, pred)) - .map(|i| to_mrc_slice(i.collect())) + .map(|i| Rc::new(i.collect())) } pub fn expr(Expr(cls, typ): &Expr, pred: &mut F) -> Option -where F: FnMut(Mrc<[Expr]>) -> Option> { - if let Some(t) = clausev(Mrc::clone(typ), pred) {return Some(Expr(cls.clone(), t))} - if let Some(c) = clause(cls, pred) {return Some(Expr(c, Mrc::clone(typ)))} +where F: FnMut(Rc>) -> Option>> { + if let Some(t) = clausev(typ.clone(), pred) {return Some(Expr(cls.clone(), t))} + if let Some(c) = clause(cls, pred) {return Some(Expr(c, typ.clone()))} None } -pub fn clausev(input: Mrc<[Clause]>, pred: &mut F) -> Option> -where F: FnMut(Mrc<[Expr]>) -> Option> { +pub fn clausev(input: Rc>, pred: &mut F) -> Option>> +where F: FnMut(Rc>) -> Option>> { replace_first(input.as_ref(), |c| clause(c, pred)) - .map(|i| to_mrc_slice(i.collect())) + .map(|i| 
Rc::new(i.collect())) } pub fn clause(c: &Clause, pred: &mut F) -> Option -where F: FnMut(Mrc<[Expr]>) -> Option> { +where F: FnMut(Rc>) -> Option>> { match c { Clause::P(_) | Clause::Placeh {..} | Clause::Name {..} => None, Clause::Lambda(n, typ, body) => { - if let Some(b) = exprv(Mrc::clone(body), pred) { - return Some(Clause::Lambda(n.clone(), Mrc::clone(typ), b)) + if let Some(b) = exprv(body.clone(), pred) { + return Some(Clause::Lambda(n.clone(), typ.clone(), b)) } - if let Some(t) = exprv(Mrc::clone(typ), pred) { - return Some(Clause::Lambda(n.clone(), t, Mrc::clone(body))) + if let Some(t) = exprv(typ.clone(), pred) { + return Some(Clause::Lambda(n.clone(), t, body.clone())) } None } Clause::Auto(n, typ, body) => { - if let Some(b) = exprv(Mrc::clone(body), pred) { - return Some(Clause::Auto(n.clone(), Mrc::clone(typ), b)) + if let Some(b) = exprv(body.clone(), pred) { + return Some(Clause::Auto(n.clone(), typ.clone(), b)) } - if let Some(t) = exprv(Mrc::clone(typ), pred) { - return Some(Clause::Auto(n.clone(), t, Mrc::clone(body))) + if let Some(t) = exprv(typ.clone(), pred) { + return Some(Clause::Auto(n.clone(), t, body.clone())) } None } - Clause::S(c, body) => Some(Clause::S(*c, exprv(Mrc::clone(body), pred)?)), - Clause::Explicit(t) => Some(Clause::Explicit(Mrc::new(expr(t, pred)?))) + Clause::S(c, body) => Some(Clause::S(*c, exprv(body.clone(), pred)?)), + Clause::Explicit(t) => Some(Clause::Explicit(Rc::new(expr(t, pred)?))) } } \ No newline at end of file diff --git a/src/utils/bfs.rs b/src/utils/bfs.rs index d944984..6a2e2ce 100644 --- a/src/utils/bfs.rs +++ b/src/utils/bfs.rs @@ -5,6 +5,8 @@ use std::hash::Hash; use crate::unwrap_or; use crate::utils::BoxedIter; +// TODO: move to own crate + /// Two-stage breadth-first search; /// Instead of enumerating neighbors before returning a node, it puts visited but not yet /// enumerated nodes in a separate queue and only enumerates them to refill the queue of children diff --git a/src/utils/cache.rs b/src/utils/cache.rs index 1109dce..383d601 100644 --- a/src/utils/cache.rs +++ b/src/utils/cache.rs @@ -1,96 +1,47 @@ use std::{hash::Hash, cell::RefCell, rc::Rc}; use hashbrown::HashMap; -use mappable_rc::Mrc; -/// Convenience trait for overriding Mrc's strange cloning logic -pub trait MyClone { - fn my_clone(&self) -> Self; -} - -impl MyClone for T where T: Clone { - default fn my_clone(&self) -> Self { self.clone() } -} - -impl MyClone for Rc { - fn my_clone(&self) -> Self { Rc::clone(self) } -} -impl MyClone for Mrc { - fn my_clone(&self) -> Self { Mrc::clone(self) } -} +// TODO: make this a crate /// Cache the return values of an effectless closure in a hashmap /// Inspired by the closure_cacher crate. 
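The cache described by the comment above passes `&Self` back into the closure, so recursive definitions can memoize their own sub-results; as the hunk below shows, the reworked version stores and returns `O` by clone instead of wrapping everything in `Mrc`. Here is a simplified, self-contained rendition of the pattern with a throwaway Fibonacci closure as usage (a hypothetical example, not the patch's exact code):

```rust
use std::cell::RefCell;
use std::collections::HashMap;
use std::hash::Hash;

/// Simplified stand-in for this module's `Cache`: memoize an effectless
/// closure, handing `&Self` back to it so the closure can recurse
/// through the cache.
struct Cache<'a, I, O> {
  store: RefCell<HashMap<I, O>>,
  closure: Box<dyn Fn(I, &Cache<'a, I, O>) -> O + 'a>,
}

impl<'a, I: Eq + Hash + Clone, O: Clone> Cache<'a, I, O> {
  fn new<F: Fn(I, &Self) -> O + 'a>(closure: F) -> Self {
    Self { store: RefCell::new(HashMap::new()), closure: Box::new(closure) }
  }
  fn find(&self, i: &I) -> O {
    if let Some(v) = self.store.borrow().get(i) { return v.clone() }
    // No borrow is held while the closure runs, so it may call `find`.
    let result = (self.closure)(i.clone(), self);
    self.store.borrow_mut().entry(i.clone()).or_insert(result).clone()
  }
}

fn main() {
  // Hypothetical usage: memoized Fibonacci recursing through the cache.
  let fib: Cache<u64, u64> = Cache::new(|n, c| {
    if n < 2 { n } else { c.find(&(n - 1)) + c.find(&(n - 2)) }
  });
  assert_eq!(fib.find(&10), 55);
}
```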
pub struct Cache<'a, I, O: 'static> { - store: RefCell>>, - closure: Box Mrc + 'a> + store: RefCell>, + closure: Box O + 'a> } impl<'a, I, O> Cache<'a, I, O> where - I: Eq + Hash + MyClone + I: Eq + Hash + Clone, O: Clone { pub fn new(closure: F) -> Self where F: Fn(I, &Self) -> O { - Self::new_raw(move |o, s| Mrc::new(closure(o, s))) - } - - /// Take an Mrc closure rather than an O closure - /// Used internally to derive caches from other systems working with Mrc-s - pub fn new_raw(closure: F) -> Self where F: Fn(I, &Self) -> Mrc { Self { store: RefCell::new(HashMap::new()), closure: Box::new(closure) } } + pub fn rc(closure: F) -> Rc where F: Fn(I, &Self) -> O { + Rc::new(Self::new(closure)) + } + /// Produce and cache a result by cloning I if necessary - pub fn find(&self, i: &I) -> Mrc { + pub fn find(&self, i: &I) -> O { let closure = &self.closure; if let Some(v) = self.store.borrow().get(i) { - return Mrc::clone(v) + return v.clone() } // In the moment of invocation the refcell is on immutable // this is important for recursive calculations - let result = closure(i.my_clone(), self); + let result = closure(i.clone(), self); let mut store = self.store.borrow_mut(); - Mrc::clone(store.raw_entry_mut().from_key(i) - .or_insert_with(|| (i.my_clone(), result)).1) + store.raw_entry_mut().from_key(i) + .or_insert_with(|| (i.clone(), result)).1.clone() } #[allow(dead_code)] /// Return the result if it has already been computed - pub fn known(&self, i: &I) -> Option> { + pub fn known(&self, i: &I) -> Option { let store = self.store.borrow(); - store.get(i).map(Mrc::clone) - } - #[allow(dead_code)] - /// Forget the output for the given input - pub fn drop(&self, i: &I) -> bool { - self.store.borrow_mut().remove(i).is_some() + store.get(i).cloned() } } - -impl<'a, I, O, E> Cache<'a, I, Result> where - I: Eq + Hash + MyClone, - // O: Clone, - E: Clone -{ - /// Sink the ref from a Result into the Ok value, such that cloning only occurs on the sad path - /// but the return value can be short-circuited - pub fn try_find(&self, i: &I) -> Result, E> { - let ent = self.find(i); - Mrc::try_map(ent, |t| t.as_ref().ok()) - .map_err(|res| Result::as_ref(&res).err().unwrap().to_owned()) - } -} - -impl<'a, I, O> Cache<'a, I, Option> where - I: Eq + Hash + MyClone, - // O: Clone -{ - #[allow(dead_code)] - /// Sink the ref from an Option into the Some value such that the return value can be - /// short-circuited - pub fn try_find(&self, i: &I) -> Option> where I: Clone { - let ent = self.find(i); - Mrc::try_map(ent, |o| o.as_ref()).ok() - } -} diff --git a/src/utils/interned_display.rs b/src/utils/interned_display.rs new file mode 100644 index 0000000..feab4f0 --- /dev/null +++ b/src/utils/interned_display.rs @@ -0,0 +1,19 @@ +use std::fmt::Display; + +use lasso::RodeoResolver; + +pub trait InternedDisplay { + fn fmt(&self, + f: &mut std::fmt::Formatter<'_>, + rr: RodeoResolver + ) -> std::fmt::Result; +} + +impl InternedDisplay for T where T: Display { + fn fmt(&self, + f: &mut std::fmt::Formatter<'_>, + rr: RodeoResolver + ) -> std::fmt::Result { + ::fmt(&self, f) + } +} \ No newline at end of file diff --git a/src/utils/interner.rs b/src/utils/interner.rs deleted file mode 100644 index 1d117d8..0000000 --- a/src/utils/interner.rs +++ /dev/null @@ -1,27 +0,0 @@ -// use std::{collections::HashSet, hash::Hash}; - -// use hashbrown::HashMap; - -// #[derive(Copy, Clone)] -// pub struct Interned<'a, T> { -// interner: &'a Interner, -// data: &'a T, -// } - -// impl<'a, T: Eq> Eq for Interned<'a, T> {} -// 
impl<'a, T: PartialEq> PartialEq for Interned<'a, T> { -// fn eq(&self, other: &Self) -> bool { -// if (self.interner as *const _) == (other.interner as *const _) { -// (self.data as *const _) == (other.data as *const _) -// } else {self.data == other.data} -// } -// } - -// pub struct Interner { -// data: HashSet, -// hash_cache: HashMap<> -// } - -// impl Interner { - -// } \ No newline at end of file diff --git a/src/utils/iter.rs b/src/utils/iter.rs index 08ea24f..ce5af08 100644 --- a/src/utils/iter.rs +++ b/src/utils/iter.rs @@ -1,6 +1,6 @@ /// Utility functions to get rid of explicit casts to BoxedIter which are tedious -use std::iter; +use std::{iter, mem}; pub type BoxedIter<'a, T> = Box + 'a>; pub type BoxedIterIter<'a, T> = BoxedIter<'a, BoxedIter<'a, T>>; @@ -30,6 +30,7 @@ where { Box::new(i.flatten()) } + pub fn into_boxed_iter<'a, T: 'a>(t: T) -> BoxedIter<'a, ::Item> where T: IntoIterator { Box::new(t.into_iter()) diff --git a/src/utils/merge_sorted.rs b/src/utils/merge_sorted.rs deleted file mode 100644 index bb7480e..0000000 --- a/src/utils/merge_sorted.rs +++ /dev/null @@ -1,27 +0,0 @@ -use std::mem; - -// use itertools::Itertools; - -/// Merge two sorted iterators into a sorted iterator. -pub fn merge_sorted(mut i: I, mut j: J, mut f: F) -> impl Iterator -where - I: Iterator, J: Iterator, - F: FnMut(&T) -> O, O: Ord, -{ - let mut i_item: Option = None; - let mut j_item: Option = None; - std::iter::from_fn(move || { - match (&mut i_item, &mut j_item) { - (&mut None, &mut None) => None, - (&mut None, j_item @ &mut Some(_)) => Some((j_item, None)), - (i_item @ &mut Some(_), &mut None) => Some((i_item, i.next())), - (Some(i_val), Some(j_val)) => Some( - if f(i_val) < f(j_val) { - (&mut i_item, i.next()) - } else { - (&mut j_item, j.next()) - } - ) - }.and_then(|(dest, value)| mem::replace(dest, value)) - }) -} \ No newline at end of file diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 799b205..2b99f8a 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,7 +1,8 @@ mod cache; pub mod translate; mod replace_first; -mod interner; +mod interned_display; +pub use interned_display::InternedDisplay; // mod visitor; pub use replace_first::replace_first; pub use cache::Cache; @@ -9,16 +10,13 @@ mod substack; pub use substack::Stackframe; mod side; pub use side::Side; -mod merge_sorted; -pub use merge_sorted::merge_sorted; mod unwrap_or; pub mod iter; pub use iter::BoxedIter; mod bfs; -mod unless_let; mod string_from_charset; pub use string_from_charset::string_from_charset; -mod for_loop; +mod xloop; mod protomap; pub use protomap::ProtoMap; mod product2; diff --git a/src/utils/product2.rs b/src/utils/product2.rs index 6dff9c3..b1078a7 100644 --- a/src/utils/product2.rs +++ b/src/utils/product2.rs @@ -8,7 +8,9 @@ use super::Side; pub enum Product2 { Left, Right, + #[allow(unused)] Either, + #[allow(unused)] New(T) } impl Product2 { diff --git a/src/utils/protomap.rs b/src/utils/protomap.rs index eecff7c..4683d8e 100644 --- a/src/utils/protomap.rs +++ b/src/utils/protomap.rs @@ -2,16 +2,18 @@ use std::{iter, ops::{Index, Add}, borrow::Borrow}; use smallvec::SmallVec; -const INLINE_ENTRIES: usize = 2; +// TODO: make this a crate alongside substack /// Linked-array-list of key-value pairs. 
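A usage note on the `BoxedIter` helpers touched above: boxing erases the concrete iterator type, which lets differently shaped branches share one return type without the explicit casts the module comment complains about. A small self-contained sketch follows, with simplified helper signatures and a hypothetical `numbers` function:

```rust
use std::iter;

/// Same idea as the alias in src/utils/iter.rs: a type-erased iterator.
type BoxedIter<'a, T> = Box<dyn Iterator<Item = T> + 'a>;

fn box_empty<'a, T: 'a>() -> BoxedIter<'a, T> { Box::new(iter::empty()) }

fn into_boxed_iter<'a, T: IntoIterator>(t: T) -> BoxedIter<'a, T::Item>
where T::IntoIter: 'a {
  Box::new(t.into_iter())
}

/// Both branches return the same BoxedIter type even though the
/// underlying iterators differ.
fn numbers(evens_from: Option<&[u32]>) -> BoxedIter<'_, u32> {
  match evens_from {
    Some(pool) => Box::new(pool.iter().copied().filter(|n| n % 2 == 0)),
    None => into_boxed_iter(0..3u32),
  }
}

fn main() {
  println!("{:?}", numbers(Some(&[1, 2, 3, 4])).collect::<Vec<_>>()); // [2, 4]
  println!("{:?}", numbers(None).collect::<Vec<_>>());                // [0, 1, 2]
  let nothing: Vec<u32> = box_empty().collect();
  println!("{nothing:?}");                                            // []
}
```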
-/// Lookup and modification is O(n + cachemiss * n / m) -/// Can be extended by reference in O(m) < O(n) +/// - Lookup and modification is O(n + cachemiss * n / m) +/// - Can be extended by reference in O(m) < O(n) /// -/// The number of elements stored inline in a stackframe is 2 by default, which is enough for most -/// recursive algorithms. The cost of overruns is a heap allocation and subsequent heap indirections, -/// plus wasted stack space which is likely wasted L1 as well. The cost of underruns is wasted stack -/// space. +/// The number of elements stored inline in a stackframe is 2 by default, +/// which is enough for most recursive algorithms. +/// - The cost of overruns is a heap allocation and subsequent +/// heap indirections, plus wasted stack space which is likely wasted L1 +/// as well. +/// - The cost of underruns is wasted stack space. pub struct ProtoMap<'a, K, V, const STACK_COUNT: usize = 2> { entries: SmallVec<[(K, Option); STACK_COUNT]>, prototype: Option<&'a ProtoMap<'a, K, V, STACK_COUNT>> diff --git a/src/utils/replace_first.rs b/src/utils/replace_first.rs index d7e642e..d1b0d15 100644 --- a/src/utils/replace_first.rs +++ b/src/utils/replace_first.rs @@ -1,6 +1,9 @@ use std::iter; -pub fn replace_first<'a, T, F>(slice: &'a [T], mut f: F) -> Option + 'a> +/// Iterate over a sequence with the first element the function returns +/// Some() for updated, but only if there is such an element. +pub fn replace_first<'a, T, F>(slice: &'a [T], mut f: F) +-> Option + 'a> where T: Clone, F: FnMut(&T) -> Option { for i in 0..slice.len() { if let Some(new) = f(&slice[i]) { diff --git a/src/utils/side.rs b/src/utils/side.rs index 4fdae20..5dde3e4 100644 --- a/src/utils/side.rs +++ b/src/utils/side.rs @@ -1,5 +1,7 @@ use std::fmt::Display; +/// A primitive for encoding the two sides Left and Right. While booleans +/// are technically usable for this purpose, they're less descriptive. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Side {Left, Right} @@ -32,8 +34,11 @@ impl Side { pub fn crop<'a, T>(&self, margin: usize, slice: &'a [T]) -> &'a [T] { self.opposite().slice(slice.len() - margin, slice) } - /// ignore N elements from this end and M elements from the other end of a slice - pub fn crop_both<'a, T>(&self, margin: usize, opposite: usize, slice: &'a [T]) -> &'a [T] { + /// ignore N elements from this end and M elements from the other end + /// of a slice + pub fn crop_both<'a, T>(&self, + margin: usize, opposite: usize, slice: &'a [T] + ) -> &'a [T] { self.crop(margin, self.opposite().crop(opposite, slice)) } /// Pick this side from a pair of things diff --git a/src/utils/string_from_charset.rs b/src/utils/string_from_charset.rs index cec678a..0815558 100644 --- a/src/utils/string_from_charset.rs +++ b/src/utils/string_from_charset.rs @@ -9,6 +9,8 @@ fn string_from_charset_rec(val: u64, digits: &str) -> String { prefix } +/// Generate alphabetized names from numbers using a set of permitted +/// characters pub fn string_from_charset(val: u64, digits: &str) -> String { string_from_charset_rec(val + 1, digits) } \ No newline at end of file diff --git a/src/utils/substack.rs b/src/utils/substack.rs index ebd9e84..f287d62 100644 --- a/src/utils/substack.rs +++ b/src/utils/substack.rs @@ -1,8 +1,10 @@ use std::fmt::Debug; -/// Implement a FILO stack that lives on the regular call stack as a linked list. 
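To illustrate the `string_from_charset` helper documented in the hunk above: it turns a number into the shortest-first sequence of names over the permitted characters, which amounts to a bijective base-N encoding (with a 26-letter charset, 0 becomes "a", 25 becomes "z", 26 becomes "aa"). The exact numbering in this sketch is inferred from the doc comment and the `val + 1` shift, so treat it as an assumption rather than a transcription:

```rust
/// Standalone sketch of alphabetized name generation: bijective base-N
/// over the permitted characters (assumed numbering: 0 -> "a").
fn string_from_charset(mut val: u64, digits: &str) -> String {
  let chars: Vec<char> = digits.chars().collect();
  let base = chars.len() as u64;
  assert!(base > 0, "charset must not be empty");
  let mut out = Vec::new();
  val += 1; // shift to 1-based so the empty string is never produced
  while val > 0 {
    val -= 1;
    out.push(chars[(val % base) as usize]);
    val /= base;
  }
  out.reverse();
  out.into_iter().collect()
}

fn main() {
  let abc = "abcdefghijklmnopqrstuvwxyz";
  assert_eq!(string_from_charset(0, abc), "a");
  assert_eq!(string_from_charset(25, abc), "z");
  assert_eq!(string_from_charset(26, abc), "aa");
}
```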
-/// Mainly useful to detect loops in recursive algorithms where the recursion isn't -/// deep enough to warrant a heap-allocated set +// TODO: extract to crate + +/// A FILO stack that lives on the regular call stack as a linked list. +/// Mainly useful to detect loops in recursive algorithms where +/// the recursion isn't deep enough to warrant a heap-allocated set. #[derive(Clone, Copy)] pub struct Stackframe<'a, T> { pub item: T, @@ -33,6 +35,7 @@ impl<'a, T: 'a> Stackframe<'a, T> { len: self.len + 1 } } + #[allow(unused)] pub fn opush(prev: Option<&'a Self>, item: T) -> Self { Self { item, @@ -40,15 +43,19 @@ impl<'a, T: 'a> Stackframe<'a, T> { len: prev.map_or(1, |s| s.len) } } + #[allow(unused)] pub fn len(&self) -> usize { self.len } + #[allow(unused)] pub fn pop(&self, count: usize) -> Option<&Self> { if count == 0 {Some(self)} else {self.prev.expect("Index out of range").pop(count - 1)} } + #[allow(unused)] pub fn opop(cur: Option<&Self>, count: usize) -> Option<&Self> { if count == 0 {cur} else {Self::opop(cur.expect("Index out of range").prev, count - 1)} } + #[allow(unused)] pub fn o_into_iter(curr: Option<&Self>) -> StackframeIterator { StackframeIterator { curr } } @@ -66,7 +73,9 @@ pub struct StackframeIterator<'a, T> { } impl<'a, T> StackframeIterator<'a, T> { - pub fn first_some Option>(&mut self, f: F) -> Option { + #[allow(unused)] + pub fn first_some(&mut self, f: F) -> Option + where F: Fn(&T) -> Option { while let Some(x) = self.next() { if let Some(result) = f(x) { return Some(result) diff --git a/src/utils/translate.rs b/src/utils/translate.rs index ac184d5..0ef8f7a 100644 --- a/src/utils/translate.rs +++ b/src/utils/translate.rs @@ -1,5 +1,10 @@ use std::mem; +// TODO: extract to crate + +#[allow(unused)] +/// Map over a `&mut` with a mapper function that takes ownership of +/// the value pub fn translate T>(data: &mut T, f: F) { unsafe { let mut acc = mem::MaybeUninit::::uninit().assume_init(); @@ -10,6 +15,8 @@ pub fn translate T>(data: &mut T, f: F) { } } +/// Map over a `&mut` with a mapper function that takes ownership of +/// the value and also produces some unrelated data. pub fn process (T, U)>(data: &mut T, f: F) -> U { unsafe { let mut acc = mem::MaybeUninit::::uninit().assume_init(); diff --git a/src/utils/unless_let.rs b/src/utils/unless_let.rs deleted file mode 100644 index 70313ba..0000000 --- a/src/utils/unless_let.rs +++ /dev/null @@ -1,6 +0,0 @@ -#[macro_export] -macro_rules! unless_let { - ($m:pat_param = $expr:tt) => { - if let $m = $expr {} else - } -} \ No newline at end of file diff --git a/src/utils/unwrap_or.rs b/src/utils/unwrap_or.rs index 4ea81fa..5fa6bd2 100644 --- a/src/utils/unwrap_or.rs +++ b/src/utils/unwrap_or.rs @@ -1,3 +1,6 @@ +/// A macro version of [Option::unwrap_or_else] which supports +/// flow control statements such as `return` and `break` in the "else" +/// branch. #[macro_export] macro_rules! 
unwrap_or { ($m:expr; $fail:expr) => { diff --git a/src/utils/visitor.rs b/src/utils/visitor.rs deleted file mode 100644 index c7c931b..0000000 --- a/src/utils/visitor.rs +++ /dev/null @@ -1,18 +0,0 @@ -pub trait Visit { - type Return; - fn visit(&self, target: T) -> Return; -} - -pub trait ImpureVisit { - type Shard; - type Return; - fn impure_visit(&self, target: T) -> (Shard, Return); - fn merge(&mut self, s: Shard); -} - -pub struct OverlayVisitor(VBase, VOver); - -impl Visitor for OverlayVisitor -where VBase: Visitor>, VOver: Visitor> { - -} \ No newline at end of file diff --git a/src/utils/for_loop.rs b/src/utils/xloop.rs similarity index 96% rename from src/utils/for_loop.rs rename to src/utils/xloop.rs index ae51f85..43f3507 100644 --- a/src/utils/for_loop.rs +++ b/src/utils/xloop.rs @@ -48,7 +48,8 @@ /// to these as well just like the others. In all cases the exit expression is optional, its /// default value is `()`. /// -/// **todo** find a valid use case for While let for a demo +/// TODO: find a valid use case for While let for a demo +/// TODO: break out into crate #[macro_export] macro_rules! xloop { (for $p:pat in $it:expr; $body:stmt) => {