diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..a7a0bee --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.jpg filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/.gitignore b/.gitignore index d5c51fb..1f02ac1 100644 --- a/.gitignore +++ b/.gitignore @@ -7,4 +7,5 @@ *.log *.out *.pdf -*.gz \ No newline at end of file +*.gz +!notes/papers/report/template/Figures/** \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 6999f9e..b3ce5d8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -24,12 +24,73 @@ dependencies = [ "version_check", ] +[[package]] +name = "anstream" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41ed9a86bf92ae6580e0a31281f65a1b1d867c0cc68d5346e2ae128dddfa6a7d" + +[[package]] +name = "anstyle-parse" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e765fd216e48e067936442276d1d57399e37bce53c264d6fefbe298080cb57ee" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180abfa45703aebe0093f79badacc01b8fd4ea2e35118747e5811127f926e188" +dependencies = [ + "anstyle", + "windows-sys", +] + [[package]] name = "autocfg" version = "1.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +[[package]] +name = "base64" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a4ddaa51a5bc52a6948f74c06d20aaaddb71924eab79b8c97a8c556e942d6a" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "cc" version = "1.0.79" @@ -42,6 +103,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chumsky" version = "0.9.2" @@ -52,6 +119,54 @@ dependencies = [ "stacker", ] +[[package]] +name = "clap" +version = "4.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34d21f9bf1b425d2968943631ec91202fe5e837264063503708b83013f8fc938" +dependencies = [ + "clap_builder", + "clap_derive", + "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914c8c79fb560f238ef6429439a30023c862f7a28e688c58f7203f12b29970bd" +dependencies = [ + "anstream", + "anstyle", + "bitflags", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.13", +] + +[[package]] +name = "clap_lex" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + [[package]] name = "dashmap" version = "4.0.2" @@ -70,15 +185,36 @@ checksum = "68b0cf012f1230e43cd00ebb729c6bb58707ecfa8ad08b52ef3a4ccd2697fc30" [[package]] name = "either" -version = "1.6.1" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" +checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" + +[[package]] +name = "errno" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] [[package]] name = "getrandom" -version = "0.2.6" +version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +checksum = "c05aeb6a22b8f62540c194aac980f2115af067bfe15a0734d7277a768d396b31" dependencies = [ "cfg-if", "libc", @@ -112,6 +248,12 @@ dependencies = [ "ahash 0.8.3", ] +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + [[package]] name = "hermit-abi" version = "0.2.6" @@ -122,10 +264,48 @@ dependencies = [ ] [[package]] -name = "itertools" -version = "0.10.3" +name = "hermit-abi" +version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "io-lifetimes" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" +dependencies = [ + "hermit-abi 0.3.1", + "libc", + "windows-sys", +] + +[[package]] +name = "is-terminal" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +dependencies = [ + "hermit-abi 0.3.1", + "io-lifetimes", + "rustix", + "windows-sys", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" dependencies = [ "either", ] @@ -142,15 +322,37 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.126" +version = "0.2.142" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +checksum = "6a987beff54b60ffa6d51982e1aa1146bc42f19bd26be28b0586f252fccf5317" + +[[package]] +name = "linux-raw-sys" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece97ea872ece730aed82664c424eb4c8291e1ff2480247ccf7409044bc6479f" + +[[package]] +name = "lock_api" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "435011366fe56583b16cf956f9df0095b405b82d76425bc8981c0e22e60ec4df" +dependencies = [ + "autocfg", 
+ "scopeguard", +] [[package]] name = "mappable-rc" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b65e7f462b4fbfe1a3c857747c9d027dd55faffaeffbca68f70d0becfe7e93c5" +checksum = "204651f31b0a6a7b2128d2b92c372cd94607b210c3a6b6e542c57a8cfd4db996" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "num-traits" @@ -167,7 +369,7 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fac9e2da13b5eb447a6ce3d392f23a29d8694bff781bf03a16cd9ac8697593b" dependencies = [ - "hermit-abi", + "hermit-abi 0.2.6", "libc", ] @@ -179,9 +381,11 @@ checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "orchid" -version = "0.1.0" +version = "0.2.0" dependencies = [ + "base64", "chumsky", + "clap", "dyn-clone", "hashbrown 0.13.2", "itertools", @@ -189,23 +393,49 @@ dependencies = [ "mappable-rc", "ordered-float", "smallvec", + "static_init", "thiserror", ] [[package]] name = "ordered-float" -version = "3.0.0" +version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" +checksum = "13a384337e997e6860ffbaa83708b2ef329fd8c54cb67a5f64d421e0f943254f" dependencies = [ "num-traits", ] [[package]] -name = "proc-macro2" -version = "1.0.39" +name = "parking_lot" +version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a2cfe6f0ad2bfc16aefa463b497d5c7a5ecd44a23efa72aa342d90177356dc" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "proc-macro2" +version = "1.0.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" dependencies = [ "unicode-ident", ] @@ -221,13 +451,42 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.18" +version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustix" +version = "0.37.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acf8729d8542766f1b2cf77eb034d52f40d375bb8b615d0b147089946e16613d" +dependencies = [ + "bitflags", + "errno", + "io-lifetimes", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + [[package]] name = "smallvec" version = "1.10.0" @@ -248,10 +507,55 @@ dependencies = [ ] [[package]] -name = "syn" -version = "1.0.95" +name = "static_init" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" +checksum = "8a2a1c578e98c1c16fc3b8ec1328f7659a500737d7a0c6d625e73e830ff9c1f6" +dependencies = 
[ + "bitflags", + "cfg_aliases", + "libc", + "parking_lot", + "parking_lot_core", + "static_init_macro", + "winapi", +] + +[[package]] +name = "static_init_macro" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70a2595fc3aa78f2d0e45dd425b22282dd863273761cc77780914b2cf3003acf" +dependencies = [ + "cfg_aliases", + "memchr", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c9da457c5285ac1f936ebd076af6dac17a61cfe7826f2076b4d015cf47bc8ec" dependencies = [ "proc-macro2", "quote", @@ -260,29 +564,35 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.31" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +checksum = "978c9a314bd8dc99be594bc3c175faaa9794be04a5a5e153caba6915336cebac" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.31" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +checksum = "f9456a42c5b0d803c8cd86e73dd7cc9edd429499f37a3550d286d5e86720569f" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.13", ] [[package]] name = "unicode-ident" -version = "1.0.0" +version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" +checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "version_check" @@ -292,9 +602,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "winapi" @@ -317,3 +627,69 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" + +[[package]] +name = "windows_aarch64_msvc" +version = 
"0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" diff --git a/Cargo.toml b/Cargo.toml index 8767cba..431fbdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,10 @@ [package] name = "orchid" -version = "0.1.0" +version = "0.2.0" edition = "2021" +authors = [ + "Lawrence Bethlenfalvy " +] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -15,3 +18,6 @@ itertools = "0.10" smallvec = { version = "1.10.0", features = ['const_generics'] } dyn-clone = "1.0.11" lasso = { version = "0.6.0", features = ['multi-threaded'] } +base64 = "0.21.0" +static_init = "1.0.3" +clap = { version = "4.2.7", features = ["derive"] } diff --git a/examples/lite/main.orc b/examples/lite/main.orc index 36edd07..dbba362 100644 --- a/examples/lite/main.orc +++ b/examples/lite/main.orc @@ -1,7 +1,7 @@ import 
prelude::* -import std::conv::(parse_float, to_string) -import std::cpsio::(readline, print) -import std::str::(concatenate) +import std::(parse_float, to_string) +import std::(readline, print) +import std::(concatenate) export main := do{ cps data = readline; @@ -20,3 +20,5 @@ export main := do{ cps print (to_string result ++ "\n"); 0 } + +-- export main := 1 do { 1 ; 2 } 3 diff --git a/orchid.code-workspace b/orchid.code-workspace index 0472125..4899a6f 100644 --- a/orchid.code-workspace +++ b/orchid.code-workspace @@ -5,7 +5,7 @@ } ], "settings": { - "[markdown]": { + "[markdown][latex]": { "editor.unicodeHighlight.ambiguousCharacters": false, "editor.unicodeHighlight.invisibleCharacters": false, "diffEditor.ignoreTrimWhitespace": false, @@ -23,13 +23,43 @@ }, "[rust]": { "editor.rulers": [74] - } + }, + "rust-analyzer.showUnlinkedFileNotification": false, + "files.associations": { + "*.mjsd": "markdown" + }, }, "extensions": { "recommendations": [ "tomoki1207.pdf", "james-yu.latex-workshop", - "bungcip.better-toml" + "bungcip.better-toml", + "maptz.regionfolder", + "serayuzgur.crates", + "tamasfe.even-better-toml", + "haskell.haskell", + "justusadam.language-haskell", + "yzhang.markdown-all-in-one", + "goessner.mdmath", + "gruntfuggly.todo-tree" + ] + }, + "launch": { + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Cargo launch", + "cwd": "${workspaceFolder:orchid}", + "program": "${workspaceFolder}/target/debug/orchid", + "cargo": { + "args": [ + "run", + ] + }, + "args": [] + } ] } } \ No newline at end of file diff --git a/src/cli.rs b/src/cli.rs new file mode 100644 index 0000000..a12a380 --- /dev/null +++ b/src/cli.rs @@ -0,0 +1,19 @@ +use std::{fmt::Display, io::{stdin, BufRead, stdout, Write}}; + +pub fn prompt( + prompt: &str, + default: T, + mut try_cast: impl FnMut(String) -> Result +) -> T { + loop { + print!("{prompt} ({default}): "); + stdout().lock().flush().unwrap(); + let mut input = 
String::with_capacity(100); + stdin().lock().read_line(&mut input).unwrap(); + if input.len() == 0 {return default} + match try_cast(input) { + Ok(t) => return t, + Err(e) => println!("Error: {e}") + } + } +} \ No newline at end of file diff --git a/src/executor/apply_lambda.rs b/src/executor/apply_lambda.rs deleted file mode 100644 index 6bc1190..0000000 --- a/src/executor/apply_lambda.rs +++ /dev/null @@ -1,89 +0,0 @@ -use itertools::Itertools; -use mappable_rc::Mrc; - -use crate::utils::{collect_to_mrc, to_mrc_slice}; - -use crate::representations::typed::{Clause, Expr}; - -#[derive(Clone)] -struct Application<'a> { - id: u64, - value: &'a Expr, - types: bool -} - -// pub fn apply_lambda(app: Application, body: Expr) -> Expr { -// apply_lambda_expr_rec(id, value, body) -// .unwrap_or(body) -// } - -fn apply_lambda_expr_rec( - app@Application{ id, types, value }: Application, expr: &Expr -) -> Option { - let Expr(clause, typ) = expr; - match clause { - Clause::LambdaArg(arg_id) | Clause::AutoArg(arg_id) if *arg_id == id => { - let full_typ = - value.1.iter() - .chain(typ.iter()) - .cloned().collect_vec(); - Some(Expr(value.0.to_owned(), full_typ)) - } - cl => { - let new_cl = apply_lambda_clause_rec(app, cl); - let new_typ = if !types {None} else { - typ. - } - } - } -} - -fn apply_lambda_clause_rec( - app: Application, clause: &Clause -) -> Option { - match clause { - // Only element actually manipulated - Clause::LambdaArg(_) | Clause::AutoArg(_) => None, - // Traverse, yield Some if either had changed. 
- Clause::Apply(f, x) => { - let new_f = apply_lambda_expr_rec(app, f.as_ref()); - let new_x = apply_lambda_expr_rec(app, x.as_ref()); - match (new_f, new_x) { // Mind the shadows - (None, None) => None, - (None, Some(x)) => Some(Clause::Apply(f.clone(), Box::new(x))), - (Some(f), None) => Some(Clause::Apply(Box::new(f), x.clone())), - (Some(f), Some(x)) => Some(Clause::Apply(Box::new(f), Box::new(x))) - } - }, - Clause::Lambda(own_id, t, b) => apply_lambda__traverse_param(id, value, own_id, t, b, Clause::Lambda), - Clause::Auto(own_id, t, b) => apply_lambda__traverse_param(id, value, own_id, t, b, Clause::Auto), - // Leaf nodes - Clause::Atom(_) | Clause::ExternFn(_) | Clause::Literal(_) => None - } -} - -fn apply_lambda__traverse_param( - id: u64, value: Mrc, - own_id: u64, typ: Mrc<[Clause]>, b: Mrc, - wrap: impl Fn(u64, Mrc<[Clause]>, Mrc) -> Clause -) -> Option { - let any_t = false; - let mut t_acc = vec![]; - for t in typ.iter() { - let newt = apply_lambda_clause_rec(id, Mrc::clone(&value), t.clone()); - any_t |= newt.is_some(); - t_acc.push(newt.unwrap_or_else(|| t.clone())) - } - // Respect shadowing - let new_b = if own_id == id {None} else { - apply_lambda_expr_rec(id, value, Mrc::clone(&b)) - }; - if any_t { // mind the shadows - let typ = to_mrc_slice(t_acc); - if let Some(b) = new_b { - Some(wrap(own_id, typ, b)) - } else {Some(wrap(own_id, typ, b))} - } else if let Some(b) = new_b { - Some(wrap(own_id, typ, b)) - } else {Some(wrap(own_id, typ, b))} -} \ No newline at end of file diff --git a/src/executor/mod.rs b/src/executor/mod.rs deleted file mode 100644 index 5430922..0000000 --- a/src/executor/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -mod normalize; -mod partial_hash; -mod reduction_tree; -mod apply_lambda; -pub use apply_lambda::apply_lambda; -mod syntax_eq; \ No newline at end of file diff --git a/src/executor/normalize.rs b/src/executor/normalize.rs deleted file mode 100644 index aa0099d..0000000 --- a/src/executor/normalize.rs +++ /dev/null @@ 
-1,30 +0,0 @@ -use mappable_rc::Mrc; - -use crate::utils::collect_to_mrc; - -use super::super::representations::typed::{Clause, Expr}; - -fn normalize(Expr(clause, typ): Expr) -> Expr { - todo!() -} - -fn collect_autos( - Expr(clause, typ): Expr, - arg_types: Vec>, - indirect_argt_trees: Vec>, - sunk_types: &mut dyn Iterator -) -> (Vec>, Expr) { - if let Clause::Auto(argt, body) = clause { - - } - else {( - arg_types, - Expr( - clause, - collect_to_mrc( - typ.iter().cloned() - .chain(sunk_types) - ) - ) - )} -} \ No newline at end of file diff --git a/src/executor/partial_hash.rs b/src/executor/partial_hash.rs deleted file mode 100644 index 8b4318d..0000000 --- a/src/executor/partial_hash.rs +++ /dev/null @@ -1,48 +0,0 @@ -use std::hash::{Hasher, Hash}; - -use itertools::Itertools; - -use crate::utils::ProtoMap; - -use super::super::representations::typed::{Clause, Expr}; -use super::super::utils::Stackframe; - -const PARAMETRICS_INLINE_COUNT:usize = 5; -// type Parametrics<'a> = ProtoMap<'a, u64, bool, PARAMETRICS_INLINE_COUNT>; - -/// Hash the parts of an expression that are required to be equal for syntactic equality. -pub fn partial_hash_rec( - Expr(clause, _): &Expr, state: &mut H, - parametrics: Option<&Stackframe> -) { - match clause { - // Skip autos - Clause::Auto(id, _, body) => { - partial_hash_rec(body, state, parametrics) - } - // Annotate everything else with a prefix - // - Recurse into the tree of lambdas and calls - classic lambda calc - Clause::Lambda(id, _, body) => { - state.write_u8(0); - partial_hash_rec(body, state, Some(&Stackframe::opush(parametrics, *id))) - } - Clause::Apply(f, x) => { - state.write_u8(1); - partial_hash_rec(f, state, parametrics.clone()); - partial_hash_rec(x, state, parametrics); - } - Clause::AutoArg(..) 
=> state.write_u8(2), - // - Only recognize the depth of an argument if it refers to a non-auto parameter - Clause::LambdaArg(own_id) => { - let pos = parametrics - .and_then(|sf| sf.iter().position(|id| id == own_id)) - .unwrap_or(usize::MAX); - state.write_u8(3); - state.write_usize(pos) - } - // - Hash leaves like normal - Clause::Literal(lit) => { state.write_u8(4); lit.hash(state) } - Clause::Atom(at) => { state.write_u8(5); at.hash(state) } - Clause::ExternFn(f) => { state.write_u8(6); f.hash(state) } - } -} \ No newline at end of file diff --git a/src/executor/reduction_tree.rs b/src/executor/reduction_tree.rs deleted file mode 100644 index e367142..0000000 --- a/src/executor/reduction_tree.rs +++ /dev/null @@ -1,102 +0,0 @@ -use mappable_rc::Mrc; - -use crate::box_chain; -use crate::utils::BoxedIter; -use crate::utils::iter::{box_once, box_empty}; - -use super::apply_lambda::apply_lambda; -use super::super::representations::typed::{Clause, Expr}; - -/// Call the function with the first Expression that isn't an Auto, -/// wrap all elements in the returned iterator back in the original sequence of Autos. -pub fn skip_autos<'a, - F: 'a + FnOnce(Mrc) -> I, - I: Iterator> + 'static ->( - expr: Mrc, function: F -) -> BoxedIter<'static, Mrc> { - if let Expr(Clause::Auto(id, arg, body), typ) = expr.as_ref() { - return Box::new(skip_autos(Mrc::clone(body), function).map({ - let arg = Mrc::clone(arg); - let typ = Mrc::clone(typ); - move |body| { - Mrc::new(Expr(Clause::Auto( - *id, - Mrc::clone(&arg), - body - ), Mrc::clone(&typ))) - } - })) as BoxedIter<'static, Mrc> - } - Box::new(function(expr)) -} - -/// Produces an iterator of every expression that can be produced from this one through B-reduction. 
-fn direct_reductions(ex: Mrc) -> impl Iterator> { - skip_autos(ex, |mexpr| { - let Expr(clause, typ_ref) = mexpr.as_ref(); - match clause { - Clause::Apply(f, x) => box_chain!( - skip_autos(Mrc::clone(f), |mexpr| { - let Expr(f, _) = mexpr.as_ref(); - match f { - Clause::Lambda(id, _, body) => box_once( - apply_lambda(*id, Mrc::clone(x), Mrc::clone(body)) - ), - Clause::ExternFn(xfn) => { - let Expr(xval, xtyp) = x.as_ref(); - xfn.apply(xval.clone()) - .map(|ret| box_once(Mrc::new(Expr(ret, Mrc::clone(xtyp))))) - .unwrap_or(box_empty()) - }, - // Parametric newtypes are atoms of function type - Clause::Atom(..) | Clause::LambdaArg(..) | Clause::AutoArg(..) | Clause::Apply(..) => box_empty(), - Clause::Literal(lit) => - panic!("Literal expression {lit:?} can't be applied as function"), - Clause::Auto(..) => unreachable!("skip_autos should have filtered this"), - } - }), - direct_reductions(Mrc::clone(f)).map({ - let typ = Mrc::clone(typ_ref); - let x = Mrc::clone(x); - move |f| Mrc::new(Expr(Clause::Apply( - f, - Mrc::clone(&x) - ), Mrc::clone(&typ))) - }), - direct_reductions(Mrc::clone(x)).map({ - let typ = Mrc::clone(typ_ref); - let f = Mrc::clone(f); - move |x| Mrc::new(Expr(Clause::Apply( - Mrc::clone(&f), - x - ), Mrc::clone(&typ))) - }) - ), - Clause::Lambda(id, argt, body) => { - let id = *id; - let typ = Mrc::clone(typ_ref); - let argt = Mrc::clone(argt); - let body = Mrc::clone(body); - let body_reductions = direct_reductions(body) - .map(move |body| { - let argt = Mrc::clone(&argt); - Mrc::new(Expr( - Clause::Lambda(id, argt, body), - Mrc::clone(&typ) - )) - }); - Box::new(body_reductions) - }, - Clause::Auto(..) => unreachable!("skip_autos should have filtered this"), - Clause::Literal(..) | Clause::ExternFn(..) | Clause::Atom(..) - | Clause::LambdaArg(..) | Clause::AutoArg(..) 
=> box_empty(), - } - }) -} - -/* - - - - */ \ No newline at end of file diff --git a/src/executor/syntax_eq.rs b/src/executor/syntax_eq.rs deleted file mode 100644 index 337fa32..0000000 --- a/src/executor/syntax_eq.rs +++ /dev/null @@ -1,206 +0,0 @@ -use std::collections::HashMap; - -use itertools::Itertools; -use mappable_rc::Mrc; - -use crate::utils::{ProtoMap, Side, mrc_empty_slice, collect_to_mrc, Stackframe, mrc_concat, Product2}; - -use super::super::representations::typed::{Clause, Expr}; - -pub fn swap((t, u): (T, U)) -> (U, T) { (u, t) } - -// @ @ (0, (foo 1)) ~ @ (0, 0) - -// TODO: -// - get rid of leftovers from Explicit -// - adapt to new index-based system - -enum UnifError { - Conflict, -} - -type LambdaMap<'a> = Option<&'a Stackframe<'a, (u64, u64)>>; - -/// The context associates a given variable (by absolute index) on a given side to -/// an expression on the opposite side rooted at the specified depth. -/// The root depths are used to translate betwee de Brujin arguments and absolute indices. -struct Context(HashMap>); -impl Context { - fn set(&mut self, id: u64, value: &Mrc, lambdas: LambdaMap) -> Result>, UnifError> { - Ok( - if let Some(local) = self.0.get(&id) { - Some( - self.unify_expr(local, value, lambdas)? - .pick(Mrc::clone(local), Mrc::clone(value)) - ) - } else { None } - ) - } - - fn unify_expr(&mut self, - left: &Mrc, right: &Mrc, lambdas: LambdaMap - ) -> Result>, UnifError> { - let Expr(left_val, left_typs) = left.as_ref(); - let Expr(right_val, right_typs) = right.as_ref(); - let val = match (left_val, right_val) { - (Clause::AutoArg(l), Clause::AutoArg(r)) if l == r => Product2::Either, - (Clause::AutoArg(id), _) => self.set(*id, left, lambdas)?.as_ref() - .map_or(Product2::Left, |e| Product2::New(e.0.clone())), - (_, Clause::AutoArg(id)) => self.set(*id, right, lambdas)?.as_ref() - .map_or(Product2::Right, |e| Product2::New(e.0.clone())), - _ => self.unify_clause(left_val, right_val, lambdas)? 
- }; - Ok(match val { - Product2::Either if right_typs.is_empty() && left_typs.is_empty() => Product2::Either, - Product2::Left | Product2::Either if right_typs.is_empty() => Product2::Left, - Product2::Right | Product2::Either if left_typs.is_empty() => Product2::Right, - product => { - let all_types = mrc_concat(left_typs, right_typs); - Product2::New(Mrc::new(Expr( - product.pick(left_val.clone(), right_val.clone()), - all_types - ))) - } - }) - } - - fn unify_clauses(&mut self, - left: &Mrc<[Clause]>, right: &Mrc<[Clause]>, lambdas: LambdaMap - ) -> Result, UnifError> { - if left.len() != right.len() {return Err(UnifError::Conflict)} - } - - fn unify_clause(&mut self, - left: &Clause, right: &Clause, lambdas: LambdaMap - ) -> Result, UnifError> { - Ok(match (left, right) { - (Clause::Literal(l), Clause::Literal(r)) if l == r => Product2::Either, - (Clause::Atom(l), Clause::Atom(r)) if l == r => Product2::Either, - (Clause::ExternFn(l), Clause::ExternFn(r)) if l == r => Product2::Either, - (Clause::LambdaArg(l), Clause::LambdaArg(r)) => if l == r {Product2::Either} else { - let is_equal = Stackframe::o_into_iter(lambdas) - .first_some(|(l_candidate, r_candidate)| { - if l_candidate == l && r_candidate == r {Some(true)} // match - else if l_candidate == l || r_candidate == r {Some(false)} // shadow - else {None} // irrelevant - }).unwrap_or(false); - // Reference: - if is_equal {Product2::Left} else {return Err(UnifError::Conflict)} - } - (Clause::AutoArg(_), _) | (_, Clause::AutoArg(_)) => { - unreachable!("unify_expr should have handled this") - } - (Clause::Lambda(l_id, l_arg, l_body), Clause::Lambda(r_id, r_arg, r_body)) => { - let lambdas = Stackframe::opush(lambdas, (*l_id, *r_id)); - self.unify_expr(l_body, r_body, Some(&lambdas))? 
- .map(|ex| Clause::Lambda(*l_id, mrc_empty_slice(), ex)) - } - (Clause::Apply(l_f, l_x), Clause::Apply(r_f, r_x)) => { - self.unify_expr(l_f, r_f, lambdas)?.join((Mrc::clone(l_f), Mrc::clone(r_f)), - self.unify_expr(l_x, r_x, lambdas)?, (Mrc::clone(l_x), Mrc::clone(r_x)) - ).map(|(f, x)| Clause::Apply(f, x)) - } - (Clause::Auto(l_id, l_arg, l_body), Clause::Auto(r_id, r_arg, r_body)) => { - let typ = self.unify(l_arg, r_arg, lambdas)?; - let body = self.unify_expr(l_body, r_body, lambdas)?; - typ.join((l_arg, r_arg), ) - } - }) - } -} - -const IS_AUTO_INLINE:usize = 5; - -// All data to be forwarded during recursion about one half of a unification task -#[derive(Clone)] -struct UnifHalfTask<'a> { - /// The expression to be unified - expr: &'a Expr, - /// Stores whether a given uid is auto or lambda - is_auto: ProtoMap<'a, usize, bool, IS_AUTO_INLINE> -} - -impl<'a> UnifHalfTask<'a> { - fn push_auto(&mut self, body: &Expr, key: usize) { - self.expr = body; - self.is_auto.set(&key, true); - } - - fn push_lambda(&mut self, body: &Expr, key: usize) { - self.expr = body; - self.is_auto.set(&key, false); - } -} - -type Ctx = HashMap>; - -/// Ascertain syntactic equality. Syntactic equality means that -/// - lambda elements are verbatim equal -/// - auto constraints are pairwise syntactically equal after sorting -/// -/// Context associates variables with subtrees resolved on the opposite side -pub fn unify_syntax_rec( // the stacks store true for autos, false for lambdas - ctx: &mut HashMap<(Side, usize), (usize, Mrc)>, - ltask@UnifHalfTask{ expr: lexpr@Expr(lclause, _), .. }: UnifHalfTask, - rtask@UnifHalfTask{ expr: rexpr@Expr(rclause, _), .. 
}: UnifHalfTask -) -> Option<(UnifResult, UnifResult)> { - // Ensure that ex1 is a value-level construct - match lclause { - Clause::Auto(id, _, body) => { - let res = unify_syntax_rec(ltask.push_auto(body).0, rtask); - return if ltask.explicits.is_some() { - res.map(|(r1, r2)| (r1.useExplicit(), r2)) - } else {res} - } - _ => () - }; - // Reduce ex2's auto handling to ex1's. In the optimizer we trust - if let Clause::Auto(..) | Clause::Explicit(..) = rclause { - return unify_syntax_rec(rtask, ltask).map(swap); - } - // Neither ex1 nor ex2 can be Auto or Explicit - match (lclause, rclause) { - // recurse into both - (Clause::Lambda(_, lbody), Clause::Lambda(_, rbody)) => unify_syntax_rec( - ltask.push_lambda(lbody), - rtask.push_lambda(rbody) - ), - (Clause::Apply(lf, lx), Clause::Apply(rf, rx)) => { - let (lpart, rpart) = unify_syntax_rec( - ltask.push_expr(lf), - rtask.push_expr(rf) - )?; - lpart.dropUsedExplicits(&mut ltask); - rpart.dropUsedExplicits(&mut rtask); - unify_syntax_rec(ltask.push_expr(lx), rtask.push_expr(rx)) - } - (Clause::Atom(latom), Clause::Atom(ratom)) => { - if latom != ratom { None } - else { Some((UnifResult::default(), UnifResult::default())) } - } - (Clause::ExternFn(lf), Clause::ExternFn(rf)) => { - if lf != rf { None } - else { Some((UnifResult::default(), UnifResult::default())) } - } - (Clause::Literal(llit), Clause::Literal(rlit)) => { - if llit != rlit { None } - else { Some((UnifResult::default(), UnifResult::default())) } - } - // TODO Select a representative - (Clause::Argument(depth1), Clause::Argument(depth2)) => { - !*stack1.iter().nth(*depth1).unwrap_or(&false) - && !*stack2.iter().nth(*depth2).unwrap_or(&false) - && stack1.iter().count() - depth1 == stack2.iter().count() - depth2 - } - // TODO Assign a substitute - (Clause::Argument(placeholder), _) => { - - } - } -} - -// Tricky unifications -// @A. A A 1 ~ @B. 2 B B = fails if left-authoritative -// @A. 1 A A ~ @B. B B 2 -// @A. A 1 A ~ @B. 
B B 2 -// @ 0 X 0 ~ @ 0 0 Y \ No newline at end of file diff --git a/src/external/assertion_error.rs b/src/external/assertion_error.rs index f5328d8..5ea4e03 100644 --- a/src/external/assertion_error.rs +++ b/src/external/assertion_error.rs @@ -2,21 +2,21 @@ use std::rc::Rc; use std::fmt::Display; use crate::foreign::ExternError; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::ExprInst; #[derive(Clone)] pub struct AssertionError{ - pub value: Clause, + pub value: ExprInst, pub assertion: &'static str, } impl AssertionError { - pub fn fail(value: Clause, assertion: &'static str) -> Result> { + pub fn fail(value: ExprInst, assertion: &'static str) -> Result> { return Err(Self { value, assertion }.into_extern()) } - pub fn ext(value: Clause, assertion: &'static str) -> Rc { + pub fn ext(value: ExprInst, assertion: &'static str) -> Rc { return Self { value, assertion }.into_extern() } } diff --git a/src/external/bool/boolean.rs b/src/external/bool/boolean.rs index c79e2f8..366d56c 100644 --- a/src/external/bool/boolean.rs +++ b/src/external/bool/boolean.rs @@ -1,5 +1,6 @@ - -use crate::{atomic_inert, representations::{interpreted::Clause, Primitive}, foreign::Atom}; +use crate::foreign::Atom; +use crate::representations::{interpreted::{Clause, ExprInst}, Primitive}; +use crate::atomic_inert; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Boolean(pub bool); @@ -7,11 +8,12 @@ atomic_inert!(Boolean); impl From for Boolean { fn from(value: bool) -> Self { Self(value) } } -impl<'a> TryFrom<&'a Clause> for Boolean { +impl TryFrom for Boolean { type Error = (); - fn try_from(value: &'a Clause) -> Result { - if let Clause::P(Primitive::Atom(Atom(a))) = value { + fn try_from(value: ExprInst) -> Result { + let expr = value.expr(); + if let Clause::P(Primitive::Atom(Atom(a))) = &expr.clause { if let Some(b) = a.as_any().downcast_ref::() { return Ok(*b) } diff --git a/src/external/bool/equals.rs 
b/src/external/bool/equals.rs index 3119f5e..3ae203d 100644 --- a/src/external/bool/equals.rs +++ b/src/external/bool/equals.rs @@ -1,10 +1,8 @@ use std::fmt::Debug; -use std::hash::Hash; +use crate::external::litconv::with_lit; +use crate::representations::{interpreted::ExprInst, Literal}; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::foreign::Atom; -use crate::representations::{Primitive, Literal}; -use crate::representations::interpreted::Clause; use super::super::assertion_error::AssertionError; use super::boolean::Boolean; @@ -15,38 +13,34 @@ use super::boolean::Boolean; #[derive(Clone)] pub struct Equals2; -externfn_impl!(Equals2, |_: &Self, c: Clause| {Ok(Equals1{c})}); +externfn_impl!(Equals2, |_: &Self, x: ExprInst| {Ok(Equals1{x})}); /// Partially applied Equals function /// /// Prev state: [Equals2]; Next state: [Equals0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Equals1{ c: Clause } -atomic_redirect!(Equals1, c); +#[derive(Debug, Clone)] +pub struct Equals1{ x: ExprInst } +atomic_redirect!(Equals1, x); atomic_impl!(Equals1); -externfn_impl!(Equals1, |this: &Self, c: Clause| { - let a: Literal = this.c.clone().try_into() - .map_err(|_| AssertionError::ext(this.c.clone(), "a primitive"))?; - Ok(Equals0{ a, c }) +externfn_impl!(Equals1, |this: &Self, x: ExprInst| { + with_lit(&this.x, |l| Ok(Equals0{ a: l.clone(), x })) }); /// Fully applied Equals function. 
/// /// Prev state: [Equals1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Equals0 { a: Literal, c: Clause } -atomic_redirect!(Equals0, c); -atomic_impl!(Equals0, |Self{ a, c }: &Self| { - let b: Literal = c.clone().try_into() - .map_err(|_| AssertionError::ext(c.clone(), "a literal value"))?; - let eqls = match (a, b) { - (Literal::Char(c1), Literal::Char(c2)) => *c1 == c2, - (Literal::Num(n1), Literal::Num(n2)) => *n1 == n2, - (Literal::Str(s1), Literal::Str(s2)) => *s1 == s2, - (Literal::Uint(i1), Literal::Uint(i2)) => *i1 == i2, - (_, _) => AssertionError::fail(c.clone(), "the expected type")?, - }; - Ok(Clause::P(Primitive::Atom(Atom::new(Boolean::from(eqls))))) +#[derive(Debug, Clone)] +pub struct Equals0 { a: Literal, x: ExprInst } +atomic_redirect!(Equals0, x); +atomic_impl!(Equals0, |Self{ a, x }: &Self| { + let eqls = with_lit(x, |l| Ok(match (a, l) { + (Literal::Char(c1), Literal::Char(c2)) => c1 == c2, + (Literal::Num(n1), Literal::Num(n2)) => n1 == n2, + (Literal::Str(s1), Literal::Str(s2)) => s1 == s2, + (Literal::Uint(i1), Literal::Uint(i2)) => i1 == i2, + (_, _) => AssertionError::fail(x.clone(), "the expected type")?, + }))?; + Ok(Boolean::from(eqls).to_atom_cls()) }); diff --git a/src/external/bool/ifthenelse.rs b/src/external/bool/ifthenelse.rs index b884e8e..9f1205e 100644 --- a/src/external/bool/ifthenelse.rs +++ b/src/external/bool/ifthenelse.rs @@ -1,11 +1,9 @@ use std::fmt::Debug; -use std::hash::Hash; use std::rc::Rc; use crate::external::assertion_error::AssertionError; -use crate::representations::PathSet; +use crate::representations::{PathSet, interpreted::{Clause, ExprInst}}; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::Clause; use super::Boolean; @@ -15,29 +13,29 @@ use super::Boolean; #[derive(Clone)] pub struct IfThenElse1; -externfn_impl!(IfThenElse1, |_: &Self, c: Clause| {Ok(IfThenElse0{c})}); +externfn_impl!(IfThenElse1, |_: &Self, x: ExprInst| {Ok(IfThenElse0{x})}); 
/// Partially applied IfThenElse function /// /// Prev state: [IfThenElse1]; Next state: [IfThenElse0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct IfThenElse0{ c: Clause } -atomic_redirect!(IfThenElse0, c); +#[derive(Debug, Clone)] +pub struct IfThenElse0{ x: ExprInst } +atomic_redirect!(IfThenElse0, x); atomic_impl!(IfThenElse0, |this: &Self| { - let Boolean(b) = (&this.c).try_into() - .map_err(|_| AssertionError::ext(this.c.clone(), "a boolean"))?; + let Boolean(b) = this.x.clone().try_into() + .map_err(|_| AssertionError::ext(this.x.clone(), "a boolean"))?; Ok(if b { Clause::Lambda { args: Some(PathSet { steps: Rc::new(vec![]), next: None }), - body: Rc::new(Clause::Lambda { + body: Clause::Lambda { args: None, - body: Rc::new(Clause::LambdaArg) - }) + body: Clause::LambdaArg.wrap() + }.wrap() }} else { Clause::Lambda { args: None, - body: Rc::new(Clause::Lambda { + body: Clause::Lambda { args: Some(PathSet { steps: Rc::new(vec![]), next: None }), - body: Rc::new(Clause::LambdaArg) - }) + body: Clause::LambdaArg.wrap() + }.wrap() }}) }); \ No newline at end of file diff --git a/src/external/bool/mod.rs b/src/external/bool/mod.rs index 6d4b389..6d61294 100644 --- a/src/external/bool/mod.rs +++ b/src/external/bool/mod.rs @@ -3,11 +3,12 @@ mod boolean; mod ifthenelse; pub use boolean::Boolean; -use crate::project::{Loader, extlib_loader}; +use crate::{pipeline::ConstTree, interner::Interner}; -pub fn bool() -> impl Loader { - extlib_loader(vec![ - ("ifthenelse", Box::new(ifthenelse::IfThenElse1)), - ("equals", Box::new(equals::Equals2)) + +pub fn bool(i: &Interner) -> ConstTree { + ConstTree::tree([ + (i.i("ifthenelse"), ConstTree::xfn(ifthenelse::IfThenElse1)), + (i.i("equals"), ConstTree::xfn(equals::Equals2)) ]) } \ No newline at end of file diff --git a/src/external/conv/mod.rs b/src/external/conv/mod.rs index 34759ee..c272e1c 100644 --- a/src/external/conv/mod.rs +++ b/src/external/conv/mod.rs @@ -1,13 +1,13 @@ -use crate::project::{extlib_loader, 
Loader}; +use crate::{interner::Interner, pipeline::ConstTree}; mod to_string; mod parse_float; mod parse_uint; -pub fn conv() -> impl Loader { - extlib_loader(vec![ - ("parse_float", Box::new(parse_float::ParseFloat1)), - ("parse_uint", Box::new(parse_uint::ParseUint1)), - ("to_string", Box::new(to_string::ToString1)) +pub fn conv(i: &Interner) -> ConstTree { + ConstTree::tree([ + (i.i("parse_float"), ConstTree::xfn(parse_float::ParseFloat1)), + (i.i("parse_uint"), ConstTree::xfn(parse_uint::ParseUint1)), + (i.i("to_string"), ConstTree::xfn(to_string::ToString1)) ]) } \ No newline at end of file diff --git a/src/external/conv/parse_float.rs b/src/external/conv/parse_float.rs index 0c6d71d..cf3f500 100644 --- a/src/external/conv/parse_float.rs +++ b/src/external/conv/parse_float.rs @@ -2,14 +2,12 @@ use chumsky::Parser; use std::fmt::Debug; -use std::hash::Hash; use super::super::assertion_error::AssertionError; +use crate::external::litconv::with_lit; use crate::parse::float_parser; +use crate::representations::{interpreted::ExprInst, Literal}; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::foreign::ExternError; -use crate::representations::{Primitive, Literal}; -use crate::representations::interpreted::Clause; /// ParseFloat a number /// @@ -17,30 +15,27 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct ParseFloat1; -externfn_impl!(ParseFloat1, |_: &Self, c: Clause| {Ok(ParseFloat0{c})}); +externfn_impl!(ParseFloat1, |_: &Self, x: ExprInst| {Ok(ParseFloat0{x})}); /// Applied to_string function /// /// Prev state: [ParseFloat1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct ParseFloat0{ c: Clause } -atomic_redirect!(ParseFloat0, c); -atomic_impl!(ParseFloat0, |Self{ c }: &Self| { - let literal: &Literal = c.try_into() - .map_err(|_| AssertionError::ext(c.clone(), "a literal value"))?; - let number = match literal { +#[derive(Debug, Clone)] +pub struct ParseFloat0{ x: ExprInst } 
+atomic_redirect!(ParseFloat0, x); +atomic_impl!(ParseFloat0, |Self{ x }: &Self| { + let number = with_lit(x, |l| Ok(match l { Literal::Str(s) => { let parser = float_parser(); - parser.parse(s.as_str()).map_err(|_| AssertionError{ - value: c.clone(), assertion: "cannot be parsed into a float" - }.into_extern())? + parser.parse(s.as_str()) + .map_err(|_| AssertionError::ext(x.clone(), "cannot be parsed into a float"))? } Literal::Num(n) => *n, Literal::Uint(i) => (*i as u32).into(), - Literal::Char(char) => char.to_digit(10).ok_or(AssertionError{ - value: c.clone(), assertion: "is not a decimal digit" - }.into_extern())?.into() - }; - Ok(Clause::P(Primitive::Literal(Literal::Num(number)))) + Literal::Char(char) => char.to_digit(10) + .ok_or(AssertionError::ext(x.clone(), "is not a decimal digit"))? + .into() + }))?; + Ok(number.into()) }); \ No newline at end of file diff --git a/src/external/conv/parse_uint.rs b/src/external/conv/parse_uint.rs index 0e19a4e..7bc6b62 100644 --- a/src/external/conv/parse_uint.rs +++ b/src/external/conv/parse_uint.rs @@ -2,14 +2,11 @@ use chumsky::Parser; use std::fmt::Debug; -use std::hash::Hash; -use super::super::assertion_error::AssertionError; -use crate::parse::int_parser; +use crate::external::{litconv::with_lit, assertion_error::AssertionError}; +use crate::representations::{interpreted::ExprInst, Literal}; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::foreign::ExternError; -use crate::representations::{Primitive, Literal}; -use crate::representations::interpreted::Clause; +use crate::parse::int_parser; /// Parse a number /// @@ -17,30 +14,27 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct ParseUint1; -externfn_impl!(ParseUint1, |_: &Self, c: Clause| {Ok(ParseUint0{c})}); +externfn_impl!(ParseUint1, |_: &Self, x: ExprInst| {Ok(ParseUint0{x})}); /// Applied ParseUint function /// /// Prev state: [ParseUint1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct 
ParseUint0{ c: Clause } -atomic_redirect!(ParseUint0, c); -atomic_impl!(ParseUint0, |Self{ c }: &Self| { - let literal: &Literal = c.try_into() - .map_err(|_| AssertionError::ext(c.clone(), "a literal value"))?; - let uint = match literal { +#[derive(Debug, Clone)] +pub struct ParseUint0{ x: ExprInst } +atomic_redirect!(ParseUint0, x); +atomic_impl!(ParseUint0, |Self{ x }: &Self| { + let uint = with_lit(x, |l| Ok(match l { Literal::Str(s) => { let parser = int_parser(); - parser.parse(s.as_str()).map_err(|_| AssertionError{ - value: c.clone(), assertion: "cannot be parsed into an unsigned int" - }.into_extern())? + parser.parse(s.as_str()) + .map_err(|_| AssertionError::ext(x.clone(), "cannot be parsed into an unsigned int"))? } Literal::Num(n) => n.floor() as u64, Literal::Uint(i) => *i, - Literal::Char(char) => char.to_digit(10).ok_or(AssertionError{ - value: c.clone(), assertion: "is not a decimal digit" - }.into_extern())? as u64 - }; - Ok(Clause::P(Primitive::Literal(Literal::Uint(uint)))) + Literal::Char(char) => char.to_digit(10) + .ok_or(AssertionError::ext(x.clone(), "is not a decimal digit"))? 
+ .into() + }))?; + Ok(uint.into()) }); \ No newline at end of file diff --git a/src/external/conv/to_string.rs b/src/external/conv/to_string.rs index 5c8faf6..34cb46a 100644 --- a/src/external/conv/to_string.rs +++ b/src/external/conv/to_string.rs @@ -1,11 +1,9 @@ use std::fmt::Debug; -use std::hash::Hash; -use crate::external::assertion_error::AssertionError; +use crate::external::litconv::with_lit; +use crate::representations::{interpreted::ExprInst, Literal}; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::{Primitive, Literal}; -use crate::representations::interpreted::Clause; /// ToString a clause /// @@ -13,23 +11,21 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct ToString1; -externfn_impl!(ToString1, |_: &Self, c: Clause| {Ok(ToString0{c})}); +externfn_impl!(ToString1, |_: &Self, x: ExprInst| {Ok(ToString0{x})}); /// Applied ToString function /// /// Prev state: [ToString1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct ToString0{ c: Clause } -atomic_redirect!(ToString0, c); -atomic_impl!(ToString0, |Self{ c }: &Self| { - let literal: &Literal = c.try_into() - .map_err(|_| AssertionError::ext(c.clone(), "a literal value"))?; - let string = match literal { +#[derive(Debug, Clone)] +pub struct ToString0{ x: ExprInst } +atomic_redirect!(ToString0, x); +atomic_impl!(ToString0, |Self{ x }: &Self| { + let string = with_lit(x, |l| Ok(match l { Literal::Char(c) => c.to_string(), Literal::Uint(i) => i.to_string(), Literal::Num(n) => n.to_string(), Literal::Str(s) => s.clone() - }; - Ok(Clause::P(Primitive::Literal(Literal::Str(string)))) + }))?; + Ok(string.into()) }); diff --git a/src/external/cpsio/mod.rs b/src/external/cpsio/mod.rs index c5ceff3..adc4b25 100644 --- a/src/external/cpsio/mod.rs +++ b/src/external/cpsio/mod.rs @@ -1,11 +1,11 @@ -use crate::project::{Loader, extlib_loader}; +use crate::{interner::Interner, pipeline::ConstTree}; mod print; mod readline; -pub fn cpsio() -> 
impl Loader { - extlib_loader(vec![ - ("print", Box::new(print::Print2)), - ("readline", Box::new(readline::Readln2)) +pub fn cpsio(i: &Interner) -> ConstTree { + ConstTree::tree([ + (i.i("print"), ConstTree::xfn(print::Print2)), + (i.i("readline"), ConstTree::xfn(readline::Readln2)) ]) } \ No newline at end of file diff --git a/src/external/cpsio/print.rs b/src/external/cpsio/print.rs index 94c0634..9f2ad63 100644 --- a/src/external/cpsio/print.rs +++ b/src/external/cpsio/print.rs @@ -1,11 +1,10 @@ use std::fmt::Debug; -use std::hash::Hash; use std::rc::Rc; -use crate::external::str::cls2str; +use crate::external::litconv::with_str; use crate::representations::PathSet; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::{Clause, ExprInst}; /// Print function /// @@ -13,20 +12,21 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct Print2; -externfn_impl!(Print2, |_: &Self, c: Clause| {Ok(Print1{c})}); +externfn_impl!(Print2, |_: &Self, x: ExprInst| {Ok(Print1{x})}); /// Partially applied Print function /// /// Prev state: [Print2]; Next state: [Print0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Print1{ c: Clause } -atomic_redirect!(Print1, c); -atomic_impl!(Print1, |Self{ c }: &Self| { - let message = cls2str(&c)?; - print!("{}", message); - Ok(Clause::Lambda { - args: Some(PathSet{ steps: Rc::new(vec![]), next: None }), - body: Rc::new(Clause::LambdaArg) +#[derive(Debug, Clone)] +pub struct Print1{ x: ExprInst } +atomic_redirect!(Print1, x); +atomic_impl!(Print1, |Self{ x }: &Self| { + with_str(x, |s| { + print!("{}", s); + Ok(Clause::Lambda { + args: Some(PathSet{ steps: Rc::new(vec![]), next: None }), + body: Clause::LambdaArg.wrap() + }) }) }); diff --git a/src/external/cpsio/readline.rs b/src/external/cpsio/readline.rs index 45bfa02..9954e88 100644 --- a/src/external/cpsio/readline.rs +++ b/src/external/cpsio/readline.rs @@ 
-1,12 +1,10 @@ use std::fmt::Debug; use std::io::stdin; -use std::rc::Rc; -use std::hash::Hash; use crate::external::runtime_error::RuntimeError; use crate::{atomic_impl, atomic_redirect, externfn_impl}; use crate::representations::{Primitive, Literal}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::{Clause, ExprInst}; /// Readln function /// @@ -14,22 +12,21 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct Readln2; -externfn_impl!(Readln2, |_: &Self, c: Clause| {Ok(Readln1{c})}); +externfn_impl!(Readln2, |_: &Self, x: ExprInst| {Ok(Readln1{x})}); /// Partially applied Readln function /// /// Prev state: [Readln2]; Next state: [Readln0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Readln1{ c: Clause } -atomic_redirect!(Readln1, c); -atomic_impl!(Readln1, |Self{ c }: &Self| { +#[derive(Debug, Clone)] +pub struct Readln1{ x: ExprInst } +atomic_redirect!(Readln1, x); +atomic_impl!(Readln1, |Self{ x }: &Self| { let mut buf = String::new(); stdin().read_line(&mut buf).map_err(|e| RuntimeError::ext(e.to_string(), "reading from stdin"))?; buf.pop(); Ok(Clause::Apply { - f: Rc::new(c.clone()), - x: Rc::new(Clause::P(Primitive::Literal(Literal::Str(buf)))), - id: 0 + f: x.clone(), + x: Clause::P(Primitive::Literal(Literal::Str(buf))).wrap() }) }); diff --git a/src/external/litconv.rs b/src/external/litconv.rs new file mode 100644 index 0000000..7f24de6 --- /dev/null +++ b/src/external/litconv.rs @@ -0,0 +1,34 @@ +use std::rc::Rc; + +use crate::foreign::ExternError; +use crate::external::assertion_error::AssertionError; +use crate::representations::interpreted::ExprInst; +use crate::representations::Literal; + +pub fn with_lit(x: &ExprInst, + predicate: impl FnOnce(&Literal) -> Result> +) -> Result> { + x.with_literal(predicate) + .map_err(|()| AssertionError::ext(x.clone(), "a literal value")) + .and_then(|r| r) +} + +pub fn with_str(x: &ExprInst, + predicate: impl FnOnce(&String) -> 
Result> +) -> Result> { + with_lit(x, |l| { + if let Literal::Str(s) = l {predicate(&s)} else { + AssertionError::fail(x.clone(), "a string")? + } + }) +} + +pub fn with_uint(x: &ExprInst, + predicate: impl FnOnce(u64) -> Result> +) -> Result> { + with_lit(x, |l| { + if let Literal::Uint(u) = l {predicate(*u)} else { + AssertionError::fail(x.clone(), "an uint")? + } + }) +} \ No newline at end of file diff --git a/src/external/mod.rs b/src/external/mod.rs index 247321b..14d95b4 100644 --- a/src/external/mod.rs +++ b/src/external/mod.rs @@ -6,3 +6,4 @@ mod str; mod cpsio; mod runtime_error; mod bool; +mod litconv; diff --git a/src/external/num/mod.rs b/src/external/num/mod.rs index ef7b267..db44373 100644 --- a/src/external/num/mod.rs +++ b/src/external/num/mod.rs @@ -2,14 +2,14 @@ mod numeric; pub mod operators; pub use numeric::Numeric; -use crate::project::{extlib_loader, Loader}; +use crate::{interner::Interner, pipeline::ConstTree}; -pub fn num() -> impl Loader { - extlib_loader(vec![ - ("add", Box::new(operators::add::Add2)), - ("subtract", Box::new(operators::subtract::Subtract2)), - ("multiply", Box::new(operators::multiply::Multiply2)), - ("divide", Box::new(operators::divide::Divide2)), - ("remainder", Box::new(operators::remainder::Remainder2)) +pub fn num(i: &Interner) -> ConstTree { + ConstTree::tree([ + (i.i("add"), ConstTree::xfn(operators::add::Add2)), + (i.i("subtract"), ConstTree::xfn(operators::subtract::Subtract2)), + (i.i("multiply"), ConstTree::xfn(operators::multiply::Multiply2)), + (i.i("divide"), ConstTree::xfn(operators::divide::Divide2)), + (i.i("remainder"), ConstTree::xfn(operators::remainder::Remainder2)) ]) } \ No newline at end of file diff --git a/src/external/num/numeric.rs b/src/external/num/numeric.rs index 20e8698..7d001ca 100644 --- a/src/external/num/numeric.rs +++ b/src/external/num/numeric.rs @@ -4,9 +4,11 @@ use std::rc::Rc; use ordered_float::NotNan; use crate::external::assertion_error::AssertionError; +use 
crate::external::litconv::with_lit; use crate::foreign::ExternError; -use crate::representations::{Primitive, Literal}; -use crate::representations::interpreted::Clause; +use crate::representations::Literal; +use crate::representations::Primitive; +use crate::representations::interpreted::{Clause, ExprInst}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Numeric { @@ -93,17 +95,14 @@ impl Rem for Numeric { } } -impl TryFrom for Numeric { +impl TryFrom for Numeric { type Error = Rc; - fn try_from(value: Clause) -> Result { - let l = if let Clause::P(Primitive::Literal(l)) = value.clone() {l} else { - AssertionError::fail(value, "a literal value")? - }; - match l { - Literal::Uint(i) => Ok(Numeric::Uint(i)), - Literal::Num(n) => Ok(Numeric::Num(n)), + fn try_from(value: ExprInst) -> Result { + with_lit(&value.clone(), |l| match l { + Literal::Uint(i) => Ok(Numeric::Uint(*i)), + Literal::Num(n) => Ok(Numeric::Num(*n)), _ => AssertionError::fail(value, "an integer or number")? - } + }) } } diff --git a/src/external/num/operators/add.rs b/src/external/num/operators/add.rs index 9ed441a..5aec23d 100644 --- a/src/external/num/operators/add.rs +++ b/src/external/num/operators/add.rs @@ -2,10 +2,9 @@ use super::super::Numeric; use std::fmt::Debug; -use std::hash::Hash; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::ExprInst; /// Add function /// @@ -13,29 +12,29 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct Add2; -externfn_impl!(Add2, |_: &Self, c: Clause| {Ok(Add1{c})}); +externfn_impl!(Add2, |_: &Self, x: ExprInst| {Ok(Add1{x})}); /// Partially applied Add function /// /// Prev state: [Add2]; Next state: [Add0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Add1{ c: Clause } -atomic_redirect!(Add1, c); +#[derive(Debug, Clone)] +pub struct Add1{ x: ExprInst } +atomic_redirect!(Add1, x); atomic_impl!(Add1); 
-externfn_impl!(Add1, |this: &Self, c: Clause| { - let a: Numeric = this.c.clone().try_into()?; - Ok(Add0{ a, c }) +externfn_impl!(Add1, |this: &Self, x: ExprInst| { + let a: Numeric = this.x.clone().try_into()?; + Ok(Add0{ a, x }) }); /// Fully applied Add function. /// /// Prev state: [Add1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Add0 { a: Numeric, c: Clause } -atomic_redirect!(Add0, c); -atomic_impl!(Add0, |Self{ a, c }: &Self| { - let b: Numeric = c.clone().try_into()?; +#[derive(Debug, Clone)] +pub struct Add0 { a: Numeric, x: ExprInst } +atomic_redirect!(Add0, x); +atomic_impl!(Add0, |Self{ a, x }: &Self| { + let b: Numeric = x.clone().try_into()?; Ok((*a + b).into()) }); diff --git a/src/external/num/operators/divide.rs b/src/external/num/operators/divide.rs index f96817b..aeda6a6 100644 --- a/src/external/num/operators/divide.rs +++ b/src/external/num/operators/divide.rs @@ -2,10 +2,9 @@ use super::super::Numeric; use std::fmt::Debug; -use std::hash::Hash; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::ExprInst; /// Divide function /// @@ -13,29 +12,29 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct Divide2; -externfn_impl!(Divide2, |_: &Self, c: Clause| {Ok(Divide1{c})}); +externfn_impl!(Divide2, |_: &Self, x: ExprInst| {Ok(Divide1{x})}); /// Partially applied Divide function /// /// Prev state: [Divide2]; Next state: [Divide0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Divide1{ c: Clause } -atomic_redirect!(Divide1, c); +#[derive(Debug, Clone)] +pub struct Divide1{ x: ExprInst } +atomic_redirect!(Divide1, x); atomic_impl!(Divide1); -externfn_impl!(Divide1, |this: &Self, c: Clause| { - let a: Numeric = this.c.clone().try_into()?; - Ok(Divide0{ a, c }) +externfn_impl!(Divide1, |this: &Self, x: ExprInst| { + let a: Numeric = this.x.clone().try_into()?; + Ok(Divide0{ a, x }) }); /// Fully applied 
Divide function. /// /// Prev state: [Divide1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Divide0 { a: Numeric, c: Clause } -atomic_redirect!(Divide0, c); -atomic_impl!(Divide0, |Self{ a, c }: &Self| { - let b: Numeric = c.clone().try_into()?; +#[derive(Debug, Clone)] +pub struct Divide0 { a: Numeric, x: ExprInst } +atomic_redirect!(Divide0, x); +atomic_impl!(Divide0, |Self{ a, x }: &Self| { + let b: Numeric = x.clone().try_into()?; Ok((*a / b).into()) }); \ No newline at end of file diff --git a/src/external/num/operators/multiply.rs b/src/external/num/operators/multiply.rs index ccfe6c3..1f38379 100644 --- a/src/external/num/operators/multiply.rs +++ b/src/external/num/operators/multiply.rs @@ -2,10 +2,9 @@ use super::super::Numeric; use std::fmt::Debug; -use std::hash::Hash; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::ExprInst; /// Multiply function /// @@ -13,29 +12,29 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct Multiply2; -externfn_impl!(Multiply2, |_: &Self, c: Clause| {Ok(Multiply1{c})}); +externfn_impl!(Multiply2, |_: &Self, x: ExprInst| {Ok(Multiply1{x})}); /// Partially applied Multiply function /// /// Prev state: [Multiply2]; Next state: [Multiply0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Multiply1{ c: Clause } -atomic_redirect!(Multiply1, c); +#[derive(Debug, Clone)] +pub struct Multiply1{ x: ExprInst } +atomic_redirect!(Multiply1, x); atomic_impl!(Multiply1); -externfn_impl!(Multiply1, |this: &Self, c: Clause| { - let a: Numeric = this.c.clone().try_into()?; - Ok(Multiply0{ a, c }) +externfn_impl!(Multiply1, |this: &Self, x: ExprInst| { + let a: Numeric = this.x.clone().try_into()?; + Ok(Multiply0{ a, x }) }); /// Fully applied Multiply function. 
/// /// Prev state: [Multiply1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Multiply0 { a: Numeric, c: Clause } -atomic_redirect!(Multiply0, c); -atomic_impl!(Multiply0, |Self{ a, c }: &Self| { - let b: Numeric = c.clone().try_into()?; +#[derive(Debug, Clone)] +pub struct Multiply0 { a: Numeric, x: ExprInst } +atomic_redirect!(Multiply0, x); +atomic_impl!(Multiply0, |Self{ a, x }: &Self| { + let b: Numeric = x.clone().try_into()?; Ok((*a * b).into()) }); \ No newline at end of file diff --git a/src/external/num/operators/remainder.rs b/src/external/num/operators/remainder.rs index 164f617..c2b3b23 100644 --- a/src/external/num/operators/remainder.rs +++ b/src/external/num/operators/remainder.rs @@ -2,10 +2,9 @@ use super::super::Numeric; use std::fmt::Debug; -use std::hash::Hash; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::ExprInst; /// Remainder function /// @@ -13,29 +12,29 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct Remainder2; -externfn_impl!(Remainder2, |_: &Self, c: Clause| {Ok(Remainder1{c})}); +externfn_impl!(Remainder2, |_: &Self, x: ExprInst| {Ok(Remainder1{x})}); /// Partially applied Remainder function /// /// Prev state: [Remainder2]; Next state: [Remainder0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Remainder1{ c: Clause } -atomic_redirect!(Remainder1, c); +#[derive(Debug, Clone)] +pub struct Remainder1{ x: ExprInst } +atomic_redirect!(Remainder1, x); atomic_impl!(Remainder1); -externfn_impl!(Remainder1, |this: &Self, c: Clause| { - let a: Numeric = this.c.clone().try_into()?; - Ok(Remainder0{ a, c }) +externfn_impl!(Remainder1, |this: &Self, x: ExprInst| { + let a: Numeric = this.x.clone().try_into()?; + Ok(Remainder0{ a, x }) }); /// Fully applied Remainder function. 
/// /// Prev state: [Remainder1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Remainder0 { a: Numeric, c: Clause } -atomic_redirect!(Remainder0, c); -atomic_impl!(Remainder0, |Self{ a, c }: &Self| { - let b: Numeric = c.clone().try_into()?; +#[derive(Debug, Clone)] +pub struct Remainder0 { a: Numeric, x: ExprInst } +atomic_redirect!(Remainder0, x); +atomic_impl!(Remainder0, |Self{ a, x }: &Self| { + let b: Numeric = x.clone().try_into()?; Ok((*a % b).into()) }); \ No newline at end of file diff --git a/src/external/num/operators/subtract.rs b/src/external/num/operators/subtract.rs index b4bd0b6..35d88bd 100644 --- a/src/external/num/operators/subtract.rs +++ b/src/external/num/operators/subtract.rs @@ -2,10 +2,9 @@ use super::super::Numeric; use std::fmt::Debug; -use std::hash::Hash; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::{Clause}; +use crate::representations::interpreted::ExprInst; /// Subtract function /// @@ -13,29 +12,29 @@ use crate::representations::interpreted::{Clause}; #[derive(Clone)] pub struct Subtract2; -externfn_impl!(Subtract2, |_: &Self, c: Clause| {Ok(Subtract1{c})}); +externfn_impl!(Subtract2, |_: &Self, x: ExprInst| {Ok(Subtract1{x})}); /// Partially applied Subtract function /// /// Prev state: [Subtract2]; Next state: [Subtract0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Subtract1{ c: Clause } -atomic_redirect!(Subtract1, c); +#[derive(Debug, Clone)] +pub struct Subtract1{ x: ExprInst } +atomic_redirect!(Subtract1, x); atomic_impl!(Subtract1); -externfn_impl!(Subtract1, |this: &Self, c: Clause| { - let a: Numeric = this.c.clone().try_into()?; - Ok(Subtract0{ a, c }) +externfn_impl!(Subtract1, |this: &Self, x: ExprInst| { + let a: Numeric = this.x.clone().try_into()?; + Ok(Subtract0{ a, x }) }); /// Fully applied Subtract function. 
/// /// Prev state: [Subtract1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Subtract0 { a: Numeric, c: Clause } -atomic_redirect!(Subtract0, c); -atomic_impl!(Subtract0, |Self{ a, c }: &Self| { - let b: Numeric = c.clone().try_into()?; +#[derive(Debug, Clone)] +pub struct Subtract0 { a: Numeric, x: ExprInst } +atomic_redirect!(Subtract0, x); +atomic_impl!(Subtract0, |Self{ a, x }: &Self| { + let b: Numeric = x.clone().try_into()?; Ok((*a - b).into()) }); \ No newline at end of file diff --git a/src/external/std.rs b/src/external/std.rs index ebb18c6..f90ee5b 100644 --- a/src/external/std.rs +++ b/src/external/std.rs @@ -1,6 +1,5 @@ -use std::collections::HashMap; - -use crate::project::{map_loader, Loader}; +use crate::pipeline::ConstTree; +use crate::interner::Interner; use super::bool::bool; use super::cpsio::cpsio; @@ -8,12 +7,10 @@ use super::conv::conv; use super::str::str; use super::num::num; -pub fn std() -> impl Loader { - map_loader(HashMap::from([ - ("cpsio", cpsio().boxed()), - ("conv", conv().boxed()), - ("bool", bool().boxed()), - ("str", str().boxed()), - ("num", num().boxed()), - ])) +pub fn std(i: &Interner) -> ConstTree { + cpsio(i) + + conv(i) + + bool(i) + + str(i) + + num(i) } \ No newline at end of file diff --git a/src/external/str/char_at.rs b/src/external/str/char_at.rs index ce09eff..4a120fb 100644 --- a/src/external/str/char_at.rs +++ b/src/external/str/char_at.rs @@ -1,11 +1,10 @@ use std::fmt::Debug; -use std::hash::Hash; -use crate::external::assertion_error::AssertionError; +use crate::external::litconv::{with_str, with_uint}; use crate::external::runtime_error::RuntimeError; use crate::representations::{Literal, Primitive}; use crate::{atomic_impl, atomic_redirect, externfn_impl}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::{Clause, ExprInst}; /// CharAt function /// @@ -13,35 +12,31 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct CharAt2; 
-externfn_impl!(CharAt2, |_: &Self, c: Clause| {Ok(CharAt1{c})}); +externfn_impl!(CharAt2, |_: &Self, x: ExprInst| {Ok(CharAt1{x})}); /// Partially applied CharAt function /// /// Prev state: [CharAt2]; Next state: [CharAt0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct CharAt1{ c: Clause } -atomic_redirect!(CharAt1, c); +#[derive(Debug, Clone)] +pub struct CharAt1{ x: ExprInst } +atomic_redirect!(CharAt1, x); atomic_impl!(CharAt1); -externfn_impl!(CharAt1, |this: &Self, c: Clause| { - let s = if let Ok(Literal::Str(s)) = this.c.clone().try_into() {s} - else {AssertionError::fail(this.c.clone(), "a string")?}; - Ok(CharAt0{ s, c }) +externfn_impl!(CharAt1, |this: &Self, x: ExprInst| { + with_str(&this.x, |s| Ok(CharAt0{ s: s.clone(), x })) }); /// Fully applied CharAt function. /// /// Prev state: [CharAt1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct CharAt0 { s: String, c: Clause } -atomic_redirect!(CharAt0, c); -atomic_impl!(CharAt0, |Self{ s, c }: &Self| { - let i = if let Ok(Literal::Uint(i)) = c.clone().try_into() {i} - else {AssertionError::fail(c.clone(), "an uint")?}; - if let Some(c) = s.chars().nth(i as usize) { +#[derive(Debug, Clone)] +pub struct CharAt0 { s: String, x: ExprInst } +atomic_redirect!(CharAt0, x); +atomic_impl!(CharAt0, |Self{ s, x }: &Self| { + with_uint(x, |i| if let Some(c) = s.chars().nth(i as usize) { Ok(Clause::P(Primitive::Literal(Literal::Char(c)))) } else { RuntimeError::fail("Character index out of bounds".to_string(), "indexing string")? 
- } + }) }); diff --git a/src/external/str/cls2str.rs b/src/external/str/cls2str.rs deleted file mode 100644 index 821c7ad..0000000 --- a/src/external/str/cls2str.rs +++ /dev/null @@ -1,13 +0,0 @@ -use std::rc::Rc; - -use crate::foreign::ExternError; -use crate::external::assertion_error::AssertionError; -use crate::representations::{interpreted::Clause, Literal}; - -pub fn cls2str(c: &Clause) -> Result<&String, Rc> { - let literal: &Literal = c.try_into() - .map_err(|_| AssertionError::ext(c.clone(), "a literal value"))?; - if let Literal::Str(s) = literal {Ok(s)} else { - AssertionError::fail(c.clone(), "a string")? - } -} \ No newline at end of file diff --git a/src/external/str/concatenate.rs b/src/external/str/concatenate.rs index f4bdc1b..9741a4f 100644 --- a/src/external/str/concatenate.rs +++ b/src/external/str/concatenate.rs @@ -1,11 +1,9 @@ -use super::cls2str; - use std::fmt::Debug; -use std::hash::Hash; +use crate::external::litconv::with_str; use crate::{atomic_impl, atomic_redirect, externfn_impl}; use crate::representations::{Primitive, Literal}; -use crate::representations::interpreted::Clause; +use crate::representations::interpreted::{Clause, ExprInst}; /// Concatenate function /// @@ -13,29 +11,29 @@ use crate::representations::interpreted::Clause; #[derive(Clone)] pub struct Concatenate2; -externfn_impl!(Concatenate2, |_: &Self, c: Clause| {Ok(Concatenate1{c})}); +externfn_impl!(Concatenate2, |_: &Self, c: ExprInst| {Ok(Concatenate1{c})}); /// Partially applied Concatenate function /// /// Prev state: [Concatenate2]; Next state: [Concatenate0] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Concatenate1{ c: Clause } +#[derive(Debug, Clone)] +pub struct Concatenate1{ c: ExprInst } atomic_redirect!(Concatenate1, c); atomic_impl!(Concatenate1); -externfn_impl!(Concatenate1, |this: &Self, c: Clause| { - let a: String = cls2str(&this.c)?.clone(); - Ok(Concatenate0{ a, c }) +externfn_impl!(Concatenate1, |this: &Self, c: ExprInst| { + 
with_str(&this.c, |a| Ok(Concatenate0{ a: a.clone(), c })) }); /// Fully applied Concatenate function. /// /// Prev state: [Concatenate1] -#[derive(Debug, Clone, PartialEq, Hash)] -pub struct Concatenate0 { a: String, c: Clause } +#[derive(Debug, Clone)] +pub struct Concatenate0 { a: String, c: ExprInst } atomic_redirect!(Concatenate0, c); atomic_impl!(Concatenate0, |Self{ a, c }: &Self| { - let b: &String = cls2str(c)?; - Ok(Clause::P(Primitive::Literal(Literal::Str(a.to_owned() + b)))) + with_str(c, |b| Ok(Clause::P(Primitive::Literal( + Literal::Str(a.to_owned() + b) + )))) }); diff --git a/src/external/str/mod.rs b/src/external/str/mod.rs index 7065648..cb6b322 100644 --- a/src/external/str/mod.rs +++ b/src/external/str/mod.rs @@ -1,11 +1,10 @@ mod concatenate; -mod cls2str; mod char_at; -pub use cls2str::cls2str; -use crate::project::{Loader, extlib_loader}; -pub fn str() -> impl Loader { - extlib_loader(vec![ - ("concatenate", Box::new(concatenate::Concatenate2)) +use crate::{pipeline::ConstTree, interner::Interner}; + +pub fn str(i: &Interner) -> ConstTree { + ConstTree::tree([ + (i.i("concatenate"), ConstTree::xfn(concatenate::Concatenate2)) ]) } \ No newline at end of file diff --git a/src/foreign.rs b/src/foreign.rs index 5c9e4a2..6d3ee49 100644 --- a/src/foreign.rs +++ b/src/foreign.rs @@ -5,9 +5,17 @@ use std::rc::Rc; use dyn_clone::DynClone; -use crate::representations::interpreted::{ - Clause, RuntimeError, InternalError -}; +use crate::interpreter::{RuntimeError, Context}; + +use crate::representations::Primitive; +pub use crate::representations::interpreted::Clause; +use crate::representations::interpreted::ExprInst; + +// Aliases for concise macros +pub type RcError = Rc; +pub type AtomicResult = Result<(Clause, Option), RuntimeError>; +pub type XfnResult = Result<(Clause, Option), RcError>; +pub type RcExpr = ExprInst; pub trait ExternError: Display { fn into_extern(self) -> Rc @@ -21,10 +29,13 @@ pub trait ExternError: Display { /// these are 
also external functions. pub trait ExternFn: DynClone { fn name(&self) -> &str; - fn apply(&self, arg: Clause) -> Result>; + fn apply(&self, arg: ExprInst, ctx: Context) -> XfnResult; fn hash(&self, state: &mut dyn std::hash::Hasher) { state.write_str(self.name()) } + fn to_xfn_cls(self) -> Clause where Self: Sized + 'static { + Clause::P(Primitive::ExternFn(Box::new(self))) + } } impl Eq for dyn ExternFn {} @@ -44,11 +55,10 @@ impl Debug for dyn ExternFn { pub trait Atomic: Any + Debug + DynClone where Self: 'static { fn as_any(&self) -> &dyn Any; - fn definitely_eq(&self, _other: &dyn Any) -> bool; - fn hash(&self, hasher: &mut dyn std::hash::Hasher); - fn run_once(&self) -> Result; - fn run_n_times(&self, n: usize) -> Result<(Clause, usize), RuntimeError>; - fn run_to_completion(&self) -> Result; + fn run(&self, ctx: Context) -> AtomicResult; + fn to_atom_cls(self) -> Clause where Self: Sized { + Clause::P(Primitive::Atom(Atom(Box::new(self)))) + } } /// Represents a black box unit of code with its own normalization steps. @@ -83,19 +93,8 @@ impl Clone for Atom { } } -impl Hash for Atom { - fn hash(&self, state: &mut H) { - self.0.hash(state) - } -} impl Debug for Atom { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "##ATOM[{:?}]##", self.data()) } -} -impl Eq for Atom {} -impl PartialEq for Atom { - fn eq(&self, other: &Self) -> bool { - self.data().definitely_eq(other.data().as_any()) - } } \ No newline at end of file diff --git a/src/foreign_macros/atomic_defaults.rs b/src/foreign_macros/atomic_defaults.rs index 1d1fffc..33348be 100644 --- a/src/foreign_macros/atomic_defaults.rs +++ b/src/foreign_macros/atomic_defaults.rs @@ -9,11 +9,5 @@ use crate::foreign::Atomic; macro_rules! 
atomic_defaults { () => { fn as_any(&self) -> &dyn std::any::Any { self } - fn definitely_eq(&self, _other: &dyn std::any::Any) -> bool { - _other.downcast_ref().map(|o| self == o).unwrap_or(false) - } - fn hash(&self, mut hasher: &mut dyn std::hash::Hasher) { - ::hash(self, &mut hasher) - } }; } \ No newline at end of file diff --git a/src/foreign_macros/atomic_impl.rs b/src/foreign_macros/atomic_impl.rs index 1e05ff8..155c6e4 100644 --- a/src/foreign_macros/atomic_impl.rs +++ b/src/foreign_macros/atomic_impl.rs @@ -39,66 +39,40 @@ use std::fmt::Debug; #[macro_export] macro_rules! atomic_impl { ($typ:ident) => { - atomic_impl!{$typ, |this: &Self| Ok(Clause::P( - $crate::representations::Primitive::ExternFn(Box::new(this.clone())) - ))} + atomic_impl!{$typ, |this: &Self| { + use $crate::foreign::ExternFn; + Ok(this.clone().to_xfn_cls()) + }} }; ($typ:ident, $next_phase:expr) => { impl $crate::foreign::Atomic for $typ { $crate::atomic_defaults!{} - fn run_once(&self) -> Result< - $crate::representations::interpreted::Clause, - $crate::representations::interpreted::InternalError - > { - match >::as_ref(self).run_once() { - Err($crate::representations::interpreted::InternalError::NonReducible) => { - ($next_phase)(self) - .map_err($crate::representations::interpreted::RuntimeError::Extern) - .map_err($crate::representations::interpreted::InternalError::Runtime) - } - Ok(arg) => Ok($crate::representations::interpreted::Clause::P( - $crate::representations::Primitive::Atom( - $crate::foreign::Atom::new( - >::from((self, arg)) - ) + + fn run(&self, ctx: $crate::interpreter::Context) + -> $crate::foreign::AtomicResult + { + // extract the expression + let expr = + >::as_ref(self).clone(); + // run the expression + let ret = $crate::interpreter::run(expr, ctx)?; + let $crate::interpreter::Return{ gas, state } = ret; + // rebuild the atomic + let next_self = + >::from((self, state)); + // branch off or wrap up + let next_clause = if gas.map(|g| g > 0).unwrap_or(true) { + 
match ($next_phase)(&next_self) { + Ok(r) => r, + Err(e) => return Err( + $crate::interpreter::RuntimeError::Extern(e) ) - )), - Err(e) => Err(e), - } - } - fn run_n_times(&self, n: usize) -> Result< - ( - $crate::representations::interpreted::Clause, - usize - ), - $crate::representations::interpreted::RuntimeError - > { - match >::as_ref(self).run_n_times(n) { - Ok((arg, k)) if k == n => Ok((Clause::P( - $crate::representations::Primitive::Atom( - $crate::foreign::Atom::new( - >::from((self, arg)) - ) - ) - ), k)), - Ok((arg, k)) => { - let intermediate = >::from((self, arg)); - ($next_phase)(&intermediate) - .map(|cls| (cls, k)) - .map_err($crate::representations::interpreted::RuntimeError::Extern) } - Err(e) => Err(e), - } - } - fn run_to_completion(&self) -> Result { - match >::as_ref(self).run_to_completion() { - Ok(arg) => { - let intermediate = >::from((self, arg)); - ($next_phase)(&intermediate) - .map_err($crate::representations::interpreted::RuntimeError::Extern) - }, - Err(e) => Err(e) - } + } else { next_self.to_atom_cls() }; + // package and return + Ok((next_clause, gas)) } } }; diff --git a/src/foreign_macros/atomic_inert.rs b/src/foreign_macros/atomic_inert.rs index e06f3a6..4a6b3e5 100644 --- a/src/foreign_macros/atomic_inert.rs +++ b/src/foreign_macros/atomic_inert.rs @@ -14,34 +14,11 @@ macro_rules! 
atomic_inert { ($typ:ident) => { impl $crate::foreign::Atomic for $typ { $crate::atomic_defaults!{} - fn run_once(&self) -> Result< - $crate::representations::interpreted::Clause, - $crate::representations::interpreted::InternalError - > { - Err($crate::representations::interpreted::InternalError::NonReducible) - } - fn run_n_times(&self, _: usize) -> Result< - ( - $crate::representations::interpreted::Clause, - usize - ), - $crate::representations::interpreted::RuntimeError - > { - Ok(($crate::representations::interpreted::Clause::P( - $crate::representations::Primitive::Atom( - $crate::foreign::Atom::new(self.clone()) - ) - ), 0)) - } - fn run_to_completion(&self) -> Result< - $crate::representations::interpreted::Clause, - $crate::representations::interpreted::RuntimeError - > { - Ok($crate::representations::interpreted::Clause::P( - $crate::representations::Primitive::Atom( - $crate::foreign::Atom::new(self.clone()) - ) - )) + + fn run(&self, ctx: $crate::interpreter::Context) + -> $crate::foreign::AtomicResult + { + Ok((self.clone().to_atom_cls(), ctx.gas)) } } }; diff --git a/src/foreign_macros/atomic_redirect.rs b/src/foreign_macros/atomic_redirect.rs index 3b4fa76..c6e4f7f 100644 --- a/src/foreign_macros/atomic_redirect.rs +++ b/src/foreign_macros/atomic_redirect.rs @@ -6,21 +6,23 @@ use super::atomic_impl; #[macro_export] macro_rules! 
atomic_redirect { ($typ:ident) => { - impl AsRef for $typ { + impl AsRef<$crate::foreign::RcExpr> for $typ { fn as_ref(&self) -> &Clause { &self.0 } } - impl From<(&Self, Clause)> for $typ { + impl From<(&Self, $crate::foreign::RcExpr)> for $typ { fn from((old, clause): (&Self, Clause)) -> Self { Self{ 0: clause, ..old.clone() } } } }; ($typ:ident, $field:ident) => { - impl AsRef for $typ { - fn as_ref(&self) -> &Clause { &self.$field } + impl AsRef<$crate::foreign::RcExpr> + for $typ { + fn as_ref(&self) -> &$crate::foreign::RcExpr { &self.$field } } - impl From<(&Self, Clause)> for $typ { - fn from((old, $field): (&Self, Clause)) -> Self { + impl From<(&Self, $crate::foreign::RcExpr)> + for $typ { + fn from((old, $field): (&Self, $crate::foreign::RcExpr)) -> Self { Self{ $field, ..old.clone() } } } diff --git a/src/foreign_macros/externfn_impl.rs b/src/foreign_macros/externfn_impl.rs index 7a2388e..ee3159c 100644 --- a/src/foreign_macros/externfn_impl.rs +++ b/src/foreign_macros/externfn_impl.rs @@ -22,19 +22,18 @@ macro_rules! externfn_impl { impl $crate::foreign::ExternFn for $typ { fn name(&self) -> &str {stringify!($typ)} fn apply(&self, - c: $crate::representations::interpreted::Clause - ) -> Result< - $crate::representations::interpreted::Clause, - std::rc::Rc - > { - match ($next_atomic)(self, c) { // ? casts the result but we want to strictly forward it - Ok(r) => Ok( + arg: $crate::foreign::RcExpr, + ctx: $crate::interpreter::Context + ) -> $crate::foreign::XfnResult { + match ($next_atomic)(self, arg) { // ? 
casts the result but we want to strictly forward it + Ok(r) => Ok(( $crate::representations::interpreted::Clause::P( $crate::representations::Primitive::Atom( $crate::foreign::Atom::new(r) ) - ) - ), + ), + ctx.gas.map(|g| g - 1) + )), Err(e) => Err(e) } } diff --git a/src/interner/display.rs b/src/interner/display.rs new file mode 100644 index 0000000..a9bd707 --- /dev/null +++ b/src/interner/display.rs @@ -0,0 +1,53 @@ +use core::fmt::Formatter; +use std::fmt::Display; + +use crate::interner::Interner; + +/// A variant of [std::fmt::Display] for objects that contain interned +/// strings and therefore can only be stringified in the presence of a +/// string interner +/// +/// The functions defined here are suffixed to distinguish them from +/// the ones in Display and ToString respectively, because Rust can't +/// identify functions based on arity +pub trait InternedDisplay { + /// formats the value using the given formatter and string interner + fn fmt_i(&self, + f: &mut std::fmt::Formatter<'_>, + i: &Interner, + ) -> std::fmt::Result; + + /// Converts the value to a string to be displayed + fn to_string_i(&self, i: &Interner) -> String { + // Copied from + let mut buf = String::new(); + let mut formatter = Formatter::new(&mut buf); + // Bypass format_args!() to avoid write_str with zero-length strs + Self::fmt_i(self, &mut formatter, i) + .expect("a Display implementation returned an error unexpectedly"); + buf + } + + fn bundle<'a>(&'a self, interner: &'a Interner) + -> DisplayBundle<'a, Self> + { + DisplayBundle { interner, data: self } + } +} + +impl InternedDisplay for T where T: Display { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, _i: &Interner) -> std::fmt::Result { + ::fmt(&self, f) + } +} + +pub struct DisplayBundle<'a, T: InternedDisplay + ?Sized> { + interner: &'a Interner, + data: &'a T +} + +impl<'a, T: InternedDisplay> Display for DisplayBundle<'a, T> { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + self.data.fmt_i(f, 
self.interner) + } +} \ No newline at end of file diff --git a/src/interner/mod.rs b/src/interner/mod.rs new file mode 100644 index 0000000..53bdacf --- /dev/null +++ b/src/interner/mod.rs @@ -0,0 +1,9 @@ +mod monotype; +mod multitype; +mod token; +mod display; + +pub use monotype::TypedInterner; +pub use multitype::Interner; +pub use token::Token; +pub use display::{DisplayBundle, InternedDisplay}; diff --git a/src/interner/monotype.rs b/src/interner/monotype.rs new file mode 100644 index 0000000..7b9c6fe --- /dev/null +++ b/src/interner/monotype.rs @@ -0,0 +1,120 @@ +use std::num::NonZeroU32; +use std::cell::RefCell; +use std::borrow::Borrow; +use std::hash::{Hash, BuildHasher}; + +use hashbrown::HashMap; + +use super::token::Token; + +pub struct TypedInterner{ + tokens: RefCell>>, + values: RefCell> +} +impl TypedInterner { + /// Create a fresh interner instance + pub fn new() -> Self { + Self { + tokens: RefCell::new(HashMap::new()), + values: RefCell::new(Vec::new()) + } + } + + /// Intern an object, returning a token + pub fn i>(&self, q: &Q) + -> Token where T: Borrow + { + let mut tokens = self.tokens.borrow_mut(); + let hash = compute_hash(tokens.hasher(), q); + let raw_entry = tokens.raw_entry_mut().from_hash(hash, |k| { + >::borrow(k) == q + }); + let kv = raw_entry.or_insert_with(|| { + let mut values = self.values.borrow_mut(); + let uniq_key: NonZeroU32 = (values.len() as u32 + 1u32) + .try_into().expect("can never be zero"); + let keybox = Box::new(q.to_owned()); + let keyref = Box::leak(keybox); + values.push((keyref, true)); + let token = Token::::from_id(uniq_key); + (keyref, token) + }); + *kv.1 + } + + /// Resolve a token, obtaining an object + /// It is illegal to use a token obtained from one interner with another. 
+ pub fn r(&self, t: Token) -> &T { + let values = self.values.borrow(); + let key = t.into_usize() - 1; + values[key].0 + } + + /// Intern a static reference without allocating the data on the heap + #[allow(unused)] + pub fn intern_static(&self, tref: &'static T) -> Token { + let mut tokens = self.tokens.borrow_mut(); + let token = *tokens.raw_entry_mut().from_key(tref) + .or_insert_with(|| { + let mut values = self.values.borrow_mut(); + let uniq_key: NonZeroU32 = (values.len() as u32 + 1u32) + .try_into().expect("can never be zero"); + values.push((tref, false)); + let token = Token::::from_id(uniq_key); + (tref, token) + }).1; + token + } +} + +// impl TypedInterner> { +// pub fn iv(&self, qs: &[Q]) -> Token> +// where +// Q: Eq + Hash + ToOwned, +// T: Borrow +// { +// let mut tokens = self.tokens.borrow_mut(); +// let hash = compute_hash(tokens.hasher(), qs); +// let raw_entry = tokens.raw_entry_mut().from_hash(hash, |k| { +// k.iter().zip(qs.iter()).all(|(t, q)| t.borrow() == q) +// }); +// let kv = raw_entry.or_insert_with(|| { +// let mut values = self.values.borrow_mut(); +// let uniq_key: NonZeroU32 = (values.len() as u32 + 1u32) +// .try_into().expect("can never be zero"); +// let tv = qs.iter().map(Q::to_owned).collect::>(); +// let keybox = Box::new(tv); +// let keyref = Box::leak(keybox); +// values.push((keyref, true)); +// let token = Token::>::from_id(uniq_key); +// (keyref, token) +// }); +// *kv.1 +// } +// } + +impl Drop for TypedInterner { + fn drop(&mut self) { + // make sure all values leaked by us are dropped + // FIXME: with the new hashmap logic we can actually store Rc-s + // which negates the need for unsafe here + let mut values = self.values.borrow_mut(); + for (item, owned) in values.drain(..) 
{ + if !owned {continue} + unsafe { + Box::from_raw((item as *const T).cast_mut()) + }; + } + } +} + +/// Helper function to compute hashes outside a hashmap +fn compute_hash( + hash_builder: &impl BuildHasher, + key: &(impl Hash + ?Sized) +) -> u64 { + use core::hash::Hasher; + let mut state = hash_builder.build_hasher(); + key.hash(&mut state); + state.finish() +} \ No newline at end of file diff --git a/src/interner/multitype.rs b/src/interner/multitype.rs new file mode 100644 index 0000000..067cf24 --- /dev/null +++ b/src/interner/multitype.rs @@ -0,0 +1,102 @@ +use std::borrow::Borrow; +use std::cell::{RefCell, RefMut}; +use std::any::{TypeId, Any}; +use std::hash::Hash; +use std::rc::Rc; + +use hashbrown::HashMap; + +use super::monotype::TypedInterner; +use super::token::Token; + +pub struct Interner { + interners: RefCell>>, +} +impl Interner { + pub fn new() -> Self { + Self { interners: RefCell::new(HashMap::new()) } + } + + pub fn i(&self, q: &Q) -> Token + where Q: Eq + Hash + ToOwned, + Q::Owned: 'static + Eq + Hash + Clone, + Q::Owned: Borrow + { + let mut interners = self.interners.borrow_mut(); + let interner = get_interner(&mut interners); + interner.i(q) + } + + pub fn r(&self, t: Token) -> &T { + let mut interners = self.interners.borrow_mut(); + let interner = get_interner(&mut interners); + // TODO: figure this out + unsafe{ (interner.r(t) as *const T).as_ref().unwrap() } + } + + /// Fully resolve + /// TODO: make this generic over containers + pub fn extern_vec(&self, + t: Token>> + ) -> Vec { + let mut interners = self.interners.borrow_mut(); + let v_int = get_interner(&mut interners); + let t_int = get_interner(&mut interners); + let v = v_int.r(t); + v.iter() + .map(|t| t_int.r(*t)) + .cloned() + .collect() + } + + pub fn extern_all(&self, + s: &[Token] + ) -> Vec { + s.iter() + .map(|t| self.r(*t)) + .cloned() + .collect() + } +} + +/// Get or create an interner for a given type. 
+fn get_interner( + interners: &mut RefMut>> +) -> Rc> { + let boxed = interners.raw_entry_mut().from_key(&TypeId::of::()) + .or_insert_with(|| ( + TypeId::of::(), + Rc::new(TypedInterner::::new()) + )).1.clone(); + boxed.downcast().expect("the typeid is supposed to protect from this") +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + pub fn test_string() { + let interner = Interner::new(); + let key1 = interner.i("foo"); + let key2 = interner.i(&"foo".to_string()); + assert_eq!(key1, key2) + } + + #[test] + pub fn test_slice() { + let interner = Interner::new(); + let key1 = interner.i(&vec![1, 2, 3]); + let key2 = interner.i(&[1, 2, 3][..]); + assert_eq!(key1, key2); + } + + // #[test] + #[allow(unused)] + pub fn test_str_slice() { + let interner = Interner::new(); + let key1 = interner.i(&vec!["a".to_string(), "b".to_string(), "c".to_string()]); + let key2 = interner.i(&["a", "b", "c"][..]); + // assert_eq!(key1, key2); + } +} \ No newline at end of file diff --git a/src/interner/token.rs b/src/interner/token.rs new file mode 100644 index 0000000..4283720 --- /dev/null +++ b/src/interner/token.rs @@ -0,0 +1,57 @@ +use std::{num::NonZeroU32, marker::PhantomData}; +use std::fmt::Debug; +use std::hash::Hash; + +use std::cmp::PartialEq; + +pub struct Token{ + id: NonZeroU32, + phantom_data: PhantomData +} +impl Token { + pub fn from_id(id: NonZeroU32) -> Self { + Self { id, phantom_data: PhantomData } + } + pub fn into_id(self) -> NonZeroU32 { + self.id + } + pub fn into_usize(self) -> usize { + let zero: u32 = self.id.into(); + zero as usize + } +} + +impl Debug for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Token({})", self.id) + } +} + +impl Copy for Token {} +impl Clone for Token { + fn clone(&self) -> Self { + Self{ id: self.id, phantom_data: PhantomData } + } +} + +impl Eq for Token {} +impl PartialEq for Token { + fn eq(&self, other: &Self) -> bool { self.id == other.id } +} + +impl Ord for Token { + 
fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.id.cmp(&other.id) + } +} +impl PartialOrd for Token { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(&other)) + } +} + +impl Hash for Token { + fn hash(&self, state: &mut H) { + state.write_u32(self.id.into()) + } +} \ No newline at end of file diff --git a/src/interpreter/apply.rs b/src/interpreter/apply.rs new file mode 100644 index 0000000..4147893 --- /dev/null +++ b/src/interpreter/apply.rs @@ -0,0 +1,104 @@ +use crate::foreign::Atom; +use crate::representations::Primitive; +use crate::representations::PathSet; +use crate::representations::interpreted::{ExprInst, Clause}; +use crate::utils::Side; + +use super::Return; +use super::error::RuntimeError; +use super::context::Context; + +/// Process the clause at the end of the provided path. +/// Note that paths always point to at least one target. +/// Note also that this is not cached as a normalization step in the +/// intermediate expressions. +fn map_at( + path: &[Side], source: ExprInst, + mapper: &mut impl FnMut(&Clause) -> Result +) -> Result { + source.try_update(|value| { + // Pass right through lambdas + if let Clause::Lambda { args, body } = value { + return Ok(Clause::Lambda { + args: args.clone(), + body: map_at(path, body.clone(), mapper)? + }) + } + // If the path ends here, process the next (non-lambda) node + let (head, tail) = if let Some(sf) = path.split_first() {sf} else { + return Ok(mapper(value)?) 
+ }; + // If it's an Apply, execute the next step in the path + if let Clause::Apply { f, x } = value { + return Ok(match head { + Side::Left => Clause::Apply { + f: map_at(tail, f.clone(), mapper)?, + x: x.clone(), + }, + Side::Right => Clause::Apply { + f: f.clone(), + x: map_at(tail, x.clone(), mapper)?, + } + }) + } + panic!("Invalid path") + }) +} + +fn substitute(paths: &PathSet, value: Clause, body: ExprInst) -> ExprInst { + let PathSet{ steps, next } = paths; + map_at(&steps, body, &mut |checkpoint| -> Result { + match (checkpoint, next) { + (Clause::Lambda{..}, _) => unreachable!("Handled by map_at"), + (Clause::Apply { f, x }, Some((left, right))) => Ok(Clause::Apply { + f: substitute(&left, value.clone(), f.clone()), + x: substitute(&right, value.clone(), x.clone()), + }), + (Clause::LambdaArg, None) => Ok(value.clone()), + (_, None) => panic!("Substitution path ends in something other than LambdaArg"), + (_, Some(_)) => panic!("Substitution path leads into something other than Apply"), + } + }).into_ok() +} + +/// Apply a function-like expression to a parameter. +/// If any work is being done, gas will be deducted. +pub fn apply( + f: ExprInst, x: ExprInst, mut ctx: Context +) -> Result { + let state = f.clone().try_update(|clause| match clause { + // apply an ExternFn or an internal function + Clause::P(Primitive::ExternFn(f)) => { + let (clause, gas) = f.apply(x, ctx.clone()) + .map_err(|e| RuntimeError::Extern(e))?; + ctx.gas = gas.map(|g| g - 1); // cost of extern call + Ok(clause) + } + Clause::Lambda{args, body} => Ok(if let Some(args) = args { + let x_cls = x.expr().clause.clone(); + let new_xpr_inst = substitute(args, x_cls, body.clone()); + let new_xpr = new_xpr_inst.expr(); + // cost of substitution + // XXX: should this be the number of occurrences instead? 
+ ctx.gas = ctx.gas.map(|x| x - 1); + new_xpr.clause.clone() + } else {body.expr().clause.clone()}), + Clause::Constant(name) => { + let symval = ctx.symbols.get(name).expect("missing symbol for function").clone(); + ctx.gas = ctx.gas.map(|x| x - 1); // cost of lookup + Ok(Clause::Apply { f: symval, x, }) + } + Clause::P(Primitive::Atom(Atom(atom))) => { // take a step in expanding atom + let (clause, gas) = atom.run(ctx.clone())?; + ctx.gas = gas.map(|x| x - 1); // cost of dispatch + Ok(Clause::Apply { f: clause.wrap(), x }) + }, + Clause::Apply{ f: fun, x: arg } => { // take a step in resolving pre-function + let res = apply(fun.clone(), arg.clone(), ctx.clone())?; + ctx.gas = res.gas; // if work has been done, it has been paid + Ok(Clause::Apply{ f: res.state, x }) + }, + _ => Err(RuntimeError::NonFunctionApplication(f.clone())) + })?; + Ok(Return { state, gas: ctx.gas }) +} \ No newline at end of file diff --git a/src/interpreter/context.rs b/src/interpreter/context.rs new file mode 100644 index 0000000..b4155ee --- /dev/null +++ b/src/interpreter/context.rs @@ -0,0 +1,27 @@ +use hashbrown::HashMap; + +use crate::representations::interpreted::ExprInst; +use crate::interner::Token; + +#[derive(Clone)] +pub struct Context<'a> { + pub symbols: &'a HashMap>>, ExprInst>, + pub gas: Option, +} + +impl Context<'_> { + pub fn is_stuck(&self, res: Option) -> bool { + match (res, self.gas) { + (Some(a), Some(b)) => a == b, + (None, None) => false, + (None, Some(_)) => panic!("gas not tracked despite limit"), + (Some(_), None) => panic!("gas tracked without request"), + } + } +} + +#[derive(Clone)] +pub struct Return { + pub state: ExprInst, + pub gas: Option, +} diff --git a/src/interpreter/error.rs b/src/interpreter/error.rs new file mode 100644 index 0000000..c7a67f6 --- /dev/null +++ b/src/interpreter/error.rs @@ -0,0 +1,27 @@ +use std::fmt::Display; +use std::rc::Rc; + +use crate::representations::interpreted::ExprInst; +use crate::foreign::ExternError; + +/// 
Problems in the process of execution +#[derive(Clone)] +pub enum RuntimeError { + Extern(Rc), + NonFunctionApplication(ExprInst), +} + +impl From> for RuntimeError { + fn from(value: Rc) -> Self { + Self::Extern(value) + } +} + +impl Display for RuntimeError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Extern(e) => write!(f, "Error in external function: {e}"), + Self::NonFunctionApplication(loc) => write!(f, "Primitive applied as function at {loc:?}") + } + } +} \ No newline at end of file diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs new file mode 100644 index 0000000..5aa04b1 --- /dev/null +++ b/src/interpreter/mod.rs @@ -0,0 +1,8 @@ +mod apply; +mod error; +mod context; +mod run; + +pub use context::{Context, Return}; +pub use error::RuntimeError; +pub use run::{run}; \ No newline at end of file diff --git a/src/interpreter/run.rs b/src/interpreter/run.rs new file mode 100644 index 0000000..6650299 --- /dev/null +++ b/src/interpreter/run.rs @@ -0,0 +1,39 @@ +use crate::foreign::Atom; +use crate::representations::Primitive; +use crate::representations::interpreted::{Clause, ExprInst}; + +use super::apply::apply; +use super::error::RuntimeError; +use super::context::{Context, Return}; + +pub fn run(expr: ExprInst, mut ctx: Context) +-> Result +{ + let state = expr.try_normalize(|cls| -> Result { + let mut i = cls.clone(); + while ctx.gas.map(|g| g > 0).unwrap_or(true) { + match &i { + Clause::Apply { f, x } => { + let res = apply(f.clone(), x.clone(), ctx.clone())?; + if ctx.is_stuck(res.gas) {return Ok(i)} + ctx.gas = res.gas; + i = res.state.expr().clause.clone(); + } + Clause::P(Primitive::Atom(Atom(data))) => { + let (clause, gas) = data.run(ctx.clone())?; + if ctx.is_stuck(gas) {return Ok(i)} + ctx.gas = gas; + i = clause.clone(); + } + Clause::Constant(c) => { + let symval = ctx.symbols.get(c).expect("missing symbol for value"); + ctx.gas = ctx.gas.map(|g| g - 1); // cost of lookup + i = 
symval.expr().clause.clone(); + } + _ => return Ok(i) + } + } + Ok(i) + })?; + Ok(Return { state, gas: ctx.gas }) +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 6799495..3712840 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,3 @@ -#![feature(specialization)] -#![feature(adt_const_params)] -#![feature(generic_const_exprs)] #![feature(generators, generator_trait)] #![feature(never_type)] #![feature(unwrap_infallible)] @@ -8,124 +5,51 @@ #![feature(hasher_prefixfree_extras)] #![feature(closure_lifetime_binder)] #![feature(generic_arg_infer)] -use std::{env::current_dir, collections::HashMap}; +#![feature(array_chunks)] +#![feature(fmt_internals)] +#![feature(map_try_insert)] +#![feature(slice_group_by)] +#![feature(trait_alias)] -// mod executor; mod parse; -pub(crate) mod project; +mod interner; +mod interpreter; mod utils; mod representations; mod rule; -mod scheduler; pub(crate) mod foreign; mod external; mod foreign_macros; -use lasso::Rodeo; +mod pipeline; +mod run_dir; +mod cli; +use std::{path::PathBuf, fs::File}; + +use clap::Parser; +use cli::prompt; pub use representations::ast; -use ast::{Expr, Clause}; -// use representations::typed as t; -use mappable_rc::Mrc; -use project::{rule_collector, file_loader}; -use rule::Repository; -use utils::to_mrc_slice; +use run_dir::run_dir; -use crate::external::std::std; -use crate::project::{map_loader, string_loader, Loader, ModuleError}; -use crate::representations::{ast_to_postmacro, postmacro_to_interpreted}; - -fn literal(orig: &[&str]) -> Mrc<[String]> { - to_mrc_slice(vliteral(orig)) -} - -fn vliteral(orig: &[&str]) -> Vec { - orig.iter().map(|&s| s.to_owned()).collect() -} - -static PRELUDE:&str = r#" -import std::( - num::(add, subtract, multiply, remainder, divide), - bool::(equals, ifthenelse), - str::concatenate -) - -export (...$a + ...$b) =1001=> (add (...$a) (...$b)) -export (...$a - ...$b:1) =1001=> (subtract (...$a) (...$b)) -export (...$a * ...$b) =1000=> (multiply 
(...$a) (...$b)) -export (...$a % ...$b:1) =1000=> (remainder (...$a) (...$b)) -export (...$a / ...$b:1) =1000=> (divide (...$a) (...$b)) -export (...$a == ...$b) =1002=> (equals (...$a) (...$b)) -export (...$a ++ ...$b) =1003=> (concatenate (...$a) (...$b)) - -export do { ...$statement ; ...$rest:1 } =10_001=> ( - statement (...$statement) do { ...$rest } -) -export do { ...$return } =10_000=> (...$return) - -export statement (let $_name = ...$value) ...$next =10_000=> ( - (\$_name. ...$next) (...$value) -) -export statement (cps $_name = ...$operation) ...$next =10_001=> ( - (...$operation) \$_name. ...$next -) -export statement (cps ...$operation) ...$next =10_000=> ( - (...$operation) (...$next) -) - -export if ...$cond then ...$true else ...$false:1 =5_000=> ( - ifthenelse (...$cond) (...$true) (...$false) -) -"#; - -fn initial_tree() -> Mrc<[Expr]> { - to_mrc_slice(vec![Expr(Clause::Name { - local: None, - qualified: literal(&["mod", "main", "main"]) - }, to_mrc_slice(vec![]))]) -} - -#[allow(unused)] -fn load_project() { - let mut rodeo = Rodeo::default(); - let collect_rules = rule_collector( - rodeo, - map_loader(HashMap::from([ - ("std", std().boxed()), - ("prelude", string_loader(PRELUDE).boxed()), - ("mod", file_loader(current_dir().expect("Missing CWD!")).boxed()) - ])) - ); - let rules = match collect_rules.try_find(&literal(&["mod", "main"])) { - Ok(rules) => rules, - Err(err) => if let ModuleError::Syntax(pe) = err { - panic!("{}", pe); - } else {panic!("{:#?}", err)} - }; - let mut tree = initial_tree(); - println!("Start processing {tree:?}"); - let repo = Repository::new(rules.as_ref().to_owned()); - println!("Ruleset: {repo:?}"); - xloop!(let mut i = 0; i < 100; i += 1; { - match repo.step(Mrc::clone(&tree)) { - Ok(Some(phase)) => { - //println!("Step {i}: {phase:?}"); - tree = phase; - }, - Ok(None) => { - println!("Execution complete"); - break - }, - Err(e) => panic!("Rule error: {e:?}") - } - }; panic!("Macro execution didn't halt")); - let 
pmtree = ast_to_postmacro::exprv(tree.as_ref()) - .unwrap_or_else(|e| panic!("Postmacro conversion error: {e}")); - let runtree = postmacro_to_interpreted::expr_rec(&pmtree) - .unwrap_or_else(|e| panic!("Interpreted conversion error: {e}")); - let stable = runtree.run_to_completion() - .unwrap_or_else(|e| panic!("Runtime error {e}")); - println!("Settled at {stable:?}") +/// Orchid interpreter +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + /// Folder containing main.orc + #[arg(short, long)] + pub project: Option } fn main() { - load_project(); + let args = Args::parse(); + let path = args.project.unwrap_or_else(|| { + prompt("Enter a project root", ".".to_string(), |p| { + let mut path: PathBuf = p.trim().into(); + path.push("main.orc"); + match File::open(&path) { + Ok(_) => Ok(p), + Err(e) => Err(format!("{}: {e}", path.display())) + } + }) + }); + run_dir(&PathBuf::try_from(path).unwrap()); } diff --git a/src/parse/context.rs b/src/parse/context.rs new file mode 100644 index 0000000..892480f --- /dev/null +++ b/src/parse/context.rs @@ -0,0 +1,48 @@ +use std::rc::Rc; + +use crate::interner::Interner; + +/// Trait enclosing all context features +/// +/// Hiding type parameters in associated types allows for simpler +/// parser definitions +pub trait Context: Clone { + type Op: AsRef; + + fn ops<'a>(&'a self) -> &'a [Self::Op]; + fn file(&self) -> Rc>; + fn interner<'a>(&'a self) -> &'a Interner; +} + +/// Struct implementing context +/// +/// Hiding type parameters in associated types allows for simpler +/// parser definitions +pub struct ParsingContext<'a, Op> { + pub ops: &'a [Op], + pub interner: &'a Interner, + pub file: Rc> +} + +impl<'a, Op> ParsingContext<'a, Op> { + pub fn new(ops: &'a [Op], interner: &'a Interner, file: Rc>) + -> Self { Self { ops, interner, file } } +} + +impl<'a, Op> Clone for ParsingContext<'a, Op> { + fn clone(&self) -> Self { + Self { + ops: self.ops, + interner: self.interner, + 
file: self.file.clone() + } + } +} + +impl> Context for ParsingContext<'_, Op> { + type Op = Op; + + fn interner<'a>(&'a self) -> &'a Interner { self.interner } + fn file(&self) -> Rc> {self.file.clone()} + fn ops<'a>(&'a self) -> &'a [Self::Op] { self.ops } +} \ No newline at end of file diff --git a/src/parse/enum_filter.rs b/src/parse/enum_filter.rs new file mode 100644 index 0000000..2de48e9 --- /dev/null +++ b/src/parse/enum_filter.rs @@ -0,0 +1,46 @@ +/// Produces filter_mapping functions for enum types: +/// ```rs +/// enum_parser!(Foo::Bar | "Some error!") // Accepts Foo::Bar(T) into T +/// enum_parser!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar" +/// enum_parser!(Foo >> Quz; Bar, Baz) // Parses Foo::Bar(T) into Quz::Bar(T) and Foo::Baz(U) into Quz::Baz(U) +/// ``` +#[macro_export] +macro_rules! enum_filter { + ($p:path | $m:tt) => { + { + |l| { + if let $p(x) = l { Ok(x) } + else { Err($m) } + } + } + }; + ($p:path >> $q:path; $i:ident | $m:tt) => { + { + use $p as srcpath; + use $q as tgtpath; + let base = enum_filter!(srcpath::$i | $m); + move |l| base(l).map(tgtpath::$i) + } + }; + ($p:path >> $q:path; $i:ident) => { + enum_filter!($p >> $q; $i | {concat!("Expected ", stringify!($i))}) + }; + ($p:path >> $q:path; $($i:ident),+ | $m:tt) => { + { + use $p as srcpath; + use $q as tgtpath; + |l| match l { + $( srcpath::$i(x) => Ok(tgtpath::$i(x)), )+ + _ => Err($m) + } + } + }; + ($p:path >> $q:path; $($i:ident),+) => { + enum_filter!($p >> $q; $($i),+ | { + concat!("Expected one of ", $(stringify!($i), " "),+) + }) + }; + ($p:path) => { + enum_filter!($p | {concat!("Expected ", stringify!($p))}) + }; +} \ No newline at end of file diff --git a/src/parse/enum_parser.rs b/src/parse/enum_parser.rs deleted file mode 100644 index 453406a..0000000 --- a/src/parse/enum_parser.rs +++ /dev/null @@ -1,32 +0,0 @@ -/// Produces parsers for tokenized sequences of enum types: -/// ```rs -/// enum_parser!(Foo::Bar | "Some error!") // Parses 
Foo::Bar(T) into T -/// enum_parser!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar" -/// enum_parser!(Foo >> Quz; Bar, Baz) // Parses Foo::Bar(T) into Quz::Bar(T) and Foo::Baz(U) into Quz::Baz(U) -/// ``` -#[macro_export] -macro_rules! enum_parser { - ($p:path | $m:tt) => { - { - ::chumsky::prelude::filter_map(|s, l| { - if let $p(x) = l { Ok(x) } - else { Err(::chumsky::prelude::Simple::custom(s, $m))} - }) - } - }; - ($p:path >> $q:path; $i:ident) => { - { - use $p as srcpath; - use $q as tgtpath; - enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i) - } - }; - ($p:path >> $q:path; $($i:ident),+) => { - { - ::chumsky::prelude::choice(( - $( enum_parser!($p >> $q; $i) ),+ - )) - } - }; - ($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) }; -} \ No newline at end of file diff --git a/src/parse/expression.rs b/src/parse/expression.rs index 4e9c94b..8009536 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -1,155 +1,107 @@ +use std::ops::Range; use std::rc::Rc; use chumsky::{self, prelude::*, Parser}; -use lasso::Spur; -use crate::enum_parser; -use crate::representations::Primitive; -use crate::representations::{Literal, ast::{Clause, Expr}}; -use super::lexer::Lexeme; +use crate::enum_filter; +use crate::representations::Primitive; +use crate::representations::ast::{Clause, Expr}; +use crate::representations::location::Location; +use crate::interner::Token; + +use super::context::Context; +use super::lexer::{Lexeme, Entry, filter_map_lex}; /// Parses any number of expr wrapped in (), [] or {} -fn sexpr_parser

( - expr: P -) -> impl Parser> + Clone -where P: Parser> + Clone { - Lexeme::paren_parser(expr.repeated()) - .map(|(del, b)| Clause::S(del, Rc::new(b))) +fn sexpr_parser( + expr: impl Parser> + Clone +) -> impl Parser), Error = Simple> + Clone { + let body = expr.repeated(); + choice(( + Lexeme::LP('(').parser().then(body.clone()) + .then(Lexeme::RP('(').parser()), + Lexeme::LP('[').parser().then(body.clone()) + .then(Lexeme::RP('[').parser()), + Lexeme::LP('{').parser().then(body.clone()) + .then(Lexeme::RP('{').parser()), + )).map(|((lp, body), rp)| { + let Entry{lexeme, range: Range{start, ..}} = lp; + let end = rp.range.end; + let char = if let Lexeme::LP(c) = lexeme {c} + else {unreachable!("The parser only matches Lexeme::LP")}; + (Clause::S(char, Rc::new(body)), start..end) + }).labelled("S-expression") } /// Parses `\name.body` or `\name:type.body` where name is any valid name /// and type and body are both expressions. Comments are allowed /// and ignored everywhere in between the tokens -fn lambda_parser<'a, P, F>( - expr: P, intern: &'a F -) -> impl Parser> + Clone + 'a -where - P: Parser> + Clone + 'a, - F: Fn(&str) -> Spur + 'a { - just(Lexeme::BS) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .ignore_then(namelike_parser(intern)) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .then( - just(Lexeme::Type) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .ignore_then(expr.clone().repeated()) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .or_not().map(Option::unwrap_or_default) - ) - .then_ignore(just(Lexeme::name("."))) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) +fn lambda_parser<'a>( + expr: impl Parser> + Clone + 'a, + ctx: impl Context + 'a +) -> impl Parser), Error = Simple> + Clone + 'a { + Lexeme::BS.parser() + .ignore_then(expr.clone()) + .then_ignore(Lexeme::Name(ctx.interner().i(".")).parser()) .then(expr.repeated().at_least(1)) - .map(|((name, typ), body): ((Clause, Vec), Vec)| { - 
Clause::Lambda(Rc::new(name), Rc::new(typ), Rc::new(body)) - }) -} - -/// see [lambda_parser] but `@` instead of `\` and the name is optional -fn auto_parser<'a, P, F>( - expr: P, intern: &'a F -) -> impl Parser> + Clone + 'a -where - P: Parser> + Clone + 'a, - F: Fn(&str) -> Spur + 'a { - just(Lexeme::At) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .ignore_then(namelike_parser(intern).or_not()) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .then( - just(Lexeme::Type) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .ignore_then(expr.clone().repeated()) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .or_not().map(Option::unwrap_or_default) - ) - .then_ignore(just(Lexeme::name("."))) - .then_ignore(enum_parser!(Lexeme::Comment).repeated()) - .then(expr.repeated().at_least(1)) - .try_map(|((name, typ), body): ((Option, Vec), Vec), s| { - if name.is_none() && typ.is_empty() { - Err(Simple::custom(s, "Auto without name or type has no effect")) - } else { - Ok(Clause::Auto(name.map(Rc::new), Rc::new(typ), Rc::new(body))) - } - }) + .map_with_span(move |(arg, body), span| { + (Clause::Lambda(Rc::new(arg), Rc::new(body)), span) + }).labelled("Lambda") } /// Parses a sequence of names separated by ::
-/// Comments are allowed and ignored in between -pub fn ns_name_parser<'a, F>(intern: &'a F) --> impl Parser, Error = Simple> + Clone + 'a -where F: Fn(&str) -> Spur + 'a { - enum_parser!(Lexeme::Name) - .map(|s| intern(&s)) - .separated_by( - enum_parser!(Lexeme::Comment).repeated() - .then(just(Lexeme::NS)) - .then(enum_parser!(Lexeme::Comment).repeated()) - ).at_least(1) +/// Comments and line breaks are allowed and ignored in between +pub fn ns_name_parser<'a>(ctx: impl Context + 'a) +-> impl Parser>>, Range), Error = Simple> + Clone + 'a +{ + filter_map_lex(enum_filter!(Lexeme::Name)) + .separated_by(Lexeme::NS.parser()).at_least(1) + .map(move |elements| { + let start = elements.first().expect("can never be empty").1.start; + let end = elements.last().expect("can never be empty").1.end; + let tokens = + /*ctx.prefix().iter().copied().chain*/( + elements.iter().map(|(t, _)| *t) + ).collect::>(); + (ctx.interner().i(&tokens), start..end) + }).labelled("Namespaced name") } -/// Parse any legal argument name starting with a `$` -fn placeholder_parser() -> impl Parser> + Clone { - enum_parser!(Lexeme::Name).try_map(|name, span| { - name.strip_prefix('$').map(&str::to_string) - .ok_or_else(|| Simple::custom(span, "Not a placeholder")) - }) -} - -pub fn namelike_parser<'a, F>(intern: &'a F) --> impl Parser> + Clone + 'a -where F: Fn(&str) -> Spur + 'a { +pub fn namelike_parser<'a>(ctx: impl Context + 'a) +-> impl Parser), Error = Simple> + Clone + 'a +{ choice(( - just(Lexeme::name("...")).to(true) - .or(just(Lexeme::name("..")).to(false)) - .then(placeholder_parser()) - .then( - just(Lexeme::Type) - .ignore_then(enum_parser!(Lexeme::Uint)) - .or_not().map(Option::unwrap_or_default) - ) - .map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some(( - prio.try_into().unwrap(), - nonzero - ))}), - ns_name_parser(intern) - .map(|qualified| Clause::Name(Rc::new(qualified))), + filter_map_lex(enum_filter!(Lexeme::PH)) + .map(|(ph, range)| (Clause::Placeh(ph), range)), 
+ ns_name_parser(ctx) + .map(|(token, range)| (Clause::Name(token), range)), )) } -pub fn clause_parser<'a, P, F>( - expr: P, intern: &'a F -) -> impl Parser> + Clone + 'a -where - P: Parser> + Clone + 'a, - F: Fn(&str) -> Spur + 'a { - enum_parser!(Lexeme::Comment).repeated() - .ignore_then(choice(( - enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str) - .map(Primitive::Literal).map(Clause::P), - placeholder_parser().map(|key| Clause::Placeh{key, vec: None}), - namelike_parser(intern), +pub fn clause_parser<'a>( + expr: impl Parser> + Clone + 'a, + ctx: impl Context + 'a +) -> impl Parser), Error = Simple> + Clone + 'a { + choice(( + filter_map_lex(enum_filter!(Lexeme >> Primitive; Literal)) + .map(|(p, s)| (Clause::P(p), s)).labelled("Literal"), sexpr_parser(expr.clone()), - lambda_parser(expr.clone(), intern), - auto_parser(expr.clone(), intern), - just(Lexeme::At).ignore_then(expr.clone()).map(|arg| { - Clause::Explicit(Rc::new(arg)) - }) - ))).then_ignore(enum_parser!(Lexeme::Comment).repeated()) + lambda_parser(expr.clone(), ctx.clone()), + namelike_parser(ctx), + )).labelled("Clause") } /// Parse an expression -pub fn xpr_parser<'a, F>(intern: &'a F) --> impl Parser> + 'a -where F: Fn(&str) -> Spur + 'a { - recursive(|expr| { - let clause = clause_parser(expr, intern); - clause.clone().then( - just(Lexeme::Type) - .ignore_then(clause.clone()) - .repeated() - ) - .map(|(val, typ)| Expr(val, Rc::new(typ))) +pub fn xpr_parser<'a>(ctx: impl Context + 'a) +-> impl Parser> + 'a +{ + recursive(move |expr| { + clause_parser(expr, ctx.clone()) + .map(move |(value, range)| { + Expr{ + value: value.clone(), + location: Location::Range { file: ctx.file(), range } + } + }) }).labelled("Expression") -} +} \ No newline at end of file diff --git a/src/parse/import.rs b/src/parse/import.rs index 5f49a29..38c2a88 100644 --- a/src/parse/import.rs +++ b/src/parse/import.rs @@ -1,16 +1,16 @@ -use std::rc::Rc; - use chumsky::{Parser, prelude::*}; use itertools::Itertools; -use 
lasso::Spur; use crate::representations::sourcefile::Import; use crate::utils::iter::{box_once, box_flatten, into_boxed_iter, BoxedIterIter}; -use crate::{enum_parser, box_chain}; +use crate::interner::Token; +use crate::{box_chain, enum_filter}; -use super::lexer::Lexeme; +use super::Entry; +use super::context::Context; +use super::lexer::{Lexeme, filter_map_lex}; /// initialize a BoxedIter> with a single element. -fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> { +fn init_table(name: Token) -> BoxedIterIter<'static, Token> { // I'm not at all confident that this is a good approach. box_once(box_once(name)) } @@ -21,51 +21,54 @@ fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> { /// preferably contain crossplatform filename-legal characters but the /// symbols are explicitly allowed to go wild. /// There's a blacklist in [name] -pub fn import_parser<'a, F>(intern: &'a F) --> impl Parser, Error = Simple> + 'a -where F: Fn(&str) -> Spur + 'a { - let globstar = intern("*"); +pub fn import_parser<'a>(ctx: impl Context + 'a) +-> impl Parser, Error = Simple> + 'a +{ // TODO: this algorithm isn't cache friendly and copies a lot - recursive(move |expr:Recursive, Simple>| { - enum_parser!(Lexeme::Name).map(|s| intern(s.as_str())) - .separated_by(just(Lexeme::NS)) - .then( - just(Lexeme::NS) - .ignore_then( - choice(( - expr.clone() - .separated_by(just(Lexeme::name(","))) - .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('('))) - .map(|v| box_flatten(v.into_iter())) - .labelled("import group"), - // Each expr returns a list of imports, flatten into common list - just(Lexeme::name("*")).map(move |_| init_table(globstar)) - .labelled("wildcard import"), // Just a *, wrapped - enum_parser!(Lexeme::Name) - .map(|s| init_table(intern(s.as_str()))) - .labelled("import terminal") // Just a name, wrapped - )) - ).or_not() - ) - .map(|(name, opt_post): (Vec, Option>)| - -> BoxedIterIter { - if let Some(post) = opt_post { - Box::new(post.map(move |el| { - 
box_chain!(name.clone().into_iter(), el) - })) - } else { - box_once(into_boxed_iter(name)) - } - }) + recursive({ + let ctx = ctx.clone(); + move |expr:Recursive>, Simple>| { + filter_map_lex(enum_filter!(Lexeme::Name)).map(|(t, _)| t) + .separated_by(Lexeme::NS.parser()) + .then( + Lexeme::NS.parser() + .ignore_then( + choice(( + expr.clone() + .separated_by(Lexeme::Name(ctx.interner().i(",")).parser()) + .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser()) + .map(|v| box_flatten(v.into_iter())) + .labelled("import group"), + // Each expr returns a list of imports, flatten into common list + Lexeme::Name(ctx.interner().i("*")).parser() + .map(move |_| init_table(ctx.interner().i("*"))) + .labelled("wildcard import"), // Just a *, wrapped + filter_map_lex(enum_filter!(Lexeme::Name)) + .map(|(t, _)| init_table(t)) + .labelled("import terminal") // Just a name, wrapped + )) + ).or_not() + ) + .map(|(name, opt_post): (Vec>, Option>>)| + -> BoxedIterIter> { + if let Some(post) = opt_post { + Box::new(post.map(move |el| { + box_chain!(name.clone().into_iter(), el) + })) + } else { + box_once(into_boxed_iter(name)) + } + }) + } }).map(move |paths| { paths.filter_map(|namespaces| { let mut path = namespaces.collect_vec(); let name = path.pop()?; Some(Import { - path: Rc::new(path), + path: ctx.interner().i(&path), name: { - if name == globstar { None } - else { Some(name.to_owned()) } + if name == ctx.interner().i("*") { None } + else { Some(name) } } }) }).collect() diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs index 008b8f1..07f56e1 100644 --- a/src/parse/lexer.rs +++ b/src/parse/lexer.rs @@ -1,53 +1,88 @@ -use std::{ops::Range, iter, fmt}; -use ordered_float::NotNan; -use chumsky::{Parser, prelude::*}; -use std::fmt::Debug; -use crate::{utils::{BoxedIter, iter::{box_once, box_flatten}}, box_chain}; +use std::fmt; +use std::ops::Range; +use ordered_float::NotNan; +use chumsky::{Parser, prelude::*, text::keyword, Span}; + +use 
crate::ast::{Placeholder, PHClass}; +use crate::representations::Literal; +use crate::interner::{Token, InternedDisplay, Interner}; + +use super::context::Context; +use super::placeholder; use super::{number, string, name, comment}; -#[derive(Clone, PartialEq, Eq, Hash)] -pub struct Entry(pub Lexeme, pub Range); -impl Debug for Entry { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.0) - // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish() +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct Entry{ + pub lexeme: Lexeme, + pub range: Range +} +impl Entry { + pub fn is_filler(&self) -> bool { + matches!(self.lexeme, Lexeme::Comment(_)) + || matches!(self.lexeme, Lexeme::BR) + } +} + +impl InternedDisplay for Entry { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + self.lexeme.fmt_i(f, i) } } impl From for (Lexeme, Range) { fn from(ent: Entry) -> Self { - (ent.0, ent.1) + (ent.lexeme, ent.range) } } -#[derive(Clone, PartialEq, Eq, Hash)] +impl Span for Entry { + type Context = Lexeme; + type Offset = usize; + + fn context(&self) -> Self::Context {self.lexeme.clone()} + fn start(&self) -> Self::Offset {self.range.start()} + fn end(&self) -> Self::Offset {self.range.end()} + fn new(context: Self::Context, range: Range) -> Self { + Self{ + lexeme: context, + range + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] pub enum Lexeme { - Num(NotNan), - Uint(u64), - Char(char), - Str(String), - Name(String), + Literal(Literal), + Name(Token), Rule(NotNan), - NS, // namespace separator + /// Walrus operator (formerly shorthand macro) + Const, + /// Line break + BR, + /// Namespace separator + NS, + /// Left paren LP(char), + /// Right paren RP(char), - BS, // Backslash + /// Backslash + BS, At, Type, // type operator Comment(String), Export, Import, + Namespace, + PH(Placeholder) } -impl Debug for Lexeme { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> 
std::fmt::Result { +impl InternedDisplay for Lexeme { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { match self { - Self::Num(n) => write!(f, "{}", n), - Self::Uint(i) => write!(f, "{}", i), - Self::Char(c) => write!(f, "{:?}", c), - Self::Str(s) => write!(f, "{:?}", s), - Self::Name(name) => write!(f, "{}", name), + Self::Literal(l) => write!(f, "{:?}", l), + Self::Name(token) => write!(f, "{}", i.r(*token)), + Self::Const => write!(f, ":="), Self::Rule(prio) => write!(f, "={}=>", prio), Self::NS => write!(f, "::"), Self::LP(l) => write!(f, "{}", l), @@ -57,102 +92,114 @@ impl Debug for Lexeme { '{' => write!(f, "}}"), _ => f.debug_tuple("RP").field(l).finish() }, + Self::BR => write!(f, "\n"), Self::BS => write!(f, "\\"), Self::At => write!(f, "@"), Self::Type => write!(f, ":"), Self::Comment(text) => write!(f, "--[{}]--", text), Self::Export => write!(f, "export"), Self::Import => write!(f, "import"), + Self::Namespace => write!(f, "namespace"), + Self::PH(Placeholder { name, class }) => match *class { + PHClass::Scalar => write!(f, "${}", i.r(*name)), + PHClass::Vec { nonzero, prio } => { + if nonzero {write!(f, "...")} + else {write!(f, "..")}?; + write!(f, "${}", i.r(*name))?; + if prio != 0 {write!(f, ":{}", prio)?;}; + Ok(()) + } + } } } } impl Lexeme { - pub fn name(n: T) -> Self { - Lexeme::Name(n.to_string()) + pub fn rule(prio: impl Into) -> Self { + Lexeme::Rule( + NotNan::new(prio.into()) + .expect("Rule priority cannot be NaN") + ) } - pub fn rule(prio: T) -> Self where T: Into { - Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN")) - } - pub fn paren_parser( - expr: P - ) -> impl Parser> + Clone - where P: Parser> + Clone { - choice(( - expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('('))) - .map(|t| ('(', t)), - expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('['))) - .map(|t| ('[', t)), - expr.delimited_by(just(Lexeme::LP('{')), 
just(Lexeme::RP('{'))) - .map(|t| ('{', t)), - )) + + pub fn parser>(self) + -> impl Parser + Clone { + filter(move |ent: &Entry| ent.lexeme == self) } } #[derive(Clone, PartialEq, Eq, Hash)] -pub struct LexedText(pub Vec>); +pub struct LexedText(pub Vec); -impl Debug for LexedText { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - for row in &self.0 { - for tok in row { - tok.fmt(f)?; - f.write_str(" ")? - } - f.write_str("\n")? +impl InternedDisplay for LexedText { + fn fmt_i(&self, f: &mut fmt::Formatter<'_>, i: &Interner) -> fmt::Result { + for tok in self.0.iter() { + tok.fmt_i(f, i)?; + f.write_str(" ")? } Ok(()) } } -type LexSubres<'a> = BoxedIter<'a, Entry>; - -fn paren_parser<'a>( - expr: Recursive<'a, char, LexSubres<'a>, Simple>, - lp: char, rp: char -) -> impl Parser, Error=Simple> + 'a { - expr.padded().repeated() - .map(|x| box_flatten(x.into_iter())) - .delimited_by(just(lp), just(rp)).map_with_span(move |b, s| { - box_chain!( - iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1)), - b, - iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end)) - ) - }) +fn paren_parser(lp: char, rp: char) +-> impl Parser> +{ + just(lp).to(Lexeme::LP(lp)) + .or(just(rp).to(Lexeme::RP(lp))) } -pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser, Error=Simple> + 'a -where T: AsRef + Clone { - let all_ops = ops.iter().map(|o| o.as_ref().to_string()) - .chain([",", ".", "..", "..."].into_iter().map(str::to_string)) +pub fn literal_parser() -> impl Parser> { + choice(( + number::int_parser().map(Literal::Uint), // all ints are valid floats so it takes precedence + number::float_parser().map(Literal::Num), + string::char_parser().map(Literal::Char), + string::str_parser().map(Literal::Str), + )) +} + +pub static BASE_OPS: &[&str] = &[",", ".", "..", "..."]; + +pub fn lexer<'a>(ctx: impl Context + 'a) +-> impl Parser, Error=Simple> + 'a +{ + let all_ops = ctx.ops().iter() + .map(|op| op.as_ref()) + .chain(BASE_OPS.iter().cloned()) + .map(str::to_string) .collect::>(); 
- just("export").padded().to(Lexeme::Export) - .or(just("import").padded().to(Lexeme::Import)) - .or_not().then(recursive(move |recurse: Recursive>| { - choice(( - paren_parser(recurse.clone(), '(', ')'), - paren_parser(recurse.clone(), '[', ']'), - paren_parser(recurse.clone(), '{', '}'), - choice(( - just(":=").padded().to(Lexeme::rule(0f64)), - just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule), - comment::comment_parser().map(Lexeme::Comment), - just("::").padded().to(Lexeme::NS), - just('\\').padded().to(Lexeme::BS), - just('@').padded().to(Lexeme::At), - just(':').to(Lexeme::Type), - number::int_parser().map(Lexeme::Uint), // all ints are valid floats so it takes precedence - number::float_parser().map(Lexeme::Num), - string::char_parser().map(Lexeme::Char), - string::str_parser().map(Lexeme::Str), - name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing - )).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres) - )) - }).separated_by(one_of("\t ").repeated()) - .flatten().collect()) - .map(|(prefix, rest): (Option, Vec)| { - prefix.into_iter().map(|l| Entry(l, 0..6)).chain(rest.into_iter()).collect() - }) - .then_ignore(text::whitespace()).then_ignore(end()) + choice(( + keyword("export").to(Lexeme::Export), + keyword("module").to(Lexeme::Namespace), + keyword("import").to(Lexeme::Import), + paren_parser('(', ')'), + paren_parser('[', ']'), + paren_parser('{', '}'), + just(":=").to(Lexeme::Const), + just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule), + comment::comment_parser().map(Lexeme::Comment), + just("::").to(Lexeme::NS), + just('\\').to(Lexeme::BS), + just('@').to(Lexeme::At), + just(':').to(Lexeme::Type), + just('\n').to(Lexeme::BR), + placeholder::placeholder_parser(ctx.clone()).map(Lexeme::PH), + literal_parser().map(Lexeme::Literal), + name::name_parser(&all_ops).map(move |n| { + Lexeme::Name(ctx.interner().i(&n)) + }) + )) + 
.map_with_span(|lexeme, range| Entry{ lexeme, range }) + .padded_by(one_of(" \t").repeated()) + .repeated() + .then_ignore(end()) +} + + +pub fn filter_map_lex<'a, O, M: ToString>( + f: impl Fn(Lexeme) -> Result + Clone + 'a +) -> impl Parser), Error = Simple> + Clone + 'a { + filter_map(move |s: Range, e: Entry| { + let out = f(e.lexeme).map_err(|msg| Simple::custom(s.clone(), msg))?; + Ok((out, s)) + }) } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 6a112e8..fff5923 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -6,11 +6,14 @@ mod comment; mod expression; mod sourcefile; mod import; -mod enum_parser; mod parse; +mod enum_filter; +mod placeholder; +mod context; pub use sourcefile::line_parser; -pub use lexer::{lexer, Lexeme, Entry as LexerEntry}; +pub use lexer::{lexer, Lexeme, Entry}; pub use name::is_op; -pub use parse::{parse, reparse, ParseError}; -pub use number::{float_parser, int_parser}; \ No newline at end of file +pub use parse::{parse, ParseError}; +pub use number::{float_parser, int_parser}; +pub use context::ParsingContext; \ No newline at end of file diff --git a/src/parse/name.rs b/src/parse/name.rs index 34b484b..deb30d8 100644 --- a/src/parse/name.rs +++ b/src/parse/name.rs @@ -1,58 +1,69 @@ use chumsky::{self, prelude::*, Parser}; -/// Matches any one of the passed operators, longest-first -fn op_parser<'a, T: AsRef + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple> { - let mut sorted_ops: Vec = ops.iter().map(|t| t.as_ref().to_string()).collect(); +/// Matches any one of the passed operators, preferring longer ones +fn op_parser<'a>(ops: &[impl AsRef + Clone]) +-> BoxedParser<'a, char, String, Simple> +{ + let mut sorted_ops: Vec = ops.iter() + .map(|t| t.as_ref().to_string()).collect(); sorted_ops.sort_by_key(|op| -(op.len() as i64)); sorted_ops.into_iter() .map(|op| just(op).boxed()) .reduce(|a, b| a.or(b).boxed()) - .unwrap_or_else(|| empty().map(|()| panic!("Empty isn't meant to match")).boxed()) - 
.labelled("operator").boxed() + .unwrap_or_else(|| { + empty().map(|()| panic!("Empty isn't meant to match")).boxed() + }).labelled("operator").boxed() } +/// Characters that cannot be parsed as part of an operator +/// +/// The initial operator list overrides this. +static NOT_NAME_CHAR: &[char] = &[ + ':', // used for namespacing and type annotations + '\\', '@', // parametric expression starters + '"', '\'', // parsed as primitives and therefore would never match + '(', ')', '[', ']', '{', '}', // must be strictly balanced + '.', // Argument-body separator in parametrics + ',', // used in imports +]; + /// Matches anything that's allowed as an operator /// -/// Blacklist rationale: -/// - `:` is used for namespacing and type annotations, both are distinguished from operators -/// - `\` and `@` are parametric expression starters -/// - `"` and `'` are read as primitives and would never match. -/// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming. -/// - `.` is the discriminator for parametrics. -/// - ',' is always a standalone single operator, so it can never be part of a name +/// FIXME: `@name` without a dot should be parsed correctly for overrides. +/// Could be an operator but then parametrics should take precedence, +/// which might break stuff. investigate. /// -/// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but -/// then parametrics should take precedence, which might break stuff. investigate. +/// TODO: `'` could work as an operator whenever it isn't closed. +/// It's common im maths so it's worth a try /// -/// TODO: `'` could work as an operator whenever it isn't closed. It's common im maths so it's -/// worth a try -/// -/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very -/// common in maths so it's worth a try. Investigate. 
-pub fn modname_parser<'a>() -> impl Parser> + 'a { - let not_name_char: Vec = vec![':', '\\', '@', '"', '\'', '(', ')', '[', ']', '{', '}', ',', '.']; - filter(move |c| !not_name_char.contains(c) && !c.is_whitespace()) +/// TODO: `.` could possibly be parsed as an operator in some contexts. +/// This operator is very common in maths so it's worth a try. +/// Investigate. +pub fn modname_parser<'a>() +-> impl Parser> + 'a +{ + filter(move |c| !NOT_NAME_CHAR.contains(c) && !c.is_whitespace()) .repeated().at_least(1) .collect() .labelled("modname") } -/// Parse an operator or name. Failing both, parse everything up to the next whitespace or -/// blacklisted character as a new operator. -pub fn name_parser<'a, T: AsRef + Clone>( - ops: &[T] -) -> impl Parser> + 'a { +/// Parse an operator or name. Failing both, parse everything up to +/// the next whitespace or blacklisted character as a new operator. +pub fn name_parser<'a>(ops: &[impl AsRef + Clone]) +-> impl Parser> + 'a +{ choice(( op_parser(ops), // First try to parse a known operator text::ident().labelled("plain text"), // Failing that, parse plain text - modname_parser() // Finally parse everything until tne next terminal as a new operator + modname_parser() // Finally parse everything until the next forbidden char )) .labelled("name") } -/// Decide if a string can be an operator. Operators can include digits and text, just not at the -/// start. -pub fn is_op>(s: T) -> bool { +/// Decide if a string can be an operator. Operators can include digits +/// and text, just not at the start.
+pub fn is_op(s: impl AsRef) -> bool { return match s.as_ref().chars().next() { Some(x) => !x.is_alphanumeric(), None => false diff --git a/src/parse/number.rs b/src/parse/number.rs index 0059567..8510e18 100644 --- a/src/parse/number.rs +++ b/src/parse/number.rs @@ -67,7 +67,7 @@ fn pow_uint_parser(base: u32) -> impl Parser> { /// parse an uint from a base determined by its prefix or lack thereof /// -/// Not to be convused with [uint_parser] which is a component of it. +/// Not to be confused with [uint_parser] which is a component of it. pub fn int_parser() -> impl Parser> { choice(( just("0b").ignore_then(pow_uint_parser(2)), diff --git a/src/parse/parse.rs b/src/parse/parse.rs index 328ebda..e21ddb8 100644 --- a/src/parse/parse.rs +++ b/src/parse/parse.rs @@ -1,75 +1,58 @@ -use std::{ops::Range, fmt::Debug}; +use std::fmt::Debug; -use chumsky::{prelude::{Simple, end}, Stream, Parser}; -use itertools::Itertools; -use lasso::Spur; +use chumsky::{prelude::*, Parser}; use thiserror::Error; -use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}, representations::sourcefile::FileEntry}; +use crate::representations::sourcefile::{FileEntry}; +use crate::parse::sourcefile::split_lines; -use super::{Lexeme, lexer, line_parser, LexerEntry}; +use super::context::Context; +use super::{lexer, line_parser, Entry}; #[derive(Error, Debug, Clone)] pub enum ParseError { #[error("Could not tokenize {0:?}")] Lex(Vec>), - #[error("Could not parse {0:#?}")] - Ast(Vec>) + #[error("Could not parse {:?} on line {}", .0.first().unwrap().1.span(), .0.first().unwrap().0)] + Ast(Vec<(usize, Simple)>) } -pub fn parse<'a, Op, F>( - ops: &[Op], data: &str, intern: &F -) -> Result, ParseError> -where - Op: 'a + AsRef + Clone, - F: Fn(&str) -> Spur +/// All the data required for parsing + + +/// Parse a string of code into a collection of module elements; +/// imports, exports, comments, declarations, etc. 
+/// +/// Notice that because the lexer splits operators based on the provided +/// list, the output will only be correct if operator list already +/// contains all operators defined or imported by this module. +pub fn parse<'a>(data: &str, ctx: impl Context) +-> Result, ParseError> { - let lexie = lexer(ops); - let token_batchv = split_lines(data).map(|line| { - lexie.parse(line).map_err(ParseError::Lex) - }).collect::, _>>()?; - println!("Lexed:\n{:?}", LexedText(token_batchv.clone())); - let parsr = line_parser(intern).then_ignore(end()); - let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| { - !v.is_empty() - }).map(|v| { - // Find the first invalid position for Stream::for_iter - let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone(); - // Stream expects tuples, lexer outputs structs - let tuples = v.into_iter().map_into::<(Lexeme, Range)>(); - parsr.parse(Stream::from_iter(end..end+1, tuples)) - // ^^^^^^^^^^ - // I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the - // end of input should make little difference - }).map(|res| match res { - Ok(r) => (Some(r), vec![]), - Err(e) => (None, e) - }).unzip::<_, _, Vec<_>, Vec<_>>(); + // TODO: wrap `i`, `ops` and `prefix` in a parsing context + let lexie = lexer(ctx.clone()); + let token_batchv = lexie.parse(data).map_err(ParseError::Lex)?; + // println!("Lexed:\n{}", LexedText(token_batchv.clone()).bundle(ctx.interner())); + // println!("Lexed:\n{:?}", token_batchv.clone()); + let parsr = line_parser(ctx).then_ignore(end()); + let (parsed_lines, errors_per_line) = split_lines(&token_batchv) + .enumerate() + .map(|(i, entv)| (i, + entv.iter() + .filter(|e| !e.is_filler()) + .cloned() + .collect::>() + )) + .filter(|(_, l)| l.len() > 0) + .map(|(i, l)| (i, parsr.parse(l))) + .map(|(i, res)| match res { + Ok(r) => (Some(r), (i, vec![])), + Err(e) => (None, (i, e)) + }).unzip::<_, _, Vec<_>, Vec<_>>(); let total_err = errors_per_line.into_iter() - 
.flat_map(Vec::into_iter) + .flat_map(|(i, v)| v.into_iter().map(move |e| (i, e))) .collect::>(); if !total_err.is_empty() { Err(ParseError::Ast(total_err)) } else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) } } - -pub fn reparse<'a, Op, F>( - ops: &[Op], data: &str, pre: &[FileEntry], intern: &F -) --> Result, ParseError> -where - Op: 'a + AsRef + Clone, - F: Fn(&str) -> Spur -{ - let result = parse(ops, data, intern)?; - Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| { - if let FileEntry::Rule(Rule{source, ..}, _) = &mut output { - if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor { - *source = s2.clone() - } else { - panic!("Preparse and reparse received different row types!") - } - } - output - }).collect()) -} diff --git a/src/parse/placeholder.rs b/src/parse/placeholder.rs new file mode 100644 index 0000000..dab468c --- /dev/null +++ b/src/parse/placeholder.rs @@ -0,0 +1,30 @@ +use chumsky::{Parser, prelude::*}; + +use crate::ast::{Placeholder, PHClass}; + +use super::{number::int_parser, context::Context}; + +pub fn placeholder_parser<'a>(ctx: impl Context + 'a) +-> impl Parser> + 'a +{ + choice(( + just("...").to(Some(true)), + just("..").to(Some(false)), + empty().to(None) + )) + .then(just("$").ignore_then(text::ident())) + .then(just(":").ignore_then(int_parser()).or_not()) + .try_map(move |((vec_nonzero, name), vec_prio), span| { + let name = ctx.interner().i(&name); + if let Some(nonzero) = vec_nonzero { + let prio = vec_prio.unwrap_or_default(); + Ok(Placeholder { name, class: PHClass::Vec { nonzero, prio } }) + } else { + if vec_prio.is_some() { + Err(Simple::custom(span, "Scalar placeholders have no priority")) + } else { + Ok(Placeholder { name, class: PHClass::Scalar }) + } + } + }) +} diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs index 9a9809b..010998a 100644 --- a/src/parse/sourcefile.rs +++ b/src/parse/sourcefile.rs @@ -1,81 +1,139 @@ use std::iter; use std::rc::Rc; -use 
crate::representations::sourcefile::FileEntry; -use crate::enum_parser; -use crate::ast::{Expr, Rule}; +use crate::representations::location::Location; +use crate::representations::sourcefile::{FileEntry, Member}; +use crate::enum_filter; +use crate::ast::{Rule, Constant, Expr, Clause}; +use crate::interner::Token; -use super::expression::{xpr_parser, ns_name_parser}; +use super::Entry; +use super::context::Context; +use super::expression::xpr_parser; use super::import::import_parser; -use super::lexer::Lexeme; -use chumsky::{Parser, prelude::*}; -use lasso::Spur; -use ordered_float::NotNan; +use super::lexer::{Lexeme, filter_map_lex}; -fn rule_parser<'a, F>(intern: &'a F) -> impl Parser, NotNan, Vec -), Error = Simple> + 'a -where F: Fn(&str) -> Spur + 'a { - xpr_parser(intern).repeated() - .then(enum_parser!(Lexeme::Rule)) - .then(xpr_parser(intern).repeated()) - .map(|((a, b), c)| (a, b, c)) - .labelled("Rule") +use chumsky::{Parser, prelude::*}; +use itertools::Itertools; + +fn rule_parser<'a>(ctx: impl Context + 'a) +-> impl Parser> + 'a +{ + xpr_parser(ctx.clone()).repeated().at_least(1) + .then(filter_map_lex(enum_filter!(Lexeme::Rule))) + .then(xpr_parser(ctx).repeated().at_least(1)) + .map(|((s, (prio, _)), t)| Rule{ + source: Rc::new(s), + prio, + target: Rc::new(t) + }).labelled("Rule") } -pub fn line_parser<'a, F>(intern: &'a F) --> impl Parser> + 'a -where F: Fn(&str) -> Spur + 'a { +fn const_parser<'a>(ctx: impl Context + 'a) +-> impl Parser> + 'a +{ + filter_map_lex(enum_filter!(Lexeme::Name)) + .then_ignore(Lexeme::Const.parser()) + .then(xpr_parser(ctx.clone()).repeated().at_least(1)) + .map(move |((name, _), value)| Constant{ + name, + value: if let Ok(ex) = value.iter().exactly_one() { ex.clone() } + else { + let start = value.first().expect("value cannot be empty") + .location.range().expect("all locations in parsed source are known") + .start; + let end = value.last().expect("asserted right above") + .location.range().expect("all locations in 
parsed source are known") + .end; + Expr{ + location: Location::Range { file: ctx.file(), range: start..end }, + value: Clause::S('(', Rc::new(value)) + } + } + }) +} + +pub fn collect_errors>(e: Vec) -> E { + e.into_iter() + .reduce(chumsky::Error::merge) + .expect("Error list must be non-empty") +} + +fn namespace_parser<'a>( + line: impl Parser> + 'a, +) -> impl Parser, Vec), Error = Simple> + 'a { + Lexeme::Namespace.parser() + .ignore_then(filter_map_lex(enum_filter!(Lexeme::Name))) + .then( + any().repeated().delimited_by( + Lexeme::LP('{').parser(), + Lexeme::RP('{').parser() + ).try_map(move |body, _| { + split_lines(&body) + .map(|l| line.parse(l)) + .collect::,_>>() + .map_err(collect_errors) + }) + ).map(move |((name, _), body)| { + (name, body) + }) +} + +fn member_parser<'a>( + line: impl Parser> + 'a, + ctx: impl Context + 'a +) -> impl Parser> + 'a { choice(( - // In case the usercode wants to parse doc - enum_parser!(Lexeme >> FileEntry; Comment), - just(Lexeme::Import) - .ignore_then(import_parser(intern).map(FileEntry::Import)) - .then_ignore(enum_parser!(Lexeme::Comment).or_not()), - just(Lexeme::Export).map_err_with_span(|e, s| { - println!("{:?} could not yield an export", s); e - }).ignore_then( - just(Lexeme::NS).ignore_then( - ns_name_parser(intern).map(Rc::new) - .separated_by(just(Lexeme::name(","))) - .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('('))) - ).map(FileEntry::Export) - .or(rule_parser(intern).map(|(source, prio, target)| { - FileEntry::Rule(Rule { - source: Rc::new(source), - prio, - target: Rc::new(target) - }, true) - })) - ), - // This could match almost anything so it has to go last - rule_parser(intern).map(|(source, prio, target)| { - FileEntry::Rule(Rule{ - source: Rc::new(source), - prio, - target: Rc::new(target) - }, false) - }), + namespace_parser(line) + .map(|(name, body)| Member::Namespace(name, body)), + rule_parser(ctx.clone()).map(Member::Rule), + const_parser(ctx).map(Member::Constant), )) } -pub fn
split_lines(data: &str) -> impl Iterator { - let mut source = data.char_indices(); +pub fn line_parser<'a>(ctx: impl Context + 'a) +-> impl Parser> + 'a +{ + recursive(|line: Recursive>| { + choice(( + // In case the usercode wants to parse doc + filter_map_lex(enum_filter!(Lexeme >> FileEntry; Comment)).map(|(ent, _)| ent), + // plain old imports + Lexeme::Import.parser() + .ignore_then(import_parser(ctx.clone()).map(FileEntry::Import)), + Lexeme::Export.parser().ignore_then(choice(( + // token collection + Lexeme::NS.parser().ignore_then( + filter_map_lex(enum_filter!(Lexeme::Name)).map(|(e, _)| e) + .separated_by(Lexeme::Name(ctx.interner().i(",")).parser()) + .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser()) + ).map(FileEntry::Export), + // public declaration + member_parser(line.clone(), ctx.clone()).map(FileEntry::Exported) + ))), + // This could match almost anything so it has to go last + member_parser(line, ctx).map(FileEntry::Internal), + )) + }) +} + +pub fn split_lines(data: &[Entry]) -> impl Iterator { + let mut source = data.iter().enumerate(); let mut last_slice = 0; iter::from_fn(move || { let mut paren_count = 0; - while let Some((i, c)) = source.next() { - match c { - '(' | '{' | '[' => paren_count += 1, - ')' | '}' | ']' => paren_count -= 1, - '\n' if paren_count == 0 => { + while let Some((i, Entry{ lexeme, .. 
})) = source.next() { + match lexeme { + Lexeme::LP(_) => paren_count += 1, + Lexeme::RP(_) => paren_count -= 1, + Lexeme::BR if paren_count == 0 => { let begin = last_slice; - last_slice = i; + last_slice = i+1; return Some(&data[begin..i]); }, _ => (), } } None - }) -} \ No newline at end of file + }).filter(|s| s.len() > 0) +} diff --git a/src/pipeline/error/mod.rs b/src/pipeline/error/mod.rs new file mode 100644 index 0000000..dfae3b4 --- /dev/null +++ b/src/pipeline/error/mod.rs @@ -0,0 +1,15 @@ +mod project_error; +mod parse_error_with_path; +mod unexpected_directory; +mod module_not_found; +mod not_exported; +mod too_many_supers; +mod visibility_mismatch; + +pub use project_error::{ErrorPosition, ProjectError}; +pub use parse_error_with_path::ParseErrorWithPath; +pub use unexpected_directory::UnexpectedDirectory; +pub use module_not_found::ModuleNotFound; +pub use not_exported::NotExported; +pub use too_many_supers::TooManySupers; +pub use visibility_mismatch::VisibilityMismatch; \ No newline at end of file diff --git a/src/pipeline/error/module_not_found.rs b/src/pipeline/error/module_not_found.rs new file mode 100644 index 0000000..c573a15 --- /dev/null +++ b/src/pipeline/error/module_not_found.rs @@ -0,0 +1,25 @@ +use crate::utils::{BoxedIter, iter::box_once}; + +use super::{ProjectError, ErrorPosition}; + +/// Error produced when an import refers to a nonexistent module +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct ModuleNotFound { + pub file: Vec, + pub subpath: Vec +} +impl ProjectError for ModuleNotFound { + fn description(&self) -> &str { + "an import refers to a nonexistent module" + } + fn message(&self) -> String { + format!( + "module {} in {} was not found", + self.subpath.join("::"), + self.file.join("/"), + ) + } + fn positions(&self) -> BoxedIter { + box_once(ErrorPosition::just_file(self.file.clone())) + } +} \ No newline at end of file diff --git a/src/pipeline/error/not_exported.rs b/src/pipeline/error/not_exported.rs new file 
mode 100644 index 0000000..b617c99 --- /dev/null +++ b/src/pipeline/error/not_exported.rs @@ -0,0 +1,36 @@ +use std::rc::Rc; + +use crate::{utils::BoxedIter, representations::location::Location}; + +use super::{ProjectError, ErrorPosition}; + +#[derive(Debug)] +pub struct NotExported { + pub file: Vec, + pub subpath: Vec, + pub referrer_file: Vec, + pub referrer_subpath: Vec, +} +impl ProjectError for NotExported { + fn description(&self) -> &str { + "An import refers to a symbol that exists but isn't exported" + } + fn positions(&self) -> BoxedIter { + Box::new([ + ErrorPosition{ + location: Location::File(Rc::new(self.file.clone())), + message: Some(format!( + "{} isn't exported", + self.subpath.join("::") + )), + }, + ErrorPosition{ + location: Location::File(Rc::new(self.referrer_file.clone())), + message: Some(format!( + "{} cannot see this symbol", + self.referrer_subpath.join("::") + )), + } + ].into_iter()) + } +} \ No newline at end of file diff --git a/src/pipeline/error/parse_error_with_path.rs b/src/pipeline/error/parse_error_with_path.rs new file mode 100644 index 0000000..ff0c3ec --- /dev/null +++ b/src/pipeline/error/parse_error_with_path.rs @@ -0,0 +1,37 @@ +use std::rc::Rc; + +use crate::representations::location::Location; +use crate::utils::BoxedIter; +use crate::parse::ParseError; + +use super::ErrorPosition; +use super::ProjectError; + +/// Produced by stages that parse text when it fails. 
+#[derive(Debug)] +pub struct ParseErrorWithPath { + pub full_source: String, + pub path: Vec, + pub error: ParseError +} +impl ProjectError for ParseErrorWithPath { + fn description(&self) -> &str {"Failed to parse code"} + fn positions(&self) -> BoxedIter { + match &self.error { + ParseError::Lex(lex) => Box::new(lex.iter().map(|s| ErrorPosition { + location: Location::Range { + file: Rc::new(self.path.clone()), + range: s.span(), + }, + message: Some(s.to_string()) + })), + ParseError::Ast(ast) => Box::new(ast.iter().map(|(_i, s)| ErrorPosition { + location: s.found().map(|e| Location::Range { + file: Rc::new(self.path.clone()), + range: e.range.clone() + }).unwrap_or_else(|| Location::File(Rc::new(self.path.clone()))), + message: Some(s.label().unwrap_or("Parse error").to_string()) + })), + } + } +} \ No newline at end of file diff --git a/src/pipeline/error/project_error.rs b/src/pipeline/error/project_error.rs new file mode 100644 index 0000000..2d18de0 --- /dev/null +++ b/src/pipeline/error/project_error.rs @@ -0,0 +1,50 @@ +use std::fmt::{Debug, Display}; +use std::rc::Rc; + +use crate::representations::location::Location; +use crate::utils::BoxedIter; + +/// A point of interest in resolving the error, such as the point where +/// processing got stuck, a command that is likely to be incorrect +pub struct ErrorPosition { + pub location: Location, + pub message: Option +} + +impl ErrorPosition { + /// An error position referring to an entire file with no comment + pub fn just_file(file: Vec) -> Self { + Self { message: None, location: Location::File(Rc::new(file)) } + } +} + +/// Errors addressed to the developer which are to be resolved with +/// code changes +pub trait ProjectError: Debug { + /// A general description of this type of error + fn description(&self) -> &str; + /// A formatted message that includes specific parameters + fn message(&self) -> String {String::new()} + /// Code positions relevant to this error + fn positions(&self) -> BoxedIter; + 
/// Convert the error into an [Rc] to be able to + /// handle various errors together + fn rc(self) -> Rc where Self: Sized + 'static { + Rc::new(self) + } +} + +impl Display for dyn ProjectError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let description = self.description(); + let message = self.message(); + let positions = self.positions(); + write!(f, "Problem with the project: {description}; {message}")?; + for ErrorPosition { location, message } in positions { + write!(f, "@{location}: {}", + message.unwrap_or("location of interest".to_string()) + )? + } + Ok(()) + } +} \ No newline at end of file diff --git a/src/pipeline/error/too_many_supers.rs b/src/pipeline/error/too_many_supers.rs new file mode 100644 index 0000000..0fd7e40 --- /dev/null +++ b/src/pipeline/error/too_many_supers.rs @@ -0,0 +1,38 @@ +use std::rc::Rc; + +use crate::{utils::{BoxedIter, iter::box_once}, representations::location::Location}; + +use super::{ProjectError, ErrorPosition}; + +/// Error produced when an import path starts with more `super` segments +/// than the current module's absolute path +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct TooManySupers { + pub path: Vec, + pub offender_file: Vec, + pub offender_mod: Vec +} +impl ProjectError for TooManySupers { + fn description(&self) -> &str { + "an import path starts with more `super` segments than \ + the current module's absolute path" + } + fn message(&self) -> String { + format!( + "path {} in {} contains too many `super` steps.", + self.path.join("::"), + self.offender_mod.join("::") + ) + } + + fn positions(&self) -> BoxedIter { + box_once(ErrorPosition { + location: Location::File(Rc::new(self.offender_file.clone())), + message: Some(format!( + "path {} in {} contains too many `super` steps.", + self.path.join("::"), + self.offender_mod.join("::") + )) + }) + } +} \ No newline at end of file diff --git a/src/pipeline/error/unexpected_directory.rs 
b/src/pipeline/error/unexpected_directory.rs new file mode 100644 index 0000000..7968f44 --- /dev/null +++ b/src/pipeline/error/unexpected_directory.rs @@ -0,0 +1,26 @@ +use crate::utils::{BoxedIter, iter::box_once}; + +use super::ErrorPosition; +use super::ProjectError; + +/// Produced when a stage that deals specifically with code encounters +/// a path that refers to a directory +#[derive(Debug)] +pub struct UnexpectedDirectory { + pub path: Vec +} +impl ProjectError for UnexpectedDirectory { + fn description(&self) -> &str { + "A stage that deals specifically with code encountered a path \ + that refers to a directory" + } + fn positions(&self) -> BoxedIter { + box_once(ErrorPosition::just_file(self.path.clone())) + } + fn message(&self) -> String { + format!( + "{} was expected to be a file but a directory was found", + self.path.join("/") + ) + } +} \ No newline at end of file diff --git a/src/pipeline/error/visibility_mismatch.rs b/src/pipeline/error/visibility_mismatch.rs new file mode 100644 index 0000000..abf2956 --- /dev/null +++ b/src/pipeline/error/visibility_mismatch.rs @@ -0,0 +1,25 @@ +use std::rc::Rc; +use crate::representations::location::Location; +use crate::utils::{BoxedIter, iter::box_once}; + +use super::project_error::{ProjectError, ErrorPosition}; + +#[derive(Debug)] +pub struct VisibilityMismatch{ + pub namespace: Vec, + pub file: Rc> +} +impl ProjectError for VisibilityMismatch { + fn description(&self) -> &str { + "Some occurences of a namespace are exported but others are not" + } + fn positions(&self) -> BoxedIter { + box_once(ErrorPosition { + location: Location::File(self.file.clone()), + message: Some(format!( + "{} is opened multiple times with different visibilities", + self.namespace.join("::") + )) + }) + } +} diff --git a/src/pipeline/file_loader.rs b/src/pipeline/file_loader.rs new file mode 100644 index 0000000..afa0558 --- /dev/null +++ b/src/pipeline/file_loader.rs @@ -0,0 +1,106 @@ +use std::path::Path; +use std::rc::Rc; 
+use std::path::PathBuf; +use std::io; +use std::fs; + +use crate::utils::iter::box_once; +use crate::utils::{Cache, BoxedIter}; +use crate::interner::{Interner, Token}; +use crate::pipeline::error::UnexpectedDirectory; +use crate::pipeline::error::{ProjectError, ErrorPosition}; + +#[derive(Debug)] +pub struct FileLoadingError{ + file: io::Error, + dir: io::Error, + path: Vec +} +impl ProjectError for FileLoadingError { + fn description(&self) -> &str { + "Neither a file nor a directory could be read from \ + the requested path" + } + fn positions(&self) -> BoxedIter { + box_once(ErrorPosition::just_file(self.path.clone())) + } + fn message(&self) -> String { + format!("File: {}\nDirectory: {}", self.file, self.dir) + } +} + +/// Represents the result of loading code from a string-tree form such +/// as the file system. +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum Loaded { + Code(Rc), + Collection(Rc>), +} +impl Loaded { + pub fn is_code(&self) -> bool {matches!(self, Loaded::Code(_))} +} + +pub type IOResult = Result>; + +pub type FileCache<'a> = Cache<'a, Token>>, IOResult>; + +/// Load a file from a path expressed in Rust strings, but relative to +/// a root expressed as an OS Path. 
+pub fn load_file(root: &Path, path: &[impl AsRef]) -> IOResult { + // let os_path = path.into_iter() + // .map_into::() + // .collect::>(); + let full_path = path.iter().fold( + root.to_owned(), + |p, s| p.join(s.as_ref()) + ); + let file_path = full_path.with_extension("orc"); + let file_error = match fs::read_to_string(&file_path) { + Ok(string) => return Ok(Loaded::Code(Rc::new(string))), + Err(err) => err + }; + let dir = match fs::read_dir(&full_path) { + Ok(dir) => dir, + Err(dir_error) => { + return Err(FileLoadingError { + file: file_error, + dir: dir_error, + path: path.iter() + .map(|s| s.as_ref().to_string()) + .collect(), + }.rc()) + } + }; + let names = dir.filter_map(Result::ok) + .filter_map(|ent| { + let fname = ent.file_name().into_string().ok()?; + let ftyp = ent.metadata().ok()?.file_type(); + Some(if ftyp.is_dir() {fname} else { + fname.strip_suffix(".orc")?.to_string() + }) + }).collect(); + Ok(Loaded::Collection(Rc::new(names))) +} + +/// Generates a cached file loader for a directory +pub fn mk_cache(root: PathBuf, i: &Interner) -> FileCache { + Cache::new(move |token: Token>>, _this| -> IOResult { + let path = i.r(token).iter() + .map(|t| i.r(*t).as_str()) + .collect::>(); + load_file(&root, &path) + }) +} + +/// Loads the string contents of a file at the given location. +/// If the path points to a directory, raises an error. +pub fn load_text( + path: Token>>, + load_file: &impl Fn(Token>>) -> IOResult, + i: &Interner +) -> Result, Rc> { + if let Loaded::Code(s) = load_file(path)?
{Ok(s)} + else {Err(UnexpectedDirectory{ + path: i.r(path).iter().map(|t| i.r(*t)).cloned().collect() + }.rc())} +} \ No newline at end of file diff --git a/src/pipeline/import_abs_path.rs b/src/pipeline/import_abs_path.rs new file mode 100644 index 0000000..27ef529 --- /dev/null +++ b/src/pipeline/import_abs_path.rs @@ -0,0 +1,32 @@ +use std::rc::Rc; + +use crate::representations::tree::Module; +use crate::representations::sourcefile::absolute_path; +use crate::utils::{Substack}; +use crate::interner::{Token, Interner}; + +use super::error::{ProjectError, TooManySupers}; + +pub fn import_abs_path( + src_path: &[Token], + mod_stack: Substack>, + module: &Module, + import_path: &[Token], + i: &Interner, +) -> Result>, Rc> { + // path of module within file + let mod_pathv = mod_stack.iter().rev_vec_clone(); + // path of module within compilation + let abs_pathv = src_path.iter().copied() + .chain(mod_pathv.iter().copied()) + .collect::>(); + // preload-target path relative to module + // preload-target path within compilation + absolute_path(&abs_pathv, import_path, i, &|n| { + module.items.contains_key(&n) + }).map_err(|_| TooManySupers{ + path: import_path.iter().map(|t| i.r(*t)).cloned().collect(), + offender_file: src_path.iter().map(|t| i.r(*t)).cloned().collect(), + offender_mod: mod_pathv.iter().map(|t| i.r(*t)).cloned().collect(), + }.rc()) +} \ No newline at end of file diff --git a/src/pipeline/import_resolution/alias_map.rs b/src/pipeline/import_resolution/alias_map.rs new file mode 100644 index 0000000..3880600 --- /dev/null +++ b/src/pipeline/import_resolution/alias_map.rs @@ -0,0 +1,53 @@ +use hashbrown::{HashMap, HashSet}; + +use std::hash::Hash; + +use crate::interner::Token; + +#[derive(Clone, Debug, Default)] +pub struct AliasMap{ + pub targets: HashMap>>, Token>>>, + pub aliases: HashMap>>, HashSet>>>>, +} +impl AliasMap { + pub fn new() -> Self {Self::default()} + + pub fn link(&mut self, alias: Token>>, target: Token>>) { + let prev = 
self.targets.insert(alias, target); + debug_assert!(prev.is_none(), "Alias already has a target"); + multimap_entry(&mut self.aliases, &target).insert(alias); + // Remove aliases of the alias + if let Some(alts) = self.aliases.remove(&alias) { + for alt in alts { + // Assert that this step has always been done in the past + debug_assert!( + self.aliases.get(&alt) + .map(HashSet::is_empty) + .unwrap_or(true), + "Alias set of alias not empty" + ); + debug_assert!( + self.targets.insert(alt, target) == Some(alias), + "Name not target of its own alias" + ); + multimap_entry(&mut self.aliases, &target).insert(alt); + } + } + } + + pub fn resolve(&self, alias: Token>>) -> Option>>> { + self.targets.get(&alias).copied() + } +} + +/// find or create the set belonging to the given key in the given +/// map-to-set (aka. multimap) +fn multimap_entry<'a, K: Eq + Hash + Clone, V>( + map: &'a mut HashMap>, + key: &'_ K +) -> &'a mut HashSet { + map.raw_entry_mut() + .from_key(key) + .or_insert_with(|| (key.clone(), HashSet::new())) + .1 +} \ No newline at end of file diff --git a/src/pipeline/import_resolution/apply_aliases.rs b/src/pipeline/import_resolution/apply_aliases.rs new file mode 100644 index 0000000..b9df142 --- /dev/null +++ b/src/pipeline/import_resolution/apply_aliases.rs @@ -0,0 +1,87 @@ +use std::rc::Rc; + +use hashbrown::HashMap; + +use crate::{utils::Substack, interner::{Token, Interner}, pipeline::{ProjectModule, ProjectExt}, representations::tree::{ModEntry, ModMember}, ast::{Rule, Expr}}; + +use super::{alias_map::AliasMap, decls::InjectedAsFn}; + +fn process_expr( + expr: &Expr, + alias_map: &AliasMap, + injected_as: &impl InjectedAsFn, + i: &Interner, +) -> Expr { + expr.map_names(&|n| { + injected_as(&i.r(n)[..]).or_else(|| { + alias_map.resolve(n).map(|n| { + injected_as(&i.r(n)[..]).unwrap_or(n) + }) + }) + }).unwrap_or_else(|| expr.clone()) +} + +// TODO: replace is_injected with injected_as +/// Replace all aliases with the name they're originally 
defined as +fn apply_aliases_rec( + path: Substack>, + module: &ProjectModule, + alias_map: &AliasMap, + i: &Interner, + injected_as: &impl InjectedAsFn, +) -> ProjectModule { + let items = module.items.iter().map(|(name, ent)| { + let ModEntry{ exported, member } = ent; + let member = match member { + ModMember::Item(expr) => ModMember::Item( + process_expr(expr, alias_map, injected_as, i) + ), + ModMember::Sub(module) => { + let subpath = path.push(*name); + let is_ignored = injected_as(&subpath.iter().rev_vec_clone()).is_some(); + let new_mod = if is_ignored {module.clone()} else { + let module = module.as_ref(); + Rc::new(apply_aliases_rec( + subpath, module, + alias_map, i, injected_as + )) + }; + ModMember::Sub(new_mod) + } + }; + (*name, ModEntry{ exported: *exported, member }) + }).collect::>(); + let rules = module.extra.rules.iter().map(|rule| { + let Rule{ source, prio, target } = rule; + Rule{ + prio: *prio, + source: Rc::new(source.iter() + .map(|expr| process_expr(expr, alias_map, injected_as, i)) + .collect::>() + ), + target: Rc::new(target.iter() + .map(|expr| process_expr(expr, alias_map, injected_as, i)) + .collect::>() + ), + } + }).collect::>(); + ProjectModule{ + items, + imports: module.imports.clone(), + extra: ProjectExt{ + rules, + exports: module.extra.exports.clone(), + file: module.extra.file.clone(), + imports_from: module.extra.imports_from.clone(), + } + } +} + +pub fn apply_aliases( + module: &ProjectModule, + alias_map: &AliasMap, + i: &Interner, + injected_as: &impl InjectedAsFn, +) -> ProjectModule { + apply_aliases_rec(Substack::Bottom, module, alias_map, i, injected_as) +} \ No newline at end of file diff --git a/src/pipeline/import_resolution/collect_aliases.rs b/src/pipeline/import_resolution/collect_aliases.rs new file mode 100644 index 0000000..24c3c71 --- /dev/null +++ b/src/pipeline/import_resolution/collect_aliases.rs @@ -0,0 +1,103 @@ +use std::rc::Rc; + +use crate::representations::tree::{WalkErrorKind, ModMember}; 
+use crate::pipeline::error::{ProjectError, NotExported}; +use crate::pipeline::project_tree::{ProjectTree, split_path, ProjectModule}; +use crate::interner::{Token, Interner}; +use crate::utils::{Substack, pushed}; + +use super::alias_map::AliasMap; +use super::decls::InjectedAsFn; + +/// Assert that a module identified by a path can see a given symbol +fn assert_visible( + source: &[Token], // must point to a file or submodule + target: &[Token], // may point to a symbol or module of any kind + project: &ProjectTree, + i: &Interner +) -> Result<(), Rc> { + let (tgt_item, tgt_path) = if let Some(s) = target.split_last() {s} + else {return Ok(())}; + let shared_len = source.iter() + .zip(tgt_path.iter()) + .take_while(|(a, b)| a == b) + .count(); + let shared_root = project.0.walk(&tgt_path[..shared_len], false) + .expect("checked in parsing"); + let direct_parent = shared_root.walk(&tgt_path[shared_len..], true) + .map_err(|e| match e.kind { + WalkErrorKind::Missing => panic!("checked in parsing"), + WalkErrorKind::Private => { + let full_path = &tgt_path[..shared_len + e.pos]; + let (file, sub) = split_path(full_path, &project); + let (ref_file, ref_sub) = split_path(source, &project); + NotExported{ + file: i.extern_all(file), + subpath: i.extern_all(sub), + referrer_file: i.extern_all(ref_file), + referrer_subpath: i.extern_all(ref_sub), + }.rc() + } + })?; + let tgt_item_exported = direct_parent.extra.exports.contains_key(tgt_item); + let target_prefixes_source = shared_len == tgt_path.len() + && source.get(shared_len) == Some(tgt_item); + if !tgt_item_exported && !target_prefixes_source { + let (file, sub) = split_path(target, &project); + let (ref_file, ref_sub) = split_path(source, &project); + Err(NotExported{ + file: i.extern_all(file), + subpath: i.extern_all(sub), + referrer_file: i.extern_all(ref_file), + referrer_subpath: i.extern_all(ref_sub), + }.rc()) + } else {Ok(())} +} + +/// Populate target and alias maps from the module tree recursively +fn 
collect_aliases_rec( + path: Substack>, + module: &ProjectModule, + project: &ProjectTree, + alias_map: &mut AliasMap, + i: &Interner, + injected_as: &impl InjectedAsFn, +) -> Result<(), Rc> { + // Assume injected module has been alias-resolved + let mod_path_v = path.iter().rev_vec_clone(); + if injected_as(&mod_path_v).is_some() {return Ok(())}; + for (&name, &target_mod) in module.extra.imports_from.iter() { + let target_mod_v = i.r(target_mod); + let target_sym_v = pushed(target_mod_v, name); + assert_visible(&mod_path_v, &target_sym_v, project, i)?; + let sym_path_v = pushed(&mod_path_v, name); + let sym_path = i.i(&sym_path_v); + let target_sym = i.i(&target_sym_v); + alias_map.link(sym_path, target_sym); + } + for (&name, entry) in module.items.iter() { + let submodule = if let ModMember::Sub(s) = &entry.member { + s.as_ref() + } else {continue}; + collect_aliases_rec( + path.push(name), + submodule, project, alias_map, + i, injected_as, + )? + } + Ok(()) +} + +/// Populate target and alias maps from the module tree +pub fn collect_aliases( + module: &ProjectModule, + project: &ProjectTree, + alias_map: &mut AliasMap, + i: &Interner, + injected_as: &impl InjectedAsFn, +) -> Result<(), Rc> { + collect_aliases_rec( + Substack::Bottom, module, project, alias_map, + i, injected_as + ) +} \ No newline at end of file diff --git a/src/pipeline/import_resolution/decls.rs b/src/pipeline/import_resolution/decls.rs new file mode 100644 index 0000000..b69d01e --- /dev/null +++ b/src/pipeline/import_resolution/decls.rs @@ -0,0 +1,5 @@ +use crate::interner::Token; + +pub trait InjectedAsFn = Fn( + &[Token] +) -> Option>>>; \ No newline at end of file diff --git a/src/pipeline/import_resolution/mod.rs b/src/pipeline/import_resolution/mod.rs new file mode 100644 index 0000000..d498bcd --- /dev/null +++ b/src/pipeline/import_resolution/mod.rs @@ -0,0 +1,7 @@ +mod alias_map; +mod collect_aliases; +mod apply_aliases; +mod resolve_imports; +mod decls; + +pub use 
resolve_imports::resolve_imports; diff --git a/src/pipeline/import_resolution/resolve_imports.rs b/src/pipeline/import_resolution/resolve_imports.rs new file mode 100644 index 0000000..bf50ac9 --- /dev/null +++ b/src/pipeline/import_resolution/resolve_imports.rs @@ -0,0 +1,28 @@ +use std::rc::Rc; + +use crate::interner::Interner; +use crate::pipeline::error::ProjectError; +use crate::pipeline::project_tree::ProjectTree; + + +use super::alias_map::AliasMap; +use super::collect_aliases::collect_aliases; +use super::apply_aliases::apply_aliases; +use super::decls::InjectedAsFn; + +/// Follow import chains to locate the original name of all tokens, then +/// replace these aliases with the original names throughout the tree +pub fn resolve_imports( + project: ProjectTree, + i: &Interner, + injected_as: &impl InjectedAsFn, +) -> Result> { + let mut map = AliasMap::new(); + collect_aliases( + project.0.as_ref(), + &project, &mut map, + i, injected_as + )?; + let new_mod = apply_aliases(project.0.as_ref(), &map, i, injected_as); + Ok(ProjectTree(Rc::new(new_mod))) +} \ No newline at end of file diff --git a/src/pipeline/mod.rs b/src/pipeline/mod.rs new file mode 100644 index 0000000..48e9f4a --- /dev/null +++ b/src/pipeline/mod.rs @@ -0,0 +1,19 @@ +pub mod error; +mod project_tree; +mod source_loader; +mod import_abs_path; +mod split_name; +mod import_resolution; +pub mod file_loader; +mod parse_layer; + +pub use parse_layer::parse_layer; +pub use project_tree::{ + ConstTree, ProjectExt, ProjectModule, ProjectTree, from_const_tree, + collect_consts, collect_rules, +}; +// pub use file_loader::{Loaded, FileLoadingError, IOResult}; +// pub use error::{ +// ErrorPosition, ModuleNotFound, NotExported, ParseErrorWithPath, +// ProjectError, TooManySupers, UnexpectedDirectory +// }; \ No newline at end of file diff --git a/src/pipeline/parse_layer.rs b/src/pipeline/parse_layer.rs new file mode 100644 index 0000000..53a89bc --- /dev/null +++ b/src/pipeline/parse_layer.rs @@ -0,0 
+1,52 @@ +use std::rc::Rc; + +use crate::representations::sourcefile::FileEntry; +use crate::interner::{Token, Interner}; + +use super::{project_tree, import_resolution}; +use super::source_loader; +use super::file_loader::IOResult; +use super::error::ProjectError; +use super::ProjectTree; + +/// Using an IO callback, produce a project tree that includes the given +/// target symbols or files if they're defined. +/// +/// The environment accessible to the loaded source can be specified with +/// a pre-existing tree which will be merged with the loaded data, and a +/// prelude which will be prepended to each individual file. Since the +/// prelude gets compiled with each file, normally it should be a glob +/// import pointing to a module in the environment. +pub fn parse_layer<'a>( + targets: &[Token>>], + loader: &impl Fn(Token>>) -> IOResult, + environment: &'a ProjectTree, + prelude: &[FileEntry], + i: &Interner, +) -> Result> { + // A path is injected if it is walkable in the injected tree + let injected_as = |path: &[Token]| { + let (item, modpath) = path.split_last()?; + let module = environment.0.walk(modpath, false).ok()?; + let inj = module.extra.exports.get(item).copied()?; + Some(inj) + }; + let injected_names = |path: Token>>| { + let pathv = &i.r(path)[..]; + let module = environment.0.walk(&pathv, false).ok()?; + Some(Rc::new( + module.extra.exports.keys().copied().collect() + )) + }; + let source = source_loader::load_source( + targets, i, loader, &|path| injected_as(path).is_some() + )?; + let tree = project_tree::build_tree(source, i, prelude, &injected_names)?; + let sum = ProjectTree(Rc::new( + environment.0.as_ref().clone() + + tree.0.as_ref().clone() + )); + let resolvd = import_resolution::resolve_imports(sum, i, &injected_as)?; + // Addition among modules favours the left hand side. 
+ Ok(resolvd) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/add_prelude.rs b/src/pipeline/project_tree/add_prelude.rs new file mode 100644 index 0000000..5784524 --- /dev/null +++ b/src/pipeline/project_tree/add_prelude.rs @@ -0,0 +1,52 @@ +use crate::representations::sourcefile::{Member, FileEntry}; +use crate::interner::Token; + +fn member_rec( + // object + member: Member, + // context + path: &[Token], + prelude: &[FileEntry], +) -> Member { + match member { + Member::Namespace(name, body) => { + let new_body = entv_rec( + body, + path, + prelude + ); + Member::Namespace(name, new_body) + }, + any => any + } +} + +fn entv_rec( + // object + data: Vec, + // context + mod_path: &[Token], + prelude: &[FileEntry], +) -> Vec { + prelude.iter().cloned() + .chain(data.into_iter() + .map(|ent| match ent { + FileEntry::Exported(mem) => FileEntry::Exported(member_rec( + mem, mod_path, prelude + )), + FileEntry::Internal(mem) => FileEntry::Internal(member_rec( + mem, mod_path, prelude + )), + any => any + }) + ) + .collect() +} + +pub fn add_prelude( + data: Vec, + path: &[Token], + prelude: &[FileEntry], +) -> Vec { + entv_rec(data, path, prelude) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/build_tree.rs b/src/pipeline/project_tree/build_tree.rs new file mode 100644 index 0000000..6e18a9a --- /dev/null +++ b/src/pipeline/project_tree/build_tree.rs @@ -0,0 +1,215 @@ +use std::rc::Rc; + +use hashbrown::HashMap; + +use crate::pipeline::error::ProjectError; +use crate::interner::{Token, Interner}; +use crate::utils::iter::{box_once, box_empty}; +use crate::utils::{Substack, pushed}; +use crate::ast::{Expr, Constant}; +use crate::pipeline::source_loader::{LoadedSourceTable, LoadedSource}; +use crate::representations::tree::{Module, ModMember, ModEntry}; +use crate::representations::sourcefile::{FileEntry, Member, absolute_path}; + +use super::collect_ops::InjectedOperatorsFn; +use super::{collect_ops, ProjectTree, ProjectExt}; 
+use super::parse_file::parse_file; + +#[derive(Debug)] +struct ParsedSource<'a> { + path: Vec>, + loaded: &'a LoadedSource, + parsed: Vec +} + +pub fn split_path<'a>(path: &'a [Token], proj: &'a ProjectTree) +-> (&'a [Token], &'a [Token]) +{ + let (end, body) = if let Some(s) = path.split_last() {s} + else {return (&[], &[])}; + let mut module = proj.0.walk(body, false).expect("invalid path cannot be split"); + if let ModMember::Sub(m) = &module.items[end].member { + module = m.clone(); + } + let file = module.extra.file.as_ref() + .map(|s| &path[..s.len()]) + .unwrap_or(&path[..]); + let subpath = &path[file.len()..]; + (file, subpath) +} + +/// Convert normalized, prefixed source into a module +fn source_to_module( + // level + path: Substack>, + preparsed: &Module, + // data + data: Vec, + // context + i: &Interner, + filepath_len: usize, +) -> Rc> { + let path_v = path.iter().rev_vec_clone(); + let imports = data.iter() + .filter_map(|ent| if let FileEntry::Import(impv) = ent { + Some(impv.iter()) + } else {None}) + .flatten() + .cloned() + .collect::>(); + let imports_from = imports.iter() + .map(|imp| { + let mut imp_path_v = i.r(imp.path).clone(); + imp_path_v.push(imp.name.expect("imports normalized")); + let mut abs_path = absolute_path( + &path_v, + &imp_path_v, + i, &|n| preparsed.items.contains_key(&n) + ).expect("tested in preparsing"); + let name = abs_path.pop().expect("importing the global context"); + (name, i.i(&abs_path)) + }) + .collect::>(); + let exports = data.iter() + .flat_map(|ent| { + let mk_ent = |name| (name, i.i(&pushed(&path_v, name))); + match ent { + FileEntry::Export(names) + => Box::new(names.iter().copied().map(mk_ent)), + FileEntry::Exported(mem) => match mem { + Member::Constant(constant) => box_once(mk_ent(constant.name)), + Member::Namespace(name, _) => box_once(mk_ent(*name)), + Member::Rule(rule) => { + let mut names = Vec::new(); + for e in rule.source.iter() { + e.visit_names(Substack::Bottom, &mut |n| { + if let 
Some([name]) = i.r(n).strip_prefix(&path_v[..]) { + names.push((*name, n)) + } + }) + } + Box::new(names.into_iter()) + } + } + _ => box_empty() + } + }) + .collect::>(); + let rules = data.iter() + .filter_map(|ent| match ent { + FileEntry::Exported(Member::Rule(rule)) => Some(rule), + FileEntry::Internal(Member::Rule(rule)) => Some(rule), + _ => None, + }) + .cloned() + .collect::>(); + let items = data.into_iter() + .filter_map(|ent| match ent { + FileEntry::Exported(Member::Namespace(name, body)) => { + let prep_member = &preparsed.items[&name].member; + let new_prep = if let ModMember::Sub(s) = prep_member {s.as_ref()} + else { panic!("preparsed missing a submodule") }; + let module = source_to_module( + path.push(name), + new_prep, body, i, filepath_len + ); + let member = ModMember::Sub(module); + Some((name, ModEntry{ exported: true, member })) + } + FileEntry::Internal(Member::Namespace(name, body)) => { + let prep_member = &preparsed.items[&name].member; + let new_prep = if let ModMember::Sub(s) = prep_member {s.as_ref()} + else { panic!("preparsed missing a submodule") }; + let module = source_to_module( + path.push(name), + new_prep, body, i, filepath_len + ); + let member = ModMember::Sub(module); + Some((name, ModEntry{ exported: false, member })) + } + FileEntry::Exported(Member::Constant(Constant{ name, value })) => { + let member = ModMember::Item(value); + Some((name, ModEntry{ exported: true, member })) + } + FileEntry::Internal(Member::Constant(Constant{ name, value })) => { + let member = ModMember::Item(value); + Some((name, ModEntry{ exported: false, member })) + } + _ => None, + }) + .collect::>(); + Rc::new(Module { + imports, + items, + extra: ProjectExt { + imports_from, + exports, + rules, + file: Some(path_v[..filepath_len].to_vec()) + } + }) +} + +fn files_to_module( + path: Substack>, + files: &[ParsedSource], + i: &Interner +) -> Rc> { + let lvl = path.len(); + let path_v = path.iter().rev_vec_clone(); + if files.len() == 1 && 
files[0].path.len() == lvl { + return source_to_module( + path, + files[0].loaded.preparsed.0.as_ref(), + files[0].parsed.clone(), + i, path.len() + ) + } + let items = files.group_by(|a, b| a.path[lvl] == b.path[lvl]).into_iter() + .map(|files| { + let namespace = files[0].path[lvl]; + let subpath = path.push(namespace); + let module = files_to_module(subpath, files, i); + let member = ModMember::Sub(module); + (namespace, ModEntry{ exported: true, member }) + }) + .collect::>(); + let exports = items.keys() + .copied() + .map(|name| (name, i.i(&pushed(&path_v, name)))) + .collect(); + Rc::new(Module{ + items, + imports: vec![], + extra: ProjectExt { + exports, + imports_from: HashMap::new(), + rules: vec![], file: None, + } + }) +} + +pub fn build_tree<'a>( + files: LoadedSourceTable, + i: &Interner, + prelude: &[FileEntry], + injected: &impl InjectedOperatorsFn, +) -> Result> { + let ops_cache = collect_ops::mk_cache(&files, i, injected); + let mut entries = files.iter() + .map(|(path, loaded)| Ok(( + i.r(*path), + loaded, + parse_file(*path, &files, &ops_cache, i, prelude)? 
+ ))) + .collect::, Rc>>()?; + // sort by similarity, then longest-first + entries.sort_unstable_by(|a, b| a.0.cmp(&b.0).reverse()); + let files = entries.into_iter() + .map(|(path, loaded, parsed)| ParsedSource{ + loaded, parsed, + path: path.clone() + }) + .collect::>(); + Ok(ProjectTree(files_to_module(Substack::Bottom, &files, i))) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/collect_ops/exported_ops.rs b/src/pipeline/project_tree/collect_ops/exported_ops.rs new file mode 100644 index 0000000..f2ffb3a --- /dev/null +++ b/src/pipeline/project_tree/collect_ops/exported_ops.rs @@ -0,0 +1,75 @@ +use std::rc::Rc; + +use hashbrown::HashSet; + +use crate::representations::tree::WalkErrorKind; +use crate::pipeline::source_loader::LoadedSourceTable; +use crate::pipeline::error::{ProjectError, ModuleNotFound}; +use crate::interner::{Token, Interner}; +use crate::utils::Cache; +use crate::pipeline::split_name::split_name; + +pub type OpsResult = Result>>, Rc>; +pub type ExportedOpsCache<'a> = Cache<'a, Token>>, OpsResult>; + +pub trait InjectedOperatorsFn = Fn( + Token>> +) -> Option>>>; + +fn coprefix( + l: impl Iterator, + r: impl Iterator +) -> usize { + l.zip(r).take_while(|(a, b)| a == b).count() +} + +/// Collect all names exported by the module at the specified path +pub fn collect_exported_ops( + path: Token>>, + loaded: &LoadedSourceTable, + i: &Interner, + injected: &impl InjectedOperatorsFn +) -> OpsResult { + if let Some(i) = injected(path) {return Ok(i)} + let is_file = |n: &[Token]| loaded.contains_key(&i.i(n)); + let path_s = &i.r(path)[..]; + let name_split = split_name(path_s, &is_file); + let (fpath_v, subpath_v) = if let Some(f) = name_split {f} else { + return Ok(Rc::new(loaded.keys().copied() + .filter_map(|modname| { + let modname_s = i.r(modname); + if path_s.len() == coprefix(path_s.iter(), modname_s.iter()) { + Some(modname_s[path_s.len()]) + } else {None} + }) + .collect::>() + )) + }; + let fpath = i.i(fpath_v); + let 
preparsed = &loaded[&fpath].preparsed; + let module = preparsed.0.walk(&subpath_v, false) + .map_err(|walk_err| match walk_err.kind { + WalkErrorKind::Private => unreachable!("visibility is not being checked here"), + WalkErrorKind::Missing => ModuleNotFound{ + file: i.extern_vec(fpath), + subpath: subpath_v.into_iter() + .take(walk_err.pos) + .map(|t| i.r(*t)) + .cloned() + .collect() + }.rc(), + })?; + Ok(Rc::new(module.items.iter() + .filter(|(_, v)| v.exported) + .map(|(k, _)| *k) + .collect() + )) +} + +pub fn mk_cache<'a>( + loaded: &'a LoadedSourceTable, + i: &'a Interner, + injected: &'a impl InjectedOperatorsFn, +) -> ExportedOpsCache<'a> { + Cache::new(|path, _this| collect_exported_ops(path, loaded, i, injected)) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/collect_ops/mod.rs b/src/pipeline/project_tree/collect_ops/mod.rs new file mode 100644 index 0000000..36e90b3 --- /dev/null +++ b/src/pipeline/project_tree/collect_ops/mod.rs @@ -0,0 +1,8 @@ +mod exported_ops; +mod ops_for; + +pub use exported_ops::{ + ExportedOpsCache, OpsResult, InjectedOperatorsFn, + collect_exported_ops, mk_cache +}; +pub use ops_for::collect_ops_for; \ No newline at end of file diff --git a/src/pipeline/project_tree/collect_ops/ops_for.rs b/src/pipeline/project_tree/collect_ops/ops_for.rs new file mode 100644 index 0000000..3af906c --- /dev/null +++ b/src/pipeline/project_tree/collect_ops/ops_for.rs @@ -0,0 +1,49 @@ +use std::rc::Rc; + +use hashbrown::HashSet; + +use crate::parse::is_op; +use crate::pipeline::error::ProjectError; +use crate::pipeline::source_loader::LoadedSourceTable; +use crate::interner::{Token, Interner}; +use crate::representations::tree::{Module, ModMember}; +use crate::pipeline::import_abs_path::import_abs_path; + +use super::exported_ops::{ExportedOpsCache, OpsResult}; + +/// Collect all operators and names, exported or local, defined in this +/// tree. 
+fn tree_all_ops( + module: &Module, + ops: &mut HashSet> +) { + ops.extend(module.items.keys().copied()); + for ent in module.items.values() { + if let ModMember::Sub(m) = &ent.member { + tree_all_ops(m.as_ref(), ops); + } + } +} + +/// Collect all names imported in this file +pub fn collect_ops_for( + file: &[Token], + loaded: &LoadedSourceTable, + ops_cache: &ExportedOpsCache, + i: &Interner +) -> OpsResult { + let tree = &loaded[&i.i(file)].preparsed.0; + let mut ret = HashSet::new(); + tree_all_ops(tree.as_ref(), &mut ret); + tree.visit_all_imports(&mut |modpath, module, import| { + if let Some(n) = import.name { ret.insert(n); } else { + let path = import_abs_path( + &file, modpath, module, &i.r(import.path)[..], i + ).expect("This error should have been caught during loading"); + ret.extend(ops_cache.find(&i.i(&path))?.iter().copied()); + } + Ok::<_, Rc>(()) + })?; + ret.drain_filter(|t| !is_op(i.r(*t))); + Ok(Rc::new(ret)) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/const_tree.rs b/src/pipeline/project_tree/const_tree.rs new file mode 100644 index 0000000..63d9b6e --- /dev/null +++ b/src/pipeline/project_tree/const_tree.rs @@ -0,0 +1,93 @@ +use std::{ops::Add, rc::Rc}; + +use hashbrown::HashMap; + +use crate::representations::tree::{ModEntry, ModMember, Module}; +use crate::representations::Primitive; +use crate::representations::location::Location; +use crate::foreign::ExternFn; +use crate::interner::{Token, Interner}; +use crate::ast::{Expr, Clause}; +use crate::utils::{Substack, pushed}; + +use super::{ProjectModule, ProjectExt, ProjectTree}; + +pub enum ConstTree { + Const(Expr), + Tree(HashMap, ConstTree>) +} +impl ConstTree { + pub fn xfn(xfn: impl ExternFn + 'static) -> Self { + Self::Const(Expr{ + location: Location::Unknown, + value: Clause::P(Primitive::ExternFn(Box::new(xfn))) + }) + } + pub fn tree( + arr: impl IntoIterator, Self)> + ) -> Self { + Self::Tree(arr.into_iter().collect()) + } +} +impl Add for ConstTree { + 
type Output = ConstTree; + + fn add(self, rhs: ConstTree) -> Self::Output { + if let (Self::Tree(t1), Self::Tree(mut t2)) = (self, rhs) { + let mut product = HashMap::new(); + for (key, i1) in t1 { + if let Some(i2) = t2.remove(&key) { + product.insert(key, i1 + i2); + } else { + product.insert(key, i1); + } + } + product.extend(t2.into_iter()); + Self::Tree(product) + } else { + panic!("cannot combine tree and value fields") + } + } +} + +fn from_const_tree_rec( + path: Substack>, + consts: HashMap, ConstTree>, + file: &[Token], + i: &Interner, +) -> ProjectModule { + let mut items = HashMap::new(); + let path_v = path.iter().rev_vec_clone(); + for (name, item) in consts { + items.insert(name, ModEntry{ + exported: true, + member: match item { + ConstTree::Const(c) => ModMember::Item(c), + ConstTree::Tree(t) => ModMember::Sub(Rc::new( + from_const_tree_rec(path.push(name), t, file, i) + )), + } + }); + } + let exports = items.keys() + .map(|name| (*name, i.i(&pushed(&path_v, *name)))) + .collect(); + Module { + items, + imports: vec![], + extra: ProjectExt { + exports, + file: Some(file.to_vec()), + ..Default::default() + } + } +} + +pub fn from_const_tree( + consts: HashMap, ConstTree>, + file: &[Token], + i: &Interner, +) -> ProjectTree { + let module = from_const_tree_rec(Substack::Bottom, consts, file, i); + ProjectTree(Rc::new(module)) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/mod.rs b/src/pipeline/project_tree/mod.rs new file mode 100644 index 0000000..2e83906 --- /dev/null +++ b/src/pipeline/project_tree/mod.rs @@ -0,0 +1,38 @@ +/* FILE SEPARATION BOUNDARY + +Collect all operators accessible in each file, parse the files with +correct tokenization, resolve glob imports, convert expressions to +refer to tokens with (local) absolute path, and connect them into a +single tree. + +The module checks for imports from missing modules (including submodules). +All other errors must be checked later. 
+ +Injection strategy: +Return all items of the given module in the injected tree for `injected` +The output of this stage is a tree, which can simply be overlaid with +the injected tree +*/ + +mod collect_ops; +mod parse_file; +mod build_tree; +mod normalize_imports; +mod prefix; +mod tree; +mod const_tree; +mod add_prelude; + +pub use collect_ops::InjectedOperatorsFn; + +pub use const_tree::{ + ConstTree, from_const_tree, +}; + +pub use tree::{ + ProjectExt, ProjectModule, ProjectTree, collect_consts, collect_rules +}; + +pub use build_tree::{ + build_tree, split_path +}; \ No newline at end of file diff --git a/src/pipeline/project_tree/normalize_imports.rs b/src/pipeline/project_tree/normalize_imports.rs new file mode 100644 index 0000000..12d6a0c --- /dev/null +++ b/src/pipeline/project_tree/normalize_imports.rs @@ -0,0 +1,84 @@ +use crate::representations::tree::{Module, ModMember}; +use crate::representations::sourcefile::{Member, FileEntry, Import}; +use crate::utils::BoxedIter; +use crate::utils::{Substack, iter::box_once}; +use crate::interner::{Interner, Token}; +use crate::pipeline::import_abs_path::import_abs_path; + +use super::collect_ops::ExportedOpsCache; + +fn member_rec( + // level + mod_stack: Substack>, + preparsed: &Module, + // object + member: Member, + // context + path: &[Token], + ops_cache: &ExportedOpsCache, + i: &Interner +) -> Member { + match member { + Member::Namespace(name, body) => { + let prepmember = &preparsed.items[&name].member; + let subprep = if let ModMember::Sub(m) = prepmember {m.clone()} + else {unreachable!("This name must point to a namespace")}; + let new_body = entv_rec( + mod_stack.push(name), + subprep.as_ref(), + body, + path, ops_cache, i + ); + Member::Namespace(name, new_body) + }, + any => any + } +} + +fn entv_rec( + // level + mod_stack: Substack>, + preparsed: &Module, + // object + data: Vec, + // context + mod_path: &[Token], + ops_cache: &ExportedOpsCache, + i: &Interner +) -> Vec { + data.into_iter() 
+ .map(|ent| match ent { + FileEntry::Import(imps) => FileEntry::Import(imps.into_iter() + .flat_map(|import| if let Import{ name: None, path } = import { + let p = import_abs_path( + mod_path, mod_stack, preparsed, &i.r(path)[..], i + ).expect("Should have emerged in preparsing"); + let names = ops_cache.find(&i.i(&p)) + .expect("Should have emerged in second parsing"); + let imports = names.iter() + .map(move |&n| Import{ name: Some(n), path }) + .collect::>(); + Box::new(imports.into_iter()) as BoxedIter + } else {box_once(import)}) + .collect() + ), + FileEntry::Exported(mem) => FileEntry::Exported(member_rec( + mod_stack, preparsed, mem, mod_path, ops_cache, i + )), + FileEntry::Internal(mem) => FileEntry::Internal(member_rec( + mod_stack, preparsed, mem, mod_path, ops_cache, i + )), + any => any + }) + .collect() +} + +pub fn normalize_imports( + preparsed: &Module, + data: Vec, + path: &[Token], + ops_cache: &ExportedOpsCache, + i: &Interner +) -> Vec { + entv_rec(Substack::Bottom, preparsed, data, path, ops_cache, i) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/parse_file.rs b/src/pipeline/project_tree/parse_file.rs new file mode 100644 index 0000000..d9b2d60 --- /dev/null +++ b/src/pipeline/project_tree/parse_file.rs @@ -0,0 +1,44 @@ +use std::rc::Rc; + +use crate::parse; +use crate::pipeline::error::ProjectError; +use crate::representations::sourcefile::{FileEntry, normalize_namespaces}; +use crate::pipeline::source_loader::LoadedSourceTable; +use crate::interner::{Token, Interner}; + +use super::add_prelude::add_prelude; +use super::collect_ops::{ExportedOpsCache, collect_ops_for}; +use super::normalize_imports::normalize_imports; +use super::prefix::prefix; + +pub fn parse_file( + path: Token>>, + loaded: &LoadedSourceTable, + ops_cache: &ExportedOpsCache, + i: &Interner, + prelude: &[FileEntry], +) -> Result, Rc> { + let ld = &loaded[&path]; + // let ops_cache = collect_ops::mk_cache(loaded, i); + let ops = 
collect_ops_for(&i.r(path)[..], loaded, ops_cache, i)?; + let ops_vec = ops.iter() + .map(|t| i.r(*t)) + .cloned() + .collect::>(); + let ctx = parse::ParsingContext{ + interner: i, + ops: &ops_vec, + file: Rc::new(i.extern_vec(path)) + }; + let entries = parse::parse(ld.text.as_str(), ctx) + .expect("This error should have been caught during loading"); + let with_prelude = add_prelude(entries, &i.r(path)[..], prelude); + let impnormalized = normalize_imports( + &ld.preparsed.0, with_prelude, &i.r(path)[..], ops_cache, i + ); + let nsnormalized = normalize_namespaces( + Box::new(impnormalized.into_iter()), i + ).expect("This error should have been caught during preparsing"); + let prefixed = prefix(nsnormalized, &i.r(path)[..], ops_cache, i); + Ok(prefixed) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/prefix.rs b/src/pipeline/project_tree/prefix.rs new file mode 100644 index 0000000..fb9705a --- /dev/null +++ b/src/pipeline/project_tree/prefix.rs @@ -0,0 +1,82 @@ +use std::rc::Rc; + +use crate::ast::{Constant, Rule}; +use crate::interner::{Token, Interner}; +use crate::utils::Substack; +use crate::representations::sourcefile::{Member, FileEntry}; + +use super::collect_ops::ExportedOpsCache; + +fn member_rec( + // level + mod_stack: Substack>, + // object + data: Member, + // context + path: &[Token], + ops_cache: &ExportedOpsCache, + i: &Interner +) -> Member { + // let except = |op| imported.contains(&op); + let except = |_| false; + let prefix_v = path.iter().copied() + .chain(mod_stack.iter().rev_vec_clone().into_iter()) + .collect::>(); + let prefix = i.i(&prefix_v); + match data { + Member::Namespace(name, body) => { + let new_body = entv_rec( + mod_stack.push(name), + body, + path, ops_cache, i + ); + Member::Namespace(name, new_body) + } + Member::Constant(constant) => Member::Constant(Constant{ + name: constant.name, + value: constant.value.prefix(prefix, i, &except) + }), + Member::Rule(rule) => Member::Rule(Rule{ + prio: 
rule.prio, + source: Rc::new(rule.source.iter() + .map(|e| e.prefix(prefix, i, &except)) + .collect() + ), + target: Rc::new(rule.target.iter() + .map(|e| e.prefix(prefix, i, &except)) + .collect() + ), + }) + } +} + +fn entv_rec( + // level + mod_stack: Substack>, + // object + data: Vec, + // context + path: &[Token], + ops_cache: &ExportedOpsCache, + i: &Interner +) -> Vec { + data.into_iter().map(|fe| match fe { + FileEntry::Exported(mem) => FileEntry::Exported(member_rec( + mod_stack, mem, path, ops_cache, i + )), + FileEntry::Internal(mem) => FileEntry::Internal(member_rec( + mod_stack, mem, path, ops_cache, i + )), + // XXX should [FileEntry::Export] be prefixed? + any => any + }).collect() +} + +pub fn prefix( + data: Vec, + path: &[Token], + ops_cache: &ExportedOpsCache, + i: &Interner +) -> Vec { + entv_rec(Substack::Bottom, data, path, ops_cache, i) +} \ No newline at end of file diff --git a/src/pipeline/project_tree/tree.rs b/src/pipeline/project_tree/tree.rs new file mode 100644 index 0000000..53774fa --- /dev/null +++ b/src/pipeline/project_tree/tree.rs @@ -0,0 +1,87 @@ +use std::{ops::Add, rc::Rc}; + +use hashbrown::HashMap; + +use crate::representations::tree::{Module, ModMember}; +use crate::ast::{Rule, Expr}; +use crate::interner::{Token, Interner}; +use crate::utils::Substack; + +#[derive(Clone, Debug, Default)] +pub struct ProjectExt{ + /// Pairs each foreign token to the module it was imported from + pub imports_from: HashMap, Token>>>, + /// Pairs each exported token to its original full name. 
+ pub exports: HashMap, Token>>>, + /// All rules defined in this module, exported or not + pub rules: Vec, + /// Filename, if known, for error reporting + pub file: Option>> +} + +impl Add for ProjectExt { + type Output = Self; + + fn add(mut self, rhs: Self) -> Self::Output { + let ProjectExt{ imports_from, exports, rules, file } = rhs; + self.imports_from.extend(imports_from.into_iter()); + self.exports.extend(exports.into_iter()); + self.rules.extend(rules.into_iter()); + if file.is_some() { self.file = file } + self + } +} + +pub type ProjectModule = Module; +pub struct ProjectTree(pub Rc); + +fn collect_rules_rec(bag: &mut Vec, module: &ProjectModule) { + bag.extend(module.extra.rules.iter().cloned()); + for item in module.items.values() { + if let ModMember::Sub(module) = &item.member { + collect_rules_rec(bag, module.as_ref()); + } + } +} + +pub fn collect_rules(project: &ProjectTree) -> Vec { + let mut rules = Vec::new(); + collect_rules_rec(&mut rules, project.0.as_ref()); + rules +} + +fn collect_consts_rec( + path: Substack>, + bag: &mut HashMap>>, Expr>, + module: &ProjectModule, + i: &Interner +) { + for (key, entry) in module.items.iter() { + match &entry.member { + ModMember::Item(expr) => { + let mut name = path.iter().rev_vec_clone(); + name.push(*key); + bag.insert(i.i(&name), expr.clone()); + } + ModMember::Sub(module) => { + collect_consts_rec( + path.push(*key), + bag, module, i + ) + } + } + } +} + +pub fn collect_consts(project: &ProjectTree, i: &Interner) +-> HashMap>>, Expr> +{ + let mut consts = HashMap::new(); + collect_consts_rec( + Substack::Bottom, + &mut consts, + project.0.as_ref(), + i + ); + consts +} \ No newline at end of file diff --git a/src/pipeline/source_loader/load_source.rs b/src/pipeline/source_loader/load_source.rs new file mode 100644 index 0000000..88957db --- /dev/null +++ b/src/pipeline/source_loader/load_source.rs @@ -0,0 +1,82 @@ +use std::iter; +use std::rc::Rc; + +use crate::pipeline::error::ProjectError; +use 
crate::pipeline::import_abs_path::import_abs_path; +use crate::pipeline::split_name::split_name; +use crate::interner::{Token, Interner}; + +use crate::pipeline::file_loader::{Loaded, load_text, IOResult}; +use super::loaded_source::{LoadedSourceTable, LoadedSource}; +use super::preparse::preparse; + +/// Load the source at the given path or all within if it's a collection, +/// and all sources imported from these. +fn load_abs_path_rec( + abs_path: Token>>, + table: &mut LoadedSourceTable, + i: &Interner, + get_source: &impl Fn(Token>>) -> IOResult, + is_injected: &impl Fn(&[Token]) -> bool +) -> Result<(), Rc> { + let abs_pathv = i.r(abs_path); + // short-circuit if this import is defined externally or already known + if is_injected(&abs_pathv) | table.contains_key(&abs_path) { + return Ok(()) + } + // try splitting the path to file, swallowing any IO errors + let is_file = |p| (get_source)(p).map(|l| l.is_code()).unwrap_or(false); + let name_split = split_name(&abs_pathv, &|p| is_file(i.i(p))); + let filename = if let Some((f, _)) = name_split {f} else { + // If the path could not be split to file, load it as directory + let coll = if let Loaded::Collection(c) = (get_source)(abs_path)? {c} + // ^^ raise any IO error that was previously swallowed + else {panic!("split_name returned None but the path is a file")}; + // recurse on all files and folders within + for item in coll.iter() { + let abs_subpath = abs_pathv.iter() + .copied() + .chain(iter::once(i.i(item))) + .collect::>(); + load_abs_path_rec( + i.i(&abs_subpath), table, i, get_source, is_injected + )? 
+ } + return Ok(()); + }; + // otherwise load, preparse and record this file + let text = load_text(i.i(filename), &get_source, i)?; + let preparsed = preparse( + filename.iter().map(|t| i.r(*t)).cloned().collect(), + text.as_str(), i + )?; + table.insert(abs_path, LoadedSource{ text, preparsed: preparsed.clone() }); + // recurse on all imported modules + preparsed.0.visit_all_imports(&mut |modpath, module, import| { + let abs_pathv = import_abs_path( + &filename, modpath, + module, &import.nonglob_path(i), i + )?; + // recurse on imported module + load_abs_path_rec(i.i(&abs_pathv), table, i, get_source, is_injected) + }) +} + +/// Load and preparse all files reachable from the load targets via +/// imports that aren't injected. +pub fn load_source( + targets: &[Token>>], + i: &Interner, + get_source: &impl Fn(Token>>) -> IOResult, + is_injected: &impl Fn(&[Token]) -> bool, +) -> Result> { + let mut table = LoadedSourceTable::new(); + for target in targets { + load_abs_path_rec( + *target, + &mut table, + i, get_source, is_injected + )? + } + Ok(table) +} \ No newline at end of file diff --git a/src/pipeline/source_loader/loaded_source.rs b/src/pipeline/source_loader/loaded_source.rs new file mode 100644 index 0000000..e33a1b0 --- /dev/null +++ b/src/pipeline/source_loader/loaded_source.rs @@ -0,0 +1,13 @@ +use std::{rc::Rc, collections::HashMap}; + +use crate::interner::Token; + +use super::preparse::Preparsed; + +#[derive(Debug)] +pub struct LoadedSource { + pub text: Rc, + pub preparsed: Preparsed, +} + +pub type LoadedSourceTable = HashMap>>, LoadedSource>; \ No newline at end of file diff --git a/src/pipeline/source_loader/mod.rs b/src/pipeline/source_loader/mod.rs new file mode 100644 index 0000000..db32c49 --- /dev/null +++ b/src/pipeline/source_loader/mod.rs @@ -0,0 +1,25 @@ +/* PULL LOGISTICS BOUNDARY + +Specifying exactly what this module should be doing was an unexpectedly +hard challenge. 
It is intended to encapsulate all pull logistics, but +this definition is apparently prone to scope creep. + +Load files, preparse them to obtain a list of imports, follow these. +Preparsing also returns the module tree and list of exported synbols +for free, which is needed later so the output of preparsing is also +attached to the module output. + +The module checks for IO errors, syntax errors, malformed imports and +imports from missing files. All other errors must be checked later. + +Injection strategy: +see whether names are valid in the injected tree for is_injected +*/ + +mod load_source; +mod loaded_source; +mod preparse; + +pub use loaded_source::{LoadedSource, LoadedSourceTable}; +pub use load_source::load_source; +pub use preparse::Preparsed; \ No newline at end of file diff --git a/src/pipeline/source_loader/preparse.rs b/src/pipeline/source_loader/preparse.rs new file mode 100644 index 0000000..22e40d8 --- /dev/null +++ b/src/pipeline/source_loader/preparse.rs @@ -0,0 +1,102 @@ +use hashbrown::HashMap; +use std::hash::Hash; +use std::rc::Rc; + +use crate::ast::Constant; +use crate::pipeline::error::{ProjectError, ParseErrorWithPath, VisibilityMismatch}; +use crate::representations::sourcefile::{normalize_namespaces, Member}; +use crate::representations::tree::{ModEntry, ModMember}; +use crate::interner::Interner; +use crate::parse::{self, ParsingContext}; +use crate::representations::{sourcefile::{FileEntry, imports}, tree::Module}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Preparsed(pub Rc>); + +/// Add an internal flat name if it does not exist yet +fn add_intern( + map: &mut HashMap>, k: K +) { + let _ = map.try_insert(k, ModEntry { + exported: false, + member: ModMember::Item(()), + }); +} + +/// Add an exported flat name or export any existing entry +fn add_export( + map: &mut HashMap>, k: K +) { + if let Some(entry) = map.get_mut(&k) { + entry.exported = true + } else { + map.insert(k, ModEntry { + exported: true, + member: 
ModMember::Item(()), + }); + } +} + +/// Convert source lines into a module +fn to_module(src: &[FileEntry], i: &Interner) -> Rc> +{ + let imports = imports(src.iter()).cloned().collect::>(); + let mut items = src.iter().filter_map(|ent| match ent { + FileEntry::Internal(Member::Namespace(name, data)) => { + let member = ModMember::Sub(to_module(data, i)); + let entry = ModEntry{ exported: false, member }; + Some((*name, entry)) + } + FileEntry::Exported(Member::Namespace(name, data)) => { + let member = ModMember::Sub(to_module(data, i)); + let entry = ModEntry{ exported: true, member }; + Some((*name, entry)) + } + _ => None + }).collect::>(); + for file_entry in src { match file_entry { + FileEntry::Comment(_) | FileEntry::Import(_) + | FileEntry::Internal(Member::Namespace(..)) + | FileEntry::Exported(Member::Namespace(..)) => (), + FileEntry::Export(tokv) => for tok in tokv { + add_export(&mut items, *tok) + } + FileEntry::Internal(Member::Constant(Constant{ name, .. })) + => add_intern(&mut items, *name), + FileEntry::Exported(Member::Constant(Constant{ name, .. })) + => add_export(&mut items, *name), + FileEntry::Internal(Member::Rule(rule)) => { + let names = rule.collect_single_names(i); + for name in names { + add_intern(&mut items, name) + } + } + FileEntry::Exported(Member::Rule(rule)) => { + let names = rule.collect_single_names(i); + for name in names { + add_export(&mut items, name) + } + } + }} + Rc::new(Module { imports, items, extra: () }) +} + +/// Preparse the module. 
At this stage, only the imports and +/// names defined by the module can be parsed +pub fn preparse(file: Vec, source: &str, i: &Interner) +-> Result> { + // Parse with no operators + let ctx = ParsingContext::<&str>::new(&[], i, Rc::new(file.clone())); + let entries = parse::parse(source, ctx) + .map_err(|error| ParseErrorWithPath{ + full_source: source.to_string(), + error, + path: file.clone() + }.rc())?; + let normalized = normalize_namespaces(Box::new(entries.into_iter()), i) + .map_err(|ns| VisibilityMismatch{ + namespace: ns.into_iter().map(|t| i.r(t)).cloned().collect(), + file: Rc::new(file.clone()) + }.rc())?; + Ok(Preparsed(to_module(&normalized, i))) +} \ No newline at end of file diff --git a/src/pipeline/split_name.rs b/src/pipeline/split_name.rs new file mode 100644 index 0000000..acf786a --- /dev/null +++ b/src/pipeline/split_name.rs @@ -0,0 +1,14 @@ +use crate::interner::Token; + +pub fn split_name<'a>( + path: &'a [Token], + is_valid: &impl Fn(&[Token]) -> bool +) -> Option<(&'a [Token], &'a [Token])> { + for split in (0..=path.len()).rev() { + let (filename, subpath) = path.split_at(split); + if is_valid(filename) { + return Some((filename, subpath)) + } + } + None +} \ No newline at end of file diff --git a/src/project/loading/ext_loader.rs b/src/project/loading/ext_loader.rs deleted file mode 100644 index 06bbbbe..0000000 --- a/src/project/loading/ext_loader.rs +++ /dev/null @@ -1,33 +0,0 @@ -use lasso::Spur; - -use crate::representations::sourcefile::FileEntry; - -use super::{Loader, Loaded, LoadingError}; - -pub fn ext_loader<'a, T, F>( - data: Vec, - mut submods: Vec<(&'static str, T)>, - intern: &'a F -) -> impl Loader + 'a -where - T: Loader + 'a, - F: Fn(&str) -> Spur { - move |path: &[&str]| { - let (step, rest) = match path.split_first() { - None => return Ok(Loaded::AST( - data.iter().cloned().chain( - submods.iter().map(|(s, _)| FileEntry::LazyModule(intern(s))) - ).collect() - )), - Some(t) => t - }; - if let Some((_, l)) = 
submods.iter_mut().find(|(s, l)| s == step) { - l.load(rest) - } else { - let errtyp = if rest.is_empty() { - LoadingError::UnknownNode - } else {LoadingError::Missing}; - Err(errtyp(step.to_string())) - } - } -} \ No newline at end of file diff --git a/src/project/loading/extlib_loader.rs b/src/project/loading/extlib_loader.rs deleted file mode 100644 index 453ab7c..0000000 --- a/src/project/loading/extlib_loader.rs +++ /dev/null @@ -1,34 +0,0 @@ -use std::rc::Rc; - -use lasso::Spur; -use ordered_float::NotNan; - -use crate::representations::Primitive; -use crate::representations::sourcefile::FileEntry; -use crate::foreign::ExternFn; -use crate::ast::{Rule, Clause}; - -use super::{Loader, ext_loader}; - -pub fn extlib_loader<'a, T, F>( - fns: Vec<(&'static str, Box)>, - submods: Vec<(&'static str, T)>, - intern: &'a F -) -> impl Loader + 'a -where - T: Loader + 'a, - F: Fn(&str) -> Spur + 'a -{ - let entries = ( - fns.into_iter().map(|(name, xfn)| FileEntry::Rule(Rule { - source: Rc::new(vec![ - Clause::Name(Rc::new(vec![intern(name)])).into_expr(), - ]), - prio: NotNan::try_from(0.0f64).unwrap(), - target: Rc::new(vec![ - Clause::P(Primitive::ExternFn(xfn)).into_expr(), - ]) - }, true)) - ).collect(); - ext_loader(entries, submods, intern) -} \ No newline at end of file diff --git a/src/project/loading/file_loader.rs b/src/project/loading/file_loader.rs deleted file mode 100644 index ff5be5c..0000000 --- a/src/project/loading/file_loader.rs +++ /dev/null @@ -1,46 +0,0 @@ -use std::fs::read_to_string; -use std::path::PathBuf; - -use lasso::Spur; - -use crate::representations::sourcefile::FileEntry; - -use super::{Loaded, Loader, LoadingError}; - -pub fn file_loader<'a, F>( - proj: PathBuf, - intern: &'a F -) -> impl Loader + 'a -where F: Fn(&str) -> Spur + 'a { - move |path: &[&str]| { - let dirpath = proj.join(path.join("/")); - if dirpath.is_dir() || dirpath.is_symlink() { - return Ok(Loaded::AST( - dirpath.read_dir()? 
- .filter_map(|entr| { - let ent = entr.ok()?; - let typ = ent.file_type().ok()?; - let path = ent.path(); - if typ.is_dir() || typ.is_symlink() { - let name = ent.file_name(); - let spur = intern(name.to_string_lossy().as_ref()); - Some(FileEntry::LazyModule(spur)) - } else if typ.is_file() && path.extension()? == "orc" { - let name = path.file_stem().expect("extension tested above"); - let spur = intern(name.to_string_lossy().as_ref()); - Some(FileEntry::LazyModule(spur)) - } else { None } - }) - .collect() - )) - } - let orcfile = dirpath.with_extension("orc"); - if orcfile.is_file() { - read_to_string(orcfile).map(Loaded::Source).map_err(LoadingError::from) - } else { - let pathstr = dirpath.to_string_lossy().into_owned(); - Err(if dirpath.exists() { LoadingError::UnknownNode(pathstr) } - else { LoadingError::Missing(pathstr) }) - } - } -} diff --git a/src/project/loading/map_loader.rs b/src/project/loading/map_loader.rs deleted file mode 100644 index 908a577..0000000 --- a/src/project/loading/map_loader.rs +++ /dev/null @@ -1,16 +0,0 @@ -use std::collections::HashMap; - -use super::{Loader, LoadingError, Loaded}; - -pub fn map_loader<'a, T: Loader + 'a>(mut map: HashMap<&'a str, T>) -> impl Loader + 'a { - move |path: &[&str]| { - let (key, subpath) = if let Some(sf) = path.split_first() {sf} - else {return Ok(Loaded::Source(map.keys().cloned().collect()))}; - let sub = if let Some(sub) = map.get_mut(key.to_string().as_str()) {sub} - else {return Err( - if subpath.len() == 0 {LoadingError::UnknownNode(path.join("::"))} - else {LoadingError::Missing(path.join("::"))} - )}; - sub.load(subpath) - } -} \ No newline at end of file diff --git a/src/project/loading/mod.rs b/src/project/loading/mod.rs deleted file mode 100644 index bd69f13..0000000 --- a/src/project/loading/mod.rs +++ /dev/null @@ -1,58 +0,0 @@ -mod file_loader; -mod ext_loader; -mod string_loader; -mod map_loader; -mod extlib_loader; -mod prefix_loader; - -pub use file_loader::file_loader; -pub use 
ext_loader::ext_loader; -pub use extlib_loader::extlib_loader; -pub use string_loader::string_loader; -pub use map_loader::map_loader; -pub use prefix_loader::prefix_loader; - -use std::{rc::Rc, io}; - -use crate::representations::sourcefile::FileEntry; - -#[derive(Clone, Debug)] -pub enum LoadingError { - /// An IO operation has failed (i.e. no read permission) - IOErr(Rc), - /// The leaf does not exist - UnknownNode(String), - /// The leaf and at least the immediately containing namespace don't exist - Missing(String) -} - -impl From for LoadingError { - fn from(inner: io::Error) -> Self { - LoadingError::IOErr(Rc::new(inner)) - } -} - -#[derive(Clone)] -pub enum Loaded { - Source(String), - AST(Vec) -} - -pub trait Loader { - fn load<'s, 'a>(&'s mut self, path: &'a [&'a str]) -> Result; - fn boxed<'a>(self) -> Box where Self: 'a + Sized { - Box::new(self) - } -} - -impl Loader for T where T: for<'a> FnMut(&'a [&'a str]) -> Result { - fn load(&mut self, path: &[&str]) -> Result { - (self)(path) - } -} - -impl Loader for Box { - fn load<'s, 'a>(&'s mut self, path: &'a [&'a str]) -> Result { - self.as_mut().load(path) - } -} \ No newline at end of file diff --git a/src/project/loading/prefix_loader.rs b/src/project/loading/prefix_loader.rs deleted file mode 100644 index 45ec4a7..0000000 --- a/src/project/loading/prefix_loader.rs +++ /dev/null @@ -1,10 +0,0 @@ -use super::Loader; - -pub fn prefix_loader<'a>( - prefix: &'a [&'a str], mut loader: impl Loader + 'a -) -> impl Loader + 'a { - move |path: &[&str]| { - let full_path = prefix.iter().chain(path.iter()).map(|s| s.to_string()).clone(); - loader.load(path) - } -} \ No newline at end of file diff --git a/src/project/loading/string_loader.rs b/src/project/loading/string_loader.rs deleted file mode 100644 index ad7e8c2..0000000 --- a/src/project/loading/string_loader.rs +++ /dev/null @@ -1,5 +0,0 @@ -use super::{Loader, Loaded}; - -pub fn string_loader<'a>(data: &'a str) -> impl Loader + 'a { - move |_: &[&str]| 
Ok(Loaded::Source(data.to_string())) -} \ No newline at end of file diff --git a/src/project/mod.rs b/src/project/mod.rs deleted file mode 100644 index 96eeb89..0000000 --- a/src/project/mod.rs +++ /dev/null @@ -1,14 +0,0 @@ -mod rule_collector; -mod loading; -mod prefix; -mod name_resolver; -mod module_error; - -pub use module_error::ModuleError; -pub use rule_collector::rule_collector; -pub use loading::{ - Loader, Loaded, LoadingError, - ext_loader, file_loader, string_loader, map_loader, extlib_loader, - prefix_loader -}; -use crate::ast::Rule; \ No newline at end of file diff --git a/src/project/module_error.rs b/src/project/module_error.rs deleted file mode 100644 index fae1eb6..0000000 --- a/src/project/module_error.rs +++ /dev/null @@ -1,31 +0,0 @@ -use thiserror::Error; - -use crate::parse::ParseError; - -use super::name_resolver::ResolutionError; - -#[derive(Error, Debug, Clone)] -pub enum ModuleError where ELoad: Clone { - #[error("Resolution cycle")] - ResolutionCycle, - #[error("File not found: {0}")] - Load(ELoad), - #[error("Failed to parse: {0:?}")] - Syntax(ParseError), - #[error("Not a module")] - None -} - -impl From for ModuleError where T: Clone { - fn from(pars: ParseError) -> Self { Self::Syntax(pars) } -} - -impl From>> for ModuleError where T: Clone { - fn from(res: ResolutionError>) -> Self { - match res { - ResolutionError::Cycle(_) => ModuleError::ResolutionCycle, - ResolutionError::NoModule(_) => ModuleError::None, - ResolutionError::Delegate(d) => d - } - } -} \ No newline at end of file diff --git a/src/project/name_resolver.rs b/src/project/name_resolver.rs deleted file mode 100644 index 0b4a458..0000000 --- a/src/project/name_resolver.rs +++ /dev/null @@ -1,137 +0,0 @@ -use std::collections::HashMap; -use std::rc::Rc; -use itertools::Itertools; -use lasso::Spur; -use thiserror::Error; - -use crate::utils::Stackframe; - -use crate::ast::{Expr, Clause}; - -type ImportMap = HashMap>>; - -#[derive(Debug, Clone, Error)] -pub enum 
ResolutionError { - #[error("Reference cycle at {0:?}")] - Cycle(Vec>>), - #[error("No module provides {0:?}")] - NoModule(Rc>), - #[error(transparent)] - Delegate(#[from] Err) -} - -type ResolutionResult = Result>, ResolutionError>; - -/// Recursively resolves symbols to their original names in expressions -/// while caching every resolution. This makes the resolution process -/// lightning fast and invalidation completely impossible since -/// the intermediate steps of a resolution aren't stored. -pub struct NameResolver { - cache: HashMap>, ResolutionResult>, - split: FSplit, - get_imports: FImps -} - -impl NameResolver -where - FSplit: FnMut(Rc>) -> Option<(Rc>, Rc>)>, - FImps: FnMut(Rc>) -> Result, - E: Clone -{ - pub fn new(split: FSplit, get_imports: FImps) -> Self { - Self { - cache: HashMap::new(), - split, - get_imports - } - } - - fn split(&self, symbol: Rc>) - -> Result<(Rc>, Rc>), ResolutionError> { - let (path, name) = (self.split)(symbol.clone()) - .ok_or_else(|| ResolutionError::NoModule(symbol.clone()))?; - if name.is_empty() { - panic!("get_modname matched all to module and nothing to name") - } - Ok((path, name)) - } - - /// Obtains a symbol's originnal name - /// Uses a substack to detect loops - fn find_origin_rec( - &mut self, - symbol: Rc>, - import_path: Stackframe>> - ) -> Result>, ResolutionError> { - if let Some(cached) = self.cache.get(&symbol) { - return cached.clone() - } - // The imports and path of the referenced file and the local name - let (path, name) = self.split(symbol)?; - let imports = (self.get_imports)(path.clone())?; - let result = if let Some(source) = imports.get(&name[0]) { - let new_sym = source.iter().chain(name.iter()).cloned().collect_vec(); - if import_path.iter().any(|el| el.as_ref() == new_sym.as_slice()) { - Err(ResolutionError::Cycle(import_path.iter().cloned().collect())) - } else { - self.find_origin_rec(Rc::new(new_sym), import_path.push(symbol.clone())) - } - } else { - Ok(symbol.clone()) // If not 
imported, it must be locally defined - }; - self.cache.insert(symbol, result.clone()); - result - } - - fn process_exprv_rec(&mut self, exv: &[Expr]) -> Result, ResolutionError> { - exv.iter().map(|ex| self.process_expression_rec(ex)).collect() - } - - fn process_exprmrcopt_rec(&mut self, - exbo: &Option> - ) -> Result>, ResolutionError> { - exbo.iter().map(|exb| Ok(Rc::new(self.process_expression_rec(exb)?))) - .next().transpose() - } - - fn process_clause_rec(&mut self, tok: &Clause) -> Result> { - Ok(match tok { - Clause::S(c, exv) => Clause::S(*c, Rc::new( - exv.iter().map(|e| self.process_expression_rec(e)) - .collect::>()? - )), - Clause::Lambda(name, typ, body) => Clause::Lambda(name.clone(), - Rc::new(self.process_exprv_rec(&typ)?), - Rc::new(self.process_exprv_rec(&body)?) - ), - Clause::Auto(name, typ, body) => Clause::Auto(name.clone(), - Rc::new(self.process_exprv_rec(&typ)?), - Rc::new(self.process_exprv_rec(&body)?) - ), - Clause::Name(name) => Clause::Name(self.find_origin(name.clone())?), - x => x.clone() - }) - } - - fn process_expression_rec(&mut self, Expr(token, typ): &Expr) -> Result> { - Ok(Expr( - self.process_clause_rec(token)?, - Rc::new(typ.iter().map(|t| { - self.process_clause_rec(t) - }).collect::>()?) 
- )) - } - - pub fn find_origin(&mut self, symbol: Rc>) -> Result>, ResolutionError> { - self.find_origin_rec(symbol.clone(), Stackframe::new(symbol)) - } - - #[allow(dead_code)] - pub fn process_clause(&mut self, clause: &Clause) -> Result> { - self.process_clause_rec(clause) - } - - pub fn process_expression(&mut self, ex: &Expr) -> Result> { - self.process_expression_rec(ex) - } -} diff --git a/src/project/prefix.rs b/src/project/prefix.rs deleted file mode 100644 index a53c149..0000000 --- a/src/project/prefix.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::rc::Rc; - -use lasso::Spur; - -use crate::ast::{Expr, Clause}; - -/// Replaces the first element of a name with the matching prefix from a prefix map - -/// Produce a Token object for any value of Expr other than Typed. -/// Called by [#prefix] which handles Typed. -fn prefix_clause( - expr: &Clause, - namespace: &[Spur] -) -> Clause { - match expr { - Clause::S(c, v) => Clause::S(*c, Rc::new(v.iter().map(|e| { - prefix_expr(e, namespace) - }).collect())), - Clause::Auto(name, typ, body) => Clause::Auto( - name.clone(), - Rc::new(typ.iter().map(|e| prefix_expr(e, namespace)).collect()), - Rc::new(body.iter().map(|e| prefix_expr(e, namespace)).collect()), - ), - Clause::Lambda(name, typ, body) => Clause::Lambda( - name.clone(), - Rc::new(typ.iter().map(|e| prefix_expr(e, namespace)).collect()), - Rc::new(body.iter().map(|e| prefix_expr(e, namespace)).collect()), - ), - Clause::Name(name) => Clause::Name( - Rc::new(namespace.iter().chain(name.iter()).cloned().collect()) - ), - x => x.clone() - } -} - -/// Produce an Expr object for any value of Expr -pub fn prefix_expr(Expr(clause, typ): &Expr, namespace: &[Spur]) -> Expr { - Expr( - prefix_clause(clause, namespace), - Rc::new(typ.iter().map(|e| prefix_clause(e, namespace)).collect()) - ) -} diff --git a/src/project/rule_collector.rs b/src/project/rule_collector.rs deleted file mode 100644 index e0afbb0..0000000 --- a/src/project/rule_collector.rs +++ /dev/null @@ 
-1,232 +0,0 @@ -use std::cell::RefCell; -use std::collections::{HashMap, HashSet, VecDeque}; -use std::rc::Rc; - -use itertools::Itertools; -use lasso::Spur; - -use crate::ast::Rule; -use crate::parse; -use crate::representations::sourcefile::{FileEntry, exported_names, imports}; -use crate::utils::Cache; - -use super::name_resolver::NameResolver; -use super::module_error::ModuleError; -use super::prefix::prefix_expr; -use super::loading::{Loaded, Loader, LoadingError}; - -type ParseResult = Result>; - -#[derive(Clone)] -pub struct Module { - pub rules: Vec, - pub exports: Vec, - pub references: HashSet>> -} - -pub type RuleCollectionResult = Result, ModuleError>; - -pub fn rule_collector<'a, F: 'a, G: 'a, H: 'a>( - intern: &'a G, deintern: &'a H, - load_mod: F -) -> Cache<'static, Rc>, RuleCollectionResult> -where F: Loader, G: Fn(&str) -> Spur, H: Fn(Spur) -> &'a str -{ - let load_mod_rc = RefCell::new(load_mod); - // Map paths to a namespace with name list (folder) or module with source text (file) - let loaded = Cache::rc(move |path: Rc>, _| -> ParseResult> { - let load_mod = load_mod_rc.borrow_mut(); - let spath = path.iter().cloned().map(deintern).collect_vec(); - load_mod.load(&spath).map(Rc::new).map_err(ModuleError::Load) - }); - // Map names to the longest prefix that points to a valid module - // At least one segment must be in the prefix, and the prefix must not be the whole name - let modname = Cache::rc({ - let loaded = loaded.clone(); - move |symbol: Rc>, _| -> Result>, Rc>>> { - let mut errv: Vec> = Vec::new(); - let reg_err = |e, errv: &mut Vec>| { - errv.push(e); - if symbol.len() == errv.len() { Err(Rc::new(errv.clone())) } - else { Ok(()) } - }; - loop { - // TODO: this should not live on the heap - let path = Rc::new(symbol.iter() - .take(symbol.len() - errv.len() - 1) - .cloned() - .collect_vec()); - match loaded.find(&path).as_ref() { - Ok(imports) => match imports.as_ref() { - Loaded::Source(_) | Loaded::AST(_) => break Ok(path), - }, - 
Err(err) => reg_err(err.clone(), &mut errv)? - } - } - } - }); - // Preliminarily parse a file, substitution rules and imports are valid - let preparsed = Rc::new(Cache::new({ - // let prelude_path = vec!["prelude".to_string()]; - // let interned_prelude_path = Rc::new( - // prelude_path.iter() - // .map(|s| intern(s.as_str())) - // .collect_vec() - // ); - let loaded = loaded.clone(); - move |path: Rc>, _| -> ParseResult> { - let loaded = loaded.find(&path)?; - match loaded.as_ref() { - Loaded::Source(source) => { - let mut entv = parse::parse(&[] as &[&str], source.as_str(), intern)?; - // if path != interned_prelude_path { - // entv.push(FileEntry::Import(vec![Import{ - // name: None, path: prelude_path - // }])) - // } - Ok(entv) - } - Loaded::AST(ast) => Ok(ast.clone()), - } - } - })); - // Collect all toplevel names exported from a given file - let exports = Rc::new(Cache::new({ - let loaded = loaded.clone(); - let preparsed = preparsed.clone(); - move |path: Rc>, _| -> ParseResult> { - let loaded = loaded.find(&path)?; - let preparsed = preparsed.find(&path)?; - Ok(exported_names(&preparsed) - .into_iter() - .map(|n| n[0].clone()) - .collect()) - } - })); - // Collect all toplevel names imported by a given file - let imports = Rc::new(Cache::new({ - let preparsed = preparsed.clone(); - let exports = exports.clone(); - move |path: Rc>, _| -> ParseResult>>>> { - let entv = preparsed.find(&path)?; - let import_entries = imports(entv.iter()); - let mut imported_symbols = HashMap::>>::new(); - for imp in import_entries { - let export_list = exports.find(&path)?; - if let Some(ref name) = imp.name { - if export_list.contains(name) { - imported_symbols.insert(name.clone(), imp.path.clone()); - } else { - panic!("{:?} doesn't export {}", imp.path, deintern(*name)) - } - } else { - for exp in export_list { - imported_symbols.insert(exp, imp.path.clone()); - } - } - } - // println!("Imports for {:?} are {:?}", path.as_ref(), imported_symbols); - 
Ok(Rc::new(imported_symbols)) - } - })); - // Final parse, operators are correctly separated - let parsed = Rc::new(Cache::new({ - let preparsed = preparsed.clone(); - let imports = imports.clone(); - let loaded = loaded.clone(); - move |path: Rc>, _| -> ParseResult> { - let imported_ops: Vec = - imports.find(&path)? - .keys() - .map(|s| deintern(*s).to_string()) - .filter(|s| parse::is_op(s)) - .collect(); - let pre = preparsed.find(&path)?; - match loaded.find(&path)?.as_ref() { - Loaded::Source(source) => Ok(parse::reparse( - &imported_ops, source.as_str(), &pre, intern - )?), - Loaded::AST(ast) => Ok(ast.clone()), - } - } - })); - let name_resolver = NameResolver::new({ - let modname = modname.clone(); - move |path| { - let modname = modname.find(&path).ok()?; - let symname = Rc::new(path[modname.len()..].to_vec()); - Some((modname, symname)) - } - }, { - let imports = imports.clone(); - move |path| { - imports.find(&path).map(|f| f.as_ref().clone()) - } - }); - // Turn parsed files into a bag of rules and a list of toplevel export names - let resolved = Rc::new(Cache::new({ - let parsed = parsed.clone(); - let exports = exports.clone(); - let imports = imports.clone(); - move |path: Rc>, _| -> ParseResult { - let module = Module { - rules: parsed.find(&path)? - .iter() - .filter_map(|ent| { - if let FileEntry::Rule(Rule{source, prio, target}, _) = ent { - Some(Rule { - source: Rc::new( - source.iter() - .map(|ex| prefix_expr(ex, &path)) - .collect_vec() - ), - target: Rc::new( - target.iter() - .map(|ex| prefix_expr(ex, &path)) - .collect_vec() - ), - prio: *prio, - }) - } else { None } - }) - .map(|Rule{ source, target, prio }| Ok(super::Rule { - source: Rc::new(source.iter() - .map(|ex| name_resolver.process_expression(ex)) - .collect::, _>>()?), - target: Rc::new(target.iter() - .map(|ex| name_resolver.process_expression(ex)) - .collect::, _>>()?), - prio - })) - .collect::>>()?, - exports: exports.find(&path)?.clone(), - references: imports.find(&path)? 
- .values().cloned().collect() - }; - Ok(module) - } - })); - Cache::new({ - let resolved = resolved.clone(); - move |path: Rc>, _| -> ParseResult> { - // Breadth-first search - let mut processed: HashSet>> = HashSet::new(); - let mut rules: Vec = Vec::new(); - let mut pending: VecDeque>> = VecDeque::new(); - pending.push_back(path); - while let Some(el) = pending.pop_front() { - let resolved = resolved.find(&el)?; - processed.insert(el.clone()); - pending.extend( - resolved.references.iter() - .filter(|&v| !processed.contains(v)) - .cloned() - ); - rules.extend( - resolved.rules.iter().cloned() - ); - }; - Ok(rules) - } - }) -} diff --git a/src/representations/ast.rs b/src/representations/ast.rs index 91d5d82..f43403a 100644 --- a/src/representations/ast.rs +++ b/src/representations/ast.rs @@ -1,142 +1,154 @@ -use lasso::RodeoResolver; -use lasso::Spur; use itertools::Itertools; use ordered_float::NotNan; use std::hash::Hash; use std::rc::Rc; -use crate::utils::InternedDisplay; -use crate::utils::Stackframe; +use crate::interner::{Interner, InternedDisplay}; +use crate::utils::Substack; +use crate::interner::Token; +use super::location::Location; use super::primitive::Primitive; /// An S-expression with a type -#[derive(PartialEq, Eq, Hash)] -pub struct Expr(pub Clause, pub Rc>); -impl Expr { - pub fn into_clause(self) -> Clause { - if self.1.len() == 0 { self.0 } - else { Clause::S('(', Rc::new(vec![self])) } - } - - pub fn visit_names(&self, - binds: Stackframe>>, - cb: &mut F - ) where F: FnMut(Rc>) { - let Expr(val, typ) = self; - val.visit_names(binds.clone(), cb); - for typ in typ.as_ref() { - typ.visit_names(binds.clone(), cb); - } - } +#[derive(Clone, Debug, PartialEq)] +pub struct Expr{ + pub value: Clause, + pub location: Location } -impl Clone for Expr { - fn clone(&self) -> Self { - Self(self.0.clone(), self.1.clone()) +impl Expr { + pub fn into_clause(self) -> Clause { + self.value + } + + pub fn visit_names(&self, + binds: Substack>>>, + cb: &mut 
impl FnMut(Token>>) + ) { + let Expr{value, ..} = self; + value.visit_names(binds.clone(), cb); + } + + /// Process all names with the given mapper. + /// Return a new object if anything was processed + pub fn map_names(&self, + pred: &impl Fn(Token>>) -> Option>>> + ) -> Option { + Some(Self { + value: self.value.map_names(pred)?, + location: self.location.clone(), + }) + } + + /// Add the specified prefix to every Name + pub fn prefix(&self, + prefix: Token>>, + i: &Interner, + except: &impl Fn(Token) -> bool, + ) -> Self { + Self{ + value: self.value.prefix(prefix, i, except), + location: self.location.clone(), + } } } impl InternedDisplay for Expr { - fn fmt(&self, - f: &mut std::fmt::Formatter<'_>, - rr: RodeoResolver - ) -> std::fmt::Result { - let Expr(val, typ) = self; - val.fmt(f, rr)?; - for typ in typ.as_ref() { - write!(f, ":")?; - typ.fmt(f, rr)?; - } + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + let Expr{value, ..} = self; + value.fmt_i(f, i)?; Ok(()) } } +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum PHClass { + Vec{ + nonzero: bool, + prio: u64 + }, + Scalar, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct Placeholder { + pub name: Token, + pub class: PHClass +} + +impl InternedDisplay for Placeholder { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + let name = i.r(self.name); + match self.class { + PHClass::Scalar => write!(f, "${name}"), + PHClass::Vec { nonzero, prio } => { + if nonzero {write!(f, "...${name}:{prio}")} + else {write!(f, "..${name}:{prio}")} + } + } + } +} + /// An S-expression as read from a source file -#[derive(PartialEq, Eq, Hash, Clone)] +#[derive(Debug, PartialEq, Clone)] pub enum Clause { P(Primitive), /// A c-style name or an operator, eg. `+`, `i`, `foo::bar` - Name(Rc>), + Name(Token>>), /// A parenthesized exmrc_empty_slice()pression /// eg. 
`(print out "hello")`, `[1, 2, 3]`, `{Some(t) => t}` S(char, Rc>), - /// An explicit expression associated with the leftmost, outermost - /// [Clause::Auto], eg. `read @Uint` - Explicit(Rc), /// A function expression, eg. `\x. x + 1` - Lambda(Rc, Rc>, Rc>), - /// A parameterized expression with type inference, eg. `@T. T -> T` - Auto(Option>, Rc>, Rc>), + Lambda(Rc, Rc>), /// A placeholder for macros, eg. `$name`, `...$body`, `...$lhs:1` - Placeh{ - key: String, - /// None => matches one token - /// Some((prio, nonzero)) => - /// prio is the sizing priority for the vectorial - /// (higher prio grows first) - /// nonzero is whether the vectorial matches 1..n or 0..n tokens - vec: Option<(usize, bool)> - }, + Placeh(Placeholder) } impl Clause { + /// Extract the expressions from an auto, lambda or S pub fn body(&self) -> Option>> { match self { - Self::Auto(_, _, body) | - Self::Lambda(_, _, body) | + Self::Lambda(_, body) | Self::S(_, body) => Some(body.clone()), _ => None } } - pub fn typ(&self) -> Option>> { - match self { - Self::Auto(_, typ, _) | Self::Lambda(_, typ, _) => Some(typ.clone()), - _ => None - } - } + + /// Convert with identical meaning pub fn into_expr(self) -> Expr { if let Self::S('(', body) = &self { if body.len() == 1 { body[0].clone() } - else { Expr(self, Rc::default()) } - } else { Expr(self, Rc::default()) } + else { Expr{ value: self, location: Location::Unknown } } + } else { Expr{ value: self, location: Location::Unknown } } } - pub fn from_exprv(exprv: &[Expr]) -> Option { - if exprv.len() == 0 { None } - else if exprv.len() == 1 { Some(exprv[0].clone().into_clause()) } - else { Some(Self::S('(', Rc::new(exprv.to_vec()))) } + + /// Convert with identical meaning + pub fn from_exprs(exprs: &[Expr]) -> Option { + if exprs.len() == 0 { None } + else if exprs.len() == 1 { Some(exprs[0].clone().into_clause()) } + else { Some(Self::S('(', Rc::new(exprs.to_vec()))) } + } + /// Convert with identical meaning + pub fn from_exprv(exprv: &Rc>) -> 
Option { + if exprv.len() < 2 { Self::from_exprs(exprv) } + else { Some(Self::S('(', exprv.clone())) } } /// Recursively iterate through all "names" in an expression. /// It also finds a lot of things that aren't names, such as all /// bound parameters. Generally speaking, this is not a very /// sophisticated search. - pub fn visit_names(&self, - binds: Stackframe>>, - cb: &mut F - ) where F: FnMut(Rc>) { + pub fn visit_names(&self, + binds: Substack>>>, + cb: &mut impl FnMut(Token>>) + ) { match self { - Clause::Auto(name, typ, body) => { - for x in typ.iter() { - x.visit_names(binds.clone(), cb) - } - let binds_dup = binds.clone(); - let new_binds = if let Some(rc) = name { - if let Clause::Name(name) = rc.as_ref() { - binds_dup.push(name.clone()) - } else { binds } - } else { binds }; + Clause::Lambda(arg, body) => { + arg.visit_names(binds, cb); + let new_binds = if let Clause::Name(n) = arg.value { + binds.push(n) + } else {binds}; for x in body.iter() { - x.visit_names(new_binds.clone(), cb) - } - }, - Clause::Lambda(name, typ, body) => { - for x in typ.iter() { - x.visit_names(binds.clone(), cb) - } - for x in body.iter() { - let new_binds = if let Clause::Name(name) = name.as_ref() { - binds.push(name.clone()) - } else { binds }; x.visit_names(new_binds, cb) } }, @@ -145,99 +157,175 @@ impl Clause { }, Clause::Name(name) => { if binds.iter().all(|x| x != name) { - cb(name.clone()) + cb(*name) } } _ => (), } } + + /// Process all names with the given mapper. 
+ /// Return a new object if anything was processed + pub fn map_names(&self, + pred: &impl Fn(Token>>) -> Option>>> + ) -> Option { + match self { + Clause::P(_) | Clause::Placeh(_) => None, + Clause::Name(name) => pred(*name).map(Clause::Name), + Clause::S(c, body) => { + let mut any_some = false; + let new_body = body.iter().map(|e| { + let val = e.map_names(pred); + any_some |= val.is_some(); + val.unwrap_or_else(|| e.clone()) + }).collect(); + if any_some {Some(Clause::S(*c, Rc::new(new_body)))} else {None} + } + Clause::Lambda(arg, body) => { + let new_arg = arg.map_names(pred); + let mut any_some = new_arg.is_some(); + let new_body = body.iter().map(|e| { + let val = e.map_names(pred); + any_some |= val.is_some(); + val.unwrap_or_else(|| e.clone()) + }).collect(); + if any_some {Some(Clause::Lambda( + new_arg.map(Rc::new) + .unwrap_or_else(|| arg.clone()), + Rc::new(new_body) + ))} else {None} + } + } + } + + /// Add the specified prefix to every Name + pub fn prefix(&self, + prefix: Token>>, + i: &Interner, + except: &impl Fn(Token) -> bool, + ) -> Self { + self.map_names(&|name| { + let old = i.r(name); + if except(old[0]) {return None} + let mut new = i.r(prefix).clone(); + new.extend_from_slice(&old); + Some(i.i(&new)) + }).unwrap_or_else(|| self.clone()) + } } -fn fmt_expr_seq( - it: &mut dyn Iterator, +fn fmt_expr_seq<'a>( + it: &mut impl Iterator, f: &mut std::fmt::Formatter<'_>, - rr: RodeoResolver + i: &Interner ) -> std::fmt::Result { - for item in Itertools::intersperse(it.map(Some), None) { match item { - Some(expr) => expr.fmt(f, rr), - None => f.write_str(" "), - }? } + for item in Itertools::intersperse(it.map(Some), None) { + match item { + Some(expr) => expr.fmt_i(f, i), + None => f.write_str(" "), + }? 
+ } Ok(()) } pub fn fmt_name( - name: &Rc>, f: &mut std::fmt::Formatter, rr: RodeoResolver + name: Token>>, + f: &mut std::fmt::Formatter, + i: &Interner ) -> std::fmt::Result { - for el in itertools::intersperse( - name.iter().map(|s| rr.resolve(s)), - "::" - ) { + let strings = i.r(name).iter() + .map(|t| i.r(*t).as_str()); + for el in itertools::intersperse(strings, "::") { write!(f, "{}", el)? } Ok(()) } impl InternedDisplay for Clause { - fn fmt(&self, - f: &mut std::fmt::Formatter<'_>, - rr: RodeoResolver - ) -> std::fmt::Result { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { match self { Self::P(p) => write!(f, "{:?}", p), - Self::Name(name) => fmt_name(name, f, rr), + Self::Name(name) => fmt_name(*name, f, i), Self::S(del, items) => { f.write_str(&del.to_string())?; - fmt_expr_seq(&mut items.iter(), f, rr)?; + fmt_expr_seq(&mut items.iter(), f, i)?; f.write_str(match del { '(' => ")", '[' => "]", '{' => "}", _ => "CLOSING_DELIM" }) }, - Self::Lambda(name, argtyp, body) => { + Self::Lambda(arg, body) => { f.write_str("\\")?; - name.fmt(f, rr)?; - f.write_str(":")?; - fmt_expr_seq(&mut argtyp.iter(), f, rr)?; + arg.fmt_i(f, i)?; f.write_str(".")?; - fmt_expr_seq(&mut body.iter(), f, rr) + fmt_expr_seq(&mut body.iter(), f, i) }, - Self::Auto(name_opt, argtyp, body) => { - f.write_str("@")?; - if let Some(name) = name_opt { name.fmt(f, rr)? 
} - f.write_str(":")?; - fmt_expr_seq(&mut argtyp.iter(), f, rr)?; - f.write_str(".")?; - fmt_expr_seq(&mut body.iter(), f, rr) - }, - Self::Placeh{key, vec: None} => write!(f, "${key}"), - Self::Placeh{key, vec: Some((prio, true))} => - write!(f, "...${key}:{prio}"), - Self::Placeh{key, vec: Some((prio, false))} => - write!(f, "..${key}:{prio}"), - Self::Explicit(expr) => { - write!(f, "@")?; - expr.fmt(f, rr) - } + Self::Placeh(ph) => ph.fmt_i(f, i), } } } /// A substitution rule as read from the source -#[derive(Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq)] pub struct Rule { pub source: Rc>, pub prio: NotNan, pub target: Rc> } +impl Rule { + pub fn collect_single_names(&self, i: &Interner) -> Vec> { + let mut names = Vec::new(); + for e in self.source.iter() { + e.visit_names(Substack::Bottom, &mut |tok| { + let ns_name = i.r(tok); + let (name, excess) = ns_name.split_first() + .expect("Namespaced name must not be empty"); + if excess.len() > 0 {return} + names.push(*name) + }); + } + names + } + + pub fn prefix(&self, + prefix: Token>>, + i: &Interner, + except: &impl Fn(Token) -> bool + ) -> Self { + Self { + prio: self.prio, + source: Rc::new(self.source.iter() + .map(|e| e.prefix(prefix, i, except)) + .collect() + ), + target: Rc::new(self.target.iter() + .map(|e| e.prefix(prefix, i, except)) + .collect() + ), + } + } +} + impl InternedDisplay for Rule { - fn fmt(&self, - f: &mut std::fmt::Formatter<'_>, - rr: RodeoResolver - ) -> std::fmt::Result { - for e in self.source.iter() { e.fmt(f, rr)?; write!(f, " ")?; } + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + for e in self.source.iter() { e.fmt_i(f, i)?; write!(f, " ")?; } write!(f, "={}=>", self.prio)?; - for e in self.target.iter() { write!(f, " ")?; e.fmt(f, rr)?; } + for e in self.target.iter() { write!(f, " ")?; e.fmt_i(f, i)?; } Ok(()) } +} + +/// A named constant +#[derive(Debug, Clone, PartialEq)] +pub struct Constant { + pub name: Token, 
+ pub value: Expr +} + +impl InternedDisplay for Constant { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + write!(f, "{} := ", i.r(self.name))?; + self.value.fmt_i(f, i) + } } \ No newline at end of file diff --git a/src/representations/ast_to_postmacro.rs b/src/representations/ast_to_postmacro.rs index 8e05ae2..f1985b4 100644 --- a/src/representations/ast_to_postmacro.rs +++ b/src/representations/ast_to_postmacro.rs @@ -1,10 +1,9 @@ use std::{rc::Rc, fmt::Display}; -use lasso::{Spur, RodeoResolver}; +use crate::interner::Token; +use crate::utils::Substack; -use crate::utils::Stackframe; - -use super::{ast, postmacro}; +use super::{ast, postmacro, location::Location}; #[derive(Clone)] pub enum Error { @@ -13,26 +12,10 @@ pub enum Error { /// Only `(...)` may be converted to typed lambdas. `[...]` and `{...}` /// left in the code are signs of incomplete macro execution BadGroup(char), - /// `foo:bar:baz` will be parsed as `(foo:bar):baz`. Explicitly - /// specifying `foo:(bar:baz)` is forbidden and it's also meaningless - /// since `baz` can only ever be the kind of types - ExplicitKindOfType, - /// Name never bound in an enclosing scope - indicates incomplete - /// macro substitution - Unbound(Vec), /// Placeholders shouldn't even occur in the code during macro execution. 
/// Something is clearly terribly wrong Placeholder, - /// It's possible to try and transform the clause `(foo:bar)` into a - /// typed clause, however the correct value of this ast clause is a - /// typed expression (included in the error) - /// - /// [expr] handles this case, so it's only really possible to get this - /// error if you're calling [clause] directly - ExprToClause(postmacro::Expr), - /// @ tokens only ever occur between a function and a parameter - NonInfixAt, - /// Arguments can be either [ast::Clause::Name] or [ast::Clause::Placeh] + /// Arguments can only be [ast::Clause::Name] InvalidArg } @@ -40,110 +23,74 @@ impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::EmptyS => write!(f, "`()` as a clause is meaningless in lambda calculus"), - Error::BadGroup(c) => write!(f, "Only `(...)` may be converted to typed lambdas. `[...]` and `{{...}}` left in the code are signs of incomplete macro execution"), - Error::ExplicitKindOfType => write!(f, "`foo:bar:baz` will be parsed as `(foo:bar):baz`. Explicitly specifying `foo:(bar:baz)` is forbidden and meaningless since `baz` can only ever be the kind of types"), - Error::Unbound(name) => { - write!(f, "Name \""); - for el in itertools::intersperse( - name.iter().map(String::as_str), - "::" - ) { write!(f, "{}", el)? } - write!(f, "\" never bound in an enclosing scope. This indicates incomplete macro substitution") - } - Error::Placeholder => write!(f, "Placeholders shouldn't even occur in the code during macro execution, this is likely a compiler bug"), - Error::ExprToClause(expr) => write!(f, "Attempted to transform the clause (foo:bar) into a typed clause. 
This is likely a compiler bug"), - Error::NonInfixAt => write!(f, "@ as a token can only ever occur between a generic and a type parameter."), - Error::InvalidArg => write!(f, "Arguments can be either Name or Placeholder nodes") + Error::BadGroup(_) => write!(f, "Only `(...)` may be converted to typed lambdas. `[...]` and `{{...}}` left in the code are signs of incomplete macro execution"), + Error::Placeholder => write!(f, "Placeholders shouldn't even appear in the code during macro execution, this is likely a compiler bug"), + Error::InvalidArg => write!(f, "Arguments can only be Name nodes") } } } -#[derive(Clone, Copy)] -struct Init<'a>(&'a RodeoResolver); - /// Try to convert an expression from AST format to typed lambda -pub fn expr(expr: &ast::Expr, i: Init) -> Result { - expr_rec(expr, Context::new(i)) +pub fn expr(expr: &ast::Expr) -> Result { + expr_rec(expr, Context::new()) } /// Try and convert a single clause from AST format to typed lambda -pub fn clause( - clause: &ast::Clause, i: Init -) -> Result { - clause_rec(clause, Context::new(i)) +pub fn _clause(clause: &ast::Clause) +-> Result +{ + clause_rec(clause, Context::new()) } /// Try and convert a sequence of expressions from AST format to /// typed lambda -pub fn exprv( - exprv: &[ast::Expr], i: Init -) -> Result { - exprv_rec(exprv, Context::new(i)) +pub fn _exprv(exprv: &[ast::Expr]) +-> Result +{ + exprv_rec(exprv, Context::new()) } #[derive(Clone, Copy)] -struct Context<'a> { - names: Stackframe<'a, (&'a [Spur], bool)>, - rr: &'a RodeoResolver -} +struct Context<'a> { names: Substack<'a, Token>>> } impl<'a> Context<'a> { - fn w_name<'b>(&'b self, - name: &'b [Spur], - is_auto: bool - ) -> Context<'b> where 'a: 'b { - Context { - names: self.names.push((name, is_auto)), - rr: self.rr - } + fn w_name<'b>(&'b self, name: Token>>) -> Context<'b> where 'a: 'b { + Context { names: self.names.push(name) } } - fn new(i: Init) -> Context<'static> { - Context { names: Stackframe::new((&[], false)), rr: 
i.0 } + fn new() -> Context<'static> { + Context { names: Substack::Bottom } } } /// Recursive state of [exprv] -fn exprv_rec<'a>( - v: &'a [ast::Expr], - ctx: Context<'a>, -) -> Result { +fn exprv_rec<'a>(v: &'a [ast::Expr], ctx: Context<'a>) +-> Result { let (last, rest) = v.split_last().ok_or(Error::EmptyS)?; - if rest.len() == 0 {return expr_rec(&v[0], ctx)} - let clause = if let ast::Expr(ast::Clause::Explicit(inner), empty_slice) = last { - assert!(empty_slice.len() == 0, - "It is assumed that Explicit nodes can never have type annotations as the \ - wrapped expression node matches all trailing colons." - ); - let x = expr_rec(inner.as_ref(), ctx)?; - postmacro::Clause::Explicit(Rc::new(exprv_rec(rest, ctx)?), Rc::new(x)) - } else { - let f = exprv_rec(rest, ctx)?; - let x = expr_rec(last, ctx)?; - postmacro::Clause::Apply(Rc::new(f), Rc::new(x)) - }; - Ok(postmacro::Expr(clause, Rc::new(vec![]))) + if rest.is_empty() { + return expr_rec(&v[0], ctx); + } + let f = exprv_rec(rest, ctx)?; + let x = expr_rec(last, ctx)?; + let value = postmacro::Clause::Apply(Rc::new(f), Rc::new(x)); + Ok(postmacro::Expr{ value, location: Location::Unknown }) } /// Recursive state of [expr] fn expr_rec<'a>( - ast::Expr(val, typ): &'a ast::Expr, + ast::Expr{ value, location }: &'a ast::Expr, ctx: Context<'a> -) -> Result { // (output, used_explicits) - let typ: Vec = typ.iter() - .map(|c| clause_rec(c, ctx)) - .collect::>()?; - if let ast::Clause::S(paren, body) = val { +) -> Result { + if let ast::Clause::S(paren, body) = value { if *paren != '(' {return Err(Error::BadGroup(*paren))} - let postmacro::Expr(inner, inner_t) = exprv_rec(body.as_ref(), ctx)?; - let new_t = - if typ.len() == 0 { inner_t } - else if inner_t.len() == 0 { Rc::new(typ) } - else { Rc::new(inner_t.iter().chain(typ.iter()).cloned().collect()) }; - Ok(postmacro::Expr(inner, new_t)) + let expr = exprv_rec(body.as_ref(), ctx)?; + Ok(postmacro::Expr{ + value: expr.value, + location: location.clone() + }) } else 
{ - let cls = clause_rec(&val, ctx)?; - Ok(postmacro::Expr(cls, Rc::new(typ))) + let value = clause_rec(&value, ctx)?; + Ok(postmacro::Expr{ value, location: location.clone() }) } } @@ -157,53 +104,30 @@ fn clause_rec<'a>( ) -> Result { match cls { ast::Clause::P(p) => Ok(postmacro::Clause::P(p.clone())), - ast::Clause::Auto(no, t, b) => { - let typ = if t.len() == 0 {Rc::new(vec![])} else { - let postmacro::Expr(c, t) = exprv_rec(t.as_ref(), ctx)?; - if t.len() > 0 {return Err(Error::ExplicitKindOfType)} - else {Rc::new(vec![c])} - }; - let body_ctx = if let Some(rc) = no { - match rc.as_ref() { - ast::Clause::Name(name) => ctx.w_name(&&**name, true), - ast::Clause::Placeh { .. } => return Err(Error::Placeholder), - _ => return Err(Error::InvalidArg) - } - } else {ctx}; - let body = exprv_rec(b.as_ref(), body_ctx)?; - Ok(postmacro::Clause::Auto(typ, Rc::new(body))) - } - ast::Clause::Lambda(n, t, b) => { - let typ = if t.len() == 0 {Rc::new(vec![])} else { - let postmacro::Expr(c, t) = exprv_rec(t.as_ref(), ctx)?; - if t.len() > 0 {return Err(Error::ExplicitKindOfType)} - else {Rc::new(vec![c])} - }; - let body_ctx = match n.as_ref() { - ast::Clause::Name(name) => ctx.w_name(&&**name, true), + ast::Clause::Lambda(expr, b) => { + let name = match expr.value { + ast::Clause::Name(name) => name, ast::Clause::Placeh { .. 
} => return Err(Error::Placeholder), _ => return Err(Error::InvalidArg) }; + let body_ctx = ctx.w_name(name); let body = exprv_rec(b.as_ref(), body_ctx)?; - Ok(postmacro::Clause::Lambda(typ, Rc::new(body))) + Ok(postmacro::Clause::Lambda(Rc::new(body))) } ast::Clause::Name(name) => { - let (level, (_, is_auto)) = ctx.names.iter().enumerate() - .find(|(_, (n, _))| n == &name.as_slice()) - .ok_or_else(|| Error::Unbound( - name.iter().map(|s| ctx.rr.resolve(s).to_string()).collect() - ))?; - let label = if *is_auto {postmacro::Clause::AutoArg} - else {postmacro::Clause::LambdaArg}; - Ok(label(level)) + let lvl_opt = ctx.names.iter().enumerate() + .find(|(_, n)| *n == name) + .map(|(lvl, _)| lvl); + Ok(match lvl_opt { + Some(lvl) => postmacro::Clause::LambdaArg(lvl), + None => postmacro::Clause::Constant(*name) + }) } ast::Clause::S(paren, entries) => { if *paren != '(' {return Err(Error::BadGroup(*paren))} - let postmacro::Expr(val, typ) = exprv_rec(entries.as_ref(), ctx)?; - if typ.len() == 0 {Ok(val)} - else {Err(Error::ExprToClause(postmacro::Expr(val, typ)))} + let expr = exprv_rec(entries.as_ref(), ctx)?; + Ok(expr.value) }, - ast::Clause::Placeh { .. } => Err(Error::Placeholder), - ast::Clause::Explicit(..) => Err(Error::NonInfixAt) + ast::Clause::Placeh { .. 
} => Err(Error::Placeholder) } } \ No newline at end of file diff --git a/src/representations/interpreted.rs b/src/representations/interpreted.rs index a9c2edc..03d9213 100644 --- a/src/representations/interpreted.rs +++ b/src/representations/interpreted.rs @@ -1,34 +1,156 @@ -use std::fmt::{Display, Debug}; +use std::cell::RefCell; +use std::fmt::Debug; +use std::ops::{Deref, DerefMut}; use std::rc::Rc; -use crate::utils::Side; -use crate::foreign::{ExternError, Atom}; +use crate::interner::{Token, InternedDisplay}; +use crate::utils::print_nname; use super::Literal; +use super::location::Location; use super::path_set::PathSet; use super::primitive::Primitive; -#[derive(Clone, PartialEq, Eq, Hash)] +// TODO: implement Debug, Eq and Hash with cycle detection + +pub struct Expr { + pub clause: Clause, + pub location: Location, +} + +impl Debug for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self.location { + Location::Unknown => write!(f, "{:?}", self.clause), + loc => write!(f, "{:?}@{}", self.clause, loc) + } + } +} + +impl InternedDisplay for Expr { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &crate::interner::Interner) -> std::fmt::Result { + match &self.location { + Location::Unknown => self.clause.fmt_i(f, i), + loc => { + write!(f, "{}:(", loc)?; + self.clause.fmt_i(f, i)?; + write!(f, ")") + } + } + } +} + +/// A wrapper around expressions to handle their multiple occurences in +/// the tree +#[derive(Clone)] +pub struct ExprInst(pub Rc>); +impl ExprInst { + pub fn expr<'a>(&'a self) -> impl Deref + 'a { + self.0.as_ref().borrow() + } + + pub fn expr_mut<'a>(&'a self) -> impl DerefMut + 'a { + self.0.as_ref().borrow_mut() + } + + /// Call a normalization function on the expression. The expr is + /// updated with the new clause which affects all copies of it + /// across the tree. 
+ pub fn try_normalize(&self, + mapper: impl FnOnce(&Clause) -> Result + ) -> Result { + let new_clause = mapper(&self.expr().clause)?; + self.expr_mut().clause = new_clause; + Ok(self.clone()) + } + + /// Run a mutation function on the expression, producing a new, + /// distinct expression. The new expression shares location info with + /// the original but is normalized independently. + pub fn try_update(&self, + mapper: impl FnOnce(&Clause) -> Result + ) -> Result { + let expr = self.expr(); + let new_expr = Expr{ + clause: mapper(&expr.clause)?, + location: expr.location.clone(), + }; + Ok(Self(Rc::new(RefCell::new(new_expr)))) + } + + /// Call a predicate on the expression, returning whatever the + /// predicate returns. This is a convenience function for reaching + /// through the RefCell. + pub fn inspect(&self, predicate: impl FnOnce(&Clause) -> T) -> T { + predicate(&self.expr().clause) + } + + pub fn with_literal(&self, + predicate: impl FnOnce(&Literal) -> T + ) -> Result { + let expr = self.expr(); + if let Clause::P(Primitive::Literal(l)) = &expr.clause { + Ok(predicate(l)) + } else {Err(())} + } +} + +impl Debug for ExprInst { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self.0.try_borrow() { + Ok(expr) => write!(f, "{:?}", expr), + Err(_) => write!(f, ""), + } + } +} + +impl InternedDisplay for ExprInst { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &crate::interner::Interner) -> std::fmt::Result { + match self.0.try_borrow() { + Ok(expr) => expr.fmt_i(f, i), + Err(_) => write!(f, "") + } + } +} + +#[derive(Debug, Clone)] pub enum Clause { P(Primitive), Apply{ - f: Rc, - x: Rc, - id: usize + f: ExprInst, + x: ExprInst }, + Constant(Token>>), Lambda{ args: Option, - body: Rc + body: ExprInst }, LambdaArg, } +impl Clause { + /// Wrap a constructed clause in an expression. 
Avoid using this to wrap + /// copied or moved clauses as it does not have debug information and + /// does not share a normalization cache list with them. + pub fn wrap(self) -> ExprInst { + ExprInst(Rc::new(RefCell::new(Expr{ + location: Location::Unknown, + clause: self + }))) + } +} -impl Debug for Clause { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl InternedDisplay for Clause { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &crate::interner::Interner) -> std::fmt::Result { match self { Clause::P(p) => write!(f, "{p:?}"), Clause::LambdaArg => write!(f, "arg"), - Clause::Apply { f: fun, x, id } => write!(f, "({:?} {:?})@{}", fun.as_ref(), x.as_ref(), id), + Clause::Apply { f: fun, x } => { + write!(f, "(")?; + fun.fmt_i(f, i)?; + write!(f, " ")?; + x.fmt_i(f, i)?; + write!(f, ")") + } Clause::Lambda { args, body } => { write!(f, "\\")?; match args { @@ -36,177 +158,15 @@ impl Debug for Clause { None => write!(f, "_")?, } write!(f, ".")?; - write!(f, "{:?}", body.as_ref()) - } - } - } -} - -impl TryFrom for Literal { - type Error = Clause; - fn try_from(value: Clause) -> Result { - if let Clause::P(Primitive::Literal(l)) = value {Ok(l)} - else {Err(value)} - } -} - -impl<'a> TryFrom<&'a Clause> for &'a Literal { - type Error = (); - fn try_from(value: &'a Clause) -> Result { - if let Clause::P(Primitive::Literal(l)) = value {Ok(l)} - else {Err(())} - } -} - -/// Problems in the process of execution -#[derive(Clone)] -pub enum RuntimeError { - Extern(Rc), - NonFunctionApplication(usize), -} - -impl Display for RuntimeError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Extern(e) => write!(f, "Error in external function: {e}"), - Self::NonFunctionApplication(loc) => write!(f, "Primitive applied as function at {loc}") - } - } -} - -/// Various reasons why a new clause might not have been produced -#[derive(Clone)] -pub enum InternalError { - Runtime(RuntimeError), - NonReducible 
-} - -fn map_at Result>( - path: &[Side], source: &Clause, mapper: F -) -> Result { - // Pass right through lambdas - if let Clause::Lambda { args, body } = source { - return Ok(Clause::Lambda { - args: args.clone(), - body: Rc::new(map_at(path, body, mapper)?) - }) - } - // If the path ends here, process the next (non-lambda) node - let (head, tail) = if let Some(sf) = path.split_first() {sf} else { - return mapper(source) - }; - // If it's an Apply, execute the next step in the path - if let Clause::Apply { f, x, id } = source { - return Ok(match head { - Side::Left => Clause::Apply { - f: Rc::new(map_at(tail, f, mapper)?), - x: x.clone(), - id: *id + body.fmt_i(f, i) }, - Side::Right => Clause::Apply { - f: f.clone(), - x: Rc::new(map_at(tail, x, mapper)?), - id: *id - } - }) - } - panic!("Invalid path") -} - -fn substitute(PathSet { steps, next }: &PathSet, value: &Clause, body: &Clause) -> Clause { - map_at(&steps, body, |checkpoint| -> Result { - match (checkpoint, next) { - (Clause::Lambda{..}, _) => unreachable!("Handled by map_at"), - (Clause::Apply { f, x, id }, Some((left, right))) => Ok(Clause::Apply { - f: Rc::new(substitute(left, value, f)), - x: Rc::new(substitute(right, value, x)), - id: *id - }), - (Clause::LambdaArg, None) => Ok(value.clone()), - (_, None) => panic!("Substitution path ends in something other than LambdaArg"), - (_, Some(_)) => panic!("Substitution path leads into something other than Apply"), + Clause::Constant(t) => write!(f, "{}", print_nname(*t, i)) } - }).into_ok() -} - -fn apply(f: &Clause, x: Rc, id: usize) -> Result { - match f { - Clause::P(Primitive::Atom(Atom(a))) => Ok(Clause::Apply { // Don't execute a pre-application - f: Rc::new(a.run_once()?), // take a step in expanding the atom instead - x, id - }), - Clause::P(Primitive::ExternFn(f)) => f.apply(x.as_ref().clone()) - .map_err(|e| InternalError::Runtime(RuntimeError::Extern(e))), - fex@Clause::Apply{..} => Ok(Clause::Apply{ // Don't execute the pre-function 
expression - f: Rc::new(fex.run_once()?), // take a step in resolving it instead - x, id - }), - Clause::Lambda{args, body} => Ok(if let Some(args) = args { - substitute(args, x.as_ref(), body) - } else {body.as_ref().clone()}), - _ => Err(InternalError::Runtime(RuntimeError::NonFunctionApplication(id))) } } -impl Clause { - pub fn run_once(&self) -> Result { - match self { - Clause::Apply{f, x, id} => apply(f.as_ref(), x.clone(), *id), - Clause::P(Primitive::Atom(Atom(data))) => data.run_once(), - _ => Err(InternalError::NonReducible) - } - } - - pub fn run_n_times(&self, n: usize) -> Result<(Self, usize), RuntimeError> { - let mut i = self.clone(); - let mut done = 0; - while done < n { - match match &i { - Clause::Apply{f, x, id} => match apply(f.as_ref(), x.clone(), *id) { - Err(e) => Err(e), - Ok(c) => { - i = c; - done += 1; - Ok(()) - } - }, - Clause::P(Primitive::Atom(Atom(data))) => match data.run_n_times(n - done) { - Err(e) => Err(InternalError::Runtime(e)), - Ok((c, n)) => { - i = c; - done += n; - Ok(()) - } - }, - _ => Err(InternalError::NonReducible) - } { - Err(InternalError::NonReducible) => return Ok((i, done)), - Err(InternalError::Runtime(e)) => return Err(e), - Ok(()) => () - } - } - return Ok((i, done)); - } - - pub fn run_to_completion(&self) -> Result { - let mut i = self.clone(); - loop { - match match &i { - Clause::Apply { f, x, id } => match apply(f.as_ref(), x.clone(), *id) { - Err(e) => Err(e), - Ok(c) => Ok(i = c) - }, - Clause::P(Primitive::Atom(Atom(data))) => match data.run_to_completion() { - Err(e) => Err(InternalError::Runtime(e)), - Ok(c) => Ok(i = c) - }, - _ => Err(InternalError::NonReducible) - } { - Err(InternalError::NonReducible) => break, - Err(InternalError::Runtime(e)) => return Err(e), - Ok(()) => () - } - }; - Ok(i) +impl> From for Clause { + fn from(value: T) -> Self { + Self::P(Primitive::Literal(value.into())) } } \ No newline at end of file diff --git a/src/representations/literal.rs 
b/src/representations/literal.rs index 602bc2f..686b418 100644 --- a/src/representations/literal.rs +++ b/src/representations/literal.rs @@ -19,4 +19,17 @@ impl Debug for Literal { Self::Str(arg0) => write!(f, "{:?}", arg0), } } +} + +impl From> for Literal { + fn from(value: NotNan) -> Self { Self::Num(value) } +} +impl From for Literal { + fn from(value: u64) -> Self { Self::Uint(value) } +} +impl From for Literal { + fn from(value: char) -> Self { Self::Char(value) } +} +impl From for Literal { + fn from(value: String) -> Self { Self::Str(value) } } \ No newline at end of file diff --git a/src/representations/location.rs b/src/representations/location.rs new file mode 100644 index 0000000..d7f4ed4 --- /dev/null +++ b/src/representations/location.rs @@ -0,0 +1,40 @@ +use std::{ops::Range, rc::Rc, fmt::Display}; + +use itertools::Itertools; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Location { + Unknown, + File(Rc>), + Range{ + file: Rc>, + range: Range, + } +} + +impl Location { + pub fn range(&self) -> Option> { + if let Self::Range{ range, .. } = self { + Some(range.clone()) + } else { None } + } + + pub fn file(&self) -> Option>> { + if let Self::File(file) | Self::Range { file, .. 
} = self { + Some(file.clone()) + } else { None } + } +} + +impl Display for Location { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Unknown => write!(f, "unknown"), + Self::File(file) => write!(f, "{}.orc", file.iter().join("/")), + Self::Range{ file, range } => write!(f, + "{}.orc:{}..{}", + file.iter().join("/"), range.start, range.end + ) + } + } +} diff --git a/src/representations/mod.rs b/src/representations/mod.rs index 977faad..289c140 100644 --- a/src/representations/mod.rs +++ b/src/representations/mod.rs @@ -3,11 +3,13 @@ pub mod ast; pub mod literal; pub mod ast_to_postmacro; pub(crate) mod interpreted; -mod postmacro; -mod primitive; -mod path_set; +pub mod postmacro; +pub mod primitive; +pub mod path_set; pub mod sourcefile; +pub mod tree; +pub mod location; pub use path_set::PathSet; pub use primitive::Primitive; pub mod postmacro_to_interpreted; -pub use literal::Literal; +pub use literal::Literal; \ No newline at end of file diff --git a/src/representations/postmacro.rs b/src/representations/postmacro.rs index 691d8b9..3c6b88c 100644 --- a/src/representations/postmacro.rs +++ b/src/representations/postmacro.rs @@ -1,5 +1,7 @@ use crate::utils::string_from_charset; +use crate::interner::Token; +use super::location::Location; use super::primitive::Primitive; use std::fmt::{Debug, Write}; @@ -10,20 +12,16 @@ use std::rc::Rc; #[derive(PartialEq, Eq, Clone, Copy)] struct Wrap(bool, bool); -#[derive(PartialEq, Eq, Hash, Clone)] -pub struct Expr(pub Clause, pub Rc>); +#[derive(Clone)] +pub struct Expr{ + pub value: Clause, + pub location: Location, +} + impl Expr { fn deep_fmt(&self, f: &mut std::fmt::Formatter<'_>, depth: usize, tr: Wrap) -> std::fmt::Result { - let Expr(val, typ) = self; - if typ.len() > 0 { - val.deep_fmt(f, depth, Wrap(true, true))?; - for typterm in typ.as_ref() { - f.write_char(':')?; - typterm.deep_fmt(f, depth, Wrap(true, true))?; - } - } else { - val.deep_fmt(f, depth, tr)?; - } + 
let Expr{ value, .. } = self; + value.deep_fmt(f, depth, tr)?; Ok(()) } } @@ -34,14 +32,12 @@ impl Debug for Expr { } } -#[derive(PartialEq, Eq, Hash, Clone)] +#[derive(Clone)] pub enum Clause { Apply(Rc, Rc), - Explicit(Rc, Rc), - Lambda(Rc>, Rc), - Auto(Rc>, Rc), + Lambda(Rc), + Constant(Token>>), LambdaArg(usize), - AutoArg(usize), P(Primitive), } @@ -49,15 +45,11 @@ const ARGNAME_CHARSET: &str = "abcdefghijklmnopqrstuvwxyz"; fn parametric_fmt( f: &mut std::fmt::Formatter<'_>, depth: usize, - prefix: &str, argtyp: &[Clause], body: &Expr, wrap_right: bool + prefix: &str, body: &Expr, wrap_right: bool ) -> std::fmt::Result { if wrap_right { f.write_char('(')?; } f.write_str(prefix)?; f.write_str(&string_from_charset(depth as u64, ARGNAME_CHARSET))?; - for typ in argtyp.iter() { - f.write_str(":")?; - typ.deep_fmt(f, depth, Wrap(false, false))?; - } f.write_str(".")?; body.deep_fmt(f, depth + 1, Wrap(false, false))?; if wrap_right { f.write_char(')')?; } @@ -69,9 +61,8 @@ impl Clause { -> std::fmt::Result { match self { Self::P(p) => write!(f, "{p:?}"), - Self::Lambda(argtyp, body) => parametric_fmt(f, depth, "\\", argtyp, body, wr), - Self::Auto(argtyp, body) => parametric_fmt(f, depth, "@", argtyp, body, wr), - Self::LambdaArg(skip) | Self::AutoArg(skip) => { + Self::Lambda(body) => parametric_fmt(f, depth, "\\", body, wr), + Self::LambdaArg(skip) => { let lambda_depth = (depth - skip - 1).try_into().unwrap(); f.write_str(&string_from_charset(lambda_depth, ARGNAME_CHARSET)) }, @@ -83,18 +74,13 @@ impl Clause { if wl { f.write_char(')')?; } Ok(()) } - Self::Explicit(gen, t) => { - if wl { f.write_char('(')?; } - gen.deep_fmt(f, depth, Wrap(false, true))?; - f.write_str(" @")?; - t.deep_fmt(f, depth, Wrap(true, wr && !wl))?; - if wl { f.write_char(')'); } - Ok(()) - } + Self::Constant(token) => write!(f, "{:?}", token) } } - pub fn wrap(self) -> Box { Box::new(Expr(self, Rc::new(vec![]))) } - pub fn wrap_t(self, t: Clause) -> Box { Box::new(Expr(self, 
Rc::new(vec![t]))) } + #[allow(unused)] + pub fn wrap(self) -> Box { + Box::new(Expr{ value: self, location: Location::Unknown }) + } } impl Debug for Clause { diff --git a/src/representations/postmacro_to_interpreted.rs b/src/representations/postmacro_to_interpreted.rs index 5f4aaea..bc8a094 100644 --- a/src/representations/postmacro_to_interpreted.rs +++ b/src/representations/postmacro_to_interpreted.rs @@ -1,21 +1,20 @@ -use std::{rc::Rc, fmt::Display}; +use std::{rc::Rc, cell::RefCell}; use crate::utils::Side; use super::{postmacro, interpreted, path_set::PathSet}; fn collect_paths_expr_rec(expr: &postmacro::Expr, depth: usize) -> Option { - collect_paths_cls_rec(&expr.0, depth) + collect_paths_cls_rec(&expr.value, depth) } fn collect_paths_cls_rec(cls: &postmacro::Clause, depth: usize) -> Option { match cls { - postmacro::Clause::P(_) | postmacro::Clause::Auto(..) | postmacro::Clause::AutoArg(_) - | postmacro::Clause::Explicit(..) => None, + postmacro::Clause::P(_) | postmacro::Clause::Constant(_) => None, postmacro::Clause::LambdaArg(h) => if *h != depth {None} else { Some(PathSet{ next: None, steps: Rc::new(vec![]) }) } - postmacro::Clause::Lambda(_, b) => collect_paths_expr_rec(b, depth + 1), + postmacro::Clause::Lambda(b) => collect_paths_expr_rec(b, depth + 1), postmacro::Clause::Apply(f, x) => { let f_opt = collect_paths_expr_rec(f, depth); let x_opt = collect_paths_expr_rec(x, depth); @@ -29,46 +28,26 @@ fn collect_paths_cls_rec(cls: &postmacro::Clause, depth: usize) -> Option) -> std::fmt::Result { - match self { - Self::ExplicitType => write!(f, "Type annotations are unsupported in the interpreter"), - Self::GenericMention - => write!(f, "The interpreter is typeless and therefore can't resolve generics") - } - } -} - -pub fn clause_rec(cls: &postmacro::Clause) -> Result { +pub fn clause(cls: &postmacro::Clause) -> interpreted::Clause { match cls { - postmacro::Clause::P(p) => Ok(interpreted::Clause::P(p.clone())), - postmacro::Clause::Explicit(..) 
| postmacro::Clause::AutoArg(..) | postmacro::Clause::Auto(..) - => Err(Error::GenericMention), - postmacro::Clause::Apply(f, x) => Ok(interpreted::Clause::Apply { - f: Rc::new(expr_rec(f.as_ref())?), - x: Rc::new(expr_rec(x.as_ref())?), - id: 0 - }), - postmacro::Clause::Lambda(typ, body) => if typ.len() != 0 {Err(Error::ExplicitType)} else { - Ok(interpreted::Clause::Lambda { - args: collect_paths_expr_rec(body, 0), - body: Rc::new(expr_rec(body)?) - }) + postmacro::Clause::Constant(name) + => interpreted::Clause::Constant(*name), + postmacro::Clause::P(p) => interpreted::Clause::P(p.clone()), + postmacro::Clause::Apply(f, x) => interpreted::Clause::Apply { + f: expr(f.as_ref()), + x: expr(x.as_ref()), }, - postmacro::Clause::LambdaArg(_) => Ok(interpreted::Clause::LambdaArg) + postmacro::Clause::Lambda(body) => interpreted::Clause::Lambda { + args: collect_paths_expr_rec(body, 0), + body: expr(body) + }, + postmacro::Clause::LambdaArg(_) => interpreted::Clause::LambdaArg } } -pub fn expr_rec(expr: &postmacro::Expr) -> Result { - let postmacro::Expr(c, t) = expr; - if t.len() != 0 {Err(Error::ExplicitType)} - else {clause_rec(c)} +pub fn expr(expr: &postmacro::Expr) -> interpreted::ExprInst { + interpreted::ExprInst(Rc::new(RefCell::new(interpreted::Expr{ + location: expr.location.clone(), + clause: clause(&expr.value), + }))) } \ No newline at end of file diff --git a/src/representations/primitive.rs b/src/representations/primitive.rs index 02e35c0..00df325 100644 --- a/src/representations/primitive.rs +++ b/src/representations/primitive.rs @@ -4,7 +4,6 @@ use crate::foreign::{ExternFn, Atom}; use super::Literal; -#[derive(Eq, Hash)] pub enum Primitive { /// A literal value, eg. 
`1`, `"hello"` Literal(Literal), @@ -16,12 +15,9 @@ pub enum Primitive { impl PartialEq for Primitive { fn eq(&self, other: &Self) -> bool { - match (self, other) { - (Self::Literal(l1), Self::Literal(l2)) => l1 == l2, - (Self::Atom(a1), Self::Atom(a2)) => a1 == a2, - (Self::ExternFn(efb1), Self::ExternFn(efb2)) => efb1 == efb2, - _ => false - } + if let (Self::Literal(l1), Self::Literal(l2)) = (self, other) { + l1 == l2 + } else {false} } } diff --git a/src/representations/sourcefile.rs b/src/representations/sourcefile.rs index 4abdd9b..83f6dec 100644 --- a/src/representations/sourcefile.rs +++ b/src/representations/sourcefile.rs @@ -1,76 +1,128 @@ -use std::rc::Rc; -use std::collections::HashSet; +use itertools::{Itertools, Either}; -use lasso::Spur; - -use crate::box_chain; -use crate::utils::{Stackframe, iter::box_empty}; -use crate::ast::{Rule, Expr}; +use crate::interner::{Token, Interner}; +use crate::utils::BoxedIter; +use crate::ast::{Rule, Constant}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Import { - pub path: Rc>, + pub path: Token>>, /// If name is None, this is a wildcard import - pub name: Option + pub name: Option> +} +impl Import { + /// Get the preload target space for this import - the prefix below + /// which all files should be included in the compilation + /// + /// Returns the path if this is a glob import, or the path plus the + /// name if this is a specific import + pub fn nonglob_path(&self, i: &Interner) -> Vec> { + let mut path_vec = i.r(self.path).clone(); + if let Some(n) = self.name { + path_vec.push(n) + } + path_vec + } +} + +/// Things that may be prefixed with an export +#[derive(Debug, Clone)] +pub enum Member { + Rule(Rule), + Constant(Constant), + Namespace(Token, Vec) } /// Anything we might encounter in a file -#[derive(Clone)] +#[derive(Debug, Clone)] pub enum FileEntry { Import(Vec), Comment(String), - /// The bool indicates whether the rule is exported, that is, - /// whether 
tokens uniquely defined inside it should be exported - Rule(Rule, bool), - Export(Vec>>), - LazyModule(Spur) -} - -/// Collect all names that occur in an expression -fn find_all_names_expr( - expr: &Expr -) -> HashSet>> { - let mut ret = HashSet::new(); - expr.visit_names( - Stackframe::new(Rc::default()), - &mut |n| { ret.insert(n); } - ); - ret -} - -/// Collect all exported names (and a lot of other words) from a file -pub fn exported_names( - src: &[FileEntry] -) -> HashSet>> { - src.iter().flat_map(|ent| match ent { - FileEntry::Rule(Rule{source, target, ..}, true) => - box_chain!(source.iter(), target.iter()), - _ => box_empty() - }).flat_map(|e| find_all_names_expr(e)) - .chain( - src.iter().filter_map(|ent| { - if let FileEntry::Export(names) = ent { - Some(names.iter()) - } else {None} - }).flatten().cloned() - ).chain( - src.iter().filter_map(|ent| { - if let FileEntry::LazyModule(lm) = ent { - Some(Rc::new(vec![*lm])) - } else {None} - }) - ).collect() + Exported(Member), + Internal(Member), + Export(Vec>), } /// Summarize all imports from a file in a single list of qualified names -pub fn imports<'a, 'b, I>( - src: I -) -> impl Iterator + 'a -where I: Iterator + 'a { +pub fn imports<'a>( + src: impl Iterator + 'a +) -> impl Iterator + 'a { src.filter_map(|ent| match ent { FileEntry::Import(impv) => Some(impv.iter()), _ => None }).flatten() } +/// Join the various redeclarations of namespaces. 
+/// Error if they're inconsistently exported +pub fn normalize_namespaces( + src: BoxedIter, i: &Interner +) -> Result, Vec>> { + let (mut namespaces, mut rest) = src + .partition_map::, Vec<_>, _, _, _>(|ent| match ent { + FileEntry::Exported(Member::Namespace(name, body)) + => Either::Left((true, name, body)), + FileEntry::Internal(Member::Namespace(name, body)) + => Either::Left((false, name, body)), + other => Either::Right(other) + }); + // Combine namespace blocks with the same name + namespaces.sort_unstable_by_key(|(_, name, _)| *name); + let mut lumped = namespaces.into_iter() + .group_by(|(_, name, _)| *name).into_iter() + .map(|(name, grp)| { + let mut any_exported = false; + let mut any_internal = false; + let grp_src = grp.into_iter() + .map(|(exported, name, body)| { + if exported {any_exported = true} + else {any_internal = true}; + (name, body) // Impure map is less than ideal but works + }) + .flat_map(|(_, entv)| entv.into_iter()); + // Apply the function to the contents of these blocks too + let data = normalize_namespaces(Box::new(grp_src), i) + .map_err(|mut e| { e.push(name); e })?; + let member = Member::Namespace(name, data); + match (any_exported, any_internal) { + (true, true) => Err(vec![name]), + (true, false) => Ok(FileEntry::Exported(member)), + (false, true) => Ok(FileEntry::Internal(member)), + (false, false) => unreachable!("The group cannot be empty") + } + }) + .collect::, _>>()?; + rest.append(&mut lumped); + Ok(rest) +} + +/// Turn a relative (import) path into an absolute path. +/// If the import path is empty, the return value is also empty. +/// +/// # Errors +/// +/// if the relative path contains more `super` segments than the length +/// of the absolute path. 
+pub fn absolute_path( + abs_location: &[Token], + rel_path: &[Token], + i: &Interner, + is_child: &impl Fn(Token) -> bool, +) -> Result>, ()> { + let (head, tail) = if let Some(p) = rel_path.split_first() {p} + else {return Ok(vec![])}; + if *head == i.i("super") { + let (_, new_abs) = abs_location.split_last().ok_or(())?; + if tail.len() == 0 {Ok(new_abs.to_vec())} + else {absolute_path(new_abs, tail, i, is_child)} + } else if *head == i.i("self") { + Ok(abs_location.iter() + .chain(tail.iter()) + .copied() + .collect() + ) + } else { + Ok(rel_path.to_vec()) + } +} \ No newline at end of file diff --git a/src/representations/tree.rs b/src/representations/tree.rs new file mode 100644 index 0000000..e602441 --- /dev/null +++ b/src/representations/tree.rs @@ -0,0 +1,133 @@ +use std::ops::Add; +use std::rc::Rc; +use hashbrown::HashMap; + +use crate::interner::Token; +use crate::utils::Substack; + +use super::sourcefile::Import; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ModMember{ + Item(TItem), + Sub(Rc>) +} +impl ModMember { + #[allow(unused)] + pub fn item(&self) -> &TItem { + if let Self::Item(it) = self {it} else { + panic!("Expected item, found submodule") + } + } + + #[allow(unused)] + pub fn sub(&self) -> &Rc> { + if let Self::Sub(sub) = self {sub} else { + panic!("Expected submodule, found item") + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ModEntry{ + pub member: ModMember, + pub exported: bool +} + +/// A module, containing imports, +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Module{ + pub imports: Vec, + pub items: HashMap, ModEntry>, + pub extra: TExt +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum WalkErrorKind { + Private, + Missing +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct WalkError { + pub pos: usize, + pub kind: WalkErrorKind +} + +pub type ModPath<'a> = Substack<'a, Token>; +impl Module { + pub fn walk(self: &Rc, + path: &[Token], require_exported: bool + ) -> 
Result, WalkError> { + let mut cur = self; + for (pos, step) in path.iter().enumerate() { + if let Some(ModEntry{ + member: ModMember::Sub(next), + exported, + }) = cur.items.get(step) { + if require_exported && !exported { + return Err(WalkError{ pos, kind: WalkErrorKind::Private }) + } + cur = next + } else { + return Err(WalkError{ pos, kind: WalkErrorKind::Missing }) + } + } + Ok(cur.clone()) + } + + fn visit_all_imports_rec(&self, + path: ModPath, + callback: &mut impl FnMut(ModPath, &Self, &Import) -> Result<(), E> + ) -> Result<(), E> { + for import in self.imports.iter() { + callback(path, self, import)? + } + for (name, entry) in self.items.iter() { + if let ModMember::Sub(module) = &entry.member { + module.visit_all_imports_rec( + path.push(*name), + callback + )? + } + } + Ok(()) + } + + pub fn visit_all_imports(&self, + callback: &mut impl FnMut(ModPath, &Self, &Import) -> Result<(), E> + ) -> Result<(), E> { + self.visit_all_imports_rec(Substack::Bottom, callback) + } +} + +impl Add for Module +where TExt: Add +{ + type Output = Self; + + fn add(mut self, rhs: Self) -> Self::Output { + let Module{ extra, imports, items } = rhs; + for (key, right) in items { + // if both contain a submodule + if let Some(left) = self.items.remove(&key) { + if let ModMember::Sub(rsub) = &right.member { + if let ModMember::Sub(lsub) = &left.member { + // merge them with rhs exportedness + let new_mod = lsub.as_ref().clone() + rsub.as_ref().clone(); + self.items.insert(key, ModEntry{ + exported: right.exported, + member: ModMember::Sub(Rc::new(new_mod)) + }); + continue; + } + } + } + // otherwise right shadows left + self.items.insert(key, right); + } + self.imports.extend(imports.into_iter()); + self.extra = self.extra + extra; + self + } +} \ No newline at end of file diff --git a/src/rule/executor/execute.rs b/src/rule/executor/execute.rs deleted file mode 100644 index 23657fe..0000000 --- a/src/rule/executor/execute.rs +++ /dev/null @@ -1,203 +0,0 @@ -use std::iter; 
-use std::rc::Rc; - -use hashbrown::HashMap; -use mappable_rc::Mrc; - -use crate::unwrap_or; -use crate::utils::{to_mrc_slice, one_mrc_slice, mrc_empty_slice}; -use crate::utils::iter::{box_once, into_boxed_iter}; -use crate::ast::{Expr, Clause}; -use super::slice_matcher::SliceMatcherDnC; -use super::state::{State, Entry}; -use super::super::RuleError; -use super::update_first_seq_rec; - -fn verify_scalar_vec(pattern: &Expr, is_vec: &mut HashMap) --> Result<(), String> { - let verify_clause = |clause: &Clause, is_vec: &mut HashMap| { - match clause { - Clause::Placeh{key, vec} => { - if let Some(known) = is_vec.get(key) { - if known != &vec.is_some() { return Err(key.to_string()) } - } else { - is_vec.insert(key.clone(), vec.is_some()); - } - } - Clause::Auto(name_opt, typ, body) => { - if let Some(name) = name_opt.as_ref() { - if let Clause::Placeh { key, vec } = name.as_ref() { - if vec.is_some() || is_vec.get(key) == Some(&true) { - return Err(key.to_string()) - } - is_vec.insert(key.to_owned(), false); - } - } - typ.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; - body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; - } - Clause::Lambda(name, typ, body) => { - if let Clause::Placeh { key, vec } = name.as_ref() { - if vec.is_some() || is_vec.get(key) == Some(&true) { - return Err(key.to_string()) - } - is_vec.insert(key.to_owned(), false); - } - typ.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; - body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; - } - Clause::S(_, body) => { - body.iter().try_for_each(|e| verify_scalar_vec(e, is_vec))?; - } - _ => () - }; - Ok(()) - }; - let Expr(val, typ) = pattern; - verify_clause(val, is_vec)?; - for typ in typ.as_ref() { - verify_clause(typ, is_vec)?; - } - Ok(()) -} - -/// Ensure that src starts and ends with a vectorial placeholder without -/// modifying the meaning of the substitution rule -fn slice_to_vec(src: &mut Rc>, tgt: &mut Rc>) { - let prefix_expr = Expr(Clause::Placeh{ - 
key: "::prefix".to_string(), - vec: Some((0, false)) - }, Rc::default()); - let postfix_expr = Expr(Clause::Placeh{ - key: "::postfix".to_string(), - vec: Some((0, false)) - }, Rc::default()); - // Prefix or postfix to match the full vector - let head_multi = matches!( - src.first().expect("Src can never be empty!").0, - Clause::Placeh{vec: Some(_), ..} - ); - let tail_multi = matches!( - src.last().expect("Impossible branch!").0, - Clause::Placeh{vec: Some(_), ..} - ); - let prefix_vec = if head_multi {vec![]} else {vec![prefix_expr]}; - let postfix_vec = if tail_multi {vec![]} else {vec![postfix_expr]}; - *src = Rc::new( - prefix_vec.iter() - .chain(src.iter()) - .chain(postfix_vec.iter()) - .cloned().collect() - ); - *tgt = Rc::new( - prefix_vec.iter() - .chain(tgt.iter()) - .chain(postfix_vec.iter()) - .cloned().collect() - ); -} - -/// keep re-probing the input with pred until it stops matching -fn update_all_seqs(input: Rc>, pred: &mut F) --> Option>> -where F: FnMut(Rc>) -> Option>> { - let mut tmp = update_first_seq_rec::exprv(input, pred); - while let Some(xv) = tmp { - tmp = update_first_seq_rec::exprv(xv.clone(), pred); - if tmp.is_none() {return Some(xv)} - } - None -} - -fn write_expr_rec(state: &State, Expr(tpl_clause, tpl_typ): &Expr) --> Box> { - let out_typ = tpl_typ.iter() - .flat_map(|c| write_expr_rec(state, &c.clone().into_expr())) - .map(Expr::into_clause) - .collect::>(); - match tpl_clause { - Clause::Auto(name_opt, typ, body) => box_once(Expr(Clause::Auto( - name_opt.as_ref().and_then(|name| { - if let Clause::Placeh { key, .. 
} = name { - match &state[key] { - Entry::NameOpt(name) => name.as_ref().map(|s| s.as_ref().to_owned()) - } - } - if let Some(state_key) = name.strip_prefix('$') { - match &state[state_key] { - Entry::NameOpt(name) => name.as_ref().map(|s| s.as_ref().to_owned()), - Entry::Name(name) => Some(name.as_ref().to_owned()), - _ => panic!("Auto template name may only be derived from Auto or Lambda name") - } - } else { - Some(name.to_owned()) - } - }), - write_slice_rec(state, typ), - write_slice_rec(state, body) - ), out_typ.to_owned())), - Clause::Lambda(name, typ, body) => box_once(Expr(Clause::Lambda( - if let Some(state_key) = name.strip_prefix("$_") { - if let Entry::Name(name) = &state[state_key] { - name.as_ref().to_owned() - } else {panic!("Lambda template name may only be derived from Lambda name")} - } else { - name.to_owned() - }, - write_slice_rec(state, typ), - write_slice_rec(state, body) - ), out_typ.to_owned())), - Clause::S(c, body) => box_once(Expr(Clause::S( - *c, - write_slice_rec(state, body) - ), out_typ.to_owned())), - Clause::Placeh{key, vec: None} => { - let real_key = unwrap_or!(key.strip_prefix('_'); key); - match &state[real_key] { - Entry::Scalar(x) => box_once(x.as_ref().to_owned()), - Entry::Name(n) => box_once(Expr(Clause::Name { - local: Some(n.as_ref().to_owned()), - qualified: one_mrc_slice(n.as_ref().to_owned()) - }, mrc_empty_slice())), - _ => panic!("Scalar template may only be derived from scalar placeholder"), - } - }, - Clause::Placeh{key, vec: Some(_)} => if let Entry::Vec(v) = &state[key] { - into_boxed_iter(v.as_ref().to_owned()) - } else {panic!("Vectorial template may only be derived from vectorial placeholder")}, - Clause::Explicit(param) => { - assert!(out_typ.len() == 0, "Explicit should never have a type annotation"); - box_once(Clause::Explicit(Mrc::new( - Clause::from_exprv(write_expr_rec(state, param).collect()) - .expect("Result shorter than template").into_expr() - )).into_expr()) - }, - // Explicit base case so that 
we get an error if Clause gets new values - c@Clause::P(_) | c@Clause::Name { .. } => box_once(Expr(c.to_owned(), out_typ.to_owned())) - } -} - -/// Fill in a template from a state as produced by a pattern -fn write_slice_rec(state: &State, tpl: &Mrc<[Expr]>) -> Mrc<[Expr]> { - tpl.iter().flat_map(|xpr| write_expr_rec(state, xpr)).collect() -} - -/// Apply a rule (a pair of pattern and template) to an expression -pub fn execute(mut src: Mrc<[Expr]>, mut tgt: Mrc<[Expr]>, input: Mrc<[Expr]>) --> Result>, RuleError> { - // Dimension check - let mut is_vec_db = HashMap::new(); - src.iter().try_for_each(|e| verify_scalar_vec(e, &mut is_vec_db)) - .map_err(RuleError::ScalarVecMismatch)?; - tgt.iter().try_for_each(|e| verify_scalar_vec(e, &mut is_vec_db)) - .map_err(RuleError::ScalarVecMismatch)?; - // Padding - slice_to_vec(&mut src, &mut tgt); - // Generate matcher - let matcher = SliceMatcherDnC::new(src); - let matcher_cache = SliceMatcherDnC::get_matcher_cache(); - Ok(update_all_seqs(Mrc::clone(&input), &mut |p| { - let state = matcher.match_range_cached(p, &matcher_cache)?; - Some(write_slice_rec(&state, &tgt)) - })) -} diff --git a/src/rule/executor/mod.rs b/src/rule/executor/mod.rs deleted file mode 100644 index ba84a09..0000000 --- a/src/rule/executor/mod.rs +++ /dev/null @@ -1,9 +0,0 @@ -mod slice_matcher; -mod state; -mod execute; -mod split_at_max_vec; -mod update_first_seq_rec; - -use state::State; - -pub use execute::execute; diff --git a/src/rule/executor/slice_matcher.rs b/src/rule/executor/slice_matcher.rs deleted file mode 100644 index f17a61f..0000000 --- a/src/rule/executor/slice_matcher.rs +++ /dev/null @@ -1,314 +0,0 @@ -use std::fmt::Debug; - -use mappable_rc::Mrc; - -use crate::ast::{Expr, Clause}; -use crate::unwrap_or; -use crate::utils::iter::box_empty; -use crate::utils::{Side, Cache, mrc_derive, mrc_try_derive, to_mrc_slice}; - -use super::State; -use super::split_at_max_vec::split_at_max_vec; - -/// Tuple with custom cloning logic -// 
#[derive(Debug, Eq, PartialEq, Hash)] -// pub struct CacheEntry<'a>(Mrc<[Expr]>, &'a SliceMatcherDnC); -// impl<'a> Clone for CacheEntry<'a> { -// fn clone(&self) -> Self { -// let CacheEntry(mrc, matcher) = self; -// CacheEntry(Mrc::clone(mrc), matcher) -// } -// } -// ^^^^ -// This has been removed because the slice-based version needs no custom -// cloning logic. In the next iteration, remove the this altogether. - - -/// Matcher that applies a pattern to a slice via divide-and-conquer -/// -/// Upon construction, it selects the clause of highest priority, then -/// initializes its internal state for matching that clause and delegates -/// the left and right halves of the pattern to two submatchers. -/// -/// Upon matching, it uses a cache to accelerate the process of executing -/// a pattern on the entire tree. -#[derive(Clone, Eq)] -pub struct SliceMatcherDnC { - /// The entire pattern this will match - pattern: Mrc<[Expr]>, - /// The exact clause this can match - clause: Mrc, - /// Matcher for the parts of the pattern right from us - right_subm: Option>, - /// Matcher for the parts of the pattern left from us - left_subm: Option>, - /// Matcher for the body of this clause if it has one. - /// Must be Some if pattern is (Auto, Lambda or S) - body_subm: Option>, - /// Matcher for the type of this expression if it has one (Auto usually does) - /// Optional - typ_subm: Option>, -} - -impl PartialEq for SliceMatcherDnC { - fn eq(&self, other: &Self) -> bool { - self.pattern == other.pattern - } -} - -impl std::hash::Hash for SliceMatcherDnC { - fn hash(&self, state: &mut H) { - self.pattern.hash(state); - } -} - -impl SliceMatcherDnC { - /// If this is true, `clause`, `typ_subm`, `body_subm` and `clause_qual_name` are meaningless. - /// If it's false, it's also false for both side matchers. 
- pub fn clause_is_vectorial(&self) -> bool { - matches!(self.clause.as_ref(), Clause::Placeh{vec: Some(..), ..}) - } - /// If clause is a name, the qualified name this can match - pub fn clause_qual_name(&self) -> Option>> { - if let Clause::Name(name) = self.clause.as_ref() {Some(name.clone())} else {None} - } - /// If clause is a Placeh, the key in the state the match will be stored at - pub fn state_key(&self) -> Option<&String> { - if let Clause::Placeh { key, .. } = self.clause.as_ref() {Some(key)} else {None} - } - pub fn own_max_size(&self, total: usize) -> Option { - if !self.clause_is_vectorial() { - if total == self.len() {Some(total)} else {None} - } else { - let margin = self.min(Side::Left) + self.min(Side::Right); - if margin + self.own_min_size() <= total {Some(total - margin)} else {None} - } - } - pub fn own_min_size(&self) -> usize { - if let Clause::Placeh { vec: Some((_, nonzero)), .. } = self.clause.as_ref() { - if *nonzero {1} else {0} - } else {self.len()} - } - - /// Enumerate all valid subdivisions based on the reported size constraints of self and - /// the two subranges - pub fn valid_subdivisions<'a>(&'a self, - range: &'a [Expr] - ) -> impl Iterator, Mrc<[Expr]>, Mrc<[Expr]>)> { - let own_max = unwrap_or!(self.own_max_size(range.len()); return box_empty()); - let own_min = self.own_min_size(); - let lmin = self.min(Side::Left); - let _lmax = self.max(Side::Left, range.len()); - let rmin = self.min(Side::Right); - let _rmax = self.max(Side::Right, range.len()); - let full_len = range.len(); - Box::new((own_min..=own_max).rev().flat_map(move |own_len| { - let wiggle = full_len - lmin - rmin - own_len; - let range = Mrc::clone(&range); - (0..=wiggle).map(move |offset| { - let first_break = lmin + offset; - let second_break = first_break + own_len; - let left = mrc_derive(&range, |p| &p[0..first_break]); - let mid = mrc_derive(&range, |p| &p[first_break..second_break]); - let right = mrc_derive(&range, |p| &p[second_break..]); - (left, 
mid, right) - }) - })) - } - - pub fn new(pattern: Mrc<[Expr]>) -> Self { - let (clause, left_subm, right_subm) = mrc_try_derive(&pattern, |p| { - if p.len() == 1 {Some(&p[0].0)} else {None} - }).map(|e| (e, None, None)) - .or_else(|| split_at_max_vec(Mrc::clone(&pattern)).map(|(left, _, right)| ( - mrc_derive(&pattern, |p| &p[left.len()].0), - if !left.is_empty() {Some(Box::new(Self::new(left)))} else {None}, - if !right.is_empty() {Some(Box::new(Self::new(right)))} else {None} - ))) - .unwrap_or_else(|| ( - mrc_derive(&pattern, |p| &p[0].0), - None, - Some(Box::new(Self::new(mrc_derive(&pattern, |p| &p[1..])))) - )); - Self { - pattern, right_subm, left_subm, - clause: Mrc::clone(&clause), - body_subm: clause.body().map(|b| Box::new(Self::new(b))), - typ_subm: clause.typ().map(|t| Box::new(Self::new(t))) - } - } - - /// The shortest slice this pattern can match - fn len(&self) -> usize { - if self.clause_is_vectorial() { - self.min(Side::Left) + self.min(Side::Right) + self.own_min_size() - } else {self.pattern.len()} - } - /// Pick a subpattern based on the parameter - fn side(&self, side: Side) -> Option<&SliceMatcherDnC> { - match side { - Side::Left => &self.left_subm, - Side::Right => &self.right_subm - }.as_ref().map(|b| b.as_ref()) - } - /// The shortest slice the given side can match - fn min(&self, side: Side) -> usize {self.side(side).map_or(0, |right| right.len())} - /// The longest slice the given side can match - fn max(&self, side: Side, total: usize) -> usize { - self.side(side).map_or(0, |m| if m.clause_is_vectorial() { - total - self.min(side.opposite()) - self.own_min_size() - } else {m.len()}) - } - /// Take the smallest possible slice from the given side - fn slice_min<'a>(&self, side: Side, range: &'a [Expr]) -> &'a [Expr] { - side.slice(self.min(side), range) - } - - /// Matches the body on a range - /// # Panics - /// when called on an instance that does not have a body (not Auto, Lambda or S) - fn match_body<'a>(&'a self, - range: 
Mrc<[Expr]>, cache: &Cache, Option> - ) -> Option { - self.body_subm.as_ref() - .expect("Missing body matcher") - .match_range_cached(range, cache) - } - /// Matches the type and body on respective ranges - /// # Panics - /// when called on an instance that does not have a body (not Auto, Lambda or S) - fn match_parts<'a>(&'a self, - typ_range: Mrc<[Expr]>, body_range: Mrc<[Expr]>, - cache: &Cache, Option> - ) -> Option { - let typ_state = if let Some(typ) = &self.typ_subm { - typ.match_range_cached(typ_range, cache)? - } else {State::new()}; - let body_state = self.match_body(body_range, cache)?; - typ_state + body_state - } - - /// Match the specified side-submatcher on the specified range with the cache - /// In absence of a side-submatcher empty ranges are matched to empty state - fn apply_side_with_cache<'a>(&'a self, - side: Side, range: Mrc<[Expr]>, - cache: &Cache, Option> - ) -> Option { - match &self.side(side) { - None => { - if !range.is_empty() {None} - else {Some(State::new())} - }, - Some(m) => cache.find(&CacheEntry(range, m)) - } - } - - fn match_range_scalar_cached<'a>(&'a self, - target: Mrc<[Expr]>, - cache: &Cache, Option> - ) -> Option { - let pos = self.min(Side::Left); - if target.len() != self.pattern.len() {return None} - let mut own_state = ( - self.apply_side_with_cache(Side::Left, mrc_derive(&target, |t| &t[0..pos]), cache)? - + self.apply_side_with_cache(Side::Right, mrc_derive(&target, |t| &t[pos+1..]), cache) - )?; - match (self.clause.as_ref(), &target.as_ref()[pos].0) { - (Clause::P(val), Clause::P(tgt)) => { - if val == tgt {Some(own_state)} else {None} - } - (Clause::Placeh{key, vec: None}, tgt_clause) => { - if let Some(real_key) = key.strip_prefix('_') { - if let Clause::Name { local: Some(value), .. 
} = tgt_clause { - own_state.insert_name(real_key, value) - } else {None} - } else {own_state.insert_scalar(&key, &target[pos])} - } - (Clause::S(c, _), Clause::S(c_tgt, body_range)) => { - if c != c_tgt {return None} - own_state + self.match_parts(to_mrc_slice(vec![]), Mrc::clone(body_range), cache) - } - (Clause::Name{qualified, ..}, Clause::Name{qualified: q_tgt, ..}) => { - if qualified == q_tgt {Some(own_state)} else {None} - } - (Clause::Lambda(name, _, _), Clause::Lambda(name_tgt, typ_tgt, body_tgt)) => { - // Primarily, the name works as a placeholder - if let Some(state_key) = name.strip_prefix('$') { - own_state = own_state.insert_name(state_key, name_tgt)? - } else if name != name_tgt {return None} - // ^ But if you're weird like that, it can also work as a constraint - own_state + self.match_parts(Mrc::clone(typ_tgt), Mrc::clone(body_tgt), cache) - } - (Clause::Auto(name_opt, _, _), Clause::Auto(name_range, typ_range, body_range)) => { - if let Some(name) = name_opt { - // TODO: Enforce this at construction, on a type system level - let state_key = name.strip_prefix('$') - .expect("Auto patterns may only reference, never enforce the name"); - own_state = own_state.insert_name_opt(state_key, name_range.as_ref())? 
- } - own_state + self.match_parts(Mrc::clone(typ_range), Mrc::clone(body_range), cache) - }, - _ => None - } - } - - /// Match the range with a vectorial _assuming we are a vectorial_ - fn match_range_vectorial_cached<'a>(&'a self, - name: &str, - target: Mrc<[Expr]>, - cache: &Cache, Option> - ) -> Option { - // Step through valid slicings based on reported size constraints in order - // from longest own section to shortest and from left to right - for (left, own, right) in self.valid_subdivisions(target) { - return Some(unwrap_or!( - self.apply_side_with_cache(Side::Left, left, cache) - .and_then(|lres| lres + self.apply_side_with_cache(Side::Right, right, cache)) - .and_then(|side_res| side_res.insert_vec(name, own.as_ref())); - continue - )) - } - None - } - - /// Try and match the specified range - pub fn match_range_cached<'a>(&'a self, - target: Mrc<[Expr]>, - cache: &Cache, Option> - ) -> Option { - if self.pattern.is_empty() { - return if target.is_empty() {Some(State::new())} else {None} - } - if self.clause_is_vectorial() { - let key = self.state_key().expect("Vectorial implies key"); - self.match_range_vectorial_cached(key, target, cache) - } else {self.match_range_scalar_cached(target, cache)} - } - - pub fn get_matcher_cache<'a>() - -> Cache<'a, CacheEntry<'a>, Option> { - Cache::new( - |CacheEntry(tgt, matcher), cache| { - matcher.match_range_cached(tgt, cache) - } - ) - } - - pub fn match_range(&self, target: Mrc<[Expr]>) -> Option { - self.match_range_cached(target, &Self::get_matcher_cache()) - } -} - -impl Debug for SliceMatcherDnC { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Matcher") - .field("clause", &self.clause) - .field("vectorial", &self.clause_is_vectorial()) - .field("min", &self.len()) - .field("left", &self.left_subm) - .field("right", &self.right_subm) - .field("lmin", &self.min(Side::Left)) - .field("rmin", &self.min(Side::Right)) - .finish() - } -} diff --git 
a/src/rule/executor/split_at_max_vec.rs b/src/rule/executor/split_at_max_vec.rs deleted file mode 100644 index 71fd0fa..0000000 --- a/src/rule/executor/split_at_max_vec.rs +++ /dev/null @@ -1,33 +0,0 @@ -use mappable_rc::Mrc; -use itertools::Itertools; - -use crate::ast::{Expr, Clause}; -use crate::utils::{mrc_derive, mrc_try_derive}; - -pub type MaxVecSplit = (Mrc<[Expr]>, (Mrc, usize, bool), Mrc<[Expr]>); -/// Derive the details of the central vectorial and the two sides from a slice of Expr's -pub fn split_at_max_vec(pattern: Mrc<[Expr]>) -> Option { - let rngidx = pattern.iter().position_max_by_key(|ex| { - if let Expr(Clause::Placeh{vec: Some((prio, _)), ..}, _) = ex { - *prio as i64 - } else { -1 } - })?; - let left = mrc_derive(&pattern, |p| &p[0..rngidx]); - let placeh = mrc_derive(&pattern, |p| &p[rngidx].0); - let right = if rngidx == pattern.len() { - mrc_derive(&pattern, |x| &x[0..1]) - } else { - mrc_derive(&pattern, |x| &x[rngidx + 1..]) - }; - mrc_try_derive(&placeh, |p| { - if let Clause::Placeh{key, vec: Some(_)} = p { - Some(key) - } else {None} // Repeated below on unchanged data - }).map(|key| { - let key = mrc_derive(&key, String::as_str); - if let Clause::Placeh{vec: Some((prio, nonzero)), ..} = placeh.as_ref() { - (left, (key, *prio, *nonzero), right) - } - else {panic!("Impossible branch")} // Duplicate of above - }) -} diff --git a/src/rule/executor/state.rs b/src/rule/executor/state.rs deleted file mode 100644 index 07e3728..0000000 --- a/src/rule/executor/state.rs +++ /dev/null @@ -1,141 +0,0 @@ -use std::{ops::{Add, Index}, rc::Rc, fmt::Debug}; - -use hashbrown::HashMap; -use lasso::Spur; - -use crate::ast::Expr; - -#[derive(PartialEq, Eq)] -pub enum Entry { - Vec(Rc>), - Scalar(Rc), - Name(Rc>), - NameOpt(Option>>) -} - -/// A bucket of indexed expression fragments. Addition may fail if there's a conflict. -#[derive(PartialEq, Eq, Clone)] -pub struct State(HashMap); - -/// Clone without also cloning arbitrarily heavy Expr objects. 
-/// Key is expected to be a very short string with an allocator overhead close to zero. -impl Clone for Entry { - fn clone(&self) -> Self { - match self { - Self::Name(n) => Self::Name(Rc::clone(n)), - Self::Scalar(x) => Self::Scalar(Rc::clone(x)), - Self::Vec(v) => Self::Vec(Rc::clone(v)), - Self::NameOpt(o) => Self::NameOpt(o.as_ref().map(Rc::clone)) - } - } -} - -impl State { - pub fn new() -> Self { - Self(HashMap::new()) - } - pub fn insert_vec(mut self, k: &S, v: &[Expr]) -> Option - where S: AsRef + ToString + ?Sized + Debug { - if let Some(old) = self.0.get(k.as_ref()) { - if let Entry::Vec(val) = old { - if val.as_slice() != v {return None} - } else {return None} - } else { - self.0.insert(k.to_string(), Entry::Vec(Rc::new(v.to_vec()))); - } - Some(self) - } - pub fn insert_scalar(mut self, k: &S, v: &Expr) -> Option - where S: AsRef + ToString + ?Sized { - if let Some(old) = self.0.get(k.as_ref()) { - if let Entry::Scalar(val) = old { - if val.as_ref() != v {return None} - } else {return None} - } else { - self.0.insert(k.to_string(), Entry::Scalar(Rc::new(v.to_owned()))); - } - Some(self) - } - pub fn insert_name(mut self, k: &S1, v: &[Spur]) -> Option - where - S1: AsRef + ToString + ?Sized - { - if let Some(old) = self.0.get(k.as_ref()) { - if let Entry::Name(val) = old { - if val.as_ref() != v.as_ref() {return None} - } else {return None} - } else { - self.0.insert(k.to_string(), Entry::Name(Rc::new(v.to_vec()))); - } - Some(self) - } - pub fn insert_name_opt(mut self, k: &S1, v: Option<&[Spur]>) - -> Option - where S1: AsRef + ToString + ?Sized - { - if let Some(old) = self.0.get(k.as_ref()) { - if let Entry::NameOpt(val) = old { - if val.as_ref().map(|s| s.as_ref().as_slice()) != v { - return None - } - } else {return None} - } else { - let data = v.map(|s| Rc::new(s.to_vec())); - self.0.insert(k.to_string(), Entry::NameOpt(data)); - } - Some(self) - } - /// Insert a new entry, return None on conflict - pub fn insert_pair(mut self, (k, v): (String, 
Entry)) -> Option { - if let Some(old) = self.0.get(&k) { - if old != &v {return None} - } else { - self.0.insert(k, v); - } - Some(self) - } - /// Returns `true` if the state contains no data - pub fn empty(&self) -> bool { - self.0.is_empty() - } -} - -impl Add for State { - type Output = Option; - - fn add(mut self, rhs: Self) -> Self::Output { - if self.empty() { - return Some(rhs) - } - for pair in rhs.0 { - self = self.insert_pair(pair)? - } - Some(self) - } -} - -impl Add> for State { - type Output = Option; - - fn add(self, rhs: Option) -> Self::Output { - rhs.and_then(|s| self + s) - } -} - -impl Index for State where S: AsRef { - type Output = Entry; - - fn index(&self, index: S) -> &Self::Output { - return &self.0[index.as_ref()] - } -} - -impl IntoIterator for State { - type Item = (String, Entry); - - type IntoIter = hashbrown::hash_map::IntoIter; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} \ No newline at end of file diff --git a/src/rule/executor/update_first_seq_rec.rs b/src/rule/executor/update_first_seq_rec.rs deleted file mode 100644 index c465376..0000000 --- a/src/rule/executor/update_first_seq_rec.rs +++ /dev/null @@ -1,54 +0,0 @@ -use std::rc::Rc; - -use crate::utils::replace_first; -use crate::ast::{Expr, Clause}; - -/// Traverse the tree, calling pred on every sibling list until it returns -/// some vec then replace the sibling list with that vec and return true -/// return false if pred never returned some -pub fn exprv(input: Rc>, pred: &mut F) -> Option>> -where F: FnMut(Rc>) -> Option>> { - if let o@Some(_) = pred(input.clone()) {return o} - replace_first(input.as_ref(), |ex| expr(ex, pred)) - .map(|i| Rc::new(i.collect())) -} - -pub fn expr(Expr(cls, typ): &Expr, pred: &mut F) -> Option -where F: FnMut(Rc>) -> Option>> { - if let Some(t) = clausev(typ.clone(), pred) {return Some(Expr(cls.clone(), t))} - if let Some(c) = clause(cls, pred) {return Some(Expr(c, typ.clone()))} - None -} - -pub fn clausev(input: 
Rc>, pred: &mut F) -> Option>> -where F: FnMut(Rc>) -> Option>> { - replace_first(input.as_ref(), |c| clause(c, pred)) - .map(|i| Rc::new(i.collect())) -} - -pub fn clause(c: &Clause, pred: &mut F) -> Option -where F: FnMut(Rc>) -> Option>> { - match c { - Clause::P(_) | Clause::Placeh {..} | Clause::Name {..} => None, - Clause::Lambda(n, typ, body) => { - if let Some(b) = exprv(body.clone(), pred) { - return Some(Clause::Lambda(n.clone(), typ.clone(), b)) - } - if let Some(t) = exprv(typ.clone(), pred) { - return Some(Clause::Lambda(n.clone(), t, body.clone())) - } - None - } - Clause::Auto(n, typ, body) => { - if let Some(b) = exprv(body.clone(), pred) { - return Some(Clause::Auto(n.clone(), typ.clone(), b)) - } - if let Some(t) = exprv(typ.clone(), pred) { - return Some(Clause::Auto(n.clone(), t, body.clone())) - } - None - } - Clause::S(c, body) => Some(Clause::S(*c, exprv(body.clone(), pred)?)), - Clause::Explicit(t) => Some(Clause::Explicit(Rc::new(expr(t, pred)?))) - } -} \ No newline at end of file diff --git a/src/rule/matcher.rs b/src/rule/matcher.rs new file mode 100644 index 0000000..303b565 --- /dev/null +++ b/src/rule/matcher.rs @@ -0,0 +1,10 @@ +use std::rc::Rc; + +use crate::ast::Expr; + +use super::state::State; + +pub trait Matcher { + fn new(pattern: Rc>) -> Self; + fn apply<'a>(&self, source: &'a [Expr]) -> Option>; +} \ No newline at end of file diff --git a/src/rule/matcher_second/any_match.rs b/src/rule/matcher_second/any_match.rs new file mode 100644 index 0000000..904b96b --- /dev/null +++ b/src/rule/matcher_second/any_match.rs @@ -0,0 +1,19 @@ +use crate::{ast::Expr, rule::state::State}; + +use super::{shared::AnyMatcher, scal_match::scalv_match, vec_match::vec_match}; + +pub fn any_match<'a>(matcher: &AnyMatcher, seq: &'a [Expr]) +-> Option> +{ + match matcher { + AnyMatcher::Scalar(scalv) => scalv_match(scalv, seq), + AnyMatcher::Vec{ left, mid, right } => { + let left_split = left.len(); + let right_split = seq.len() - right.len(); + 
let mut state = scalv_match(left, &seq[..left_split])?; + state.extend(scalv_match(right, &seq[right_split..])?); + state.extend(vec_match(mid, &seq[left_split..right_split])?); + Some(state) + } + } +} \ No newline at end of file diff --git a/src/rule/matcher_second/build.rs b/src/rule/matcher_second/build.rs new file mode 100644 index 0000000..0ae1030 --- /dev/null +++ b/src/rule/matcher_second/build.rs @@ -0,0 +1,159 @@ +use itertools::Itertools; + +use crate::{rule::vec_attrs::vec_attrs, ast::Clause}; +use crate::utils::Side; +use crate::interner::Token; +use crate::ast::{Expr, Placeholder, PHClass}; + +use super::shared::{AnyMatcher, ScalMatcher, VecMatcher}; + + +pub type MaxVecSplit<'a> = (&'a [Expr], (Token, u64, bool), &'a [Expr]); + +/// Derive the details of the central vectorial and the two sides from a slice of Expr's +fn split_at_max_vec(pattern: &[Expr]) -> Option { + let rngidx = pattern.iter() + .position_max_by_key(|expr| { + vec_attrs(expr) + .map(|attrs| attrs.1 as i64) + .unwrap_or(-1) + })?; + let (left, not_left) = pattern.split_at(rngidx); + let (placeh, right) = not_left.split_first() + .expect("The index of the greatest element must be less than the length"); + vec_attrs(placeh) + .map(|attrs| (left, attrs, right)) +} + +fn scal_cnt<'a>(iter: impl Iterator) -> usize { + iter + .take_while(|expr| vec_attrs(expr).is_none()) + .count() +} + +/// Recursively convert this pattern into a matcher that can be +/// efficiently applied to slices. 
+pub fn mk_matcher(pattern: &[Expr]) -> AnyMatcher { + println!("matcher from {:?}", pattern); + let left_split = scal_cnt(pattern.iter()); + if pattern.len() <= left_split { + return AnyMatcher::Scalar(mk_scalv(pattern)) + } + let (left, not_left) = pattern.split_at(left_split); + let right_split = not_left.len() - scal_cnt(pattern.iter().rev()); + let (mid, right) = not_left.split_at(right_split); + AnyMatcher::Vec { + left: mk_scalv(left), + mid: mk_vec(mid), + right: mk_scalv(right), + } +} + +/// Pattern MUST NOT contain vectorial placeholders +fn mk_scalv(pattern: &[Expr]) -> Vec { + println!("Scalv from {:?}", pattern); + pattern.iter().map(mk_scalar).collect() +} + +/// Pattern MUST start and end with a vectorial placeholder +fn mk_vec(pattern: &[Expr]) -> VecMatcher { + println!("Vec from {:?}", pattern); + debug_assert!(!pattern.is_empty(), "pattern cannot be empty"); + debug_assert!(pattern.first().map(vec_attrs).is_some(), "pattern must start with a vectorial"); + debug_assert!(pattern.last().map(vec_attrs).is_some(), "pattern must end with a vectorial"); + let (left, (key, prio, nonzero), right) = split_at_max_vec(pattern) + .expect("pattern must have vectorial placeholders at least at either end"); + if prio >= 1 {println!("Nondefault priority {} found", prio)} + let r_sep_size = scal_cnt(right.iter()); + let (r_sep, r_side) = right.split_at(r_sep_size); + let l_sep_size = scal_cnt(left.iter().rev()); + let (l_side, l_sep) = left.split_at(left.len() - l_sep_size); + let main = VecMatcher::Placeh { key, nonzero }; + match (left, right) { + (&[], &[]) => VecMatcher::Placeh { key, nonzero }, + (&[], _) => VecMatcher::Scan { + direction: Side::Left, + left: Box::new(main), + sep: mk_scalv(r_sep), + right: Box::new(mk_vec(r_side)), + }, + (_, &[]) => VecMatcher::Scan { + direction: Side::Right, + left: Box::new(mk_vec(l_side)), + sep: mk_scalv(l_sep), + right: Box::new(main), + }, + (_, _) => { + let mut key_order = l_side.iter() + .chain(r_side.iter()) + 
.filter_map(|e| vec_attrs(e)) + .collect::>(); + key_order.sort_by_key(|(_, prio, _)| -(*prio as i64)); + VecMatcher::Middle { + left: Box::new(mk_vec(l_side)), + left_sep: mk_scalv(l_sep), + mid: Box::new(main), + right_sep: mk_scalv(r_sep), + right: Box::new(mk_vec(r_side)), + key_order: key_order.into_iter().map(|(n, ..)| n).collect() + } + } + } +} + +/// Pattern MUST NOT be a vectorial placeholder +fn mk_scalar(pattern: &Expr) -> ScalMatcher { + println!("Scalar from {:?}", pattern); + match &pattern.value { + Clause::P(p) => ScalMatcher::P(p.clone()), + Clause::Name(n) => ScalMatcher::Name(*n), + Clause::Placeh(Placeholder{ name, class }) => { + debug_assert!(!matches!(class, PHClass::Vec{..}), "Scalar matcher cannot be built from vector pattern"); + ScalMatcher::Placeh(*name) + } + Clause::S(c, body) => ScalMatcher::S(*c, Box::new(mk_matcher(&body))), + Clause::Lambda(arg, body) => ScalMatcher::Lambda( + Box::new(mk_scalar(&arg)), + Box::new(mk_matcher(&body)) + ) + } +} + +#[cfg(test)] +mod test { + use std::rc::Rc; + + use crate::interner::{Interner, InternedDisplay}; + use crate::ast::{Clause, Placeholder, PHClass}; + + use super::mk_matcher; + + #[test] + fn test_scan() { + let i = Interner::new(); + let pattern = vec![ + Clause::Placeh(Placeholder{ + class: PHClass::Vec{ nonzero: false, prio: 0 }, + name: i.i("::prefix"), + }).into_expr(), + Clause::Name(i.i(&[i.i("prelude"), i.i("do")][..])).into_expr(), + Clause::S('(', Rc::new(vec![ + Clause::Placeh(Placeholder{ + class: PHClass::Vec{ nonzero: false, prio: 0 }, + name: i.i("expr"), + }).into_expr(), + Clause::Name(i.i(&[i.i("prelude"), i.i(";")][..])).into_expr(), + Clause::Placeh(Placeholder { + class: PHClass::Vec{ nonzero: false, prio: 1 }, + name: i.i("rest"), + }).into_expr() + ])).into_expr(), + Clause::Placeh(Placeholder { + class: PHClass::Vec{ nonzero: false, prio: 0 }, + name: i.i("::suffix"), + }).into_expr(), + ]; + let matcher = mk_matcher(&pattern); + println!("{}", matcher.bundle(&i)); 
+ } +} \ No newline at end of file diff --git a/src/rule/matcher_second/mod.rs b/src/rule/matcher_second/mod.rs new file mode 100644 index 0000000..06a5bee --- /dev/null +++ b/src/rule/matcher_second/mod.rs @@ -0,0 +1,22 @@ +/* +Construction: +convert pattern into hierarchy of plain, scan, middle + - plain: accept any sequence or any non-empty sequence + - scan: a single scalar pattern moves LTR or RTL, submatchers on either + side + - middle: two scalar patterns walk over all permutations of matches + while getting progressively closer to each other + +Application: +walk over the current matcher's valid options and poll the submatchers + for each of them +*/ + +mod shared; +mod vec_match; +mod scal_match; +mod any_match; +mod build; + +pub use shared::AnyMatcher; +pub use build::mk_matcher; \ No newline at end of file diff --git a/src/rule/matcher_second/scal_match.rs b/src/rule/matcher_second/scal_match.rs new file mode 100644 index 0000000..3f9f35c --- /dev/null +++ b/src/rule/matcher_second/scal_match.rs @@ -0,0 +1,32 @@ +use crate::{rule::state::{State, StateEntry}, ast::{Expr, Clause}}; + +use super::{shared::ScalMatcher, any_match::any_match}; + +pub fn scal_match<'a>(matcher: &ScalMatcher, expr: &'a Expr) +-> Option> { + match (matcher, &expr.value) { + (ScalMatcher::P(p1), Clause::P(p2)) if p1 == p2 => Some(State::new()), + (ScalMatcher::Name(n1), Clause::Name(n2)) if n1 == n2 + => Some(State::new()), + (ScalMatcher::Placeh(key), _) + => Some(State::from([(*key, StateEntry::Scalar(expr))])), + (ScalMatcher::S(c1, b_mat), Clause::S(c2, body)) if c1 == c2 + => any_match(b_mat, &body[..]), + (ScalMatcher::Lambda(arg_mat, b_mat), Clause::Lambda(arg, body)) => { + let mut state = scal_match(&*arg_mat, &*arg)?; + state.extend(any_match(&*b_mat, &*body)?); + Some(state) + } + _ => None + } +} + +pub fn scalv_match<'a>(matchers: &[ScalMatcher], seq: &'a [Expr]) +-> Option> { + if seq.len() != matchers.len() {return None} + let mut state = State::new(); + for 
(matcher, expr) in matchers.iter().zip(seq.iter()) { + state.extend(scal_match(matcher, expr)?); + } + Some(state) +} \ No newline at end of file diff --git a/src/rule/matcher_second/shared.rs b/src/rule/matcher_second/shared.rs new file mode 100644 index 0000000..38bfe8a --- /dev/null +++ b/src/rule/matcher_second/shared.rs @@ -0,0 +1,163 @@ +use std::fmt::Write; +use std::rc::Rc; + +use crate::ast::Expr; +use crate::rule::{matcher::Matcher, state::State}; +use crate::unwrap_or; +use crate::utils::{Side, print_nname}; +use crate::interner::{Token, InternedDisplay, Interner}; +use crate::representations::Primitive; + +use super::{build::mk_matcher, any_match::any_match}; + +pub enum ScalMatcher { + P(Primitive), + Name(Token>>), + S(char, Box), + Lambda(Box, Box), + Placeh(Token), +} + +pub enum VecMatcher { + Placeh{ + key: Token, + nonzero: bool + }, + Scan{ + left: Box, + sep: Vec, + right: Box, + /// The separator traverses the sequence towards this side + direction: Side + }, + Middle{ + /// Matches the left outer region + left: Box, + /// Matches the left separator + left_sep: Vec, + /// Matches the middle - can only ever be a plain placeholder + mid: Box, + /// Matches the right separator + right_sep: Vec, + /// Matches the right outer region + right: Box, + /// Order of significance for sorting equally good solutions based on + /// the length of matches on either side. 
+ /// + /// Vectorial keys that appear on either side, in priority order + key_order: Vec> + } +} + +pub enum AnyMatcher { + Scalar(Vec), + Vec{ + left: Vec, + mid: VecMatcher, + right: Vec + } +} +impl Matcher for AnyMatcher { + fn new(pattern: Rc>) -> Self { + mk_matcher(&pattern) + } + + fn apply<'a>(&self, source: &'a [Expr]) -> Option> { + any_match(self, source) + } +} + +// ################ InternedDisplay ################ + +fn disp_scalv( + scalv: &Vec, + f: &mut std::fmt::Formatter<'_>, + i: &Interner +) -> std::fmt::Result { + let (head, tail) = unwrap_or!(scalv.split_first(); return Ok(())); + head.fmt_i(f, i)?; + for s in tail.iter() { + write!(f, " ")?; + s.fmt_i(f, i)?; + } + Ok(()) +} + +impl InternedDisplay for ScalMatcher { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + match self { + Self::P(p) => write!(f, "{:?}", p), + Self::Placeh(n) => write!(f, "${}", i.r(*n)), + Self::Name(n) => write!(f, "{}", print_nname(*n, i)), + Self::S(c, body) => { + f.write_char(*c)?; + body.fmt_i(f, i)?; + f.write_char(match c {'('=>')','['=>']','{'=>'}',_=>unreachable!()}) + }, + Self::Lambda(arg, body) => { + f.write_char('\\')?; + arg.fmt_i(f, i)?; + f.write_char('.')?; + body.fmt_i(f, i) + } + } + } +} + +impl InternedDisplay for VecMatcher { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + match self { + Self::Placeh { key, nonzero } => { + if *nonzero {f.write_char('.')?;}; + write!(f, "..${}", i.r(*key)) + } + Self::Scan { left, sep, right, direction } => { + let arrow = match direction { + Side::Left => " <== ", + Side::Right => " ==> " + }; + write!(f, "Scan{{")?; + left.fmt_i(f, i)?; + f.write_str(arrow)?; + disp_scalv(sep, f, i)?; + f.write_str(arrow)?; + right.fmt_i(f, i)?; + write!(f, "}}") + }, + Self::Middle { left, left_sep, mid, right_sep, right, .. 
} => { + write!(f, "Middle{{")?; + left.fmt_i(f, i)?; + f.write_str("|")?; + disp_scalv(left_sep, f, i)?; + f.write_str("|")?; + mid.fmt_i(f, i)?; + f.write_str("|")?; + disp_scalv(right_sep, f, i)?; + f.write_str("|")?; + right.fmt_i(f, i)?; + write!(f, "}}") + } + } + } +} + +impl InternedDisplay for AnyMatcher { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + match self { + Self::Scalar(s) => { + write!(f, "(")?; + disp_scalv(s, f, i)?; + write!(f, ")") + } + Self::Vec { left, mid, right } => { + write!(f, "[")?; + disp_scalv(left, f, i)?; + write!(f, "|")?; + mid.fmt_i(f, i)?; + write!(f, "|")?; + disp_scalv(right, f, i)?; + write!(f, "]") + } + } + } +} \ No newline at end of file diff --git a/src/rule/matcher_second/vec_match.rs b/src/rule/matcher_second/vec_match.rs new file mode 100644 index 0000000..488e80e --- /dev/null +++ b/src/rule/matcher_second/vec_match.rs @@ -0,0 +1,90 @@ +use std::cmp::Ordering; + +use itertools::Itertools; + +use crate::unwrap_or; +use crate::ast::Expr; +use crate::rule::state::{State, StateEntry}; + +use super::scal_match::scalv_match; +use super::shared::VecMatcher; + +pub fn vec_match<'a>(matcher: &VecMatcher, seq: &'a [Expr]) +-> Option> { + match matcher { + VecMatcher::Placeh { key, nonzero } => { + if *nonzero && seq.len() == 0 {return None} + return Some(State::from([(*key, StateEntry::Vec(seq))])) + } + VecMatcher::Scan { left, sep, right, direction } => { + if seq.len() < sep.len() {return None} + for lpos in direction.walk(0..=seq.len() - sep.len()) { + let rpos = lpos + sep.len(); + let mut state = unwrap_or!(vec_match(left, &seq[..lpos]); continue); + state.extend(unwrap_or!(scalv_match(sep, &seq[lpos..rpos]); continue)); + state.extend(unwrap_or!(vec_match(right, &seq[rpos..]); continue)); + return Some(state) + } + None + } + // XXX predict heap space usage and allocation count + VecMatcher::Middle { left, left_sep, mid, right_sep, right, key_order } => { + if seq.len() < 
left_sep.len() + right_sep.len() {return None} + // Valid locations for the left separator + let lposv = seq[..seq.len() - right_sep.len()] + .windows(left_sep.len()) + .enumerate() + .filter_map(|(i, window)| { + scalv_match(left_sep, window).map(|s| (i, s)) + }) + .collect::>(); + // Valid locations for the right separator + let rposv = seq[left_sep.len()..] + .windows(right_sep.len()) + .enumerate() + .filter_map(|(i, window)| { + scalv_match(right_sep, window).map(|s| (i, s)) + }) + .collect::>(); + // Valid combinations of locations for the separators + let mut pos_pairs = lposv.into_iter() + .cartesian_product(rposv.into_iter()) + .filter(|((lpos, _), (rpos, _))| lpos + left_sep.len() <= *rpos) + .map(|((lpos, mut lstate), (rpos, rstate))| { + lstate.extend(rstate); + (lpos, rpos, lstate) + }) + .collect::>(); + // In descending order of size + pos_pairs.sort_by_key(|(l, r, _)| -((r - l) as i64)); + let eql_clusters = pos_pairs.into_iter() + .group_by(|(al, ar, _)| ar - al); + for (_gap_size, cluster) in eql_clusters.into_iter() { + let best_candidate = cluster.into_iter() + .filter_map(|(lpos, rpos, mut state)| { + state.extend(vec_match(left, &seq[..lpos])?); + state.extend(vec_match(mid, &seq[lpos + left_sep.len()..rpos])?); + state.extend(vec_match(right, &seq[rpos + right_sep.len()..])?); + Some(state) + }) + .max_by(|a, b| { + for key in key_order { + let aslc = if let Some(StateEntry::Vec(s)) = a.get(key) {s} + else {panic!("key_order references scalar or missing")}; + let bslc = if let Some(StateEntry::Vec(s)) = b.get(key) {s} + else {panic!("key_order references scalar or missing")}; + match aslc.len().cmp(&bslc.len()) { + Ordering::Equal => (), + any => return any + } + } + Ordering::Equal + }); + if let Some(state) = best_candidate { + return Some(state) + } + } + None + } + } +} \ No newline at end of file diff --git a/src/rule/mod.rs b/src/rule/mod.rs index c169aca..60f404f 100644 --- a/src/rule/mod.rs +++ b/src/rule/mod.rs @@ -1,7 +1,15 @@ -mod 
executor; +// mod executor; mod rule_error; mod repository; +mod prepare_rule; +mod matcher; +mod update_first_seq; +mod state; +mod matcher_second; +mod vec_attrs; // pub use rule::Rule; pub use rule_error::RuleError; -pub use repository::Repository; +pub use repository::{Repository, Repo}; + +pub use matcher_second::AnyMatcher; \ No newline at end of file diff --git a/src/rule/prepare_rule.rs b/src/rule/prepare_rule.rs new file mode 100644 index 0000000..b413a68 --- /dev/null +++ b/src/rule/prepare_rule.rs @@ -0,0 +1,120 @@ +use std::rc::Rc; + +use hashbrown::HashMap; +use itertools::Itertools; + +use crate::representations::location::Location; +use crate::interner::{Token, Interner}; +use crate::ast::{PHClass, Expr, Clause, Placeholder, Rule}; + +use super::RuleError; +use super::vec_attrs::vec_attrs; + +/// Ensure that the rule's source begins and ends with a vectorial without +/// changing its meaning +fn pad(mut rule: Rule, i: &Interner) -> Rule { + let class: PHClass = PHClass::Vec { nonzero: false, prio: 0 }; + let empty_exprv: &[Expr] = &[]; + let prefix_exprv: &[Expr] = &[Expr{ + location: Location::Unknown, + value: Clause::Placeh(Placeholder{ + name: i.i("::prefix"), + class + }), + }]; + let suffix_exprv: &[Expr] = &[Expr{ + location: Location::Unknown, + value: Clause::Placeh(Placeholder{ + name: i.i("::suffix"), + class + }), + }]; + let rule_head = rule.source.first().expect("Src can never be empty!"); + let head_explicit = vec_attrs(rule_head).is_some(); + let rule_tail = rule.source.last().expect("Unreachable branch!"); + let tail_explicit = vec_attrs(rule_tail).is_some(); + let prefix_vec = if head_explicit {empty_exprv} else {prefix_exprv}; + let suffix_vec = if tail_explicit {empty_exprv} else {suffix_exprv}; + rule.source = Rc::new( + prefix_vec.iter() + .chain(rule.source.iter()) + .chain(suffix_vec.iter()) + .cloned() + .collect() + ); + rule.target = Rc::new( + prefix_vec.iter() + .chain(rule.target.iter()) + .chain(suffix_vec.iter()) + 
.cloned() + .collect() + ); + rule +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum PHType { + Scalar, + Vec{ nonzero: bool } +} +impl From for PHType { + fn from(value: PHClass) -> Self { + match value { + PHClass::Scalar => Self::Scalar, + PHClass::Vec { nonzero, .. } => Self::Vec{ nonzero } + } + } +} + +fn check_rec_expr( + expr: &Expr, + types: &mut HashMap, PHType>, + in_template: bool +) -> Result<(), RuleError> { + match &expr.value { + Clause::Name(_) | Clause::P(_) => Ok(()), + Clause::Placeh(Placeholder{ name, class }) => { + let typ = class.clone().into(); + // in a template, the type must be known and identical + // outside template (in pattern) the type must be unknown + if let Some(known) = types.insert(*name, typ) { + if !in_template { Err(RuleError::Multiple(*name)) } + else if known != typ { Err(RuleError::TypeMismatch(*name)) } + else { Ok(()) } + } else if in_template { Err(RuleError::Missing(*name)) } + else { Ok(()) } + } + Clause::Lambda(arg, body) => { + check_rec_expr(arg.as_ref(), types, in_template)?; + check_rec_exprv(&body, types, in_template) + } + Clause::S(_, body) => check_rec_exprv(&body, types, in_template) + } +} + +fn check_rec_exprv( + exprv: &[Expr], + types: &mut HashMap, PHType>, + in_template: bool +) -> Result<(), RuleError> { + for (l, r) in exprv.iter().tuple_windows::<(_, _)>() { + check_rec_expr(l, types, in_template)?; + if !in_template { // in a pattern vectorials cannot follow each other + if let (Some(ld), Some(rd)) = (vec_attrs(l), vec_attrs(r)) { + return Err(RuleError::VecNeighbors(ld.0, rd.0)) + } + } + } + if let Some(e) = exprv.last() { + check_rec_expr(e, types, in_template) + } else { Ok(()) } +} + +pub fn prepare_rule(rule: Rule, i: &Interner) -> Result { + // Dimension check + let mut types = HashMap::new(); + check_rec_exprv(&rule.source, &mut types, false)?; + check_rec_exprv(&rule.target, &mut types, true)?; + // Padding + Ok(pad(rule, i)) +} \ No newline at end of file diff --git 
a/src/rule/repository.rs b/src/rule/repository.rs index efc7df1..f7fba2d 100644 --- a/src/rule/repository.rs +++ b/src/rule/repository.rs @@ -1,62 +1,137 @@ -use std::fmt::Debug; +use std::rc::Rc; +use std::fmt::{Debug, Write}; -use mappable_rc::Mrc; +use hashbrown::HashSet; -use crate::representations::ast::Expr; +use crate::interner::{Token, Interner, InternedDisplay}; +use crate::utils::Substack; +use crate::ast::{Expr, Rule}; -use super::{super::ast::Rule, executor::execute, RuleError}; +use super::state::apply_exprv; +use super::{update_first_seq, AnyMatcher}; +use super::matcher::Matcher; +use super::prepare_rule::prepare_rule; +use super::RuleError; + +#[derive(Debug)] +pub struct CachedRule { + matcher: M, + source: Rc>, + template: Rc> +} + +impl InternedDisplay for CachedRule { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + for item in self.source.iter() { + item.fmt_i(f, i)?; + f.write_char(' ')?; + } + write!(f, "is matched by ")?; + self.matcher.fmt_i(f, i) + } +} /// Manages a priority queue of substitution rules and allows to apply them -pub struct Repository(Vec); -impl Repository { - pub fn new(mut rules: Vec) -> Self { +pub struct Repository { + cache: Vec<(CachedRule, HashSet>>>)> +} +impl Repository { + pub fn new(mut rules: Vec, i: &Interner) + -> Result + { rules.sort_by_key(|r| -r.prio); - Self(rules) - } - - pub fn step(&self, code: Mrc<[Expr]>) -> Result>, RuleError> { - for rule in self.0.iter() { - if let Some(out) = execute( - Mrc::clone(&rule.source), Mrc::clone(&rule.target), - Mrc::clone(&code) - )? 
{return Ok(Some(out))} - } - Ok(None) + let cache = rules.into_iter() + .map(|r| { + let rule = prepare_rule(r.clone(), i) + .map_err(|e| (r, e))?; + let mut glossary = HashSet::new(); + for e in rule.source.iter() { + e.visit_names(Substack::Bottom, &mut |op| { + glossary.insert(op); + }) + } + let matcher = M::new(rule.source.clone()); + let prep = CachedRule{ + matcher, + source: rule.source, + template: rule.target + }; + Ok((prep, glossary)) + }) + .collect::, _>>()?; + Ok(Self{cache}) } /// Attempt to run each rule in priority order once - pub fn pass(&self, mut code: Mrc<[Expr]>) -> Result>, RuleError> { - let mut ran_once = false; - for rule in self.0.iter() { - if let Some(tmp) = execute( - Mrc::clone(&rule.source), Mrc::clone(&rule.target), - Mrc::clone(&code) - )? { - ran_once = true; - code = tmp; - } + pub fn step(&self, code: &Expr) -> Option { + let mut glossary = HashSet::new(); + code.visit_names(Substack::Bottom, &mut |op| { glossary.insert(op); }); + // println!("Glossary for code: {:?}", print_nname_seq(glossary.iter(), i)); + for (rule, deps) in self.cache.iter() { + if !deps.is_subset(&glossary) { continue; } + let product = update_first_seq::expr(code, &mut |exprv| { + let state = rule.matcher.apply(exprv.as_slice())?; + let result = apply_exprv(&rule.template, &state); + Some(Rc::new(result)) + }); + if let Some(newcode) = product {return Some(newcode)} } - Ok(if ran_once {Some(code)} else {None}) + None + } + + /// Keep running the matching rule with the highest priority until no + /// rules match. WARNING: this function might not terminate + #[allow(unused)] + pub fn pass(&self, code: &Expr) -> Option { + todo!() + // if let Some(mut processed) = self.step(code) { + // while let Some(out) = self.step(&processed) { + // processed = out + // } + // Some(processed) + // } else {None} } /// Attempt to run each rule in priority order `limit` times. Returns the final /// tree and the number of iterations left to the limit. 
- pub fn long_step(&self, mut code: Mrc<[Expr]>, mut limit: usize) - -> Result<(Mrc<[Expr]>, usize), RuleError> { - while let Some(tmp) = self.pass(Mrc::clone(&code))? { - if 0 >= limit {break} - limit -= 1; - code = tmp - } - Ok((code, limit)) + #[allow(unused)] + pub fn long_step(&self, code: &Expr, mut limit: usize) + -> Result<(Expr, usize), RuleError> + { + todo!() + // if limit == 0 {return Ok((code.clone(), 0))} + // if let Some(mut processed) = self.step(code) { + // limit -= 1; + // if limit == 0 {return Ok((processed.clone(), 0))} + // while let Some(out) = self.step(&processed) { + // limit -= 1; + // if limit == 0 { return Ok((out, 0)) } + // processed = out; + // } + // Ok((processed, limit)) + // } else {Ok((code.clone(), limit))} } } -impl Debug for Repository { +impl Debug for Repository { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for rule in self.0.iter() { + for rule in self.cache.iter() { writeln!(f, "{rule:?}")? } Ok(()) } } + +impl InternedDisplay for Repository { + fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result { + writeln!(f, "Repository[")?; + for (item, _) in self.cache.iter() { + write!(f, "\t")?; + item.fmt_i(f, i)?; + writeln!(f)?; + } + write!(f, "]") + } +} + +pub type Repo = Repository; \ No newline at end of file diff --git a/src/rule/rule_error.rs b/src/rule/rule_error.rs index 5ba1bb2..584309b 100644 --- a/src/rule/rule_error.rs +++ b/src/rule/rule_error.rs @@ -1,18 +1,36 @@ -use std::{fmt, error::Error}; +use std::fmt; -#[derive(Debug, Clone, PartialEq, Eq)] +use crate::interner::{Token, InternedDisplay, Interner}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum RuleError { - BadState(String), - ScalarVecMismatch(String) + Missing(Token), + TypeMismatch(Token), + /// Multiple occurences of a placeholder in a pattern are no longer + /// supported. 
+ Multiple(Token), + VecNeighbors(Token, Token), } -impl fmt::Display for RuleError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::BadState(key) => write!(f, "Key {:?} not in match pattern", key), - Self::ScalarVecMismatch(key) => - write!(f, "Key {:?} used inconsistently with and without ellipsis", key) +impl InternedDisplay for RuleError { + fn fmt_i(&self, f: &mut fmt::Formatter<'_>, i: &Interner) -> fmt::Result { + match *self { + Self::Missing(key) => write!(f, + "Key {:?} not in match pattern", + i.r(key) + ), + Self::TypeMismatch(key) => write!(f, + "Key {:?} used inconsistently with and without ellipsis", + i.r(key) + ), + Self::Multiple(key) => write!(f, + "Key {:?} appears multiple times in match pattern", + i.r(key) + ), + Self::VecNeighbors(left, right) => write!(f, + "Keys {:?} and {:?} are two vectorials right next to each other", + i.r(left), i.r(right) + ) } } -} -impl Error for RuleError {} \ No newline at end of file +} \ No newline at end of file diff --git a/src/rule/state.rs b/src/rule/state.rs new file mode 100644 index 0000000..a4d7240 --- /dev/null +++ b/src/rule/state.rs @@ -0,0 +1,51 @@ +use std::rc::Rc; + +use hashbrown::HashMap; +use itertools::Itertools; + +use crate::interner::Token; +use crate::ast::{Expr, Clause, Placeholder, PHClass}; + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum StateEntry<'a> { + Vec(&'a[Expr]), + Scalar(&'a Expr) +} +pub type State<'a> = HashMap, StateEntry<'a>>; + +pub fn apply_exprv(template: &[Expr], state: &State) -> Vec { + template.iter() + .map(|e| apply_expr(e, state)) + .flat_map(Vec::into_iter) + .collect() +} + +pub fn apply_expr(template: &Expr, state: &State) -> Vec { + let Expr{ location, value } = template; + match value { + Clause::P(_) | Clause::Name(_) => vec![template.clone()], + Clause::S(c, body) => vec![Expr{ + location: location.clone(), + value: Clause::S(*c, Rc::new(apply_exprv(body.as_slice(), state))) + }], + Clause::Placeh(Placeholder{ 
name, class }) => { + let value = if let Some(&v) = state.get(name) {v} + else {panic!("Placeholder does not have a value in state")}; + match (class, value) { + (PHClass::Scalar, StateEntry::Scalar(item)) => vec![item.clone()], + (PHClass::Vec{..}, StateEntry::Vec(chunk)) => chunk.to_vec(), + _ => panic!("Type mismatch between template and state") + } + } + Clause::Lambda(arg, body) => vec![Expr{ + location: location.clone(), + value: Clause::Lambda( + Rc::new(apply_expr(arg.as_ref(), state).into_iter() + .exactly_one() + .expect("Lambda arguments can only ever be scalar") + ), + Rc::new(apply_exprv(&body[..], state)) + ) + }] + } +} diff --git a/src/rule/update_first_seq.rs b/src/rule/update_first_seq.rs new file mode 100644 index 0000000..1575726 --- /dev/null +++ b/src/rule/update_first_seq.rs @@ -0,0 +1,36 @@ +use std::rc::Rc; + +use crate::utils::replace_first; +use crate::ast::{Expr, Clause}; + +/// Traverse the tree, calling pred on every sibling list until it returns +/// some vec then replace the sibling list with that vec and return true +/// return false if pred never returned some +pub fn exprv(input: Rc>, pred: &mut F) -> Option>> +where F: FnMut(Rc>) -> Option>> { + if let o@Some(_) = pred(input.clone()) {return o} + replace_first(input.as_ref(), |ex| expr(ex, pred)) + .map(|i| Rc::new(i.collect())) +} + +pub fn expr(input: &Expr, pred: &mut F) -> Option +where F: FnMut(Rc>) -> Option>> { + if let Some(value) = clause(&input.value, pred) { + Some(Expr{ value, location: input.location.clone() }) + } else {None} +} + +pub fn clause(c: &Clause, pred: &mut F) -> Option +where F: FnMut(Rc>) -> Option>> { + match c { + Clause::P(_) | Clause::Placeh {..} | Clause::Name {..} => None, + Clause::Lambda(arg, body) => { + if let Some(arg) = expr(arg.as_ref(), pred) { + Some(Clause::Lambda(Rc::new(arg), body.clone())) + } else if let Some(body) = exprv(body.clone(), pred) { + Some(Clause::Lambda(arg.clone(), body)) + } else {None} + } + Clause::S(c, body) => 
Some(Clause::S(*c, exprv(body.clone(), pred)?)), + } +} \ No newline at end of file diff --git a/src/rule/vec_attrs.rs b/src/rule/vec_attrs.rs new file mode 100644 index 0000000..f83a561 --- /dev/null +++ b/src/rule/vec_attrs.rs @@ -0,0 +1,10 @@ +use crate::interner::Token; +use crate::ast::{Expr, PHClass, Placeholder, Clause}; + +/// Returns the name, priority and nonzero of the expression if it is +/// a vectorial placeholder +pub fn vec_attrs(expr: &Expr) -> Option<(Token, u64, bool)> { + if let Clause::Placeh( + Placeholder{ class: PHClass::Vec{ prio, nonzero }, name } + ) = expr.value {Some((name, prio, nonzero))} else {None} +} \ No newline at end of file diff --git a/src/run_dir.rs b/src/run_dir.rs new file mode 100644 index 0000000..06aedbb --- /dev/null +++ b/src/run_dir.rs @@ -0,0 +1,143 @@ +use std::path::Path; +use std::rc::Rc; + +use hashbrown::HashMap; +use itertools::Itertools; + +use crate::interpreter::Return; +use crate::representations::{ast_to_postmacro, postmacro_to_interpreted}; +use crate::{external, xloop, interpreter}; +use crate::pipeline::{from_const_tree, ProjectTree, parse_layer, collect_rules, collect_consts}; +use crate::pipeline::file_loader::{Loaded, mk_cache}; +use crate::representations::sourcefile::{FileEntry, Import}; +use crate::rule::Repo; +use crate::interner::{Token, Interner, InternedDisplay}; + +static PRELUDE_TXT:&str = r#" +import std::( + add, subtract, multiply, remainder, divide, + equals, ifthenelse, + concatenate +) + +export ...$a + ...$b =1001=> (add (...$a) (...$b)) +export ...$a - ...$b:1 =1001=> (subtract (...$a) (...$b)) +export ...$a * ...$b =1000=> (multiply (...$a) (...$b)) +export ...$a % ...$b:1 =1000=> (remainder (...$a) (...$b)) +export ...$a / ...$b:1 =1000=> (divide (...$a) (...$b)) +export ...$a == ...$b =1002=> (equals (...$a) (...$b)) +export ...$a ++ ...$b =1003=> (concatenate (...$a) (...$b)) + +export do { ...$statement ; ...$rest:1 } =10_001=> ( + statement (...$statement) do { ...$rest } +) 
+export do { ...$return } =10_000=> (...$return) + +export statement (let $name = ...$value) ...$next =10_000=> ( + (\$name. ...$next) (...$value) +) +export statement (cps $name = ...$operation) ...$next =10_001=> ( + (...$operation) \$name. ...$next +) +export statement (cps ...$operation) ...$next =10_000=> ( + (...$operation) (...$next) +) + +export if ...$cond then ...$true else ...$false:1 =5_000=> ( + ifthenelse (...$cond) (...$true) (...$false) +) +"#; + +fn prelude_path(i: &Interner) -> Token>> +{ i.i(&[ i.i("prelude") ][..]) } +fn mainmod_path(i: &Interner) -> Token>> +{ i.i(&[ i.i("main") ][..]) } +fn entrypoint(i: &Interner) -> Token>> +{ i.i(&[ i.i("main"), i.i("main") ][..]) } + +fn load_environment(i: &Interner) -> ProjectTree { + let env = from_const_tree(HashMap::from([ + (i.i("std"), external::std::std(i)) + ]), &[i.i("std")], i); + let loader = |path: Token>>| { + if path == prelude_path(i) { + Ok(Loaded::Code(Rc::new(PRELUDE_TXT.to_string()))) + } else { + panic!( + "Prelude pointed to non-std path {}", + i.extern_vec(path).join("::") + ) + } + }; + parse_layer(&[prelude_path(i)], &loader, &env, &[], i) + // .unwrap_or_else(|e| panic!("Prelude error: \n {}", e)) + .expect("prelude error") +} + +fn load_dir(i: &Interner, dir: &Path) -> ProjectTree { + let environment = load_environment(i); + let file_cache = mk_cache(dir.to_path_buf(), i); + let loader = |path| file_cache.find(&path); + let prelude = [FileEntry::Import(vec![Import{ + path: prelude_path(i), name: None + }])]; + parse_layer(&[mainmod_path(i)], &loader, &environment, &prelude, i) + .expect("Failed to load source code") +} + +#[allow(unused)] +pub fn run_dir(dir: &Path) { + let i = Interner::new(); + let project = load_dir(&i, dir); + let rules = collect_rules(&project); + let consts = collect_consts(&project, &i); + println!("Initializing rule repository with {} rules", rules.len()); + let repo = Repo::new(rules, &i) + .unwrap_or_else(|(rule, error)| { + panic!("Rule error: {} + 
Offending rule: {}", + error.bundle(&i), + rule.bundle(&i) + ) + }); + println!("Repo dump: {}", repo.bundle(&i)); + let mut exec_table = HashMap::new(); + for (name, source) in consts.iter() { + // let nval = entrypoint(&i); let name = &nval; let source = &consts[name]; + let mut tree = source.clone(); + let displayname = i.extern_vec(*name).join("::"); + let macro_timeout = 100; + println!("Executing macros in {displayname}...", ); + let unmatched = xloop!(let mut idx = 0; idx < macro_timeout; idx += 1; { + match repo.step(&tree) { + None => break tree, + Some(phase) => { + // println!("Step {idx}/{macro_timeout}: {}", phase.bundle(&i)); + tree = phase; + }, + } + }; panic!("Macro execution in {displayname} didn't halt")); + let pmtree = ast_to_postmacro::expr(&unmatched) + .unwrap_or_else(|e| panic!("Postmacro conversion error: {e}")); + let runtree = postmacro_to_interpreted::expr(&pmtree); + exec_table.insert(*name, runtree); + } + println!("macro execution complete"); + let ctx = interpreter::Context { + symbols: &exec_table, + gas: None + }; + let entrypoint = exec_table.get(&entrypoint(&i)) + .unwrap_or_else(|| { + panic!("entrypoint not found, known keys are: {}", + exec_table.keys() + .map(|t| i.r(*t).iter().map(|t| i.r(*t)).join("::")) + .join(", ") + ) + }); + let Return{ gas, state } = interpreter::run(entrypoint.clone(), ctx) + .unwrap_or_else(|e| panic!("Runtime error: {}", e)); + println!("Settled at {}", state.bundle(&i)); + if gas == Some(0) {println!("Ran out of gas!")} + else {println!("Expression not reducible.")} +} \ No newline at end of file diff --git a/src/scheduler/generator_task.rs b/src/scheduler/generator_task.rs deleted file mode 100644 index 0a95f51..0000000 --- a/src/scheduler/generator_task.rs +++ /dev/null @@ -1,51 +0,0 @@ -use std::{ops::{Generator, GeneratorState}, pin::Pin}; - -use super::{Task, Nice, TaskState}; - -pub struct GeneratorTask> { - nice: Nice, - generator: Pin> -} - -impl GeneratorTask where G: Generator<(), Yield 
= ()> { - fn new(nice: Nice, generator: G) -> Self { Self { - nice, - generator: Box::pin(generator) - } } -} - -impl Task for GeneratorTask -where G: Generator<(), Yield = ()> { - type Result = G::Return; - - fn run_once(&mut self) -> super::TaskState { - match self.generator.as_mut().resume(()) { - GeneratorState::Yielded(()) => super::TaskState::Yield, - GeneratorState::Complete(r) => super::TaskState::Complete(r) - } - } -} - -impl Task for Pin> where T: Generator<(), Yield = ()> { - type Result = T::Return; - - fn run_once(&mut self) -> super::TaskState { - match self.as_mut().resume(()) { - GeneratorState::Yielded(()) => TaskState::Yield, - GeneratorState::Complete(r) => TaskState::Complete(r) - } - } -} - -#[macro_export] -macro_rules! subtask { - ($g:tt) => { { - let task = $g; - loop { - match task.run_once() { - TaskState::Yield => yield; - TaskState::Complete(r) => break r; - } - } - } }; -} \ No newline at end of file diff --git a/src/scheduler/mod.rs b/src/scheduler/mod.rs deleted file mode 100644 index afef8cb..0000000 --- a/src/scheduler/mod.rs +++ /dev/null @@ -1,47 +0,0 @@ -mod generator_task; -mod task_pair; -mod task_vec; - -pub type Nice = u16; -pub type Priority = i32; - -pub enum TaskState { - Yield, - Complete(R) -} - -pub trait Task { - type Result; - - fn run_once(&mut self) -> TaskState; - - fn run_n_times(&mut self, count: u64) -> TaskState { - for _ in 0..count { - if let r@TaskState::Complete(_) = self.run_once() { - return r - } - } - return TaskState::Yield - } - - fn run_to_completion(&mut self) -> Self::Result { - loop { if let TaskState::Complete(r) = self.run_n_times(u64::MAX) {return r} } - } - - fn boxed<'a>(self) -> TaskBox<'a, Self::Result> where Self: 'a + Sized { Box::new(self) } -} - -pub type TaskBox<'a, T> = Box + 'a>; - -impl<'a, R> Task for TaskBox<'a, R> { - type Result = R; - - fn run_once(&mut self) -> TaskState { self.as_mut().run_once() } - fn run_n_times(&mut self, count: u64) -> TaskState { - 
self.as_mut().run_n_times(count) - } - - fn run_to_completion(&mut self) -> Self::Result { - self.as_mut().run_to_completion() - } -} \ No newline at end of file diff --git a/src/scheduler/notes.md b/src/scheduler/notes.md deleted file mode 100644 index cd90349..0000000 --- a/src/scheduler/notes.md +++ /dev/null @@ -1,3 +0,0 @@ -# Purpose - -Type expressions are trees. Any single branch could terminate the solver and any branch may be nonterminating, therefore all of them must be run concurrently. Thread-based concurrency isn't an option because a compiler must be perfectly deterministic. It is also beneficial to have fine-grained control over the relative priority of different tasks. \ No newline at end of file diff --git a/src/scheduler/task_pair.rs b/src/scheduler/task_pair.rs deleted file mode 100644 index 8f23cbf..0000000 --- a/src/scheduler/task_pair.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::utils::translate::process; - -use super::{Task, Nice, Priority, TaskState}; - -enum TaskPairState { - Empty, - Left(T, U::Result), - Right(T::Result, U), - Both(T, U) -} - -pub struct TaskPair { - l_nice: Nice, - r_nice: Nice, - state: TaskPairState, - tally: Priority, -} - -impl TaskPair { - pub fn new(l_nice: Nice, left: T, r_nice: Nice, right: U) -> Self { - Self { - l_nice, r_nice, - tally: 0, - state: TaskPairState::Both(left, right) - } - } -} - -/// The state machine logic, abstracted from the subtask handling system -macro_rules! 
main_logic { - ($self:ident, $task:ident, $task_runner:expr) => {{ - let TaskPair{ state, tally, l_nice, r_nice } = $self; - let ret = process(state, |s| match s { - TaskPairState::Empty => panic!("Generator completed and empty"), - TaskPairState::Left(mut $task, r_res) => { - match $task_runner { - TaskState::Complete(r) => (TaskPairState::Empty, TaskState::Complete((r, r_res))), - TaskState::Yield => (TaskPairState::Left($task, r_res), TaskState::Yield), - } - } - TaskPairState::Right(l_res, mut $task) => { - match $task_runner { - TaskState::Complete(r) => (TaskPairState::Empty, TaskState::Complete((l_res, r))), - TaskState::Yield => (TaskPairState::Right(l_res, $task), TaskState::Yield), - } - } - TaskPairState::Both(l_task, r_task) => { - let state = if 0 <= *tally { - *tally -= *l_nice as Priority; - let mut $task = l_task; - match $task_runner { - TaskState::Complete(r) => TaskPairState::Right(r, r_task), - TaskState::Yield => TaskPairState::Both($task, r_task), - } - } else { - *tally += *r_nice as Priority; - let mut $task = r_task; - match $task_runner { - TaskState::Complete(r) => TaskPairState::Left(l_task, r), - TaskState::Yield => TaskPairState::Both(l_task, $task), - } - }; - (state, TaskState::Yield) - } - }); - ret - }}; -} - -impl Task for TaskPair { - type Result = (T::Result, U::Result); - - fn run_n_times(&mut self, mut count: u64) -> TaskState { - loop { - if count == 0 {return TaskState::Yield} - match self.state { - TaskPairState::Left(..) | TaskPairState::Right(..) 
=> { - return main_logic!(self, task, task.run_n_times(count)); - } - _ => () - } - if let r@TaskState::Complete(_) = self.run_once() {return r} - count -= 1; - } - } - - fn run_once(&mut self) -> TaskState { - main_logic!(self, task, task.run_once()) - } -} diff --git a/src/scheduler/task_vec.rs b/src/scheduler/task_vec.rs deleted file mode 100644 index 198295a..0000000 --- a/src/scheduler/task_vec.rs +++ /dev/null @@ -1,145 +0,0 @@ -use std::{iter, mem}; - -use itertools::Itertools; - -use super::{Task, Nice, TaskState}; - -const NORMALIZATION_THRESHOLD:Nice = Nice::MAX / 4; - -struct TaskEntry { - nice: Nice, - position: usize, - tally: Nice, - task: T -} - -struct TaskVec { - results: Vec>, - task_heap: Vec>>, -} - -impl TaskVec { - pub fn new(tasks: Vec<(Nice, T)>) -> Self { - let mut results = Vec::with_capacity(tasks.len()); - results.resize_with(tasks.len(), || None); - let task_heap = tasks.into_iter().enumerate() - .map(|(position, (nice, task))| Some(TaskEntry{ nice, task, position, tally: 1 })) - .collect_vec(); - Self { results, task_heap } - } - - fn entry(&self, i: usize) -> Option<&TaskEntry> { - if self.task_heap.len() <= i {None} - else {self.task_heap[i].as_ref()} - } - fn entry_mut(&mut self, i: usize) -> Option<&mut TaskEntry> { - if self.task_heap.len() <= i {None} - else {self.task_heap[i].as_mut()} - } - /// Returns the tally of the given record. 
Empty records always sink to the bottom - fn tally(&self, i: usize) -> Nice { - self.task_heap[i].as_ref().map(|e| e.tally).unwrap_or(Nice::MAX) - } - fn swap(&mut self, a: usize, b: usize) { - self.task_heap.swap(a, b); - } - fn iter_mut(&mut self) -> impl Iterator> { - self.task_heap.iter_mut().filter_map(|e| e.as_mut()) - } - - fn normalize(&mut self) { - let shrink_count = self.task_heap.iter().rev().take_while(|e| e.is_none()).count(); - let new_len = self.task_heap.len() - shrink_count; - self.task_heap.splice(new_len.., iter::empty()); - let head = self.entry_mut(0); - let offset = if let Some(e) = head { - let offset = e.tally - 1; - if offset < NORMALIZATION_THRESHOLD {return} - e.tally = 1; - offset - } else {return}; - for entry in self.iter_mut() { entry.tally -= offset } - } - - fn sink(&mut self, i: usize) { - let lchi = 2*i + 1; - let rchi = 2*i + 2; - let t = self.tally(i); - let lcht = if let Some(e) = self.entry(lchi) {e.tally} else { - if self.tally(rchi) < t { - self.swap(rchi, i); - self.sink(rchi); - } - return - }; - let rcht = if let Some(e) = self.entry(rchi) {e.tally} else { - if self.tally(lchi) < t { - self.swap(lchi, i); - self.sink(lchi); - } - return - }; - let mchi = { - if rcht < t && rcht < lcht {rchi} - else if lcht < t && lcht < rcht {lchi} - else {return} - }; - self.swap(i, mchi); - self.sink(mchi); - } - - fn take_results(&mut self) -> Vec { - let mut swap = Vec::new(); - mem::swap(&mut self.results, &mut swap); - return swap.into_iter().collect::>() - .expect("Results not full but the heap is empty"); - } - - fn one_left(&mut self) -> bool { - self.entry(0).is_some() && self.entry(1).is_none() && self.entry(2).is_none() - } -} - -impl Task for TaskVec { - type Result = Vec; - - fn run_n_times(&mut self, mut count: u64) -> TaskState { - loop { - if count == 0 {return TaskState::Yield} - if self.one_left() { - let head = &mut self.task_heap[0]; - let head_entry = head.as_mut().expect("one_left faulty"); - return match 
head_entry.task.run_n_times(count) { - TaskState::Yield => TaskState::Yield, - TaskState::Complete(r) => { - self.results[head_entry.position] = Some(r); - *head = None; - return TaskState::Complete(self.take_results()); - } - } - } else if let r@TaskState::Complete(_) = self.run_once() {return r} - count -= 1; - } - } - - fn run_once(&mut self) -> super::TaskState { - self.normalize(); - let head = &mut self.task_heap[0]; - let head_entry = head.as_mut().expect("All completed, cannot run further"); - head_entry.tally += head_entry.nice; - match head_entry.task.run_once() { - TaskState::Complete(r) => { - self.results[head_entry.position] = Some(r); - *head = None; - self.sink(0); - if self.entry(0).is_some() { return TaskState::Yield } - TaskState::Complete(self.take_results()) - } - TaskState::Yield => { - head_entry.tally += head_entry.nice; - self.sink(0); - TaskState::Yield - } - } - } -} \ No newline at end of file diff --git a/src/utils/bfs.rs b/src/utils/bfs.rs deleted file mode 100644 index 6a2e2ce..0000000 --- a/src/utils/bfs.rs +++ /dev/null @@ -1,115 +0,0 @@ -use std::collections::{VecDeque, HashSet}; -use std::iter; -use std::hash::Hash; - -use crate::unwrap_or; -use crate::utils::BoxedIter; - -// TODO: move to own crate - -/// Two-stage breadth-first search; -/// Instead of enumerating neighbors before returning a node, it puts visited but not yet -/// enumerated nodes in a separate queue and only enumerates them to refill the queue of children -/// one by one once it's empty. This method is preferable for generated graphs because it doesn't -/// allocate memory for the children until necessary, but it's also probably a bit slower since -/// it involves additional processing. -/// -/// # Performance -/// `T` is cloned twice for each returned value. 
-pub fn bfs(init: T, neighbors: F) --> impl Iterator -where T: Eq + Hash + Clone + std::fmt::Debug, - F: Fn(T) -> I, I: Iterator -{ - let mut visited: HashSet = HashSet::new(); - let mut visit_queue: VecDeque = VecDeque::from([init]); - let mut unpack_queue: VecDeque = VecDeque::new(); - iter::from_fn(move || { - let next = {loop { - let next = unwrap_or!(visit_queue.pop_front(); break None); - if !visited.contains(&next) { break Some(next) } - }}.or_else(|| loop { - let unpacked = unwrap_or!(unpack_queue.pop_front(); break None); - let mut nbv = neighbors(unpacked).filter(|t| !visited.contains(t)); - if let Some(next) = nbv.next() { - visit_queue.extend(nbv); - break Some(next) - } - })?; - visited.insert(next.clone()); - unpack_queue.push_back(next.clone()); - Some(next) - }) -} - -/// Same as [bfs] but with a recursion depth limit -/// -/// The main intent is to effectively walk infinite graphs of unknown breadth without making the -/// recursion depth dependent on the number of nodes. If predictable runtime is more important -/// than predictable depth, [bfs] with [std::iter::Iterator::take] should be used instead -pub fn bfs_upto<'a, T: 'a, F: 'a, I: 'a>(init: T, neighbors: F, limit: usize) --> impl Iterator + 'a -where T: Eq + Hash + Clone + std::fmt::Debug, - F: Fn(T) -> I, I: Iterator -{ - /// Newtype to store the recursion depth but exclude it from equality comparisons - /// Because BFS visits nodes in increasing distance order, when a node is visited for the - /// second time it will never override the earlier version of itself. This is not the case - /// with Djikstra's algorithm, which can be conceptualised as a "weighted BFS". 
- #[derive(Eq, Clone, Debug)] - struct Wrap(usize, U); - impl PartialEq for Wrap { - fn eq(&self, other: &Self) -> bool { self.1.eq(&other.1) } - } - impl Hash for Wrap { - fn hash(&self, state: &mut H) { self.1.hash(state) } - } - bfs(Wrap(0, init), move |Wrap(dist, t)| -> BoxedIter> { // boxed because we branch - if dist == limit {Box::new(iter::empty())} - else {Box::new(neighbors(t).map(move |t| Wrap(dist + 1, t)))} - }).map(|Wrap(_, t)| t) -} - -#[cfg(test)] -mod tests { - use itertools::Itertools; - - use super::*; - - type Graph = Vec>; - fn neighbors(graph: &Graph, pt: usize) -> impl Iterator + '_ { - graph[pt].iter().copied() - } - fn from_neighborhood_matrix(matrix: Vec>) -> Graph { - matrix.into_iter().map(|v| { - v.into_iter().enumerate().filter_map(|(i, ent)| { - if ent > 1 {panic!("Neighborhood matrices must contain binary values")} - else if ent == 1 {Some(i)} - else {None} - }).collect() - }).collect() - } - - #[test] - fn test_square() { - let simple_graph = from_neighborhood_matrix(vec![ - vec![0,1,0,1,1,0,0,0], - vec![1,0,1,0,0,1,0,0], - vec![0,1,0,1,0,0,1,0], - vec![1,0,1,0,0,0,0,1], - vec![1,0,0,0,0,1,0,1], - vec![0,1,0,0,1,0,1,0], - vec![0,0,1,0,0,1,0,1], - vec![0,0,0,1,1,0,1,0], - ]); - let scan = bfs(0, |n| neighbors(&simple_graph, n)).collect_vec(); - assert_eq!(scan, vec![0, 1, 3, 4, 2, 5, 7, 6]) - } - #[test] - fn test_stringbuilder() { - let scan = bfs("".to_string(), |s| { - vec![s.clone()+";", s.clone()+"a", s+"aaa"].into_iter() - }).take(30).collect_vec(); - println!("{scan:?}") - } -} \ No newline at end of file diff --git a/src/utils/cache.rs b/src/utils/cache.rs index 383d601..5c0a879 100644 --- a/src/utils/cache.rs +++ b/src/utils/cache.rs @@ -1,4 +1,6 @@ -use std::{hash::Hash, cell::RefCell, rc::Rc}; +use std::cell::RefCell; +use std::hash::Hash; +use std::rc::Rc; use hashbrown::HashMap; // TODO: make this a crate @@ -20,6 +22,7 @@ impl<'a, I, O> Cache<'a, I, O> where } } + #[allow(unused)] pub fn rc(closure: F) -> Rc where F: 
Fn(I, &Self) -> O { Rc::new(Self::new(closure)) } @@ -44,4 +47,27 @@ impl<'a, I, O> Cache<'a, I, O> where let store = self.store.borrow(); store.get(i).cloned() } + + + /// Convert this cache into a cached [Fn(&I) -> O] + #[allow(unused)] + pub fn into_fn(self) -> impl Fn(&I) -> O + 'a where I: 'a { + move |i| self.find(i) + } + + /// Borrow this cache with a cached [Fn(&I) -> O] + #[allow(unused)] + pub fn as_fn<'b: 'a>(&'b self) -> impl Fn(&I) -> O + 'b where I: 'b { + move |i| self.find(i) + } } + +impl<'a, I, O> IntoIterator for Cache<'a, I, O> { + type IntoIter = hashbrown::hash_map::IntoIter; + type Item = (I, O); + fn into_iter(self) -> Self::IntoIter { + let Cache{ store, .. } = self; + let map = store.into_inner(); + map.into_iter() + } +} \ No newline at end of file diff --git a/src/utils/coprefix.rs b/src/utils/coprefix.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/interned_display.rs b/src/utils/interned_display.rs deleted file mode 100644 index feab4f0..0000000 --- a/src/utils/interned_display.rs +++ /dev/null @@ -1,19 +0,0 @@ -use std::fmt::Display; - -use lasso::RodeoResolver; - -pub trait InternedDisplay { - fn fmt(&self, - f: &mut std::fmt::Formatter<'_>, - rr: RodeoResolver - ) -> std::fmt::Result; -} - -impl InternedDisplay for T where T: Display { - fn fmt(&self, - f: &mut std::fmt::Formatter<'_>, - rr: RodeoResolver - ) -> std::fmt::Result { - ::fmt(&self, f) - } -} \ No newline at end of file diff --git a/src/utils/interner.rs b/src/utils/interner.rs new file mode 100644 index 0000000..815e6f1 --- /dev/null +++ b/src/utils/interner.rs @@ -0,0 +1,181 @@ +use std::sync::{Mutex, Arc}; +use std::num::NonZeroU32; +use std::hash::Hash; + +use lasso::{Rodeo, Spur, Key}; +use base64::{engine::general_purpose::STANDARD_NO_PAD as BASE64, Engine}; + +/// A token representing an interned string or sequence in an interner +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)] +pub struct Token(pub Spur); + +/// An 
interner that can intern strings, and sequences of things it +/// interned as long as they have the same rank +pub trait Interner: Sync { + fn str2tok(&self, str: &str) -> Token; + fn tok2str(&self, token: Token) -> String; + fn slc2tok(&self, slice: &[Token]) -> Token<{RANK + 1}>; + fn tok2slc(&self, token: Token) -> Vec>; + fn tok2strv(&self, token: Token>>) -> Vec { + self.tok2slc(token).into_iter().map(|t| self.tok2str(t)).collect() + } + fn tokv2strv(&self, slice: &[Token]) -> Vec { + slice.iter().map(|t| self.tok2str(*t)).collect() + } + /// Get the first token of a sequence + fn head(&self, token: Token) -> Token<{RANK - 1}>; + /// Returns the length of a sequence + fn len(&self, token: Token) -> usize + where Token<{RANK - 1}>: Clone; + /// Returns the length of the longest identical prefix of the two sequences + fn coprefix(&self, a: Token, b: Token) -> usize + where Token<{RANK - 1}>: Clone; +} + +fn serialize_seq(seq: &[Token]) -> String { + let data: Vec = seq.iter() + .map(|t| u32::from(t.0.into_inner()).to_le_bytes().into_iter()) + .flatten() + .collect(); + BASE64.encode(data) +} + +fn deserialize_seq(string: &str) -> Vec> { + let data = BASE64.decode(string) + .expect("String is not valid base64"); + assert!(data.len() % 4 == 0, "Tokens should serialize to 3 bytes each"); + data.array_chunks::<4>().map(|a| { + let bytes = [a[0], a[1], a[2], a[3]]; + let nz32 = NonZeroU32::new(u32::from_le_bytes(bytes)) + .expect("Token representation should never be zero"); + Token(Spur::try_from_usize(u32::from(nz32) as usize).unwrap()) + }).collect() +} + +/// An interner that delegates the actual work to Lasso +#[derive(Clone)] +pub struct LassoInterner { + strings: Arc>, + slices: Arc> +} + +impl LassoInterner { + /// Create an empty interner. Called to create the singleton. 
+ fn new() -> Self { + Self{ + slices: Arc::new(Mutex::new(Rodeo::new())), + strings: Arc::new(Mutex::new(Rodeo::new())) + } + } +} + +impl Interner for LassoInterner { + fn str2tok(&self, str: &str) -> Token { + let mut strings = self.strings.lock().unwrap(); + let key = strings.get_or_intern(str); + Token(key) + } + + fn tok2str<'a>(&'a self, token: Token) -> String { + let key = token.0; + let strings = self.strings.lock().unwrap(); + strings.resolve(&key).to_string() + } + + fn slc2tok(&self, slice: &[Token]) -> Token<{RANK + 1}> { + let data = serialize_seq(slice); + let mut slices = self.slices.lock().unwrap(); + let key = slices.get_or_intern(data); + Token(key) + } + + fn tok2slc<'a, const RANK: u8>(&'a self, token: Token) -> Vec> { + let key = token.0; + let slices = self.slices.lock().unwrap(); + let string = slices.resolve(&key); + deserialize_seq(string) + } + + fn head(&self, token: Token) -> Token<{RANK - 1}> { + let key = token.0; + let slices = self.slices.lock().unwrap(); + let string = slices.resolve(&key); + deserialize_seq(&string[0..5])[0] + } + + fn len(&self, token: Token) -> usize where Token<{RANK - 1}>: Clone { + let key = token.0; + let slices = self.slices.lock().unwrap(); + let string = slices.resolve(&key); + assert!(string.len() % 4 == 0, "Tokens should serialize to 3 characters"); + string.len() / 4 + } + + fn coprefix(&self, a: Token, b: Token) -> usize where Token<{RANK - 1}>: Clone { + let keya = a.0; + let keyb = b.0; + let slices = self.slices.lock().unwrap(); + let sa = slices.resolve(&keya); + let sb = slices.resolve(&keyb); + sa.bytes() + .zip(sb.bytes()) + .take_while(|(a, b)| a == b) + .count() / 4 + } +} + +/// Create an interner that inherits the singleton's data, and +/// block all future interaction with the singleton. 
+/// +/// DO NOT call within [dynamic] or elsewhere pre-main +pub fn mk_interner() -> impl Interner { + LassoInterner::new() +} + +pub trait StringLike: Clone + Eq + Hash { + fn into_str(self, i: &Interner) -> String; + fn into_tok(self, i: &Interner) -> Token; +} + +impl StringLike for String { + fn into_str(self, _i: &Interner) -> String {self} + fn into_tok(self, i: &Interner) -> Token {i.str2tok(&self)} +} + +impl StringLike for Token { + fn into_str(self, i: &Interner) -> String {i.tok2str(self)} + fn into_tok(self, _i: &Interner) -> Token {self} +} + +pub trait StringVLike: Clone + Eq + Hash { + fn into_strv(self, i: &Interner) -> Vec; + fn into_tok(self, i: &Interner) -> Token>>; + fn into_tokv(self, i: &Interner) -> Vec>; +} + +impl StringVLike for Vec { + fn into_strv(self, _i: &Interner) -> Vec {self} + fn into_tok(self, i: &Interner) -> Token>> { + let tokv = self.into_iter() + .map(|s| i.str2tok(&s)) + .collect::>(); + i.slc2tok(&tokv) + } + fn into_tokv(self, i: &Interner) -> Vec> { + self.into_iter() + .map(|s| i.str2tok(&s)) + .collect() + } +} + +impl StringVLike for Vec> { + fn into_strv(self, i: &Interner) -> Vec {i.tokv2strv(&self)} + fn into_tok(self, i: &Interner) -> Token>> {i.slc2tok(&self)} + fn into_tokv(self, _i: &Interner) -> Vec> {self} +} + +impl StringVLike for Token>> { + fn into_strv(self, i: &Interner) -> Vec {i.tok2strv(self)} + fn into_tok(self, _i: &Interner) -> Token>> {self} + fn into_tokv(self, i: &Interner) -> Vec> {i.tok2slc(self)} +} \ No newline at end of file diff --git a/src/utils/iter.rs b/src/utils/iter.rs index ce5af08..faa7809 100644 --- a/src/utils/iter.rs +++ b/src/utils/iter.rs @@ -1,6 +1,6 @@ /// Utility functions to get rid of explicit casts to BoxedIter which are tedious -use std::{iter, mem}; +use std::iter; pub type BoxedIter<'a, T> = Box + 'a>; pub type BoxedIterIter<'a, T> = BoxedIter<'a, BoxedIter<'a, T>>; diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 2b99f8a..e57772a 100644 --- a/src/utils/mod.rs 
+++ b/src/utils/mod.rs @@ -1,87 +1,26 @@ mod cache; pub mod translate; mod replace_first; -mod interned_display; -pub use interned_display::InternedDisplay; -// mod visitor; +// mod interned_display; +// mod interner; +mod variant; +mod print_nname; +mod pushed; +pub use pushed::pushed; +pub use print_nname::{print_nname_seq, print_nname}; +// pub use interner::*; +// pub use interned_display::InternedDisplay; pub use replace_first::replace_first; pub use cache::Cache; mod substack; -pub use substack::Stackframe; +pub use substack::{Stackframe, Substack, SubstackIterator}; mod side; pub use side::Side; mod unwrap_or; pub mod iter; pub use iter::BoxedIter; -mod bfs; mod string_from_charset; pub use string_from_charset::string_from_charset; mod xloop; mod protomap; -pub use protomap::ProtoMap; -mod product2; -pub use product2::Product2; - -use mappable_rc::Mrc; - -pub fn mrc_derive(m: &Mrc, p: P) -> Mrc -where P: for<'a> FnOnce(&'a T) -> &'a U { - Mrc::map(Mrc::clone(m), p) -} - -pub fn mrc_try_derive(m: &Mrc, p: P) -> Option> -where P: for<'a> FnOnce(&'a T) -> Option<&'a U> { - Mrc::try_map(Mrc::clone(m), p).ok() -} - -pub fn mrc_empty_slice() -> Mrc<[T]> { - mrc_derive_slice(&Mrc::new(Vec::new())) -} - -pub fn to_mrc_slice(v: Vec) -> Mrc<[T]> { - Mrc::map(Mrc::new(v), |v| v.as_slice()) -} - -pub fn collect_to_mrc(iter: I) -> Mrc<[I::Item]> where I: Iterator { - to_mrc_slice(iter.collect()) -} - -pub fn mrc_derive_slice(mv: &Mrc>) -> Mrc<[T]> { - mrc_derive(mv, |v| v.as_slice()) -} - -pub fn one_mrc_slice(t: T) -> Mrc<[T]> { - Mrc::map(Mrc::new([t; 1]), |v| v.as_slice()) -} - -pub fn mrc_to_iter(ms: Mrc<[T]>) -> impl Iterator> { - let mut i = 0; - std::iter::from_fn(move || if i < ms.len() { - let out = Some(mrc_derive(&ms, |s| &s[i])); - i += 1; - out - } else {None}) -} - -pub fn mrc_unnest(m: &Mrc>) -> Mrc { - Mrc::clone(m.as_ref()) -} - -pub fn mrc_slice_to_only(m: Mrc<[T]>) -> Result, ()> { - Mrc::try_map(m, |slice| { - if slice.len() != 1 {None} - else 
{Some(&slice[0])} - }).map_err(|_| ()) -} - -pub fn mrc_slice_to_only_option(m: Mrc<[T]>) -> Result>, ()> { - if m.len() > 1 {return Err(())} - Ok(Mrc::try_map(m, |slice| { - if slice.len() == 0 {None} - else {Some(&slice[0])} - }).ok()) -} - -pub fn mrc_concat(a: &Mrc<[T]>, b: &Mrc<[T]>) -> Mrc<[T]> { - collect_to_mrc(a.iter().chain(b.iter()).cloned()) -} \ No newline at end of file +pub use protomap::ProtoMap; \ No newline at end of file diff --git a/src/utils/print_nname.rs b/src/utils/print_nname.rs new file mode 100644 index 0000000..4f4846c --- /dev/null +++ b/src/utils/print_nname.rs @@ -0,0 +1,16 @@ +use itertools::Itertools; + +use crate::interner::{Interner, Token}; + +#[allow(unused)] +pub fn print_nname(t: Token>>, i: &Interner) -> String { + i.r(t).iter().map(|t| i.r(*t)).join("::") +} + +#[allow(unused)] +pub fn print_nname_seq<'a>( + tv: impl Iterator>>>, + i: &Interner +) -> String { + tv.map(|t| print_nname(*t, i)).join(", ") +} \ No newline at end of file diff --git a/src/utils/product2.rs b/src/utils/product2.rs deleted file mode 100644 index b1078a7..0000000 --- a/src/utils/product2.rs +++ /dev/null @@ -1,55 +0,0 @@ -use super::Side; - -/// The output of a two-part algorithm. 
The values are -/// -/// - [Product2::Left] or [Product2::Right] if one of the arguments is the product -/// - [Product2::Either] if the arguments are identical -/// - [Product2::New] if the product is a different value from either -pub enum Product2 { - Left, - Right, - #[allow(unused)] - Either, - #[allow(unused)] - New(T) -} -impl Product2 { - /// Convert the product into a concrete value by providing the original arguments - pub fn pick(self, left: T, right: T) -> T { - match self { - Self::Left | Self::Either => left, - Self::Right => right, - Self::New(t) => t - } - } - - /// Combine some subresults into a tuple representing a greater result - pub fn join( - self, (lt, rt): (T, T), - second: Product2, (lu, ru): (U, U) - ) -> Product2<(T, U)> { - match (self, second) { - (Self::Either, Product2::Either) => Product2::Either, - (Self::Left | Self::Either, Product2::Left | Product2::Either) => Product2::Left, - (Self::Right | Self::Either, Product2::Right | Product2::Either) => Product2::Right, - (t, u) => Product2::New((t.pick(lt, rt), u.pick(lu, ru))) - } - } - - /// Translate results back into the type of the original problem. 
- pub fn map A>(self, f: F) -> Product2 { - match self { - Product2::Left => Product2::Left, Product2::Right => Product2::Right, - Product2::Either => Product2::Either, - Product2::New(t) => Product2::New(f(t)) - } - } -} - -/// Technically very different but sometimes neecessary to translate -impl From for Product2 { - fn from(value: Side) -> Self {match value { - Side::Left => Self::Left, - Side::Right => Self::Right - }} -} \ No newline at end of file diff --git a/src/utils/pushed.rs b/src/utils/pushed.rs new file mode 100644 index 0000000..29b24d5 --- /dev/null +++ b/src/utils/pushed.rs @@ -0,0 +1,8 @@ +/// Create a new vector consisting of the provided vector with the +/// element appended +pub fn pushed(vec: &Vec, t: T) -> Vec { + let mut next = Vec::with_capacity(vec.len() + 1); + next.extend_from_slice(&vec[..]); + next.push(t); + next +} \ No newline at end of file diff --git a/src/utils/side.rs b/src/utils/side.rs index 5dde3e4..022a800 100644 --- a/src/utils/side.rs +++ b/src/utils/side.rs @@ -1,5 +1,7 @@ use std::fmt::Display; +use super::BoxedIter; + /// A primitive for encoding the two sides Left and Right. While booleans /// are technically usable for this purpose, they're less descriptive. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -55,4 +57,36 @@ impl Side { Side::Right => (opposite, this) } } -} \ No newline at end of file + /// Produces an increasing sequence on Right, and a decreasing sequence + /// on Left + pub fn walk<'a, I: DoubleEndedIterator + 'a>(&self, iter: I) + -> BoxedIter<'a, I::Item> + { + match self { + Side::Right => Box::new(iter) as BoxedIter, + Side::Left => Box::new(iter.rev()), + } + } +} + +#[cfg(test)] +mod test { + use itertools::Itertools; + use super::*; + + /// I apparently have a tendency to mix these up so it's best if + /// the sides are explicitly stated + #[test] + fn test_walk() { + assert_eq!( + Side::Right.walk(0..4).collect_vec(), + vec![0, 1, 2, 3], + "can walk a range" + ); + assert_eq!( + Side::Left.walk(0..4).collect_vec(), + vec![3, 2, 1, 0], + "can walk a range backwards" + ) + } +} diff --git a/src/utils/substack.rs b/src/utils/substack.rs index f287d62..4e2e880 100644 --- a/src/utils/substack.rs +++ b/src/utils/substack.rs @@ -1,3 +1,4 @@ +use std::collections::VecDeque; use std::fmt::Debug; // TODO: extract to crate @@ -8,71 +9,60 @@ use std::fmt::Debug; #[derive(Clone, Copy)] pub struct Stackframe<'a, T> { pub item: T, - pub prev: Option<&'a Stackframe<'a, T>>, + pub prev: &'a Substack<'a, T>, pub len: usize } -impl<'a, T: 'a> Stackframe<'a, T> { - pub fn new(item: T) -> Self { - Self { - item, - prev: None, - len: 1 - } - } - /// Get the item owned by this listlike, very fast O(1) - pub fn item(&self) -> &T { &self.item } - /// Get the next link in the list, very fast O(1) - pub fn prev(&self) -> Option<&'a Stackframe> { self.prev } - /// Construct an iterator over the listlike, very fast O(1) - pub fn iter(&self) -> StackframeIterator { - StackframeIterator { curr: Some(self) } - } - pub fn push(&self, item: T) -> Stackframe<'_, T> { - Stackframe { - item, - prev: Some(self), - len: self.len + 1 - } - } - #[allow(unused)] - pub fn opush(prev: Option<&'a Self>, item: T) -> Self { - Self { - 
item, - prev, - len: prev.map_or(1, |s| s.len) - } - } - #[allow(unused)] - pub fn len(&self) -> usize { self.len } - #[allow(unused)] - pub fn pop(&self, count: usize) -> Option<&Self> { - if count == 0 {Some(self)} - else {self.prev.expect("Index out of range").pop(count - 1)} - } - #[allow(unused)] - pub fn opop(cur: Option<&Self>, count: usize) -> Option<&Self> { - if count == 0 {cur} - else {Self::opop(cur.expect("Index out of range").prev, count - 1)} - } - #[allow(unused)] - pub fn o_into_iter(curr: Option<&Self>) -> StackframeIterator { - StackframeIterator { curr } - } +#[derive(Clone, Copy)] +pub enum Substack<'a, T> { + Frame(Stackframe<'a, T>), + Bottom } -impl<'a, T> Debug for Stackframe<'a, T> where T: Debug { +impl<'a, T> Substack<'a, T> { + /// Convert the substack into an option of stackframe + pub fn opt(&'a self) -> Option<&'a Stackframe<'a, T>> { match self { + Self::Frame(f) => Some(f), + Self::Bottom => None + } } + /// Construct an iterator over the listlike, very fast O(1) + pub fn iter(&self) -> SubstackIterator { + SubstackIterator { curr: self } + } + pub fn push(&'a self, item: T) -> Self { + Self::Frame(self.new_frame(item)) + } + pub fn new_frame(&'a self, item: T) -> Stackframe<'a, T> { + Stackframe { + item, + prev: &self, + len: self.opt().map_or(1, |s| s.len) + } + } + pub fn pop(&'a self, count: usize) -> Option<&'a Stackframe<'a, T>> { + if let Self::Frame(p) = self { + if count == 0 {Some(&p)} + else {p.prev.pop(count - 1)} + } else {None} + } + pub fn len(&self) -> usize { match self { + Self::Frame(f) => f.len, + Self::Bottom => 0 + } } +} + +impl<'a, T> Debug for Substack<'a, T> where T: Debug { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "Substack")?; f.debug_list().entries(self.iter()).finish() } } -pub struct StackframeIterator<'a, T> { - curr: Option<&'a Stackframe<'a, T>> +pub struct SubstackIterator<'a, T> { + curr: &'a Substack<'a, T> } -impl<'a, T> StackframeIterator<'a, T> { 
+impl<'a, T> SubstackIterator<'a, T> { #[allow(unused)] pub fn first_some(&mut self, f: F) -> Option where F: Fn(&T) -> Option { @@ -83,15 +73,38 @@ impl<'a, T> StackframeIterator<'a, T> { } None } + + /// Returns an iterator that starts from the bottom of the stack + /// and ends at the current position. This moves all items to the + /// heap by copying them to a [Vec] + pub fn rev_vec_clone(self) -> Vec where T: Clone { + let mut deque = VecDeque::with_capacity(self.curr.len()); + for item in self { deque.push_front(item.clone()) } + deque.into() + } } -impl<'a, T> Iterator for StackframeIterator<'a, T> { +impl<'a, T> Copy for SubstackIterator<'a, T> {} +impl<'a, T> Clone for SubstackIterator<'a, T> { + fn clone(&self) -> Self { + Self { curr: self.curr } + } +} + +impl<'a, T> Iterator for SubstackIterator<'a, T> { type Item = &'a T; fn next(&mut self) -> Option<&'a T> { - let curr = self.curr?; - let item = curr.item(); - let prev = curr.prev(); + let curr = self.curr.opt()?; + let item = &curr.item; + let prev = curr.prev; self.curr = prev; Some(item) } -} \ No newline at end of file + + fn size_hint(&self) -> (usize, Option) { + (self.curr.len(), Some(self.curr.len())) + } +} + + + diff --git a/src/utils/translate.rs b/src/utils/translate.rs index 0ef8f7a..ff4bf4d 100644 --- a/src/utils/translate.rs +++ b/src/utils/translate.rs @@ -2,9 +2,9 @@ use std::mem; // TODO: extract to crate -#[allow(unused)] /// Map over a `&mut` with a mapper function that takes ownership of /// the value +#[allow(unused)] pub fn translate T>(data: &mut T, f: F) { unsafe { let mut acc = mem::MaybeUninit::::uninit().assume_init(); @@ -17,6 +17,7 @@ pub fn translate T>(data: &mut T, f: F) { /// Map over a `&mut` with a mapper function that takes ownership of /// the value and also produces some unrelated data. 
+#[allow(unused)] pub fn process (T, U)>(data: &mut T, f: F) -> U { unsafe { let mut acc = mem::MaybeUninit::::uninit().assume_init(); diff --git a/src/utils/variant.rs b/src/utils/variant.rs new file mode 100644 index 0000000..fe5638c --- /dev/null +++ b/src/utils/variant.rs @@ -0,0 +1,19 @@ +// trait Var { +// type With: Var; + +// fn map(self, f: impl FnOnce(T) -> U) -> Self::With; +// fn map_multi + Var>( +// self, f: impl FnOnce(T) -> Ret +// ) -> as Var>::With; +// } + +// enum Variant { +// Head(T), +// Tail(U) +// } + +// impl> Var for Variant { +// fn map(self, f: impl FnOnce(H) -> U) -> Self::With { +// match +// } +// } diff --git a/src/utils/xloop.rs b/src/utils/xloop.rs index 43f3507..d5d5d86 100644 --- a/src/utils/xloop.rs +++ b/src/utils/xloop.rs @@ -78,8 +78,8 @@ macro_rules! xloop { (while $cond:expr; $body:stmt; $exit:stmt) => { { loop { - if $cond { break { $exit } } - else { $body } + if $cond { $body } + else { break { $exit } } } } }; @@ -87,6 +87,6 @@ macro_rules! xloop { xloop!(for ( $init; $cond; $step ) $body; ()) }; ($init:stmt; $cond:expr; $step:stmt; $body:stmt; $exit:stmt) => { - { $init; xloop!(while !($cond); { $body; $step }; $exit) } + { $init; xloop!(while $cond; { $body; $step }; $exit) } }; } \ No newline at end of file diff --git a/swap.md b/swap.md index a90eb61..e69de29 100644 --- a/swap.md +++ b/swap.md @@ -1 +0,0 @@ -Optimizations mostly left for later, len() was critical, should make most things O(N) instead of O(N!). A trivial keyword cache in the executor should prevent trying variable length patterns onto windows of unrelated sequences. Investigate different strategies as issue likely to re-emerge with marginal added pattern complexity \ No newline at end of file