From 33413b2b0f8b198f710270ffca18852c45a1e628 Mon Sep 17 00:00:00 2001 From: Lawrence Bethlenfalvy Date: Sun, 14 May 2023 20:35:31 +0100 Subject: [PATCH] backup commit of notes so far --- .vscode/swissknifeNotes.json | 1 + examples/dummy_project/main.orc | 45 ----- examples/lite/fn.orc | 15 ++ examples/lite/list.orc | 41 +++++ examples/lite/main.orc | 38 +++- examples/lite/option.orc | 6 + examples/scratch.hs | 1 + examples/vs_haskell/typeclasses.hs | 72 -------- examples/vs_haskell/typeclasses.orc | 65 ------- notes/papers/report/main/index.mjsd | 0 notes/papers/report/oss.md | 49 ----- .../report/parts/examples/calculator.md | 128 +++++++++++++ .../report/parts/examples/list-processing.md | 150 +++++++++++++++ notes/papers/report/parts/future_work.md | 71 +++++++ notes/papers/report/parts/haskell.md | 19 ++ notes/papers/report/parts/interner.md | 43 +++++ notes/papers/report/parts/macros.md | 54 ++++++ notes/papers/report/parts/oss.md | 75 ++++++++ notes/papers/report/parts/pipeline.md | 18 ++ notes/papers/report/{ => parts}/scratchpad.md | 15 +- notes/papers/report/parts/spec/01-main.md | 3 + notes/papers/report/parts/spec/02-parsing.md | 174 ++++++++++++++++++ notes/papers/report/parts/spec/03-macros.md | 45 +++++ notes/papers/report/parts/spec/04-runtime.md | 34 ++++ notes/papers/report/parts/substack.md | 7 + notes/papers/report/parts/timeline.md | 19 ++ .../report/parts/type_system/01-main.md | 20 ++ .../report/parts/type_system/02-given.md | 20 ++ .../report/parts/type_system/03-define.md | 61 ++++++ .../report/parts/type_system/04-impl.md | 58 ++++++ notes/papers/report/spec/01-parsing.md | 101 ---------- notes/papers/report/spec/02-macros.md | 45 ----- notes/papers/report/spec/03-runtime.md | 32 ---- orchid.code-workspace | 5 +- 34 files changed, 1109 insertions(+), 421 deletions(-) create mode 100644 .vscode/swissknifeNotes.json delete mode 100644 examples/dummy_project/main.orc create mode 100644 examples/lite/fn.orc create mode 100644 
examples/lite/list.orc create mode 100644 examples/lite/option.orc create mode 100644 examples/scratch.hs delete mode 100644 examples/vs_haskell/typeclasses.hs delete mode 100644 examples/vs_haskell/typeclasses.orc create mode 100644 notes/papers/report/main/index.mjsd delete mode 100644 notes/papers/report/oss.md create mode 100644 notes/papers/report/parts/examples/calculator.md create mode 100644 notes/papers/report/parts/examples/list-processing.md create mode 100644 notes/papers/report/parts/future_work.md create mode 100644 notes/papers/report/parts/haskell.md create mode 100644 notes/papers/report/parts/interner.md create mode 100644 notes/papers/report/parts/macros.md create mode 100644 notes/papers/report/parts/oss.md create mode 100644 notes/papers/report/parts/pipeline.md rename notes/papers/report/{ => parts}/scratchpad.md (76%) create mode 100644 notes/papers/report/parts/spec/01-main.md create mode 100644 notes/papers/report/parts/spec/02-parsing.md create mode 100644 notes/papers/report/parts/spec/03-macros.md create mode 100644 notes/papers/report/parts/spec/04-runtime.md create mode 100644 notes/papers/report/parts/substack.md create mode 100644 notes/papers/report/parts/timeline.md create mode 100644 notes/papers/report/parts/type_system/01-main.md create mode 100644 notes/papers/report/parts/type_system/02-given.md create mode 100644 notes/papers/report/parts/type_system/03-define.md create mode 100644 notes/papers/report/parts/type_system/04-impl.md delete mode 100644 notes/papers/report/spec/01-parsing.md delete mode 100644 notes/papers/report/spec/02-macros.md delete mode 100644 notes/papers/report/spec/03-runtime.md diff --git a/.vscode/swissknifeNotes.json b/.vscode/swissknifeNotes.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/.vscode/swissknifeNotes.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/examples/dummy_project/main.orc b/examples/dummy_project/main.orc deleted file mode 100644 index 
d85fa6e..0000000 --- a/examples/dummy_project/main.orc +++ /dev/null @@ -1,45 +0,0 @@ -opaque := \T. T - ---[ Typeclass definition (also just a type) ]-- -define Add $L:type $R:type $O:type as $L -> $R -> $O --- HKTC -define Mappable $C:(type -> type) as @T. @U. (T -> U) -> $C T -> $C U --- Dependency on existing typeclass -define Zippable $C:(type -> type) as @:Mappable $C. ( - @T. @U. @V. (T -> U -> V) -> $C T -> $C U -> $C V -) -define Default $T:type as $T - ---[ Type definition ]-- -define Cons $elem:type as loop \r. Option (Pair T $elem) -nil := @T. from @(Cons T) none -cons := @T. \el:T. ( - generalise @(Cons T) - |> (\list. some t[el, into list]) - |> categorise @(Cons T) -) -export map := @T. @U. \f:T -> U. ( - generalise @(Cons T) - |> loop ( \recurse. \option. - map option \pair. t[f (fst pair), recurse (snd pair)] - ) - |> categorise @(Cons U) -) --- Universal typeclass implementation; no parameters, no overrides, no name for overriding -impl Mappable Cons via map --- Blanket typeclass implementation; parametric, may override, must have name for overriding -impl (@T. Add (Cons T) (Cons T) (Cons T)) by concatenation over elementwiseAddition via concat - --- Scratchpad - -filterBadWords := @C:type -> type. @:Mappable C. \strings:C String. ( - map strings \s. if intersects badWords (slice " " s) then none else some s -):(C (Option String)) - --- /Scratchpad - -main := \x. foo @bar x - -foo := @util. \x. util x - -export opaque := \T. atom \ No newline at end of file diff --git a/examples/lite/fn.orc b/examples/lite/fn.orc new file mode 100644 index 0000000..da42d77 --- /dev/null +++ b/examples/lite/fn.orc @@ -0,0 +1,15 @@ +export Y := \f.(\x.f (x x))(\x.f (x x)) + +export loop $r on (...$parameters) with ...$tail =0x5p512=> Y (\$r. + bind_names (...$parameters) (...$tail) +) ...$parameters + +-- bind each of the names in the first argument as a parameter for the second argument +bind_names ($name ..$rest) $payload =0x2p1000=> \$name. 
bind_names (..$rest) $payload +bind_names () (...$payload) =0x1p1000=> ...$payload + +export ...$prefix $ ...$suffix:1 =0x1p130=> ...$prefix (...$suffix) +export ...$prefix |> $fn ..$suffix:1 =0x2p130=> $fn (...$prefix) ..$suffix + +export (...$argv) => ...$body =0x2p512=> (bind_names (...$argv) (...$body)) +$name => ...$body =0x1p512=> (\$name. ...$body) \ No newline at end of file diff --git a/examples/lite/list.orc b/examples/lite/list.orc new file mode 100644 index 0000000..72f6e91 --- /dev/null +++ b/examples/lite/list.orc @@ -0,0 +1,41 @@ +import option +import super::fn::* + +pair := \a.\b. \f. f a b + +-- Constructors + +export cons := \hd.\tl. option::some (pair hd tl) +export end := option::none + +export pop := \list.\default.\f.list default \cons.cons f + +-- Operators + +export reduce := \list.\acc.\f. ( + loop r on (list acc) with + pop list acc \head.\tail. r tail (f acc head) +) + +export map := \list.\f. ( + loop r on (list) with + pop list end \head.\tail. cons (f head) (r tail) +) + +export skip := \list.\n. ( + loop r on (list n) with + if n == 0 then list + else pop list end \head.\tail. r tail (n - 1) +) + +export take := \list.\n. ( + loop r on (list n) with + if n == 0 then end + else pop list end \head.\tail. 
cons head $ r tail $ n - 1 +) + +new[...$item, ...$rest:1] =0x2p333=> (cons (...$item) new[...$rest]) +new[...$end] =0x1p333=> (cons (...$end) end) +new[] =0x1p333=> end + +export ::(new) diff --git a/examples/lite/main.orc b/examples/lite/main.orc index dbba362..c51da12 100644 --- a/examples/lite/main.orc +++ b/examples/lite/main.orc @@ -1,9 +1,10 @@ -import prelude::* import std::(parse_float, to_string) -import std::(readline, print) +import std::(readline, print, debug) import std::(concatenate) +import super::list +import fn::* -export main := do{ +--[ export main := do{ cps data = readline; let a = parse_float data; cps op = readline; @@ -19,6 +20,35 @@ export main := do{ ); cps print (to_string result ++ "\n"); 0 +} ]-- + +export main := do{ + let foo = list::new[1, 2, 3, 4, 5, 6]; + let bar = list::map foo n => n * 2; + let sum = bar + |> list::skip 2 + |> list::take 3 + |> list::reduce 0 (a b) => a + b; + cps print $ to_string sum ++ "\n"; + 0 } --- export main := 1 do { 1 ; 2 } 3 +--[ +export main := do{ + let n = 1; + let acc = 1; + loop r on (n acc) with ( + if n == 5 + then print acc + else r (n + 1) (acc * 2) + ) +} +]-- +--[ +export main := do{ + let n = 1; + loop r on (n) with ( + debug r + ) +} +]-- \ No newline at end of file diff --git a/examples/lite/option.orc b/examples/lite/option.orc new file mode 100644 index 0000000..840e24c --- /dev/null +++ b/examples/lite/option.orc @@ -0,0 +1,6 @@ +export some := \v. \d.\f. f v +export none := \d.\f. d + +export map := \option.\f. option none f +export flatten := \option. option none \opt. opt +export flatmap := \option.\f. option none \opt. 
map opt f diff --git a/examples/scratch.hs b/examples/scratch.hs new file mode 100644 index 0000000..55146ce --- /dev/null +++ b/examples/scratch.hs @@ -0,0 +1 @@ +main = sequence \ No newline at end of file diff --git a/examples/vs_haskell/typeclasses.hs b/examples/vs_haskell/typeclasses.hs deleted file mode 100644 index 5df9a2e..0000000 --- a/examples/vs_haskell/typeclasses.hs +++ /dev/null @@ -1,72 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE MultiParamTypeClasses #-} -{-# LANGUAGE InstanceSigs #-} -{-# LANGUAGE BlockArguments #-} -import Prelude((>>=), Maybe( Just, Nothing ), return, fmap) -import Debug.Trace - --- 1 -class Add l r o where - add :: l -> r -> o - (+) :: l -> r -> o - (+) = add - --- 2 -class Mappable c where - map :: (i -> o) -> c i -> c o - --- 3 -class Mappable c => Zippable c where - zip :: (l -> r -> o) -> c l -> c r -> c o - - --- 4 -class Default t where - def :: t - --- 5 -instance (Zippable c, Add l r o) - => Add (c l) (c r) (c o) where - add :: (Zippable c, Add l r o) => c l -> c r -> c o - add = zip add - - - --- 6 --- newtype List t = List (Maybe (t, List t)) - --- instance Mappable List where --- map :: (i -> o) -> List i -> List o --- map f (List o) = List (fmap (\(h, t) -> (f h, map f t)) o) - --- instance Zippable List where --- zip :: (l -> r -> o) -> List l -> List r -> List o --- zip f (List l) (List r) = List do --- (lh, lt) <- l --- (rh, rt) <- r --- return (f lh rh, zip f lt rt) - --- instance Add (List e) (List e) (List e) where --- add (List l) (List r) = List case l of --- Just (head, tail) -> Just (head, add tail r) --- Nothing -> r - -data List t = Cons t (List t) | End - -instance Mappable List where - map :: (i -> o) -> List i -> List o - map _ End = End - map f (Cons head tail) = Cons (f head) (map f tail) - -instance Zippable List where - zip :: (l -> r -> o) -> List l -> List r -> List o - zip _ _ End = End - zip _ End _ = End - zip f (Cons lhead ltail) (Cons rhead rtail) = - Cons (f lhead rhead) (zip 
f ltail rtail) - -instance Add (List e) (List e) (List e) where - add End r = r - add (Cons head tail) r = Cons head (add tail r) - - \ No newline at end of file diff --git a/examples/vs_haskell/typeclasses.orc b/examples/vs_haskell/typeclasses.orc deleted file mode 100644 index b690804..0000000 --- a/examples/vs_haskell/typeclasses.orc +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - - --- 1 -define Add $L $R $O -as $L -> $R -> $O - -$left:2... + $right:1... =1000=> add ($left...) ($right...) - --- 2 -define Mappable $C:type -> type -as @I. @O. (I -> O) -> $C I -> $C O - --- 3 -define Zippable $C:type -> type -as @:Mappable $C. - @L. @R. @O. (L -> R -> O) -> $C L -> $C R -> $C O - --- 4 -define Default $T:type as $T - - --- 5 -impl - @C:Type -> Type. @L. @R. @O. - @:(Zippable C). @:(Add L R O). - Add (C L) (C R) (C O) -by elementwiseAdd -via zip add - --- 6 -define List $E as Y \r. Option t[ $E, r ] - -impl Mappable List -via \f.\list. categorise ( - (Y \repeat. \opt. match opt { - Some t[head, tail] => - Some t[f head, repeat tail]; - None => None; - }) (generalise list) -) - -impl Zippable List -via \f.\l.\r. categorise ( - Y \repeat.\lopt.\ropt. do { - bind t[lhead, ltail] <- lopt; - bind t[rhead, rtail] <- ropt; - t[f lhead rhead, repeat ltail rtail] - } -) (generalise l) (generalise r) - -impl @T. Add (List T) (List T) (List T) -by concatListAdd over elementwiseAdd -via \l.\r.categorise Y \repeat.\l. 
( - match l ( - Some t[head, tail] => - Some t[head, repeat tail]; - None => (generalise r) - ) -) (generalise l) - diff --git a/notes/papers/report/main/index.mjsd b/notes/papers/report/main/index.mjsd new file mode 100644 index 0000000..e69de29 diff --git a/notes/papers/report/oss.md b/notes/papers/report/oss.md deleted file mode 100644 index c7d6500..0000000 --- a/notes/papers/report/oss.md +++ /dev/null @@ -1,49 +0,0 @@ -# List of open-source packages I used - -## [thiserror](https://github.com/dtolnay/thiserror) - -_License: Apache 2.0 or MIT_ - -Helps derive `Error` for aggregate errors, although I eventually stopped trying to do so as it was simpler to just treat error types as bags of data about the failure. - -## [chumsky](https://github.com/zesterer/chumsky) - -_License: MIT_ - -A fantastic parser combinator that allowed me to specify things like the nuanced conditions under which a float token can be promoted to an uint token in a declarative way. In hindsight passes after tokenization could have been written by hand, tokenized Orchid is not that hard to parse into an AST and it would have probably made some tasks such as allowing `.` (dot) as a token considerably easier. - -## [hashbrown](https://github.com/rust-lang/hashbrown) - -_License: Apache 2.0 or MIT_ - -Google's swisstable. Almost perfectly identical to `HashMap` in std, with a couple additional APIs. I use it for the raw entry API which the generic processing step cache requires to avoid unnecessary clones of potentially very large trees. - -## [mappable-rc](https://github.com/JakobDegen/mappable-rc) - -_License: Apache 2.0 or MIT_ - -A refcounting pointer which can be updated to dereference to some part of the value it holds similarly to C++'s `shared_ptr`. Using this crate was ultimately a mistake on my part, in early stages of development (early stages of my Rust journey) I wanted to store arbitrary subsections of an expression during macro execution without dealing with lifetimes. 
Removing all uses of this crate and instead just dealing with lifetimes is on the roadmap. - -## [ordered-float](https://github.com/reem/rust-ordered-float) - -_License: MIT_ - -A wrapper around floating point numbers that removes `NaN` from the set of possible values, promoting `<` and `>` to total orderings and `==` to an equivalence relation. Orchid does not have `NaN` because it's a silent error. All operations that would produce `NaN` either abort or indicate the failure in their return type. - -## [itertools](https://github.com/rust-itertools/itertools) - -_License: Apache 2.0 or MIT_ - -A utility crate, I use it everywhere. - -## [smallvec](https://github.com/servo/references-smallvec) - -_License: Apache 2.0 or MIT_ - -small vector optimization - allocates space for a statically known number of elements on the stack to save heap allocations. This is a gamble since the stack space is wasted if the data does spill to the heap, but it can improve performance massively in hot paths. - -## [dyn-clone](https://github.com/dtolnay/dyn-clone) - -_License: Apache 2.0 or MIT_ - -All expressions in Orchid are clonable, and to allow for optimizations, Atoms have control over their own cloning logic, so this object-safe version of `Clone` is used. diff --git a/notes/papers/report/parts/examples/calculator.md b/notes/papers/report/parts/examples/calculator.md new file mode 100644 index 0000000..6a054ff --- /dev/null +++ b/notes/papers/report/parts/examples/calculator.md @@ -0,0 +1,128 @@ +# Calculator + +This example demonstrates various parts of the standard library, infix operators, `do{}` blocks, and various syntax elements. Approaching MVP, this was the first benchmark created to debug various features. It predates the transition for `:=` from single-token macros to a dedicated language element. 
+ +``` +import std::(parse_float, to_string) +import std::(readline, print) + +export main := do{ + cps data = readline; + let a = parse_float data; + cps op = readline; + cps print ("\"" ++ op ++ "\"\n"); + cps data = readline; + let b = parse_float data; + let result = ( + if op == "+" then a + b + else if op == "-" then a - b + else if op == "*" then a * b + else if op == "/" then a / b + else "Unsupported operation" -- dynamically typed shenanigans + ); + cps print (to_string result ++ "\n"); + 0 +} +``` + +## do + +The main function uses a `do{}` block, which is processed using the following rules, temporarily added to the prelude: + +``` +export do { ...$statement ; ...$rest:1 } =0x2p543=> ( + statement (...$statement) do { ...$rest } +) +export do { ...$return } =0x1p543=> (...$return) +``` + +This pair of rules converts the flat structure into a conslist which makes it easier for dedicated statement rules to process their own fragments. The produced structure looks roughly like this: + +``` +(statement (cps data = readline) +(statement (let a = parse_float data) +(statement (cps op = readline) +( ... +(statement (cps print (to_string result ++ "\n")) +(0) +))))) +``` + +`do` blocks contain semicolon-delimited statements which receive special handling, and a final expression that doesn't. This final expression must be present since every Orchid expression must produce a value including `do` blocks. For ergonomics, in the future a sentinel value may be returned if the body of the `do` block ends with a semicolon. + +## statement + +This example demonstrates three statement types. This collection can be extended by matching on `prelude::statement () ...$next`. + +### let + +`let` bindings are used for forward-declaring values in subsequent expressions, passing them to the rest of the body. +``` +export statement (let $name = ...$value) ...$next =0x1p1000=> ( + (\$name. 
...$next) (...$value) +) +``` + +Since the executor keeps track of copies of the same expression and applies normalization steps to a shared instance, this technique also ensures that `...$value` will not be evaluated multiple times. + +### cps= + +`cps` was used for effectful functions. +``` +export statement (cps $name = ...$operation) ...$next =0x2p1000=> ( + (...$operation) \$name. ...$next +) +``` + +In the version of Orchid this example was written for, functions like `print` or `readline` carried out their work as a side effect of normalization. At this point the copy-tracking optimization described above wasn't used. Because of this, in new versions `print` or `readline` in a loop doesn't necessarily repeat its effect. This bug can be addressed in the standard library, but `cps` would still probably be just as useful. + +### cps + +Since `cps` is designed for side effects, an expression of this kind doesn't necessarily produce a value. This `=` free variant passes the tail as an argument to the expression as-is +``` +export statement (cps ...$operation) ...$next =0x1p1000=> ( + (...$operation) (...$next) +) +``` + +## if-then-else + +This rule is substantially simpler, it simply forwards the three slots to a function that makes the actual decision. +``` +export if ...$cond then ...$true else ...$false:1 =0x1p320=> ( + ifthenelse (...$cond) (...$true) (...$false) +) +``` + +Notice that `else if` isn't a syntax element, it's simply an artifact of this rule applied to itself. The critical ordering requirement that enables this is that `cond` and `true` are squeezed so neither of them can accidentally consume an `if` or `else` token. `::prefix:0` is implied at the start, it is left of `cond:0` and `true:0` so it has a higher growth priority, and `false:1` has a higher explicit priority. 
+ +## Infix operators + +Infix operators could be intuitively defined with something like the following + +``` +$lhs + $rhs =1=> (add $lhs $rhs) +$lhs * $rhs =2=> (mul $lhs $rhs) +``` + +However, if they really were defined this way, function application would have the lowest priority. Ideally, we would like function application to have the highest priority. +``` +-- what we mean +(mult (parse_float "foobar") 2) +-- how we would like to write it +let a = parse_float "foobar" * 2 +-- how we would have to write it +let a = (parse_float "foobar") * 2 +``` + +With vectorial placeholders it's possible to define the operators in reverse, i.e. to match the "outermost" operator first. +``` +...$lhs + ...$rhs =2=> (add (...$lhs) (...$rhs)) +...$lhs * ...$rhs =1=> (mul (...$lhs) (...$rhs)) +``` + +With this, function calls get processed before any operator. + +## Dynamically typed shenanigans + +If the operator character isn't recognized, `result` gets assigned `"Unsupported operation"`. This wouldn't work in most type systems as `result` is now either a string or a number with no static discriminator. Most of Orchid's functions accept a single type of input with the sole exception being `to_string`. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/list-processing.md b/notes/papers/report/parts/examples/list-processing.md new file mode 100644 index 0000000..f8faf05 --- /dev/null +++ b/notes/papers/report/parts/examples/list-processing.md @@ -0,0 +1,150 @@ +This example showcases common list processing functions and some functional programming utilities. It is also the first multi-file demo. 
+ +_in main.orc_ +``` +import std::(to_string, print) +import super::list +import fn::* + +export main := do{ + let foo = list::new[1, 2, 3, 4, 5, 6]; + let bar = list::map foo n => n * 2; + let sum = bar + |> list::skip 2 + |> list::take 3 + |> list::reduce 0 (a b) => a + b; + cps print $ to_string sum ++ "\n"; + 0 +} +``` + +_in fn.orc_ +``` +export Y := \f.(\x.f (x x))(\x.f (x x)) + +export loop $r on (...$parameters) with ...$tail =0x5p512=> Y (\$r. + bind_names (...$parameters) (...$tail) +) ...$parameters + +-- bind each of the names in the first argument as a parameter for the second argument +bind_names ($name ..$rest) $payload =0x2p1000=> \$name. bind_names (..$rest) $payload +bind_names () (...$payload) =0x1p1000=> ...$payload + +export ...$prefix $ ...$suffix:1 =0x1p130=> ...$prefix (...$suffix) +export ...$prefix |> $fn ..$suffix:1 =0x2p130=> $fn (...$prefix) ..$suffix + +export (...$argv) => ...$body =0x2p512=> (bind_names (...$argv) (...$body)) +$name => ...$body =0x1p512=> (\$name. ...$body) +``` + +_in list.orc_ +``` +import option +import super::fn::* + +pair := \a.\b. \f. f a b + +-- Constructors + +export cons := \hd.\tl. option::some (pair hd tl) +export end := option::none + +export pop := \list.\default.\f. list default \cons.cons f + +-- Operators + +export reduce := \list.\acc.\f. ( + loop r on (list acc) with + pop list acc \head.\tail. r tail (f acc head) +) + +export map := \list.\f. ( + loop r on (list) with + pop list end \head.\tail. cons (f head) (r tail) +) + +export skip := \list.\n. ( + loop r on (list n) with + if n == 0 then list + else pop list end \head.\tail. r tail (n - 1) +) + +export take := \list.\n. ( + loop r on (list n) with + if n == 0 then end + else pop list end \head.\tail. cons head $ r tail $ n - 1 +) + +new[...$item, ...$rest:1] =0x2p333=> (cons (...$item) new[...$rest]) +new[...$end] =0x1p333=> (cons (...$end) end) +new[] =0x1p333=> end + +export ::(new) +``` + +_in option.orc_ +``` +export some := \v. \d.\f. 
f v +export none := \d.\f. d + +export map := \option.\f. option none f +export flatten := \option. option none \opt. opt +export flatmap := \option.\f. option none \opt. map opt f +``` + +The `main` function uses a `do{}` block to enclose a series of name bindings. It imports `list` as a sibling module and `fn` as a top-level file. These files are in identical position, the purpose of this is just to test various ways to reference modules. + +## fn + +### bind_names + +This is a utility macro for binding a list of names on an expression. It demonstrates how to extract reusable macro program fragments to simplify common tasks. This demonstrative version simply takes a sequence of name tokens without any separators or custom programming, but its functionality can be extended in the future to include eg. destructuring. + +### arrow functions + +The arrow `=>` operator here is used to define inline functions. It is very similar to the native `\x.` lambda, except that native lambdas use higher priority than any macro so they can't appear inside a `do{}` block as all of the subsequent lines would be consumed by them. It is parsed using the following rules: +``` +export (...$argv) => ...$body =0x2p512=> (bind_names (...$argv) (...$body)) +$name => ...$body =0x1p512=> (\$name. ...$body) +``` + +### pipelines + +This is a concept borrowed from Elixir. The `|>` operator simply inserts the output of the previous expression to the first argument of the following function. +``` +export ...$prefix |> $fn ..$suffix:1 =0x2p130=> $fn (...$prefix) ..$suffix +``` + +It is processed left-to-right, but leaves the suffix on the same level as the function and sinks the prefix, which means that long pipelines eventually become left associative despite the inverted processing order. + +### right-associative function call operator + +The `$` operator is analogous to its Haskell counterpart. It is right-associative and very low priority. Its purpose is to eliminate trailing parentheses. 
+ +### Loop expression + +Recursion in lambda calculus is achieved using a fixpoint combinator. The classic version of this combinator described by Church is the [Y-combinator][hb_tlc], defined like so: +``` +export Y := \f.(\x.f (x x))(\x.f (x x)) +``` + +[hb_tlc]: ISBN-0444867481 + +Formalizing what this does is difficult, in plain words it calls `f` with an expression that is equivalent to its own return value, thus giving the parameter a convenient means to define its value in terms of different parameterizations of itself. The following snippet computes 2^12 to demonstrate how it would normally be called. +``` +export main := Y (\r.\n.\s. + if n == 0 then s + else r (n - 1) (s * 2) +) 12 0 +``` + +The purpose of the loop expression is to provide a more convenient syntax to define recursive structures, as direct calls to the Y-combinator are error prone. It is defined as follows: +``` +export loop $r on (...$parameters) with ...$tail =0x5p512=> Y (\$r. + bind_names (...$parameters) (...$tail) +) ...$parameters +``` + +The template allows the caller to give the point of recursion a name and enumerate the names that can change value between iterations of the loop. The point of recursion then has to be called with the same number of parameters. + +It may be possible to construct a variant of this statement which allows only reassigning subsets of the mutable parameter list. It is definitely possible to construct a variant that allows declaring new names in place in the parameter list, although I did not have time to do so. 
\ No newline at end of file diff --git a/notes/papers/report/parts/future_work.md b/notes/papers/report/parts/future_work.md new file mode 100644 index 0000000..2668f41 --- /dev/null +++ b/notes/papers/report/parts/future_work.md @@ -0,0 +1,71 @@ +# Future work + +## Standard library + +There are a few libraries I would like to implement in the future to demonstrate various uses of the language + +### Macro error reporting + +When describing syntax transformations with Orchid macros, it's fairly easy to make assertions about the stages during which given tokens should exist in the code in terms of the lower and upper bound of the currently active priority number. When these assertions fail, the consequences can be very difficult to debug since a partially transformed syntax tree with all sorts of carriages around conforms to neither the public API of any library nor the core language and lambda calculus. This problem can be addressed with guard rules and bubbling errors. To demonstrate, consider this module: + +``` +-- in client library +import std::macro_error::missing_token + +-- what this carriage does is not relevant to the example, focus on the priority numbers +start_carriage $init =100=> carriage ($init) +carriage ($state) $f =10_001=> carriage (($f $state)) +carriage ($state) stop_carriage =10_002=> $state + +-- report the suspected reason why this carriage did not get consumed +carriage ($state) =0=> (missing_token stop_carriage ($state)) + +export ::(start_carriage, stop_carriage) +``` + +``` +-- in std::macro_error + +-- convert various errors to uniform format +export (missing_token $token ..$details) =1_000_000=> (bubbling_error + "{} was not found" ($token) (..$details) +) + +-- forward error upwards +(..$_prefix (bubbling_error ...$args) ..$_suffix) =1_000_001=> (bubbling_error ...$args) +[..$_prefix (bubbling_error ...$args) ..$_suffix] =1_000_001=> (bubbling_error ...$args) +{..$_prefix (bubbling_error ...$args) ..$_suffix} =1_000_001=> 
(bubbling_error ...$args) +``` + +With this, various options are available for displaying the error: + +1. bubbling_error could be a magic token that always causes the macro executor to format and print the following string +2. bubbling_error could be defined as a function to raise an error when the problematic function is called. This only supports the (in my opinion, insufficient) level of error reporting Python provides for syntax errors +3. bubbling_error could be left undefined, the runtime could expose processed functions that contained undefined names after macro execution, and dev tooling could parse bubbling_error out of this data. + +### Extensible structural(?) pattern matching + +Since all tokens are namespaced, complicated protocols can be defined between libraries for dispatching macro resolution. I would like to make use of this to build a pattern matching library that resolves patterns to a series of function calls which return some kind of Maybe value. This is something I often wish Rust supported, for instance when matching a type part of which is stored in a reference-counting pointer, a second match expression is required to extract data from the reference-counted part. + +### Function serialization + +Being a pure language, Orchid carries the potential to serialize functions and send them over the network. This enables for instance an Orchid web framework to represent UI as well as and database transactions as simple callbacks in server code that are flush with the code describing server behaviour. I would like to explore this option in the future and develop a general library that allows + +### Macros for UI, declarative testing, etc. + +The flexible macro system enables library developers to invent their own syntax for essentially anything. I considered defining macros for html, music scores / midi data, marble and flow diagrams. 
+ +### DMA/MMIO + +TODO + +## Type system + +### Early plans + +Originally, Orchid was meant to have a type system that used Orchid itself to build generic types using logic of unconstrained complexity from their arguments. The time constraints did not allow for this to be done in the initial version, but it is still on the roadmap. + +### Alternatives + +During initial testing of the working version, I found that the most common kind of programming error in lambda calculus appears to be arity mismatch or syntax errors that result in arity mismatch. Without any kind of type checking this is especially difficult to debug as every function looks the same. This can be addressed with a much simpler type system similar to System-F. Any such type checker would have to be constructed so as to only verify user-provided information regarding the arity of functions without attempting to find the arity of every expression, since System-F is strongly normalising and Orchid like any general purpose language supports potentially infinite loops. + diff --git a/notes/papers/report/parts/haskell.md b/notes/papers/report/parts/haskell.md new file mode 100644 index 0000000..7a89de7 --- /dev/null +++ b/notes/papers/report/parts/haskell.md @@ -0,0 +1,19 @@ +My original inspiration to create Orchid was Haskell. I found the power of lazy evaluation impressive and inspiring and saw its potential in defining zero-cost abstractions with simple data flow. I identified a few key problems that motivated me to build a new language: + +**Syntax sugar:** Infix operators in Haskell are defined as any function consisting of non-alphanumeric characters. This produces various rather confusing patterns; ternary operators are placed between their first and second argument, and the ability to use keywords as infix operators and infix operators as prefixes with the use of backticks is a pointless divergence. 
Other kinds of syntax sugar such as do blocks have a well-defined purpose but often appear as operators in the middle of screen-wide expressions where their purpose is hard to understand and entirely disconnected from the metaphor that brought them to life. + +In addition the handling of all syntax sugar is delegated to the compiler. This results in a system that's surprisingly limited when it comes to defining new abstractions, but also requires much greater effort to learn and read than languages with an intentionally limited syntax such as Java. + +**Syntax-level metaprogramming:** [Template Haskell][th1] is Haskell's tool for syntax-level macros. I learned about it after I built Orchid, and it addresses a lot of my problems. + +[th1]: https://wiki.haskell.org/Template_Haskell + +**Type system:** Haskell's type system is very powerful but to be able to represent some really interesting structures it requires a long list of GHC extensions to be enabled which in turn make typeclass implementation matching undecidable and the heuristic rather bad (understandably so, it was clearly not designed for that; it wasn't really even designed to be a heuristic). + +My plan for Orchid was to use Orchid itself as a type system as well; rather than aiming for a decidable type system and then extending it until it [inevitably][tc1] [becomes][tc2] [turing-complete][tc3], my type-system would be undecidable from the start and progress would point towards improving the type checker to recognize more and more cases. 
+ +[tc1]: https://en.cppreference.com/w/cpp/language/template_metaprogramming +[tc2]: https://blog.rust-lang.org/2022/10/28/gats-stabilization.html +[tc3]: https://wiki.haskell.org/Type_SK + +A description of the planned type system is available in [[type_system/01-main.md|Appendix T]] \ No newline at end of file diff --git a/notes/papers/report/parts/interner.md b/notes/papers/report/parts/interner.md new file mode 100644 index 0000000..3edca17 --- /dev/null +++ b/notes/papers/report/parts/interner.md @@ -0,0 +1,43 @@ +# Interner + +To fix a very serious performance problem with the initial POC, all tokens and all namespaced names in Orchid are interned. + +String interning is a fairly simple optimization, the core idea is to replace strings with an ID unique to the data so that equality comparison can be executed on those IDs in place instead of having to fetch the data from possibly an uncached memory location and compare it character by character. This optimization is so popular that most high-level programming languages with immutable strings automatically do it for string literals, and it allows a lot of otherwise intolerably string-heavy systems such as Javascript's string-map objects to be not only functional but quite performant. + +For the sake of simplicity in Rust it is usually done by replacing Strings with a NonZeroU32 (or some other size). This system is very easy to understand and manage since the user doesn't have to deal with lifetimes, but it has a weakness wherein in order to print or in any other way interact with the strings themselves one needs access to the interner object itself. This is perhaps the most significant code smell in Orchid, essentially every function takes a parameter that references the interner. 
+ +Interning is of course not limited to strings, but one has to be careful in applying it to distinct concepts as the lifetimes of every single interned thing are tied together, and sometimes the added constraints and complexity aren't worth the performance improvements. Orchid's interner is completely type-agnostic so that the possibility is there. The interning of Orchid string literals is on the roadmap however. + +## Initial implementation + +Initially, the interner used Lasso, which is an established string interner with a wide user base. + +### Singleton + +A string interner is inherently a memory leak, so making it static would have likely proven problematic in the future. At the same time, magic strings should be internable by any function with or without access to the interner since embedders of Orchid should be able to reference concrete names in their Rust code conveniently. To get around these constraints, the [[oss#static_init|static_init]] crate was used to retain a global singleton instance of the interner and intern magic strings with it. After the first non-static instance of the interner is created, the functions used to interact with the singleton would panic. I also tried using the iconic lazy_static crate, but unfortunately it evaluates the expressions upon first dereference which for functions that take an interner as parameter is always after the creation of the first non-static interner. + +### The Interner Trait + +The interner supported exchanging strings or sequences of tokens for tokens. To avoid accidentally comparing the token for a string with the token for a string sequence, or attempting to resolve a token referring to a string sequence as a string, the tokens have a rank, encoded as a dependent type parameter. Strings are exchanged for tokens of rank 0, and sequences of tokens of rank N are exchanged for tokens of rank N+1.
+ +### Lasso shim + +Because the type represented by a token is statically guaranteed, we can fearlessly store differently encoded values together without annotation. Thanks to this, strings can simply be forwarded to lasso without overhead. Token sequences are more problematic because the data is ultimately a sequence of numbers and we can't easily assert that they will constitute a valid utf8 string. My temporary solution was to encode the binary data in base64. + +## Revised implementation + +The singleton ended up completely defunct because `static_init` apparently also evaluates init expressions on first dereference. Fixing this issue was a good occasion to come up with a better design for the interner. + +### monotype + +The logic for interning itself is encapsulated by a `monotype` struct. This stores values of a single homogeneous type using a hashmap for value->token lookup and a vector for token->value lookup. It is based on, although considerably simpler than, Lasso. + +### polytype + +The actual Interner stores a `HashMap<TypeId, Box<dyn Any>>`, which is essentially a store of values of unique type keyed by the type. The values in this case are monotype interners. + +Unlike the naive initial implementation, this version also operates on references, so interning and externing values causes no unnecessary copying and heap allocations. + +## The InternedDisplay Trait + +For refined error reporting most structures derive `Debug` and also implement `Display`. In most cases where the structure at hand describes code of some kind, `Display` attempts to print a fragment of valid code. With every name in the codebase interned this is really difficult because interner tokens can't be resolved from `Display` implementations. To solve this, a new trait was defined called `InternedDisplay` which has the same surface as `Display` except for the fact that `fmt`'s mirror image also takes an additional reference to Interner.
The syntax sugar for string formatting is in this way unfortunately lost, but the functionality and the division of responsibilities remains. \ No newline at end of file diff --git a/notes/papers/report/parts/macros.md b/notes/papers/report/parts/macros.md new file mode 100644 index 0000000..ff285cd --- /dev/null +++ b/notes/papers/report/parts/macros.md @@ -0,0 +1,54 @@ +# Macros + +The macros describe several independent sequential programs that are expected to be able to interact with each other. To make debugging easier, the order of execution of independent macros should also be relatively static. + +## Execution order + +The macro executor follows a manually specified priority cascade, with priorities ranging from 0 to f64 max (0x2p1023, exclusive). Priorities are accepted in any valid floating point format, but usually written in binary or hexadecimal natural form, as this format represents floating point precision on the syntax level, thus making precision errors extremely unlikely. + +The range of valid priorities is divided up into bands, much like radio bands. In this case, the bands serve to establish a high level ordering between instructions. + +The bands are each an even 32 orders of magnitude, with space in between for future expansion + +| | | | | +| :----------: | :-----: | :---------: | :----------: | +| 0-31 | 32-63 | 64-95 | 96-127 | +| | x | | | +| 128-159 | 160-191 | 192-223 | 224-255 | +| operators | | | x | +| 256-287 | 288-319 | 320-351 | 352-383 | +| | | expressions | | +| 384-415 | 416-447 | 448-479 | 480-511 | +| | x | | | +| 512-543 | 544-575 | 576-607 | 608-639 | +| bindings | | | x | +| 640-671 | 672-703 | 704-735 | 736-767 | +| | | aliases | | +| 768-799 | 800-831 | 832-863 | 864-895 | +| | x | | | +| 896-927 | 928-959 | 960-991 | 992- | +| integrations | | | transitional | + +### Transitional states + +Transitional states produced and consumed by the same macro program occupy the range above 0x1p991. 
Nothing in this range should be written by the user or triggered by an interaction of distinct macro programs, the purpose of this high range is to prevent devices such as carriages from interacting. Any transformation sequence in this range can assume that the tree is inert other than its own operation. + +### Integrations + +Integrations expect an inert syntax tree but at least one hidden token does not belong to the macro program that resolves the rule, so it's additionally important that all macro programs be in a documented state at the time of resolution. + +### Aliases + +Fragments of code extracted for readability. + +### Binding builders + +Syntax elements that manipulate bindings should be executed earlier. Do blocks and match statements are good examples of this category. Anything with a lower priority trigger can assume that all names are correctly bound. + +### Expressions + +Things that essentially work like function calls just with added structure, such as if/then/else + +### Operators + +Binary and unary operators that process the chunks of text on either side \ No newline at end of file diff --git a/notes/papers/report/parts/oss.md b/notes/papers/report/parts/oss.md new file mode 100644 index 0000000..4c3de56 --- /dev/null +++ b/notes/papers/report/parts/oss.md @@ -0,0 +1,75 @@ +# Open-source packages Orchid depends on + +## [thiserror](https://github.com/dtolnay/thiserror) + +_License: Apache 2.0 or MIT_ + +Helps derive `Error` for aggregate errors. + +I eventually stopped trying to do this as it was simpler to just treat error types as bags of data about the failure, but some parts of the codebase still use it and it doesn't really cause any problems. + +## [chumsky](https://github.com/zesterer/chumsky) + +_License: MIT_ + +A fantastic parser combinator that allowed specifying nuanced decisions in a declarative way, such as whether a given float token can be promoted to an uint token. 
+ +In hindsight passes after tokenization could have been written by hand, tokenized Orchid is not that hard to parse into an AST and it would have probably made some tasks such as allowing `.` (dot) as a token considerably easier. + +## [hashbrown](https://github.com/rust-lang/hashbrown) + +_License: Apache 2.0 or MIT_ + +Google's swisstable implementation. Almost perfectly identical to `std::collections::HashMap`, with minor differences. + +One of its greatest feats is support for the raw entry API which enables resolving entries using a hash and an equality lambda. This is used both by the interner to avoid many clones and allocations and by the generic processing step cache to avoid unnecessary clones of potentially very large trees. This API is experimentally available in the native hashmap too. + +Its other advantage over `std::collections::HashMap` is that its default hashing function is AHash which is said to be faster than the standard variant's default SipHash. I don't have benchmarks to back this up but since it was already in the codebase for the raw entry API I opted to use it everywhere. + +## [ordered-float](https://github.com/reem/rust-ordered-float) + +_License: MIT_ + +A wrapper around floating point numbers that removes `NaN` from the set of possible values, promoting `<` and `>` to total orderings and `==` to an equivalence relation. Orchid does not have `NaN` because it's a silent error which conflicts with the "let it crash" philosophy borrowed from Elixir. All operations that would produce `NaN` either abort or indicate the failure in their return type. + +## [itertools](https://github.com/rust-itertools/itertools) + +_License: Apache 2.0 or MIT_ + +A fundamental utility crate for Rust's iterators, it's impossible to enumerate its uses. 
+ +## [smallvec](https://github.com/servo/references-smallvec) + +_License: Apache 2.0 or MIT_ + +small vector optimization - allocates space for a statically known number of elements on the stack to save heap allocations. This is a gamble since the stack space is wasted if the data does spill to the heap, but it can improve performance massively in hot paths. + +I used it for optimizations in the key-value store the type system used to store + +## [dyn-clone](https://github.com/dtolnay/dyn-clone) + +_License: Apache 2.0 or MIT_ + +All expressions in Orchid are clonable, and to allow for optimizations, Atoms have control over their own cloning logic, so this object-safe version of `Clone` is used. + +# Packages no longer used + +## [mappable-rc](https://github.com/JakobDegen/mappable-rc) + +A refcounting pointer which can be updated to dereference to some part of the value it holds similarly to C++'s `shared_ptr`. + +Using this crate was ultimately a mistake on my part, in early stages of development (early stages of my Rust journey) I wanted to store arbitrary subsections of an expression during macro execution without dealing with lifetimes. It was removed in the latest version. + +## [lasso](https://github.com/Kixiron/lasso) + +A very popular string interner, used for interning both strings and base64 encoded data + +## [base64](https://github.com/marshallpierce/rust-base64) + +Enable interning non-string data + +## [static_init](https://gitlab.com/okannen/static_init) + +Enable interning magic strings ahead-of-time in functions that don't have access to the interner. 
+ +I thought that this actually runs static initializers on startup as advertised in the readme, but it turned out to also evaluate them lazily on first dereference. \ No newline at end of file diff --git a/notes/papers/report/parts/pipeline.md b/notes/papers/report/parts/pipeline.md new file mode 100644 index 0000000..cd277db --- /dev/null +++ b/notes/papers/report/parts/pipeline.md @@ -0,0 +1,18 @@ +# The pipeline + +The conversion of Orchid files into a collection of macro rules is a relatively complicated process. First, the source files are loaded and an initial parsing pass is executed. Because the set of supported operators influences the correct lexing of expressions, the output of this pass can't directly be used. The parts of each module that are known to be valid are + +- the imports, because they don't use expressions at all +- the visibility and pattern of macro rule definitions, because it is required to separate distinct operators with spaces +- the visibility and name of constant definitions +- the name of submodules and these same elements in their bodies + +This preparsed data is then used to locate all files in the solution, and to collect all operators visible to a certain file for a final parsing pass. It is necessary to refer to imported modules for a complete list of operators because glob imports don't offer any information about the set of names but still import all operators for the purpose of lexing. + +## Push vs pull logistics + +The initial POC implementation of Orchid used pull logistics aka lazy evaluation everywhere. This meant that specially annotated units of computation would only be executed when other units referenced their result. This is a classic functional optimization, but its implementation in Rust had a couple of drawbacks. First, lazy evaluation conflicts with most other optimizations, because it's impossible to assert the impact of a function call.
Also - although this is probably a problem with my implementation - because the caching wrapper stores a trait object of Fn, every call to a stage is equivalent to a virtual function call which alone is sometimes an excessive penalty. Second, all values must live on the heap and have static lifetimes. Eventually nearly all fields referenced by the pipeline or its stages were wrapped in Rc. + +Additionally, in a lot of cases lazy evaluation is undesirable. Most programmers other than the developers of Python would like to receive syntax errors in dead functions because statically identifiable errors are usually either typos that are trivial to fix or born out of a misconception on the programmer's part which is worth addressing in case it produces silent errors elsewhere. But errors are produced when the calculation of a value fails, so to produce errors all values about all functions must be calculated. + +To address these issues, the second iteration only uses pull logistics for the preparsing and file collection phase, and the only errors guaranteed to be produced by this stage are imports from missing files and syntax errors regarding the structure of the S-expressions. \ No newline at end of file diff --git a/notes/papers/report/scratchpad.md b/notes/papers/report/parts/scratchpad.md similarity index 76% rename from notes/papers/report/scratchpad.md rename to notes/papers/report/parts/scratchpad.md index f8fed46..04fc832 100644 --- a/notes/papers/report/scratchpad.md +++ b/notes/papers/report/parts/scratchpad.md @@ -22,7 +22,7 @@ In addition, lazy, pure code lends itself to optimization. Deforestation and TCO # Macros -One major grievance of mine with Haskell is that its syntax isn't accessible. Even after understanding the rules, getting used to reading it takes considerable time. On the other hand, I really like the way Rust enables library developers to invent their own syntax that intuitively describes the concepts the library at hand encodes.
In Orchid's codebase, I defined several macros to streamline tasks like defining functions in Rust that are visible to Orchid. +Left-associative unparenthesized function calls are intuitive in the typical case of just applying functions to a limited number of arguments, but they're not very flexible. Haskell solves this problem by defining a diverse array of syntax primitives for individual use cases such as `do` blocks for monadic operations. This system is fairly rigid. In contrast, Rust enables library developers to invent their own syntax that intuitively describes the concepts the library at hand encodes. In Orchid's codebase, I defined several macros to streamline tasks like defining functions in Rust that are visible to Orchid, or translating between various intermediate representations. ## Generalized kerning @@ -55,12 +55,12 @@ What I really appreciate in this proof is how visual it is; based on this, it's ## Namespaced tokens -I found two major problems with C and Rust macros which vastly limit their potential. They're relatively closed systems, and prone to aliasing. Every other item in Rust follows a rigorous namespacing scheme, but the macros break this seal, I presume the reason is that macro execution happens before namespace resolution. +Rust macros operate on the bare tokens and therefore are prone to accidental aliasing. Every other item in Rust follows a rigorous namespacing scheme, but macros break this structure, probably because macro execution happens before namespace resolution. The language doesn't suffer too much from this problem, but the relativity of namespacing +limits their potential. -Orchid's macros - substitution rules - operate on namespaced tokens. This means that users can safely give their macros short and intuitive names, but it also means that the macros can hook into each other. 
Consider for example the following example, which is a slightly modified version of a -real rule included in the prelude: +Orchid's substitution rules operate on namespaced tokens. This means that the macros can hook into each other. Consider the following example, which is a modified version of a real rule included in the prelude: -in _procedural.or_ +in _procedural.orc_ ```orchid export do { ...$statement ; ...$rest:1 } =10_001=> ( statement (...$statement) do { ...$rest } @@ -71,7 +71,7 @@ export statement (let $_name = ...$value) ...$next =10_000=> ( ) ``` -in _cpsio.or_ +in _cpsio.orc_ ```orchid import procedural::statement @@ -83,7 +83,7 @@ export statement (cps ...$operation) ...$next =10_000=> ( ) ``` -in _main.or_ +in _main.orc_ ```orchid import procedural::(do, let, ;) import cpsio::cps @@ -95,3 +95,4 @@ export main := do{ } ``` +Notice how, despite heavy use of macros, it's never ambiguous where a particular name is coming from. Namespacing, including import statements, is entirely unaffected by the macro system. The source of names is completely invariant. diff --git a/notes/papers/report/parts/spec/01-main.md b/notes/papers/report/parts/spec/01-main.md new file mode 100644 index 0000000..c4f224a --- /dev/null +++ b/notes/papers/report/parts/spec/01-main.md @@ -0,0 +1,3 @@ +# Specification + +This is a description of the syntax and execution model the submitted version of Orchid conforms to. It is intended to be as accurate as any specification, but it is written to match the implementation and not the other way. \ No newline at end of file diff --git a/notes/papers/report/parts/spec/02-parsing.md b/notes/papers/report/parts/spec/02-parsing.md new file mode 100644 index 0000000..1d956cc --- /dev/null +++ b/notes/papers/report/parts/spec/02-parsing.md @@ -0,0 +1,174 @@ +# Parsing + +Orchid expressions are similar in nature to lambda calculus or haskell, except whitespace is mostly irrelevant. 
+ +## Names + +`name` and `ns_name` tokens appear all over the place in this spec. They represent operators, function names, arguments, modules. A `name` is + +1. the universally recognized operators `,`, `.`, `..` and `...` (comma and single, double and triple dot) +2. any C identifier +3. any sequence of name-safe characters starting with a character that cannot begin a C identifier. A name-safe character is any non-whitespace Unicode character other than + + - digits + - the namespace separator `:`, + - the parametric expression starters `\` and `@`, + - the string and char delimiters `"` and `'`, + - the various brackets`(`, `)`, `[`, `]`, `{` and `}`, + - `,`, `.` and `$` + +This means that, in absence of a known list of names, `!importatn!` is a single name but `importatn!` is two names, as a name that starts as a C identifier cannot contain special characters. It also means that using non-English characters in Orchid variables is a really bad idea. This is intentional, identifiers that need to be repeated verbatim should only contain characters that appear on all latin keyboards. + +There are also reserved words that cannot be used as names; `export`, `import`, `namespace`. + +A `ns_name` is a sequence of one or more `name` tokens separated by the namespace separator `::`. + +## Clauses + +Clauses are the building blocks of Orchid's syntax. They belong to one of a couple categories: + +- S-expressions are a parenthesized sequence of space-delimited `clause`s. All three types of brackets `()`, `[]` and `{}` are supported and treated differently. +- Lambdas start with `\`, followed by a single clause representing an argument name, then `.`, then a sequence of `clause`s representing the body. This is a greedy pattern that ends at the end of an enclosing S-expression, or the end of the line. Lambdas may contain any single clause in the position of an argument during parsing, but by the end of macro execution all arguments must become a `ns_name`. 
+- numbers can be in decimal, binary with the `0b` prefix, hexadecimal with the `0x` prefix, or octal with the `0` prefix. All bases support the decimal point, exponential notation or both. The exponent is prefixed with `p`, always written in decimal, may be negative, and it represents a power of the base rather than a power of 10. For example, `0xf0.4p-2` is `0xf04 / 16 ^ 3` or ~0.9385. +- Strings are delimited with `"`, support `\` escapes and four digit unicode escapes of the form `\uXXXX`. They may contain line breaks. +- Chars are a single character or escape from the above description of a string delimited by `'`. +- Placeholders are either scalar `$name`, vectorial `..$name`, vectorial nonempty `...$name`, or either of the vectorial variants with a priority attached `..$name:p`, `...$name:p`. The name is always a C identifier, p is an integer. +- Names are `ns_name` + +## Files + +Files are separated into lines. A line is delimited by newlines and only contains newlines within brackets. A line may be an + +### Import +spec: +``` +import = "import" impot_fragment +import_fragment = "*" +import_fragment = name +import_fragment = "(" import_fragment [ "," import_fragment ]* ")" +import_fragment = name "::" import_fragment +``` +examples: +``` +import prelude::* +import std::cps +import std::(num::ops::*, fn::*, conv) +import std::proc::(do, let, =, ;) +``` +counterexamples: +``` +import std::() +import std::cpsio::(print, *) +import std::(cpsio) +``` +> **info** +> +> while none of these are guaranteed to work currently, there's little reason they would have to be invalid, so future versions may allow them. + +### Constant +spec: +``` +constant = name ":=" clause* +``` +the value can consist of multiple clauses during parsing, these will be converted to a single function call after macro execution. + +examples: +``` +main := print "Hello World!\n" +pi := 3 +e := pi +exponentiate := \n.\exp. 
do{ + let total = 1; + loop r on (exp total) with + if exp == 1 then total + else r (exp - 1) (total * n) +} +``` +### Namespace +spec: +``` +namespace = "namespace" name "{" line* "}" +``` +examples: +``` +foo := 1 +bar := baz::quz +namespace baz ( + import super::foo + export quz := foo + 1 +) +``` + +### Exported member +spec: +``` +exported_member = "export" (constant | rule | namespace) +``` + +### Explicit export +spec: +``` +export "::" "(" name [ "," name ]* ")" +``` +examples: +``` +export ::(new, map) +``` + +### Rule +spec: +``` +rule = pattern arrow template +pattern = clause* +template = clause* +arrow = "=" priority "=>' (written together, without spaces) +priority = float +``` +Rule patterns can define new operators implicitly by referencing them, so all tokens must be delimited by spaces. The template is inserted in place of the pattern without parentheses, so unless it's meant to be part of a pattern matched by another rule which expects a particular parenthesization, when more than one token is produced the output should be wrapped in parentheses. + +examples: +``` +export loop $r on (...$parameters) with ...$tail =0x5p512=> Y (\$r. + bind_names (...$parameters) (...$tail) +) ...$parameters + +bind_names ($name ..$rest) $payload =0x2p1000=> \$name. bind_names (..$rest) $payload +bind_names () (...$payload) =0x1p1000=> ...$payload + +...$left + ...$right:1 =0x1p240=> (add (...$left) (...$right)) +``` + +### Imports + +An import is a line starting with the keyword `import`, followed by a tree of imported names. 
+ +``` +import_tree = name + | name :: import_tree + | name :: * + | ( import_tree [, import_tree]+ ) +``` + +Some examples of valid imports: + +``` +import std::cpsio +import std::(conv::parse_float, cpsio, str::*) +import std +``` + +Some examples of invalid imports: + +``` +import std::() +import std::cpsio::(print, *) +import std::(cpsio) +``` + +> **info** +> +> while none of these are guaranteed to work currently, there's little reason they would have to be invalid, so future specifications may allow them. + +An import can be normalized into a list of independent imports ending either with a `*` called wildcard imports or with a `name`. wildcard imports are normalized to imports for all the `name`s exported from the parent module. All Name clauses in the file starting with a `name` one of these imports ended with are prefixed with the full import path. The rest of the Name clauses are prefixed with the full path of the current module. + +Reference cycles are allowed. diff --git a/notes/papers/report/parts/spec/03-macros.md b/notes/papers/report/parts/spec/03-macros.md new file mode 100644 index 0000000..bf21bb6 --- /dev/null +++ b/notes/papers/report/parts/spec/03-macros.md @@ -0,0 +1,45 @@ +# Macros + +After parsing, what remains is a set of macro rules, each with a pattern, priority and template. Modules aren't tracked at this stage, their purpose was to namespace the tokens within the rules. + +By employing custom import logic, it's also possible to add rules bypassing the parser. Starting with the macro phase, `clause`s may also be `atom`s or `externfn`s. The role of these is detailed in the [[04-runtime]] section. + +Macros are tested in order of descending priority, each macro is checked against each subsection of each clause sequence. When a match is found, the substitution is performed and all macros are checked again. 
+ +## Placeholders + +Patterns fall into two categories + +- scalar placeholders + - `$name` matches exactly one clause, including a parenthesized sequence. +- vectorial placeholders + - `..$name` matches zero or more clauses + - `...$name` matches one or more clauses + +Vectorial placeholders may also have a positive decimal integer growth priority specified after the name, separated with a `:` like so: `...$cond:2`. If it isn't specified, the growth priority defaults to 0. + +Any single clause can appear in the position of a lambda argument during macro execution. By the end of the macro execution phase, all lambdas must have a Name in the position of argument. + +The template may only include placeholders referenced in the pattern. Two vectorial placeholders cannot appear next to each other in the pattern.\ +A placeholder name can only appear once in a pattern.\ + +## Execution + +Each clause in the pattern matches clauses as follows: + +- Name matches a Name with the same fully resolved namespaced name. +- Lambda matches a Lambda with matching argument and matching body. Lambda arguments are module-local Name clauses, so if they are moved out of the body by a macro they can become unbound or refer to a previously shadowed global. +- Parenthesized expressions match each other if the contained sequences match and both use the same delimiters. +- Placeholders' matched sets are as listed in [Placeholders](#placeholders). + +### Precedence of matches + +The growth order of vectorial placeholders is + +- Outside before inside parentheses +- descending growth priority +- right-to-left by occurrence in the pattern. + +If a pattern matches a sequence in more than one way, whichever match allocates more clauses to the highest vectorial placeholder in growth order is preferred. + +Rules are conceptually extended with a vectorial placeholder of priority 0 on either end unless a vectorial placeholder is already present there.
In practice, this means that multiple occurences of a scalar pattern within a sequence are matched left to right. diff --git a/notes/papers/report/parts/spec/04-runtime.md b/notes/papers/report/parts/spec/04-runtime.md new file mode 100644 index 0000000..958a36e --- /dev/null +++ b/notes/papers/report/parts/spec/04-runtime.md @@ -0,0 +1,34 @@ +## Runtime + +Orchid is evaluated lazily. This means that everything operates on unevaluated expressions. This has the advantage that unused values never need to be computed, but it also introduces a great deal of complexity in interoperability. + +### Gas + +The executor supports an optional gas parameter to limit the number of normalization steps taken. Once an Orchid program reaches an inert state, it is either an external item, a literal, or a lambda function. + +### external API + +In order to do anything useful, Orchid provides an API for defining clauses that have additional behaviour implemented in Rust. Basic arithmetic is defined using these. + +#### Atomic + +atomics are opaque units of foreign data, with the following operations: + +- a function for reduction that behaves like the interpreter's `run` function +- attempt to downcast to a concrete type + +Atomics can be used to represent processes. Given enough processing cycles, these return a different clause. + +They can also be used to wrap data addressed to other external code. This category of atomics reports inert at all times, and relies on the downcasting API to interact with ExternFn-s. + +It's possible to use a combination of these for conditional optimizations - for instance, to recognize chains of processes that can be more efficiently expressed as a single task. + +#### ExternFn + +external functions can be combined with another clause to form a new clause. 
Most of the time, this new clause would be an Atomic which forwards processing to the arguments until they can't be normalized any further, at which point it either returns an ExternFn to take another argument or executes the operation associated with the function and returns a value. + +Because this combination of operations is so common, several macros are provided to streamline it. + +It is always a logic error to normalize expressions outside an `interpreter::run` (or `Atomic::run`) call, or to expect an expression to be of any particular shape without ensuring that `interpreter::run` reported inert in the past. + +All functions including external ones are assumed to be pure, and the executor uses opportunistic caching to avoid re-evaluating subexpressions, so continuation-passing style cannot be used to encode side effects. An alternative system for this purpose is being developed, but for the time being the previous CPS functions are still available in the standard library. Each print expression will be printed at least once for each qualitatively distinct argument it is applied to. diff --git a/notes/papers/report/parts/substack.md b/notes/papers/report/parts/substack.md new file mode 100644 index 0000000..3f8052b --- /dev/null +++ b/notes/papers/report/parts/substack.md @@ -0,0 +1,7 @@ +# Substack + +The vast majority of algorithms involved in this project are multiple recursive in nature. Very often information on higher levels would influence the entire subtree. A good example is the resolution of name bindings. The size of the call stack is associated with the growth of the set of names, only the top needs to be mutated, but all names seen in enclosing scopes need to be accessible. The datastructure we need is essentially a linked list on the stack. + +This is a very common and not particularly interesting datastructure; much like quicksort, every C project of considerable size that uses recursion includes some definition of it. 
However, I still think it deserves some attention, precisely because it's so common. For example, my implementation also defines an iterator, and a reasonably efficient implementation of the outstandingly common operation of collecting the stack into a Vec that starts at the bottom. + +Another extension to this concept I wrote to help with the type system is a stackbound map. This is not a very good implementation and it definitely needs more work to be worth publishing, but it stands to show that the concept of a substack is versatile and powerful. \ No newline at end of file diff --git a/notes/papers/report/parts/timeline.md b/notes/papers/report/parts/timeline.md new file mode 100644 index 0000000..af01748 --- /dev/null +++ b/notes/papers/report/parts/timeline.md @@ -0,0 +1,19 @@ +# Timeline + +I started working on a functional language in February 2022. I was mostly inspired by Haskell and Rust, I wanted to create a lazy, pure language with a simple rigid syntax tree like Rust that would support macros. By the end of August, I had a proof-of-concept implementation of the macro executor, just enough to test my ideas. + +This is also when I came up with the name. I read an article about how orchids don't so much grow on, but rather together with mangrove trees and influence the trees to produce patterns beneficial to them while also killing fungi and extending the tree's capacity for photosynthesis. + +Having tested that my idea could work, at the start of the academic year I switched to the type system. When the project synopsis was written, I imagined that the type system would be an appropriately sized chunk of the work for a final year project; its title was "Orchid's Type System". + +Around the end of November I had researched enough type theory to decide what kind of type system I would want. 
My choice was advised by a number of grievances I had with Typescript such as the lack of higher-kinded types which comes up [surprisingly often][1] in Javascript, lack of support for nominal types and the difficulty of using dependent types. I appreciated however the powerful type transformation techniques. + +[1]: https://github.com/microsoft/TypeScript/issues/1213 + +However, building a type system proved too difficult; on February 23 I decided to cut my losses and focus on building an interpreter. The proof-of-concept interpreter was finished on March 10, but the macro executor was still using the naive implementation completed over the summer so it would take around 15 seconds to load an example file of 20 lines, and a range of other issues cropped up as well cumulatively impacting every corner of the codebase. A full rewrite was necessary. + +The final, working implementation was completed on May 8, this one uses token interning, starts up almost instantly and memoizes expressions by origin. This feature is implemented because it was very straightforward, but it actually conflicts with the pre-existing IO capabilities which still use continuation passing, so IO in a loop is actually impossible. + +## Immediate future + +The first order of business is to extend the standard library to a basic usable level, I'd like to try adding Elixir-like protocols with multiple type parameters, and some kind of IO support, perhaps mimicking algebraic effects. After that I would like to develop the embedding interface, as I hope to use Orchid in numerous future projects. diff --git a/notes/papers/report/parts/type_system/01-main.md b/notes/papers/report/parts/type_system/01-main.md new file mode 100644 index 0000000..88d50e7 --- /dev/null +++ b/notes/papers/report/parts/type_system/01-main.md @@ -0,0 +1,20 @@ +# Type system + +This is a description of the type system originally designed for Orchid which never reached the MVP stage. 
+ +At the core the type system consists of three concepts: + +- `define` creates nominal types, which also act as typeclasses. This may be very confusing but it will make more sense later. +- `impl` provides instances of typeclasses +- a universal parametric construct that serves as both a `forall` (or generic) and a `where` (or constraint). This was temporarily named `auto` but is probably more aptly described by the word `given`. + +## Unification + +The backbone of any type system is unification. In this case, this is an especially interesting question because the type expressions are built with code and nontermination is outstandingly common. + +The unification process uses Hindley-Milner unification as a primitive. It attempts to find an MGU within a constant N steps of reduction. In every step, the candidates are compared using HM, and if it fails, branches are created for each transformation available in the tree. All branches reference the previous step. Valid transformations are + +- $\beta$-reduction +- Replacing a subtree that is syntactically equivalent to a tree it was produced by with a call to the Y combinator. + +This algorithm is prone to state explosion, but because it does not need to solve extremely complex problems but rather many many very small ones, good caching can probably solve most issues. \ No newline at end of file diff --git a/notes/papers/report/parts/type_system/02-given.md b/notes/papers/report/parts/type_system/02-given.md new file mode 100644 index 0000000..ee9a4c9 --- /dev/null +++ b/notes/papers/report/parts/type_system/02-given.md @@ -0,0 +1,20 @@ +## Given (formerly Auto) + +`given` bindings have the form `@Name:type. body`. Either the `Name` or the `:type` part can be optional but at least one is required. The central idea is that wherever a binding is unwrapped by an operation the language attempts to find a value for the name. 
Bindings are unwrapped in the following situations: + +- If the value is used, such as if a generic function is called +- If the value is assigned to something that has a known type which does NOT have a binding + +Bindings can be **resolved** in a couple ways: + +1. If the name appears in the type of any value, type unification provides a solution +2. If the binding has a type and the point of unwrapping is within the body of a binding with an **assignable** type, the value of that binding is forwarded +3. If none of the above options yield any success and the binding has a type, the value of the single suitable `impl` according to the [[04-impl#Matching rules|impl matching rules]] is used + +If none of the above options are successful, resolution fails. + +It is possible to store values with bindings in typed datastructures without resolving the binding, for example `List @T. @:Eq T. (T -> Option bool)` would represent a `List` of functions that take any equality-comparable value and return an optional boolean. + +Bindings can be used to represent generics. In the above example, `@T. ...` is a generic parameter. It translates to the clause "given a type T, ...". Its value will probably be decided by the function's argument. + +Bindings can also be used to represent constraints. In the above example, `@:Eq T. ...` is a constraint, which translates to the clause "given an instance of `Eq T`, ...". Its value will have to be decided by an existing `Eq` constraint if the caller is also generic over `T`, or an `impl` of `Eq` if the function is called on a value of a concrete type or if the caller does not have the `Eq` constraint. \ No newline at end of file diff --git a/notes/papers/report/parts/type_system/03-define.md b/notes/papers/report/parts/type_system/03-define.md new file mode 100644 index 0000000..1e94d8f --- /dev/null +++ b/notes/papers/report/parts/type_system/03-define.md @@ -0,0 +1,61 @@ +# Define + +Define is used to create types and typeclasses. 
Define is a distinct [[02-parsing#Files|line type]] that has the following form: + +``` +define = "define" name param* "as" value +param = param_name [ ":" kind ] +kind = clause +param_name = "$" name (without spaces) +value = clause* +``` + +For an example of a type, here's the definition of a conslist or linked list. +``` +define List $T as Y \r. Option (Pair $T r) +``` + +These aren't macros although they look similar. While macros are processed after parsing and then forgotten, these placeholders are recognized by the language and subject to unification. + +It's important to keep in mind that these are nominal types; when something is typed `List int`, it is not assignable to `Option (Pair int (List int))`. + +## Typeclasses + +Typeclasses are types that describe operations. Very often a typeclass will be a single function, but they can also be sequences of functions. + +For an example of a typeclass, here's the definition of Eq, the class of types that can be equality-compared. +``` +define Eq $T as $T -> $T -> bool +``` + +Eq isn't a statement about types as typeclasses commonly are in other languages; instead, it's an operation carried out on a particular type. **Constraints of `Eq` on some generic parameter `T` are expressed as a requirement for the existence of `Eq T` for the given `T`.** As an added benefit, the operations exposed by a typeclass can be unambiguously referenced from the bound name of the typeclass value within the binding. +``` +isUnaryGrp := @T. @eq:Eq T. @:Add T T T. \t:T. eq (t + t) t +``` + +In the above example, the implementation of `Eq` is used directly as a value in the expression. The implementation of `Add` is not used, but it can be assumed that the operator + is translated via macros to a call to some generic function `add` which is constrained on `Add`, so according to the second unification rule in [[#Given (formerly Auto)|Given]] the implementation is forwarded. 
+ +## Kinds + +Each of the parameters to a nominal type has a kind. Kinds can be thought of as a "type of type", and they ensure that expressions that are used in the type of a value have no unspecified parameters while allowing values to be parametric on parametric types. + +### 1. concrete types + +`type` is the kind of concrete types. These are the only values in type-space that can stand in the position of a type annotation. Simple types such as `int` as well as fully specified generic types such as `List int` belong to this group. + +Kinds aren't inferred from usage; if a type parameter does not have a kind annotation, it is assumed to be `type`. + +### 2. generics + +Generics or parametric types act like N-ary functions. `type -> type` is the kind of generics with one type parameter, `type -> type -> type` is the kind of generics with two type parameters, and so on. `List` for instance is `type -> type`. + +Typeclasses applied to simple types also belong in this group. For example, `Eq` from above has kind `type -> type`. `Add` has three generic parameters for left, right and output types, and all of these are concrete types, so its kind is `type -> type -> type -> type`. + +### 3. higher-kinded polymorphism + +Types that are parametric on parametric types have kinds that are analogous to higher-order functions. Most real-world examples of this group are typeclasses that apply to containers. + +`List` has the kind `type -> type`. `Option`, also known as `Maybe` from Haskell also has the same kind, as does `HashMap string`. What's common about all of these is that they have values that can be modified without influencing the overall structure of the containers. In Haskell this capability is encoded in the typeclass `Functor`, but Orchid would probably opt for a more accessible name such as `Mapping`. The kind of this typeclass is `(type -> type) -> type`. +``` +define Mapping $C:(type -> type) as @T. @U. 
$C T -> $C U +``` diff --git a/notes/papers/report/parts/type_system/04-impl.md b/notes/papers/report/parts/type_system/04-impl.md new file mode 100644 index 0000000..a070ae4 --- /dev/null +++ b/notes/papers/report/parts/type_system/04-impl.md @@ -0,0 +1,58 @@ +# Impl + +Impl is used to implement typeclasses. Impl is a distinct [[02-parsing#Files|line type]] that has the following form: +``` +impl = "impl" target_type ["by" impl_name ["over" alternative*]] "via" value +target_type = clause* +impl_name = name +alternative = ns_name +value = clause* +``` + +Impls provide fallbacks for binding resolution. If the target type contains any @ bindings at the top level, they are also applied to the value, to avoid repetition. The list of alternatives contains references to other impls which the author of this impl is aware of and deems more general or for another reason inferior. Alternatives can never form a cycle. + +## Matching rules + +When a [[02-given|@]] binding is not resolvable using rules 1 and 2, impls are used to find a value. Each impl's target type may contain other bindings, so resolution proceeds similarly to a breadth-first Prolog solver. + +An impl is considered an acceptable **candidate** for a binding if its type unifies with the goal, with its bindings resolved in the context where the original binding is defined. This means that these indirect bindings are also first resolved using **assignable** enclosing bindings before impls would be enumerated. + +An impl is considered a **match** if it is a **candidate**, and all other candidates are reachable from it by walking the alternative tree (even if the intermediate steps are not candidates). If there is no match, resolution fails. + +## Overrides + +In Rust impls can be placed in one of two modules; the trait owner, and the type owner. 
Orchid is more forgiving than that which means that mistakes in external packages can temporarily be fixed in user code, but it also means that inconsistency is possible and needs to be addressed. Two additional possibilities arise that Rust's orphan rules prevent; foster impls and arbiter impls. + +### Foster impls + +If it doesn't make sense for either of the participants to acknowledge the others, foster impls can be created which don't own any of the participant symbols. + +```orc +import GenericModule::Typeclass +import SpecificModule::(Type, function) + +impl Typeclass Type by fosterTypeclassType via function +``` + +Foster impls can be placed in foster packages whose sole purpose is to glue packages together, or they can be embedded in usercode. + +### Arbiter impls + +If multiple foster impls exist for a given package, or if a foster impl is provided by some collection but one of the parents added an impl in the meantime, ambiguities arise. To resolve these, arbiter impls can be used to decide which value will win. + +``` orc +import BadModule::badImpl +import GoodModule::goodImpl +import GenericModule::Typeclass +import SpecificModule::Type + +impl Typeclass Type by arbiterGoodModuleTypeclassType over goodImpl, badImpl via goodImpl +``` + +Notice that goodImpl appears both as a value and an impl name. Named impls are always also exported as constants, specifically to account for situations where you want to use them despite auto resolution. They can be referenced in arbiter rules, exception rules for more general impls, and directly used as values in code. + +The more common and less hacky use case for arbiter rules is when a very general rule from a general package needs to be overridden by a more specific rule from a deep ancestor. + +--- + +In all cases, these problems represent a concern gap or overlap and should be eventually resolved by the authors of the original packages. 
The purpose of foster and arbiter rules is to not stall the ecosystem on a trivial conflict of concepts and to make adding dependencies less risky. It should still take some effort to maintain a large dependency list, but the risk of complete blockage becomes a more manageable constant effort. \ No newline at end of file diff --git a/notes/papers/report/spec/01-parsing.md b/notes/papers/report/spec/01-parsing.md deleted file mode 100644 index 2890ab8..0000000 --- a/notes/papers/report/spec/01-parsing.md +++ /dev/null @@ -1,101 +0,0 @@ -# Parsing - -Orchid expressions are similar in nature to lambda calculus or haskell, except whitespace is mostly irrelevant. - -## Names - -`name` and `ns_name` tokens appear all over the place in this spec. They represent operators, function names, arguments, modules. A `name` is - -1. the universally recognized operators `,`, `.`, `..` and `...` (comma and single, double and triple dot) -2. any C identifier -3. any sequence of name-safe characters starting with a character that cannot begin a C identifier. A name-safe character is any non-whitespace Unicode character other than - - - digits - - the namespace separator `:`, - - the parametric expression starters `\` and `@`, - - the string and char delimiters `"` and `'`, - - the various brackets`(`, `)`, `[`, `]`, `{` and `}`, - - `,`, `.` and `$` - - This means that, in absence of a known list of names, `!importatn!` is a single name but `importatn!` is two names, as a name that starts as a C identifier cannot contain special characters. It also means that using non-English characters in Orchid variables is a really bad idea. This is intentional, identifiers that need to be repeated verbatim should only contain characters that appear on all latin keyboards. - -There are also reserved words that cannot be used as names; `export` and `import`. - -A `ns_name` is a sequence of one or more `name` tokens separated by the namespace separator `::`. 
- -All tokens that do not contain `::` in the code may be `name` or `ns_name` depending on their context. - -## Clauses - -Clauses are the building blocks of Orchid's syntax. They belong to one of a couple categories: - -- S-expressions are a parenthesized sequence of space-delimited `clause`s. All three types of brackets `()`, `[]` and `{}` are supported. -- Lambdas start with `\.`, followed by a sequence of `clause`s where `` is a single `name` or `$_` followed by a C identifier. This is a greedy pattern that ends at the end of an enclosing S-expression, or the end of input. -- numbers can be in decimal, binary with the `0b` prefix, hexadecimal with the `0x` prefix, or octal with the `0` prefix. All bases support the decimal point, exponential notation or both. The exponent is prefixed with `p`, always written in decimal, may be negative, and it represents a power of the base rather than a power of 10. For example, `0xf0.4p-2` is `0xf04 / 16 ^ 3` or ~0.9385. -- Strings are delimited with `"`, support `\` escapes and four digit unicode escapes of the form `\uXXXX`. They may contain line breaks. -- Chars are a single character or escape from the above description of a string delimited by `'`. -- Placeholders are either of three styles; `$name`, `..$name`, `...$name`, `..$name:p`, `...$name:p`. the name is always a C identifier, p is an integer growth priority. -- Names are a single `ns_name` - -## Files - -Files are separated into lines. A line is delimited by newlines and only contains newlines within brackets. A line may be an import, rule, exported rule, or explicit export. - -### Rules - -Rules have the following form - -``` -pattern =priority=> template -``` - -The pattern is able to define new operators implicitly by referencing them, so all tokens must be delimited by spaces. 
The template is inserted in place of the pattern without parentheses, so unless it's meant to be part of a pattern matched by another rule which expects a particular parenthesization, when more than one token is produced the output should be wrapped in parentheses. - -A shorthand syntax is available for functions: - -``` -name := value -``` - -name in this case must be a single `name`. Value is automatically parenthesized, and the priority of these rules is always zero. - -### Explicit exports and exported rules - -An explicit export consists of `export :: ( )` where `` is a comma-separated list of `name`s. - -An exported rule consists of the keyword `export` followed by a regular rule. It both counts as a rule and an export of all the `name`s within the pattern. - -### Imports - -An import is a line starting with the keyword `import`, followed by a tree of imported names. - -``` -import_tree = name - | name :: import_tree - | name :: * - | ( import_tree [, import_tree]+ ) -``` - -Some examples of valid imports: - -``` -import std::cpsio -import std::(conv::parse_float, cpsio, str::*) -import std -``` - -Some examples of invalid imports: - -``` -import std::() -import std::cpsio::(print, *) -import std::(cpsio) -``` - -> **info** -> -> while none of these are guaranteed to work currently, there's little reason they would have to be invalid, so future specifications may allow them. - -An import can be normalized into a list of independent imports ending either with a `*` called wildcard imports or with a `name`. wildcard imports are normalized to imports for all the `name`s exported from the parent module. All Name clauses in the file starting with the same `name` one of these imports ended with are prefixed with the full import path. The rest of the Name clauses are prefixed with the full path of the current module. 
- -Reference cycles in Orchid modules are never allowed, so the dependency of a module's exports on its imports and a wildcard's import's value on the referenced module's exports does not introduce the risk of circular dependencies, it just specifies the order of processing for files. diff --git a/notes/papers/report/spec/02-macros.md b/notes/papers/report/spec/02-macros.md deleted file mode 100644 index cdbde43..0000000 --- a/notes/papers/report/spec/02-macros.md +++ /dev/null @@ -1,45 +0,0 @@ -# Macros - -After parsing, what remains is a set of macro rules, each with a pattern, priority and template. Modules aren't tracked in this stage, their purpose was to namespace the tokens within the rules. - -By employing custom import logic, it's also possible to add rules bypassing the parser. Starting with the macro phase, `clause`s may also be `atom`s or `externfn`s. The role of these is detailed in the [[03-runtime]] section. - -Macros are executed in reverse priority order, each macro is checked against each subsection of each clause sequence. When a match is found, the substitution is performed and all macros are executed again. - -## Placeholders - -Patterns fall into two categories - -- scalar placeholders - - `$name` matches exactly one clause - - `$_name` matches exactly one Name clause -- vectorial placeholders - - `..$name` matches zero or more clauses - - `...$name` matches one or more clauses - -`$_name` is uniquely valid in the position of an argument name within a lambda. - -Vectorial placeholders may also have a positive decimal integer growth priority specified after the name, separated with a `:` like so: `...$cond:2`. If it isn't specified, the growth priority defaults to 0. - -The template may only include placeholders referenced in the pattern. All occurences of a placeholder within a rule must match the same things. - -## Execution - -Each clause in the pattern matches clauses as follows: - -- Name matches name with the same full path. 
-- Lambda matches a lambda with matching argument name and matching body. If the argument name in the pattern is a name-placeholder (as in `\$_phname.`), the argument name in the source is treated as a module-local Name clause. -- Parenthesized expressions match each other if the contained sequences match and both use the same kind of parentheses. -- Placeholders' matched sets are as listed in [Placeholders]. - -If a pattern contains the same placeholder name more than once, matches where they don't match perfectly identical clauses, names or clause sequences are discarded. - -### Order of preference - -The growth order of vectorial placeholders is - -- Outside before inside parentheses -- descending growth priority -- left-to-right by occurrence in the pattern. - -If a pattern matches a sequence in more than one way, whichever match allocates more clauses to the first vectorial placeholder in growth order is preferred. diff --git a/notes/papers/report/spec/03-runtime.md b/notes/papers/report/spec/03-runtime.md deleted file mode 100644 index 35d5b5f..0000000 --- a/notes/papers/report/spec/03-runtime.md +++ /dev/null @@ -1,32 +0,0 @@ -# Runtime - -Orchid is evaluated lazily. This means that everything operates on unevaluated expressions. This has the advantage that unused values never need to be computed, but it also introduces a great deal of complexity in interoperability. - -## Execution mode - -The executor supports step-by-step execution, multiple steps at once, and running an expression to completion. Once an Orchid program reaches a nonreducible state, it is either an external item, a literal, or a lambda function. - -## external API - -In order to do anything useful, Orchid provides an API for defining clauses that have additional behaviour implemented in Rust. Basic arithmetic is defined using these. 
- -### Atomic - -atomics are opaque units of foreign data, with the following operations: - -- functions for the same three execution modes the language itself supports -- downcasting to a concrete type - -Atomics can be used to represent processes. Given enough processing cycles, these return a different clause. - -They can also be used to wrap data addressed to other external code. This category of atomics reports nonreducible at all times, and relies on the downcasting API to interact with ExternFn-s. - -It's possible to use a combination of these for conditional optimizations - for instance, to recognize chains of processes that can be more efficiently expressed as a single task. - -### ExternFn - -external functions can be combined with another clause to form a new clause. Most of the time, this new clause would be an Atomic which forwards processing to the arguments until they can't be normalized any further, at which point it either returns an ExternFn to take another argument or executes the operation associated with the function and returns. - -Because this combination of operations is so common, several macros are provided to streamline it. - -Sometimes, eg. when encoding effectful functions in continuation passing style, an ExternFn returns its argument without modification. It is always a logic error to run expressions outside a run call, or to expect an expression to be of any particular shape without ensuring that run returned nonreducible in the past. 
diff --git a/orchid.code-workspace b/orchid.code-workspace index 4899a6f..19e5eb0 100644 --- a/orchid.code-workspace +++ b/orchid.code-workspace @@ -5,7 +5,7 @@ } ], "settings": { - "[markdown][latex]": { + "[markdown]": { "editor.unicodeHighlight.ambiguousCharacters": false, "editor.unicodeHighlight.invisibleCharacters": false, "diffEditor.ignoreTrimWhitespace": false, @@ -20,6 +20,8 @@ "editor.glyphMargin": false, "editor.rulers": [], "editor.guides.indentation": false, + "editor.formatOnSave": true, + "editor.formatOnType": true, }, "[rust]": { "editor.rulers": [74] @@ -28,6 +30,7 @@ "files.associations": { "*.mjsd": "markdown" }, + "swissknife.notesEnabled": false, }, "extensions": { "recommendations": [