From e99ade92babda9da307e1ccfdd492ba43123d53b Mon Sep 17 00:00:00 2001 From: Lawrence Bethlenfalvy Date: Tue, 23 May 2023 18:39:45 +0100 Subject: [PATCH] Cleanup #1 - Removed notes - Removed superfluous uses of `where` --- examples/scratch.hs | 1 - examples/scratchpad/main.orc | 3 - notes/papers/demo/notes.md | 19 -- notes/papers/demo/slides.md | 121 ------------ notes/papers/project_synopsis/Makefile | 28 --- notes/papers/project_synopsis/main.tex | 136 ------------- notes/papers/project_synopsis/references.bib | 20 -- notes/papers/report/main/index.mjsd | 0 notes/papers/report/parts/abbreviations.md | 4 - notes/papers/report/parts/ethics.md | 5 - notes/papers/report/parts/examples/+index.md | 54 ------ .../report/parts/examples/calculator.md | 128 ------------- .../parts/examples/list-processing/fn.md | 74 -------- .../parts/examples/list-processing/list.md | 71 ------- .../parts/examples/list-processing/main.md | 30 --- .../papers/report/parts/examples/maps/list.md | 3 - .../papers/report/parts/examples/maps/main.md | 6 - .../papers/report/parts/examples/maps/map.md | 10 - .../report/parts/examples/maps/option.md | 5 - notes/papers/report/parts/future_work.md | 80 -------- notes/papers/report/parts/haskell.md | 15 -- notes/papers/report/parts/interner.md | 43 ----- notes/papers/report/parts/interpreter.md | 13 -- notes/papers/report/parts/introduction.md | 21 --- .../papers/report/parts/literature/effects.md | 31 --- .../papers/report/parts/literature/macros.md | 34 ---- notes/papers/report/parts/macros/+index.md | 76 -------- .../report/parts/macros/implementation.md | 33 ---- notes/papers/report/parts/macros/order.md | 56 ------ notes/papers/report/parts/oss.md | 75 -------- notes/papers/report/parts/pipeline.md | 67 ------- notes/papers/report/parts/references.md | 10 - notes/papers/report/parts/spec/+index.md | 3 - notes/papers/report/parts/spec/02-parsing.md | 178 ------------------ notes/papers/report/parts/spec/03-macros.md | 45 ----- 
notes/papers/report/parts/spec/04-runtime.md | 34 ---- notes/papers/report/parts/substack.md | 5 - notes/papers/report/parts/timeline.md | 17 -- .../papers/report/parts/type_system/+index.md | 20 -- .../report/parts/type_system/02-given.md | 20 -- .../report/parts/type_system/03-define.md | 61 ------ .../report/parts/type_system/04-impl.md | 58 ------ notes/type_system/definitions.md | 54 ------ notes/type_system/impls.md | 67 ------- notes/type_system/unification.md | 27 --- orchid.code-workspace | 2 + src/cli.rs | 2 +- src/external/num/numeric.rs | 2 +- src/interner/monotype.rs | 26 --- src/interner/multitype.rs | 7 +- src/interpreter/run.rs | 7 +- src/representations/ast_to_postmacro.rs | 4 +- src/representations/tree.rs | 4 +- src/rule/update_first_seq.rs | 15 +- src/run_dir.rs | 1 - src/utils/cache.rs | 20 +- src/utils/iter.rs | 15 +- src/utils/mod.rs | 1 - src/utils/protomap.rs | 30 +-- src/utils/replace_first.rs | 9 +- src/utils/substack.rs | 13 +- src/utils/translate.rs | 30 --- swap.md | 0 63 files changed, 76 insertions(+), 1973 deletions(-) delete mode 100644 examples/scratch.hs delete mode 100644 examples/scratchpad/main.orc delete mode 100644 notes/papers/demo/notes.md delete mode 100644 notes/papers/demo/slides.md delete mode 100644 notes/papers/project_synopsis/Makefile delete mode 100644 notes/papers/project_synopsis/main.tex delete mode 100644 notes/papers/project_synopsis/references.bib delete mode 100644 notes/papers/report/main/index.mjsd delete mode 100644 notes/papers/report/parts/abbreviations.md delete mode 100644 notes/papers/report/parts/ethics.md delete mode 100644 notes/papers/report/parts/examples/+index.md delete mode 100644 notes/papers/report/parts/examples/calculator.md delete mode 100644 notes/papers/report/parts/examples/list-processing/fn.md delete mode 100644 notes/papers/report/parts/examples/list-processing/list.md delete mode 100644 notes/papers/report/parts/examples/list-processing/main.md delete mode 100644 
notes/papers/report/parts/examples/maps/list.md delete mode 100644 notes/papers/report/parts/examples/maps/main.md delete mode 100644 notes/papers/report/parts/examples/maps/map.md delete mode 100644 notes/papers/report/parts/examples/maps/option.md delete mode 100644 notes/papers/report/parts/future_work.md delete mode 100644 notes/papers/report/parts/haskell.md delete mode 100644 notes/papers/report/parts/interner.md delete mode 100644 notes/papers/report/parts/interpreter.md delete mode 100644 notes/papers/report/parts/introduction.md delete mode 100644 notes/papers/report/parts/literature/effects.md delete mode 100644 notes/papers/report/parts/literature/macros.md delete mode 100644 notes/papers/report/parts/macros/+index.md delete mode 100644 notes/papers/report/parts/macros/implementation.md delete mode 100644 notes/papers/report/parts/macros/order.md delete mode 100644 notes/papers/report/parts/oss.md delete mode 100644 notes/papers/report/parts/pipeline.md delete mode 100644 notes/papers/report/parts/references.md delete mode 100644 notes/papers/report/parts/spec/+index.md delete mode 100644 notes/papers/report/parts/spec/02-parsing.md delete mode 100644 notes/papers/report/parts/spec/03-macros.md delete mode 100644 notes/papers/report/parts/spec/04-runtime.md delete mode 100644 notes/papers/report/parts/substack.md delete mode 100644 notes/papers/report/parts/timeline.md delete mode 100644 notes/papers/report/parts/type_system/+index.md delete mode 100644 notes/papers/report/parts/type_system/02-given.md delete mode 100644 notes/papers/report/parts/type_system/03-define.md delete mode 100644 notes/papers/report/parts/type_system/04-impl.md delete mode 100644 notes/type_system/definitions.md delete mode 100644 notes/type_system/impls.md delete mode 100644 notes/type_system/unification.md delete mode 100644 src/utils/translate.rs delete mode 100644 swap.md diff --git a/examples/scratch.hs b/examples/scratch.hs deleted file mode 100644 index 55146ce..0000000 
--- a/examples/scratch.hs +++ /dev/null @@ -1 +0,0 @@ -main = sequence \ No newline at end of file diff --git a/examples/scratchpad/main.orc b/examples/scratchpad/main.orc deleted file mode 100644 index f5267a9..0000000 --- a/examples/scratchpad/main.orc +++ /dev/null @@ -1,3 +0,0 @@ -export main := do{ - -} \ No newline at end of file diff --git a/notes/papers/demo/notes.md b/notes/papers/demo/notes.md deleted file mode 100644 index a1df9ef..0000000 --- a/notes/papers/demo/notes.md +++ /dev/null @@ -1,19 +0,0 @@ -Orhid is a lazy, pure functional langauge with an execution model inspired by Haskell. It has a simple, principled syntax resembling mathematical notation commonly used to describe the lambda calculus. State is held in closures and multi-parameter functions are represented using currying. - -This minimalism is in an effort to make parsing and code generation easier, as complex structures are defined using syntax-level macros. The macro system is insipred by generalized kerning which is a well-known Turing-complete system. - -Macros consist of substitution rules applied to the tokenized, namespaced source. These rules can make use of placeholders to transform the expression tree. Placeholders can match exactly one, at least one, or any number of tokens. Macros are used to define infix operators, name bindings, friendly loop syntax over the Y-combinator and more. - -Because substitution rules are applied to the namespaced tokens, macro programs can interact with each other; parts of the pattern that triggers a macro can be generated by other macros while other parts may be provided by the user. In this way, libraries can define extension interfaces where other libraries can integrate with their constructs, and an individual token can take on many meanings depending on context. - ---- - -Orchid is designed to be embedded in a Rust application. The entire program lifecycle consists of three stages which can be individually configured: - -1. 
The parser pipeline is responsible for converting text - usually files - into a module tree. It allows the embedder to define the environment the code will see in terms of a series of file trees that are parsed in the context of preceding layers. - -2. The macro executor operates entirely on the output of the pipeline. Macro programs don't necessarily halt, so the executor provides an API to find and resolve one match at a time. - -3. The interpreter is a single function operating on an expression with a symbol table for resolving named constants. It also allows setting a limit to the number of normalization steps - this is commonly known as gas. - -Interfacing between eager procedural and lazy functional code can be challenging, especially with the vastly different suites of optimizations. To make this a little easier, we provide an array of Rust macros that streamline the process of exposing Rust functions to Orchid code. The usage of these is demonstrated in the standard library. \ No newline at end of file diff --git a/notes/papers/demo/slides.md b/notes/papers/demo/slides.md deleted file mode 100644 index b3c5aef..0000000 --- a/notes/papers/demo/slides.md +++ /dev/null @@ -1,121 +0,0 @@ ---- -marp: true -class: invert ---- - -# Orchid - -some tagline - ---- - -## Syntax - -basically lambda calc -``` -half := \n. div n 2 -pair := \a.\b. \f. f a b -increment := add 1 -``` - ---- - -## Macros - -match and replace token sequences -``` -if ...$cond then ...$true else ...$false ==> (ifthenelse (...$cond) (...$true) (...$false)) -``` -...while keeping parameters intact -``` -$data -- a single token (including parenthesized sequences) -...$data -- at least one token -..$data -- zero or more tokens -``` - ---- - -## Macros - -define operators... -``` -...$a + ...$b ==> (add (...$a) (...$b)) -``` -...and named bindings... -``` -let $name = ...$value in ...$body ==> (\$name. 
...$body) ...$value -``` -...and control structures -``` -loop $r on (...$parameters) with ...$tail ==> Y (\$r. - bind_names (...$parameters) (...$tail) -) ...$parameters - --- bind each of the names in the first argument as a parameter for the second argument -bind_names ($name ..$rest) $payload ==> \$name. bind_names (..$rest) $payload -bind_names () (...$payload) ==> ...$payload -``` - ---- - -## Macros - -can expose interfaces... -``` -do { ...$statement ; ...$rest } ==> (statement (...$statement) do { ...$rest }) -do { ...$return } ==> (...$return) -``` -...to be used by others... -``` -statement (let $name = ...$value) ...$next ==> ((\$name. ...$next) (...$value)) -statement (cps $name = ...$operation) ...$next ==> ((...$operation) \$name. ...$next) -statement (cps ...$operation) ...$next ==> ((...$operation) (...$next)) -``` -...to define any syntax -``` -export main := do{ - cps data = readline; - let double = parse_float data * 2; - cps print (to_string double ++ "\n") -} -``` - ---- - -## Control - -remains with the embedder - -| | extension | supervision | -| ----------: | :----------------: | :--------------------: | -| pipeline | external libraries | file IO interception | -| macros | | step-by-step execution | -| interpreter | constants, input | gas | - ---- - -## Extensions - -```rs -use std::fmt::Debug; -use crate::external::litconv::with_lit; -use crate::representations::{interpreted::ExprInst, Literal}; -use crate::{atomic_impl, atomic_redirect, externfn_impl}; - -#[derive(Clone)] -pub struct ToString1; -externfn_impl!(ToString1, |_: &Self, x: ExprInst| Ok(ToString0{x})); - -#[derive(Debug, Clone)] -pub struct ToString0{ x: ExprInst } -atomic_redirect!(ToString0, x); -atomic_impl!(ToString0, |Self{ x }: &Self, _| { - let string = with_lit(x, |l| Ok(match l { - Literal::Char(c) => c.to_string(), - Literal::Uint(i) => i.to_string(), - Literal::Num(n) => n.to_string(), - Literal::Str(s) => s.clone() - }))?; - Ok(string.into()) -}); -``` \ No newline 
at end of file diff --git a/notes/papers/project_synopsis/Makefile b/notes/papers/project_synopsis/Makefile deleted file mode 100644 index b9ae477..0000000 --- a/notes/papers/project_synopsis/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -# (c) 2010: Johann A. Briffa -# $Id: Makefile 1791 2010-09-28 17:00:10Z jabriffa $ - -TARGETS := main.pdf -DEPENDS := $(wildcard *.tex) $(wildcard *.cls) $(wildcard *.bib) - -PDFLATEX=pdflatex - -.force: - -all: $(TARGETS) - -archive: $(TARGETS) - rm -f archive.zip - zip -r archive.zip Figures/ Makefile *.cls *.tex *.bib $(TARGETS) -x "*.svn*" - -%.bbl: %.aux - bibtex $* - -%.aux: %.tex $(DEPENDS) - $(PDFLATEX) $*.tex - -%.pdf: %.aux %.bbl - $(PDFLATEX) $*.tex - $(PDFLATEX) $*.tex - -clean: - -/bin/rm -f $(TARGETS) *.aux *.log *.bbl *.blg *.out *.toc *.lof *.lot diff --git a/notes/papers/project_synopsis/main.tex b/notes/papers/project_synopsis/main.tex deleted file mode 100644 index bdfdc73..0000000 --- a/notes/papers/project_synopsis/main.tex +++ /dev/null @@ -1,136 +0,0 @@ -\documentclass{article} -\usepackage{graphicx} -\usepackage[margin=2cm]{geometry} -\usepackage[hidelinks]{hyperref} - - -\title{Orchid's Type System} -\author{Lawrence Bethlenfalvy, 6621227} -\date{12 November 2022} - -% Why would you toss all this in the template if it just doesn't compile!? -%\urn{6621227} -%\degree{Bachelor of Science in Computer Science} -%\supervisor{Brijesh Dongol} - -\begin{document} -\maketitle - - -\section{Introduction} - -Originally my final year project was going to be an entire programming language which I started to -develop around February, however at the start of the year I decided to set a more reasonable goal. - -Orchid is a functional programming language inspired by $\lambda$-calculus, Haskell and Rust. The -execution model is exactly $\lambda$-calculus with opaque predicates and functions representing -foreign data such as numbers, file descriptors and their respeective operations. 
For the purpose of -side effects caused by foreign functions, reduction is carried out in normal order just like -Haskell. - -There are two metaprogramming systems, one syntax level and one type level, similar to Rust. -Syntax-level metaprogramming is based on generalized kerning, it is mostly defined and a naiive -implementation is complete at the time of writing. Type-level metaprogramming resembles Prolog and -is a major objective of this year's project. - -The project's home is this repository which, at the time of writing, contains fairly outdated code -samples: \url{https://github.com/lbfalvy/orchid} - -\subsection{Aims} - -My goal for this year is to define a robust, usable type system and write a performant -implementation. - -The next phase of development will be a compiler frontend for LLVM. If the type system reaches a -satisfactory level of completion before the dissertation is complete, I may also write a bit about -the compilation. - -If due to some unforeseen circumstances I'm unable to complete enough of the type system to fill -the dissertation with its details or it ends up too simple, I may also write about the macro system -which is already in a usable state and only needs some optimizations and minor adjustments -due to shifts in responsibilities which occured while I was defining the basic properties of the -type system and experimenting with concrete code examples to get a clearer picture of the -provisional feature set. - -\subsection{Objectives} - -A working type system should have the following parts, which I will implement in roughly this order - -\begin{itemize} - \item \textbf{Type inference engine and type checker} This will be an extension of - the Hindley-Milner algorithm, which simultaneously unifies and completes partial type - annotations, and recognizes conflicts. 
- \item \textbf{Typeclass solver} At the moment this appears to be a relatively simple piece of - code but I'm not entirely confident that complications won't arise as its responsibilities - become clearer, so I consider it a separate component - \item \textbf{Executor} Orchid is a statically typed language so it should eventually be compiled - with LLVM, but in order to demonstrate the usability of my type system I will have to write - an experimental interpreter. Since types are already basically expressions of type type, - parts of the executor will coincide with parts of the type inference engine. -\end{itemize} - -\section{Literature Review} - -The preprocessor can parse arbitrary syntax. Generalized kerning can use "rockets" -(called carriages in Orchid terminology) to parse token sequences statefully and assume -the role of an arbitrary parser encoded as a rich Turing machine.\cite{suckerpinch} - -The type system supports higher-kinded types. I considered translating higher-kinded polymorphism -into abstract types as demonstrated by Yallop\cite{yallop} which can be translated into -Prolog and then building the breadth-first executor described by Tubella\cite{tubella}, but -in the end I decided that since I'm already building my own unification system I might as well -skip this step. Currently expressions are annotated with common Orchid expressions that evaluate to -a type. This means that unification is uncomputable in some cases, but the most common cases -such as halting expressions and recursive types using fixed point combinators can be unified -fairly easily and this leaves room for extension of the unification engine. - -\section{Technical Overview} - -\subsection{Type checker} - -Type expressions to be unified are collected into a group. For the purpose of unification, types -are either opaque types with possible parameters which are considered equal if both the type and its -parameters are equal, or transparent lambda expressions applied to types. 
Before unification would -begin, the expressions that refer to equal types are collected in a group. A breadth-first search -through the network of reduced forms is executed for all expressions in lockstep, and -syntactic unification is attempted on each pair of reduced forms belonging to different expressions -in the same group. - -At a minimum, the following must be valid reduction steps: - -\begin{itemize} - \item $\beta$-reduction - \item fixed point normalization, which simply means identifying that a subexpression has - reduced to an expression that contains the original. When a fixed point is detected, the - recursive expression is converted to a form that uses the Y-combinator. This operation - is ordered before $\beta$-reductions of the expression in the BFS tree but otherwise has - the same precedence. -\end{itemize} - -\subsection{Typeclass solver} - -This will be relatively simple and strongly resemble Rust's Chalk trait solver, with the exception -that I would prefer not to enforce the orphan rules on the language level so as not to completely -stall projects while a third party developer accepts pull requests on what might be legacy code to -add new impls. - -\subsection{Executor} - -A basic version of the executor can technically be produced by initializing the lazy BFS of -reductions created for the type checker on runtime code, taking the first result, dropping the -BFS iterator and repeating these two steps ad infinitum, but this will likely be very inefficient -so time permitting I would like to create a somewhat better version. This stands to show how -developer effort can be reduced - along with the learning curve of the complex type system - by -reusing the same language for both. A type system supporting HKTs would have to be uncomputable -either way. 
- -\section{Workplan} - -TODO - -\appendix - -\bibliographystyle{IEEEtran} -\bibliography{references} - -\end{document} \ No newline at end of file diff --git a/notes/papers/project_synopsis/references.bib b/notes/papers/project_synopsis/references.bib deleted file mode 100644 index 3d32701..0000000 --- a/notes/papers/project_synopsis/references.bib +++ /dev/null @@ -1,20 +0,0 @@ -@online{suckerpinch, - title = {Generalized kerning is undecidable! But anagraphing is possible.}, - author = {suckerpinch}, - date = {dec, 2017}, - organization = {YouTube}, - url = {https://www.youtube.com/watch?v=8\_npHZbe3qM} -} - -@phdthesis{tubella, - author = {Jordi Tubella and Antonio González}, - school = {Universitat Politechnica de Catalunya}, - title = {A Partial Breadth-First Execution Model for Prolog}, - year = {1994} -} - -@misc{yallop, - author = {Jeremy Yallop and Leo White}, - howpublished = {University of Cambridge}, - title = {Lightweight higher-kinded polymorphism} -} \ No newline at end of file diff --git a/notes/papers/report/main/index.mjsd b/notes/papers/report/main/index.mjsd deleted file mode 100644 index e69de29..0000000 diff --git a/notes/papers/report/parts/abbreviations.md b/notes/papers/report/parts/abbreviations.md deleted file mode 100644 index e39a889..0000000 --- a/notes/papers/report/parts/abbreviations.md +++ /dev/null @@ -1,4 +0,0 @@ -Table of abbreviations: - -- **CPS**: Continuation passing style, a technique of transferring control to a function in a lazy language by passing the rest of the current function in a lambda. 
- diff --git a/notes/papers/report/parts/ethics.md b/notes/papers/report/parts/ethics.md deleted file mode 100644 index 400c33d..0000000 --- a/notes/papers/report/parts/ethics.md +++ /dev/null @@ -1,5 +0,0 @@ -# Statement of Ethics - -People other than the author, living creatures or experiments on infrastructure were not involved in the project, so the principles of **do no harm**, **confidentiality of data** and **informed consent** are not relevant. - -As a language developer, my **social responsibility** is to build reliable languages. Orchid is a tool in service of whatever goal the programmer has in mind. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/+index.md b/notes/papers/report/parts/examples/+index.md deleted file mode 100644 index 605db6b..0000000 --- a/notes/papers/report/parts/examples/+index.md +++ /dev/null @@ -1,54 +0,0 @@ -# Examples - -The following examples all work in the submitted version of Orchid, they're included in various subdircetories of `examples`. - -## Prelude - -All code files implicitly include the head statement - -``` -import prelude::* -``` - -The `prelude` module is a string literal compiled into the interpreter. 
Its contents are as follows: - -```rs -static PRELUDE_TXT:&str = r#" -import std::( - add, subtract, multiply, remainder, divide, - equals, ifthenelse, - concatenate -) - -export ...$a + ...$b =1001=> (add (...$a) (...$b)) -export ...$a - ...$b:1 =1001=> (subtract (...$a) (...$b)) -export ...$a * ...$b =1000=> (multiply (...$a) (...$b)) -export ...$a % ...$b:1 =1000=> (remainder (...$a) (...$b)) -export ...$a / ...$b:1 =1000=> (divide (...$a) (...$b)) -export ...$a == ...$b =1002=> (equals (...$a) (...$b)) -export ...$a ++ ...$b =1003=> (concatenate (...$a) (...$b)) - -export do { ...$statement ; ...$rest:1 } =0x2p543=> ( - statement (...$statement) do { ...$rest } -) -export do { ...$return } =0x1p543=> (...$return) - -export statement (let $name = ...$value) ...$next =0x1p1000=> ( - (\$name. ...$next) (...$value) -) -export statement (cps $name = ...$operation) ...$next =0x2p1000=> ( - (...$operation) \$name. ...$next -) -export statement (cps ...$operation) ...$next =0x1p1000=> ( - (...$operation) (...$next) -) - -export if ...$cond then ...$true else ...$false:1 =0x1p320=> ( - ifthenelse (...$cond) (...$true) (...$false) -) - -export ::(,) -"#; -``` - -The meaning of each of these rules is explained in the [calculator example](./calculator.md). The exact file is included here just as a reference while reading the other examples. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/calculator.md b/notes/papers/report/parts/examples/calculator.md deleted file mode 100644 index 6a054ff..0000000 --- a/notes/papers/report/parts/examples/calculator.md +++ /dev/null @@ -1,128 +0,0 @@ -# Calculator - -This example demonstrates various parts of the standard library, infix operators, `do{}` blocks, and various syntax elements. Approching MVP, this was the first benchmark created to debug various features. It predates the transition for `:=` from single-token macros to a dedicated language element. 
- -``` -import std::(parse_float, to_string) -import std::(readline, print) - -export main := do{ - cps data = readline; - let a = parse_float data; - cps op = readline; - cps print ("\"" ++ op ++ "\"\n"); - cps data = readline; - let b = parse_float data; - let result = ( - if op == "+" then a + b - else if op == "-" then a - b - else if op == "*" then a * b - else if op == "/" then a / b - else "Unsupported operation" -- dynamically typed shenanigans - ); - cps print (to_string result ++ "\n"); - 0 -} -``` - -## do - -The main function uses a `do{}` block, which is processed using the following rules, temporarily added to the prelude: - -``` -export do { ...$statement ; ...$rest:1 } =0x2p543=> ( - statement (...$statement) do { ...$rest } -) -export do { ...$return } =0x1p543=> (...$return) -``` - -This pair of rules converts the flat structure into a conslist which makes it easier for dedicated statement rules to process their own fragments. The produced structure looks roughly like this: - -``` -(statement (cps data = readline) -(statement (let a = parse_float data) -(statement (cps op = readline) -( ... -(statement (cps print (to_string result ++ "\n")) -(0) -))))) -``` - -`do` blocks contain semicolon-delimited statements which receive special handling, and a final expression that doesn't. This final expression must be present since every Orchid expression must produce a value including `do` blocks. For ergonomics, in the future a sentinel value may be returned if the body of the `do` block ends with a semicolon. - -## statement - -This example demonstrates three statement types. This collection can be extended by matching on `prelude::statement () ...$next`. - -### let - -`let` bindings are used for forward-declaring values in subsequent expressions, passing them to the rest of the body. -``` -export statement (let $name = ...$value) ...$next =0x1p1000=> ( - (\$name. 
...$next) (...$value) -) -``` - -Since the executor keeps track of copies of the same expression and applies normalization steps to a shared instance, this technique also ensures that `...$value` will not be evaluated multiple times. - -### cps= - -`cps` was used for effectful functions. -``` -export statement (cps $name = ...$operation) ...$next =0x2p1000=> ( - (...$operation) \$name. ...$next -) -``` - -In the version of Orchid this example was written for, functions like `print` or `readline` carried out their work as a side effect of normalization. At this point the copy-tracking optimization described above wasn't used. Because of this, in new versions `print` or `readline` in a loop doesn't necessarily repeat its effect. This bug can be addressed in the standard library, but `cps` would still probably be just as useful. - -### cps - -Since `cps` is designed for side effects, an expression of this kind doesn't necessarily produce a value. This `=` free variant passes the tail as an argument to the expression as-is -``` -export statement (cps ...$operation) ...$next =0x1p1000=> ( - (...$operation) (...$next) -) -``` - -## if-then-else - -This rule is substantially simpler, it simply forwards the three slots to a function that makes the actual decision. -``` -export if ...$cond then ...$true else ...$false:1 =0x1p320=> ( - ifthenelse (...$cond) (...$true) (...$false) -) -``` - -Notice that `else if` isn't a syntax element, it's simply an artifact of this rule applied to itself. The critical ordering requirement that enables this is that `cond` and `true` are squeezed so neither of them can accidentally consume an `if` or `else` token. `::prefix:0` is implied at the start, it is left of `cond:0` and `true:0` so it has a higher growth priority, and `false:1` has a higher explicit priority. 
- -## Infix operators - -Infix operators could be intuitively defined with something like the following - -``` -$lhs + $rhs =1=> (add $lhs $rhs) -$lhs * $rhs =2=> (mul $lhs $rhs) -``` - -However, if they really were defined this way, function application would have the lowest priority. Ideally, we would like function application to have the highest priority. -``` --- what we mean -(mult (parse_float "foobar") 2) --- how we would like to write it -let a = parse_float "foobar" * 2 --- how we would have to write it -let a = (parse_float "foobar") * 2 -``` - -With vectorial placeholders it's possible to define the operators in reverse, i.e. to match the "outermost" operator first. -``` -...$lhs + ...$rhs =2=> (add (...$lhs) (...$rhs)) -...$lhs * ...$rhs =1=> (mul (...$lhs) (...$rhs)) -``` - -With this, function calls get processed before any operator. - -## Dynamically typed shenanigans - -If the operator character isn't recognized, `result` gets assigned `"Unsupported operation"`. This wouldn't work in most type systems as `result` is now either a string or a number with no static discriminator. Most of Orchid's functions accept a single type of input with the sole exception being `to_string`. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/list-processing/fn.md b/notes/papers/report/parts/examples/list-processing/fn.md deleted file mode 100644 index e122920..0000000 --- a/notes/papers/report/parts/examples/list-processing/fn.md +++ /dev/null @@ -1,74 +0,0 @@ -# Fn - -This file contains a variety of utilities for functional programming - -``` -export Y := \f.(\x.f (x x))(\x.f (x x)) - -export loop $r on (...$parameters) with ...$tail =0x5p512=> Y (\$r. - bind_names (...$parameters) (...$tail) -) ...$parameters - --- bind each of the names in the first argument as a parameter for the second argument -bind_names ($name ..$rest) $payload =0x2p1000=> \$name. 
bind_names (..$rest) $payload -bind_names () (...$payload) =0x1p1000=> ...$payload - -export ...$prefix $ ...$suffix:1 =0x1p130=> ...$prefix (...$suffix) -export ...$prefix |> $fn ..$suffix:1 =0x2p130=> $fn (...$prefix) ..$suffix - -export (...$argv) => ...$body =0x2p512=> (bind_names (...$argv) (...$body)) -$name => ...$body =0x1p512=> (\$name. ...$body) -``` - -## bind_names - -This is a utility macro for binding a list of names on an expression. It demonstrates how to extract reusable macro program fragments to simplify common tasks. This demonstrative version simply takes a sequence of name tokens without any separators or custom programming, but its functionality can be extended in the future to include eg. destructuring. - -## arrow functions - -The arrow `=>` operator here is used to define inline functions. It is very similar to the native `\x.` lambda, except that native lambdas use higher priority than any macro so they can't appear inside a `do{}` block as all of the subsequent lines would be consumed by them. It is parsed using the following rules: -``` -export (...$argv) => ...$body =0x2p512=> (bind_names (...$argv) (...$body)) -$name => ...$body =0x1p512=> (\$name. ...$body) -``` - -## pipelines - -This is a concept borrowed from Elixir. The `|>` operator simply inserts the output of the previous expression to the first argument of the following function. -``` -export ...$prefix |> $fn ..$suffix:1 =0x2p130=> $fn (...$prefix) ..$suffix -``` - -It is processed left-to-right, but leaves the suffix on the same level as the function and sinks the prefix, which means that long pipelines eventually become left associative despite the inverted processing order. - -## right-associative function call operator - -The `$` operator is analogous to its Haskell counterpart. It is right-associative and very low priority. Its purpose is to eliminate trailing parentheses. - -## Loop expression - -Recursion in lambda calculus is achieved using a fixpoint combinator. 
The classic version of this combinator described by Church is the [Y-combinator][hb_tlc], defined like so: -``` -export Y := \f.(\x.f (x x))(\x.f (x x)) -``` - -[hb_tlc]: ISBN-0444867481 - -Formalizing what this does is difficult, in plain words it calls `f` with an expression that is equivalent to its own return value, thus giving the parameter a convenient means to define its value in terms of different parameterizations of itself. The following snippet computes 2^12 to demonstrate how it would normally be called. -``` -export main := Y (\r.\n.\s. - if n == 0 then s - else r (n - 1) (s * 2) -) 12 0 -``` - -The purpose of the loop expression is to provide a more convenient syntax to define recursive structures, as direct calls to the Y-combinator are error prone. It is defined as follows: -``` -export loop $r on (...$parameters) with ...$tail =0x5p512=> Y (\$r. - bind_names (...$parameters) (...$tail) -) ...$parameters -``` - -The template allows the caller to give the point of recursion a name and enumerate the names that can change value between iterations of the loop. The point of recursion then has to be called with the same number of parameters. - -It may be possible to construct a variant of this statement which allows only reassigning subsets of the mutable parameter list. It is definitely possible to construct a variant that allows declaring new names in place in the parameter list, although I did not have time to do so. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/list-processing/list.md b/notes/papers/report/parts/examples/list-processing/list.md deleted file mode 100644 index f47dc85..0000000 --- a/notes/papers/report/parts/examples/list-processing/list.md +++ /dev/null @@ -1,71 +0,0 @@ -# List - -These files demonstrate building datastructures using closures. - -## Option.orc - -Option is among the simplest datastructures. It either stores a value or nothing. 
To interact with it, one must provide a default value and a selector. - -``` -export some := \v. \d.\f. f v -export none := \d.\f. d - -export map := \option.\f. option none f -export flatten := \option. option none \opt. opt -export flatmap := \option.\f. option none \opt. map opt f -``` - -The selector is required in lambda calculus because the only way to obtain information about values is to evaluate them, but it's not actually necessary in Orchid because it's always possible to pass a primitive of incompatible type as the default value and then use equality comparison to decide whether we got the value in the option or our dud. Regardless, this interface is vastly more convenient and probably more familiar to programmers coming from functional languages. - -## List.orc - -The linked list is an outstandingly powerful and versatile datastructure and the backbone of practical functional programming. This implementation uses a locally defined church pair and the option defined above in an effort to be more transparent, although this means that the essential operation of splitting the head and tail or returning a default value becomes an explicit function (here named `pop`) instead of the intrinsic interface of the list itself. - -_in list.orc_ -``` -import option -import super::fn::* - -pair := \a.\b. \f. f a b - --- Constructors - -export cons := \hd.\tl. option::some (pair hd tl) -export end := option::none - --- Operators - -export pop := \list.\default.\f. list default \cons.cons f - -export reduce := \list.\acc.\f. ( - loop r on (list acc) with - pop list acc \head.\tail. r tail (f acc head) -) - -export map := \list.\f. ( - loop r on (list) with - pop list end \head.\tail. cons (f head) (r tail) -) - -export skip := \list.\n. ( - loop r on (list n) with - if n == 0 then list - else pop list end \head.\tail. r tail (n - 1) -) - -export take := \list.\n. ( - loop r on (list n) with - if n == 0 then end - else pop list end \head.\tail. 
cons head $ r tail $ n - 1 -) - -new[...$item, ...$rest:1] =0x2p333=> (cons (...$item) new[...$rest]) -new[...$end] =0x1p333=> (cons (...$end) end) -new[] =0x1p333=> end - -export ::(new) -``` - -Most of these operations should be self-explanatory in the context of the parts defined in [fn.md](./fn.md). - -The `new[]` macro builds a list from data. Because they are expected to contain expressions, the fields here are comma separated unlike in `fn::=>` and `fn::loop`. I did not find this inconsistency jarring during initial testing, but it may be updated if further improvements to `loop` and `=>`'s syntax open up the possibility of multi-token field descriptions. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/list-processing/main.md b/notes/papers/report/parts/examples/list-processing/main.md deleted file mode 100644 index b4e9ac8..0000000 --- a/notes/papers/report/parts/examples/list-processing/main.md +++ /dev/null @@ -1,30 +0,0 @@ -This example showcases common list processing functions and some functional programming utilities. It is also the first multi-file demo. - -_in main.orc_ -``` -import std::(to_string, print) -import super::list -import fn::* - -export main := do{ - let foo = list::new[1, 2, 3, 4, 5, 6]; - let bar = list::map foo n => n * 2; - let sum = bar - |> list::skip 2 - |> list::take 3 - |> list::reduce 0 (a b) => a + b; - cps print $ to_string sum ++ "\n"; - 0 -} -``` - -This file imports `list` as a sibling module and `fn` as a top-level file. These files are in identical position, the purpose of this is just to test various ways to reference modules. - -- The contents of _fn.orc_ are described in [fn](./fn.md) -- _list.orc_ and its dependency, _option.orc_ are described in [list](./list.md) - ---- - -The `main` function uses a `do{}` block to enclose a series of name bindings. It constructs a list of numbers 1-6. 
This is done eagerly, or at least a linked list of the same size is constructed eagerly, although the `cons` calls are left until the first read. Due to Orchid's laziness, `bar` gets assigned the `map` call as-is. `sum` is assigned from the `|>` pipe chain, which is essentially the same as a chain of further name bindings; the return value of each function is passed as the first argument of the next, pushing subsequent arguments out of the way. - -When the `print` expression is evaluated, the updates are applied as needed; the mapping is never applied to 1 and 2, and none of the loops in the list processing functions execute their body on the list object containing 6. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/maps/list.md b/notes/papers/report/parts/examples/maps/list.md deleted file mode 100644 index 80c2e05..0000000 --- a/notes/papers/report/parts/examples/maps/list.md +++ /dev/null @@ -1,3 +0,0 @@ -# List - -In order to use lists as tuples, one needs to be able to access arbitrary elements by index. This is done by the new `list::get` function which returns an `option`. Since most lists in complex datastructures are of known length, this leads to a lot of unreachable branches. The marking and elimination of these called for the definition of `option::unwrap` and `std::panic`. \ No newline at end of file diff --git a/notes/papers/report/parts/examples/maps/main.md b/notes/papers/report/parts/examples/maps/main.md deleted file mode 100644 index 25c4b94..0000000 --- a/notes/papers/report/parts/examples/maps/main.md +++ /dev/null @@ -1,6 +0,0 @@ -This example demonstrates the construction of a basic functional map. - -The `fn.orc` file is exactly identical to [the version in list-processing][1] -`list.orc` and `option.orc` are extended to accommodate additional functionality. 
- -[1]: ../list-processing/fn.md \ No newline at end of file diff --git a/notes/papers/report/parts/examples/maps/map.md b/notes/papers/report/parts/examples/maps/map.md deleted file mode 100644 index eea3736..0000000 --- a/notes/papers/report/parts/examples/maps/map.md +++ /dev/null @@ -1,10 +0,0 @@ -# Map - -A map implemented using a list of 2-length lists each containing a key and a corresponding value. Although `list` defines a `pair` for internal use, a binary `list` was chosen to test the performance of the interpreter. - -While using a Church-pair instead of a list to store individual entries could multiply the performance of this map, a greater improvement can be achieved by using some sort of tree structure. This implementation is meant for very small maps such as those representing a typical struct. - -## cover vs erase - -In a list map like this one, most operations are O(n), except insertion which has an O(1) variant - appending a new frame with the new value without checking if one already exists. This is not generally a good idea, but in some extreme situations the time it saves can be very valuable. - diff --git a/notes/papers/report/parts/examples/maps/option.md b/notes/papers/report/parts/examples/maps/option.md deleted file mode 100644 index 5abbd54..0000000 --- a/notes/papers/report/parts/examples/maps/option.md +++ /dev/null @@ -1,5 +0,0 @@ -# Option - -This example uses a lot of lists of known length, but with the introduction of `list::get` a lot of `option`s are added to the flow of logic. A way to mark impossible branches is needed. - -This is handled using a new external function called `std::panic`. Since Orchid is a sandboxed language this doesn't actually cause a Rust panic, instead it produces a dedicated ExternError when it's first reduced. Using this, `option::unwrap` is trivial to define. 
\ No newline at end of file diff --git a/notes/papers/report/parts/future_work.md b/notes/papers/report/parts/future_work.md deleted file mode 100644 index 0cfb9e2..0000000 --- a/notes/papers/report/parts/future_work.md +++ /dev/null @@ -1,80 +0,0 @@ -# Future work - -## Standard library - -There are a few libraries I would like to implement in the future to demonstrate various uses of the language - -### Macro error reporting - -When describing syntax transformations with Orchid macros, it's fairly easy to make assertions about the stages during which given tokens should exist in the code in terms of the lower and upper bound of the currently active priority number. When these assertions fail, the consequences can be very difficult to debug since a partially transformed syntax tree with all sorts of carriages around conforms to neither the public API of any library nor the core language and lambda calculus. This problem can be addressed with guard rules and bubbling errors. To demonstrate, consider this module: - -``` --- in client library -import std::macro_error::missing_token - --- what this carriage does is not relevant to the example, focus on the priority numbers -start_carriage $init =100=> carriage ($init) -carriage ($state) $f =10_001=> carriage (($f $state)) -carriage ($state) stop_carriage =10_002=> $state - --- report the suspected reason why this carriage did not get consumed -carriage ($state) =0=> (missing_token stop_carriage ($state)) - -export ::(start_carriage, stop_carriage) -``` - -``` --- in std::macro_error - --- convert various errors to uniform format -export (missing_token $token ..$details) =1_000_000=> (bubbling_error - "{} was not found" ($token) (..$details) -) - --- forward error upwards -(..$_prefix (bubbling_error ...$args) ..$_suffix) =1_000_001=> (bubbling_error ...$args) -[..$_prefix (bubbling_error ...$args) ..$_suffix] =1_000_001=> (bubbling_error ...$args) -{..$_prefix (bubbling_error ...$args) ..$_suffix} =1_000_001=> 
(bubbling_error ...$args) -``` - -With this, various options are available for displaying the error: - -1. bubbling_error could be a magic token that always causes the macro executor to format and print the following string -2. bubbling_error could be defined as a function to raise an error when the problematic function is called. This only supports the (in my opinion, insufficient) level of error reporting Python provides for syntax errors -3. bubbling_error could be left undefined, the runtime could expose processed functions that contained undefined names after macro execution, and dev tooling could parse bubbling_error out of this data. - -### Extensible structural(?) pattern matching - -Since all tokens are namespaced, complicated protocols can be defined between libraries for dispatching macro resolution. I would like to make use of this to build a pattern matching library that resolves patterns to a series of function calls which return some kind of Maybe value. This is something I often wish Rust supported, for instance when matching a type part of which is stored in a reference-counting pointer, a second match expression is required to extract data from the reference-counted part. - -### Function serialization - -Being a pure language, Orchid carries the potential to serialize functions and send them over the network. This enables for instance an Orchid web framework to represent UI as well as database transactions as simple callbacks in server code that are flush with the code describing server behaviour. I would like to explore this option in the future and develop a general library that allows this. - -### Macros for UI, declarative testing, etc. - -The flexible macro system enables library developers to invent their own syntax for essentially anything. I considered defining macros for html, music scores / midi data, marble and flow diagrams. 
- -### Unsafe - -These functions may be exposed by a direct Orchid interpreter but they would probably not be included in the library exposed by an embedder. - -#### system calls - -While individual system APIs can be exposed to the program using dedicated Rust bindings, this takes time and limits the power of the language. The general solution to this in high level languages is to expose the `system()` function which enables high level code to interact with _some kind of shell_, the shell of the operating system. What shell this exactly is and what tools are available through it is up to the user to discover. - -#### DMA/MMIO - -As a high level language, Orchid doesn't inherently have direct memory access, in part because it's not generally required. Regardless, a way of writing to and reading from exact memory addresses may be useful in the development of libraries that interface with hardware such as a Raspberry Pi's GPIO pins. - -In general this is probably better accomplished using Rust functions that interface with Orchid, but this will eventually inevitably lead to several functions that do nothing but read a number from an address or write a number to an address, except the addresses are wrapped in various tagged structs. This repetition could be nipped in the bud by simply exposing a function for mmio and allowing the Orchid side to define the wrappers. - -## Type system - -### Early plans - -Originally, Orchid was meant to have a type system that used Orchid itself to build generic types using logic of unconstrained complexity from their arguments. The time constraints did not allow for this to be done in the initial version, but it is still on the roadmap. - -### Alternatives - -During initial testing of the working version, I found that the most common kind of programming error in lambda calculus appears to be arity mismatch or syntax error that results in arity mismatch. 
Without any kind of type checking this is especially difficult to debug as every function looks the same. This can be addressed with a much simpler type system similar to System-F. Any such type checker would have to be constructed so as to only verify user-provided information regarding the arity of functions without attempting to find the arity of every expression, since System-F is strongly normalising and Orchid like any general purpose language supports potentially infinite loops. diff --git a/notes/papers/report/parts/haskell.md b/notes/papers/report/parts/haskell.md deleted file mode 100644 index be9e1a5..0000000 --- a/notes/papers/report/parts/haskell.md +++ /dev/null @@ -1,15 +0,0 @@ -My original inspiration to create Orchid was Haskell. I found the power of lazy evaluation impressive and inspiring and saw its potential in defining zero-cost abstractions with simple data flow. I identified a few key problems that motivated me to build a new language: - -**Syntax sugar:** Infix operators in Haskell are defined as any function consisting of non-alphanumeric characters. This produces various rather confusing patterns; ternary operators are placed between their first and second argument, and the ability to use keywords as infix operators and infix operators as prefixes with the use of backticks is a pointless divergence. Other kinds of syntax sugar such as do blocks have a well-defined purpose but often appear as operators in the middle of screen-wide expressions where their purpose is hard to understand and entirely disconnected from the metaphor that brought them to life. - -In addition the handling of all syntax sugar is delegated to the compiler. This results in a system that's surprisingly limited when it comes to defining new abstractions, but also requires much greater effort to learn and read than languages with an intentionally limited syntax such as Java. 
- -**Syntax-level metaprogramming:** [Template Haskell][th1] is Haskell's tool for syntax-level macros. I learned about it after I built Orchid, and it addresses a lot of my problems. - -[th1]: ./literature/macros.md - -**Type system:** Haskell's type system is very powerful but to be able to represent some really interesting structures it requires a long list of GHC extensions to be enabled which in turn make typeclass implementation matching undecidable and the heuristic rather bad (understandably so, it was clearly not designed for that). - -My plan for Orchid was to use Orchid itself as a type system as well; rather than aiming for a decidable type system and then extending it until it inevitably becomes turing-complete [1][2][3], my type-system would be undecidable from the start and progress would point towards improving the type checker to recognize more and more cases. - -A description of the planned type system is available in [[type_system/+index|Appendix T]] \ No newline at end of file diff --git a/notes/papers/report/parts/interner.md b/notes/papers/report/parts/interner.md deleted file mode 100644 index 569a529..0000000 --- a/notes/papers/report/parts/interner.md +++ /dev/null @@ -1,43 +0,0 @@ -## Interner - -To fix a very serious performance problem with the initial POC, all tokens and all namespaced names in Orchid are interned. - -String interning is a fairly simple optimization, the core idea is to replace strings with an ID unique to the data so that equality comparison can be executed on those IDs in place instead of having to fetch the data from possibly an uncached memory location and compare it character by character. This optimization is so popular that most high-level programming languages with immutable strings automatically do it for string literals, and it allows a lot of otherwise intolerably string-heavy systems such as Javascript's string-map objects to be not only functional but quite performant. 
- -For the sake of simplicity in Rust it is usually done by replacing Strings with a NonZeroU32 (or some other size). This system is very easy to understand and manage since the user doesn't have to deal with lifetimes, but it has a weakness wherein in order to print or in any other way interact with the strings themselves one needs access to the interner object itself. This is perhaps the most significant code smell in Orchid, essentially every function takes a parameter that references the interner. - -Interning is of course not limited to strings, but one has to be careful in applying it to distinct concepts as the lifetimes of every single interned thing are tied together, and sometimes the added constraints and complexity aren't worth the performance improvements. Orchid's interner is completely type-agnostic so that the possibility is there. The interning of Orchid string literals is on the roadmap however. - -### Initial implementation - -Initially, the interner used Lasso, which is an established string interner with a wide user base. - -#### Singleton - -A string interner is inherently a memory leak, so making it static would have likely proven problematic in the future. At the same time, magic strings should be internable by any function with or without access to the interner since embedders of Orchid should be able to reference concrete names in their Rust code conveniently. To get around these constraints, the [[oss#static_init|static_init]] crate was used to retain a global singleton instance of the interner and intern magic strings with it. After the first non-static instance of the interner is created, the functions used to interact with the singleton would panic. I also tried using the iconic lazy_static crate, but unfortunately it evaluates the expressions upon first dereference which for functions that take an interner as parameter is always after the creation of the first non-static interner. 
- -#### The Interner Trait - -The interner supported exchanging strings or sequences of tokens for tokens. To avoid accidentally comparing the token for a string with the token for a string sequence, or attempting to resolve a token referring to a string sequence as a string, the tokens have a rank, encoded as a dependent type parameter. Strings are exchanged for tokens of rank 0, and sequences of tokens of rank N are exchanged for tokens of rank N+1. - -#### Lasso shim - -Because the type represented by a token is statically guaranteed, we can fearlessly store differently encoded values together without annotation. Thanks to this, strings can simply be forwarded to lasso without overhead. Token sequences are more problematic because the data is ultimately a sequence of numbers and we can't easily assert that they will constitute a valid utf8 string. My temporary solution was to encode the binary data in base64. - -### Revised implementation - -The singleton ended up completely defunct because `static_init` apparently also evaluates init expressions on first dereference. Fixing this issue was a good occasion to come up with a better design for the interner. - -#### monotype - -The logic for interning itself is encapsulated by a `monotype` struct. This stores values of a single homogenous type using a hashmap for value->token lookup and a vector for token->value lookup. It is based on, although considerably simpler than Lasso. - -#### polytype - -The actual Interner stores a `HashMap>`, which is essentially a store of values of unique type keyed by the type. The values in this case are monotype interners. - -Unlike the naive initial implementation, this version also operates on references, so interning and externing values causes no unnecessary copying and heap allocations. - -### The InternedDisplay Trait - -For refined error reporting most structures derive `Debug` and also implement `Display`. 
In most cases where the structure at hand describes code of some kind, `Display` attempts to print a fragment of valid code. With every name in the codebase interned this is really difficult because interner tokens can't be resolved from `Display` implementations. To solve this, a new trait was defined called `InternedDisplay` which has the same surface as `Display` except for the fact that `fmt`'s mirror image also takes an additional reference to Interner. The syntax sugar for string formatting is in this way unfortunately lost, but the functionality and the division of responsibilities remains. \ No newline at end of file diff --git a/notes/papers/report/parts/interpreter.md b/notes/papers/report/parts/interpreter.md deleted file mode 100644 index 607332a..0000000 --- a/notes/papers/report/parts/interpreter.md +++ /dev/null @@ -1,13 +0,0 @@ -## Interpreter - -The Orchid interpreter exposes one main function called `run`. This function takes an expression to reduce and the symbol table returned by the pipeline and processed by the macro repository. It's also possible to specify a reduction step limit to make sure the function returns in a timely manner. - -### Interfacing with an embedder - -An embedding application essentially interacts with Orchid by way of queries, that is, it invokes the interpreter with a prepared function call. The Orchid code then replies with a return value, which the embedder can either read directly or use as a component in subsequent questions, and so the conversation develops. All communication is initiated, regulated and the conclusions executed entirely by the embedder. - -Although external functions are exposed to Orchid and they can be called at any time (within a computation), they are expected to be pure and any calls to them may be elided by the optimizer if it can deduce the return value from precedent or circumstances. 
- -One common way to use a query API is to define a single query that is conceptually equivalent to "What would you like to do?" and a set of valid answers which each incorporate some way to pass data through to the next (identical) query. HTTP does this, historically client state was preserved in cookies and pre-filled form inputs, later with client-side Javascript and LocalStorage. - -Orchid offers a way to do this using the `Handler` trait and the `run_handler` function which is the interpreter's second important export. Essentially, this trait offers a way to combine functions that match and process various types implementing `Atomic`. This allows embedders to specify an API where external functions return special, inert `Atomic` instances corresponding to environmental actions the code can take, each of which also carries the continuation of the logic. This is a variation of continuation passing style, a common way of encoding effects in pure languages. It is inspired by algebraic effects \ No newline at end of file diff --git a/notes/papers/report/parts/introduction.md b/notes/papers/report/parts/introduction.md deleted file mode 100644 index 1409a33..0000000 --- a/notes/papers/report/parts/introduction.md +++ /dev/null @@ -1,21 +0,0 @@ -# Introduction - -Orchid is a lazy, pure functional programming language with an execution model inspired by Haskell and a powerful syntax-level preprocessor for encoding rich DSLs that adhere to the language's core guarantees. - -## Immutability - -The merits of pure functional code are well known, but I would like to highlight some of them that are particularly relevant in the case of Orchid; - -- **Free execution order** The value of any particular subprogram is largely independent of its execution order, so externally defined functions have a lot of liberty in evaluating their arguments. 
This can ensure that errors are caught early, or even be used to start subtasks in parallel while the rest of the parameters are being collected. With a considerately designed external API, Orchid functions can be reordered and parallelized based on the resources they operate on. This approach can be observed in Rust's Bevy ECS, but Rust is an impure language so it can only guarantee this degree of safety at the cost of great complexity. - -- **Self-containment** Arguments to the current toplevel function are all completely self-contained expressions, which means that they can be serialized and sent over the network provided that an equivalent for all atoms and externally defined functions exists on both sides, which makes Orchid a prime query language. - > **note** - > Although this is possible using Javascript's `Function` constructor, it is a catastrophic security vulnerability since code sent this way can access all host APIs. In the case of Orchid it is not only perfectly safe from an information access perspective since all references are bound on the sender side and undergo explicit translation, but also from a computational resource perspective since the recipient can apply step limits to the untrusted expression, interleave it with local tasks, and monitor its size and memory footprint. - -- **reentrancy** in low reliability environments it is common to run multiple instances of an algorithm in parallel and regularly compare and correct their state using some form of consensus. In an impure language this must be done explicitly and mistakes can result in divergence. In a pure language the executor can be configured to check its state with others every so many steps. - -## Laziness - -Reactive programming is an increasingly popular paradigm for enabling systems to interact with changing state without recomputing subresults that have not been modified. 
It is getting popular despite the fact that enabling this kind of programming in classical languages - most notably javascript, where it appears to be the most popular - involves lots of boilerplate and complicated constructs using many many lambda functions. In a lazy language this is essentially the default. - -In addition, lazy, pure code lends itself to optimization. Deforestation and TCO are implied and CTFE (or in the case of an interpreted language ahead-of-time function execution) along with a host of other optimizations are more convenient. diff --git a/notes/papers/report/parts/literature/effects.md b/notes/papers/report/parts/literature/effects.md deleted file mode 100644 index b37f486..0000000 --- a/notes/papers/report/parts/literature/effects.md +++ /dev/null @@ -1,31 +0,0 @@ -https://www.unison-lang.org/learn/fundamentals/abilities/ - -An excellent description of algebraic effects that lead me to understand how they work and why they present an alternative to monads. - -Algebraic effects essentially associate a set of special types representing families of requests to a function that it may return other than its own return type. Effects usually carry a thunk or function to enable resuming normal processing, and handlers usually respond to the requests represented by the effects by implementing them on top of other effects such as IO. The interesting part to me is that all of this is mainly just convention, so algebraic effects provide type system support for expressing arbitrary requests using CPS. - -Although Orchid doesn't have a type system, CPS is a straightforward way to express side effects - ---- - -https://github.com/zesterer/tao - -The first place where I encountered algebraic effects, otherwise a very interesting language that I definitely hope to adopt features from in the future - -Tao is made by the same person who created Chumsky, the parser combinator used in Orchid. 
It demonstrates a lot of interesting concepts, its pattern matching is one of a kind. The language is focused mostly on static verification and efficiency neither of which are particularly strong points of Orchid, but some of its auxiliary features are interesting to an untyped, interpreted language too. One of these is generic effects. - ---- - -https://wiki.haskell.org/All_About_Monads#A_Catalog_of_Standard_Monads - -Originally, I intended to have dedicated objects for all action types, and transformations similar to Haskell's monad functions. - -A monad is a container that can store any type and supports three key operations: - -1. Constructing a new instance of the container around a value -2. Flattening an instance of the container that contains another instance of it into a single container of the inner nested value -3. applying a transformation to the value inside the container that produces a different type - -The defining characteristic of monads is that whether and when the transformations are applied is flexible since information can't easily leave the monad. - -This system is extremely similar to effects, and at least in an untyped context they're essentially equally powerful. I opted for effects because their defaults seem more sensible. \ No newline at end of file diff --git a/notes/papers/report/parts/literature/macros.md b/notes/papers/report/parts/literature/macros.md deleted file mode 100644 index 5f52d31..0000000 --- a/notes/papers/report/parts/literature/macros.md +++ /dev/null @@ -1,34 +0,0 @@ -https://doc.rust-lang.org/reference/macros-by-example.html - -Rust's macro system was both an invaluable tool and an example while defining Orchid's macros. - -Rust supports declarative macros in what they call "macros by example". These use a state machine-like simplistic parser model to match tokens within the strictly bounded parameter tree. Most notably, Rust's declarative macros don't support any kind of backtracking. 
They are computationally equivalent to a finite state machine. - ---- - -https://wiki.haskell.org/Template_Haskell - -Template haskell is haskell's macro system that I learned about a little bit too late. - -Throughout this project I was under the impression that Haskell didn't support macros at all, as I didn't discover template haskell until very recently. It is a fairly powerful system, although like Rust's macros their range is bounded, so they can hardly be used to define entirely new syntax. There also seem to be a lot of technical limitations due to this feature not being a priority to GHC. - ---- - -https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p0707r4.pdf -https://www.youtube.com/watch?v=4AfRAVcThyA - -This paper and the corresponding CppCon talk motivated me to research more natural, integrated forms of metaprogramming. - -The paper describes a way to define default behaviour for user-defined groups of types extending the analogy of enums, structs and classes using a compile-time evaluated function that processes a parameter describing the contents of a declaration. It is the first metaprogramming system I encountered that intended to write meta-programs entirely inline, using the same tools the value-level program uses. - -This eventually lead to the concept of macros over fully namespaced tokens. - ---- - -https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2021/p2392r0.pdf -https://www.youtube.com/watch?v=raB_289NxBk - -This paper and the corresponding CppCon talk demonstrate a very intersting syntax extension to C++. - -C++ is historically an object-oriented or procedural language, however in recent standards a significant movement towards declarative, functional patterns manifested. 
This paper in particular proposes a very deep change to the syntax of the language, an entirely new class of statements that simultaneously bind an arbitrary number of names and return a boolean, that may result in objects being constructed, partially moved and destroyed. The syntax extensions appear very fundamental and yet quite convenient, but what little C++ has in terms of local reasoning suffers. This was interesting and inspirational to me because it demonstrated that considerate syntax extensions can entirely redefine a language, while also reminding about C++'s heritage. - diff --git a/notes/papers/report/parts/macros/+index.md b/notes/papers/report/parts/macros/+index.md deleted file mode 100644 index b1bd291..0000000 --- a/notes/papers/report/parts/macros/+index.md +++ /dev/null @@ -1,76 +0,0 @@ -## Macros - -Left-associative unparenthesized function calls are intuitive in the typical case of just applying functions to a limited number of arguments, but they're not very flexible. Haskell solves this problem by defining a diverse array of syntax primitives for individual use cases such as `do` blocks for monadic operations. This system is fairly rigid. In contrast, Rust enables library developers to invent their own syntax that intuitively describes the concepts the library at hand encodes. In Orchid's codebase, I defined several macros to streamline tasks like defining functions in Rust that are visible to Orchid, or translating between various intermediate representations. - -### Generalized kerning - -In the referenced video essay, a proof of the Turing completeness of generalized kerning is presented. The proof involves encoding a Turing machine in a string and some kerning rules. The state of the machine is next to the read-write head and all previous states are enumerated next to the tape because kerning rules are reversible. 
The end result looks something like this: - -``` -abcbcddddef|1110000110[0]a00111010011101110 -``` - -The rules are translated into kerning rules. For a rule - -> in state `a` seeing `0`: new state is `b`, write `1` and go `left` - -the kerning rule would look like this (template instantiated for all possible characters): - -``` -$1 [ 0 ] a equals a < $1 ] b 0 -``` - -Some global rules are also needed, also instantiated for all possible characters in the templated positions - -``` -$1 $2 < equals $2 < $1 unless $1 is | -| $1 < equals $1 | > -> $1 $2 equals $1 > $2 unless $2 is ] -> $1 ] equals [ $1 ] -``` - -What I really appreciate in this proof is how visual it is; based on this, it's easy to imagine how one would go about encoding a pushdown automaton, lambda calculus or other interesting tree-walking procedures. This is exactly why I based my preprocessor on this system. - -### Namespaced tokens - -Rust macros operate on the bare tokens and therefore are prone to accidental aliasing. Every other item in Rust follows a rigorous namespacing scheme, but macros break this structure, probably because macro execution happens before namespace resolution. The language doesn't suffer too much from this problem, but the relativity of namespacing -limits their potential. - -Orchid's substitution rules operate on namespaced tokens. This means that the macros can hook into each other. Consider the following example, which is a modified version of a real rule included in the prelude: - -in _procedural.orc_ -```orchid -export do { ...$statement ; ...$rest:1 } =10_001=> ( - statement (...$statement) do { ...$rest } -) -export do { ...$return } =10_000=> (...$return) -export statement (let $_name = ...$value) ...$next =10_000=> ( - (\$_name. ...$next) (...$value) -) -``` - -in _cpsio.orc_ -```orchid -import procedural::statement - -export statement (cps $_name = ...$operation) ...$next =10_001=> ( - (...$operation) \$_name. 
...$next -) -export statement (cps ...$operation) ...$next =10_000=> ( - (...$operation) (...$next) -) -``` - -in _main.orc_ -```orchid -import procedural::(do, let, ;) -import cpsio::cps - -export main := do{ - cps data = readline; - let a = parse_float data * 2; - cps print (data ++ " doubled is " ++ stringify a) -} -``` - -Notice how, despite heavy use of macros, it's never ambiguous where a particular name is coming from. Namespacing, including import statements, is entirely unaffected by the macro system. The source of names is completely invariant. diff --git a/notes/papers/report/parts/macros/implementation.md b/notes/papers/report/parts/macros/implementation.md deleted file mode 100644 index 242b7e6..0000000 --- a/notes/papers/report/parts/macros/implementation.md +++ /dev/null @@ -1,33 +0,0 @@ -## Implementation - -THe optimization of this macro execution algorithm is an interesting challenge with a diverse range of potential optimizations. The current solution is very far from ideal, but it scales to the small experimental workloads I've tried so far and it can accommodate future improvements without any major restructuring. - -The scheduling of macros is delegated to a unit called the rule repository, while the matching of rules to a given clause sequence is delegated to a unit called the matcher. Other tasks are split out into distinct self-contained functions, but these two have well-defined interfaces and encapsulate data. Constants are processed by the repository one at a time, which means that the data processed by this subsystem typically corresponds to a single struct, function or other top-level source item. - -### keyword dependencies - -The most straightforward optimization is to skip patterns that doesn contain tokens that don't appear in the code at all. This is done by the repository to skip entire rules, but not by the rules on the level of individual slices. This is a possible path of improvement for the future. 
- -### Matchers - -There are various ways to implement matching. To keep the architecture flexible, the repository is generic over the matcher bounded with a very small trait. - -The current implementation of the matcher attempts to build a tree of matchers rooted in the highest priority vectorial placeholder. On each level, the specializations are defined as follows: - -- `VecMatcher` corresponds to a subpattern that starts and ends with a vectorial. Each matcher also matches the scalars in between its submatchers, this is not explicitly mentioned. - - - `Placeholder` corresponds to a vectorial placeholder with no lower priority vectorials around it - - It may reject zero-length slices but contains no other logic - - - `Scan` corresponds to a high priority vectorial on one side of the pattern with lower priority vectorials next to it. - - It moves the boundary - consisting of scalars - from one side to the other - - - `Middle` corresponds to a high priority vectorial surrounded on both sides by lower priority vectorials. - - This requires by far the most complicated logic, collecting matches for its scalar separators on either side, sorting their pairings by the length of the gap, then applying the submatchers on either side until a match is found. This uses copious heap allocations and it's generally not very efficient. Luckily, this kind of pattern almost never appears in practice. - -- `ScalMatcher` tests a single token. Since vectorials in subtrees are strictly lower priority than those in parent enclosing sequences `S` and `Lambda` don't require a lot of advanced negotiation logic. They normally appear in sequence, as their operations are trivially generalizable to a static sequence of them. - -- `AnyMatcher` tests a sequence and wraps either a sequence of `ScalMatcher` or a single `VecMatcher` surrounded by two sequences of `ScalMatcher`. 
\ No newline at end of file diff --git a/notes/papers/report/parts/macros/order.md b/notes/papers/report/parts/macros/order.md deleted file mode 100644 index ae43d26..0000000 --- a/notes/papers/report/parts/macros/order.md +++ /dev/null @@ -1,56 +0,0 @@ -### Execution order - -The macros describe several independent sequential programs that are expected to be able to interact with each other. To make debugging easier, the order of execution of internal steps within independent macros has to be relatively static. - -The macro executor follows a manually specified priority cascade, with priorities ranging from 0 to 0xep255, exclusive. Priorities are accepted in any valid floating point format, but usually written in binary or hexadecimal natural form, as this format represents floating point precision on the syntax level, thus making precision errors extremely unlikely. - -The range of valid priorities is divided up into bands, much like radio bands. In this case, the bands serve to establish a high level ordering between instructions. - -The bands are each an even 32 orders of magnitude, with space in between for future expansion - -| | | | | -| :-----------: | :------: | :---------: | :----------: | -| 0-7 | 8-15 | 16-23 | 24-31 | -| optimizations | x | | | -| 32-39 | 40-47 | 48-55 | 56-63 | -| operators | | | x | -| 64-71 | 72-79 | 80-87 | 88-95 | -| | | expressions | | -| 96-103 | 104-111 | 112-119 | 120-127 | -| | x | | | -| 128-135 | 136-143 | 144-151 | 152-159 | -| bindings | | | x | -| 160-167 | 168-175 | 176-183 | 184-191 | -| | | x | | -| 192-199 | 200-207 | 208-215 | 216-223 | -| | aliases* | | | -| 224-231 | 232-239 | 240-247 | 248- | -| integrations | | | transitional | - -#### Transitional states - -Transitional states produced and consumed by the same macro program occupy the unbounded top region of the f64 field. 
Nothing in this range should be written by the user or triggered by an interaction of distinct macro programs; the purpose of this high range is to prevent devices such as carriages from interacting. Any transformation sequence in this range can assume that the tree is inert other than its own operation. - -#### Integrations - -Integrations expect an inert syntax tree but at least one token in the pattern is external to the macro program that resolves the rule, so it's critical that all macro programs be in a documented state at the time of resolution. - -#### Aliases - -Fragments of code extracted for readability are all at exactly 0x1p800. These may be written by programmers who are not comfortable with macros or metaprogramming. They must have unique single token patterns. Because their priority is higher than any entry point, they can safely contain parts of other macro invocations. They have a single priority number because they can't conceivably require internal ordering adjustments and their usage is meant to be as straightforward as possible. - -#### Binding builders - -Syntax elements that manipulate bindings should be executed earlier. `do` blocks and (future) `match` statements are good examples of this category. Anything with a lower priority trigger can assume that all names are correctly bound. - -#### Expressions - -Things that essentially work like function calls just with added structure, such as `if`/`then`/`else` or `loop`. These are usually just more intuitive custom forms that are otherwise identical to a macro - -#### Operators - -Binary and unary operators that process the chunks of text on either side. Within the band, these macros are prioritized in inverse precedence order and apply to the entire range of clauses before and after themselves, to ensure that function calls have the highest perceived priority. - -#### Optimizations - -Macros that operate on a fully resolved lambda code and look for known patterns that can be simplified. 
I did not manage to create a working example of this but for instance repeated string concatenation is a good example. \ No newline at end of file diff --git a/notes/papers/report/parts/oss.md b/notes/papers/report/parts/oss.md deleted file mode 100644 index 4c3de56..0000000 --- a/notes/papers/report/parts/oss.md +++ /dev/null @@ -1,75 +0,0 @@ -# Open-source packages Orchid depends on - -## [thiserror](https://github.com/dtolnay/thiserror) - -_License: Apache 2.0 or MIT_ - -Helps derive `Error` for aggregate errors. - -I eventually stopped trying to do this as it was simpler to just treat error types as bags of data about the failure, but some parts of the codebase still use it and it doesn't really cause any problems. - -## [chumsky](https://github.com/zesterer/chumsky) - -_License: MIT_ - -A fantastic parser combinator that allowed specifying nuanced decisions in a declarative way, such as whether a given float token can be promoted to an uint token. - -In hindsight passes after tokenization could have been written by hand, tokenized Orchid is not that hard to parse into an AST and it would have probably made some tasks such as allowing `.` (dot) as a token considerably easier. - -## [hashbrown](https://github.com/rust-lang/hashbrown) - -_License: Apache 2.0 or MIT_ - -Google's swisstable implementation. Almost perfectly identical to `std::collections::HashMap`, with minor differences. - -One of its greatest feats is support for the raw entry API which enables resolving entries using a hash and an equality lambda. This is used both by the interner to avoid many clones and allocations and by the generic processing step cache to avoid unnecessary clones of potentially very large trees. This API is experimentally available in the native hashmap too. - -Its other advantage over `std::collections::HashMap` is that its default hashing function is AHash which is said to be faster than the standard variant's default SipHash. 
I don't have benchmarks to back this up but since it was already in the codebase for the raw entry API I opted to use it everywhere. - -## [ordered-float](https://github.com/reem/rust-ordered-float) - -_License: MIT_ - -A wrapper around floating point numbers that removes `NaN` from the set of possible values, promoting `<` and `>` to total orderings and `==` to an equivalence relation. Orchid does not have `NaN` because it's a silent error which conflicts with the "let it crash" philosophy borrowed from Elixir. All operations that would produce `NaN` either abort or indicate the failure in their return type. - -## [itertools](https://github.com/rust-itertools/itertools) - -_License: Apache 2.0 or MIT_ - -A fundamental utility crate for Rust's iterators, it's impossible to enumerate its uses. - -## [smallvec](https://github.com/servo/references-smallvec) - -_License: Apache 2.0 or MIT_ - -small vector optimization - allocates space for a statically known number of elements on the stack to save heap allocations. This is a gamble since the stack space is wasted if the data does spill to the heap, but it can improve performance massively in hot paths. - -I used it for optimizations in the key-value store the type system used to store - -## [dyn-clone](https://github.com/dtolnay/dyn-clone) - -_License: Apache 2.0 or MIT_ - -All expressions in Orchid are clonable, and to allow for optimizations, Atoms have control over their own cloning logic, so this object-safe version of `Clone` is used. - -# Packages no longer used - -## [mappable-rc](https://github.com/JakobDegen/mappable-rc) - -A refcounting pointer which can be updated to dereference to some part of the value it holds similarly to C++'s `shared_ptr`. - -Using this crate was ultimately a mistake on my part, in early stages of development (early stages of my Rust journey) I wanted to store arbitrary subsections of an expression during macro execution without dealing with lifetimes. 
It was removed in the latest version. - -## [lasso](https://github.com/Kixiron/lasso) - -A very popular string interner, used for interning both strings and base64 encoded data - -## [base64](https://github.com/marshallpierce/rust-base64) - -Enable interning non-string data - -## [static_init](https://gitlab.com/okannen/static_init) - -Enable interning magic strings ahead-of-time in functions that don't have access to the interner. - -I thought that this actually runs static initializers on startup as it's advertised in the readme \ No newline at end of file diff --git a/notes/papers/report/parts/pipeline.md b/notes/papers/report/parts/pipeline.md deleted file mode 100644 index 639e7cc..0000000 --- a/notes/papers/report/parts/pipeline.md +++ /dev/null @@ -1,67 +0,0 @@ -## The pipeline - -The conversion of Orchid files into a collection of macro rules is a relatively complicated process that took several attempts to get right. - -### Push vs pull logistics - -The initial POC implementation of Orchid used pull logistics aka lazy evaluation everywhere. This meant that specially annotated units of computation would only be executed when other units referenced their result. This is a classic functional optimization, but its implementation in Rust had a couple drawbacks; First, lazy evaluation conflicts with most other optimizations, because it's impossible to assert the impact of a function call. Also - although this is probably a problem with my implementation - because the caching wrapper stores a trait object of Fn, every call to a stage is equivalent to a virtual function call which alone is sometimes an excessive penalty. Second, all values must live on the heap and have static lifetimes. Eventually nearly all fields referenced by the pipeline or its stages were wrapped in Rc. - -Additionally, in a lot of cases lazy evaluation is undesirable. 
Most programmers other than the developers of Python would like to receive syntax errors in dead functions because statically identifiable errors are usually either typos that are trivial to fix or born out of a misconception on the programmer's part which is worth addressing in case it produces silent errors elsewhere. But errors are produced when the calculation of a value fails, so to produce errors all values of all functions must be calculated. - -To address these issues, the second iteration only uses pull logistics for the preparsing and file collection phase, and the only errors guaranteed to be produced by this stage are imports from missing files and syntax errors regarding the structure of the S-expressions. - -### Stages - -As of writing, the pipeline consists of three main stages: source loading, tree-building and name resolution. These break down into multiple substages. - -All stages support various ways to introduce blind spots and precomputed values into their processing. This is used to load the standard library, prelude, and possibly externally defined intermediate stages of injected code. - -#### Source loading - -This stage encapsulates pull logistics. It collects all source files that should be included in the compilation in a hashmap keyed by their project-relative path. All subsequent operations are executed on every element of this map unconditionally. - -The files and directory listings are obtained from an injected function for flexibility. File collection is motivated by a set of target paths, and injected paths can be ignored with a callback. - -Parsing itself is outsourced to a Chumsky parser defined separately. This parser expects a list of operators for tokenization, but such a list is not available without knowledge of other files because glob imports don't provide information about the operators they define so much of the parsed data is invalid. 
What is known to be valid are - -- the types of all lines -- line types `import` and `export` -- the pattern of `rule` lines -- the name of `constant` and `namespace` lines -- valid parts of the `exported` variant of lines -- valid parts of the body of `namespace` lines - -This information is compiled into a very barebones module representation and returned alongside the loaded source code. - -#### Tree building - -This stage aims to collect all modules in a single tree. To achieve this, it re-parses each file with the set of operators collected from the datastructure built during preparsing. The glob imports in the resulting FileEntry lists are eliminated, and the names in the bodies of expressions and macro rules are prefixed with the module path in preparation for macro execution. - -Operator collection can be advised about the exports of injected modules using a callback, and a prelude in the form of a list of line objects - in the shape emitted by the parser - can be injected before the contents of every module to define universally accessible names. Since these lines are processed for every file, it's generally best to just insert a single glob import from a module that defines everything. The interpreter inserts `import prelude::*`. - -#### Import resolution - -This stage aims to produce a tree ready for consumption by a macro executor or any other subsystem. It replaces every name originating from imported namespaces in every module with the original name. - -Injection is supported with a function which takes a path and, if it's valid in the injected tree, returns its original value even if that's the path itself. This is used both to skip resolving names in the injected modules - which are expected to have already been processed using this step - and of course to find the origin of imports from the injected tree. 
- -### Layered parsing - -The most important export of the pipeline is the `parse_layer` function, which acts as a façade over the complex system described above. The environment in which user code runs is bootstrapped using repeated invocations of this function. It has the following options - -1. targets that motivate file loading - - In the case of intermediate layers this can be a list of all included module names. The targets are only required to be valid, global import paths without a globstar. - -2. a function that performs file and directory reads. - - This is normally set to a lambda that relays requests to `pipeline::file_loader`, but it can be replaced with another function if source code is to be loaded from an emulated file system, such as an in-memory tree or an online package repository. - -3. the previous layer as an environment -4. a prelude to every file - - The interpreter sets this to `import prelude::*`. If the embedder defines its own prelude it's a good idea to append it. - -#### The first layer - -The other important exports of the pipeline are `ConstTree` and `from_const_tree`. These are used to define a base layer that exposes extern functions. `ConstTree` implements `Add` so distinct libraries of extern functions can be intuitively combined. \ No newline at end of file diff --git a/notes/papers/report/parts/references.md b/notes/papers/report/parts/references.md deleted file mode 100644 index 8966020..0000000 --- a/notes/papers/report/parts/references.md +++ /dev/null @@ -1,10 +0,0 @@ -# References - -[1] various authors, "C++ Programming/Templates/Template Meta-Programming" https://en.wikibooks.org/wiki/C++_Programming/Templates/Template_Meta-Programming (accessed May 5, 2023) - -[2] J. Huey on behalf of The Types Team, "Generic associated types to be stable in Rust 1.65" https://blog.rust-lang.org/2022/10/28/gats-stabilization.html (accessed May 5, 2023) - -[3] K. 
Wnasbrough, "Instance Declarations are Uniuersal" https://www.lochan.org/keith/publications/undec.html (accessed May 5, 2023) - -[4] M. Stay, "Allow classes to be parametric in other parametric classes" https://github.com/microsoft/TypeScript/issues/1213 (accessed May 5, 2023) - diff --git a/notes/papers/report/parts/spec/+index.md b/notes/papers/report/parts/spec/+index.md deleted file mode 100644 index c4f224a..0000000 --- a/notes/papers/report/parts/spec/+index.md +++ /dev/null @@ -1,3 +0,0 @@ -# Specification - -This is a description of the syntax and execution model the submitted version of Orchid conforms to. It is intended to be as accurate as any specification, but it is written to match the implementation and not the other way. \ No newline at end of file diff --git a/notes/papers/report/parts/spec/02-parsing.md b/notes/papers/report/parts/spec/02-parsing.md deleted file mode 100644 index fd33092..0000000 --- a/notes/papers/report/parts/spec/02-parsing.md +++ /dev/null @@ -1,178 +0,0 @@ -# Parsing - -Orchid expressions are similar in nature to lambda calculus or haskell, except whitespace is mostly irrelevant. - -## Comments - -Orchid borrows Lua's comment syntax. Line comments start with `--` and end at a line break. Block comments start with `--[` and end with `]--`. - -## Names - -`name` and `ns_name` tokens appear all over the place in this spec. They represent operators, function names, arguments, modules. A `name` is - -1. the universally recognized operators `,`, `.`, `..` and `...` (comma and single, double and triple dot) -2. any C identifier -3. any sequence of name-safe characters starting with a character that cannot begin a C identifier. 
A name-safe character is any non-whitespace Unicode character other than - - - digits - - the namespace separator `:`, - - the parametric expression starters `\` and `@`, - - the string and char delimiters `"` and `'`, - - the various brackets`(`, `)`, `[`, `]`, `{` and `}`, - - `,`, `.` and `$` - -This means that, in absence of a known list of names, `!importatn!` is a single name but `importatn!` is two names, as a name that starts as a C identifier cannot contain special characters. It also means that using non-English characters in Orchid variables is a really bad idea. This is intentional, identifiers that need to be repeated verbatim should only contain characters that appear on all latin keyboards. - -There are also reserved words that cannot be used as names; `export`, `import`, `namespace`. - -A `ns_name` is a sequence of one or more `name` tokens separated by the namespace separator `::`. - -## Clauses - -Clauses are the building blocks of Orchid's syntax. They belong to one of a couple categories: - -- S-expressions are a parenthesized sequence of space-delimited `clause`s. All three types of brackets `()`, `[]` and `{}` are supported and treated differently. -- Lambdas start with `\`, followed by a single clause representing an argument name, then `.`, then a sequence of `clause`s representing the body. This is a greedy pattern that ends at the end of an enclosing S-expression, or the end of the line. Lambdas may contain any single clause in the position of an argument during parsing, but by the end of macro execution all arguments must become a `ns_name`. -- numbers can be in decimal, binary with the `0b` prefix, hexadecimal with the `0x` prefix, or octal with the `0` prefix. All bases support the decimal point, exponential notation or both. The exponent is prefixed with `p`, always written in decimal, may be negative, and it represents a power of the base rather than a power of 10. For example, `0xf0.4p-2` is `0xf04 / 16 ^ 3` or ~0.9385. 
-- Strings are delimited with `"`, support `\` escapes and four digit unicode escapes of the form `\uXXXX`. They may contain line breaks. -- Chars are a single character or escape from the above description of a string delimited by `'`. -- Placeholders are either scalar `$name`, vectorial `..$name`, vectorial nonempty `...$name`, or either of the vectorial variants with a priority attached `..$name:p`, `...$name:p`. The name is always a C identifier, p is an integer. -- Names are `ns_name` - -## Files - -Files are separated into lines. A line is delimited by newlines and only contains newlines within brackets. A line may be an - -### Import -spec: -``` -import = "import" impot_fragment -import_fragment = "*" -import_fragment = name -import_fragment = "(" import_fragment [ "," import_fragment ]* ")" -import_fragment = name "::" import_fragment -``` -examples: -``` -import prelude::* -import std::cps -import std::(num::ops::*, fn::*, conv) -import std::proc::(do, let, =, ;) -``` -counterexamples: -``` -import std::() -import std::cpsio::(print, *) -import std::(cpsio) -``` -> **info** -> -> while none of these are guaranteed to work currently, there's little reason they would have to be invalid, so future versions may allow them. - -### Constant -spec: -``` -constant = name ":=" clause* -``` -the value can consist of multiple clauses during parsing, these will be converted to a single function call after macro execution. - -examples: -``` -main := print "Hello World!\n" -pi := 3 -e := pi -exponentiate := \n.\exp. 
do{ - let total = 1; - loop r on (exp total) with - if exp == 1 then total - else r (exp - 1) (total * n) -} -``` -### Namespace -spec: -``` -namespace = "namespace" name "{" line* "}" -``` -examples: -``` -foo := 1 -bar := baz::quz -namespace baz ( - import super::foo - export quz := foo + 1 -) -``` - -### Exported member -spec: -``` -exported_member = "export" (constant | rule | namespace) -``` - -### Explicit export -spec: -``` -export "::" "(" name [ "," name ]* ")" -``` -examples: -``` -export ::(new, map) -``` - -### Rule -spec: -``` -rule = pattern arrow template -pattern = clause* -template = clause* -arrow = "=" priority "=>' (written together, without spaces) -priority = float -``` -Rule patterns can define new operators implicitly by referencing them, so all tokens must be delimited by spaces. The template is inserted in place of the pattern without parentheses, so unless it's meant to be part of a pattern matched by another rule which expects a particular parenthesization, when more than one token is produced the output should be wrapped in parentheses. - -examples: -``` -export loop $r on (...$parameters) with ...$tail =0x5p512=> Y (\$r. - bind_names (...$parameters) (...$tail) -) ...$parameters - -bind_names ($name ..$rest) $payload =0x2p1000=> \$name. bind_names (..$rest) $payload -bind_names () (...$payload) =0x1p1000=> ...$payload - -...$left + ...$right:1 =0x1p240=> (add (...$left) (...$right)) -``` - -### Imports - -An import is a line starting with the keyword `import`, followed by a tree of imported names. 
- -``` -import_tree = name - | name :: import_tree - | name :: * - | ( import_tree [, import_tree]+ ) -``` - -Some examples of valid imports: - -``` -import std::cpsio -import std::(conv::parse_float, cpsio, str::*) -import std -``` - -Some examples of invalid imports: - -``` -import std::() -import std::cpsio::(print, *) -import std::(cpsio) -``` - -> **info** -> -> while none of these are guaranteed to work currently, there's little reason they would have to be invalid, so future specifications may allow them. - -An import can be normalized into a list of independent imports ending either with a `*` called wildcard imports or with a `name`. wildcard imports are normalized to imports for all the `name`s exported from the parent module. All Name clauses in the file starting with a `name` one of these imports ended with are prefixed with the full import path. The rest of the Name clauses are prefixed with the full path of the current module. - -Reference cycles are allowed. diff --git a/notes/papers/report/parts/spec/03-macros.md b/notes/papers/report/parts/spec/03-macros.md deleted file mode 100644 index bf21bb6..0000000 --- a/notes/papers/report/parts/spec/03-macros.md +++ /dev/null @@ -1,45 +0,0 @@ -# Macros - -After parsing, what remains is a set of macro rules, each with a pattern, priority and template. Modules aren't tracked at this stage, their purpose was to namespace the tokens within the rules. - -By employing custom import logic, it's also possible to add rules bypassing the parser. Starting with the macro phase, `clause`s may also be `atom`s or `externfn`s. The role of these is detailed in the [[04-runtime]] section. - -Macros are tested in order of descending priority, each macro is checked against each subsection of each clause sequence. When a match is found, the substitution is performed and all macros are checked again. 
- -## Placeholders - -Patterns fall into two categories: - -- scalar placeholders - - `$name` matches exactly one clause, including a parenthesized sequence. -- vectorial placeholders - - `..$name` matches zero or more clauses - - `...$name` matches one or more clauses - -Vectorial placeholders may also have a positive decimal integer growth priority specified after the name, separated with a `:` like so: `...$cond:2`. If it isn't specified, the growth priority defaults to 0. - -Any single clause can appear in the position of a lambda argument during macro execution. By the end of the macro execution phase, all lambdas must have a Name in the position of argument. - -The template may only include placeholders referenced in the pattern. Two vectorial placeholders cannot appear next to each other in the pattern.\ -A placeholder name can only appear once in a pattern.\ - -## Execution - -Each clause in the pattern matches clauses as follows: - -- Name matches a Name with the same fully resolved namespaced name. -- Lambda matches a Lambda with matching argument and matching body. Lambda arguments are module-local Name clauses, so if they are moved out of the body by a macro they can become unbound or refer to a previously shadowed global. -- Parenthesized expressions match each other if the contained sequences match and both use the same delimiters. -- Placeholders' matched sets are as listed in [Placeholders](#placeholders). - -### Precedence of matches - -The growth order of vectorial placeholders is - -- Outside before inside parentheses -- descending growth priority -- right-to-left by occurrence in the pattern. - -If a pattern matches a sequence in more than one way, whichever match allocates more clauses to the highest vectorial placeholder in growth order is preferred. - -Rules are conceptually extended with a vectorial placeholder of priority 0 on either end unless a vectorial placeholder is already present there. 
In practice, this means that multiple occurences of a scalar pattern within a sequence are matched left to right. diff --git a/notes/papers/report/parts/spec/04-runtime.md b/notes/papers/report/parts/spec/04-runtime.md deleted file mode 100644 index 958a36e..0000000 --- a/notes/papers/report/parts/spec/04-runtime.md +++ /dev/null @@ -1,34 +0,0 @@ -## Runtime - -Orchid is evaluated lazily. This means that everything operates on unevaluated expressions. This has the advantage that unused values never need to be computed, but it also introduces a great deal of complexity in interoperability. - -### Gas - -The executor supports an optional gas parameter to limit the number of normalization steps taken. Once an Orchid program reaches an inert state, it is either an external item, a literal, or a lambda function. - -### external API - -In order to do anything useful, Orchid provides an API for defining clauses that have additional behaviour implemented in Rust. Basic arithmetic is defined using these. - -#### Atomic - -atomics are opaque units of foreign data, with the following operations: - -- a function for reduction that behaves like the interpreter's `run` function -- attempt to downcast to a concrete type - -Atomics can be used to represent processes. Given enough processing cycles, these return a different clause. - -They can also be used to wrap data addressed to other external code. This category of atomics reports inert at all times, and relies on the downcasting API to interact with ExternFn-s. - -It's possible to use a combination of these for conditional optimizations - for instance, to recognize chains of processes that can be more efficiently expressed as a single task. - -#### ExternFn - -external functions can be combined with another clause to form a new clause. 
Most of the time, this new clause would be an Atomic which forwards processing to the arguments until they can't be normalized any further, at which point it either returns an ExternFn to take another argument or executes the operation associated with the function and returns a value. - -Because this combination of operations is so common, several macros are provided to streamline it. - -It is always a logic error to normalize expressions outside an `interpreter::run` (or `Atomic::run`) call, or to expect an expression to be of any particular shape without ensuring that `interpreter::run` reported inert in the past. - -All functions including external ones are assumed to be pure, and the executor uses opportunistic caching to avoid re-evaluating subexpressions, so continuation-passing style cannot be used to encode side effects. An alternative system for this purpose is being developed, but for the time being the previous CPS functions are still available in the standard library. Each print expression will be printed at least once for each qualitatively distinct argument it is applied to. diff --git a/notes/papers/report/parts/substack.md b/notes/papers/report/parts/substack.md deleted file mode 100644 index 0641a0c..0000000 --- a/notes/papers/report/parts/substack.md +++ /dev/null @@ -1,5 +0,0 @@ -# Substack - -The vast majority of algorithms involved in this project are multiple recursive in nature. Very often information on higher levels would influence the entire subtree. A good example is the resolution of name bindings. The size of the call stack is associated with the growth of the set of names, only the top needs to be mutated, but all names seen in enclosing scopes need to be accessible. The datastructure we need is essentially a linked list on the stack. - -This is a very common and not particularly interesting datastructure; much like quicksort, every C project of considerable size that uses recursion includes some definition of it. 
However, I still think it deserves some attention, precisely because it's so common. For example, my implementation also defines an iterator, and a reasonably efficient, safe implementation of the outstandingly common operation of collecting the stack into a Vec that starts at the bottom. \ No newline at end of file diff --git a/notes/papers/report/parts/timeline.md b/notes/papers/report/parts/timeline.md deleted file mode 100644 index e649b94..0000000 --- a/notes/papers/report/parts/timeline.md +++ /dev/null @@ -1,17 +0,0 @@ -# Timeline - -I started working on a functional language in February 2022. I was mostly inspired by Haskell and Rust, I wanted to create a lazy, pure language with a simple rigid syntax tree like Rust that would support macros. By the end of August, I had a proof-of-concept implementation of the macro executor, just enough to test my ideas. - -This is also when I came up with the name. I read an article about how orchids don't so much grow on, but rather together with mangrove trees and influence the trees to produce patterns beneficial to them while also killing fungi and extending the tree's capacity for photosynthesis. - -Having tested that my idea could work, at the start of the academic year I switched to the type system. When the project synopsis was written, I imagined that the type system would be an appropriately sized chunk of the work for a final year project; its title was "Orchid's Type System". - -Around the end of November I had researched enough type theory to decide what kind of type system I would want. My choice was advised by a number of grievances I had with Typescript such as the lack of higher-kinded types which comes up surprisingly often[4] in Javascript, lack of support for nominal types and the difficulty of using dependent types. I appreciated however the powerful type transformation techniques. 
- -However, building a type system proved too difficult; on February 23 I decided to cut my losses and focus on building an interpreter. The proof-of-concept interpreter was finished on March 10, but the macro executor was still using the naiive implementation completed over the summer so it would take around 15 seconds to load an example file of 20 lines, and a range of other issues cropped up as well cumulatively impacting every corner of the codebase. A full rewrite was necessary. - -The final, working implementation was completed on May 8, this one uses token interning, starts up almost instantly and memoizes expressions by origin. This feature is implemented because it was very straightforward, but it actually conflicts with the pre-existing IO capabilities which still use continuation passing, so IO in a loop is actually impossible. - -## Immediate future - -The first order of business is to extend the standard library to a basic usable level, I'd like to try adding Elixir-like protocols with multiple type parameters, and some kind of IO support, perhaps mimicking algebraic effects. After that I would like to develop the embedding interface, as I hope to use Orchid in numerous future projects. diff --git a/notes/papers/report/parts/type_system/+index.md b/notes/papers/report/parts/type_system/+index.md deleted file mode 100644 index 8334561..0000000 --- a/notes/papers/report/parts/type_system/+index.md +++ /dev/null @@ -1,20 +0,0 @@ -## Type system - -This is a description of the type system originally designed for Orchid which never reached the MVP stage. - -At the core the type system consists of three concepts: - -- `define` creates nominal types, which also act as typeclasses. This may be very confusing but it will make more sense later. -- `impl` provides instances of typeclasses -- a universal parametric construct that serves as both a `forall` (or generic) and a `where` (or constraint). 
This was temporarily named `auto` but is probably more aptly described by the word `given`. - -### Unification - -The backbone of any type system is unification. In this case, this is an especially interesting question because the type expressions are built with code and nontermination is outstandingly common. - -The unification process uses Hindley-Milner unification as a primitive. It attempts to find an MGU within a constant N steps of reduction. In every step, the candidates are compared using HM, and if it fails, branches are created for each transformation available in the tree. All branches reference the previous step. Valid transformations are - -- $\beta$-reduction -- Replacing a subtree that is syntactically equivalent to a tree it was produced by with a call to the Y combinator. - -This algorithm is prone to state explosion, but because it does not need to solve extremely complex problems but rather many many very small ones, good caching can probably solve most issues. \ No newline at end of file diff --git a/notes/papers/report/parts/type_system/02-given.md b/notes/papers/report/parts/type_system/02-given.md deleted file mode 100644 index 10c4f47..0000000 --- a/notes/papers/report/parts/type_system/02-given.md +++ /dev/null @@ -1,20 +0,0 @@ -### Given (formerly Auto) - -`given` bindings have the form `@Name:type. body`. Either the `Name` or the `:type` part can be optional but at least one is required. The central idea is that wherever a binding is unwrapped by an operation the language attempts to find a value for the name. Bindings are unwrapped in the following situations: - -- If the value is used, such as if a generic function is called -- If the value is assigned to something that has a known type which does NOT have a binding - -Bindings can be **resolved** in a couple ways: - -1. If the name appears in the type of any value, type unification provides a solution -2. 
If the binding has a type and the point of unwrapping is within the body of a binding with an **assignable** type, the value of that binding is forwarded -3. If none of the above options yield any success and the binding has a type, the value of the single suitable `impl` according to the [[04-impl#Matching rules|impl matching rules]] is used - -If none of the above options are successful, resolution fails. - -It is possible to store values with bindings in typed datastructures without resolving the binding, for example `List @T. @:Eq T. (T -> Option bool)` would represent a `List` of functions that take any equality-comparable value and return an optional boolean. - -Bindings can be used to represent generics. In the above example, `@T. ...` is a generic parameter. It translates to the clause "given a type T, ...". Its value will probably be decided by the function's argument. - -Bindings can also be used to represent constraints. In the above example, `@:Eq T. ...` is a constraint, which translates to the clause "given an instance of `Eq T`, ...". Its value will have to be decided by an existing `Eq` constraint if the caller is also generic over `T`, or an `impl` of `Eq` if the function is called on a value of a concrete type or if the caller does not have the `Eq` constraint. \ No newline at end of file diff --git a/notes/papers/report/parts/type_system/03-define.md b/notes/papers/report/parts/type_system/03-define.md deleted file mode 100644 index 1e94d8f..0000000 --- a/notes/papers/report/parts/type_system/03-define.md +++ /dev/null @@ -1,61 +0,0 @@ -# Define - -Define is used to create types and typeclasses. Define is a distinct [[02-parsing#Files|line type]] that has the following form: - -``` -define = "define" name param* "as" value -param = param_name [ ":" kind ] -kind = clause -param_name = "$" name (without spaces) -value = clause* -``` - -For an example of a type, here's the definition of a conslist or linked list. -``` -define List $T as Y \r. 
Option (Pair $T r) -``` - -These aren't macros although they look similar. While macros are processed after parsing and then forgotten, these placeholders are recognized by the language and subject to unification. - -It's important to keep in mind that these are nominal types; when something is typed `List int`, it is not assignable to `Option (Pair int (List int))`. - -## Typeclasses - -Typeclasses are types that describe operations. Very often a typeclass will be a single function, but they can also be sequences of functions. - -For an example of a typeclass, here's the definition of Eq, the class of types that can be equality-compared. -``` -define Eq $T as $T -> $T -> bool -``` - -Eq isn't a statement about types as typeclasses commonly are in other languages; instead, it's an operation carried out on a particular type. **Constraints of `Eq` on some generic parameter `T` are expressed as a requirement for the existence of `Eq T` for the given `T`.** As an added benefit, the operations exposed by a typeclass can be unambiguously referenced from the bound name of the typeclass value within the binding. -``` -isUnaryGrp := @T. @eq:Eq T. @:Add T T T. \t:T. eq (t + t) t -``` - -In the above example, the implementation of `Eq` is used directly as a value in the expression. The implementation of `Add` is not used, but it can be assumed that the operator + is translated via macros to a call to some generic function `add` which is constrained on `Add`, so according to the second unification rule in [[#Given (formerly Auto)|Given]] the implementation is forwarded. - -## Kinds - -Each of the parameters to a nominal type has a kind. Kinds can be thought of as a "type of type", and they ensure that expressions that are used in the type of a value have no unspecified parameters while allowing values to be parametric on parametric types. - -### 1. concrete types - -`type` is the kind of concrete types. 
These are the only values in type-space that can stand in the position of a type annotation. Simple types such as `int` as well as fully specified generic types such as `List int` belong to this group. - -Kinds aren't inferred from usage; if a type parameter does not have a kind annotation, it is assumed to be `type`. - -### 2. generics - -Generics or parametric types act like N-ary functions. `type -> type` is the kind of generics with one type parameter, `type -> type -> type` is the kind of generics wiht two type parameters, and so on. `List` for instance is `type -> type`. - -Typeclasses applied to simple types also belong in this group. For example, `Eq` from above has kind `type -> type`. `Add` has three generic parameters for left, right and output types, and all of these are concrete types, so its kind is `type -> type -> type -> type`. - -### 3. higher-kinded polymorphism - -Types that are parametric on parametric types have kinds that are analogous to higher-order functions. Most real-world examples of this group are typeclasses that apply to containers. - -`List` has the kind `type -> type`. `Option`, also known as `Maybe` from Haskell also has the same kind, as does `HashMap string`. What's common about all of these is that they have values that can be modified without influencing the overall structure of the containers. In Haskell this capability is encoded in the typeclass `Functor`, but Orchid would probably opt for a more accessible name such as `Mapping`. The kind of this typeclass is `(type -> type) -> type`. -``` -define Mapping $C:(type -> type) as @T. @U. C T -> C U -``` diff --git a/notes/papers/report/parts/type_system/04-impl.md b/notes/papers/report/parts/type_system/04-impl.md deleted file mode 100644 index a070ae4..0000000 --- a/notes/papers/report/parts/type_system/04-impl.md +++ /dev/null @@ -1,58 +0,0 @@ -# Impl - -Impl is used to implement typeclasses. 
Impl is a distinct [[02-parsing#Files|line type]] that has the following form: -``` -impl = "impl" target_type ["by" impl_name ["over" alternative*]] "via" value -target_type = clause* -impl_name = name -alternative = ns_name -value = clause* -``` - -Impls provide fallbacks for binding resolution. If the target type contains any @ bindings at the top level, they are also applied to the value, to avoid repetition. The list of alternatives contains references to other impls which the author of this impl is aware of and deems more general or for another reason inferior. Alternatives can never form a cycle. - -## Matching rules - -When a [[02-given|@]] binding is not resolvable using rules 1 and 2, impls are used to find a value. Each impl's target type may contain other bindings, so resolution proceeds similarly to a breadth-first Prolog solver. - -An impl is considered an acceptable **candidate** for a binding if its type unifies with goal, with its bindings resolved in the context where the original binding is defined. This means that these indirect bindings are also first resolved using **assignable** enclosing bindings before impls would be enumerated. - -An impl is considered a **match** if it is a **candidate**, and all other candidates are reachable from it by walking the alternative tree (even if the intermediate steps are not candidates). If there is no match, - -## Overrides - -In Rust impls can be placed in one of two modules; the trait owner, and the type owner. Orchid is more forgiving than that which means that mistakes in external packages can temporarily be fixed in user code, but it also means that inconsistency is possible and needs to be addressed. Two additional possibilities arise that Rust's orphan rules prevent; foster impls and arbiter impls. - -### Foster impls - -If it doesn't make sense for either of the participants to acknowledge the others, foster impls can be created which don't own any of the participant symbols. 
- -```orc -import GenericModule::Typeclass -import SpecificModule::(Type, function) - -impl Typeclass Type by fosterTypeclassType via function -``` - -Foster impls can be placed in foster packages whose sole purpose is to glue packages together, or they can be embedded in usercode. - -### Arbiter impls - -If multiple foster impls exist for a given package, or if a foster impl is provided by some collection but one of the parents added an impl in the mean time, ambiguities arise. To resolve these, arbiter impls can be used to decide which value will win. - -``` orc -import BadModule::badImpl -import GoodModule::goodImpl -import GenericModule::Typeclass -import SpecificModule::Type - -impl Typeclass Type by arbiterGoodModuleTypeclassType over goodImpl, badImpl via goodImpl -``` - -Notice that goodImpl appears both as a value and an impl name. Named impls are always also exported as constants, specifically to account for situations where you want to use them despite auto resolution. They can be referenced in arbiter rules, exception rules for more general impls, and directly used as values in code. - -The more common and less hacky use case for arbiter rules is when a very general rule from a general package needs to be overridden by a more specific rule from a deep ancestor. - ---- - -In all cases, these problems represent a concern gap or overlap and should be eventually resolved by the authors of the original packages. The purpose of foster and arbiter rules is to not stall the ecosystem on a trivial conflict of concepts and to make adding dependencies less risky. It should still take some effort to maintain a large dependency list, but the risk of complete blockage becomes a more manageable constant effort. 
\ No newline at end of file diff --git a/notes/type_system/definitions.md b/notes/type_system/definitions.md deleted file mode 100644 index cd018e3..0000000 --- a/notes/type_system/definitions.md +++ /dev/null @@ -1,54 +0,0 @@ -## Type definitions - -A new type can be created with the define expression, which associates a templated expression of -type `type` with a name and a template. The name allocated in this fashion is always representedas -an Atom of type `type` or some function that eventually returns `type`. The kind of the template -parameters is always inferred to be `type` rather than deduced from context. - -The following type definition - -```orc -define Cons $T as loop \r. Option (Pair $T r) -``` - -results in these conditions: - -- (Cons Int) is not assignable to @T. Option T, or any other type expression that its - definitions would be assignable to, and vice versa. -- An instance of (Cons Int) can be constructed with `categorise @(Cons Int) (some (pair 1 none))` - but the type parameter can also be inferred from the expected return type -- An instance of (Cons Int) can be deconstructed with `generalise @(Cons Int) numbers` - but the type parameter can also be inferred from the argument - -These inference rules are never reversible - -```orc -categorise :: @T:type. (definition T) -> T -generalise :: @T:type. T -> (definition T) -definition :: type -> type -- opaque function -``` - -## Unification - -The following must unify: - -```orc -@T. @:Add T T T. Mult Int T T -Mult Int (Cons Int) (Cons Int) -``` - -## Typeclasses - -Typeclasses and types use the same define syntax. In fact, much like a type is nothing but a -distinguished instance of the underlying type with added meaning and constraints, a typeclass is -nothing but a distinguished instance of the underlying function (or collection of functions) with -added meaning and constraints. 
A typeclass definition is therefore perfectly identical to a type -definition: - -``` -define Add $T $U $R as $T -> $U -> $R -``` - -It is clear that the definition of this type would match many, many functions, including -multiplication, so functions that should be considered addition are [impls](./impls.md) of the -typeclass Add. diff --git a/notes/type_system/impls.md b/notes/type_system/impls.md deleted file mode 100644 index 08aaff9..0000000 --- a/notes/type_system/impls.md +++ /dev/null @@ -1,67 +0,0 @@ -In Orchid, types and typeclasses aren't distinguished. Impls are the equivalent of typeclass -implementations. The syntax looks like this: - -```orc -impl typeExpression [by name [over overriddenName, furtherOverriddenNames...]] via valueExpression -``` - -An impl can be considered a candidate for an auto if its typeExpression unifies with the auto's type -An impl candidate can be used to resolve an auto if -- typeExpression unifies with the auto's type -- it is not present in any other matching impl's override tree -- all other candidates are present in its override tree - -### Impls for types - -Impls for types are generally not a good idea as autos with types like Int can -often be used in dependent typing to represent eg. an index into a type-level conslist to be -deduced by the compiler, and impls take precedence over resolution by unification. - -In Rust impls can be placed in one of two modules; the trait owner, and the type owner. In orchid -that is not the case, so two additional possibilities arise that Rust's orphan rules prevent. - -## Foster impls - -If it doesn't make sense for either of the participants to acknowledge the others, foster impls -can be created which don't own any of the participant symbols. 
- -```orc -import GenericModule::Typeclass -import SpecificModule::(Type, function) - -impl Typeclass Type by fosterTypeclassType via function -``` - -Foster impls can be placed in foster packages whose sole purpose is to glue packages together, or -they can be embedded in usercode. - -## Arbiter impls - -If multiple foster impls exist for a given package, or if you use a foster package but one of the -parents involved has added an impl in the mean time, ambiguities arise. To resolve these, arbiter -impls can be used to decide which impl's value will win. - -``` orc -import BadModule::badImpl -import GoodModule::goodImpl -import GenericModule::Typeclass -import SpecificModule::Type - -impl Typeclass Type by arbiterGoodModuleTypeclassType over goodImpl, badImpl via goodImpl -``` - -Notice that goodImpl appears both as a value and an impl name. Named impls are always also -exported as value substitution rules, specifically to account for situations where you want to use -them despite auto resolution. They can be referenced in arbiter rules, exception rules for more -general impls, auto-parameter overrides, and directly used as values in code. - -The more common and less hacky use case for arbiter rules is when a very general rule from a -general package needs to be overridden by a more specific rule from a deep ancestor. - ---- - -In all cases, these problems represent a concern gap or overlap and should be eventually resolved -by the authors of the original packages. The purpose of foster and arbiter rules is to not stall -the ecosystem on a trivial conflict of concepts and to make adding dependencies less risky. -It should still take some effort to maintain a large dependency list, but the risk of complete -blockage becomes a more manageable constant effort. 
\ No newline at end of file diff --git a/notes/type_system/unification.md b/notes/type_system/unification.md deleted file mode 100644 index fa0f62f..0000000 --- a/notes/type_system/unification.md +++ /dev/null @@ -1,27 +0,0 @@ -# Steps of validating typed lambda - -- Identify all expressions that describe the type of the same expression -- enqueue evaluation steps for each of them and put them in a unification group -- evaluation step refers to previous step, complete expression tree - - unification **succeeds** if either - - the trees are syntactically identical in any two steps between the targets - - unification succeeds for all substeps: - - try to find an ancestor step that provably produces the same value as any lambda in this - step (for example, by syntactic equality) - - if found, substitute it with the recursive normal form of the lambda - - recursive normal form is `Apply(Y, \r.[body referencing r on point of recursion])` - - find all `Apply(\x.##, ##)` nodes in the tree and execute them - - unification **fails** if a member of the concrete tree differs (only outermost steps add to - the concrete tree so it belongs to the group and not the resolution) or no substeps are found - for a resolution step _(failure: unresolved higher kinded type)_ - - if neither of these conclusions is reached within a set number of steps, unification is - **indeterminate** which is also a failure but suggests that the same value-level operations - may be unifiable with better types. - -The time complexity of this operation is O(h no) >= O(2^n). For this reason, a two-stage limit -is recommended: one for the recursion depth which is replicable and static, and another, -configurable, time-based limit enforced by a separate thread. - -How does this interact with impls? -Idea: excluding value-universe code from type-universe execution. -Digression: Is it possible to recurse across universes? 
\ No newline at end of file diff --git a/orchid.code-workspace b/orchid.code-workspace index 19e5eb0..1e0c1fc 100644 --- a/orchid.code-workspace +++ b/orchid.code-workspace @@ -27,6 +27,8 @@ "editor.rulers": [74] }, "rust-analyzer.showUnlinkedFileNotification": false, + "rust-analyzer.checkOnSave": true, + "rust-analyzer.check.command": "clippy", "files.associations": { "*.mjsd": "markdown" }, diff --git a/src/cli.rs b/src/cli.rs index a12a380..e80b132 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -10,7 +10,7 @@ pub fn prompt( stdout().lock().flush().unwrap(); let mut input = String::with_capacity(100); stdin().lock().read_line(&mut input).unwrap(); - if input.len() == 0 {return default} + if input.is_empty() {return default} match try_cast(input) { Ok(t) => return t, Err(e) => println!("Error: {e}") diff --git a/src/external/num/numeric.rs b/src/external/num/numeric.rs index b520a1b..337f490 100644 --- a/src/external/num/numeric.rs +++ b/src/external/num/numeric.rs @@ -22,7 +22,7 @@ impl Numeric { /// # Panics /// /// if the value is NaN or Infinity.try_into() - fn num(value: T) -> Self where T: Into { + fn num>(value: T) -> Self { let f = value.into(); assert!(f.is_finite(), "unrepresentable number"); NotNan::try_from(f).map(Self::Num).expect("not a number") diff --git a/src/interner/monotype.rs b/src/interner/monotype.rs index 7b9c6fe..fca1239 100644 --- a/src/interner/monotype.rs +++ b/src/interner/monotype.rs @@ -67,32 +67,6 @@ impl TypedInterner { } } -// impl TypedInterner> { -// pub fn iv(&self, qs: &[Q]) -> Token> -// where -// Q: Eq + Hash + ToOwned, -// T: Borrow -// { -// let mut tokens = self.tokens.borrow_mut(); -// let hash = compute_hash(tokens.hasher(), qs); -// let raw_entry = tokens.raw_entry_mut().from_hash(hash, |k| { -// k.iter().zip(qs.iter()).all(|(t, q)| t.borrow() == q) -// }); -// let kv = raw_entry.or_insert_with(|| { -// let mut values = self.values.borrow_mut(); -// let uniq_key: NonZeroU32 = (values.len() as u32 + 1u32) -// 
.try_into().expect("can never be zero"); -// let tv = qs.iter().map(Q::to_owned).collect::>(); -// let keybox = Box::new(tv); -// let keyref = Box::leak(keybox); -// values.push((keyref, true)); -// let token = Token::>::from_id(uniq_key); -// (keyref, token) -// }); -// *kv.1 -// } -// } - impl Drop for TypedInterner { fn drop(&mut self) { // make sure all values leaked by us are dropped diff --git a/src/interner/multitype.rs b/src/interner/multitype.rs index 067cf24..9d38724 100644 --- a/src/interner/multitype.rs +++ b/src/interner/multitype.rs @@ -17,10 +17,9 @@ impl Interner { Self { interners: RefCell::new(HashMap::new()) } } - pub fn i(&self, q: &Q) -> Token - where Q: Eq + Hash + ToOwned, - Q::Owned: 'static + Eq + Hash + Clone, - Q::Owned: Borrow + pub fn i(&self, q: &Q) + -> Token + where Q::Owned: 'static + Eq + Hash + Clone + Borrow { let mut interners = self.interners.borrow_mut(); let interner = get_interner(&mut interners); diff --git a/src/interpreter/run.rs b/src/interpreter/run.rs index 958beed..dd96e39 100644 --- a/src/interpreter/run.rs +++ b/src/interpreter/run.rs @@ -54,15 +54,12 @@ pub type HandlerRes = Result< pub trait Handler { fn resolve(&mut self, data: HandlerParm) -> HandlerRes; - fn then(self, t: T) -> impl Handler - where Self: Sized { + fn then(self, t: T) -> impl Handler where Self: Sized { Pair(self, t) } } -impl Handler for F -where F: FnMut(HandlerParm) -> HandlerRes -{ +impl Handler for F where F: FnMut(HandlerParm) -> HandlerRes { fn resolve(&mut self, data: HandlerParm) -> HandlerRes { self(data) } diff --git a/src/representations/ast_to_postmacro.rs b/src/representations/ast_to_postmacro.rs index f1985b4..46cc9f3 100644 --- a/src/representations/ast_to_postmacro.rs +++ b/src/representations/ast_to_postmacro.rs @@ -54,7 +54,9 @@ pub fn _exprv(exprv: &[ast::Expr]) struct Context<'a> { names: Substack<'a, Token>>> } impl<'a> Context<'a> { - fn w_name<'b>(&'b self, name: Token>>) -> Context<'b> where 'a: 'b { + fn w_name<'b>(&'b 
self, + name: Token>> + ) -> Context<'b> where 'a: 'b { Context { names: self.names.push(name) } } diff --git a/src/representations/tree.rs b/src/representations/tree.rs index e602441..4c4da5d 100644 --- a/src/representations/tree.rs +++ b/src/representations/tree.rs @@ -101,8 +101,8 @@ impl Module { } } -impl Add for Module -where TExt: Add +impl> Add +for Module { type Output = Self; diff --git a/src/rule/update_first_seq.rs b/src/rule/update_first_seq.rs index 1575726..2a60c56 100644 --- a/src/rule/update_first_seq.rs +++ b/src/rule/update_first_seq.rs @@ -6,22 +6,25 @@ use crate::ast::{Expr, Clause}; /// Traverse the tree, calling pred on every sibling list until it returns /// some vec then replace the sibling list with that vec and return true /// return false if pred never returned some -pub fn exprv(input: Rc>, pred: &mut F) -> Option>> -where F: FnMut(Rc>) -> Option>> { +pub fn exprv< + F: FnMut(Rc>) -> Option>> +>(input: Rc>, pred: &mut F) -> Option>> { if let o@Some(_) = pred(input.clone()) {return o} replace_first(input.as_ref(), |ex| expr(ex, pred)) .map(|i| Rc::new(i.collect())) } -pub fn expr(input: &Expr, pred: &mut F) -> Option -where F: FnMut(Rc>) -> Option>> { +pub fn expr< + F: FnMut(Rc>) -> Option>> +>(input: &Expr, pred: &mut F) -> Option { if let Some(value) = clause(&input.value, pred) { Some(Expr{ value, location: input.location.clone() }) } else {None} } -pub fn clause(c: &Clause, pred: &mut F) -> Option -where F: FnMut(Rc>) -> Option>> { +pub fn clause< + F: FnMut(Rc>) -> Option>> +>(c: &Clause, pred: &mut F) -> Option { match c { Clause::P(_) | Clause::Placeh {..} | Clause::Name {..} => None, Clause::Lambda(arg, body) => { diff --git a/src/run_dir.rs b/src/run_dir.rs index f15dcb0..5b4d201 100644 --- a/src/run_dir.rs +++ b/src/run_dir.rs @@ -71,7 +71,6 @@ fn load_environment(i: &Interner) -> ProjectTree { } }; parse_layer(&[prelude_path(i)], &loader, &env, &[], i) - // .unwrap_or_else(|e| panic!("Prelude error: \n {}", e)) 
.expect("prelude error") } diff --git a/src/utils/cache.rs b/src/utils/cache.rs index 5c0a879..35354a3 100644 --- a/src/utils/cache.rs +++ b/src/utils/cache.rs @@ -4,18 +4,24 @@ use std::rc::Rc; use hashbrown::HashMap; // TODO: make this a crate +pub trait Callback<'a, I, O: 'static> = + Fn(I, &Cache<'a, I, O>) -> O; + +pub type CbBox<'a, I, O> = + Box + 'a>; /// Cache the return values of an effectless closure in a hashmap /// Inspired by the closure_cacher crate. pub struct Cache<'a, I, O: 'static> { store: RefCell>, - closure: Box O + 'a> + closure: CbBox<'a, I, O> } -impl<'a, I, O> Cache<'a, I, O> where - I: Eq + Hash + Clone, O: Clone -{ - pub fn new(closure: F) -> Self where F: Fn(I, &Self) -> O { +impl<'a, + I: Eq + Hash + Clone, + O: Clone +> Cache<'a, I, O> { + pub fn new>(closure: F) -> Self { Self { store: RefCell::new(HashMap::new()), closure: Box::new(closure) @@ -23,7 +29,9 @@ impl<'a, I, O> Cache<'a, I, O> where } #[allow(unused)] - pub fn rc(closure: F) -> Rc where F: Fn(I, &Self) -> O { + pub fn rc< + F: 'a + Callback<'a, I, O> + >(closure: F) -> Rc { Rc::new(Self::new(closure)) } diff --git a/src/utils/iter.rs b/src/utils/iter.rs index faa7809..0491532 100644 --- a/src/utils/iter.rs +++ b/src/utils/iter.rs @@ -23,15 +23,16 @@ macro_rules! 
box_chain { }; } -pub fn box_flatten<'a, T: 'a, I: 'a, J: 'a>(i: I) -> BoxedIter<'a, T> -where - J: Iterator, - I: Iterator, -{ +pub fn box_flatten<'a, + T: 'a, + I: 'a + Iterator, + J: 'a + Iterator +>(i: I) -> BoxedIter<'a, T> { Box::new(i.flatten()) } -pub fn into_boxed_iter<'a, T: 'a>(t: T) -> BoxedIter<'a, ::Item> -where T: IntoIterator { +pub fn into_boxed_iter<'a, + T: 'a + IntoIterator +>(t: T) -> BoxedIter<'a, ::Item> { Box::new(t.into_iter()) } \ No newline at end of file diff --git a/src/utils/mod.rs b/src/utils/mod.rs index e57772a..4645880 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,5 +1,4 @@ mod cache; -pub mod translate; mod replace_first; // mod interned_display; // mod interner; diff --git a/src/utils/protomap.rs b/src/utils/protomap.rs index 4683d8e..86b4822 100644 --- a/src/utils/protomap.rs +++ b/src/utils/protomap.rs @@ -28,9 +28,9 @@ impl<'a, K, V, const STACK_COUNT: usize> ProtoMap<'a, K, V, STACK_COUNT> { } /// Mutable reference to entry without checking proto in O(m) - fn local_entry_mut<'b, Q: ?Sized>(&'b mut self, query: &Q) + fn local_entry_mut<'b, Q: ?Sized + Eq>(&'b mut self, query: &Q) -> Option<(usize, &'b mut K, &'b mut Option)> - where K: Borrow, Q: Eq + where K: Borrow { self.entries.iter_mut().enumerate().find_map(|(i, (k, v))| { if query.eq((*k).borrow()) { Some((i, k, v)) } else { None } @@ -38,9 +38,9 @@ impl<'a, K, V, const STACK_COUNT: usize> ProtoMap<'a, K, V, STACK_COUNT> { } /// Entry without checking proto in O(m) - fn local_entry<'b, Q: ?Sized>(&'b self, query: &Q) + fn local_entry<'b, Q: ?Sized + Eq>(&'b self, query: &Q) -> Option<(usize, &'b K, &'b Option)> - where K: Borrow, Q: Eq + where K: Borrow { self.entries.iter().enumerate().find_map(|(i, (k, v))| { if query.eq((*k).borrow()) { Some((i, k, v)) } else { None } @@ -48,8 +48,8 @@ impl<'a, K, V, const STACK_COUNT: usize> ProtoMap<'a, K, V, STACK_COUNT> { } /// Find entry in prototype chain in O(n) - pub fn get<'b, Q: ?Sized>(&'b self, query: &Q) -> 
Option<&'b V> - where K: Borrow, Q: Eq + pub fn get<'b, Q: ?Sized + Eq>(&'b self, query: &Q) -> Option<&'b V> + where K: Borrow { if let Some((_, _, v)) = self.local_entry(query) { v.as_ref() @@ -120,9 +120,12 @@ impl<'a, K, V, const STACK_COUNT: usize> ProtoMap<'a, K, V, STACK_COUNT> { } } -impl -From for ProtoMap<'_, K, V, STACK_COUNT> -where T: IntoIterator { +impl< + K, V, + T: IntoIterator, + const STACK_COUNT: usize +> From +for ProtoMap<'_, K, V, STACK_COUNT> { fn from(value: T) -> Self { Self { entries: value.into_iter().map(|(k, v)| (k, Some(v))).collect(), @@ -131,9 +134,8 @@ where T: IntoIterator { } } -impl -Index<&Q> for ProtoMap<'_, K, V, STACK_COUNT> -where K: Borrow, Q: Eq { +impl, V, const STACK_COUNT: usize> Index<&Q> +for ProtoMap<'_, K, V, STACK_COUNT> { type Output = V; fn index(&self, index: &Q) -> &Self::Output { self.get(index).expect("Index not found in map") @@ -158,6 +160,10 @@ Add<(K, V)> for &'a ProtoMap<'a, K, V, STACK_COUNT> { } } +impl<'a, K, V, const STACK_COUNT: usize> Default for ProtoMap<'a, K, V, STACK_COUNT> { + fn default() -> Self { Self::new() } +} + #[macro_export] macro_rules! protomap { ($($ent:expr),*) => { diff --git a/src/utils/replace_first.rs b/src/utils/replace_first.rs index d1b0d15..9b3bb93 100644 --- a/src/utils/replace_first.rs +++ b/src/utils/replace_first.rs @@ -2,9 +2,12 @@ use std::iter; /// Iterate over a sequence with the first element the function returns /// Some() for updated, but only if there is such an element. 
-pub fn replace_first<'a, T, F>(slice: &'a [T], mut f: F) --> Option + 'a> -where T: Clone, F: FnMut(&T) -> Option { +pub fn replace_first< + T: Clone, + F: FnMut(&T) -> Option +>( + slice: &[T], mut f: F +) -> Option + '_> { for i in 0..slice.len() { if let Some(new) = f(&slice[i]) { let subbed_iter = slice[0..i].iter().cloned() diff --git a/src/utils/substack.rs b/src/utils/substack.rs index 4e2e880..084557f 100644 --- a/src/utils/substack.rs +++ b/src/utils/substack.rs @@ -35,13 +35,13 @@ impl<'a, T> Substack<'a, T> { pub fn new_frame(&'a self, item: T) -> Stackframe<'a, T> { Stackframe { item, - prev: &self, + prev: self, len: self.opt().map_or(1, |s| s.len) } } pub fn pop(&'a self, count: usize) -> Option<&'a Stackframe<'a, T>> { if let Self::Frame(p) = self { - if count == 0 {Some(&p)} + if count == 0 {Some(p)} else {p.prev.pop(count - 1)} } else {None} } @@ -51,7 +51,7 @@ impl<'a, T> Substack<'a, T> { } } } -impl<'a, T> Debug for Substack<'a, T> where T: Debug { +impl<'a, T: Debug> Debug for Substack<'a, T> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "Substack")?; f.debug_list().entries(self.iter()).finish() @@ -64,9 +64,10 @@ pub struct SubstackIterator<'a, T> { impl<'a, T> SubstackIterator<'a, T> { #[allow(unused)] - pub fn first_some(&mut self, f: F) -> Option - where F: Fn(&T) -> Option { - while let Some(x) = self.next() { + pub fn first_some Option + >(&mut self, f: F) -> Option { + for x in self.by_ref() { if let Some(result) = f(x) { return Some(result) } diff --git a/src/utils/translate.rs b/src/utils/translate.rs deleted file mode 100644 index ff4bf4d..0000000 --- a/src/utils/translate.rs +++ /dev/null @@ -1,30 +0,0 @@ -use std::mem; - -// TODO: extract to crate - -/// Map over a `&mut` with a mapper function that takes ownership of -/// the value -#[allow(unused)] -pub fn translate T>(data: &mut T, f: F) { - unsafe { - let mut acc = mem::MaybeUninit::::uninit().assume_init(); - mem::swap(&mut acc, data); - let 
mut new = f(acc); - mem::swap(&mut new, data); - mem::forget(new); - } -} - -/// Map over a `&mut` with a mapper function that takes ownership of -/// the value and also produces some unrelated data. -#[allow(unused)] -pub fn process (T, U)>(data: &mut T, f: F) -> U { - unsafe { - let mut acc = mem::MaybeUninit::::uninit().assume_init(); - mem::swap(&mut acc, data); - let (mut new, ret) = f(acc); - mem::swap(&mut new, data); - mem::forget(new); - ret - } -} \ No newline at end of file diff --git a/swap.md b/swap.md deleted file mode 100644 index e69de29..0000000