diff --git a/Cargo.lock b/Cargo.lock index 1533753..8a9dcd1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,6 +160,7 @@ dependencies = [ "itertools", "mappable-rc", "ordered-float", + "smallvec", "thiserror", ] @@ -196,6 +197,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "smallvec" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0" + [[package]] name = "syn" version = "1.0.95" diff --git a/Cargo.toml b/Cargo.toml index 6d6ac1c..5467f31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,4 +12,5 @@ derivative = "2.2" hashbrown = "0.12" mappable-rc = "0.1" ordered-float = "3.0" -itertools = "0.10" \ No newline at end of file +itertools = "0.10" +smallvec = "1.10.0" diff --git a/examples/dummy_project/main.orc b/examples/dummy_project/main.orc index b3ae120..be64562 100644 --- a/examples/dummy_project/main.orc +++ b/examples/dummy_project/main.orc @@ -1,23 +1,46 @@ --- import std::io::(println, out) -- imports +opaque := \T. T --- single word rule (alias) -greet =1=> (\name. printf out "Hello {}!\n" [name]) - --- multi-word exported rule -export ;> $a =200=> (greet $a) - -reeee := \$a.b - --- single-word exported rule -export main := ( - print "What is your name?" >> - readln >>= \name. - greet name +--[ Typeclass definition (also just a type) ]-- +define Add $L:type $R:type $O:type as $L -> $R -> $O +-- HKTC +define Mappable $C:(type -> type) as @T. @U. (T -> U) -> $C T -> $C U +-- Dependency on existing typeclass +define Zippable $C:(type -> type) as @:Mappable $C. ( + @T. @U. @V. (T -> U -> V) -> $C T -> $C U -> $C V ) +define Default $T:type as $T +-- Is the intersection of typeclasses an operation we need? -export < $a ...$rest /> := (createElement (tok_to_str $a) [(props_carriage ...$rest)]) -export (props_carriage $key = $value) := (tok_to_str $key) => $value +--[ Type definition ]-- +define Cons $elem:type as loop \r. Option (Pair T $elem) +nil := @T. from @(Cons T) none +cons := @T. \el:T. ( + generalise @(Cons T) + |> (\list. some t[el, into list]) + |> categorise @(Cons T) +) +export map := @T. @U. \f:T -> U. ( + generalise @(Cons T) + |> loop ( \recurse. \option. + map option \pair. t[f (fst pair), recurse (snd pair)] + ) + |> categorise @(Cons U) +) +-- Universal typeclass implementation; no parameters, no overrides, no name for overriding +impl Mappable Cons via map +-- Blanket typeclass implementation; parametric, may override, must have name for overriding +impl (@T. Add (Cons T) (Cons T) (Cons T)) by concatenation over elementwiseAddition via concat --- The broadest trait definition in existence -Foo := (Bar Baz) --- default anyFoo = @T. @impl:(T (Bar Baz)). impl:(T Foo) +-- Scratchpad + +filterBadWords := @C:type -> type. @:Mappable C. \strings:C String. ( + map strings \s. if intersects badWords (slice " " s) then none else some s +):(C (Option String)) + +-- /Scratchpad + +main := \x. foo @bar x + +foo := @util. \x. util x + +export opaque := \T. 
atom \ No newline at end of file diff --git a/examples/rule_demo/main.orc b/examples/rule_demo/main.orc deleted file mode 100644 index e72acbe..0000000 --- a/examples/rule_demo/main.orc +++ /dev/null @@ -1,18 +0,0 @@ -export ::(main, foo) - -main := [foo, bar, baz, quz] - -foo := steamed hams - -[...$data] := (cons_start ...$data cons_carriage(none)) - -[] := none - -...$prefix:1 , ...$item cons_carriage( - $tail -) := ...$prefix cons_carriage( - (some (cons (...$item) $tail)) -) - -cons_start ...$item cons_carriage($tail) := some (cons (...$item) $tail) - diff --git a/notes.md b/notes.md index 4de0266..e69de29 100644 --- a/notes.md +++ b/notes.md @@ -1,18 +0,0 @@ -# Anatomy of a code file - -```orchid -import std::io::(println, out) -- imports - --- single word substitution (alias) -greet == \name. printf out "Hello {}!\n" [name] - --- multi-word exported substitution with nonzero priority -export (...$pre ;) $a ...$post) =200=> (...$pre (greet $a) ...$post) - --- single-word exported substitution -export main == ( - print "What is your name? >> - readln >>= \name. - greet name -) -``` diff --git a/notes/type_system/definitions.md b/notes/type_system/definitions.md new file mode 100644 index 0000000..60144fe --- /dev/null +++ b/notes/type_system/definitions.md @@ -0,0 +1,35 @@ +## Type definitions + +```orc +define Cons as \T:type. loop \r. Option (Pair T r) +``` + +Results in +- (Cons Int) is not assignable to @T. Option T +- An instance of (Cons Int) can be constructed with `categorise @(Cons Int) (some (pair 1 none))` + but the type parameter can also be inferred from the expected return type +- An instance of (Cons Int) can be deconstructed with `generalise @(Cons Int) numbers` + but the type parameter can also be inferred from the argument + +These inference rules are never reversible + +```orc +categorise :: @T:type. (definition T) -> T +generalise :: @T:type. T -> (definition T) +definition :: type -> type -- opaque function +``` + +## Unification + +The following must unify: + +```orc +@T. @:Add T T T. Mult Int T T +Mult Int (Cons Int) (Cons Int) +``` + +### Impls for types + +Impls for types are generally not a good idea as autos with types like Int can +often be used in dependent typing to represent eg. an index into a type-level conslist to be +deduced by the compiler, and impls take precedence over resolution by unification. 
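The categorise/generalise pair described in definitions.md above behaves like a nominal newtype conversion. Below is a minimal Rust analogy (not part of this changeset; `Cons`, `ConsDef`, `categorise` and `generalise` are illustrative stand-ins) showing why the two directions are separate, non-reversible conversions between a type and its definition.

```rust
// Rough analogy for `define Cons as \T:type. loop \r. Option (Pair T r)`.
// The structural definition recurses through the nominal type so it stays finite.
type ConsDef<T> = Option<(T, Box<Cons<T>>)>;

// The nominal type produced by `categorise`; not assignable to its definition.
struct Cons<T>(ConsDef<T>);

// categorise :: @T:type. (definition T) -> T
fn categorise<T>(def: ConsDef<T>) -> Cons<T> { Cons(def) }

// generalise :: @T:type. T -> (definition T)
fn generalise<T>(list: Cons<T>) -> ConsDef<T> { list.0 }

fn main() {
    // `cons 1 nil` spelled through the conversion pair
    let numbers: Cons<i32> = categorise(Some((1, Box::new(categorise(None)))));
    // Deconstruction goes back through `generalise`; neither direction is inferred in reverse.
    assert!(matches!(generalise(numbers), Some((1, _))));
}
```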
diff --git a/notes/type_system/impls.md b/notes/type_system/impls.md new file mode 100644 index 0000000..e69de29 diff --git a/notes/type_system/unification.md b/notes/type_system/unification.md new file mode 100644 index 0000000..f57d644 --- /dev/null +++ b/notes/type_system/unification.md @@ -0,0 +1,27 @@ +# Steps of validating typed lambda + +- Identify all expressions that describe the type of the same expression +- enqueue evaluation steps for each of them and put them in a unification group +- evaluation step refers to previous step, complete expression tree + - unification **succeeds** if either + - the trees are syntactically identical in any two steps between the targets + - unification succeeds for all substeps: + - try to find an ancestor step that provably produces the same value as any lambda in this + step (for example, by syntactic equality) + - if found, substitute it with the recursive normal form of the lambda + - recursive normal form is `Apply(Y, \r.[body referencing r on point of recursion])` + - find all `Apply(\x.##, ##)` nodes in the tree and execute them + - unification **fails** if a member of the concrete tree differs (only outermost steps add to + the concrete tree so it belongs to the group and not the resolution) or no substeps are found + for a resolution step _(failure: unresolved higher kinded type)_ + - if neither of these conclusions is reached within a set number of steps, unification is + **indeterminate** which is also a failure but suggests that the same value-level operations + may be unifiable with better types. + +The time complexity of this operation is O(h no) >= O(2^n). For this reason, a two-stage limit +is recommended: one for the recursion depth which is replicable and static, and another, +configurable, time-based limit enforced by a separate thread. + +How does this interact with impls? +Idea: excluding value-universe code from type-universe execution. +Digression: Is it possible to recurse across universes? \ No newline at end of file diff --git a/src/executor/foreign.rs b/src/executor/foreign.rs new file mode 100644 index 0000000..8cb83fd --- /dev/null +++ b/src/executor/foreign.rs @@ -0,0 +1,102 @@ +use std::any::Any; +use std::fmt::{Display, Debug}; +use std::hash::Hash; + +use mappable_rc::Mrc; + +use crate::representations::typed::{Expr, Clause}; + +pub trait ExternError: Display {} + +/// Represents an externally defined function from the perspective of the executor +/// Since Orchid lacks basic numerical operations, these are also external functions. 
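Referring back to the two-stage limit recommended in notes/type_system/unification.md above: a minimal sketch, assuming a hypothetical `step` closure standing in for one unification step (none of these names exist in this changeset). The depth cap is replicable and static, while the configurable wall-clock cap is enforced by a watchdog thread through an atomic flag.

```rust
use std::sync::Arc;
use std::sync::atomic::{AtomicBool, Ordering};
use std::thread;
use std::time::Duration;

enum Outcome { Success, Failure, Indeterminate }

// `step` stands in for one unification step; returning Some short-circuits.
fn unify_with_limits(
    depth_limit: usize,
    time_limit: Duration,
    mut step: impl FnMut(usize) -> Option<Outcome>,
) -> Outcome {
    let timed_out = Arc::new(AtomicBool::new(false));
    let flag = Arc::clone(&timed_out);
    // Configurable, time-based limit enforced by a separate thread
    thread::spawn(move || {
        thread::sleep(time_limit);
        flag.store(true, Ordering::Relaxed);
    });
    // Replicable, static recursion-depth limit
    for depth in 0..depth_limit {
        if timed_out.load(Ordering::Relaxed) { return Outcome::Indeterminate }
        if let Some(verdict) = step(depth) { return verdict }
    }
    Outcome::Indeterminate
}
```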
+#[derive(Eq)] +pub struct ExternFn { + name: String, param: Mrc, rttype: Mrc, + function: Mrc Result>> +} + +impl ExternFn { + pub fn new Result>>( + name: String, param: Mrc, rttype: Mrc, f: F + ) -> Self { + Self { + name, param, rttype, + function: Mrc::map(Mrc::new(f), |f| { + f as &dyn Fn(Clause) -> Result> + }) + } + } + fn name(&self) -> &str {&self.name} + fn apply(&self, arg: Clause) -> Result> {(self.function)(arg)} +} + +impl Clone for ExternFn { fn clone(&self) -> Self { Self { + name: self.name.clone(), + param: Mrc::clone(&self.param), + rttype: Mrc::clone(&self.rttype), + function: Mrc::clone(&self.function) +}}} +impl PartialEq for ExternFn { fn eq(&self, other: &Self) -> bool { self.name() == other.name() }} +impl Hash for ExternFn { + fn hash(&self, state: &mut H) { self.name.hash(state) } +} +impl Debug for ExternFn { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "##EXTERN[{}]:{:?} -> {:?}##", self.name(), self.param, self.rttype) + } +} + +pub trait Atomic: Any + Debug where Self: 'static { + fn as_any(&self) -> &dyn Any; + fn definitely_eq(&self, _other: &dyn Any) -> bool; + fn hash(&self, hasher: &mut dyn std::hash::Hasher); +} + +/// Represents a unit of information from the perspective of the executor. This may be +/// something like a file descriptor which functions can operate on, but it can also be +/// information in the universe of types or kinds such as the type of signed integers or +/// the kind of types. Ad absurdum it can also be just a number, although Literal is +/// preferable for types it's defined on. +#[derive(Eq)] +pub struct Atom { + typ: Mrc, + data: Mrc +} +impl Atom { + pub fn new(data: T, typ: Mrc) -> Self { Self{ + typ, + data: Mrc::map(Mrc::new(data), |d| d as &dyn Atomic) + } } + pub fn data(&self) -> &dyn Atomic { self.data.as_ref() as &dyn Atomic } + pub fn try_cast(&self) -> Result<&T, ()> { + self.data().as_any().downcast_ref().ok_or(()) + } + pub fn is(&self) -> bool { self.data().as_any().is::() } + pub fn cast(&self) -> &T { + self.data().as_any().downcast_ref().expect("Type mismatch on Atom::cast") + } +} + +impl Clone for Atom { + fn clone(&self) -> Self { Self { + typ: Mrc::clone(&self.typ), + data: Mrc::clone(&self.data) + } } +} +impl Hash for Atom { + fn hash(&self, state: &mut H) { + self.data.hash(state); + self.typ.hash(state) + } +} +impl Debug for Atom { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "##ATOM[{:?}]:{:?}##", self.data(), self.typ) + } +} +impl PartialEq for Atom { + fn eq(&self, other: &Self) -> bool { + self.data().definitely_eq(other.data().as_any()) + } +} \ No newline at end of file diff --git a/src/executor/mod.rs b/src/executor/mod.rs new file mode 100644 index 0000000..2631413 --- /dev/null +++ b/src/executor/mod.rs @@ -0,0 +1,3 @@ +mod foreign; +pub use foreign::ExternFn; +pub use foreign::Atom; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 3e30493..bc71a69 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,18 @@ #![feature(specialization)] -use std::{env::current_dir, process::exit}; +use std::env::current_dir; +mod executor; mod parse; mod project; mod utils; -mod expression; +mod representations; mod rule; -use expression::{Expr, Clause}; +mod types; +use file_loader::LoadingError; +pub use representations::ast; +use ast::{Expr, Clause}; +use representations::typed as t; use mappable_rc::Mrc; use project::{rule_collector, Loaded, file_loader}; use rule::Repository; @@ -34,37 +39,60 @@ export 
(match_sequence $lhs) >>= (match_sequence $rhs) =100=> (bind ($lhs) ($rhs fn initial_tree() -> Mrc<[Expr]> { to_mrc_slice(vec![Expr(Clause::Name { local: None, - qualified: to_mrc_slice(vec!["main".to_string(), "main".to_string()]) - }, None)]) + qualified: literal(&["main", "main"]) + }, to_mrc_slice(vec![]))]) } -fn main() { +#[allow(unused)] +fn typed_notation_debug() { + let t = t::Clause::Auto(None, + t::Clause::Lambda(Some(Mrc::new(t::Clause::Argument(0))), + t::Clause::Lambda(Some(Mrc::new(t::Clause::Argument(1))), + t::Clause::Argument(1).wrap_t(t::Clause::Argument(2)) + ).wrap() + ).wrap() + ).wrap(); + let f = t::Clause::Auto(None, + t::Clause::Lambda(Some(Mrc::new(t::Clause::Argument(0))), + t::Clause::Lambda(Some(Mrc::new(t::Clause::Argument(1))), + t::Clause::Argument(0).wrap_t(t::Clause::Argument(2)) + ).wrap() + ).wrap() + ).wrap(); + println!("{:?}", t::Clause::Apply(t::Clause::Apply(Mrc::clone(&t), t).wrap(), f)) +} + +#[allow(unused)] +fn load_project() { let cwd = current_dir().unwrap(); - let collect_rules = rule_collector(move |n| { + let collect_rules = rule_collector(move |n| -> Result { if n == literal(&["prelude"]) { Ok(Loaded::Module(PRELUDE.to_string())) } else { file_loader(cwd.clone())(n) } }, vliteral(&["...", ">>", ">>=", "[", "]", ",", "=", "=>"])); - match collect_rules.try_find(&literal(&["main"])) { - Ok(rules) => { - let mut tree = initial_tree(); - println!("Start processing {tree:?}"); - let repo = Repository::new(rules.as_ref().to_owned()); - println!("Ruleset: {repo:?}"); - let mut i = 0; loop { - if 10 <= i {break} else {i += 1} - match repo.step(Mrc::clone(&tree)) { - Ok(Some(phase)) => { - tree = phase; - println!("Step {i}: {tree:?}") - }, - Ok(None) => exit(0), - Err(e) => { - eprintln!("Rule error: {e:?}"); - exit(0) - } - } - } + let rules = match collect_rules.try_find(&literal(&["main"])) { + Ok(rules) => rules, + Err(err) => panic!("{:#?}", err) + }; + let mut tree = initial_tree(); + println!("Start processing {tree:?}"); + let repo = Repository::new(rules.as_ref().to_owned()); + println!("Ruleset: {repo:?}"); + xloop!(let mut i = 0; i < 10; i += 1; { + match repo.step(Mrc::clone(&tree)) { + Ok(Some(phase)) => { + println!("Step {i}: {phase:?}"); + tree = phase; + }, + Ok(None) => { + println!("Execution complete"); + break + }, + Err(e) => panic!("Rule error: {e:?}") } - Err(err) => println!("{:#?}", err) - } + }; println!("Macro execution didn't halt")); +} + +fn main() { + // lambda_notation_debug(); + load_project(); } diff --git a/src/parse/enum_parser.rs b/src/parse/enum_parser.rs index 375e165..6012405 100644 --- a/src/parse/enum_parser.rs +++ b/src/parse/enum_parser.rs @@ -1,3 +1,9 @@ +/// Produces parsers for tokenized sequences of enum types: +/// ```rs +/// enum_parser!(Foo::Bar | "Some error!") // Parses Foo::Bar(T) into T +/// enum_parser!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar" +/// enum_parser!(Foo >> Quz; Bar, Baz) // Parses Foo::Bar(T) into Quz::Bar(T) and Foo::Baz(U) into Quz::Baz(U) +/// ``` #[macro_export] macro_rules! 
enum_parser { ($p:path | $m:tt) => { diff --git a/src/parse/expression.rs b/src/parse/expression.rs index aa08a5a..57cb228 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -1,11 +1,11 @@ use chumsky::{self, prelude::*, Parser}; -use mappable_rc::Mrc; use crate::enum_parser; -use crate::expression::{Clause, Expr, Literal}; -use crate::utils::to_mrc_slice; +use crate::representations::{Literal, ast::{Clause, Expr}}; +use crate::utils::{to_mrc_slice, one_mrc_slice}; use super::lexer::Lexeme; +/// Parses any number of expr wrapped in (), [] or {} fn sexpr_parser
<P>
( expr: P ) -> impl Parser> + Clone @@ -13,6 +13,8 @@ where P: Parser> + Clone { Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, to_mrc_slice(b))) } +/// Parses `\name.body` or `\name:type.body` where name is any valid name and type and body are +/// both expressions. Comments are allowed and ignored everywhere in between the tokens fn lambda_parser
<P>
( expr: P ) -> impl Parser> + Clone @@ -37,6 +39,7 @@ where P: Parser> + Clone { }) } +/// see [lambda_parser] but `@` instead of `\` and the name is optional fn auto_parser
<P>
( expr: P ) -> impl Parser> + Clone @@ -50,17 +53,22 @@ where P: Parser> + Clone { .then_ignore(enum_parser!(Lexeme::Comment).repeated()) .ignore_then(expr.clone().repeated()) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .or_not().map(Option::unwrap_or_default) ) .then_ignore(just(Lexeme::name("."))) .then_ignore(enum_parser!(Lexeme::Comment).repeated()) .then(expr.repeated().at_least(1)) - .try_map(|((name, typ), body), s| if name.is_none() && typ.is_empty() { - Err(Simple::custom(s, "Auto without name or type has no effect")) - } else { - Ok(Clause::Auto(name, to_mrc_slice(typ), to_mrc_slice(body))) + .try_map(|((name, typ), body): ((Option, Vec), Vec), s| { + if name.is_none() && typ.is_empty() { + Err(Simple::custom(s, "Auto without name or type has no effect")) + } else { + Ok(Clause::Auto(name, to_mrc_slice(typ), to_mrc_slice(body))) + } }) } +/// Parses a sequence of names separated by ::
+/// Comments are allowed and ignored in between fn name_parser() -> impl Parser, Error = Simple> + Clone { enum_parser!(Lexeme::Name).separated_by( enum_parser!(Lexeme::Comment).repeated() @@ -69,6 +77,7 @@ fn name_parser() -> impl Parser, Error = Simple> + C ).at_least(1) } +/// Parse any legal argument name starting with a `$` fn placeholder_parser() -> impl Parser> + Clone { enum_parser!(Lexeme::Name).try_map(|name, span| { name.strip_prefix('$').map(&str::to_string) @@ -76,7 +85,7 @@ fn placeholder_parser() -> impl Parser> + }) } -/// Parse an expression without a type annotation +/// Parse an expression pub fn xpr_parser() -> impl Parser> { recursive(|expr| { let clause = @@ -102,12 +111,17 @@ pub fn xpr_parser() -> impl Parser> { }), sexpr_parser(expr.clone()), lambda_parser(expr.clone()), - auto_parser(expr.clone()) + auto_parser(expr.clone()), + just(Lexeme::At).to(Clause::Name { + local: Some("@".to_string()), + qualified: one_mrc_slice("@".to_string()) + }) ))).then_ignore(enum_parser!(Lexeme::Comment).repeated()); clause.clone().then( just(Lexeme::Type) - .ignore_then(expr.clone()).or_not() + .ignore_then(clause.clone()) + .repeated() ) - .map(|(val, typ)| Expr(val, typ.map(Mrc::new))) + .map(|(val, typ)| Expr(val, to_mrc_slice(typ))) }).labelled("Expression") } diff --git a/src/parse/import.rs b/src/parse/import.rs index 0b093b2..4f7d509 100644 --- a/src/parse/import.rs +++ b/src/parse/import.rs @@ -10,6 +10,7 @@ use super::lexer::Lexeme; #[derive(Debug, Clone)] pub struct Import { pub path: Mrc<[String]>, + /// If name is None, this is a wildcard import pub name: Option } diff --git a/src/parse/name.rs b/src/parse/name.rs index 8d50790..da5be3c 100644 --- a/src/parse/name.rs +++ b/src/parse/name.rs @@ -19,6 +19,7 @@ fn op_parser<'a, T: AsRef + Clone>(ops: &[T]) -> BoxedParser<'a, char, Stri /// - `"` and `'` are read as primitives and would never match. /// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming. /// - `.` is the discriminator for parametrics. +/// - ',' is always a standalone single operator, so it can never be part of a name /// /// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but /// then parametrics should take precedence, which might break stuff. investigate. 
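To make the tokenisation rules documented above concrete, here is an illustrative splitter (an assumption for demonstration only; the real `op_parser`/`name_parser` are chumsky parsers and also handle strings, balanced parens and a configurable operator list). It only shows the two rules that matter for names: `.` ends the current token because it discriminates parametrics, and `,` is always a standalone single operator.

```rust
fn split_names(src: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut cur = String::new();
    for c in src.chars() {
        match c {
            // `.` discriminates parametrics, so it terminates the current name;
            // `,` is always a standalone single operator, never part of a name
            '.' | ',' => {
                if !cur.is_empty() { out.push(std::mem::take(&mut cur)) }
                out.push(c.to_string());
            }
            c if c.is_whitespace() => {
                if !cur.is_empty() { out.push(std::mem::take(&mut cur)) }
            }
            c => cur.push(c),
        }
    }
    if !cur.is_empty() { out.push(cur) }
    out
}

// split_names("foo.bar,baz") == ["foo", ".", "bar", ",", "baz"]
```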
diff --git a/src/parse/parse.rs b/src/parse/parse.rs index 3edf305..1117c6d 100644 --- a/src/parse/parse.rs +++ b/src/parse/parse.rs @@ -4,7 +4,7 @@ use chumsky::{prelude::{Simple, end}, Stream, Parser}; use itertools::Itertools; use thiserror::Error; -use crate::{expression::Rule, parse::lexer::LexedText}; +use crate::{ast::Rule, parse::lexer::LexedText}; use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry}; diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs index 217c1e9..82122d6 100644 --- a/src/parse/sourcefile.rs +++ b/src/parse/sourcefile.rs @@ -1,7 +1,7 @@ use std::collections::HashSet; use crate::{enum_parser, box_chain}; -use crate::expression::{Expr, Clause, Rule}; +use crate::ast::{Expr, Clause, Rule}; use crate::utils::to_mrc_slice; use crate::utils::Stackframe; use crate::utils::iter::box_empty; @@ -74,8 +74,8 @@ fn visit_all_names_expr_recur<'a, F>( ) where F: FnMut(&'a [String]) { let Expr(val, typ) = expr; visit_all_names_clause_recur(val, binds.clone(), cb); - if let Some(t) = typ { - visit_all_names_expr_recur(t, binds, cb) + for typ in typ.as_ref() { + visit_all_names_clause_recur(typ, binds.clone(), cb); } } diff --git a/src/project/mod.rs b/src/project/mod.rs index 3954c6a..d410f33 100644 --- a/src/project/mod.rs +++ b/src/project/mod.rs @@ -5,6 +5,6 @@ mod name_resolver; mod loaded; pub use loaded::Loaded; mod module_error; -mod file_loader; +pub mod file_loader; pub use file_loader::file_loader; -use crate::expression::Rule; \ No newline at end of file +use crate::ast::Rule; \ No newline at end of file diff --git a/src/project/name_resolver.rs b/src/project/name_resolver.rs index 0ce2f50..7176ec6 100644 --- a/src/project/name_resolver.rs +++ b/src/project/name_resolver.rs @@ -4,7 +4,7 @@ use thiserror::Error; use crate::utils::{Stackframe, to_mrc_slice}; -use crate::expression::{Expr, Clause}; +use crate::ast::{Expr, Clause}; type ImportMap = HashMap>; @@ -50,14 +50,16 @@ where symbol: Mrc<[String]>, import_path: Stackframe> ) -> Result, ResolutionError> { - if let Some(cached) = self.cache.get(&symbol) { return cached.as_ref().map_err(|e| e.clone()).map(Mrc::clone) } + if let Some(cached) = self.cache.get(&symbol) { + return cached.as_ref().map_err(|e| e.clone()).map(Mrc::clone) + } // The imports and path of the referenced file and the local name let path = (self.get_modname)(Mrc::clone(&symbol)).ok_or_else(|| { ResolutionError::NoModule(Mrc::clone(&symbol)) })?; let name = &symbol[path.len()..]; if name.is_empty() { - panic!("Something's really broken\n{:?}", import_path) + panic!("get_modname matched all to module and nothing to name in {:?}", import_path) } let imports = (self.get_imports)(Mrc::clone(&path))?; let result = if let Some(source) = imports.get(&name[0]) { @@ -110,7 +112,7 @@ where fn process_expression_rec(&mut self, Expr(token, typ): &Expr) -> Result> { Ok(Expr( self.process_clause_rec(token)?, - self.process_exprmrcopt_rec(typ)? + typ.iter().map(|t| self.process_clause_rec(t)).collect::>()? 
)) } diff --git a/src/project/prefix.rs b/src/project/prefix.rs index 6185055..0e94c6b 100644 --- a/src/project/prefix.rs +++ b/src/project/prefix.rs @@ -1,6 +1,6 @@ use mappable_rc::Mrc; -use crate::{expression::{Expr, Clause}, utils::collect_to_mrc}; +use crate::{ast::{Expr, Clause}, utils::{collect_to_mrc, to_mrc_slice}}; /// Replaces the first element of a name with the matching prefix from a prefix map @@ -36,6 +36,6 @@ fn prefix_clause( pub fn prefix_expr(Expr(clause, typ): &Expr, namespace: Mrc<[String]>) -> Expr { Expr( prefix_clause(clause, Mrc::clone(&namespace)), - typ.as_ref().map(|e| Mrc::new(prefix_expr(e, namespace))) + to_mrc_slice(typ.iter().map(|e| prefix_clause(e, Mrc::clone(&namespace))).collect()) ) } diff --git a/src/project/rule_collector.rs b/src/project/rule_collector.rs index a9db930..472db1f 100644 --- a/src/project/rule_collector.rs +++ b/src/project/rule_collector.rs @@ -5,7 +5,7 @@ use std::rc::Rc; use mappable_rc::Mrc; -use crate::expression::Rule; +use crate::ast::Rule; use crate::parse::{self, FileEntry}; use crate::utils::{Cache, mrc_derive, to_mrc_slice}; @@ -40,6 +40,7 @@ where (load_mod_rc.borrow_mut())(path).map_err(ModuleError::Load) })); // Map names to the longest prefix that points to a valid module + // At least one segment must be in the prefix, and the prefix must not be the whole name let modname = Rc::new(Cache::new({ let loaded = Rc::clone(&loaded); move |symbol: Mrc<[String]>, _| -> Result, Vec>> { @@ -50,7 +51,7 @@ where else { Ok(()) } }; loop { - let path = mrc_derive(&symbol, |s| &s[..s.len() - errv.len()]); + let path = mrc_derive(&symbol, |s| &s[..s.len() - errv.len() - 1]); match loaded.try_find(&path) { Ok(imports) => match imports.as_ref() { Loaded::Module(_) => break Ok(path), diff --git a/src/expression.rs b/src/representations/ast.rs similarity index 80% rename from src/expression.rs rename to src/representations/ast.rs index 6531d95..0ddd4f5 100644 --- a/src/expression.rs +++ b/src/representations/ast.rs @@ -1,35 +1,19 @@ use mappable_rc::Mrc; use itertools::Itertools; use ordered_float::NotNan; +use std::hash::Hash; use std::fmt::Debug; +use crate::executor::{ExternFn, Atom}; -/// An exact value -#[derive(Clone, PartialEq, Eq, Hash)] -pub enum Literal { - Num(NotNan), - Int(u64), - Char(char), - Str(String), -} - -impl Debug for Literal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::Num(arg0) => write!(f, "{:?}", arg0), - Self::Int(arg0) => write!(f, "{:?}", arg0), - Self::Char(arg0) => write!(f, "{:?}", arg0), - Self::Str(arg0) => write!(f, "{:?}", arg0), - } - } -} +use super::Literal; /// An S-expression with a type #[derive(PartialEq, Eq, Hash)] -pub struct Expr(pub Clause, pub Option>); +pub struct Expr(pub Clause, pub Mrc<[Clause]>); impl Clone for Expr { fn clone(&self) -> Self { - Self(self.0.clone(), self.1.as_ref().map(Mrc::clone)) + Self(self.0.clone(), Mrc::clone(&self.1)) } } @@ -37,8 +21,10 @@ impl Debug for Expr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let Expr(val, typ) = self; write!(f, "{:?}", val)?; - if let Some(typ) = typ { write!(f, "{:?}", typ) } - else { Ok(()) } + for typ in typ.as_ref() { + write!(f, ":{:?}", typ)? 
+ } + Ok(()) } } @@ -53,13 +39,14 @@ pub enum Clause { S(char, Mrc<[Expr]>), Lambda(String, Mrc<[Expr]>, Mrc<[Expr]>), Auto(Option, Mrc<[Expr]>, Mrc<[Expr]>), - /// Second parameter: - /// None => matches one token - /// Some((prio, nonzero)) => - /// prio is the sizing priority for the vectorial (higher prio grows first) - /// nonzero is whether the vectorial matches 1..n or 0..n tokens + ExternFn(ExternFn), + Atom(Atom), Placeh{ key: String, + /// None => matches one token + /// Some((prio, nonzero)) => + /// prio is the sizing priority for the vectorial (higher prio grows first) + /// nonzero is whether the vectorial matches 1..n or 0..n tokens vec: Option<(usize, bool)> }, } @@ -94,7 +81,9 @@ impl Clone for Clause { n.clone(), Mrc::clone(t), Mrc::clone(b) ), Clause::Placeh{key, vec} => Clause::Placeh{key: key.clone(), vec: *vec}, - Clause::Literal(l) => Clause::Literal(l.clone()) + Clause::Literal(l) => Clause::Literal(l.clone()), + Clause::ExternFn(nc) => Clause::ExternFn(nc.clone()), + Clause::Atom(a) => Clause::Atom(a.clone()) } } } @@ -136,7 +125,9 @@ impl Debug for Clause { }, Self::Placeh{key, vec: None} => write!(f, "${key}"), Self::Placeh{key, vec: Some((prio, true))} => write!(f, "...${key}:{prio}"), - Self::Placeh{key, vec: Some((prio, false))} => write!(f, "..${key}:{prio}") + Self::Placeh{key, vec: Some((prio, false))} => write!(f, "..${key}:{prio}"), + Self::ExternFn(nc) => write!(f, "{nc:?}"), + Self::Atom(a) => write!(f, "{a:?}") } } } @@ -163,4 +154,4 @@ impl Debug for Rule { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:?} ={}=> {:?}", self.source, self.prio, self.target) } -} +} \ No newline at end of file diff --git a/src/representations/ast_to_typed.rs b/src/representations/ast_to_typed.rs new file mode 100644 index 0000000..b61234c --- /dev/null +++ b/src/representations/ast_to_typed.rs @@ -0,0 +1,120 @@ +use mappable_rc::Mrc; + +use crate::utils::{Stackframe, to_mrc_slice}; + +use super::{ast, typed}; + +#[derive(Clone)] +pub enum Error { + /// `()` as a clause is meaningless in lambda calculus + EmptyS, + /// Only `(...)` may be converted to typed lambdas. `[...]` and `{...}` left in the code are + /// signs of incomplete macro execution + BadGroup(char), + /// `foo:bar:baz` will be parsed as `(foo:bar):baz`, explicitly specifying `foo:(bar:baz)` + /// is forbidden and it's also meaningless since `baz` can only ever be the kind of types + ExplicitBottomKind, + /// Name never bound in an enclosing scope - indicates incomplete macro substitution + Unbound(String), + /// Namespaced names can never occur in the code, these are signs of incomplete macro execution + Symbol, + /// Placeholders shouldn't even occur in the code during macro execution. 
Something is clearly + /// terribly wrong + Placeholder, + /// It's possible to try and transform the clause `(foo:bar)` into a typed clause, + /// however the correct value of this ast clause is a typed expression (included in the error) + /// + /// [expr] handles this case, so it's only really possible to get this + /// error if you're calling [clause] directly + ExprToClause(typed::Expr) +} + +/// Try to convert an expression from AST format to typed lambda +pub fn expr(expr: &ast::Expr) -> Result { + expr_rec(expr, Stackframe::new(None)) +} + +/// Try and convert a single clause from AST format to typed lambda +pub fn clause(clause: &ast::Clause) -> Result { + clause_rec(clause, Stackframe::new(None)) +} + +/// Try and convert a sequence of expressions from AST format to typed lambda +pub fn exprv(exprv: &[ast::Expr]) -> Result { + exprv_rec(exprv, Stackframe::new(None)) +} + +/// Recursive state of [exprv] +fn exprv_rec(v: &[ast::Expr], names: Stackframe>) -> Result { + if v.len() == 0 {return Err(Error::EmptyS)} + if v.len() == 1 {return expr_rec(&v[0], names)} + let (head, tail) = v.split_at(2); + let f = expr_rec(&head[0], names)?; + let x = expr_rec(&head[1], names)?; + // TODO this could probably be normalized, it's a third copy. + tail.iter().map(|e| expr_rec(e, names)).fold( + Ok(typed::Clause::Apply(Mrc::new(f), Mrc::new(x))), + |acc, e| Ok(typed::Clause::Apply( + Mrc::new(typed::Expr(acc?, to_mrc_slice(vec![]))), + Mrc::new(e?) + )) + ).map(|cls| typed::Expr(cls, to_mrc_slice(vec![]))) +} + +/// Recursive state of [expr] +fn expr_rec(ast::Expr(val, typ): &ast::Expr, names: Stackframe>) +-> Result { + let typ: Vec = typ.iter() + .map(|c| clause_rec(c, names)) + .collect::>()?; + if let ast::Clause::S(paren, body) = val { + if *paren != '(' {return Err(Error::BadGroup(*paren))} + let typed::Expr(inner, inner_t) = exprv_rec(body.as_ref(), names)?; + let new_t = if typ.len() == 0 { inner_t } else { + to_mrc_slice(if inner_t.len() == 0 { typ } else { + inner_t.iter().chain(typ.iter()).cloned().collect() + }) + }; + Ok(typed::Expr(inner, new_t)) + } else { + Ok(typed::Expr(clause_rec(&val, names)?, to_mrc_slice(typ))) + } +} + +/// Recursive state of [clause] +fn clause_rec(cls: &ast::Clause, names: Stackframe>) +-> Result { + match cls { + ast::Clause::ExternFn(e) => Ok(typed::Clause::ExternFn(e.clone())), + ast::Clause::Atom(a) => Ok(typed::Clause::Atom(a.clone())), + ast::Clause::Auto(no, t, b) => Ok(typed::Clause::Auto( + if t.len() == 0 {None} else { + let typed::Expr(c, t) = exprv_rec(t.as_ref(), names)?; + if t.len() > 0 {return Err(Error::ExplicitBottomKind)} + else {Some(Mrc::new(c))} + }, + Mrc::new(exprv_rec(b.as_ref(), names.push(no.as_ref().map(|n| &**n)))?) + )), + ast::Clause::Lambda(n, t, b) => Ok(typed::Clause::Lambda( + if t.len() == 0 {None} else { + let typed::Expr(c, t) = exprv_rec(t.as_ref(), names)?; + if t.len() > 0 {return Err(Error::ExplicitBottomKind)} + else {Some(Mrc::new(c))} + }, + Mrc::new(exprv_rec(b.as_ref(), names.push(Some(&**n)))?) + )), + ast::Clause::Literal(l) => Ok(typed::Clause::Literal(l.clone())), + ast::Clause::Name { local: Some(arg), .. } => Ok(typed::Clause::Argument( + names.iter().position(|no| no == &Some(&**arg)) + .ok_or_else(|| Error::Unbound(arg.clone()))? 
+ )), + ast::Clause::S(paren, entries) => { + if *paren != '(' {return Err(Error::BadGroup(*paren))} + let typed::Expr(val, typ) = exprv_rec(entries.as_ref(), names)?; + if typ.len() == 0 {Ok(val)} + else {Err(Error::ExprToClause(typed::Expr(val, typ)))} + }, + ast::Clause::Name { local: None, .. } => Err(Error::Symbol), + ast::Clause::Placeh { .. } => Err(Error::Placeholder) + } +} \ No newline at end of file diff --git a/src/representations/literal.rs b/src/representations/literal.rs new file mode 100644 index 0000000..6286f4b --- /dev/null +++ b/src/representations/literal.rs @@ -0,0 +1,22 @@ +use ordered_float::NotNan; +use std::fmt::Debug; + +/// An exact value, read from the AST and unmodified in shape until compilation +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum Literal { + Num(NotNan), + Int(u64), + Char(char), + Str(String), +} + +impl Debug for Literal { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Num(arg0) => write!(f, "{:?}", arg0), + Self::Int(arg0) => write!(f, "{:?}", arg0), + Self::Char(arg0) => write!(f, "{:?}", arg0), + Self::Str(arg0) => write!(f, "{:?}", arg0), + } + } +} \ No newline at end of file diff --git a/src/representations/mod.rs b/src/representations/mod.rs new file mode 100644 index 0000000..ccabe32 --- /dev/null +++ b/src/representations/mod.rs @@ -0,0 +1,5 @@ +pub mod ast; +pub mod typed; +pub mod literal; +pub mod ast_to_typed; +pub use literal::Literal; diff --git a/src/representations/typed.rs b/src/representations/typed.rs new file mode 100644 index 0000000..2eb7c9b --- /dev/null +++ b/src/representations/typed.rs @@ -0,0 +1,147 @@ +use mappable_rc::Mrc; +use crate::executor::Atom; +use crate::utils::{to_mrc_slice, one_mrc_slice}; +use crate::{executor::ExternFn, utils::string_from_charset}; + +use super::{Literal, ast_to_typed}; +use super::ast; + +use std::fmt::{Debug, Write}; + +/// Indicates whether either side needs to be wrapped. 
Syntax whose end is ambiguous on that side +/// must use parentheses, or forward the flag +#[derive(PartialEq, Eq)] +struct Wrap(bool, bool); + +#[derive(PartialEq, Eq, Hash)] +pub struct Expr(pub Clause, pub Mrc<[Clause]>); +impl Expr { + fn deep_fmt(&self, f: &mut std::fmt::Formatter<'_>, depth: usize, tr: Wrap) -> std::fmt::Result { + let Expr(val, typ) = self; + if typ.len() > 0 { + val.deep_fmt(f, depth, Wrap(true, true))?; + for typ in typ.as_ref() { + f.write_char(':')?; + typ.deep_fmt(f, depth, Wrap(true, true))?; + } + } else { + val.deep_fmt(f, depth, tr)?; + } + Ok(()) + } +} + +impl Clone for Expr { + fn clone(&self) -> Self { + Self(self.0.clone(), Mrc::clone(&self.1)) + } +} + +impl Debug for Expr { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.deep_fmt(f, 0, Wrap(false, false)) + } +} + +#[derive(PartialEq, Eq, Hash)] +pub enum Clause { + Literal(Literal), + Apply(Mrc, Mrc), + /// Explicit specification of an Auto value + Explicit(Mrc, Mrc), + Lambda(Option>, Mrc), + Auto(Option>, Mrc), + Argument(usize), + ExternFn(ExternFn), + Atom(Atom) +} + +const ARGNAME_CHARSET: &str = "abcdefghijklmnopqrstuvwxyz"; + +fn parametric_fmt( + f: &mut std::fmt::Formatter<'_>, + prefix: &str, argtyp: Option>, body: Mrc, depth: usize, wrap_right: bool +) -> std::fmt::Result { + if wrap_right { f.write_char('(')?; } + f.write_str(prefix)?; + f.write_str(&string_from_charset(depth, ARGNAME_CHARSET))?; + if let Some(typ) = argtyp { + f.write_str(":")?; + typ.deep_fmt(f, depth, Wrap(false, false))?; + } + f.write_str(".")?; + body.deep_fmt(f, depth + 1, Wrap(false, false))?; + if wrap_right { f.write_char(')')?; } + Ok(()) +} + +impl Clause { + fn deep_fmt(&self, f: &mut std::fmt::Formatter<'_>, depth: usize, Wrap(wl, wr): Wrap) + -> std::fmt::Result { + match self { + Self::Literal(arg0) => write!(f, "{arg0:?}"), + Self::ExternFn(nc) => write!(f, "{nc:?}"), + Self::Atom(a) => write!(f, "{a:?}"), + Self::Lambda(argtyp, body) => parametric_fmt(f, + "\\", argtyp.as_ref().map(Mrc::clone), Mrc::clone(body), depth, wr + ), + Self::Auto(argtyp, body) => parametric_fmt(f, + "@", argtyp.as_ref().map(Mrc::clone), Mrc::clone(body), depth, wr + ), + Self::Argument(up) => f.write_str(&string_from_charset(depth - up - 1, ARGNAME_CHARSET)), + Self::Explicit(expr, param) => { + if wl { f.write_char('(')?; } + expr.deep_fmt(f, depth, Wrap(false, true))?; + f.write_str(" @")?; + param.deep_fmt(f, depth, Wrap(true, wr && !wl))?; + if wl { f.write_char(')')?; } + Ok(()) + } + Self::Apply(func, x) => { + if wl { f.write_char('(')?; } + func.deep_fmt(f, depth, Wrap(false, true) )?; + f.write_char(' ')?; + x.deep_fmt(f, depth, Wrap(true, wr && !wl) )?; + if wl { f.write_char(')')?; } + Ok(()) + } + } + } + pub fn wrap(self) -> Mrc { Mrc::new(Expr(self, to_mrc_slice(vec![]))) } + pub fn wrap_t(self, t: Clause) -> Mrc { Mrc::new(Expr(self, one_mrc_slice(t))) } +} + +impl Clone for Clause { + fn clone(&self) -> Self { + match self { + Clause::Auto(t, b) => Clause::Auto(t.as_ref().map(Mrc::clone), Mrc::clone(b)), + Clause::Lambda(t, b) => Clause::Lambda(t.as_ref().map(Mrc::clone), Mrc::clone(b)), + Clause::Literal(l) => Clause::Literal(l.clone()), + Clause::ExternFn(nc) => Clause::ExternFn(nc.clone()), + Clause::Atom(a) => Clause::Atom(a.clone()), + Clause::Apply(f, x) => Clause::Apply(Mrc::clone(f), Mrc::clone(x)), + Clause::Explicit(f, x) => Clause::Explicit(Mrc::clone(f), Mrc::clone(x)), + Clause::Argument(lvl) => Clause::Argument(*lvl) + } + } +} + +impl Debug for Clause { + fn 
fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.deep_fmt(f, 0, Wrap(false, false)) + } +} + +impl TryFrom<&ast::Expr> for Expr { + type Error = ast_to_typed::Error; + fn try_from(value: &ast::Expr) -> Result { + ast_to_typed::expr(value) + } +} + +impl TryFrom<&ast::Clause> for Clause { + type Error = ast_to_typed::Error; + fn try_from(value: &ast::Clause) -> Result { + ast_to_typed::clause(value) + } +} \ No newline at end of file diff --git a/src/rule/executor/execute.rs b/src/rule/executor/execute.rs index c1d06bc..e1dee7f 100644 --- a/src/rule/executor/execute.rs +++ b/src/rule/executor/execute.rs @@ -1,7 +1,7 @@ use hashbrown::HashMap; use mappable_rc::Mrc; -use crate::{expression::{Expr, Clause}, utils::{iter::{box_once, into_boxed_iter}, to_mrc_slice, one_mrc_slice}}; +use crate::{ast::{Expr, Clause}, utils::{iter::{box_once, into_boxed_iter}, to_mrc_slice, one_mrc_slice}, unwrap_or}; use super::{super::RuleError, state::{State, Entry}, slice_matcher::SliceMatcherDnC}; fn verify_scalar_vec(pattern: &Expr, is_vec: &mut HashMap) @@ -36,18 +36,18 @@ fn verify_scalar_vec(pattern: &Expr, is_vec: &mut HashMap) }; Ok(()) }; - let Expr(val, typ_opt) = pattern; + let Expr(val, typ) = pattern; verify_clause(val, is_vec)?; - if let Some(typ) = typ_opt { - verify_scalar_vec(typ, is_vec)?; + for typ in typ.as_ref() { + verify_clause(typ, is_vec)?; } Ok(()) } fn slice_to_vec(src: &mut Mrc<[Expr]>, tgt: &mut Mrc<[Expr]>) { - let prefix_expr = Expr(Clause::Placeh{key: "::prefix".to_string(), vec: Some((0, false))}, None); - let postfix_expr = Expr(Clause::Placeh{key: "::postfix".to_string(), vec: Some((0, false))}, None); + let prefix_expr = Expr(Clause::Placeh{key: "::prefix".to_string(), vec: Some((0, false))}, to_mrc_slice(vec![])); + let postfix_expr = Expr(Clause::Placeh{key: "::postfix".to_string(), vec: Some((0, false))}, to_mrc_slice(vec![])); // Prefix or postfix to match the full vector let head_multi = matches!(src.first().expect("Src can never be empty!").0, Clause::Placeh{vec: Some(_), ..}); let tail_multi = matches!(src.last().expect("Impossible branch!").0, Clause::Placeh{vec: Some(_), ..}); @@ -121,13 +121,13 @@ fn write_slice(state: &State, tpl: &Mrc<[Expr]>) -> Mrc<[Expr]> { write_slice(state, body) ), xpr_typ.to_owned())), Clause::Placeh{key, vec: None} => { - let real_key = if let Some(real_key) = key.strip_prefix('_') {real_key} else {key}; + let real_key = unwrap_or!(key.strip_prefix('_'); key); match &state[real_key] { Entry::Scalar(x) => box_once(x.as_ref().to_owned()), Entry::Name(n) => box_once(Expr(Clause::Name { local: Some(n.as_ref().to_owned()), qualified: one_mrc_slice(n.as_ref().to_owned()) - }, None)), + }, to_mrc_slice(vec![]))), _ => panic!("Scalar template may only be derived from scalar placeholder"), } }, @@ -135,7 +135,7 @@ fn write_slice(state: &State, tpl: &Mrc<[Expr]>) -> Mrc<[Expr]> { into_boxed_iter(v.as_ref().to_owned()) } else {panic!("Vectorial template may only be derived from vectorial placeholder")}, // Explicit base case so that we get an error if Clause gets new values - c@Clause::Literal(_) | c@Clause::Name { .. } => + c@Clause::Literal(_) | c@Clause::Name { .. 
} | c@Clause::ExternFn(_) | c@Clause::Atom(_) => box_once(Expr(c.to_owned(), xpr_typ.to_owned())) }).collect() } diff --git a/src/rule/executor/slice_matcher.rs b/src/rule/executor/slice_matcher.rs index a85bcd8..78ae783 100644 --- a/src/rule/executor/slice_matcher.rs +++ b/src/rule/executor/slice_matcher.rs @@ -2,8 +2,8 @@ use std::fmt::Debug; use mappable_rc::Mrc; -use crate::expression::{Expr, Clause}; -use crate::unwrap_or_continue; +use crate::ast::{Expr, Clause}; +use crate::unwrap_or; use crate::utils::iter::box_empty; use crate::utils::{Side, Cache, mrc_derive, mrc_try_derive, to_mrc_slice}; @@ -92,10 +92,7 @@ impl SliceMatcherDnC { pub fn valid_subdivisions(&self, range: Mrc<[Expr]> ) -> impl Iterator, Mrc<[Expr]>, Mrc<[Expr]>)> { - let own_max = { - if let Some(x) = self.own_max_size(range.len()) {x} - else {return box_empty()} - }; + let own_max = unwrap_or!(self.own_max_size(range.len()); return box_empty()); let own_min = self.own_min_size(); let lmin = self.min(Side::Left); let _lmax = self.max(Side::Left, range.len()); @@ -261,10 +258,11 @@ impl SliceMatcherDnC { // Step through valid slicings based on reported size constraints in order // from longest own section to shortest and from left to right for (left, own, right) in self.valid_subdivisions(target) { - return Some(unwrap_or_continue!( + return Some(unwrap_or!( self.apply_side_with_cache(Side::Left, left, cache) .and_then(|lres| lres + self.apply_side_with_cache(Side::Right, right, cache)) - .and_then(|side_res| side_res.insert_vec(name, own.as_ref())) + .and_then(|side_res| side_res.insert_vec(name, own.as_ref())); + continue )) } None diff --git a/src/rule/executor/split_at_max_vec.rs b/src/rule/executor/split_at_max_vec.rs index 8396c53..5a19235 100644 --- a/src/rule/executor/split_at_max_vec.rs +++ b/src/rule/executor/split_at_max_vec.rs @@ -1,7 +1,7 @@ use mappable_rc::Mrc; use itertools::Itertools; -use crate::expression::{Expr, Clause}; +use crate::ast::{Expr, Clause}; use crate::utils::{mrc_derive, mrc_try_derive}; pub type MaxVecSplit = (Mrc<[Expr]>, (Mrc, usize, bool), Mrc<[Expr]>); diff --git a/src/rule/executor/state.rs b/src/rule/executor/state.rs index 7714566..44644e3 100644 --- a/src/rule/executor/state.rs +++ b/src/rule/executor/state.rs @@ -2,7 +2,7 @@ use std::{ops::{Add, Index}, rc::Rc, fmt::Debug}; use hashbrown::HashMap; -use crate::expression::Expr; +use crate::ast::Expr; #[derive(Debug, PartialEq, Eq)] pub enum Entry { @@ -76,7 +76,9 @@ impl State { { if let Some(old) = self.0.get(k.as_ref()) { if let Entry::NameOpt(val) = old { - if val.as_ref().map(|s| s.as_ref().as_str()) != v.map(|s| s.as_ref()) {return None} + if val.as_ref().map(|s| s.as_ref().as_str()) != v.map(|s| s.as_ref()) { + return None + } } else {return None} } else { self.0.insert(k.to_string(), Entry::NameOpt(v.map(|s| Rc::new(s.to_string())))); @@ -142,4 +144,4 @@ impl Debug for State { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{:?}", self.0) } -} +} \ No newline at end of file diff --git a/src/rule/repository.rs b/src/rule/repository.rs index 78e8c85..7811dd4 100644 --- a/src/rule/repository.rs +++ b/src/rule/repository.rs @@ -2,10 +2,11 @@ use std::fmt::Debug; use mappable_rc::Mrc; -use crate::expression::Expr; +use crate::representations::ast::Expr; -use super::{super::expression::Rule, executor::execute, RuleError}; +use super::{super::ast::Rule, executor::execute, RuleError}; +/// Manages a priority queue of substitution rules and allows to apply them pub struct Repository(Vec); impl 
Repository { pub fn new(mut rules: Vec) -> Self { @@ -13,6 +14,7 @@ impl Repository { Self(rules) } + /// Attempt to run each rule in priority order once pub fn step(&self, mut code: Mrc<[Expr]>) -> Result>, RuleError> { let mut ran_once = false; for rule in self.0.iter() { @@ -27,11 +29,16 @@ impl Repository { Ok(if ran_once {Some(code)} else {None}) } - pub fn long_step(&self, mut code: Mrc<[Expr]>) -> Result, RuleError> { + /// Attempt to run each rule in priority order `limit` times. Returns the final + /// tree and the number of iterations left to the limit. + pub fn long_step(&self, mut code: Mrc<[Expr]>, mut limit: usize) + -> Result<(Mrc<[Expr]>, usize), RuleError> { while let Some(tmp) = self.step(Mrc::clone(&code))? { + if 0 >= limit {break} + limit -= 1; code = tmp } - Ok(code) + Ok((code, limit)) } } diff --git a/src/types/hindley_milner.rs.proto b/src/types/hindley_milner.rs.proto new file mode 100644 index 0000000..3bbf187 --- /dev/null +++ b/src/types/hindley_milner.rs.proto @@ -0,0 +1,52 @@ +use std::{borrow::Borrow}; +use std::hash::Hash; + +use hashbrown::HashMap; +use mappable_rc::Mrc; + +use crate::{ast::{Expr, Clause}, utils::mrc_to_iter}; + +pub struct Substitution(HashMap>); +impl Substitution { + fn new() -> Self { Self(HashMap::new()) } + fn apply(&self, q: &Q) -> Option> + where String: Borrow { + self.0.get(q).map(Mrc::clone) + } +} + +pub fn hindley_milner(a: Mrc<[Expr]>, b: Mrc<[Expr]>) -> Result { + hindley_milner_rec(Substitution::new(), a, b) +} + +pub fn hindley_milner_rec(mut s: Substitution, a: Mrc<[Expr]>, b: Mrc<[Expr]>) +-> Result { + if a.len() != b.len() {return Err(())} + for (mut a, mut b) in mrc_to_iter(a).zip(mrc_to_iter(b)) { + if let Clause::Placeh{key, ..} = &a.0 { + if let Some(ex) = s.apply(key) { a = ex } + } + if let Clause::Placeh{key, ..} = &b.0 { + if let Some(ex) = s.apply(key) { b = ex } + } + if !matches!(&a.0, Clause::Placeh{..}) { (a, b) = (b, a) } + match (&a.0, &b.0) { + (Clause::Placeh{key:a_key,..}, Clause::Placeh{key:b_key,..}) => + if a_key == b_key {return Ok(s)}, + + _ => return Err(()) + } + if let (Clause::Placeh{key: a_key,..}, Clause::Placeh{key: b_key,..}) = (&a.0, &b.0) { + if a_key == b_key {return Ok(s)} + } else if let (Clause::S(_, a_body), Clause::S(_, b_body)) = (&a.0, &b.0) { + s = hindley_milner_rec(s, Mrc::clone(a_body), Mrc::clone(b_body))? + } else if let () + } + Ok(s) +} + +pub fn occurs(key: &str, val: &Expr) -> bool { + match val.0 { + Clause::Auto(_, _, body) => body. + } +} \ No newline at end of file diff --git a/src/types/mod.rs b/src/types/mod.rs new file mode 100644 index 0000000..1533102 --- /dev/null +++ b/src/types/mod.rs @@ -0,0 +1,13 @@ +// mod hindley_milner; + +#[derive(Clone, Hash, PartialEq, Eq)] +pub enum Expression { + Literal(L), + Variable(V), + Operation(O, Vec>), + Lazy(F) +} + +pub struct Rule { + +} \ No newline at end of file diff --git a/src/types/unifier.rs b/src/types/unifier.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/utils/bfs.rs b/src/utils/bfs.rs new file mode 100644 index 0000000..f61436a --- /dev/null +++ b/src/utils/bfs.rs @@ -0,0 +1,113 @@ +use std::collections::{VecDeque, HashSet}; +use std::iter; +use std::hash::Hash; + +use crate::unwrap_or; +use crate::utils::BoxedIter; + +/// Two-stage breadth-first search; +/// Instead of enumerating neighbors before returning a node, it puts visited but not yet +/// enumerated nodes in a separate queue and only enumerates them to refill the queue of children +/// one by one once it's empty. 
This method is preferable for generated graphs because it doesn't +/// allocate memory for the children until necessary, but it's also probably a bit slower since +/// it involves additional processing. +/// +/// # Performance +/// `T` is cloned twice for each returned value. +pub fn bfs(init: T, neighbors: F) +-> impl Iterator +where T: Eq + Hash + Clone + std::fmt::Debug, + F: Fn(T) -> I, I: Iterator +{ + let mut visited: HashSet = HashSet::new(); + let mut visit_queue: VecDeque = VecDeque::from([init]); + let mut unpack_queue: VecDeque = VecDeque::new(); + iter::from_fn(move || { + let next = {loop { + let next = unwrap_or!(visit_queue.pop_front(); break None); + if !visited.contains(&next) { break Some(next) } + }}.or_else(|| loop { + let unpacked = unwrap_or!(unpack_queue.pop_front(); break None); + let mut nbv = neighbors(unpacked).filter(|t| !visited.contains(t)); + if let Some(next) = nbv.next() { + visit_queue.extend(nbv); + break Some(next) + } + })?; + visited.insert(next.clone()); + unpack_queue.push_back(next.clone()); + Some(next) + }) +} + +/// Same as [bfs] but with a recursion depth limit +/// +/// The main intent is to effectively walk infinite graphs of unknown breadth without making the +/// recursion depth dependent on the number of nodes. If predictable runtime is more important +/// than predictable depth, [bfs] with [std::iter::Iterator::take] should be used instead +pub fn bfs_upto<'a, T: 'a, F: 'a, I: 'a>(init: T, neighbors: F, limit: usize) +-> impl Iterator + 'a +where T: Eq + Hash + Clone + std::fmt::Debug, + F: Fn(T) -> I, I: Iterator +{ + /// Newtype to store the recursion depth but exclude it from equality comparisons + /// Because BFS visits nodes in increasing distance order, when a node is visited for the + /// second time it will never override the earlier version of itself. This is not the case + /// with Djikstra's algorithm, which can be conceptualised as a "weighted BFS". 
+ #[derive(Eq, Clone, Debug)] + struct Wrap(usize, U); + impl PartialEq for Wrap { + fn eq(&self, other: &Self) -> bool { self.1.eq(&other.1) } + } + impl Hash for Wrap { + fn hash(&self, state: &mut H) { self.1.hash(state) } + } + bfs(Wrap(0, init), move |Wrap(dist, t)| -> BoxedIter> { // boxed because we branch + if dist == limit {Box::new(iter::empty())} + else {Box::new(neighbors(t).map(move |t| Wrap(dist + 1, t)))} + }).map(|Wrap(_, t)| t) +} + +#[cfg(test)] +mod tests { + use itertools::Itertools; + + use super::*; + + type Graph = Vec>; + fn neighbors(graph: &Graph, pt: usize) -> impl Iterator + '_ { + graph[pt].iter().copied() + } + fn from_neighborhood_matrix(matrix: Vec>) -> Graph { + matrix.into_iter().map(|v| { + v.into_iter().enumerate().filter_map(|(i, ent)| { + if ent > 1 {panic!("Neighborhood matrices must contain binary values")} + else if ent == 1 {Some(i)} + else {None} + }).collect() + }).collect() + } + + #[test] + fn test_square() { + let simple_graph = from_neighborhood_matrix(vec![ + vec![0,1,0,1,1,0,0,0], + vec![1,0,1,0,0,1,0,0], + vec![0,1,0,1,0,0,1,0], + vec![1,0,1,0,0,0,0,1], + vec![1,0,0,0,0,1,0,1], + vec![0,1,0,0,1,0,1,0], + vec![0,0,1,0,0,1,0,1], + vec![0,0,0,1,1,0,1,0], + ]); + let scan = bfs(0, |n| neighbors(&simple_graph, n)).collect_vec(); + assert_eq!(scan, vec![0, 1, 3, 4, 2, 5, 7, 6]) + } + #[test] + fn test_stringbuilder() { + let scan = bfs("".to_string(), |s| { + vec![s.clone()+";", s.clone()+"a", s+"aaa"].into_iter() + }).take(30).collect_vec(); + println!("{scan:?}") + } +} \ No newline at end of file diff --git a/src/utils/for_loop.rs b/src/utils/for_loop.rs new file mode 100644 index 0000000..b5c05ad --- /dev/null +++ b/src/utils/for_loop.rs @@ -0,0 +1,91 @@ +/// Imitates a regular for loop with an exit clause using Rust's `loop` keyword. +/// This macro brings the break value to all existing Rust loops, by allowing you to specify +/// an exit expression in case the loop was broken by the condition and not an explicit `break`. +/// +/// Since the exit expression can also be a block, this also allows you to execute other code when +/// the condition fails. This can also be used to re-enter the loop with an explicit `continue` +/// statement. +/// +/// The macro also adds support for classic for loops familiar to everyone since C, except with +/// the addition of an exit statement these too can be turned into expressions. +/// +/// ``` +/// xloop!(for i in 0..10; { +/// connection.try_connect() +/// if connection.ready() { +/// break Some(connection) +/// } +/// }; None) +/// ``` +/// +/// While loop with reentry. This is a very convoluted example but displays the idea quite clearly. +/// +/// ``` +/// xloop!(while socket.is_open(); { +/// let (data, is_end) = socket.read(); +/// all_data.append(data) +/// if is_end { break Ok(all_data) } +/// }; { +/// if let Ok(new_sock) = open_socket(socket.position()) { +/// new_sock.set_position(socket.position()); +/// socket = new_sock; +/// continue +/// } else { +/// Err(DownloadError::ConnectionLost) +/// } +/// }) +/// ``` +/// +/// CUDA algorythm for O(log n) summation using a C loop +/// +/// ``` +/// xloop!(let mut leap = 1; own_id*2 + leap < batch_size; leap *= 2; { +/// batch[own_id*2] += batch[own_id*2 + leap] +/// }) +/// ``` +/// +/// The above loop isn't used as an expression, but an exit expression - or block - can be added +/// to these as well just like the others. In all cases the exit expression is optional, its +/// default value is `()`. 
+/// +/// **todo** find a valid use case for While let for a demo +#[macro_export] +macro_rules! xloop { + (for $p:pat in $it:expr; $body:stmt) => { + xloop!(for $p in $it; $body; ()) + }; + (for $p:pat in $it:expr; $body:stmt; $exit:stmt) => { + { + let mut __xloop__ = $it.into_iter(); + xloop!(let Some($p) = __xloop__.next(); $body; $exit) + } + }; + (let $p:pat = $e:expr; $body:stmt) => { + xloop!(let $p = $e; $body; ()) + }; + (let $p:pat = $e:expr; $body:stmt; $exit:stmt) => { + { + loop { + if let $p = $e { $body } + else { break { $exit } } + } + } + }; + (while $cond:expr; $body:stmt) => { + xloop!($cond; $body; ()) + }; + (while $cond:expr; $body:stmt; $exit:stmt) => { + { + loop { + if $cond { break { $exit } } + else { $body } + } + } + }; + ($init:stmt; $cond:expr; $step:stmt; $body:stmt) => { + xloop!(for ( $init; $cond; $step ) $body; ()) + }; + ($init:stmt; $cond:expr; $step:stmt; $body:stmt; $exit:stmt) => { + { $init; xloop!(while !($cond); { $body; $step }; $exit) } + }; +} \ No newline at end of file diff --git a/src/utils/iter.rs b/src/utils/iter.rs index 15ec2af..0014c56 100644 --- a/src/utils/iter.rs +++ b/src/utils/iter.rs @@ -33,4 +33,4 @@ where pub fn into_boxed_iter<'a, T: 'a>(t: T) -> BoxedIter<'a, ::Item> where T: IntoIterator { Box::new(t.into_iter()) -} +} \ No newline at end of file diff --git a/src/utils/mod.rs b/src/utils/mod.rs index fd0fdba..a97bd1f 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -2,8 +2,13 @@ mod cache; mod substack; mod side; mod merge_sorted; -mod unwrap_or_continue; +mod unwrap_or; pub mod iter; +mod bfs; +mod unless_let; +mod string_from_charset; +mod for_loop; +mod protomap; pub use cache::Cache; use mappable_rc::Mrc; @@ -11,6 +16,7 @@ pub use substack::Stackframe; pub use side::Side; pub use merge_sorted::merge_sorted; pub use iter::BoxedIter; +pub use string_from_charset::string_from_charset; pub fn mrc_derive(m: &Mrc, p: P) -> Mrc where P: for<'a> FnOnce(&'a T) -> &'a U { @@ -37,3 +43,31 @@ pub fn mrc_derive_slice(mv: &Mrc>) -> Mrc<[T]> { pub fn one_mrc_slice(t: T) -> Mrc<[T]> { Mrc::map(Mrc::new([t; 1]), |v| v.as_slice()) } + +pub fn mrc_to_iter(ms: Mrc<[T]>) -> impl Iterator> { + let mut i = 0; + std::iter::from_fn(move || if i < ms.len() { + let out = Some(mrc_derive(&ms, |s| &s[i])); + i += 1; + out + } else {None}) +} + +pub fn mrc_unnest(m: &Mrc>) -> Mrc { + Mrc::clone(m.as_ref()) +} + +pub fn mrc_slice_to_only(m: Mrc<[T]>) -> Result, ()> { + Mrc::try_map(m, |slice| { + if slice.len() != 1 {None} + else {Some(&slice[0])} + }).map_err(|_| ()) +} + +pub fn mrc_slice_to_only_option(m: Mrc<[T]>) -> Result>, ()> { + if m.len() > 1 {return Err(())} + Ok(Mrc::try_map(m, |slice| { + if slice.len() == 0 {None} + else {Some(&slice[0])} + }).ok()) +} \ No newline at end of file diff --git a/src/utils/protomap.rs b/src/utils/protomap.rs new file mode 100644 index 0000000..e39dd9f --- /dev/null +++ b/src/utils/protomap.rs @@ -0,0 +1,152 @@ +use std::{iter, ops::{Index, Add}, borrow::Borrow}; + +use smallvec::SmallVec; + +const INLINE_ENTRIES: usize = 2; + +/// Linked-array-list of key-value pairs. 
+/// Lookup and modification is O(n + cachemiss * n / m) +/// Can be extended by reference in O(m) < O(n) +pub struct ProtoMap<'a, K, V> { + entries: SmallVec<[(K, Option); INLINE_ENTRIES]>, + prototype: Option<&'a ProtoMap<'a, K, V>> +} + +impl<'a, K, V> ProtoMap<'a, K, V> { + pub fn new() -> Self { + Self { + entries: SmallVec::new(), + prototype: None + } + } + + /// Mutable reference to entry without checking proto in O(m) + fn local_entry_mut<'b, Q: ?Sized>(&'b mut self, query: &Q) + -> Option<(usize, &'b mut K, &'b mut Option)> + where K: Borrow, Q: Eq + { + self.entries.iter_mut().enumerate().find_map(|(i, (k, v))| { + if query.eq((*k).borrow()) { Some((i, k, v)) } else { None } + }) + } + + /// Entry without checking proto in O(m) + fn local_entry<'b, Q: ?Sized>(&'b self, query: &Q) + -> Option<(usize, &'b K, &'b Option)> + where K: Borrow, Q: Eq + { + self.entries.iter().enumerate().find_map(|(i, (k, v))| { + if query.eq((*k).borrow()) { Some((i, k, v)) } else { None } + }) + } + + /// Find entry in prototype chain in O(n) + pub fn get<'b, Q: ?Sized>(&'b self, query: &Q) -> Option<&'b V> + where K: Borrow, Q: Eq + { + if let Some((_, _, v)) = self.local_entry(query) { + v.as_ref() + } else { + self.prototype?.get(query) + } + } + + /// Record a value for the given key in O(m) + pub fn set(&mut self, key: &K, value: V) where K: Eq + Clone { + if let Some((_, _, v)) = self.local_entry_mut(key) { + *v = Some(value); + } else { + self.entries.push((key.clone(), Some(value))) + } + } + + /// Delete in a memory-efficient way in O(n) + pub fn delete_small(&mut self, key: &K) where K: Eq + Clone { + let exists_up = self.prototype.and_then(|p| p.get(key)).is_some(); + let local_entry = self.local_entry_mut(key); + match (exists_up, local_entry) { + (false, None) => (), // nothing to do + (false, Some((i, _, _))) => { self.entries.remove(i); }, // forget locally + (true, Some((_, _, v))) => *v = None, // update local override to cover + (true, None) => self.entries.push((key.clone(), None)), // create new + } + } + + /// Delete in O(m) without checking the prototype chain + /// May produce unnecessary cover over previously unknown key + pub fn delete_fast(&mut self, key: &K) where K: Eq + Clone { + if let Some((_, _, v)) = self.local_entry_mut(key) { + *v = None + } else { + self.entries.push((key.clone(), None)) + } + } + + /// Iterate over the values defined herein and on the prototype chain + /// Note that this will visit keys multiple times + pub fn iter(&self) -> impl Iterator)> { + let mut map = self; + iter::from_fn(move || { + let pairs = map.entries.iter(); + map = map.prototype?; + Some(pairs) + }).flatten() + } + + /// Visit the keys in an unsafe random order, repeated arbitrarily many times + pub fn keys(&self) -> impl Iterator { + self.iter().map(|(k, _)| k) + } + + /// Visit the values in random order + pub fn values(&self) -> impl Iterator { + self.iter().filter_map(|(_, v)| v.as_ref()) + } + + /// Update the prototype, and correspondingly the lifetime of the map + pub fn set_proto<'b>(self, proto: &'b ProtoMap<'b, K, V>) -> ProtoMap<'b, K, V> { + ProtoMap { + entries: self.entries, + prototype: Some(proto) + } + } +} + +impl From for ProtoMap<'_, K, V> where T: IntoIterator { + fn from(value: T) -> Self { + Self { + entries: value.into_iter().map(|(k, v)| (k, Some(v))).collect(), + prototype: None + } + } +} + +impl Index<&Q> for ProtoMap<'_, K, V> where K: Borrow, Q: Eq { + type Output = V; + fn index(&self, index: &Q) -> &Self::Output { + self.get(index).expect("Index not 
found in map") + } +} + +impl Clone for ProtoMap<'_, K, V> { + fn clone(&self) -> Self { + Self { + entries: self.entries.clone(), + prototype: self.prototype + } + } +} + +impl<'a, K: 'a, V: 'a> Add<(K, V)> for &'a ProtoMap<'a, K, V> { + type Output = ProtoMap<'a, K, V>; + fn add(self, rhs: (K, V)) -> Self::Output { + ProtoMap::from([rhs]).set_proto(self) + } +} + +#[macro_export] +macro_rules! protomap { + ($($ent:expr),*) => { + ProtoMap::from([$($ent:expr),*]) + }; +} diff --git a/src/utils/string_from_charset.rs b/src/utils/string_from_charset.rs new file mode 100644 index 0000000..7d925b8 --- /dev/null +++ b/src/utils/string_from_charset.rs @@ -0,0 +1,14 @@ +fn string_from_charset_rec(val: usize, digits: &str) -> String { + let radix = digits.len(); + let mut prefix = if val > radix { + string_from_charset_rec(val / radix, digits) + } else {String::new()}; + prefix.push(digits.chars().nth(val - 1).unwrap_or_else(|| { + panic!("Overindexed digit set \"{}\" with {}", digits, val - 1) + })); + prefix +} + +pub fn string_from_charset(val: usize, digits: &str) -> String { + string_from_charset_rec(val + 1, digits) +} \ No newline at end of file diff --git a/src/utils/unless_let.rs b/src/utils/unless_let.rs new file mode 100644 index 0000000..da44ed2 --- /dev/null +++ b/src/utils/unless_let.rs @@ -0,0 +1,6 @@ +#[macro_export] +macro_rules! unless_let { + ($m:pat_param = $expr:tt) => { + if let $m = $expr {} else + } +} \ No newline at end of file diff --git a/src/utils/unwrap_or.rs b/src/utils/unwrap_or.rs new file mode 100644 index 0000000..24d187b --- /dev/null +++ b/src/utils/unwrap_or.rs @@ -0,0 +1,6 @@ +#[macro_export] +macro_rules! unwrap_or { + ($m:expr; $fail:expr) => { + { if let Some(res) = ($m) {res} else {$fail} } + } +} \ No newline at end of file diff --git a/src/utils/unwrap_or_continue.rs b/src/utils/unwrap_or_continue.rs deleted file mode 100644 index 5dd0e2b..0000000 --- a/src/utils/unwrap_or_continue.rs +++ /dev/null @@ -1,6 +0,0 @@ -#[macro_export] -macro_rules! unwrap_or_continue { - ($m:expr) => { - { if let Some(res) = ($m) {res} else {continue} } - } -} \ No newline at end of file
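A short usage sketch for the `ProtoMap` added in `src/utils/protomap.rs`, assuming the signatures shown in the diff (`From` for construction, `&map + (key, value)` to extend by reference, `set`/`delete_small` for local overrides) and that the type is in scope:

```rust
fn protomap_demo() {
    let base: ProtoMap<'_, &str, i32> = ProtoMap::from([("x", 1), ("y", 2)]);

    // Extend by reference in O(m); `child` only stores its own entries.
    let mut child = &base + ("z", 3);
    assert_eq!(child["x"], 1); // falls through to the prototype
    assert_eq!(child["z"], 3);

    child.set(&"x", 10); // local override, the prototype is untouched
    assert_eq!(child["x"], 10);
    assert_eq!(base["x"], 1);

    child.delete_small(&"y"); // covers the prototype's entry with None
    assert!(child.get(&"y").is_none());
}
```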