diff --git a/Cargo.lock b/Cargo.lock index e04dec1..1533753 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -22,6 +22,12 @@ dependencies = [ "version_check", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "cfg-if" version = "1.0.0" @@ -76,6 +82,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + [[package]] name = "getrandom" version = "0.2.6" @@ -96,6 +108,15 @@ dependencies = [ "ahash 0.7.6", ] +[[package]] +name = "itertools" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3" +dependencies = [ + "either", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -108,6 +129,21 @@ version = "0.2.126" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +[[package]] +name = "mappable-rc" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65e7f462b4fbfe1a3c857747c9d027dd55faffaeffbca68f70d0becfe7e93c5" + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.12.0" @@ -121,9 +157,21 @@ dependencies = [ "chumsky", "derivative", "hashbrown", + "itertools", + "mappable-rc", + "ordered-float", "thiserror", ] +[[package]] +name = "ordered-float" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" +dependencies = [ + "num-traits", +] + [[package]] name = "proc-macro-hack" version = "0.5.19" diff --git a/Cargo.toml b/Cargo.toml index f9b58e0..6d6ac1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,4 +9,7 @@ edition = "2021" thiserror = "1.0" chumsky = "0.8" derivative = "2.2" -hashbrown = "0.12" \ No newline at end of file +hashbrown = "0.12" +mappable-rc = "0.1" +ordered-float = "3.0" +itertools = "0.10" \ No newline at end of file diff --git a/README.md b/README.md index 965bc6a..c59bcda 100644 --- a/README.md +++ b/README.md @@ -252,7 +252,7 @@ TODO: carriage example Files are the smallest unit of namespacing, automatically grouped into folders and forming a tree the leaves of which are the actual symbols. An -exported symbol is a name referenced in an exported substitution pattern +exported symbol is a name referenced in an exported substitution rule or assigned to an exported function. Imported symbols are considered identical to the same symbol directly imported from the same module for the purposes of substitution. diff --git a/examples/dummy_project/main.orc b/examples/dummy_project/main.orc index 935a765..1307b43 100644 --- a/examples/dummy_project/main.orc +++ b/examples/dummy_project/main.orc @@ -1,18 +1,18 @@ import std::io::(println, out) -- imports --- single word substitution (alias) -greet = \name. printf out "Hello {}!\n" [name] +-- single word rule (alias) +greet =1=> (\name. printf out "Hello {}!\n" [name]) --- multi-word exported substitution -export (...$pre ;) $a ...$post) =200=> (...$pre (greet $a) ...$post) +-- multi-word exported rule +export ;> $a =200=> (greet $a) --- single-word exported substitution +-- single-word exported rule export main = ( - print "What is your name? >> + print "What is your name?" >> readln >>= \name. greet name ) -- The broadest trait definition in existence -Foo = Bar Baz -default anyFoo = @T. @impl:(T (Bar Baz)). 
impl:(T Foo) \ No newline at end of file +Foo = (Bar Baz) +-- default anyFoo = @T. @impl:(T (Bar Baz)). impl:(T Foo) \ No newline at end of file diff --git a/notes.md b/notes.md index 81bc09e..02933df 100644 --- a/notes.md +++ b/notes.md @@ -16,4 +16,3 @@ export main = ( greet name ) ``` - diff --git a/src/expression.rs b/src/expression.rs new file mode 100644 index 0000000..971fdde --- /dev/null +++ b/src/expression.rs @@ -0,0 +1,52 @@ +use ordered_float::NotNan; +use std::{fmt::Debug}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Literal { + Num(NotNan), + Int(u64), + Char(char), + Str(String), +} + +/// An S-expression with a type +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Expr(pub Clause, pub Option>); + +impl Expr { + /// Replace all occurences of a name in the tree with a parameter, to bypass name resolution + pub fn bind_parameter(&mut self, name: &str) { + self.0.bind_parameter(name); + if let Some(typ) = &mut self.1 { + typ.bind_parameter(name); + } + } +} + +/// An S-expression as read from a source file +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Clause { + Literal(Literal), + Name(Vec), + S(char, Vec), + Lambda(String, Vec, Vec), + Auto(Option, Vec, Vec), + Parameter(String) +} + +impl Clause { + /// Replace all occurences of a name in the tree with a parameter, to bypass name resolution + pub fn bind_parameter(&mut self, name: &str) { + match self { + Clause::Name(n) => if n.len() == 1 && n[0] == name { + *self = Clause::Parameter(name.to_string()) + } + Clause::S(_, exprv) => for expr in exprv { expr.bind_parameter(name) } + Clause::Lambda(_, typ, body) | Clause::Auto(_, typ, body) => { + for expr in typ { expr.bind_parameter(name) } + for expr in body { expr.bind_parameter(name) } + } + _ => () + } + } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 18ee30d..1a6f35c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,16 +1,63 @@ -use std::io::{self, Read}; +use core::ops::Range; +use 
std::{env::current_dir, fs::read_to_string}; +use std::io; -use chumsky::{Parser, prelude::*}; +use chumsky::prelude::end; +use chumsky::{Parser, Stream}; +use parse::{LexerEntry, FileEntry}; +// use project::{rule_collector, file_loader, Loaded}; mod parse; mod project; mod utils; +mod expression; +pub use expression::*; + +fn literal(orig: &[&str]) -> Vec { + orig.iter().map(|&s| s.to_owned()).collect() +} + +static PRELUDE:&str = r#" +export ... $name =1000=> (match_seqence $name) +export ] =1000=> conslist_carriage(none) +export , $name conslist_carriage($tail) =1000=> conslist_carriage((some (cons $name $tail))) +export [ $name conslist_carriage($tail) =1000=> (some (cons $name $tail)) +export (match_sequence $lhs) >> (match_sequence $rhs) =100=> (bind ($lhs) (\_. $rhs)) +export (match_sequence $lhs) >>= (match_sequence $rhs) =100=> (bind ($lhs) ($rhs)) +"#; fn main() { - let mut input = String::new(); - let mut stdin = io::stdin(); - stdin.read_to_string(&mut input).unwrap(); - let ops: Vec<&str> = vec!["$", "."]; - let output = parse::expression_parser(&ops).then_ignore(end()).parse(input); - println!("\nParsed:\n{:?}", output); + // let mut input = String::new(); + // let mut stdin = io::stdin(); + // stdin.read_to_string(&mut input).unwrap(); + let ops: Vec<&str> = vec!["...", ">>", ">>=", "[", "]", ",", "$"]; + let data = read_to_string("./main.orc").unwrap(); + let lexed = parse::lexer(&ops).parse(data).unwrap(); + println!("Lexed: {:?}", lexed); + let parsr = parse::line_parser().then_ignore(end()); + // match parsr.parse(data) { + // Ok(output) => println!("\nParsed:\n{:?}", output), + // Err(e) => println!("\nErrored:\n{:?}", e) + // } + let lines = lexed.iter().filter_map(|v| { + let parse::LexerEntry(_, Range{ end, .. 
}) = v.last().unwrap().clone(); + let tuples = v.into_iter().map(|LexerEntry(l, r)| (l.clone(), r.clone())); + Some(parsr.parse_recovery_verbose(Stream::from_iter(end..end+1, tuples))) + }).collect::>(); + for (id, (out, errs)) in lines.into_iter().enumerate() { + println!("Parsing line {}", id); + if let Some(output) = out { println!("Parsed:\n{:?}", output) } + else { println!("Failed to produce output")} + if errs.len() > 0 { println!("Errored:\n{:?}", errs)} + } + // let output = parse::file_parser(&ops, &ops).parse(data).unwrap(); + // let cwd = current_dir().unwrap(); + // let collect_rules = rule_collector(move |n| { + // if n == vec!["prelude"] { Ok(Loaded::Module(PRELUDE.to_string())) } + // else { file_loader(cwd.clone())(n) } + // }, literal(&["...", ">>", ">>=", "[", "]", ","])); + // let rules = collect_rules.try_find(&literal(&["main"])).unwrap(); + // for rule in rules.iter() { + // println!("{:?} ={}=> {:?}", rule.source, rule.priority, rule.target) + // } } diff --git a/src/parse/comment.rs b/src/parse/comment.rs new file mode 100644 index 0000000..c0e6bb7 --- /dev/null +++ b/src/parse/comment.rs @@ -0,0 +1,13 @@ +pub use chumsky::{self, prelude::*, Parser}; + +/// Parses Lua-style comments +pub fn comment_parser() -> impl Parser> { + choice(( + just("--[").ignore_then(take_until( + just("]--").ignored() + )), + just("--").ignore_then(take_until( + just("\n").rewind().ignored().or(end()) + )) + )).map(|(vc, ())| vc).collect().labelled("comment") +} diff --git a/src/parse/enum_parser.rs b/src/parse/enum_parser.rs new file mode 100644 index 0000000..375e165 --- /dev/null +++ b/src/parse/enum_parser.rs @@ -0,0 +1,26 @@ +#[macro_export] +macro_rules! 
enum_parser { + ($p:path | $m:tt) => { + { + ::chumsky::prelude::filter_map(|s, l| { + if let $p(x) = l { Ok(x) } + else { Err(::chumsky::prelude::Simple::custom(s, $m))} + }) + } + }; + ($p:path >> $q:path; $i:ident) => { + { + use $p as srcpath; + use $q as tgtpath; + enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i) + } + }; + ($p:path >> $q:path; $($i:ident),+) => { + { + ::chumsky::prelude::choice(( + $( enum_parser!($p >> $q; $i) ),+ + )) + } + }; + ($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) }; +} \ No newline at end of file diff --git a/src/parse/expression.rs b/src/parse/expression.rs index c57aca0..8bb3149 100644 --- a/src/parse/expression.rs +++ b/src/parse/expression.rs @@ -1,86 +1,90 @@ -use std::{fmt::Debug}; use chumsky::{self, prelude::*, Parser}; +use crate::{Clause, Expr, Literal, enum_parser}; -use super::string; -use super::number; -use super::misc; -use super::name; +use super::{lexer::Lexeme}; -/// An S-expression as read from a source file -#[derive(Debug, Clone)] -pub enum Expr { - Num(f64), - Int(u64), - Char(char), - Str(String), - Name(Vec), - S(Vec), - Lambda(String, Option>, Vec), - Auto(Option, Option>, Vec), - - Typed(Box, Box) +fn sexpr_parser

<P>( + expr: P +) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone +where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone { + Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, b)) } -/// Parse a type annotation -fn typed_parser<'a>( - expr: Recursive<'a, char, Expr, Simple<char>> -) -> impl Parser<char, Expr, Error = Simple<char>> + 'a { - just(':').ignore_then(expr) +fn lambda_parser

<P>( + expr: P +) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone +where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone { + just(Lexeme::BS) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .ignore_then(enum_parser!(Lexeme::Name)) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .then( + just(Lexeme::Type) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .ignore_then(expr.clone().repeated()) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .or_not().map(Option::unwrap_or_default) + ) + .then_ignore(just(Lexeme::name("."))) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .then(expr.repeated().at_least(1)) + .map(|((name, typ), mut body): ((String, Vec<Expr>), Vec<Expr>)| { + for ent in &mut body { ent.bind_parameter(&name) }; + Clause::Lambda(name, typ, body) + }) +} + +fn auto_parser

( + expr: P +) -> impl Parser> + Clone +where P: Parser> + Clone { + just(Lexeme::At) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .ignore_then(enum_parser!(Lexeme::Name).or_not()) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .then( + just(Lexeme::Type) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .ignore_then(expr.clone().repeated()) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + ) + .then_ignore(just(Lexeme::name("."))) + .then_ignore(enum_parser!(Lexeme::Comment).repeated()) + .then(expr.repeated().at_least(1)) + .try_map(|((name, typ), mut body), s| if name == None && typ.is_empty() { + Err(Simple::custom(s, "Auto without name or type has no effect")) + } else { + if let Some(n) = &name { + for ent in &mut body { ent.bind_parameter(n) } + } + Ok(Clause::Auto(name, typ, body)) + }) +} + +fn name_parser() -> impl Parser, Error = Simple> + Clone { + enum_parser!(Lexeme::Name).separated_by( + enum_parser!(Lexeme::Comment).repeated() + .then(just(Lexeme::NS)) + .then(enum_parser!(Lexeme::Comment).repeated()) + ).at_least(1) } /// Parse an expression without a type annotation -fn untyped_xpr_parser<'a>( - expr: Recursive<'a, char, Expr, Simple>, - ops: &[&'a str] -) -> impl Parser> + 'a { - // basic S-expression rule - let sexpr = expr.clone() - .repeated() - .delimited_by(just('('), just(')')) - .map(Expr::S); - // Blocks - // can and therefore do match everything up to the closing paren - // \name. body - // \name:type. body - let lambda = just('\\') - .ignore_then(text::ident()) - .then(typed_parser(expr.clone()).or_not()) - .then_ignore(just('.')) - .then(expr.clone().repeated().at_least(1)) - .map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body)); - // @name. body - // @name:type. body - // @:type. 
body - let auto = just('@') - .ignore_then(text::ident().or_not()) - .then(typed_parser(expr.clone()).or_not()) - .then_ignore(just('.')) - .then(expr.clone().repeated().at_least(1)) - .map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body)); - choice(( - number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence - number::float_parser().map(Expr::Num), - string::char_parser().map(Expr::Char), - string::str_parser().map(Expr::Str), - name::name_parser(ops).map(Expr::Name), // includes namespacing - sexpr, - lambda, - auto - )).padded() -} - -/// Parse any expression with a type annotation, surrounded by comments -pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser> + 'a { - // This approach to parsing comments is ugly and error-prone, - // but I don't have a lot of other ideas - return recursive(|expr| { - return misc::comment_parser().or_not().ignore_then( - untyped_xpr_parser(expr.clone(), &ops) - .then(typed_parser(expr).or_not()) - .map(|(val, t)| match t { - Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)), - None => val - }) - ).then_ignore(misc::comment_parser().or_not()) +pub fn xpr_parser() -> impl Parser> { + recursive(|expr| { + let clause = + enum_parser!(Lexeme::Comment).repeated() + .ignore_then(choice(( + enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Clause::Literal), + name_parser().map(Clause::Name), + sexpr_parser(expr.clone()), + lambda_parser(expr.clone()), + auto_parser(expr.clone()) + ))).then_ignore(enum_parser!(Lexeme::Comment).repeated()); + clause.clone().then( + just(Lexeme::Type) + .ignore_then(expr.clone()).or_not() + ) + .map(|(val, typ)| Expr(val, typ.map(Box::new))) }) } \ No newline at end of file diff --git a/src/parse/import.rs b/src/parse/import.rs index 21d85ee..670410d 100644 --- a/src/parse/import.rs +++ b/src/parse/import.rs @@ -1,7 +1,9 @@ use std::iter; use chumsky::{Parser, prelude::*}; -use super::name; +use crate::{enum_parser, utils::BoxedIter}; + +use 
super::lexer::Lexeme; #[derive(Debug, Clone)] pub struct Import { @@ -9,15 +11,10 @@ pub struct Import { pub name: Option } - -pub type BoxedStrIter = Box>; -pub type BoxedStrIterIter = Box>; - -/// initialize a Box>>> -/// with a single element. -fn init_table(name: String) -> BoxedStrIterIter { - // I'm not confident at all that this is a good approach. - Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter)) +/// initialize a BoxedIter> with a single element. +fn init_table(name: String) -> BoxedIter<'static, BoxedIter<'static, String>> { + // I'm not at all confident that this is a good approach. + Box::new(iter::once(Box::new(iter::once(name)) as BoxedIter)) } /// Parse an import command @@ -25,29 +22,38 @@ fn init_table(name: String) -> BoxedStrIterIter { /// and the delimiters are plain parentheses. Namespaces should preferably contain /// crossplatform filename-legal characters but the symbols are explicitly allowed /// to go wild. There's a blacklist in [name] -pub fn import_parser() -> impl Parser, Error = Simple> { +pub fn import_parser() -> impl Parser, Error = Simple> { // TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad. 
- recursive(|expr: Recursive>| { - name::modname_parser() - .padded() - .then_ignore(just("::")) - .repeated() + recursive(|expr: Recursive>, Simple>| { + enum_parser!(Lexeme::Name) + .separated_by(just(Lexeme::NS)) .then( - choice(( - expr.clone() - .separated_by(just(',')) - .delimited_by(just('('), just(')')) - .map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter), - // Each expr returns a list of imports, flatten those into a common list - just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped - name::modname_parser().map(init_table) // Just a name, wrapped - )).padded() - ).map(|(pre, post)| { - Box::new(post.map(move |el| { - Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter - })) as BoxedStrIterIter + just(Lexeme::NS) + .ignore_then( + choice(( + expr.clone() + .separated_by(just(Lexeme::name(","))) + .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('('))) + .map(|v| Box::new(v.into_iter().flatten()) as BoxedIter>) + .labelled("import group"), + // Each expr returns a list of imports, flatten those into a common list + just(Lexeme::name("*")).map(|_| init_table("*".to_string())) + .labelled("wildcard import"), // Just a *, wrapped + enum_parser!(Lexeme::Name).map(init_table) + .labelled("import terminal") // Just a name, wrapped + )) + ).or_not() + ) + .map(|(name, opt_post): (Vec, Option>>)| -> BoxedIter> { + if let Some(post) = opt_post { + Box::new(post.map(move |el| { + Box::new(name.clone().into_iter().chain(el)) as BoxedIter + })) as BoxedIter> + } else { + Box::new(iter::once(Box::new(name.into_iter()) as BoxedIter)) + } }) - }).padded().map(|paths| { + }).map(|paths| { paths.filter_map(|namespaces| { let mut path: Vec = namespaces.collect(); match path.pop()?.as_str() { @@ -55,5 +61,5 @@ pub fn import_parser() -> impl Parser, Error = Simple> { name => Some(Import { path, name: Some(name.to_owned()) }) } }).collect() - }) + }).labelled("import") } \ No newline at end of file diff --git a/src/parse/lexer.rs 
b/src/parse/lexer.rs new file mode 100644 index 0000000..92a1549 --- /dev/null +++ b/src/parse/lexer.rs @@ -0,0 +1,134 @@ +use std::{ops::Range, iter}; +use ordered_float::NotNan; +use chumsky::{Parser, prelude::*, text::whitespace}; +use std::fmt::Debug; +use crate::utils::BoxedIter; + +use super::{number, string, name, comment}; + +#[derive(Clone, PartialEq, Eq, Hash)] +pub struct Entry(pub Lexeme, pub Range); +impl Debug for Entry { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.0) + // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish() + } +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum Lexeme { + Num(NotNan), + Int(u64), + Char(char), + Str(String), + Name(String), + Rule(NotNan), + NS, // namespace separator + LP(char), + RP(char), + BS, // Backslash + At, + Type, // type operator + Comment(String) +} + +impl Debug for Lexeme { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Num(n) => write!(f, "{}", n), + Self::Int(i) => write!(f, "{}", i), + Self::Char(c) => write!(f, "{:?}", c), + Self::Str(s) => write!(f, "{:?}", s), + Self::Name(name) => write!(f, "{}", name), + Self::Rule(prio) => write!(f, "={}=>", prio), + Self::NS => write!(f, "::"), + Self::LP(l) => write!(f, "{}", l), + Self::RP(l) => match l { + '(' => write!(f, ")"), + '[' => write!(f, "]"), + '{' => write!(f, "}}"), + _ => f.debug_tuple("RP").field(l).finish() + }, + Self::BS => write!(f, "\\"), + Self::At => write!(f, "@"), + Self::Type => write!(f, ":"), + Self::Comment(text) => write!(f, "--[{}]--", text), + } + } +} + +impl Lexeme { + pub fn name(n: T) -> Self { + Lexeme::Name(n.to_string()) + } + pub fn paren_parser( + expr: P + ) -> impl Parser> + Clone + where P: Parser> + Clone { + choice(( + expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('('))) + .map(|t| ('(', t)), + expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('['))) + .map(|t| ('[', 
t)), + expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{'))) + .map(|t| ('{', t)), + )) + } +} + +fn rule_parser() -> impl Parser, Error = Simple> { + just('=').ignore_then( + choice(( + none_of("-0123456789").rewind().to(NotNan::new(0f64).unwrap()), + number::float_parser().then_ignore(just("=>")) + )).map_err_with_span(|err, span| { + panic!("Something's up! {:?} {}", span, err) + }) + ) +} + +type LexSubres<'a> = BoxedIter<'a, Entry>; + +fn paren_parser<'a>( + expr: Recursive<'a, char, LexSubres<'a>, Simple>, + lp: char, rp: char +) -> impl Parser, Error=Simple> + 'a { + expr.padded().repeated() + .map(|x| Box::new(x.into_iter().flatten()) as LexSubres) + .delimited_by(just(lp), just(rp)).map_with_span(move |b, s| { + Box::new( + iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1)) + .chain(b) + .chain(iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end))) + ) as LexSubres + }) +} + +pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser>, Error=Simple> + 'a +where T: AsRef + Clone { + let all_ops = ops.iter().map(|o| o.as_ref().to_string()) + .chain(iter::once(".".to_string())).collect::>(); + recursive(move |recurse: Recursive>| { + choice(( + paren_parser(recurse.clone(), '(', ')'), + paren_parser(recurse.clone(), '[', ']'), + paren_parser(recurse.clone(), '{', '}'), + choice(( + rule_parser().map(Lexeme::Rule), + comment::comment_parser().map(Lexeme::Comment), + just("::").padded().to(Lexeme::NS), + just('\\').padded().to(Lexeme::BS), + just('@').padded().to(Lexeme::At), + just(':').to(Lexeme::Type), + number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence + number::float_parser().map(Lexeme::Num), + string::char_parser().map(Lexeme::Char), + string::str_parser().map(Lexeme::Str), + name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing + )).map_with_span(|lx, span| Box::new(iter::once(Entry(lx, span))) as LexSubres) + )) + }).separated_by(one_of("\t ").repeated()) + .flatten().collect() + 
.separated_by(just('\n').then(text::whitespace()).ignored()) + +} \ No newline at end of file diff --git a/src/parse/misc.rs b/src/parse/misc.rs deleted file mode 100644 index 9ad2a09..0000000 --- a/src/parse/misc.rs +++ /dev/null @@ -1,8 +0,0 @@ -pub use chumsky::{self, prelude::*, Parser}; - -/// Parses Lua-style comments -pub fn comment_parser() -> impl Parser> { - any().repeated().delimited_by(just("--["), just("]--")).or( - any().repeated().delimited_by(just("--"), just("\n")) - ).map(|vc| vc.iter().collect()).padded() -} diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 95b25ab..f298457 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -1,18 +1,16 @@ -mod expression; mod string; mod number; -mod misc; -mod import; mod name; -mod substitution; +mod lexer; +mod comment; +mod expression; mod sourcefile; +mod import; +mod enum_parser; -pub use substitution::Substitution; -pub use expression::Expr; -pub use expression::expression_parser; pub use sourcefile::FileEntry; -pub use sourcefile::file_parser; +pub use sourcefile::line_parser; pub use sourcefile::imports; -pub use sourcefile::is_op; pub use sourcefile::exported_names; -pub use import::Import; \ No newline at end of file +pub use lexer::{lexer, Lexeme, Entry as LexerEntry}; +pub use name::is_op; \ No newline at end of file diff --git a/src/parse/name.rs b/src/parse/name.rs index 497dec7..6452a8f 100644 --- a/src/parse/name.rs +++ b/src/parse/name.rs @@ -1,12 +1,14 @@ use chumsky::{self, prelude::*, Parser}; /// Matches any one of the passed operators, longest-first -fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple> { - let mut sorted_ops = ops.to_vec(); +fn op_parser<'a, T: AsRef + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple> { + let mut sorted_ops: Vec = ops.iter().map(|t| t.as_ref().to_string()).collect(); sorted_ops.sort_by(|a, b| b.len().cmp(&a.len())); sorted_ops.into_iter() - .map(|op| just(op.to_string()).boxed()) - .reduce(|a, b| 
a.or(b).boxed()).unwrap() + .map(|op| just(op).boxed()) + .reduce(|a, b| a.or(b).boxed()) + .unwrap_or(empty().map(|()| panic!("Empty isn't meant to match")).boxed()) + .labelled("operator").boxed() } /// Matches anything that's allowed as an operator @@ -27,20 +29,31 @@ fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple> /// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very /// common in maths so it's worth a try. Investigate. pub fn modname_parser<'a>() -> impl Parser> + 'a { - let not_name_char: Vec = vec![':', '\\', '@', '"', '\'', '(', ')', '.']; + let not_name_char: Vec = vec![':', '\\', '@', '"', '\'', '(', ')', ',']; filter(move |c| !not_name_char.contains(c) && !c.is_whitespace()) .repeated().at_least(1) .collect() + .labelled("modname") } /// Parse an operator or name. Failing both, parse everything up to the next whitespace or /// blacklisted character as a new operator. -pub fn name_parser<'a>( - ops: &[&'a str] -) -> impl Parser, Error = Simple> + 'a { +pub fn name_parser<'a, T: AsRef + Clone>( + ops: &[T] +) -> impl Parser> + 'a { choice(( op_parser(ops), // First try to parse a known operator - text::ident(), // Failing that, parse plain text + text::ident().labelled("plain text"), // Failing that, parse plain text modname_parser() // Finally parse everything until tne next terminal as a new operator - )).padded().separated_by(just("::")).padded() + )) + .labelled("name") +} + +/// Decide if a string can be an operator. Operators can include digits and text, just not at the +/// start. 
+pub fn is_op>(s: T) -> bool { + return match s.as_ref().chars().next() { + Some(x) => !x.is_alphanumeric(), + None => false + } } \ No newline at end of file diff --git a/src/parse/number.rs b/src/parse/number.rs index c40a7f2..331a196 100644 --- a/src/parse/number.rs +++ b/src/parse/number.rs @@ -1,4 +1,5 @@ use chumsky::{self, prelude::*, Parser}; +use ordered_float::NotNan; fn assert_not_digit(base: u32, c: char) { if base > (10 + (c as u32 - 'a' as u32)) { @@ -51,7 +52,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 { } /// returns a mapper that converts a mantissa and an exponent into a float -fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 { +fn nat2f(base: u64) -> impl Fn((NotNan, i32),) -> NotNan { return move |(val, exp)| { if exp == 0 {val} else {val * (base as f64).powf(exp.try_into().unwrap())} @@ -77,32 +78,35 @@ pub fn int_parser() -> impl Parser> { } /// parse a float from dot notation -fn dotted_parser(base: u32) -> impl Parser> { +fn dotted_parser(base: u32) -> impl Parser, Error = Simple> { uint_parser(base) - .then_ignore(just('.')) .then( - text::digits(base).then(separated_digits_parser(base)) - ).map(move |(wh, (frac1, frac2))| { - let frac = frac1 + &frac2; - let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64; - let dexp = base.pow(frac.len().try_into().unwrap()); - wh as f64 + (frac_num / dexp as f64) + just('.').ignore_then( + text::digits(base).then(separated_digits_parser(base)) + ).map(move |(frac1, frac2)| { + let frac = frac1 + &frac2; + let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64; + let dexp = base.pow(frac.len().try_into().unwrap()); + frac_num / dexp as f64 + }).or_not().map(|o| o.unwrap_or_default()) + ).try_map(|(wh, f), s| { + NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN")) }) } /// parse a float from dotted and optionally also exponential notation -fn pow_float_parser(base: u32) -> impl Parser> { +fn pow_float_parser(base: u32) -> impl 
Parser, Error = Simple> { assert_not_digit(base, 'p'); dotted_parser(base).then(pow_parser()).map(nat2f(base.into())) } /// parse a float with dotted and optionally exponential notation from a base determined by its /// prefix -pub fn float_parser() -> impl Parser> { +pub fn float_parser() -> impl Parser, Error = Simple> { choice(( just("0b").ignore_then(pow_float_parser(2)), just("0x").ignore_then(pow_float_parser(16)), just('0').ignore_then(pow_float_parser(8)), pow_float_parser(10), - )) + )).labelled("float") } diff --git a/src/parse/substitution.rs b/src/parse/rule.rs similarity index 67% rename from src/parse/substitution.rs rename to src/parse/rule.rs index fb93758..ccf2f3f 100644 --- a/src/parse/substitution.rs +++ b/src/parse/rule.rs @@ -3,7 +3,7 @@ use chumsky::{self, prelude::*, Parser}; use super::{expression, number::float_parser}; #[derive(Debug, Clone)] -pub struct Substitution { +pub struct Rule { pub source: expression::Expr, pub priority: f64, pub target: expression::Expr @@ -19,15 +19,16 @@ pub struct Substitution { /// shadow_reee =0.9=> reee /// ``` /// TBD whether this disables reee in the specified range or loops forever -pub fn substitution_parser<'a>( - pattern_ops: &[&'a str], - ops: &[&'a str] -) -> impl Parser> + 'a { - expression::expression_parser(pattern_ops) +pub fn rule_parser<'a, T: 'a + AsRef + Clone>( + pattern_ops: &[T], + ops: &[T] +) -> impl Parser> + 'a { + expression::expression_parser(pattern_ops).padded() .then_ignore(just('=')) .then( float_parser().then_ignore(just("=>")) .or_not().map(|prio| prio.unwrap_or(0.0)) - ).then(expression::expression_parser(ops)) - .map(|((source, priority), target)| Substitution { source, priority, target }) + ).then(expression::expression_parser(ops).padded()) + .map(|((source, priority), target)| Rule { source, priority, target }) + .labelled("rule") } diff --git a/src/parse/sourcefile.rs b/src/parse/sourcefile.rs index a057c41..d316f2c 100644 --- a/src/parse/sourcefile.rs +++ 
b/src/parse/sourcefile.rs @@ -1,20 +1,25 @@ use std::collections::HashSet; +use std::fs::File; use std::iter; -use super::expression::Expr; +use crate::{enum_parser, Expr, Clause}; +use crate::utils::BoxedIter; + +use super::expression::xpr_parser; use super::import; -use super::misc; -use super::substitution::substitution_parser; -use super::substitution::Substitution; +use super::import::import_parser; +use super::lexer::Lexeme; +use super::name; use chumsky::{Parser, prelude::*}; +use ordered_float::NotNan; /// Anything we might encounter in a file #[derive(Debug, Clone)] pub enum FileEntry { Import(Vec), Comment(String), - Substitution(Substitution), - Export(Substitution) + Rule(Vec, NotNan, Vec), + Export(Vec, NotNan, Vec) } /// Recursively iterate through all "names" in an expression. It also finds a lot of things that @@ -22,19 +27,22 @@ pub enum FileEntry { /// sophisticated search. /// /// TODO: find a way to exclude parameters -fn find_all_names_recur(expr: &Expr) -> Box> + '_> { - match expr { - Expr::Auto(_, typ, body) | Expr::Lambda(_, typ, body) => Box::new(match typ { - Some(texp) => find_all_names_recur(texp), - None => Box::new(iter::empty()) - }.chain(body.into_iter().map(find_all_names_recur).flatten())), - Expr::S(body) => Box::new(body.into_iter().map(find_all_names_recur).flatten()), - Expr::Typed(val, typ) => Box::new( - find_all_names_recur(val).chain(find_all_names_recur(typ)) +fn find_all_names_recur<'a>(expr: &'a Expr) -> BoxedIter<&'a Vec> { + let proc_clause = |clause: &'a Clause| match clause { + Clause::Auto(_, typ, body) | Clause::Lambda(_, typ, body) => Box::new( + typ.iter().flat_map(find_all_names_recur) + .chain(body.iter().flat_map(find_all_names_recur)) + ) as BoxedIter<&'a Vec>, + Clause::S(_, body) => Box::new( + body.iter().flat_map(find_all_names_recur) ), - Expr::Name(x) => Box::new(iter::once(x)), + Clause::Name(x) => Box::new(iter::once(x)), _ => Box::new(iter::empty()) - } + }; + let Expr(val, typ) = expr; + if let 
Some(t) = typ { + Box::new(proc_clause(val).chain(find_all_names_recur(t))) + } else { proc_clause(val) } } /// Collect all names that occur in an expression @@ -42,62 +50,69 @@ fn find_all_names(expr: &Expr) -> HashSet<&Vec> { find_all_names_recur(expr).collect() } -/// Parse a file into a list of distinctive entries -pub fn file_parser<'a>( - pattern_ops: &[&'a str], ops: &[&'a str] -) -> impl Parser, Error = Simple> + 'a { - choice(( - // In case the usercode wants to parse doc - misc::comment_parser().map(FileEntry::Comment), - import::import_parser().map(FileEntry::Import), - text::keyword("export") - .ignore_then(substitution_parser(pattern_ops, ops)).map(FileEntry::Export), - // This could match almost anything so it has to go last - substitution_parser(pattern_ops, ops).map(FileEntry::Substitution) - )).padded() - .separated_by(just('\n')) - .then_ignore(end()) +fn rule_parser() -> impl Parser, NotNan, Vec), Error = Simple> { + xpr_parser().repeated() + .then(enum_parser!(Lexeme::Rule)) + .then(xpr_parser().repeated()) + // .map(|((lhs, prio), rhs)| ) + .map(|((a, b), c)| (a, b, c)) + .labelled("Rule") } -/// Decide if a string can be an operator. Operators can include digits and text, just not at the -/// start. 
-pub fn is_op(s: &str) -> bool { - return match s.chars().next() { - Some(x) => !x.is_alphanumeric(), - None => false - } +pub fn line_parser() -> impl Parser> { + choice(( + // In case the usercode wants to parse doc + enum_parser!(Lexeme >> FileEntry; Comment), + just(Lexeme::name("import")) + .ignore_then(import_parser().map(FileEntry::Import)) + .then_ignore(enum_parser!(Lexeme::Comment)), + just(Lexeme::name("export")).map_err_with_span(|e, s| { + println!("{:?} could not yield an export", s); e + }) + .ignore_then(rule_parser()) + .map(|(lhs, prio, rhs)| FileEntry::Export(lhs, prio, rhs)), + // This could match almost anything so it has to go last + rule_parser().map(|(lhs, prio, rhs)| FileEntry::Rule(lhs, prio, rhs)), + )) } /// Collect all exported names (and a lot of other words) from a file pub fn exported_names(src: &Vec) -> HashSet<&Vec> { - src.iter().filter_map(|ent| match ent { - FileEntry::Export(a) => Some(&a.source), - _ => None + src.iter().flat_map(|ent| match ent { + FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())) as BoxedIter<&Expr>, + _ => Box::new(iter::empty()) }).map(find_all_names).flatten().collect() } + +// #[allow(dead_code)] /// Collect all operators defined in a file (and some other words) fn defined_ops(src: &Vec, exported_only: bool) -> Vec<&String> { - let all_names:HashSet<&Vec> = src.iter().filter_map(|ent| match ent { - FileEntry::Substitution(a) => if exported_only {None} else {Some(&a.source)}, - FileEntry::Export(a) => Some(&a.source), - _ => None + let all_names:HashSet<&Vec> = src.iter().flat_map(|ent| match ent { + FileEntry::Rule(s, _, d) => + if exported_only {Box::new(iter::empty()) as BoxedIter<&Expr>} + else {Box::new(s.iter().chain(d.iter()))} + FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())), + _ => Box::new(iter::empty()) }).map(find_all_names).flatten().collect(); // Dedupe stage of dubious value; collecting into a hashset may take longer than // handling duplicates would with a 
file of sensible size. all_names.into_iter() .filter_map(|name| // If it's namespaced, it's imported. - if name.len() == 1 && is_op(&name[0]) {Some(&name[0])} + if name.len() == 1 && name::is_op(&name[0]) {Some(&name[0])} else {None} ).collect() } +// #[allow(dead_code)] /// Collect all operators from a file pub fn all_ops(src: &Vec) -> Vec<&String> { defined_ops(src, false) } +// #[allow(dead_code)] /// Collect exported operators from a file (plus some extra) pub fn exported_ops(src: &Vec) -> Vec<&String> { defined_ops(src, true) } + /// Summarize all imports from a file in a single list of qualified names pub fn imports<'a, 'b, I>( src: I diff --git a/src/project/file_loader.rs b/src/project/file_loader.rs new file mode 100644 index 0000000..ebe1a29 --- /dev/null +++ b/src/project/file_loader.rs @@ -0,0 +1,47 @@ +use std::io; +use std::rc::Rc; +use std::fs::read_to_string; +use std::path::PathBuf; + +use super::loaded::Loaded; + +#[derive(Clone, Debug)] +pub enum LoadingError { + IOErr(Rc), + UnknownNode(String), + Missing(String) +} + +impl From for LoadingError { + fn from(inner: io::Error) -> Self { + LoadingError::IOErr(Rc::new(inner)) + } +} + +pub fn file_loader(proj: PathBuf) -> impl FnMut(Vec) -> Result + 'static { + move |path| { + let dirpath = proj.join(path.join("/")); + if dirpath.is_dir() || dirpath.is_symlink() { + return Ok(Loaded::Namespace( + dirpath.read_dir()? + .filter_map(|entr| { + let ent = entr.ok()?; + let typ = ent.file_type().ok()?; + let path = ent.path(); + if typ.is_dir() || typ.is_symlink() { + Some(ent.file_name().to_string_lossy().into_owned()) + } else if typ.is_file() && path.extension()? 
== "orc" { + Some(path.file_stem()?.to_string_lossy().into_owned()) + } else { None } + }) + .collect() + )) + } + let orcfile = dirpath.with_extension("orc"); + if orcfile.is_file() { + read_to_string(orcfile).map(Loaded::Module).map_err(LoadingError::from) + } else if dirpath.exists() { + Err(LoadingError::UnknownNode(dirpath.to_string_lossy().into_owned())) + } else { Err(LoadingError::Missing(dirpath.to_string_lossy().into_owned())) } + } +} \ No newline at end of file diff --git a/src/project/loaded.rs b/src/project/loaded.rs new file mode 100644 index 0000000..7e5a00c --- /dev/null +++ b/src/project/loaded.rs @@ -0,0 +1,5 @@ +#[derive(Debug, Clone)] +pub enum Loaded { + Module(String), + Namespace(Vec), +} \ No newline at end of file diff --git a/src/project/mod.rs b/src/project/mod.rs index c40f4c3..7c83f2d 100644 --- a/src/project/mod.rs +++ b/src/project/mod.rs @@ -1,25 +1,23 @@ -use std::collections::HashMap; - -mod resolve_names; +mod rule_collector; +// pub use rule_collector::rule_collector; mod prefix; mod name_resolver; -mod expr; - -#[derive(Debug, Clone)] -pub struct Project { - pub modules: HashMap, Module>, -} +mod loaded; +pub use loaded::Loaded; +mod parse_error; +mod file_loader; +pub use file_loader::file_loader; #[derive(Debug, Clone)] pub struct Module { - pub substitutions: Vec, + pub rules: Vec, pub exports: Vec, pub references: Vec> } #[derive(Debug, Clone)] -pub struct Substitution { - pub source: expr::Expr, +pub struct Rule { + pub source: super::Expr, pub priority: f64, - pub target: expr::Expr + pub target: super::Expr } diff --git a/src/project/name_resolver.rs b/src/project/name_resolver.rs index f635590..9978872 100644 --- a/src/project/name_resolver.rs +++ b/src/project/name_resolver.rs @@ -3,7 +3,7 @@ use thiserror::Error; use crate::utils::Substack; -use super::expr::{Expr, Token}; +use crate::{Expr, Clause, Literal}; type ImportMap = HashMap>; @@ -50,9 +50,8 @@ where ) -> Result, ResolutionError> { if let Some(cached) = 
self.cache.get(symbol) { return cached.clone() } // The imports and path of the referenced file and the local name - let mut splitpoint = symbol.len(); let path = (self.get_modname)(symbol).ok_or(ResolutionError::NoModule(symbol.clone()))?; - let name = symbol.split_at(path.len()).1; + let (_, name) = symbol.split_at(path.len()); let imports = (self.get_imports)(&path)?; let result = if let Some(source) = imports.get(&name[0]) { let new_sym: Vec = source.iter().chain(name.iter()).cloned().collect(); @@ -79,41 +78,39 @@ where .next().transpose() } - fn process_token_rec(&mut self, tok: &Token) -> Result> { + fn process_clause_rec(&mut self, tok: &Clause) -> Result> { Ok(match tok { - Token::Literal(l) => Token::Literal(l.clone()), - Token::S(exv) => Token::S( + Clause::S(c, exv) => Clause::S(*c, exv.iter().map(|e| self.process_expression_rec(e)) .collect::, ResolutionError>>()? ), - Token::Lambda(name, typ, body) => Token::Lambda(name.clone(), - self.process_exprboxopt_rec(typ)?, + Clause::Lambda(name, typ, body) => Clause::Lambda(name.clone(), + self.process_exprv_rec(typ)?, self.process_exprv_rec(body)? ), - Token::Auto(name, typ, body) => Token::Auto(name.clone(), - self.process_exprboxopt_rec(typ)?, + Clause::Auto(name, typ, body) => Clause::Auto(name.clone(), + self.process_exprv_rec(typ)?, self.process_exprv_rec(body)? ), - Token::Name { qualified, local } => Token::Name { - local: local.clone(), - qualified: self.find_origin(qualified)? - } + Clause::Name(qualified) => Clause::Name(self.find_origin(qualified)?), + x => x.clone() }) } - fn process_expression_rec(&mut self, ex: &Expr) -> Result> { - Ok(Expr { - token: self.process_token_rec(&ex.token)?, - typ: self.process_exprboxopt_rec(&ex.typ)? - }) + fn process_expression_rec(&mut self, Expr(token, typ): &Expr) -> Result> { + Ok(Expr( + self.process_clause_rec(token)?, + self.process_exprboxopt_rec(typ)? 
+ )) } pub fn find_origin(&mut self, symbol: &Vec) -> Result, ResolutionError> { self.find_origin_rec(symbol, &Substack::new(symbol)) } - pub fn process_token(&mut self, tok: &Token) -> Result> { - self.process_token_rec(tok) + #[allow(dead_code)] + pub fn process_clause(&mut self, clause: &Clause) -> Result> { + self.process_clause_rec(clause) } pub fn process_expression(&mut self, ex: &Expr) -> Result> { diff --git a/src/project/parse_error.rs b/src/project/parse_error.rs new file mode 100644 index 0000000..d5c1149 --- /dev/null +++ b/src/project/parse_error.rs @@ -0,0 +1,30 @@ +use chumsky::prelude::Simple; +use thiserror::Error; + +use super::name_resolver::ResolutionError; + +#[derive(Error, Debug, Clone)] +pub enum ParseError where ELoad: Clone { + #[error("Resolution cycle")] + ResolutionCycle, + #[error("File not found: {0}")] + Load(ELoad), + #[error("Failed to parse: {0:?}")] + Syntax(Vec>), + #[error("Not a module")] + None +} + +impl From>> for ParseError where T: Clone { + fn from(simp: Vec>) -> Self { Self::Syntax(simp) } +} + +impl From>> for ParseError where T: Clone { + fn from(res: ResolutionError>) -> Self { + match res { + ResolutionError::Cycle(_) => ParseError::ResolutionCycle, + ResolutionError::NoModule(_) => ParseError::None, + ResolutionError::Delegate(d) => d + } + } +} \ No newline at end of file diff --git a/src/project/prefix.rs b/src/project/prefix.rs index b9968a6..d16c747 100644 --- a/src/project/prefix.rs +++ b/src/project/prefix.rs @@ -1,61 +1,36 @@ -use std::collections::HashMap; - -use crate::parse; -use super::expr; +use crate::{Expr, Clause}; /// Replaces the first element of a name with the matching prefix from a prefix map -fn qualify( - name: &Vec, - prefixes: &HashMap> -) -> Option> { - let value = prefixes.iter().find(|(k, _)| &&name[0] == k)?.1; - Some(value.iter().chain(name.iter().skip(1)).cloned().collect()) -} -/// Produce a Token object for any value of parse::Expr other than Typed. 
+/// Produce a Token object for any value of Expr other than Typed. /// Called by [#prefix] which handles Typed. -fn prefix_token( - expr: &parse::Expr, +fn prefix_clause( + expr: &Clause, namespace: &Vec -) -> expr::Token { +) -> Clause { match expr { - parse::Expr::Typed(_, _) => panic!("This function should only be called by prefix!"), - parse::Expr::Char(c) => expr::Token::Literal(expr::Literal::Char(*c)), - parse::Expr::Int(i) => expr::Token::Literal(expr::Literal::Int(*i)), - parse::Expr::Num(n) => expr::Token::Literal(expr::Literal::Num(*n)), - parse::Expr::Str(s) => expr::Token::Literal(expr::Literal::Str(s.clone())), - parse::Expr::S(v) => expr::Token::S(v.iter().map(|e| prefix(e, namespace)).collect()), - parse::Expr::Auto(name, typ, body) => expr::Token::Auto( + Clause::S(c, v) => Clause::S(*c, v.iter().map(|e| prefix_expr(e, namespace)).collect()), + Clause::Auto(name, typ, body) => Clause::Auto( name.clone(), - typ.clone().map(|expr| Box::new(prefix(&expr, namespace))), - body.iter().map(|e| prefix(e, namespace)).collect(), + typ.iter().map(|e| prefix_expr(e, namespace)).collect(), + body.iter().map(|e| prefix_expr(e, namespace)).collect(), ), - parse::Expr::Lambda(name, typ, body) => expr::Token::Lambda( + Clause::Lambda(name, typ, body) => Clause::Lambda( name.clone(), - typ.clone().map(|expr| Box::new(prefix(&expr, namespace))), - body.iter().map(|e| prefix(e, namespace)).collect(), + typ.iter().map(|e| prefix_expr(e, namespace)).collect(), + body.iter().map(|e| prefix_expr(e, namespace)).collect(), ), - parse::Expr::Name(name) => expr::Token::Name { - qualified: namespace.iter().chain(name.iter()).cloned().collect(), - local: if name.len() == 1 { - Some(name[0].clone()) - } else { - None - }, - }, + Clause::Name(name) => Clause::Name ( + namespace.iter().chain(name.iter()).cloned().collect() + ), + x => x.clone() } } -/// Produce an Expr object for any value of parse::Expr -pub fn prefix(expr: &parse::Expr, namespace: &Vec) -> expr::Expr { - match 
expr { - parse::Expr::Typed(x, t) => expr::Expr { - typ: Some(Box::new(prefix(t, namespace))), - token: prefix_token(x, namespace), - }, - _ => expr::Expr { - typ: None, - token: prefix_token(expr, namespace), - }, - } +/// Produce an Expr object for any value of Expr +pub fn prefix_expr(Expr(clause, typ): &Expr, namespace: &Vec) -> Expr { + Expr( + prefix_clause(clause, namespace), + typ.as_ref().map(|e| Box::new(prefix_expr(e, namespace))) + ) } diff --git a/src/project/resolve_names.rs b/src/project/resolve_names.rs deleted file mode 100644 index 8dda6a0..0000000 --- a/src/project/resolve_names.rs +++ /dev/null @@ -1,221 +0,0 @@ -use std::cell::RefCell; -use std::collections::{HashMap, HashSet, VecDeque}; -use std::error; - -use chumsky::{prelude::Simple, Parser}; -use thiserror::Error; - -use crate::parse::{self, file_parser, FileEntry}; -use crate::utils::{Cache, as_modpath}; - -use super::expr; -use super::name_resolver::{NameResolver, ResolutionError}; -use super::prefix::prefix; - -#[derive(Debug, Clone)] -pub enum Loaded { - Module(String), - Namespace(Vec), -} - -#[derive(Error, Debug, Clone)] -pub enum ParseError where ELoad: Clone { - #[error("Resolution cycle")] - ResolutionCycle, - #[error("File not found: {0}")] - Load(ELoad), - #[error("Failed to parse: {0:?}")] - Syntax(Vec>), - #[error("Not a module")] - None -} - -impl From>> for ParseError where T: Clone { - fn from(simp: Vec>) -> Self { Self::Syntax(simp) } -} - -impl From>> for ParseError where T: Clone { - fn from(res: ResolutionError>) -> Self { - match res { - ResolutionError::Cycle(_) => ParseError::ResolutionCycle, - ResolutionError::NoModule(_) => ParseError::None, - ResolutionError::Delegate(d) => d - } - } -} - -type ImportMap = HashMap>; -type ParseResult = Result>; -type AnyParseResult = Result>>; - -pub fn load_project<'a, F, ELoad>( - mut load_mod: F, - prelude: &[&'a str], - entry: (Vec, expr::Expr), -) -> Result> -where - F: FnMut(&[&str]) -> Result, - ELoad: Clone -{ - let 
prelude_vec: Vec = prelude.iter().map(|s| s.to_string()).collect(); - let preparser = file_parser(prelude, &[]); - // Map paths to a namespace with name list (folder) or module with source text (file) - let loaded_cell = RefCell::new(Cache::new(|path: Vec| - -> ParseResult { - load_mod(&path.iter().map(|s| s.as_str()).collect::>()) - .map_err(ParseError::Load) - })); - let modname_cell = RefCell::new(Cache::new(|symbol: Vec| - -> AnyParseResult, ELoad> { - let mut local_loaded = loaded_cell.borrow_mut(); - let mut errv: Vec> = Vec::new(); - loop { - let (path, name) = symbol.split_at(symbol.len() - errv.len()); - let pathv = path.to_vec(); - match local_loaded.by_clone_fallible(&pathv) { - Ok(imports) => break Ok(pathv.clone()), - Err(err) => { - errv.push(err); - if symbol.len() == errv.len() { - break Err(errv); - } - } - } - } - })); - // Preliminarily parse a file, substitution patterns and imports are valid - let preparsed_cell = RefCell::new(Cache::new(|path: Vec| - -> ParseResult, ELoad> { - let mut loaded = loaded_cell.borrow_mut(); - let loaded = loaded.by_clone_fallible(&path)?; - if let Loaded::Module(source) = loaded { - Ok(preparser.parse(source.as_str())?) 
- } else {Err(ParseError::None)} - })); - // Collect all toplevel names exported from a given file - let exports_cell = RefCell::new(Cache::new(|path: Vec| - -> ParseResult, ELoad> { - let mut local_loaded = loaded_cell.borrow_mut(); - let loaded = local_loaded.by_clone_fallible(&path)?; - let mut local_preparsed = preparsed_cell.borrow_mut(); - if let Loaded::Namespace(names) = loaded { - return Ok(names.clone()); - } - let preparsed = local_preparsed.by_clone_fallible(&path)?; - Ok(parse::exported_names(&preparsed) - .into_iter() - .map(|n| n[0].clone()) - .collect()) - })); - // Collect all toplevel names imported by a given file - let imports_cell = RefCell::new(Cache::new(|path: Vec| - -> ParseResult { - let mut local_preparsed = preparsed_cell.borrow_mut(); - let entv = local_preparsed.by_clone_fallible(&path)?.clone(); - let import_entries = parse::imports(entv.iter()); - let mut imported_symbols: HashMap> = HashMap::new(); - for imp in import_entries { - let mut exports = exports_cell.borrow_mut(); - let export = exports.by_clone_fallible(&imp.path)?; - if let Some(ref name) = imp.name { - if export.contains(&name) { - imported_symbols.insert(name.clone(), imp.path.clone()); - } - } else { - for exp in export.clone() { - imported_symbols.insert(exp.clone(), imp.path.clone()); - } - } - } - Ok(imported_symbols) - })); - // Final parse, operators are correctly separated - let parsed_cell = RefCell::new(Cache::new(|path: Vec| - -> ParseResult, ELoad> { - let mut local_imports = imports_cell.borrow_mut(); - let imports = local_imports.by_clone_fallible(&path)?; - let mut local_loaded = loaded_cell.borrow_mut(); - let imported_ops: Vec<&str> = imports - .keys() - .chain(prelude_vec.iter()) - .map(|s| s.as_str()) - .filter(|s| parse::is_op(s)) - .collect(); - let parser = file_parser(prelude, &imported_ops); - if let Loaded::Module(source) = local_loaded.by_clone_fallible(&path)? { - Ok(parser.parse(source.as_str())?) 
- } else {Err(ParseError::None)} - })); - let mut name_resolver = NameResolver::new( - |path: &Vec| { modname_cell.borrow_mut().by_clone_fallible(path).cloned().ok() }, - |path: &Vec| { imports_cell.borrow_mut().by_clone_fallible(path).cloned() } - ); - // Turn parsed files into a bag of substitutions and a list of toplevel export names - let resolved_cell = RefCell::new(Cache::new(|path: Vec| - -> ParseResult { - let mut parsed = parsed_cell.borrow_mut(); - let parsed_entries = parsed.by_clone_fallible(&path)?; - let subs: Vec = parsed_entries - .iter() - .filter_map(|ent| { - if let FileEntry::Export(s) | FileEntry::Substitution(s) = ent { - Some(super::Substitution { - source: prefix(&s.source, &path), - target: prefix(&s.target, &path), - priority: s.priority, - }) - } else { None } - }) - .map(|sub| Ok(super::Substitution { - source: name_resolver.process_expression(&sub.source)?, - target: name_resolver.process_expression(&sub.target)?, - ..sub - })) - .collect::, ELoad>>()?; - let module = super::Module { - substitutions: subs, - exports: exports_cell - .borrow_mut() - .by_clone_fallible(&path)? - .clone(), - references: imports_cell - .borrow_mut() - .by_clone_fallible(&path)? 
- .values() - .filter_map(|imps| modname_cell.borrow_mut().by_clone_fallible(imps).ok().cloned()) - .collect() - }; - Ok(module) - })); - let all_subs_cell = RefCell::new(Cache::new(|path: Vec| - -> ParseResult, ELoad> { - let mut processed: HashSet> = HashSet::new(); - let mut subs: Vec = Vec::new(); - let mut pending: VecDeque> = VecDeque::new(); - while let Some(el) = pending.pop_front() { - let mut local_resolved = resolved_cell.borrow_mut(); - let resolved = local_resolved.by_clone_fallible(&el)?; - processed.insert(el.clone()); - pending.extend( - resolved.references.iter() - .filter(|&v| !processed.contains(v)) - .cloned() - ); - subs.extend( - resolved.substitutions.iter().cloned() - ) - }; - Ok(subs) - })); - // let substitutions = - // let main = preparsed.get(&[entry]); - // for imp in parse::imports(main) { - // if !modules.contains_key(&imp.path) { - // if modules[&imp.path] - // } - // } - // let mut project = super::Project { - // modules: HashMap::new() - // }; - todo!("Finish this function") -} diff --git a/src/project/rule_collector.rs b/src/project/rule_collector.rs new file mode 100644 index 0000000..1bc4361 --- /dev/null +++ b/src/project/rule_collector.rs @@ -0,0 +1,193 @@ +// use std::collections::{HashMap, HashSet, VecDeque}; +// use std::fmt::Debug; +// use std::rc::Rc; + +// use chumsky::Parser; + +// use crate::parse::{self, line_parser, FileEntry}; +// use crate::utils::Cache; + + +// use super::name_resolver::NameResolver; +// use super::parse_error::ParseError; +// use super::prefix::prefix_expr; +// use super::loaded::Loaded; + +// type ParseResult = Result>; + +// pub fn rule_collector( +// mut load_mod: F, +// prelude: Vec +// // ) -> impl FnMut(Vec) -> Result<&'a Vec, ParseError> + 'a +// ) -> Cache, Result, ParseError>> +// where +// F: FnMut(Vec) -> Result, +// ELoad: Clone + Debug +// { +// // Map paths to a namespace with name list (folder) or module with source text (file) +// let loaded = Rc::new(Cache::new(move |path: Vec| 
+// -> ParseResult { +// load_mod(path).map_err(ParseError::Load) +// })); +// // Map names to the longest prefix that points to a valid module +// let modname = Rc::new(Cache::new({ +// let loaded = Rc::clone(&loaded); +// move |symbol: Vec| -> Result, Vec>> { +// let mut errv: Vec> = Vec::new(); +// let reg_err = |e, errv: &mut Vec>| { +// errv.push(e); +// if symbol.len() == errv.len() { Err(errv.clone()) } +// else { Ok(()) } +// }; +// loop { +// let (path, _) = symbol.split_at(symbol.len() - errv.len()); +// let pathv = path.to_vec(); +// match loaded.try_find(&pathv) { +// Ok(imports) => match imports.as_ref() { +// Loaded::Module(_) => break Ok(pathv.clone()), +// _ => reg_err(ParseError::None, &mut errv)? +// }, +// Err(err) => reg_err(err, &mut errv)? +// } +// } +// } +// })); +// // Preliminarily parse a file, substitution rules and imports are valid +// let preparsed = Rc::new(Cache::new({ +// let preparser = line_parser(&prelude, &prelude); +// let loaded = Rc::clone(&loaded); +// move |path: Vec| -> ParseResult, ELoad> { +// let loaded = loaded.try_find(&path)?; +// if let Loaded::Module(source) = loaded.as_ref() { +// Ok(preparser.parse(source.as_str())?) 
+// } else {Err(ParseError::None)} +// } +// })); +// // Collect all toplevel names exported from a given file +// let exports = Rc::new(Cache::new({ +// let loaded = Rc::clone(&loaded); +// let preparsed = Rc::clone(&preparsed); +// move |path: Vec| -> ParseResult, ELoad> { +// let loaded = loaded.try_find(&path)?; +// if let Loaded::Namespace(names) = loaded.as_ref() { +// return Ok(names.clone()); +// } +// let preparsed = preparsed.try_find(&path)?; +// Ok(parse::exported_names(&preparsed) +// .into_iter() +// .map(|n| n[0].clone()) +// .collect()) +// } +// })); +// // Collect all toplevel names imported by a given file +// let imports = Rc::new(Cache::new({ +// let preparsed = Rc::clone(&preparsed); +// let exports = Rc::clone(&exports); +// move |path: Vec| -> ParseResult>, ELoad> { +// let entv = preparsed.try_find(&path)?.clone(); +// let import_entries = parse::imports(entv.iter()); +// let mut imported_symbols: HashMap> = HashMap::new(); +// for imp in import_entries { +// let export = exports.try_find(&imp.path)?; +// if let Some(ref name) = imp.name { +// if export.contains(&name) { +// imported_symbols.insert(name.clone(), imp.path.clone()); +// } +// } else { +// for exp in export.as_ref() { +// imported_symbols.insert(exp.clone(), imp.path.clone()); +// } +// } +// } +// Ok(imported_symbols) +// } +// })); +// // Final parse, operators are correctly separated +// let parsed = Rc::new(Cache::new({ +// let imports = Rc::clone(&imports); +// let loaded = Rc::clone(&loaded); +// move |path: Vec| -> ParseResult, ELoad> { +// let imported_ops: Vec = +// imports.try_find(&path)? +// .keys() +// .chain(prelude.iter()) +// .filter(|s| parse::is_op(s)) +// .cloned() +// .collect(); +// let parser = file_parser(&prelude, &imported_ops); +// if let Loaded::Module(source) = loaded.try_find(&path)?.as_ref() { +// Ok(parser.parse(source.as_str())?) 
+// } else { Err(ParseError::None) } +// } +// })); +// let mut name_resolver = NameResolver::new({ +// let modname = Rc::clone(&modname); +// move |path| { +// Some(modname.try_find(path).ok()?.as_ref().clone()) +// } +// }, { +// let imports = Rc::clone(&imports); +// move |path| { +// imports.try_find(path).map(|f| f.as_ref().clone()) +// } +// }); +// // Turn parsed files into a bag of rules and a list of toplevel export names +// let resolved = Rc::new(Cache::new({ +// let parsed = Rc::clone(&parsed); +// let exports = Rc::clone(&exports); +// let imports = Rc::clone(&imports); +// let modname = Rc::clone(&modname); +// move |path: Vec| -> ParseResult { +// let module = super::Module { +// rules: parsed.try_find(&path)? +// .iter() +// .filter_map(|ent| { +// if let FileEntry::Export(s) | FileEntry::Rule(s) = ent { +// Some(super::Rule { +// source: prefix_expr(&s.source, &path), +// target: prefix_expr(&s.target, &path), +// priority: s.priority, +// }) +// } else { None } +// }) +// .map(|rule| Ok(super::Rule { +// source: name_resolver.process_expression(&rule.source)?, +// target: name_resolver.process_expression(&rule.target)?, +// ..rule +// })) +// .collect::, ELoad>>()?, +// exports: exports.try_find(&path)?.as_ref().clone(), +// references: imports.try_find(&path)? 
+// .values() +// .filter_map(|imps| { +// modname.try_find(&imps).ok().map(|r| r.as_ref().clone()) +// }) +// .collect() +// }; +// Ok(module) +// } +// })); +// let all_rules = Cache::new({ +// let resolved = Rc::clone(&resolved); +// move |path: Vec| -> ParseResult, ELoad> { +// let mut processed: HashSet> = HashSet::new(); +// let mut rules: Vec = Vec::new(); +// let mut pending: VecDeque> = VecDeque::new(); +// pending.push_back(path); +// while let Some(el) = pending.pop_front() { +// let resolved = resolved.try_find(&el)?; +// processed.insert(el.clone()); +// pending.extend( +// resolved.references.iter() +// .filter(|&v| !processed.contains(v)) +// .cloned() +// ); +// rules.extend( +// resolved.rules.iter().cloned() +// ) +// }; +// Ok(rules) +// } +// }); +// return all_rules; +// } diff --git a/src/utils/cache.rs b/src/utils/cache.rs index befefd6..3c3f68a 100644 --- a/src/utils/cache.rs +++ b/src/utils/cache.rs @@ -1,71 +1,68 @@ -use std::hash::Hash; +use std::{hash::Hash, cell::RefCell}; use hashbrown::HashMap; +use mappable_rc::Mrc; /// Cache the return values of an effectless closure in a hashmap /// Inspired by the closure_cacher crate. 
-pub struct Cache { - store: HashMap, - closure: F +pub struct Cache where O: Clone { + store: RefCell>>, + closure: RefCell O + 'static>> } -impl Cache where - I: Eq + Hash, - F: FnMut(I) -> O +impl Cache where + I: Eq + Hash + Clone, + O: Clone { - pub fn new(closure: F) -> Self { - Self { store: HashMap::new(), closure } - } - /// Produce and cache a result by copying I if necessary - pub fn by_copy(&mut self, i: &I) -> &O where I: Copy { - let closure = &mut self.closure; - self.store.raw_entry_mut().from_key(i) - .or_insert_with(|| (*i, closure(*i))).1 + pub fn new(closure: F) -> Self where F: FnMut(I) -> O { + Self { + store: RefCell::new(HashMap::new()), + closure: RefCell::new(Box::new(closure)) + } } + /// Produce and cache a result by cloning I if necessary - pub fn by_clone(&mut self, i: &I) -> &O where I: Clone { - let closure = &mut self.closure; - self.store.raw_entry_mut().from_key(i) - .or_insert_with(|| (i.clone(), closure(i.clone()))).1 + pub fn find(&self, i: &I) -> Mrc { + let mut closure = self.closure.borrow_mut(); + let mut store = self.store.borrow_mut(); + Mrc::clone(store.raw_entry_mut().from_key(i) + .or_insert_with(|| (i.clone(), Mrc::new(closure(i.clone())))).1) } + #[allow(dead_code)] /// Return the result if it has already been computed - pub fn known(&self, i: &I) -> Option<&O> { - self.store.get(i) + pub fn known(&self, i: &I) -> Option> { + let store = self.store.borrow(); + store.get(i).map(Mrc::clone) } + #[allow(dead_code)] /// Forget the output for the given input - pub fn drop(&mut self, i: &I) -> bool { - self.store.remove(i).is_some() + pub fn drop(&self, i: &I) -> bool { + self.store.borrow_mut().remove(i).is_some() } } -impl Cache, F> where - I: Eq + Hash, - E: Clone, - F: FnMut(I) -> Result +impl Cache> where + I: Eq + Hash + Clone, + O: Clone, + E: Clone { - /// Sink the ref from a Result into the Ok value, such that copying only occurs on the sad path - /// but the return value can be short-circuited - pub fn 
by_copy_fallible(&mut self, i: &I) -> Result<&O, E> where I: Copy { - self.by_clone(i).as_ref().map_err(|e| e.clone()) - } /// Sink the ref from a Result into the Ok value, such that cloning only occurs on the sad path /// but the return value can be short-circuited - pub fn by_clone_fallible(&mut self, i: &I) -> Result<&O, E> where I: Clone { - self.by_clone(i).as_ref().map_err(|e| e.clone()) + pub fn try_find(&self, i: &I) -> Result, E> { + let ent = self.find(i); + Mrc::try_map(ent, |t| t.as_ref().ok()) + .map_err(|res| Result::as_ref(&res).err().unwrap().to_owned()) } } -impl Cache, F> where - I: Eq + Hash, - F: FnMut(I) -> Option +impl Cache> where + I: Eq + Hash + Clone, + O: Clone { + #[allow(dead_code)] /// Sink the ref from an Option into the Some value such that the return value can be /// short-circuited - pub fn by_copy_fallible(&mut self, i: &I) -> Option<&O> where I: Copy { - self.by_copy(i).as_ref() - } - /// Sink the ref from an Option into the Some value such that the return value can be - /// short-circuited - pub fn by_clone_fallible(&mut self, i: &I) -> Option<&O> where I: Clone { - self.by_clone(i).as_ref() + pub fn try_find(&self, i: &I) -> Option> where I: Clone { + let ent = self.find(i); + Mrc::try_map(ent, |o| o.as_ref()).ok() } } diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 4f15ccf..3f2c294 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,8 +1,8 @@ mod cache; mod substack; +mod result_iter_collect; pub use cache::Cache; pub use substack::Substack; +pub use result_iter_collect::result_iter_collect; -pub fn as_modpath(path: &Vec) -> String { - path.join("::") -} \ No newline at end of file +pub type BoxedIter<'a, T> = Box + 'a>; \ No newline at end of file diff --git a/src/utils/result_iter_collect.rs b/src/utils/result_iter_collect.rs new file mode 100644 index 0000000..92c830a --- /dev/null +++ b/src/utils/result_iter_collect.rs @@ -0,0 +1,19 @@ +pub fn result_iter_collect(i: &mut dyn Iterator>) +-> (Vec>, Vec>) { + 
i.fold((Vec::new(), Vec::new()), |(mut succ, mut err), mut next| { + match next { + Ok(res) => succ.push(Some(res)), + Err(e) => err.push(Some(e)) + } + (succ, err) + }) +} + +pub fn recoverable_iter_collect(i: &mut dyn Iterator, Vec)>) +-> (Vec>, Vec) { + i.fold((Vec::new(), Vec::new()), |(mut succ, mut err), (res, mut errv)| { + succ.push(res); + err.append(&mut errv); + (succ, err) + }) +} \ No newline at end of file diff --git a/src/utils/substack.rs b/src/utils/substack.rs index f2ffc76..286b98a 100644 --- a/src/utils/substack.rs +++ b/src/utils/substack.rs @@ -9,7 +9,9 @@ pub struct Substack<'a, T> { } impl<'a, T> Substack<'a, T> { + #[allow(dead_code)] pub fn item(&self) -> &T { &self.item } + #[allow(dead_code)] pub fn prev(&self) -> Option<&'a Substack<'a, T>> { self.prev } pub fn new(item: T) -> Self {