redid the parser, patched up the project too.

This commit is contained in:
2022-07-03 18:01:40 +02:00
parent 6fb4b581b1
commit 2b55fae10d
30 changed files with 967 additions and 570 deletions

48
Cargo.lock generated
View File

@@ -22,6 +22,12 @@ dependencies = [
"version_check",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "cfg-if"
version = "1.0.0"
@@ -76,6 +82,12 @@ dependencies = [
"syn",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "getrandom"
version = "0.2.6"
@@ -96,6 +108,15 @@ dependencies = [
"ahash 0.7.6",
]
[[package]]
name = "itertools"
version = "0.10.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
dependencies = [
"either",
]
[[package]]
name = "lazy_static"
version = "1.4.0"
@@ -108,6 +129,21 @@ version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "mappable-rc"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b65e7f462b4fbfe1a3c857747c9d027dd55faffaeffbca68f70d0becfe7e93c5"
[[package]]
name = "num-traits"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.12.0"
@@ -121,9 +157,21 @@ dependencies = [
"chumsky",
"derivative",
"hashbrown",
"itertools",
"mappable-rc",
"ordered-float",
"thiserror",
]
[[package]]
name = "ordered-float"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2"
dependencies = [
"num-traits",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.19"

View File

@@ -10,3 +10,6 @@ thiserror = "1.0"
chumsky = "0.8"
derivative = "2.2"
hashbrown = "0.12"
mappable-rc = "0.1"
ordered-float = "3.0"
itertools = "0.10"

View File

@@ -252,7 +252,7 @@ TODO: carriage example
Files are the smallest unit of namespacing, automatically grouped into
folders and forming a tree the leaves of which are the actual symbols. An
exported symbol is a name referenced in an exported substitution pattern
exported symbol is a name referenced in an exported substitution rule
or assigned to an exported function. Imported symbols are considered
identical to the same symbol directly imported from the same module for
the purposes of substitution.

View File

@@ -1,18 +1,18 @@
import std::io::(println, out) -- imports
-- single word substitution (alias)
greet = \name. printf out "Hello {}!\n" [name]
-- single word rule (alias)
greet =1=> (\name. printf out "Hello {}!\n" [name])
-- multi-word exported substitution
export (...$pre ;) $a ...$post) =200=> (...$pre (greet $a) ...$post)
-- multi-word exported rule
export ;> $a =200=> (greet $a)
-- single-word exported substitution
-- single-word exported rule
export main = (
print "What is your name? >>
print "What is your name?" >>
readln >>= \name.
greet name
)
-- The broadest trait definition in existence
Foo = Bar Baz
default anyFoo = @T. @impl:(T (Bar Baz)). impl:(T Foo)
Foo = (Bar Baz)
-- default anyFoo = @T. @impl:(T (Bar Baz)). impl:(T Foo)

View File

@@ -16,4 +16,3 @@ export main = (
greet name
)
```

52
src/expression.rs Normal file
View File

@@ -0,0 +1,52 @@
use ordered_float::NotNan;
use std::{fmt::Debug};
/// A literal value that can appear in an expression.
///
/// The float variant stores a `NotNan<f64>` rather than a bare `f64`, which is
/// what allows `Eq` and `Hash` to be derived for the whole enum.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Literal {
// Floating-point literal (never NaN)
Num(NotNan<f64>),
// Unsigned integer literal
Int(u64),
// Character literal
Char(char),
// String literal
Str(String),
}
/// A clause paired with an optional type annotation.
///
/// Field `0` is the clause itself, field `1` is the boxed type expression if
/// one was attached.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Expr(pub Clause, pub Option<Box<Expr>>);

impl Expr {
    /// Replace all occurrences of a name in the tree with a parameter, to bypass name resolution.
    ///
    /// Both the clause and, when present, the type annotation are visited.
    pub fn bind_parameter(&mut self, name: &str) {
        let Expr(clause, annotation) = self;
        clause.bind_parameter(name);
        if let Some(typ) = annotation.as_deref_mut() {
            typ.bind_parameter(name);
        }
    }
}
/// An S-expression as read from a source file
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Clause {
// A literal value
Literal(Literal),
// A name given as its list of segments; a single-segment name is the only
// kind `bind_parameter` will turn into a `Parameter`
Name(Vec<String>),
// A delimited group: the opening delimiter character and the contained expressions
S(char, Vec<Expr>),
// Lambda: parameter name, type expressions, body expressions
Lambda(String, Vec<Expr>, Vec<Expr>),
// Auto: optional parameter name, type expressions, body expressions
Auto(Option<String>, Vec<Expr>, Vec<Expr>),
// A name that has been bound to an enclosing parameter by `bind_parameter`
Parameter(String)
}
impl Clause {
    /// Replace all occurrences of a name in the tree with a parameter, to bypass name resolution.
    ///
    /// Only single-segment names equal to `name` are replaced. Groups, lambdas
    /// and autos are searched recursively (type expressions first, then the
    /// body); literals and already-bound parameters are left alone.
    pub fn bind_parameter(&mut self, name: &str) {
        match self {
            Clause::Name(segments) => {
                if matches!(segments.as_slice(), [only] if only == name) {
                    *self = Clause::Parameter(name.to_string());
                }
            }
            Clause::S(_, items) => {
                items.iter_mut().for_each(|item| item.bind_parameter(name));
            }
            Clause::Lambda(_, typ, body) | Clause::Auto(_, typ, body) => {
                typ.iter_mut()
                    .chain(body.iter_mut())
                    .for_each(|item| item.bind_parameter(name));
            }
            Clause::Literal(_) | Clause::Parameter(_) => {}
        }
    }
}

View File

@@ -1,16 +1,63 @@
use std::io::{self, Read};
use core::ops::Range;
use std::{env::current_dir, fs::read_to_string};
use std::io;
use chumsky::{Parser, prelude::*};
use chumsky::prelude::end;
use chumsky::{Parser, Stream};
use parse::{LexerEntry, FileEntry};
// use project::{rule_collector, file_loader, Loaded};
mod parse;
mod project;
mod utils;
mod expression;
pub use expression::*;
/// Converts a slice of string slices into a vector of owned `String`s,
/// keeping the original order.
fn literal(orig: &[&str]) -> Vec<String> {
    let mut owned = Vec::with_capacity(orig.len());
    for piece in orig {
        owned.push(String::from(*piece));
    }
    owned
}
/// Source text of the built-in rule set, written in the language being parsed.
///
/// The first rule wraps `... $name` into a `match_sequence` marker, and the
/// `>>` / `>>=` rules at the bottom match on that same marker, so the spelling
/// must be identical in all of them. (The first rule previously emitted the
/// misspelled `match_seqence`, which the later rules could never match.)
static PRELUDE: &str = r#"
export ... $name =1000=> (match_sequence $name)
export ] =1000=> conslist_carriage(none)
export , $name conslist_carriage($tail) =1000=> conslist_carriage((some (cons $name $tail)))
export [ $name conslist_carriage($tail) =1000=> (some (cons $name $tail))
export (match_sequence $lhs) >> (match_sequence $rhs) =100=> (bind ($lhs) (\_. $rhs))
export (match_sequence $lhs) >>= (match_sequence $rhs) =100=> (bind ($lhs) ($rhs))
"#;
fn main() {
let mut input = String::new();
let mut stdin = io::stdin();
stdin.read_to_string(&mut input).unwrap();
let ops: Vec<&str> = vec!["$", "."];
let output = parse::expression_parser(&ops).then_ignore(end()).parse(input);
println!("\nParsed:\n{:?}", output);
// let mut input = String::new();
// let mut stdin = io::stdin();
// stdin.read_to_string(&mut input).unwrap();
let ops: Vec<&str> = vec!["...", ">>", ">>=", "[", "]", ",", "$"];
let data = read_to_string("./main.orc").unwrap();
let lexed = parse::lexer(&ops).parse(data).unwrap();
println!("Lexed: {:?}", lexed);
let parsr = parse::line_parser().then_ignore(end());
// match parsr.parse(data) {
// Ok(output) => println!("\nParsed:\n{:?}", output),
// Err(e) => println!("\nErrored:\n{:?}", e)
// }
let lines = lexed.iter().filter_map(|v| {
let parse::LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
let tuples = v.into_iter().map(|LexerEntry(l, r)| (l.clone(), r.clone()));
Some(parsr.parse_recovery_verbose(Stream::from_iter(end..end+1, tuples)))
}).collect::<Vec<_>>();
for (id, (out, errs)) in lines.into_iter().enumerate() {
println!("Parsing line {}", id);
if let Some(output) = out { println!("Parsed:\n{:?}", output) }
else { println!("Failed to produce output")}
if errs.len() > 0 { println!("Errored:\n{:?}", errs)}
}
// let output = parse::file_parser(&ops, &ops).parse(data).unwrap();
// let cwd = current_dir().unwrap();
// let collect_rules = rule_collector(move |n| {
// if n == vec!["prelude"] { Ok(Loaded::Module(PRELUDE.to_string())) }
// else { file_loader(cwd.clone())(n) }
// }, literal(&["...", ">>", ">>=", "[", "]", ","]));
// let rules = collect_rules.try_find(&literal(&["main"])).unwrap();
// for rule in rules.iter() {
// println!("{:?} ={}=> {:?}", rule.source, rule.priority, rule.target)
// }
}

13
src/parse/comment.rs Normal file
View File

@@ -0,0 +1,13 @@
pub use chumsky::{self, prelude::*, Parser};
/// Parses a Lua-style comment and yields its text without the delimiters.
///
/// Two forms are recognized, tried in this order:
/// - block comments `--[ ... ]--`, which run up to the first `]--`
/// - line comments `-- ...`, which run up to (but do not consume) the next
///   `\n`, or to the end of the input
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
    let block_comment = just("--[").ignore_then(take_until(just("]--").ignored()));
    // The newline is rewound so that it stays available as a line separator.
    let line_end = just("\n").rewind().ignored().or(end());
    let line_comment = just("--").ignore_then(take_until(line_end));
    choice((block_comment, line_comment))
        .map(|(text, ())| text)
        .collect()
        .labelled("comment")
}

26
src/parse/enum_parser.rs Normal file
View File

@@ -0,0 +1,26 @@
/// Builds a chumsky parser that accepts a single token if it is a given
/// one-field tuple variant, and yields that variant's payload.
///
/// Supported forms:
/// - `enum_parser!(Path::Variant)`: accept `Path::Variant(x)`, yield `x`;
///   the error message is `"Expected "` followed by the stringified path.
/// - `enum_parser!(Path::Variant | msg)`: same, with a custom error message.
/// - `enum_parser!(Src >> Tgt; A)`: accept `Src::A(x)`, yield `Tgt::A(x)`.
/// - `enum_parser!(Src >> Tgt; A, B, ...)`: a `choice` over the previous form
///   for every listed variant name.
#[macro_export]
macro_rules! enum_parser {
// Variant path plus explicit error message: unwrap the payload or fail with
// a custom error carrying the message.
($p:path | $m:tt) => {
{
::chumsky::prelude::filter_map(|s, l| {
if let $p(x) = l { Ok(x) }
else { Err(::chumsky::prelude::Simple::custom(s, $m))}
})
}
};
// Single variant conversion: the two enum paths are aliased locally so the
// variant name can be appended to each of them.
($p:path >> $q:path; $i:ident) => {
{
use $p as srcpath;
use $q as tgtpath;
enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i)
}
};
// Several variants: expands to one single-variant conversion per name.
($p:path >> $q:path; $($i:ident),+) => {
{
::chumsky::prelude::choice((
$( enum_parser!($p >> $q; $i) ),+
))
}
};
// Variant path only: delegates to the first arm with a generated message.
($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) };
}

View File

@@ -1,86 +1,90 @@
use std::{fmt::Debug};
use chumsky::{self, prelude::*, Parser};
use crate::{Clause, Expr, Literal, enum_parser};
use super::string;
use super::number;
use super::misc;
use super::name;
use super::{lexer::Lexeme};
/// An S-expression as read from a source file
#[derive(Debug, Clone)]
pub enum Expr {
Num(f64),
Int(u64),
Char(char),
Str(String),
Name(Vec<String>),
S(Vec<Expr>),
Lambda(String, Option<Box<Expr>>, Vec<Expr>),
Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
Typed(Box<Expr>, Box<Expr>)
fn sexpr_parser<P>(
expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, b))
}
/// Parse a type annotation
fn typed_parser<'a>(
expr: Recursive<'a, char, Expr, Simple<char>>
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
just(':').ignore_then(expr)
fn lambda_parser<P>(
expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
just(Lexeme::BS)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(enum_parser!(Lexeme::Name))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(
just(Lexeme::Type)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(expr.clone().repeated())
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.or_not().map(Option::unwrap_or_default)
)
.then_ignore(just(Lexeme::name(".")))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(expr.repeated().at_least(1))
.map(|((name, typ), mut body): ((String, Vec<Expr>), Vec<Expr>)| {
for ent in &mut body { ent.bind_parameter(&name) };
Clause::Lambda(name, typ, body)
})
}
fn auto_parser<P>(
expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
just(Lexeme::At)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(enum_parser!(Lexeme::Name).or_not())
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(
just(Lexeme::Type)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(expr.clone().repeated())
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
)
.then_ignore(just(Lexeme::name(".")))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(expr.repeated().at_least(1))
.try_map(|((name, typ), mut body), s| if name == None && typ.is_empty() {
Err(Simple::custom(s, "Auto without name or type has no effect"))
} else {
if let Some(n) = &name {
for ent in &mut body { ent.bind_parameter(n) }
}
Ok(Clause::Auto(name, typ, body))
})
}
/// Parses one or more name lexemes joined by namespace separators and yields
/// the segments in order. Comment lexemes on either side of a separator are
/// skipped.
fn name_parser() -> impl Parser<Lexeme, Vec<String>, Error = Simple<Lexeme>> + Clone {
    let separator = enum_parser!(Lexeme::Comment)
        .repeated()
        .then(just(Lexeme::NS))
        .then(enum_parser!(Lexeme::Comment).repeated());
    enum_parser!(Lexeme::Name).separated_by(separator).at_least(1)
}
/// Parse an expression without a type annotation
fn untyped_xpr_parser<'a>(
expr: Recursive<'a, char, Expr, Simple<char>>,
ops: &[&'a str]
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
// basic S-expression rule
let sexpr = expr.clone()
.repeated()
.delimited_by(just('('), just(')'))
.map(Expr::S);
// Blocks
// can and therefore do match everything up to the closing paren
// \name. body
// \name:type. body
let lambda = just('\\')
.ignore_then(text::ident())
.then(typed_parser(expr.clone()).or_not())
.then_ignore(just('.'))
.then(expr.clone().repeated().at_least(1))
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
// @name. body
// @name:type. body
// @:type. body
let auto = just('@')
.ignore_then(text::ident().or_not())
.then(typed_parser(expr.clone()).or_not())
.then_ignore(just('.'))
.then(expr.clone().repeated().at_least(1))
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
choice((
number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence
number::float_parser().map(Expr::Num),
string::char_parser().map(Expr::Char),
string::str_parser().map(Expr::Str),
name::name_parser(ops).map(Expr::Name), // includes namespacing
sexpr,
lambda,
auto
)).padded()
}
/// Parse any expression with a type annotation, surrounded by comments
pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
// This approach to parsing comments is ugly and error-prone,
// but I don't have a lot of other ideas
return recursive(|expr| {
return misc::comment_parser().or_not().ignore_then(
untyped_xpr_parser(expr.clone(), &ops)
.then(typed_parser(expr).or_not())
.map(|(val, t)| match t {
Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
None => val
})
).then_ignore(misc::comment_parser().or_not())
/// Parses a full expression: one clause, surrounded by any number of comment
/// lexemes, optionally followed by the type operator and a type expression.
///
/// The clause alternatives are tried in this order: literal, (namespaced)
/// name, delimited group, lambda, auto.
pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
    recursive(|expr| {
        let literal = enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Clause::Literal);
        let alternatives = choice((
            literal,
            name_parser().map(Clause::Name),
            sexpr_parser(expr.clone()),
            lambda_parser(expr.clone()),
            auto_parser(expr.clone()),
        ));
        let clause = enum_parser!(Lexeme::Comment)
            .repeated()
            .ignore_then(alternatives)
            .then_ignore(enum_parser!(Lexeme::Comment).repeated());
        let annotation = just(Lexeme::Type).ignore_then(expr).or_not();
        clause
            .then(annotation)
            .map(|(value, typ)| Expr(value, typ.map(Box::new)))
    })
}

View File

@@ -1,7 +1,9 @@
use std::iter;
use chumsky::{Parser, prelude::*};
use super::name;
use crate::{enum_parser, utils::BoxedIter};
use super::lexer::Lexeme;
#[derive(Debug, Clone)]
pub struct Import {
@@ -9,15 +11,10 @@ pub struct Import {
pub name: Option<String>
}
pub type BoxedStrIter = Box<dyn Iterator<Item = String>>;
pub type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
/// initialize a Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>
/// with a single element.
fn init_table(name: String) -> BoxedStrIterIter {
// I'm not confident at all that this is a good approach.
Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter))
/// Wraps a single name into a `BoxedIter<BoxedIter<String>>`: an outer
/// iterator with exactly one item, which is itself an iterator yielding
/// exactly that name.
fn init_table(name: String) -> BoxedIter<'static, BoxedIter<'static, String>> {
    // I'm not at all confident that this is a good approach.
    let single_path: BoxedIter<'static, String> = Box::new(iter::once(name));
    Box::new(iter::once(single_path))
}
/// Parse an import command
@@ -25,29 +22,38 @@ fn init_table(name: String) -> BoxedStrIterIter {
/// and the delimiters are plain parentheses. Namespaces should preferably contain
/// crossplatform filename-legal characters but the symbols are explicitly allowed
/// to go wild. There's a blacklist in [name]
pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> {
// TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
name::modname_parser()
.padded()
.then_ignore(just("::"))
.repeated()
recursive(|expr: Recursive<Lexeme, BoxedIter<BoxedIter<String>>, Simple<Lexeme>>| {
enum_parser!(Lexeme::Name)
.separated_by(just(Lexeme::NS))
.then(
just(Lexeme::NS)
.ignore_then(
choice((
expr.clone()
.separated_by(just(','))
.delimited_by(just('('), just(')'))
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
.separated_by(just(Lexeme::name(",")))
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
.map(|v| Box::new(v.into_iter().flatten()) as BoxedIter<BoxedIter<String>>)
.labelled("import group"),
// Each expr returns a list of imports, flatten those into a common list
just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped
name::modname_parser().map(init_table) // Just a name, wrapped
)).padded()
).map(|(pre, post)| {
just(Lexeme::name("*")).map(|_| init_table("*".to_string()))
.labelled("wildcard import"), // Just a *, wrapped
enum_parser!(Lexeme::Name).map(init_table)
.labelled("import terminal") // Just a name, wrapped
))
).or_not()
)
.map(|(name, opt_post): (Vec<String>, Option<BoxedIter<BoxedIter<String>>>)| -> BoxedIter<BoxedIter<String>> {
if let Some(post) = opt_post {
Box::new(post.map(move |el| {
Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
})) as BoxedStrIterIter
Box::new(name.clone().into_iter().chain(el)) as BoxedIter<String>
})) as BoxedIter<BoxedIter<String>>
} else {
Box::new(iter::once(Box::new(name.into_iter()) as BoxedIter<String>))
}
})
}).padded().map(|paths| {
}).map(|paths| {
paths.filter_map(|namespaces| {
let mut path: Vec<String> = namespaces.collect();
match path.pop()?.as_str() {
@@ -55,5 +61,5 @@ pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
name => Some(Import { path, name: Some(name.to_owned()) })
}
}).collect()
})
}).labelled("import")
}

134
src/parse/lexer.rs Normal file
View File

@@ -0,0 +1,134 @@
use std::{ops::Range, iter};
use ordered_float::NotNan;
use chumsky::{Parser, prelude::*, text::whitespace};
use std::fmt::Debug;
use crate::utils::BoxedIter;
use super::{number, string, name, comment};
/// A lexeme together with the span of the input it was read from.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Entry(pub Lexeme, pub Range<usize>);

impl Debug for Entry {
    /// Prints only the lexeme; the span is left out of the output.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Entry(lexeme, _span) = self;
        write!(f, "{:?}", lexeme)
    }
}
/// A single token produced by the lexer.
///
/// `Debug` is implemented by hand for this type and prints each lexeme in a
/// source-like form rather than as a Rust value.
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lexeme {
// Floating-point literal (never NaN)
Num(NotNan<f64>),
// Unsigned integer literal
Int(u64),
// Character literal
Char(char),
// String literal
Str(String),
// A name or operator
Name(String),
// Rule arrow carrying the rule's priority; printed as `=<priority>=>`
Rule(NotNan<f64>),
NS, // namespace separator
// Opening delimiter, identified by its own character
LP(char),
// Closing delimiter, identified by the matching OPENING character
// (so `RP('(')` stands for `)`)
RP(char),
BS, // Backslash
At,
Type, // type operator
// Comment text without its delimiters
Comment(String)
}
impl Debug for Lexeme {
    /// Prints the lexeme in a source-like form.
    ///
    /// Closing delimiters are stored by their opening character, so they are
    /// translated back to the closing one here; an unknown delimiter falls
    /// back to a tuple-style `RP(..)` rendering. Comments are always printed
    /// in the block form `--[text]--`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Num(value) => write!(f, "{}", value),
            Self::Int(value) => write!(f, "{}", value),
            Self::Char(ch) => write!(f, "{:?}", ch),
            Self::Str(text) => write!(f, "{:?}", text),
            Self::Name(name) => f.write_str(name),
            Self::Rule(prio) => write!(f, "={}=>", prio),
            Self::NS => f.write_str("::"),
            Self::LP(open) => write!(f, "{}", open),
            Self::RP('(') => f.write_str(")"),
            Self::RP('[') => f.write_str("]"),
            Self::RP('{') => f.write_str("}"),
            Self::RP(other) => f.debug_tuple("RP").field(other).finish(),
            Self::BS => f.write_str("\\"),
            Self::At => f.write_str("@"),
            Self::Type => f.write_str(":"),
            Self::Comment(text) => write!(f, "--[{}]--", text),
        }
    }
}
impl Lexeme {
pub fn name<T: ToString>(n: T) -> Self {
Lexeme::Name(n.to_string())
}
pub fn paren_parser<T, P>(
expr: P
) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
choice((
expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
.map(|t| ('(', t)),
expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
.map(|t| ('[', t)),
expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
.map(|t| ('{', t)),
))
}
}
/// Parses a rule arrow and yields the rule's priority.
///
/// After the leading `=`:
/// - if the next character is neither `-` nor a decimal digit, the priority
///   is `0` and that character is not consumed;
/// - otherwise a float followed by `=>` is expected and the float is the
///   priority.
///
/// # Panics
/// Any error raised after the leading `=` is turned into a panic that prints
/// the span and the error.
fn rule_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
    let default_priority = NotNan::new(0f64).unwrap();
    let implicit = none_of("-0123456789").rewind().to(default_priority);
    let explicit = number::float_parser().then_ignore(just("=>"));
    let priority = choice((implicit, explicit)).map_err_with_span(|err, span| {
        panic!("Something's up! {:?} {}", span, err)
    });
    just('=').ignore_then(priority)
}
type LexSubres<'a> = BoxedIter<'a, Entry>;
fn paren_parser<'a>(
expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
lp: char, rp: char
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
expr.padded().repeated()
.map(|x| Box::new(x.into_iter().flatten()) as LexSubres)
.delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
Box::new(
iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1))
.chain(b)
.chain(iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end)))
) as LexSubres
})
}
/// Builds the lexer: turns source text into one list of entries per line.
///
/// `ops` is the list of known operators handed to the name parser; `.` is
/// always appended to it. Within a line, items are separated by runs of tabs
/// and spaces; lines are separated by a `\n` followed by any whitespace.
/// Delimited groups are lexed recursively and flattened into the line they
/// start on, wrapped in `LP` / `RP` entries.
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Vec<Entry>>, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
// Owned copy of the operators so the returned parser does not borrow `ops`
let all_ops = ops.iter().map(|o| o.as_ref().to_string())
.chain(iter::once(".".to_string())).collect::<Vec<_>>();
recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
choice((
// Delimited groups recurse into the lexer itself
paren_parser(recurse.clone(), '(', ')'),
paren_parser(recurse.clone(), '[', ']'),
paren_parser(recurse.clone(), '{', '}'),
// Single lexemes; the order of the alternatives decides which one wins
choice((
rule_parser().map(Lexeme::Rule),
comment::comment_parser().map(Lexeme::Comment),
just("::").padded().to(Lexeme::NS),
just('\\').padded().to(Lexeme::BS),
just('@').padded().to(Lexeme::At),
just(':').to(Lexeme::Type),
number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence
number::float_parser().map(Lexeme::Num),
string::char_parser().map(Lexeme::Char),
string::str_parser().map(Lexeme::Str),
// NOTE(review): the namespace separator is lexed separately above as `Lexeme::NS`
name::name_parser(&all_ops).map(Lexeme::Name),
// Every single lexeme becomes a one-element iterator tagged with its span
)).map_with_span(|lx, span| Box::new(iter::once(Entry(lx, span))) as LexSubres)
))
// Items of one line, separated by (possibly empty) runs of tabs and spaces
}).separated_by(one_of("\t ").repeated())
// Merge the per-item iterators into one list of entries for the line
.flatten().collect()
// Lines are separated by a newline plus any following whitespace
.separated_by(just('\n').then(text::whitespace()).ignored())
}

View File

@@ -1,8 +0,0 @@
pub use chumsky::{self, prelude::*, Parser};
/// Parses Lua-style comments
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
any().repeated().delimited_by(just("--["), just("]--")).or(
any().repeated().delimited_by(just("--"), just("\n"))
).map(|vc| vc.iter().collect()).padded()
}

View File

@@ -1,18 +1,16 @@
mod expression;
mod string;
mod number;
mod misc;
mod import;
mod name;
mod substitution;
mod lexer;
mod comment;
mod expression;
mod sourcefile;
mod import;
mod enum_parser;
pub use substitution::Substitution;
pub use expression::Expr;
pub use expression::expression_parser;
pub use sourcefile::FileEntry;
pub use sourcefile::file_parser;
pub use sourcefile::line_parser;
pub use sourcefile::imports;
pub use sourcefile::is_op;
pub use sourcefile::exported_names;
pub use import::Import;
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
pub use name::is_op;

View File

@@ -1,12 +1,14 @@
use chumsky::{self, prelude::*, Parser};
/// Matches any one of the passed operators, longest-first
fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>> {
let mut sorted_ops = ops.to_vec();
fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple<char>> {
let mut sorted_ops: Vec<String> = ops.iter().map(|t| t.as_ref().to_string()).collect();
sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
sorted_ops.into_iter()
.map(|op| just(op.to_string()).boxed())
.reduce(|a, b| a.or(b).boxed()).unwrap()
.map(|op| just(op).boxed())
.reduce(|a, b| a.or(b).boxed())
.unwrap_or(empty().map(|()| panic!("Empty isn't meant to match")).boxed())
.labelled("operator").boxed()
}
/// Matches anything that's allowed as an operator
@@ -27,20 +29,31 @@ fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>>
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
/// common in maths so it's worth a try. Investigate.
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '.'];
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', ','];
filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
.repeated().at_least(1)
.collect()
.labelled("modname")
}
/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
/// blacklisted character as a new operator.
pub fn name_parser<'a>(
ops: &[&'a str]
) -> impl Parser<char, Vec<String>, Error = Simple<char>> + 'a {
pub fn name_parser<'a, T: AsRef<str> + Clone>(
ops: &[T]
) -> impl Parser<char, String, Error = Simple<char>> + 'a {
choice((
op_parser(ops), // First try to parse a known operator
text::ident(), // Failing that, parse plain text
text::ident().labelled("plain text"), // Failing that, parse plain text
modname_parser() // Finally parse everything until tne next terminal as a new operator
)).padded().separated_by(just("::")).padded()
))
.labelled("name")
}
/// Decide if a string can be an operator: its first character must exist and
/// must not be alphanumeric. Later characters are not inspected, so digits
/// and text are fine anywhere but at the start. The empty string is not an
/// operator.
pub fn is_op<T: AsRef<str>>(s: T) -> bool {
    s.as_ref()
        .chars()
        .next()
        .map_or(false, |first| !first.is_alphanumeric())
}

View File

@@ -1,4 +1,5 @@
use chumsky::{self, prelude::*, Parser};
use ordered_float::NotNan;
fn assert_not_digit(base: u32, c: char) {
if base > (10 + (c as u32 - 'a' as u32)) {
@@ -51,7 +52,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
}
/// returns a mapper that converts a mantissa and an exponent into a float
fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32),) -> NotNan<f64> {
return move |(val, exp)| {
if exp == 0 {val}
else {val * (base as f64).powf(exp.try_into().unwrap())}
@@ -77,32 +78,35 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
}
/// parse a float from dot notation
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
fn dotted_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
uint_parser(base)
.then_ignore(just('.'))
.then(
just('.').ignore_then(
text::digits(base).then(separated_digits_parser(base))
).map(move |(wh, (frac1, frac2))| {
).map(move |(frac1, frac2)| {
let frac = frac1 + &frac2;
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
let dexp = base.pow(frac.len().try_into().unwrap());
wh as f64 + (frac_num / dexp as f64)
frac_num / dexp as f64
}).or_not().map(|o| o.unwrap_or_default())
).try_map(|(wh, f), s| {
NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
})
}
/// parse a float from dotted and optionally also exponential notation
fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
fn pow_float_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
assert_not_digit(base, 'p');
dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
}
/// parse a float with dotted and optionally exponential notation from a base determined by its
/// prefix
pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
pub fn float_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
choice((
just("0b").ignore_then(pow_float_parser(2)),
just("0x").ignore_then(pow_float_parser(16)),
just('0').ignore_then(pow_float_parser(8)),
pow_float_parser(10),
))
)).labelled("float")
}

View File

@@ -3,7 +3,7 @@ use chumsky::{self, prelude::*, Parser};
use super::{expression, number::float_parser};
#[derive(Debug, Clone)]
pub struct Substitution {
pub struct Rule {
pub source: expression::Expr,
pub priority: f64,
pub target: expression::Expr
@@ -19,15 +19,16 @@ pub struct Substitution {
/// shadow_reee =0.9=> reee
/// ```
/// TBD whether this disables reee in the specified range or loops forever
pub fn substitution_parser<'a>(
pattern_ops: &[&'a str],
ops: &[&'a str]
) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
expression::expression_parser(pattern_ops)
pub fn rule_parser<'a, T: 'a + AsRef<str> + Clone>(
pattern_ops: &[T],
ops: &[T]
) -> impl Parser<char, Rule, Error = Simple<char>> + 'a {
expression::expression_parser(pattern_ops).padded()
.then_ignore(just('='))
.then(
float_parser().then_ignore(just("=>"))
.or_not().map(|prio| prio.unwrap_or(0.0))
).then(expression::expression_parser(ops))
.map(|((source, priority), target)| Substitution { source, priority, target })
).then(expression::expression_parser(ops).padded())
.map(|((source, priority), target)| Rule { source, priority, target })
.labelled("rule")
}

View File

@@ -1,20 +1,25 @@
use std::collections::HashSet;
use std::fs::File;
use std::iter;
use super::expression::Expr;
use crate::{enum_parser, Expr, Clause};
use crate::utils::BoxedIter;
use super::expression::xpr_parser;
use super::import;
use super::misc;
use super::substitution::substitution_parser;
use super::substitution::Substitution;
use super::import::import_parser;
use super::lexer::Lexeme;
use super::name;
use chumsky::{Parser, prelude::*};
use ordered_float::NotNan;
/// Anything we might encounter in a file
#[derive(Debug, Clone)]
pub enum FileEntry {
Import(Vec<import::Import>),
Comment(String),
Substitution(Substitution),
Export(Substitution)
Rule(Vec<Expr>, NotNan<f64>, Vec<Expr>),
Export(Vec<Expr>, NotNan<f64>, Vec<Expr>)
}
/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
@@ -22,19 +27,22 @@ pub enum FileEntry {
/// sophisticated search.
///
/// TODO: find a way to exclude parameters
fn find_all_names_recur(expr: &Expr) -> Box<dyn Iterator<Item = &Vec<String>> + '_> {
match expr {
Expr::Auto(_, typ, body) | Expr::Lambda(_, typ, body) => Box::new(match typ {
Some(texp) => find_all_names_recur(texp),
None => Box::new(iter::empty())
}.chain(body.into_iter().map(find_all_names_recur).flatten())),
Expr::S(body) => Box::new(body.into_iter().map(find_all_names_recur).flatten()),
Expr::Typed(val, typ) => Box::new(
find_all_names_recur(val).chain(find_all_names_recur(typ))
fn find_all_names_recur<'a>(expr: &'a Expr) -> BoxedIter<&'a Vec<String>> {
let proc_clause = |clause: &'a Clause| match clause {
Clause::Auto(_, typ, body) | Clause::Lambda(_, typ, body) => Box::new(
typ.iter().flat_map(find_all_names_recur)
.chain(body.iter().flat_map(find_all_names_recur))
) as BoxedIter<&'a Vec<String>>,
Clause::S(_, body) => Box::new(
body.iter().flat_map(find_all_names_recur)
),
Expr::Name(x) => Box::new(iter::once(x)),
Clause::Name(x) => Box::new(iter::once(x)),
_ => Box::new(iter::empty())
}
};
let Expr(val, typ) = expr;
if let Some(t) = typ {
Box::new(proc_clause(val).chain(find_all_names_recur(t)))
} else { proc_clause(val) }
}
/// Collect all names that occur in an expression
@@ -42,62 +50,69 @@ fn find_all_names(expr: &Expr) -> HashSet<&Vec<String>> {
find_all_names_recur(expr).collect()
}
/// Parse a file into a list of distinctive entries
pub fn file_parser<'a>(
pattern_ops: &[&'a str], ops: &[&'a str]
) -> impl Parser<char, Vec<FileEntry>, Error = Simple<char>> + 'a {
choice((
// In case the usercode wants to parse doc
misc::comment_parser().map(FileEntry::Comment),
import::import_parser().map(FileEntry::Import),
text::keyword("export")
.ignore_then(substitution_parser(pattern_ops, ops)).map(FileEntry::Export),
// This could match almost anything so it has to go last
substitution_parser(pattern_ops, ops).map(FileEntry::Substitution)
)).padded()
.separated_by(just('\n'))
.then_ignore(end())
/// Parses a rule: any number of expressions, a rule-arrow lexeme, and any
/// number of expressions again. Yields them as the tuple
/// (left-hand side, priority, right-hand side).
fn rule_parser() -> impl Parser<Lexeme, (Vec<Expr>, NotNan<f64>, Vec<Expr>), Error = Simple<Lexeme>> {
    let pattern = xpr_parser().repeated();
    let priority = enum_parser!(Lexeme::Rule);
    let template = xpr_parser().repeated();
    pattern
        .then(priority)
        .then(template)
        .map(|((source, prio), target)| (source, prio, target))
        .labelled("Rule")
}
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
/// start.
pub fn is_op(s: &str) -> bool {
return match s.chars().next() {
Some(x) => !x.is_alphanumeric(),
None => false
}
/// Parses one lexed line into a `FileEntry`.
///
/// The alternatives are tried in this order:
/// 1. a lone comment lexeme,
/// 2. the name `import` followed by an import, then any number of trailing
///    comments,
/// 3. the name `export` followed by a rule,
/// 4. a plain rule.
///
/// Trailing comments after an import are optional. A single comment used to
/// be required, which rejected every import line that had none.
pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
    choice((
        // In case the usercode wants to parse doc
        enum_parser!(Lexeme >> FileEntry; Comment),
        just(Lexeme::name("import"))
            .ignore_then(import_parser().map(FileEntry::Import))
            .then_ignore(enum_parser!(Lexeme::Comment).repeated()),
        // NOTE(review): this diagnostic is printed whenever the `export`
        // keyword fails to match, presumably a debugging aid.
        just(Lexeme::name("export")).map_err_with_span(|e, s| {
            println!("{:?} could not yield an export", s); e
        })
            .ignore_then(rule_parser())
            .map(|(lhs, prio, rhs)| FileEntry::Export(lhs, prio, rhs)),
        // This could match almost anything so it has to go last
        rule_parser().map(|(lhs, prio, rhs)| FileEntry::Rule(lhs, prio, rhs)),
    ))
}
/// Collect all exported names (and a lot of other words) from a file
pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
src.iter().filter_map(|ent| match ent {
FileEntry::Export(a) => Some(&a.source),
_ => None
src.iter().flat_map(|ent| match ent {
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())) as BoxedIter<&Expr>,
_ => Box::new(iter::empty())
}).map(find_all_names).flatten().collect()
}
// #[allow(dead_code)]
/// Collect all operators defined in a file (and some other words)
fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
let all_names:HashSet<&Vec<String>> = src.iter().filter_map(|ent| match ent {
FileEntry::Substitution(a) => if exported_only {None} else {Some(&a.source)},
FileEntry::Export(a) => Some(&a.source),
_ => None
let all_names:HashSet<&Vec<String>> = src.iter().flat_map(|ent| match ent {
FileEntry::Rule(s, _, d) =>
if exported_only {Box::new(iter::empty()) as BoxedIter<&Expr>}
else {Box::new(s.iter().chain(d.iter()))}
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())),
_ => Box::new(iter::empty())
}).map(find_all_names).flatten().collect();
// Dedupe stage of dubious value; collecting into a hashset may take longer than
// handling duplicates would with a file of sensible size.
all_names.into_iter()
.filter_map(|name|
// If it's namespaced, it's imported.
if name.len() == 1 && is_op(&name[0]) {Some(&name[0])}
if name.len() == 1 && name::is_op(&name[0]) {Some(&name[0])}
else {None}
).collect()
}
// #[allow(dead_code)]
/// Collect all operators from a file, whether or not they are exported.
///
/// Delegates to `defined_ops` with `exported_only` set to `false`.
pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
// #[allow(dead_code)]
/// Collect exported operators from a file (plus some extra)
///
/// Delegates to `defined_ops` with `exported_only` set to `true`. The "extra" is there because
/// `defined_ops` keeps every single-segment operator name that occurs in the scanned entries.
pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
/// Summarize all imports from a file in a single list of qualified names
pub fn imports<'a, 'b, I>(
src: I

View File

@@ -0,0 +1,47 @@
use std::io;
use std::rc::Rc;
use std::fs::read_to_string;
use std::path::PathBuf;
use super::loaded::Loaded;
/// Reasons why `file_loader` can fail to load a path.
#[derive(Clone, Debug)]
pub enum LoadingError {
/// An I/O error reported by the filesystem. It is held in an `Rc` so that this enum can
/// derive `Clone`, which `io::Error` does not implement.
IOErr(Rc<io::Error>),
/// Something exists at the path, but it could not be loaded as a namespace or as a module.
/// Carries the path as text.
UnknownNode(String),
/// Nothing exists at the path. Carries the path as text.
Missing(String)
}
impl From<io::Error> for LoadingError {
fn from(inner: io::Error) -> Self {
LoadingError::IOErr(Rc::new(inner))
}
}
/// Create a loader that resolves module paths relative to the project directory `proj`.
///
/// The returned closure takes a path as a list of name segments and looks it up on disk:
///
/// - if the segments name a directory (symlinks are followed), the result is
///   [`Loaded::Namespace`] listing its subdirectories and the stems of its `.orc` files;
/// - otherwise, if the same path with the `orc` extension is a file, the result is
///   [`Loaded::Module`] holding the text of that file;
/// - otherwise the call fails with [`LoadingError::UnknownNode`] if something exists at the
///   path and with [`LoadingError::Missing`] if nothing does.
///
/// # Errors
/// I/O failures while opening a directory or reading a file are reported as
/// [`LoadingError::IOErr`]. Directory entries that cannot be inspected are skipped.
pub fn file_loader(proj: PathBuf) -> impl FnMut(Vec<String>) -> Result<Loaded, LoadingError> + 'static {
    move |path| {
        let dirpath = proj.join(path.join("/"));
        // `Path::is_dir` follows symlinks, so a link to a directory is accepted here while a
        // link to a file (or a dangling link) falls through instead of failing in `read_dir`.
        if dirpath.is_dir() {
            let names: Vec<String> = dirpath.read_dir()?
                .filter_map(|entry| {
                    let ent = entry.ok()?;
                    let ent_path = ent.path();
                    // Classify through the path rather than `DirEntry::file_type`, which does
                    // not follow symlinks: a link to an `.orc` file must be listed by its
                    // stem like any other module, and a dangling link must not be listed.
                    if ent_path.is_dir() {
                        Some(ent.file_name().to_string_lossy().into_owned())
                    } else if ent_path.is_file() && ent_path.extension()? == "orc" {
                        Some(ent_path.file_stem()?.to_string_lossy().into_owned())
                    } else {
                        None
                    }
                })
                .collect();
            return Ok(Loaded::Namespace(names));
        }
        let orcfile = dirpath.with_extension("orc");
        if orcfile.is_file() {
            read_to_string(orcfile).map(Loaded::Module).map_err(LoadingError::from)
        } else {
            let shown = dirpath.to_string_lossy().into_owned();
            Err(if dirpath.exists() {
                LoadingError::UnknownNode(shown)
            } else {
                LoadingError::Missing(shown)
            })
        }
    }
}

5
src/project/loaded.rs Normal file
View File

@@ -0,0 +1,5 @@
/// What a loader found at a path.
#[derive(Debug, Clone)]
pub enum Loaded {
/// A module, represented by its source text
Module(String),
/// A namespace, represented by the list of names found in it
Namespace(Vec<String>),
}

View File

@@ -1,25 +1,23 @@
use std::collections::HashMap;
mod resolve_names;
mod rule_collector;
// pub use rule_collector::rule_collector;
mod prefix;
mod name_resolver;
mod expr;
/// A collection of modules.
#[derive(Debug, Clone)]
pub struct Project {
/// The modules, keyed by a list of name segments (presumably the module's path -- confirm
/// against the code that fills this map)
pub modules: HashMap<Vec<String>, Module>,
}
mod loaded;
pub use loaded::Loaded;
mod parse_error;
mod file_loader;
pub use file_loader::file_loader;
#[derive(Debug, Clone)]
pub struct Module {
pub substitutions: Vec<Substitution>,
pub rules: Vec<Rule>,
pub exports: Vec<String>,
pub references: Vec<Vec<String>>
}
#[derive(Debug, Clone)]
pub struct Substitution {
pub source: expr::Expr,
pub struct Rule {
pub source: super::Expr,
pub priority: f64,
pub target: expr::Expr
pub target: super::Expr
}

View File

@@ -3,7 +3,7 @@ use thiserror::Error;
use crate::utils::Substack;
use super::expr::{Expr, Token};
use crate::{Expr, Clause, Literal};
type ImportMap = HashMap<String, Vec<String>>;
@@ -50,9 +50,8 @@ where
) -> Result<Vec<String>, ResolutionError<E>> {
if let Some(cached) = self.cache.get(symbol) { return cached.clone() }
// The imports and path of the referenced file and the local name
let mut splitpoint = symbol.len();
let path = (self.get_modname)(symbol).ok_or(ResolutionError::NoModule(symbol.clone()))?;
let name = symbol.split_at(path.len()).1;
let (_, name) = symbol.split_at(path.len());
let imports = (self.get_imports)(&path)?;
let result = if let Some(source) = imports.get(&name[0]) {
let new_sym: Vec<String> = source.iter().chain(name.iter()).cloned().collect();
@@ -79,41 +78,39 @@ where
.next().transpose()
}
fn process_token_rec(&mut self, tok: &Token) -> Result<Token, ResolutionError<E>> {
fn process_clause_rec(&mut self, tok: &Clause) -> Result<Clause, ResolutionError<E>> {
Ok(match tok {
Token::Literal(l) => Token::Literal(l.clone()),
Token::S(exv) => Token::S(
Clause::S(c, exv) => Clause::S(*c,
exv.iter().map(|e| self.process_expression_rec(e))
.collect::<Result<Vec<Expr>, ResolutionError<E>>>()?
),
Token::Lambda(name, typ, body) => Token::Lambda(name.clone(),
self.process_exprboxopt_rec(typ)?,
Clause::Lambda(name, typ, body) => Clause::Lambda(name.clone(),
self.process_exprv_rec(typ)?,
self.process_exprv_rec(body)?
),
Token::Auto(name, typ, body) => Token::Auto(name.clone(),
self.process_exprboxopt_rec(typ)?,
Clause::Auto(name, typ, body) => Clause::Auto(name.clone(),
self.process_exprv_rec(typ)?,
self.process_exprv_rec(body)?
),
Token::Name { qualified, local } => Token::Name {
local: local.clone(),
qualified: self.find_origin(qualified)?
}
Clause::Name(qualified) => Clause::Name(self.find_origin(qualified)?),
x => x.clone()
})
}
fn process_expression_rec(&mut self, ex: &Expr) -> Result<Expr, ResolutionError<E>> {
Ok(Expr {
token: self.process_token_rec(&ex.token)?,
typ: self.process_exprboxopt_rec(&ex.typ)?
})
fn process_expression_rec(&mut self, Expr(token, typ): &Expr) -> Result<Expr, ResolutionError<E>> {
Ok(Expr(
self.process_clause_rec(token)?,
self.process_exprboxopt_rec(typ)?
))
}
/// Public entry point of the origin lookup for `symbol`.
///
/// Starts the recursive search in `find_origin_rec` with a fresh `Substack` whose only item is
/// `symbol` itself. Presumably the stack is what lets the recursion recognise cycles -- confirm
/// in `find_origin_rec`.
pub fn find_origin(&mut self, symbol: &Vec<String>) -> Result<Vec<String>, ResolutionError<E>> {
self.find_origin_rec(symbol, &Substack::new(symbol))
}
pub fn process_token(&mut self, tok: &Token) -> Result<Token, ResolutionError<E>> {
self.process_token_rec(tok)
#[allow(dead_code)]
pub fn process_clause(&mut self, clause: &Clause) -> Result<Clause, ResolutionError<E>> {
self.process_clause_rec(clause)
}
pub fn process_expression(&mut self, ex: &Expr) -> Result<Expr, ResolutionError<E>> {

View File

@@ -0,0 +1,30 @@
use chumsky::prelude::Simple;
use thiserror::Error;
use super::name_resolver::ResolutionError;
/// Errors of the project parsing stage, generic over `ELoad`, the payload carried by `Load`.
#[derive(Error, Debug, Clone)]
pub enum ParseError<ELoad> where ELoad: Clone {
/// Name resolution ran into a cycle (converted from `ResolutionError::Cycle`)
#[error("Resolution cycle")]
ResolutionCycle,
/// Wraps an error of type `ELoad`.
/// NOTE(review): the message always reads "File not found", whatever the wrapped error is.
#[error("File not found: {0}")]
Load(ELoad),
/// The errors collected by the chumsky parser
#[error("Failed to parse: {0:?}")]
Syntax(Vec<Simple<char>>),
/// The path does not refer to a module (also converted from `ResolutionError::NoModule`)
#[error("Not a module")]
None
}
impl<T> From<Vec<Simple<char>>> for ParseError<T> where T: Clone {
fn from(simp: Vec<Simple<char>>) -> Self { Self::Syntax(simp) }
}
/// Flattens a name resolution error into a [`ParseError`].
///
/// A delegated error is unwrapped unchanged; the payloads of the other two variants are
/// discarded.
impl<T> From<ResolutionError<ParseError<T>>> for ParseError<T> where T: Clone {
    fn from(err: ResolutionError<ParseError<T>>) -> Self {
        match err {
            ResolutionError::Delegate(inner) => inner,
            ResolutionError::NoModule(_) => Self::None,
            ResolutionError::Cycle(_) => Self::ResolutionCycle,
        }
    }
}

View File

@@ -1,61 +1,36 @@
use std::collections::HashMap;
use crate::parse;
use super::expr;
use crate::{Expr, Clause};
/// Replaces the first element of a name with the matching prefix from a prefix map.
///
/// Returns `None` if `name` is empty or if its first element is not a key of `prefixes`.
fn qualify(
    name: &Vec<String>,
    prefixes: &HashMap<String, Vec<String>>
) -> Option<Vec<String>> {
    // `split_first` covers the empty name without indexing
    let (head, tail) = name.split_first()?;
    // Direct hash lookup instead of scanning every entry of the map
    let prefix = prefixes.get(head)?;
    Some(prefix.iter().chain(tail).cloned().collect())
}
/// Produce a Token object for any value of parse::Expr other than Typed.
/// Produce a Token object for any value of Expr other than Typed.
/// Called by [#prefix] which handles Typed.
fn prefix_token(
expr: &parse::Expr,
fn prefix_clause(
expr: &Clause,
namespace: &Vec<String>
) -> expr::Token {
) -> Clause {
match expr {
parse::Expr::Typed(_, _) => panic!("This function should only be called by prefix!"),
parse::Expr::Char(c) => expr::Token::Literal(expr::Literal::Char(*c)),
parse::Expr::Int(i) => expr::Token::Literal(expr::Literal::Int(*i)),
parse::Expr::Num(n) => expr::Token::Literal(expr::Literal::Num(*n)),
parse::Expr::Str(s) => expr::Token::Literal(expr::Literal::Str(s.clone())),
parse::Expr::S(v) => expr::Token::S(v.iter().map(|e| prefix(e, namespace)).collect()),
parse::Expr::Auto(name, typ, body) => expr::Token::Auto(
Clause::S(c, v) => Clause::S(*c, v.iter().map(|e| prefix_expr(e, namespace)).collect()),
Clause::Auto(name, typ, body) => Clause::Auto(
name.clone(),
typ.clone().map(|expr| Box::new(prefix(&expr, namespace))),
body.iter().map(|e| prefix(e, namespace)).collect(),
typ.iter().map(|e| prefix_expr(e, namespace)).collect(),
body.iter().map(|e| prefix_expr(e, namespace)).collect(),
),
parse::Expr::Lambda(name, typ, body) => expr::Token::Lambda(
Clause::Lambda(name, typ, body) => Clause::Lambda(
name.clone(),
typ.clone().map(|expr| Box::new(prefix(&expr, namespace))),
body.iter().map(|e| prefix(e, namespace)).collect(),
typ.iter().map(|e| prefix_expr(e, namespace)).collect(),
body.iter().map(|e| prefix_expr(e, namespace)).collect(),
),
parse::Expr::Name(name) => expr::Token::Name {
qualified: namespace.iter().chain(name.iter()).cloned().collect(),
local: if name.len() == 1 {
Some(name[0].clone())
} else {
None
},
},
Clause::Name(name) => Clause::Name (
namespace.iter().chain(name.iter()).cloned().collect()
),
x => x.clone()
}
}
/// Produce an Expr object for any value of parse::Expr
pub fn prefix(expr: &parse::Expr, namespace: &Vec<String>) -> expr::Expr {
match expr {
parse::Expr::Typed(x, t) => expr::Expr {
typ: Some(Box::new(prefix(t, namespace))),
token: prefix_token(x, namespace),
},
_ => expr::Expr {
typ: None,
token: prefix_token(expr, namespace),
},
}
/// Produce an Expr object for any value of Expr
pub fn prefix_expr(Expr(clause, typ): &Expr, namespace: &Vec<String>) -> Expr {
Expr(
prefix_clause(clause, namespace),
typ.as_ref().map(|e| Box::new(prefix_expr(e, namespace)))
)
}

View File

@@ -1,221 +0,0 @@
use std::cell::RefCell;
use std::collections::{HashMap, HashSet, VecDeque};
use std::error;
use chumsky::{prelude::Simple, Parser};
use thiserror::Error;
use crate::parse::{self, file_parser, FileEntry};
use crate::utils::{Cache, as_modpath};
use super::expr;
use super::name_resolver::{NameResolver, ResolutionError};
use super::prefix::prefix;
/// What the module loader found at a path.
#[derive(Debug, Clone)]
pub enum Loaded {
/// A file, represented by its source text
Module(String),
/// A folder, represented by the list of names in it
Namespace(Vec<String>),
}
/// Errors of project loading, generic over `ELoad`, the error type of the module loader.
#[derive(Error, Debug, Clone)]
pub enum ParseError<ELoad> where ELoad: Clone {
/// Name resolution ran into a cycle (converted from `ResolutionError::Cycle`)
#[error("Resolution cycle")]
ResolutionCycle,
/// The module loader failed; wraps its error.
/// NOTE(review): the message always reads "File not found", whatever the loader reported.
#[error("File not found: {0}")]
Load(ELoad),
/// The errors collected by the chumsky parser
#[error("Failed to parse: {0:?}")]
Syntax(Vec<Simple<char>>),
/// The path is not a module: it is used when a namespace is found where source text is
/// needed, and it is the conversion of `ResolutionError::NoModule`
#[error("Not a module")]
None
}
impl<T> From<Vec<Simple<char>>> for ParseError<T> where T: Clone {
fn from(simp: Vec<Simple<char>>) -> Self { Self::Syntax(simp) }
}
/// Flattens a name resolution error into a [`ParseError`].
///
/// A delegated error is unwrapped unchanged; the payloads of the other two variants are
/// discarded.
impl<T> From<ResolutionError<ParseError<T>>> for ParseError<T> where T: Clone {
    fn from(err: ResolutionError<ParseError<T>>) -> Self {
        match err {
            ResolutionError::Delegate(inner) => inner,
            ResolutionError::NoModule(_) => Self::None,
            ResolutionError::Cycle(_) => Self::ResolutionCycle,
        }
    }
}
type ImportMap = HashMap<String, Vec<String>>;
type ParseResult<T, ELoad> = Result<T, ParseError<ELoad>>;
type AnyParseResult<T, ELoad> = Result<T, Vec<ParseError<ELoad>>>;
/// Load a project through a chain of caches that each build on the previous one.
///
/// **Unfinished:** the caches are set up, but the function ends in `todo!` and therefore
/// panics instead of returning a project.
///
/// - `load_mod` maps a path (list of name segments) to a [`Loaded`] value or a loader error.
/// - `prelude` is handed to `file_parser` as pattern operators; for the final parse it is also
///   merged with the imported names and filtered through `parse::is_op`.
/// - `entry` is not used yet.
///
/// The stages are: loaded files, longest loadable prefix of a symbol, preliminary parse,
/// exported names, imported names, final parse, resolved module, and the rules of a module
/// together with those of everything it references.
pub fn load_project<'a, F, ELoad>(
    mut load_mod: F,
    prelude: &[&'a str],
    entry: (Vec<String>, expr::Expr),
) -> Result<super::Project, ParseError<ELoad>>
where
    F: FnMut(&[&str]) -> Result<Loaded, ELoad>,
    ELoad: Clone
{
    let prelude_vec: Vec<String> = prelude.iter().map(|s| s.to_string()).collect();
    let preparser = file_parser(prelude, &[]);
    // Map paths to a namespace with name list (folder) or module with source text (file)
    let loaded_cell = RefCell::new(Cache::new(|path: Vec<String>|
    -> ParseResult<Loaded, ELoad> {
        load_mod(&path.iter().map(|s| s.as_str()).collect::<Vec<_>>())
            .map_err(ParseError::Load)
    }));
    // Map a symbol to its longest prefix that can be loaded
    let modname_cell = RefCell::new(Cache::new(|symbol: Vec<String>|
    -> AnyParseResult<Vec<String>, ELoad> {
        let mut local_loaded = loaded_cell.borrow_mut();
        let mut errv: Vec<ParseError<ELoad>> = Vec::new();
        loop {
            // Every failed attempt shortens the candidate path by one segment
            let (path, _) = symbol.split_at(symbol.len() - errv.len());
            let pathv = path.to_vec();
            match local_loaded.by_clone_fallible(&pathv) {
                Ok(_) => break Ok(pathv.clone()),
                Err(err) => {
                    errv.push(err);
                    // `<=` rather than `==`: with an empty symbol the error count passes the
                    // length right away, and the subtraction above would underflow
                    if symbol.len() <= errv.len() {
                        break Err(errv);
                    }
                }
            }
        }
    }));
    // Preliminarily parse a file, substitution patterns and imports are valid
    let preparsed_cell = RefCell::new(Cache::new(|path: Vec<String>|
    -> ParseResult<Vec<FileEntry>, ELoad> {
        let mut loaded = loaded_cell.borrow_mut();
        let loaded = loaded.by_clone_fallible(&path)?;
        if let Loaded::Module(source) = loaded {
            Ok(preparser.parse(source.as_str())?)
        } else {Err(ParseError::None)}
    }));
    // Collect all toplevel names exported from a given file
    let exports_cell = RefCell::new(Cache::new(|path: Vec<String>|
    -> ParseResult<Vec<String>, ELoad> {
        let mut local_loaded = loaded_cell.borrow_mut();
        let loaded = local_loaded.by_clone_fallible(&path)?;
        let mut local_preparsed = preparsed_cell.borrow_mut();
        // A namespace exports the names it contains
        if let Loaded::Namespace(names) = loaded {
            return Ok(names.clone());
        }
        let preparsed = local_preparsed.by_clone_fallible(&path)?;
        Ok(parse::exported_names(&preparsed)
            .into_iter()
            .map(|n| n[0].clone())
            .collect())
    }));
    // Collect all toplevel names imported by a given file
    let imports_cell = RefCell::new(Cache::new(|path: Vec<String>|
    -> ParseResult<ImportMap, ELoad> {
        let mut local_preparsed = preparsed_cell.borrow_mut();
        let entv = local_preparsed.by_clone_fallible(&path)?.clone();
        let import_entries = parse::imports(entv.iter());
        let mut imported_symbols: HashMap<String, Vec<String>> = HashMap::new();
        for imp in import_entries {
            let mut exports = exports_cell.borrow_mut();
            let export = exports.by_clone_fallible(&imp.path)?;
            if let Some(ref name) = imp.name {
                // A named import only counts if the target really exports that name
                if export.contains(&name) {
                    imported_symbols.insert(name.clone(), imp.path.clone());
                }
            } else {
                // An import without a name brings in everything the target exports
                for exp in export.clone() {
                    imported_symbols.insert(exp.clone(), imp.path.clone());
                }
            }
        }
        Ok(imported_symbols)
    }));
    // Final parse, operators are correctly separated
    let parsed_cell = RefCell::new(Cache::new(|path: Vec<String>|
    -> ParseResult<Vec<FileEntry>, ELoad> {
        let mut local_imports = imports_cell.borrow_mut();
        let imports = local_imports.by_clone_fallible(&path)?;
        let mut local_loaded = loaded_cell.borrow_mut();
        let imported_ops: Vec<&str> = imports
            .keys()
            .chain(prelude_vec.iter())
            .map(|s| s.as_str())
            .filter(|s| parse::is_op(s))
            .collect();
        let parser = file_parser(prelude, &imported_ops);
        if let Loaded::Module(source) = local_loaded.by_clone_fallible(&path)? {
            Ok(parser.parse(source.as_str())?)
        } else {Err(ParseError::None)}
    }));
    let mut name_resolver = NameResolver::new(
        |path: &Vec<String>| { modname_cell.borrow_mut().by_clone_fallible(path).cloned().ok() },
        |path: &Vec<String>| { imports_cell.borrow_mut().by_clone_fallible(path).cloned() }
    );
    // Turn parsed files into a bag of substitutions and a list of toplevel export names
    let resolved_cell = RefCell::new(Cache::new(|path: Vec<String>|
    -> ParseResult<super::Module, ELoad> {
        let mut parsed = parsed_cell.borrow_mut();
        let parsed_entries = parsed.by_clone_fallible(&path)?;
        let subs: Vec<super::Substitution> = parsed_entries
            .iter()
            .filter_map(|ent| {
                if let FileEntry::Export(s) | FileEntry::Substitution(s) = ent {
                    // First make every name absolute by prefixing it with the module path
                    Some(super::Substitution {
                        source: prefix(&s.source, &path),
                        target: prefix(&s.target, &path),
                        priority: s.priority,
                    })
                } else { None }
            })
            // Then replace every name with the one it originates from
            .map(|sub| Ok(super::Substitution {
                source: name_resolver.process_expression(&sub.source)?,
                target: name_resolver.process_expression(&sub.target)?,
                ..sub
            }))
            .collect::<ParseResult<Vec<super::Substitution>, ELoad>>()?;
        let module = super::Module {
            substitutions: subs,
            exports: exports_cell
                .borrow_mut()
                .by_clone_fallible(&path)?
                .clone(),
            references: imports_cell
                .borrow_mut()
                .by_clone_fallible(&path)?
                .values()
                .filter_map(|imps| modname_cell.borrow_mut().by_clone_fallible(imps).ok().cloned())
                .collect()
        };
        Ok(module)
    }));
    // Collect the substitutions of a module and of everything it references, breadth first
    let all_subs_cell = RefCell::new(Cache::new(|path: Vec<String>|
    -> ParseResult<Vec<super::Substitution>, ELoad> {
        let mut processed: HashSet<Vec<String>> = HashSet::new();
        let mut subs: Vec<super::Substitution> = Vec::new();
        let mut pending: VecDeque<Vec<String>> = VecDeque::new();
        // The walk has to start from the requested module, otherwise the loop never runs
        pending.push_back(path);
        while let Some(el) = pending.pop_front() {
            // A module can be queued by several referrers before it gets processed; make sure
            // its substitutions are only collected once
            if !processed.insert(el.clone()) { continue }
            let mut local_resolved = resolved_cell.borrow_mut();
            let resolved = local_resolved.by_clone_fallible(&el)?;
            pending.extend(
                resolved.references.iter()
                    .filter(|&v| !processed.contains(v))
                    .cloned()
            );
            subs.extend(
                resolved.substitutions.iter().cloned()
            )
        };
        Ok(subs)
    }));
    // let substitutions =
    // let main = preparsed.get(&[entry]);
    // for imp in parse::imports(main) {
    //     if !modules.contains_key(&imp.path) {
    //         if modules[&imp.path]
    //     }
    // }
    // let mut project = super::Project {
    //     modules: HashMap::new()
    // };
    todo!("Finish this function")
}

View File

@@ -0,0 +1,193 @@
// use std::collections::{HashMap, HashSet, VecDeque};
// use std::fmt::Debug;
// use std::rc::Rc;
// use chumsky::Parser;
// use crate::parse::{self, line_parser, FileEntry};
// use crate::utils::Cache;
// use super::name_resolver::NameResolver;
// use super::parse_error::ParseError;
// use super::prefix::prefix_expr;
// use super::loaded::Loaded;
// type ParseResult<T, ELoad> = Result<T, ParseError<ELoad>>;
// pub fn rule_collector<F: 'static, ELoad>(
// mut load_mod: F,
// prelude: Vec<String>
// // ) -> impl FnMut(Vec<String>) -> Result<&'a Vec<super::Rule>, ParseError<ELoad>> + 'a
// ) -> Cache<Vec<String>, Result<Vec<super::Rule>, ParseError<ELoad>>>
// where
// F: FnMut(Vec<String>) -> Result<Loaded, ELoad>,
// ELoad: Clone + Debug
// {
// // Map paths to a namespace with name list (folder) or module with source text (file)
// let loaded = Rc::new(Cache::new(move |path: Vec<String>|
// -> ParseResult<Loaded, ELoad> {
// load_mod(path).map_err(ParseError::Load)
// }));
// // Map names to the longest prefix that points to a valid module
// let modname = Rc::new(Cache::new({
// let loaded = Rc::clone(&loaded);
// move |symbol: Vec<String>| -> Result<Vec<String>, Vec<ParseError<ELoad>>> {
// let mut errv: Vec<ParseError<ELoad>> = Vec::new();
// let reg_err = |e, errv: &mut Vec<ParseError<ELoad>>| {
// errv.push(e);
// if symbol.len() == errv.len() { Err(errv.clone()) }
// else { Ok(()) }
// };
// loop {
// let (path, _) = symbol.split_at(symbol.len() - errv.len());
// let pathv = path.to_vec();
// match loaded.try_find(&pathv) {
// Ok(imports) => match imports.as_ref() {
// Loaded::Module(_) => break Ok(pathv.clone()),
// _ => reg_err(ParseError::None, &mut errv)?
// },
// Err(err) => reg_err(err, &mut errv)?
// }
// }
// }
// }));
// // Preliminarily parse a file, substitution rules and imports are valid
// let preparsed = Rc::new(Cache::new({
// let preparser = line_parser(&prelude, &prelude);
// let loaded = Rc::clone(&loaded);
// move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
// let loaded = loaded.try_find(&path)?;
// if let Loaded::Module(source) = loaded.as_ref() {
// Ok(preparser.parse(source.as_str())?)
// } else {Err(ParseError::None)}
// }
// }));
// // Collect all toplevel names exported from a given file
// let exports = Rc::new(Cache::new({
// let loaded = Rc::clone(&loaded);
// let preparsed = Rc::clone(&preparsed);
// move |path: Vec<String>| -> ParseResult<Vec<String>, ELoad> {
// let loaded = loaded.try_find(&path)?;
// if let Loaded::Namespace(names) = loaded.as_ref() {
// return Ok(names.clone());
// }
// let preparsed = preparsed.try_find(&path)?;
// Ok(parse::exported_names(&preparsed)
// .into_iter()
// .map(|n| n[0].clone())
// .collect())
// }
// }));
// // Collect all toplevel names imported by a given file
// let imports = Rc::new(Cache::new({
// let preparsed = Rc::clone(&preparsed);
// let exports = Rc::clone(&exports);
// move |path: Vec<String>| -> ParseResult<HashMap<String, Vec<String>>, ELoad> {
// let entv = preparsed.try_find(&path)?.clone();
// let import_entries = parse::imports(entv.iter());
// let mut imported_symbols: HashMap<String, Vec<String>> = HashMap::new();
// for imp in import_entries {
// let export = exports.try_find(&imp.path)?;
// if let Some(ref name) = imp.name {
// if export.contains(&name) {
// imported_symbols.insert(name.clone(), imp.path.clone());
// }
// } else {
// for exp in export.as_ref() {
// imported_symbols.insert(exp.clone(), imp.path.clone());
// }
// }
// }
// Ok(imported_symbols)
// }
// }));
// // Final parse, operators are correctly separated
// let parsed = Rc::new(Cache::new({
// let imports = Rc::clone(&imports);
// let loaded = Rc::clone(&loaded);
// move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
// let imported_ops: Vec<String> =
// imports.try_find(&path)?
// .keys()
// .chain(prelude.iter())
// .filter(|s| parse::is_op(s))
// .cloned()
// .collect();
// let parser = file_parser(&prelude, &imported_ops);
// if let Loaded::Module(source) = loaded.try_find(&path)?.as_ref() {
// Ok(parser.parse(source.as_str())?)
// } else { Err(ParseError::None) }
// }
// }));
// let mut name_resolver = NameResolver::new({
// let modname = Rc::clone(&modname);
// move |path| {
// Some(modname.try_find(path).ok()?.as_ref().clone())
// }
// }, {
// let imports = Rc::clone(&imports);
// move |path| {
// imports.try_find(path).map(|f| f.as_ref().clone())
// }
// });
// // Turn parsed files into a bag of rules and a list of toplevel export names
// let resolved = Rc::new(Cache::new({
// let parsed = Rc::clone(&parsed);
// let exports = Rc::clone(&exports);
// let imports = Rc::clone(&imports);
// let modname = Rc::clone(&modname);
// move |path: Vec<String>| -> ParseResult<super::Module, ELoad> {
// let module = super::Module {
// rules: parsed.try_find(&path)?
// .iter()
// .filter_map(|ent| {
// if let FileEntry::Export(s) | FileEntry::Rule(s) = ent {
// Some(super::Rule {
// source: prefix_expr(&s.source, &path),
// target: prefix_expr(&s.target, &path),
// priority: s.priority,
// })
// } else { None }
// })
// .map(|rule| Ok(super::Rule {
// source: name_resolver.process_expression(&rule.source)?,
// target: name_resolver.process_expression(&rule.target)?,
// ..rule
// }))
// .collect::<ParseResult<Vec<super::Rule>, ELoad>>()?,
// exports: exports.try_find(&path)?.as_ref().clone(),
// references: imports.try_find(&path)?
// .values()
// .filter_map(|imps| {
// modname.try_find(&imps).ok().map(|r| r.as_ref().clone())
// })
// .collect()
// };
// Ok(module)
// }
// }));
// let all_rules = Cache::new({
// let resolved = Rc::clone(&resolved);
// move |path: Vec<String>| -> ParseResult<Vec<super::Rule>, ELoad> {
// let mut processed: HashSet<Vec<String>> = HashSet::new();
// let mut rules: Vec<super::Rule> = Vec::new();
// let mut pending: VecDeque<Vec<String>> = VecDeque::new();
// pending.push_back(path);
// while let Some(el) = pending.pop_front() {
// let resolved = resolved.try_find(&el)?;
// processed.insert(el.clone());
// pending.extend(
// resolved.references.iter()
// .filter(|&v| !processed.contains(v))
// .cloned()
// );
// rules.extend(
// resolved.rules.iter().cloned()
// )
// };
// Ok(rules)
// }
// });
// return all_rules;
// }

View File

@@ -1,71 +1,68 @@
use std::hash::Hash;
use std::{hash::Hash, cell::RefCell};
use hashbrown::HashMap;
use mappable_rc::Mrc;
/// Cache the return values of an effectless closure in a hashmap
/// Inspired by the closure_cacher crate.
pub struct Cache<I, O, F> {
store: HashMap<I, O>,
closure: F
pub struct Cache<I, O: 'static> where O: Clone {
store: RefCell<HashMap<I, Mrc<O>>>,
closure: RefCell<Box<dyn FnMut (I) -> O + 'static>>
}
impl<I: 'static, O, F> Cache<I, O, F> where
I: Eq + Hash,
F: FnMut(I) -> O
impl<I, O> Cache<I, O> where
I: Eq + Hash + Clone,
O: Clone
{
pub fn new(closure: F) -> Self {
Self { store: HashMap::new(), closure }
pub fn new<F: 'static>(closure: F) -> Self where F: FnMut(I) -> O {
Self {
store: RefCell::new(HashMap::new()),
closure: RefCell::new(Box::new(closure))
}
/// Produce and cache a result by copying I if necessary
pub fn by_copy(&mut self, i: &I) -> &O where I: Copy {
let closure = &mut self.closure;
self.store.raw_entry_mut().from_key(i)
.or_insert_with(|| (*i, closure(*i))).1
}
/// Produce and cache a result by cloning I if necessary
pub fn by_clone(&mut self, i: &I) -> &O where I: Clone {
let closure = &mut self.closure;
self.store.raw_entry_mut().from_key(i)
.or_insert_with(|| (i.clone(), closure(i.clone()))).1
/// Return the output belonging to `i`, computing and storing it first if it is not in the
/// store yet.
///
/// `i` is only cloned when the output has to be computed. Both the closure and the store stay
/// mutably borrowed while the closure runs, so a closure that calls back into the same cache
/// panics on the `RefCell` borrow.
pub fn find(&self, i: &I) -> Mrc<O> {
let mut closure = self.closure.borrow_mut();
let mut store = self.store.borrow_mut();
// Look up by reference; the owned key and the value are only built on a miss
Mrc::clone(store.raw_entry_mut().from_key(i)
.or_insert_with(|| (i.clone(), Mrc::new(closure(i.clone())))).1)
}
#[allow(dead_code)]
/// Return the result if it has already been computed
pub fn known(&self, i: &I) -> Option<&O> {
self.store.get(i)
pub fn known(&self, i: &I) -> Option<Mrc<O>> {
let store = self.store.borrow();
store.get(i).map(Mrc::clone)
}
#[allow(dead_code)]
/// Forget the output for the given input
pub fn drop(&mut self, i: &I) -> bool {
self.store.remove(i).is_some()
pub fn drop(&self, i: &I) -> bool {
self.store.borrow_mut().remove(i).is_some()
}
}
impl<I: 'static, O, E, F> Cache<I, Result<O, E>, F> where
I: Eq + Hash,
E: Clone,
F: FnMut(I) -> Result<O, E>
impl<I, O, E> Cache<I, Result<O, E>> where
I: Eq + Hash + Clone,
O: Clone,
E: Clone
{
/// Sink the ref from a Result into the Ok value, such that copying only occurs on the sad path
/// but the return value can be short-circuited
pub fn by_copy_fallible(&mut self, i: &I) -> Result<&O, E> where I: Copy {
self.by_clone(i).as_ref().map_err(|e| e.clone())
}
/// Sink the ref from a Result into the Ok value, such that cloning only occurs on the sad path
/// but the return value can be short-circuited
pub fn by_clone_fallible(&mut self, i: &I) -> Result<&O, E> where I: Clone {
self.by_clone(i).as_ref().map_err(|e| e.clone())
/// Like `find`, but moves the `Result` outside of the shared pointer so that the caller can
/// short-circuit on it: the success value stays behind the `Mrc`, the error is cloned.
pub fn try_find(&self, i: &I) -> Result<Mrc<O>, E> {
let ent = self.find(i);
Mrc::try_map(ent, |t| t.as_ref().ok())
// The mapping only fails if the cached entry is an `Err`, so the `unwrap` cannot panic
.map_err(|res| Result::as_ref(&res).err().unwrap().to_owned())
}
}
impl<I: 'static, O, F> Cache<I, Option<O>, F> where
I: Eq + Hash,
F: FnMut(I) -> Option<O>
impl<I, O> Cache<I, Option<O>> where
I: Eq + Hash + Clone,
O: Clone
{
#[allow(dead_code)]
/// Sink the ref from an Option into the Some value such that the return value can be
/// short-circuited
pub fn by_copy_fallible(&mut self, i: &I) -> Option<&O> where I: Copy {
self.by_copy(i).as_ref()
}
/// Sink the ref from an Option into the Some value such that the return value can be
/// short-circuited
pub fn by_clone_fallible(&mut self, i: &I) -> Option<&O> where I: Clone {
self.by_clone(i).as_ref()
/// Like `find`, but moves the `Option` outside of the shared pointer so that the caller can
/// short-circuit on it: a cached `Some` yields a pointer to its content, a cached `None`
/// yields `None`.
pub fn try_find(&self, i: &I) -> Option<Mrc<O>> where I: Clone {
let ent = self.find(i);
Mrc::try_map(ent, |o| o.as_ref()).ok()
}
}

View File

@@ -1,8 +1,8 @@
mod cache;
mod substack;
mod result_iter_collect;
pub use cache::Cache;
pub use substack::Substack;
pub use result_iter_collect::result_iter_collect;
pub fn as_modpath(path: &Vec<String>) -> String {
path.join("::")
}
pub type BoxedIter<'a, T> = Box<dyn Iterator<Item = T> + 'a>;

View File

@@ -0,0 +1,19 @@
/// Drain an iterator of results into two vectors: the successes and the errors, each in the
/// order in which they were encountered.
///
/// Every element of both vectors is wrapped in `Some`; no `None` placeholder is inserted, so
/// positions in the two vectors do not line up with positions in the input.
pub fn result_iter_collect<T, E>(i: &mut dyn Iterator<Item = Result<T, E>>)
-> (Vec<Option<T>>, Vec<Option<E>>) {
    let mut successes = Vec::new();
    let mut failures = Vec::new();
    for outcome in i {
        match outcome {
            Ok(value) => successes.push(Some(value)),
            Err(error) => failures.push(Some(error)),
        }
    }
    (successes, failures)
}
/// Drain an iterator of `(optional result, errors)` pairs.
///
/// The first vector holds one entry per input item, `None` included, in input order. The
/// second is the concatenation of all the error lists, also in input order.
pub fn recoverable_iter_collect<T, E>(i: &mut dyn Iterator<Item=(Option<T>, Vec<E>)>)
-> (Vec<Option<T>>, Vec<E>) {
    let mut results = Vec::new();
    let mut errors = Vec::new();
    for (result, item_errors) in i {
        results.push(result);
        errors.extend(item_errors);
    }
    (results, errors)
}

View File

@@ -9,7 +9,9 @@ pub struct Substack<'a, T> {
}
impl<'a, T> Substack<'a, T> {
/// Borrow the item held by this node of the stack.
#[allow(dead_code)]
pub fn item(&self) -> &T { &self.item }
/// The node this one links back to through its `prev` field, or `None` if there is none.
#[allow(dead_code)]
pub fn prev(&self) -> Option<&'a Substack<'a, T>> { self.prev }
pub fn new(item: T) -> Self {