rule fundamentals

A lot of stuff I'm not going to enumerate.

2022-07-06 20:13:13 +02:00
parent 49aa73956c
commit ec7ad81fac
21 changed files with 483 additions and 420 deletions

View File

@@ -1,7 +1,9 @@
use itertools::Itertools;
use ordered_float::NotNan;
use std::{fmt::Debug};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// An exact value
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Literal {
Num(NotNan<f64>),
Int(u64),
@@ -9,10 +11,31 @@ pub enum Literal {
Str(String),
}
impl Debug for Literal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Num(arg0) => write!(f, "{:?}", arg0),
Self::Int(arg0) => write!(f, "{:?}", arg0),
Self::Char(arg0) => write!(f, "{:?}", arg0),
Self::Str(arg0) => write!(f, "{:?}", arg0),
}
}
}
/// An S-expression with a type
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Expr(pub Clause, pub Option<Box<Expr>>);
impl Debug for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
// f.debug_tuple("Expr").field(&self.0).field(&self.1).finish()
let Expr(val, typ) = self;
write!(f, "{:?}", val)?;
if let Some(typ) = typ { write!(f, "{:?}", typ) }
else { Ok(()) }
}
}
impl Expr {
/// Replace all occurrences of a name in the tree with a parameter, to bypass name resolution
pub fn bind_parameter(&mut self, name: &str) {
@@ -24,7 +47,7 @@ impl Expr {
}
/// An S-expression as read from a source file
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Clause {
Literal(Literal),
Name(Vec<String>),
@@ -34,6 +57,44 @@ pub enum Clause {
Parameter(String)
}
fn fmt_expr_seq(it: &mut dyn Iterator<Item = &Expr>, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for item in Itertools::intersperse(it.map(Some), None) { match item {
Some(expr) => write!(f, "{:?}", expr),
None => f.write_str(" "),
}? }
Ok(())
}
impl Debug for Clause {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Literal(arg0) => write!(f, "{:?}", arg0),
Self::Name(arg0) => write!(f, "{}", arg0.join("::")),
Self::S(del, items) => {
f.write_str(&del.to_string())?;
fmt_expr_seq(&mut items.iter(), f)?;
f.write_str(match del {
'(' => ")", '[' => "]", '{' => "}",
_ => "CLOSING_DELIM"
})
},
Self::Lambda(name, argtyp, body) => {
f.write_str("\\")?;
f.write_str(name)?;
f.write_str(":")?; fmt_expr_seq(&mut argtyp.iter(), f)?; f.write_str(".")?;
fmt_expr_seq(&mut body.iter(), f)
},
Self::Auto(name, argtyp, body) => {
f.write_str("@")?;
f.write_str(&name.clone().unwrap_or_default())?;
f.write_str(":")?; fmt_expr_seq(&mut argtyp.iter(), f)?; f.write_str(".")?;
fmt_expr_seq(&mut body.iter(), f)
},
Self::Parameter(name) => write!(f, "`{}", name)
}
}
}
impl Clause {
/// Replace all occurrences of a name in the tree with a parameter, to bypass name resolution
pub fn bind_parameter(&mut self, name: &str) {
@@ -49,4 +110,18 @@ impl Clause {
_ => ()
}
}
}
/// A substitution rule as read from the source
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Rule {
pub source: Vec<Expr>,
pub prio: NotNan<f64>,
pub target: Vec<Expr>
}
impl Debug for Rule {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?} ={}=> {:?}", self.source, self.prio, self.target)
}
}
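
For a sense of what these Debug impls render, a minimal sketch (the values are made up and the types above are assumed to be in scope):

use ordered_float::NotNan;

fn debug_demo() {
    // A lambda with an empty type list renders as \x:.x
    let body = vec![Expr(Clause::Name(vec!["x".to_string()]), None)];
    let lambda = Clause::Lambda("x".to_string(), vec![], body);
    assert_eq!(format!("{:?}", lambda), "\\x:.x");
    // A rule renders as [source] ={prio}=> [target]
    let rule = Rule {
        source: vec![Expr(Clause::Name(vec!["foo".to_string()]), None)],
        prio: NotNan::new(2.0).unwrap(),
        target: vec![Expr(Clause::Name(vec!["bar".to_string()]), None)],
    };
    assert_eq!(format!("{:?}", rule), "[foo] =2=> [bar]");
}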

View File

@@ -1,17 +1,12 @@
use core::ops::Range;
use std::{env::current_dir, fs::read_to_string};
use std::io;
use chumsky::prelude::end;
use chumsky::{Parser, Stream};
use parse::{LexerEntry, FileEntry};
// use project::{rule_collector, file_loader, Loaded};
use std::env::current_dir;
mod parse;
mod project;
mod utils;
mod expression;
pub use expression::*;
mod rule;
use project::{rule_collector, Loaded, file_loader};
fn literal(orig: &[&str]) -> Vec<String> {
orig.iter().map(|&s| s.to_owned()).collect()
@@ -27,37 +22,15 @@ export (match_sequence $lhs) >>= (match_sequence $rhs) =100=> (bind ($lhs) ($rhs
"#;
fn main() {
// let mut input = String::new();
// let mut stdin = io::stdin();
// stdin.read_to_string(&mut input).unwrap();
let ops: Vec<&str> = vec!["...", ">>", ">>=", "[", "]", ",", "$"];
let data = read_to_string("./main.orc").unwrap();
let lexed = parse::lexer(&ops).parse(data).unwrap();
println!("Lexed: {:?}", lexed);
let parsr = parse::line_parser().then_ignore(end());
// match parsr.parse(data) {
// Ok(output) => println!("\nParsed:\n{:?}", output),
// Err(e) => println!("\nErrored:\n{:?}", e)
// }
let lines = lexed.iter().filter_map(|v| {
let parse::LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
let tuples = v.into_iter().map(|LexerEntry(l, r)| (l.clone(), r.clone()));
Some(parsr.parse_recovery_verbose(Stream::from_iter(end..end+1, tuples)))
}).collect::<Vec<_>>();
for (id, (out, errs)) in lines.into_iter().enumerate() {
println!("Parsing line {}", id);
if let Some(output) = out { println!("Parsed:\n{:?}", output) }
else { println!("Failed to produce output")}
if errs.len() > 0 { println!("Errored:\n{:?}", errs)}
let cwd = current_dir().unwrap();
let collect_rules = rule_collector(move |n| {
if n == vec!["prelude"] { Ok(Loaded::Module(PRELUDE.to_string())) }
else { file_loader(cwd.clone())(n) }
}, literal(&["...", ">>", ">>=", "[", "]", ",", "$", "=", "=>"]));
match collect_rules.try_find(&literal(&["main"])) {
Ok(rules) => for rule in rules.iter() {
println!("{rule:?}")
}
Err(err) => println!("{:#?}", err)
}
// let output = parse::file_parser(&ops, &ops).parse(data).unwrap();
// let cwd = current_dir().unwrap();
// let collect_rules = rule_collector(move |n| {
// if n == vec!["prelude"] { Ok(Loaded::Module(PRELUDE.to_string())) }
// else { file_loader(cwd.clone())(n) }
// }, literal(&["...", ">>", ">>=", "[", "]", ","]));
// let rules = collect_rules.try_find(&literal(&["main"])).unwrap();
// for rule in rules.iter() {
// println!("{:?} ={}=> {:?}", rule.source, rule.priority, rule.target)
// }
}

View File

@@ -1,5 +1,5 @@
use chumsky::{self, prelude::*, Parser};
use crate::{Clause, Expr, Literal, enum_parser};
use crate::{enum_parser, expression::{Clause, Expr, Literal}};
use super::{lexer::Lexeme};
@@ -86,5 +86,5 @@ pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
.ignore_then(expr.clone()).or_not()
)
.map(|(val, typ)| Expr(val, typ.map(Box::new)))
})
}).labelled("Expression")
}

View File

@@ -1,6 +1,6 @@
use std::{ops::Range, iter};
use std::{ops::Range, iter, fmt};
use ordered_float::NotNan;
use chumsky::{Parser, prelude::*, text::whitespace};
use chumsky::{Parser, prelude::*};
use std::fmt::Debug;
use crate::utils::BoxedIter;
@@ -14,6 +14,11 @@ impl Debug for Entry {
// f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
}
}
impl Into<(Lexeme, Range<usize>)> for Entry {
fn into(self) -> (Lexeme, Range<usize>) {
(self.0, self.1)
}
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lexeme {
@@ -61,6 +66,9 @@ impl Lexeme {
pub fn name<T: ToString>(n: T) -> Self {
Lexeme::Name(n.to_string())
}
pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
}
pub fn paren_parser<T, P>(
expr: P
) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
@@ -76,15 +84,20 @@ impl Lexeme {
}
}
fn rule_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
just('=').ignore_then(
choice((
none_of("-0123456789").rewind().to(NotNan::new(0f64).unwrap()),
number::float_parser().then_ignore(just("=>"))
)).map_err_with_span(|err, span| {
panic!("Something's up! {:?} {}", span, err)
})
)
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LexedText(pub Vec<Vec<Entry>>);
impl Debug for LexedText {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for row in &self.0 {
for tok in row {
tok.fmt(f)?;
f.write_str(" ")?
}
f.write_str("\n")?
}
Ok(())
}
}
type LexSubres<'a> = BoxedIter<'a, Entry>;
@@ -104,7 +117,7 @@ fn paren_parser<'a>(
})
}
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Vec<Entry>>, Error=Simple<char>> + 'a
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, LexedText, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
let all_ops = ops.iter().map(|o| o.as_ref().to_string())
.chain(iter::once(".".to_string())).collect::<Vec<_>>();
@@ -114,7 +127,8 @@ where T: AsRef<str> + Clone {
paren_parser(recurse.clone(), '[', ']'),
paren_parser(recurse.clone(), '{', '}'),
choice((
rule_parser().map(Lexeme::Rule),
just("==").padded().to(Lexeme::rule(0f64)),
just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
comment::comment_parser().map(Lexeme::Comment),
just("::").padded().to(Lexeme::NS),
just('\\').padded().to(Lexeme::BS),
@@ -130,5 +144,5 @@ where T: AsRef<str> + Clone {
}).separated_by(one_of("\t ").repeated())
.flatten().collect()
.separated_by(just('\n').then(text::whitespace()).ignored())
}
.map(LexedText)
}
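
As a quick check of the new rule-arrow lexing, a sketch (the operator list and input are made up; "==" maps to a priority-0 rule per the choice above, and Entry's Into impl converts to the tuple form):

fn lex_demo() {
    let LexedText(rows) = lexer(&["$"]).parse("foo == bar\nbaz =2=> quux").unwrap();
    // Entry converts into (Lexeme, Range) via the Into impl above
    let lexemes: Vec<Vec<(Lexeme, std::ops::Range<usize>)>> = rows.into_iter()
        .map(|row| row.into_iter().map(Into::into).collect())
        .collect();
    assert!(lexemes[0].iter().any(|(l, _)| *l == Lexeme::rule(0f64)));
    assert!(lexemes[1].iter().any(|(l, _)| *l == Lexeme::rule(2f64)));
}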

View File

@@ -7,10 +7,12 @@ mod expression;
mod sourcefile;
mod import;
mod enum_parser;
mod parse;
pub use sourcefile::FileEntry;
pub use sourcefile::line_parser;
pub use sourcefile::imports;
pub use sourcefile::exported_names;
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
pub use name::is_op;
pub use name::is_op;
pub use parse::{parse, reparse, ParseError};

src/parse/parse.rs Normal file
View File

@@ -0,0 +1,65 @@
use std::{ops::Range, fmt::Debug};
use chumsky::{prelude::{Simple, end}, Stream, Parser};
use itertools::Itertools;
use thiserror::Error;
use crate::expression::Rule;
use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};
#[derive(Error, Debug, Clone)]
pub enum ParseError {
#[error("Could not tokenize {0:?}")]
Lex(Vec<Simple<char>>),
#[error("Could not parse {0:#?}")]
Ast(Vec<Simple<Lexeme>>)
}
pub fn parse<'a, Iter, S, Op>(ops: &[Op], stream: S) -> Result<Vec<FileEntry>, ParseError>
where
Op: 'a + AsRef<str> + Clone,
Iter: Iterator<Item = (char, Range<usize>)> + 'a,
S: Into<Stream<'a, char, Range<usize>, Iter>> {
let lexed = lexer(ops).parse(stream).map_err(ParseError::Lex)?;
println!("Lexed:\n{:?}", lexed);
let parsr = line_parser().then_ignore(end());
let (parsed_lines, errors_per_line) = lexed.0.into_iter().filter_map(|v| {
// Find the first invalid position, for Stream::from_iter's end-of-input span
let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
// Stream expects tuples, lexer outputs structs
let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
Some(parsr.parse(Stream::from_iter(end..end+1, tuples)))
// ^^^^^^^^^^
// I haven't the foggiest idea why this is needed; parsers are supposed to be lazy,
// so the end of input should make little difference
}).map(|res| match res {
Ok(r) => (Some(r), vec![]),
Err(e) => (None, e)
}).unzip::<_, _, Vec<_>, Vec<_>>();
let total_err = errors_per_line.into_iter()
.map(Vec::into_iter).flatten()
.collect::<Vec<_>>();
if total_err.len() > 0 { Err(ParseError::Ast(total_err)) }
else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}
pub fn reparse<'a, Iter, S, Op>(ops: &[Op], stream: S, pre: &Vec<FileEntry>)
-> Result<Vec<FileEntry>, ParseError>
where
Op: 'a + AsRef<str> + Clone,
Iter: Iterator<Item = (char, Range<usize>)> + 'a,
S: Into<Stream<'a, char, Range<usize>, Iter>> {
let result = parse(ops, stream)?;
Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
*source = s2.clone()
} else {
panic!("Preparse and reparse received different row types!")
}
}
output
}).collect())
}
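
The intended call pattern is a two-pass flow; a sketch assuming the items above are in scope (the operator lists are placeholders, rule_collector below supplies real ones from import data):

fn two_pass(source: &str) -> Result<Vec<FileEntry>, ParseError> {
    // First pass: only the prelude operators are known.
    let prelude_ops = ["...", ">>", ">>="];
    let pre = parse(&prelude_ops, source)?;
    // Second pass: the full operator set changes tokenization, and reparse
    // copies each rule's source side back from the first pass.
    let all_ops = ["...", ">>", ">>=", "$", ","];
    reparse(&all_ops, source, &pre)
}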

View File

@@ -1,34 +0,0 @@
use chumsky::{self, prelude::*, Parser};
use super::{expression, number::float_parser};
#[derive(Debug, Clone)]
pub struct Rule {
pub source: expression::Expr,
pub priority: f64,
pub target: expression::Expr
}
/// Parses a substitution rule of the forms
///
/// ```orchid
/// main = \x. ...
/// $a + $b = (add $a $b)
/// (foo bar baz) =1.1=> (foo 1 e)
/// reee =2=> shadow_reee
/// shadow_reee =0.9=> reee
/// ```
/// TBD whether this disables reee in the specified range or loops forever
pub fn rule_parser<'a, T: 'a + AsRef<str> + Clone>(
pattern_ops: &[T],
ops: &[T]
) -> impl Parser<char, Rule, Error = Simple<char>> + 'a {
expression::expression_parser(pattern_ops).padded()
.then_ignore(just('='))
.then(
float_parser().then_ignore(just("=>"))
.or_not().map(|prio| prio.unwrap_or(0.0))
).then(expression::expression_parser(ops).padded())
.map(|((source, priority), target)| Rule { source, priority, target })
.labelled("rule")
}

View File

@@ -1,15 +1,13 @@
use std::collections::HashSet;
use std::fs::File;
use std::iter;
use crate::{enum_parser, Expr, Clause};
use crate::{enum_parser, expression::{Expr, Clause, Rule}};
use crate::utils::BoxedIter;
use super::expression::xpr_parser;
use super::import;
use super::import::import_parser;
use super::lexer::Lexeme;
use super::name;
use chumsky::{Parser, prelude::*};
use ordered_float::NotNan;
@@ -18,8 +16,7 @@ use ordered_float::NotNan;
pub enum FileEntry {
Import(Vec<import::Import>),
Comment(String),
Rule(Vec<Expr>, NotNan<f64>, Vec<Expr>),
Export(Vec<Expr>, NotNan<f64>, Vec<Expr>)
Rule(Rule, bool)
}
/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
@@ -70,49 +67,21 @@ pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
println!("{:?} could not yield an export", s); e
})
.ignore_then(rule_parser())
.map(|(lhs, prio, rhs)| FileEntry::Export(lhs, prio, rhs)),
.map(|(source, prio, target)| FileEntry::Rule(Rule{source, prio, target}, true)),
// This could match almost anything so it has to go last
rule_parser().map(|(lhs, prio, rhs)| FileEntry::Rule(lhs, prio, rhs)),
rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{source, prio, target}, false)),
))
}
/// Collect all exported names (and a lot of other words) from a file
pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
src.iter().flat_map(|ent| match ent {
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())) as BoxedIter<&Expr>,
FileEntry::Rule(Rule{source, target, ..}, true) =>
Box::new(source.iter().chain(target.iter())) as BoxedIter<&Expr>,
_ => Box::new(iter::empty())
}).map(find_all_names).flatten().collect()
}
// #[allow(dead_code)]
/// Collect all operators defined in a file (and some other words)
fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
let all_names:HashSet<&Vec<String>> = src.iter().flat_map(|ent| match ent {
FileEntry::Rule(s, _, d) =>
if exported_only {Box::new(iter::empty()) as BoxedIter<&Expr>}
else {Box::new(s.iter().chain(d.iter()))}
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())),
_ => Box::new(iter::empty())
}).map(find_all_names).flatten().collect();
// Dedupe stage of dubious value; collecting into a hashset may take longer than
// handling duplicates would with a file of sensible size.
all_names.into_iter()
.filter_map(|name|
// If it's namespaced, it's imported.
if name.len() == 1 && name::is_op(&name[0]) {Some(&name[0])}
else {None}
).collect()
}
// #[allow(dead_code)]
/// Collect all operators from a file
pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
// #[allow(dead_code)]
/// Collect exported operators from a file (plus some extra)
pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
/// Summarize all imports from a file in a single list of qualified names
pub fn imports<'a, 'b, I>(
src: I

View File

@@ -1,25 +0,0 @@
#[derive(Debug, Clone)]
pub enum Literal {
Num(f64),
Int(u64),
Char(char),
Str(String),
}
#[derive(Debug, Clone)]
pub enum Token {
Literal(Literal),
Name {
qualified: Vec<String>,
local: Option<String>
},
S(Vec<Expr>),
Lambda(String, Option<Box<Expr>>, Vec<Expr>),
Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>)
}
#[derive(Debug, Clone)]
pub struct Expr {
pub token: Token,
pub typ: Option<Box<Expr>>
}

View File

@@ -40,8 +40,10 @@ pub fn file_loader(proj: PathBuf) -> impl FnMut(Vec<String>) -> Result<Loaded, L
let orcfile = dirpath.with_extension("orc");
if orcfile.is_file() {
read_to_string(orcfile).map(Loaded::Module).map_err(LoadingError::from)
} else if dirpath.exists() {
Err(LoadingError::UnknownNode(dirpath.to_string_lossy().into_owned()))
} else { Err(LoadingError::Missing(dirpath.to_string_lossy().into_owned())) }
} else {
let pathstr = dirpath.to_string_lossy().into_owned();
Err(if dirpath.exists() { LoadingError::UnknownNode(pathstr) }
else { LoadingError::Missing(pathstr) })
}
}
}

View File

@@ -1,23 +1,10 @@
mod rule_collector;
// pub use rule_collector::rule_collector;
pub use rule_collector::rule_collector;
mod prefix;
mod name_resolver;
mod loaded;
pub use loaded::Loaded;
mod parse_error;
mod module_error;
mod file_loader;
pub use file_loader::file_loader;
#[derive(Debug, Clone)]
pub struct Module {
pub rules: Vec<Rule>,
pub exports: Vec<String>,
pub references: Vec<Vec<String>>
}
#[derive(Debug, Clone)]
pub struct Rule {
pub source: super::Expr,
pub priority: f64,
pub target: super::Expr
}
use crate::expression::Rule;

View File

@@ -0,0 +1,31 @@
use thiserror::Error;
use crate::parse::ParseError;
use super::name_resolver::ResolutionError;
#[derive(Error, Debug, Clone)]
pub enum ModuleError<ELoad> where ELoad: Clone {
#[error("Resolution cycle")]
ResolutionCycle,
#[error("File not found: {0}")]
Load(ELoad),
#[error("Failed to parse: {0:?}")]
Syntax(ParseError),
#[error("Not a module")]
None
}
impl<T> From<ParseError> for ModuleError<T> where T: Clone {
fn from(pars: ParseError) -> Self { Self::Syntax(pars) }
}
impl<T> From<ResolutionError<ModuleError<T>>> for ModuleError<T> where T: Clone {
fn from(res: ResolutionError<ModuleError<T>>) -> Self {
match res {
ResolutionError::Cycle(_) => ModuleError::ResolutionCycle,
ResolutionError::NoModule(_) => ModuleError::None,
ResolutionError::Delegate(d) => d
}
}
}

View File

@@ -3,7 +3,7 @@ use thiserror::Error;
use crate::utils::Substack;
use crate::{Expr, Clause, Literal};
use crate::expression::{Expr, Clause};
type ImportMap = HashMap<String, Vec<String>>;

View File

@@ -1,30 +0,0 @@
use chumsky::prelude::Simple;
use thiserror::Error;
use super::name_resolver::ResolutionError;
#[derive(Error, Debug, Clone)]
pub enum ParseError<ELoad> where ELoad: Clone {
#[error("Resolution cycle")]
ResolutionCycle,
#[error("File not found: {0}")]
Load(ELoad),
#[error("Failed to parse: {0:?}")]
Syntax(Vec<Simple<char>>),
#[error("Not a module")]
None
}
impl<T> From<Vec<Simple<char>>> for ParseError<T> where T: Clone {
fn from(simp: Vec<Simple<char>>) -> Self { Self::Syntax(simp) }
}
impl<T> From<ResolutionError<ParseError<T>>> for ParseError<T> where T: Clone {
fn from(res: ResolutionError<ParseError<T>>) -> Self {
match res {
ResolutionError::Cycle(_) => ParseError::ResolutionCycle,
ResolutionError::NoModule(_) => ParseError::None,
ResolutionError::Delegate(d) => d
}
}
}

View File

@@ -1,4 +1,4 @@
use crate::{Expr, Clause};
use crate::expression::{Expr, Clause};
/// Replaces the first element of a name with the matching prefix from a prefix map

View File

@@ -1,193 +1,206 @@
// use std::collections::{HashMap, HashSet, VecDeque};
// use std::fmt::Debug;
// use std::rc::Rc;
use std::collections::{HashMap, HashSet, VecDeque};
use std::fmt::Debug;
use std::rc::Rc;
// use chumsky::Parser;
use crate::expression::Rule;
use crate::parse::{self, FileEntry};
use crate::utils::Cache;
// use crate::parse::{self, line_parser, FileEntry};
// use crate::utils::Cache;
use super::name_resolver::NameResolver;
use super::module_error::ModuleError;
use super::prefix::prefix_expr;
use super::loaded::Loaded;
type ParseResult<T, ELoad> = Result<T, ModuleError<ELoad>>;
// use super::name_resolver::NameResolver;
// use super::parse_error::ParseError;
// use super::prefix::prefix_expr;
// use super::loaded::Loaded;
#[derive(Debug, Clone)]
pub struct Module {
pub rules: Vec<Rule>,
pub exports: Vec<String>,
pub references: Vec<Vec<String>>
}
// type ParseResult<T, ELoad> = Result<T, ParseError<ELoad>>;
// pub fn rule_collector<F: 'static, ELoad>(
// mut load_mod: F,
// prelude: Vec<String>
// // ) -> impl FnMut(Vec<String>) -> Result<&'a Vec<super::Rule>, ParseError<ELoad>> + 'a
// ) -> Cache<Vec<String>, Result<Vec<super::Rule>, ParseError<ELoad>>>
// where
// F: FnMut(Vec<String>) -> Result<Loaded, ELoad>,
// ELoad: Clone + Debug
// {
// // Map paths to a namespace with name list (folder) or module with source text (file)
// let loaded = Rc::new(Cache::new(move |path: Vec<String>|
// -> ParseResult<Loaded, ELoad> {
// load_mod(path).map_err(ParseError::Load)
// }));
// // Map names to the longest prefix that points to a valid module
// let modname = Rc::new(Cache::new({
// let loaded = Rc::clone(&loaded);
// move |symbol: Vec<String>| -> Result<Vec<String>, Vec<ParseError<ELoad>>> {
// let mut errv: Vec<ParseError<ELoad>> = Vec::new();
// let reg_err = |e, errv: &mut Vec<ParseError<ELoad>>| {
// errv.push(e);
// if symbol.len() == errv.len() { Err(errv.clone()) }
// else { Ok(()) }
// };
// loop {
// let (path, _) = symbol.split_at(symbol.len() - errv.len());
// let pathv = path.to_vec();
// match loaded.try_find(&pathv) {
// Ok(imports) => match imports.as_ref() {
// Loaded::Module(_) => break Ok(pathv.clone()),
// _ => reg_err(ParseError::None, &mut errv)?
// },
// Err(err) => reg_err(err, &mut errv)?
// }
// }
// }
// }));
// // Preliminarily parse a file, substitution rules and imports are valid
// let preparsed = Rc::new(Cache::new({
// let preparser = line_parser(&prelude, &prelude);
// let loaded = Rc::clone(&loaded);
// move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
// let loaded = loaded.try_find(&path)?;
// if let Loaded::Module(source) = loaded.as_ref() {
// Ok(preparser.parse(source.as_str())?)
// } else {Err(ParseError::None)}
// }
// }));
// // Collect all toplevel names exported from a given file
// let exports = Rc::new(Cache::new({
// let loaded = Rc::clone(&loaded);
// let preparsed = Rc::clone(&preparsed);
// move |path: Vec<String>| -> ParseResult<Vec<String>, ELoad> {
// let loaded = loaded.try_find(&path)?;
// if let Loaded::Namespace(names) = loaded.as_ref() {
// return Ok(names.clone());
// }
// let preparsed = preparsed.try_find(&path)?;
// Ok(parse::exported_names(&preparsed)
// .into_iter()
// .map(|n| n[0].clone())
// .collect())
// }
// }));
// // Collect all toplevel names imported by a given file
// let imports = Rc::new(Cache::new({
// let preparsed = Rc::clone(&preparsed);
// let exports = Rc::clone(&exports);
// move |path: Vec<String>| -> ParseResult<HashMap<String, Vec<String>>, ELoad> {
// let entv = preparsed.try_find(&path)?.clone();
// let import_entries = parse::imports(entv.iter());
// let mut imported_symbols: HashMap<String, Vec<String>> = HashMap::new();
// for imp in import_entries {
// let export = exports.try_find(&imp.path)?;
// if let Some(ref name) = imp.name {
// if export.contains(&name) {
// imported_symbols.insert(name.clone(), imp.path.clone());
// }
// } else {
// for exp in export.as_ref() {
// imported_symbols.insert(exp.clone(), imp.path.clone());
// }
// }
// }
// Ok(imported_symbols)
// }
// }));
// // Final parse, operators are correctly separated
// let parsed = Rc::new(Cache::new({
// let imports = Rc::clone(&imports);
// let loaded = Rc::clone(&loaded);
// move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
// let imported_ops: Vec<String> =
// imports.try_find(&path)?
// .keys()
// .chain(prelude.iter())
// .filter(|s| parse::is_op(s))
// .cloned()
// .collect();
// let parser = file_parser(&prelude, &imported_ops);
// if let Loaded::Module(source) = loaded.try_find(&path)?.as_ref() {
// Ok(parser.parse(source.as_str())?)
// } else { Err(ParseError::None) }
// }
// }));
// let mut name_resolver = NameResolver::new({
// let modname = Rc::clone(&modname);
// move |path| {
// Some(modname.try_find(path).ok()?.as_ref().clone())
// }
// }, {
// let imports = Rc::clone(&imports);
// move |path| {
// imports.try_find(path).map(|f| f.as_ref().clone())
// }
// });
// // Turn parsed files into a bag of rules and a list of toplevel export names
// let resolved = Rc::new(Cache::new({
// let parsed = Rc::clone(&parsed);
// let exports = Rc::clone(&exports);
// let imports = Rc::clone(&imports);
// let modname = Rc::clone(&modname);
// move |path: Vec<String>| -> ParseResult<super::Module, ELoad> {
// let module = super::Module {
// rules: parsed.try_find(&path)?
// .iter()
// .filter_map(|ent| {
// if let FileEntry::Export(s) | FileEntry::Rule(s) = ent {
// Some(super::Rule {
// source: prefix_expr(&s.source, &path),
// target: prefix_expr(&s.target, &path),
// priority: s.priority,
// })
// } else { None }
// })
// .map(|rule| Ok(super::Rule {
// source: name_resolver.process_expression(&rule.source)?,
// target: name_resolver.process_expression(&rule.target)?,
// ..rule
// }))
// .collect::<ParseResult<Vec<super::Rule>, ELoad>>()?,
// exports: exports.try_find(&path)?.as_ref().clone(),
// references: imports.try_find(&path)?
// .values()
// .filter_map(|imps| {
// modname.try_find(&imps).ok().map(|r| r.as_ref().clone())
// })
// .collect()
// };
// Ok(module)
// }
// }));
// let all_rules = Cache::new({
// let resolved = Rc::clone(&resolved);
// move |path: Vec<String>| -> ParseResult<Vec<super::Rule>, ELoad> {
// let mut processed: HashSet<Vec<String>> = HashSet::new();
// let mut rules: Vec<super::Rule> = Vec::new();
// let mut pending: VecDeque<Vec<String>> = VecDeque::new();
// pending.push_back(path);
// while let Some(el) = pending.pop_front() {
// let resolved = resolved.try_find(&el)?;
// processed.insert(el.clone());
// pending.extend(
// resolved.references.iter()
// .filter(|&v| !processed.contains(v))
// .cloned()
// );
// rules.extend(
// resolved.rules.iter().cloned()
// )
// };
// Ok(rules)
// }
// });
// return all_rules;
// }
pub fn rule_collector<F: 'static, ELoad>(
mut load_mod: F,
prelude: Vec<String>
// ) -> impl FnMut(Vec<String>) -> Result<&'a Vec<super::Rule>, ParseError<ELoad>> + 'a
) -> Cache<Vec<String>, Result<Vec<super::Rule>, ModuleError<ELoad>>>
where
F: FnMut(Vec<String>) -> Result<Loaded, ELoad>,
ELoad: Clone + Debug
{
// Map paths to a namespace with name list (folder) or module with source text (file)
let loaded = Rc::new(Cache::new(move |path: Vec<String>|
-> ParseResult<Loaded, ELoad> {
load_mod(path).map_err(ModuleError::Load)
}));
// Map names to the longest prefix that points to a valid module
let modname = Rc::new(Cache::new({
let loaded = Rc::clone(&loaded);
move |symbol: Vec<String>| -> Result<Vec<String>, Vec<ModuleError<ELoad>>> {
let mut errv: Vec<ModuleError<ELoad>> = Vec::new();
let reg_err = |e, errv: &mut Vec<ModuleError<ELoad>>| {
errv.push(e);
if symbol.len() == errv.len() { Err(errv.clone()) }
else { Ok(()) }
};
loop {
let (path, _) = symbol.split_at(symbol.len() - errv.len());
let pathv = path.to_vec();
match loaded.try_find(&pathv) {
Ok(imports) => match imports.as_ref() {
Loaded::Module(_) => break Ok(pathv.clone()),
_ => reg_err(ModuleError::None, &mut errv)?
},
Err(err) => reg_err(err, &mut errv)?
}
}
}
}));
// Preliminarily parse a file, substitution rules and imports are valid
let preparsed = Rc::new(Cache::new({
let loaded = Rc::clone(&loaded);
let prelude2 = prelude.clone();
move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
let loaded = loaded.try_find(&path)?;
if let Loaded::Module(source) = loaded.as_ref() {
Ok(parse::parse(&prelude2, source.as_str())?)
} else {Err(ModuleError::None)}
}
}));
// Collect all toplevel names exported from a given file
let exports = Rc::new(Cache::new({
let loaded = Rc::clone(&loaded);
let preparsed = Rc::clone(&preparsed);
move |path: Vec<String>| -> ParseResult<Vec<String>, ELoad> {
let loaded = loaded.try_find(&path)?;
if let Loaded::Namespace(names) = loaded.as_ref() {
return Ok(names.clone());
}
let preparsed = preparsed.try_find(&path)?;
Ok(parse::exported_names(&preparsed)
.into_iter()
.map(|n| n[0].clone())
.collect())
}
}));
// Collect all toplevel names imported by a given file
let imports = Rc::new(Cache::new({
let preparsed = Rc::clone(&preparsed);
let exports = Rc::clone(&exports);
move |path: Vec<String>| -> ParseResult<HashMap<String, Vec<String>>, ELoad> {
let entv = preparsed.try_find(&path)?.clone();
let import_entries = parse::imports(entv.iter());
let mut imported_symbols: HashMap<String, Vec<String>> = HashMap::new();
for imp in import_entries {
let export = exports.try_find(&imp.path)?;
if let Some(ref name) = imp.name {
if export.contains(&name) {
imported_symbols.insert(name.clone(), imp.path.clone());
}
} else {
for exp in export.as_ref() {
imported_symbols.insert(exp.clone(), imp.path.clone());
}
}
}
Ok(imported_symbols)
}
}));
// Final parse, operators are correctly separated
let parsed = Rc::new(Cache::new({
let preparsed = Rc::clone(&preparsed);
let imports = Rc::clone(&imports);
let loaded = Rc::clone(&loaded);
move |path: Vec<String>| -> ParseResult<Vec<FileEntry>, ELoad> {
let imported_ops: Vec<String> =
imports.try_find(&path)?
.keys()
.chain(prelude.iter())
.filter(|s| parse::is_op(s))
.cloned()
.collect();
// let parser = file_parser(&prelude, &imported_ops);
let pre = preparsed.try_find(&path)?;
if let Loaded::Module(source) = loaded.try_find(&path)?.as_ref() {
Ok(parse::reparse(&imported_ops, source.as_str(), &pre)?)
} else { Err(ModuleError::None) }
}
}));
let mut name_resolver = NameResolver::new({
let modname = Rc::clone(&modname);
move |path| {
Some(modname.try_find(path).ok()?.as_ref().clone())
}
}, {
let imports = Rc::clone(&imports);
move |path| {
imports.try_find(path).map(|f| f.as_ref().clone())
}
});
// Turn parsed files into a bag of rules and a list of toplevel export names
let resolved = Rc::new(Cache::new({
let parsed = Rc::clone(&parsed);
let exports = Rc::clone(&exports);
let imports = Rc::clone(&imports);
let modname = Rc::clone(&modname);
move |path: Vec<String>| -> ParseResult<Module, ELoad> {
let module = Module {
rules: parsed.try_find(&path)?
.iter()
.filter_map(|ent| {
if let FileEntry::Rule(Rule{source, prio, target}, _) = ent {
Some(Rule {
source: source.iter().map(|ex| prefix_expr(ex, &path)).collect(),
target: target.iter().map(|ex| prefix_expr(ex, &path)).collect(),
prio: *prio,
})
} else { None }
})
.map(|rule| Ok(super::Rule {
source: rule.source.iter()
.map(|ex| name_resolver.process_expression(ex))
.collect::<Result<Vec<_>, _>>()?,
target: rule.target.iter()
.map(|ex| name_resolver.process_expression(ex))
.collect::<Result<Vec<_>, _>>()?,
// source: name_resolver.process_expression(&rule.source)?,
// target: name_resolver.process_expression(&rule.target)?,
..rule
}))
.collect::<ParseResult<Vec<super::Rule>, ELoad>>()?,
exports: exports.try_find(&path)?.as_ref().clone(),
references: imports.try_find(&path)?
.values()
.filter_map(|imps| {
modname.try_find(&imps).ok().map(|r| r.as_ref().clone())
})
.collect()
};
Ok(module)
}
}));
let all_rules = Cache::new({
let resolved = Rc::clone(&resolved);
move |path: Vec<String>| -> ParseResult<Vec<super::Rule>, ELoad> {
let mut processed: HashSet<Vec<String>> = HashSet::new();
let mut rules: Vec<super::Rule> = Vec::new();
let mut pending: VecDeque<Vec<String>> = VecDeque::new();
pending.push_back(path);
while let Some(el) = pending.pop_front() {
let resolved = resolved.try_find(&el)?;
processed.insert(el.clone());
pending.extend(
resolved.references.iter()
.filter(|&v| !processed.contains(v))
.cloned()
);
rules.extend(
resolved.rules.iter().cloned()
)
};
Ok(rules)
}
});
return all_rules;
}
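
Driving the collector then looks like main.rs above; a sketch (the loader closure and prelude list are illustrative):

let cwd = std::env::current_dir().unwrap();
let collect_rules = rule_collector(
    move |path| file_loader(cwd.clone())(path),
    vec!["=".to_string(), "=>".to_string()],
);
// Pulls "main" and everything it references through the cache chain,
// returning the flattened rule bag.
match collect_rules.try_find(&vec!["main".to_string()]) {
    Ok(rules) => for rule in rules.iter() { println!("{rule:?}") },
    Err(err) => println!("{:#?}", err)
}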

src/rule/mod.rs Normal file
View File

@@ -0,0 +1 @@
mod rule;

src/rule/rule.rs Normal file
View File

@@ -0,0 +1,38 @@
use std::cmp::{min, max};
use hashbrown::HashSet;
use crate::expression::Expr;
pub trait Rule {
type OutIter: Iterator<Item = Option<Expr>>;
/// The minimum and maximum number of symbols this rule may match.
fn len(&self) -> (Option<usize>, Option<usize>);
/// The exact tokens the pattern consumes (None if it varies)
fn consumes(&self) -> Option<HashSet<Vec<String>>>;
/// The exact tokens the pattern produces (None if it varies)
fn produces(&self) -> Option<HashSet<Vec<String>>>;
/// Check if the slice matches, and produce the necessary transformations
fn produce(&self, base: &[Expr]) -> Option<Self::OutIter>;
/// Try all subsections of the Vec of appropriate size, longest first, front to back.
/// Match the first, execute the substitution, and return the vector along with
/// whether any substitution happened.
fn apply(&self, mut base: Vec<Expr>) -> (Vec<Expr>, bool) {
let len_range = self.len();
let lo = max(len_range.0.unwrap_or(1), 1);
let hi = min(len_range.1.unwrap_or(base.len()), base.len());
for width in (lo..=hi).rev() {
let starts = (0..=base.len() - width).into_iter();
let first_match = starts.filter_map(|start| {
self.produce(&base[start..start+width])
.map(|res| (start, res))
}).next();
if let Some((start, substitution)) = first_match {
let diff = substitution.enumerate().filter_map(|(i, opt)| opt.map(|val| (i, val)));
for (idx, item) in diff { base[start + idx] = item }
return (base, true)
}
}
(base, false)
}
}
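
To make the contract concrete, a toy implementation plus the apply flow (everything here is illustrative and not part of the commit; Clause is assumed exported from expression):

use std::iter;
use crate::expression::Clause;

/// Replaces one exact name with another.
struct RenameRule { from: Vec<String>, to: Vec<String> }

impl Rule for RenameRule {
    type OutIter = iter::Once<Option<Expr>>;
    fn len(&self) -> (Option<usize>, Option<usize>) { (Some(1), Some(1)) }
    fn consumes(&self) -> Option<HashSet<Vec<String>>> {
        Some(iter::once(self.from.clone()).collect())
    }
    fn produces(&self) -> Option<HashSet<Vec<String>>> {
        Some(iter::once(self.to.clone()).collect())
    }
    fn produce(&self, base: &[Expr]) -> Option<Self::OutIter> {
        match base {
            [Expr(Clause::Name(name), None)] if *name == self.from =>
                Some(iter::once(Some(Expr(Clause::Name(self.to.clone()), None)))),
            _ => None
        }
    }
}
// apply scans windows longest-first, front to back, and patches the first match
// in place, so a renamed token never cascades within the same pass.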

View File

@@ -1,8 +1,6 @@
mod cache;
mod substack;
mod result_iter_collect;
pub use cache::Cache;
pub use substack::Substack;
pub use result_iter_collect::result_iter_collect;
pub type BoxedIter<'a, T> = Box<dyn Iterator<Item = T> + 'a>;

View File

@@ -1,19 +0,0 @@
pub fn result_iter_collect<T, E>(i: &mut dyn Iterator<Item = Result<T, E>>)
-> (Vec<Option<T>>, Vec<Option<E>>) {
i.fold((Vec::new(), Vec::new()), |(mut succ, mut err), mut next| {
match next {
Ok(res) => succ.push(Some(res)),
Err(e) => err.push(Some(e))
}
(succ, err)
})
}
pub fn recoverable_iter_collect<T, E>(i: &mut dyn Iterator<Item=(Option<T>, Vec<E>)>)
-> (Vec<Option<T>>, Vec<E>) {
i.fold((Vec::new(), Vec::new()), |(mut succ, mut err), (res, mut errv)| {
succ.push(res);
err.append(&mut errv);
(succ, err)
})
}