rule fundamentals

A lot of stuff I'm not gonna enumerate
@@ -1,5 +1,5 @@
 use chumsky::{self, prelude::*, Parser};
-use crate::{Clause, Expr, Literal, enum_parser};
+use crate::{enum_parser, expression::{Clause, Expr, Literal}};
 
 use super::{lexer::Lexeme};
 
@@ -86,5 +86,5 @@ pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
       .ignore_then(expr.clone()).or_not()
     )
     .map(|(val, typ)| Expr(val, typ.map(Box::new)))
-  })
+  }).labelled("Expression")
 }
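Aside: `labelled` only affects diagnostics, not what the parser accepts. A minimal standalone sketch of the chumsky behaviour relied on here (toy parser, not the project's):

```rust
use chumsky::prelude::*;

// Labelling leaves the grammar unchanged; it only tags the errors the
// parser produces, so failures report the label "Expression" instead of
// a raw expected-token list.
fn demo() {
    let expr = just::<char, _, Simple<char>>('x').labelled("Expression");
    let errs = expr.parse("y").unwrap_err();
    println!("{:?}", errs); // the Simple<char> errors carry the label
}
```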
@@ -1,6 +1,6 @@
-use std::{ops::Range, iter};
+use std::{ops::Range, iter, fmt};
 use ordered_float::NotNan;
-use chumsky::{Parser, prelude::*, text::whitespace};
+use chumsky::{Parser, prelude::*};
 use std::fmt::Debug;
 use crate::utils::BoxedIter;
 
@@ -14,6 +14,11 @@ impl Debug for Entry {
     // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
   }
 }
+impl Into<(Lexeme, Range<usize>)> for Entry {
+  fn into(self) -> (Lexeme, Range<usize>) {
+    (self.0, self.1)
+  }
+}
 
 #[derive(Clone, PartialEq, Eq, Hash)]
 pub enum Lexeme {
@@ -61,6 +66,9 @@ impl Lexeme {
   pub fn name<T: ToString>(n: T) -> Self {
     Lexeme::Name(n.to_string())
   }
+  pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
+    Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
+  }
   pub fn paren_parser<T, P>(
     expr: P
   ) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
@@ -76,15 +84,20 @@ impl Lexeme {
   }
 }
 
-fn rule_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
-  just('=').ignore_then(
-    choice((
-      none_of("-0123456789").rewind().to(NotNan::new(0f64).unwrap()),
-      number::float_parser().then_ignore(just("=>"))
-    )).map_err_with_span(|err, span| {
-      panic!("Something's up! {:?} {}", span, err)
-    })
-  )
-}
+#[derive(Clone, PartialEq, Eq, Hash)]
+pub struct LexedText(pub Vec<Vec<Entry>>);
+
+impl Debug for LexedText {
+  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+    for row in &self.0 {
+      for tok in row {
+        tok.fmt(f)?;
+        f.write_str(" ")?
+      }
+      f.write_str("\n")?
+    }
+    Ok(())
+  }
+}
 
 type LexSubres<'a> = BoxedIter<'a, Entry>;
@@ -104,7 +117,7 @@ fn paren_parser<'a>(
   })
 }
 
-pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Vec<Entry>>, Error=Simple<char>> + 'a
+pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, LexedText, Error=Simple<char>> + 'a
 where T: AsRef<str> + Clone {
   let all_ops = ops.iter().map(|o| o.as_ref().to_string())
     .chain(iter::once(".".to_string())).collect::<Vec<_>>();
@@ -114,7 +127,8 @@ where T: AsRef<str> + Clone {
     paren_parser(recurse.clone(), '[', ']'),
     paren_parser(recurse.clone(), '{', '}'),
     choice((
-      rule_parser().map(Lexeme::Rule),
+      just("==").padded().to(Lexeme::rule(0f64)),
+      just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
       comment::comment_parser().map(Lexeme::Comment),
       just("::").padded().to(Lexeme::NS),
       just('\\').padded().to(Lexeme::BS),
@@ -130,5 +144,5 @@ where T: AsRef<str> + Clone {
   }).separated_by(one_of("\t ").repeated())
     .flatten().collect()
     .separated_by(just('\n').then(text::whitespace()).ignored())
-
-}
+    .map(LexedText)
+}
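For context, a minimal sketch of driving the reworked entry point; the operator list and source text are made up, and `LexedText`'s new Debug impl prints one row of space-separated tokens per source line:

```rust
use chumsky::Parser;

// Hypothetical driver for the lexer above: `lexer` now yields `LexedText`
// instead of a bare Vec<Vec<Entry>>, so the result can be Debug-printed
// directly, one line of output per line of source.
fn demo() {
    let ops = ["+"]; // user-defined operators, T = &str here
    let text = lexer(&ops)
        .parse("reee =2=> shadow_reee\nfoo == bar")
        .expect("lexing failed");
    println!("{:?}", text);
}
```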
@@ -7,10 +7,12 @@ mod expression;
 mod sourcefile;
 mod import;
 mod enum_parser;
+mod parse;
 
 pub use sourcefile::FileEntry;
 pub use sourcefile::line_parser;
 pub use sourcefile::imports;
 pub use sourcefile::exported_names;
 pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
 pub use name::is_op;
+pub use parse::{parse, reparse, ParseError};
src/parse/parse.rs (new file, 65 lines)
@@ -0,0 +1,65 @@
+use std::{ops::Range, fmt::Debug};
+
+use chumsky::{prelude::{Simple, end}, Stream, Parser};
+use itertools::Itertools;
+use thiserror::Error;
+
+use crate::expression::Rule;
+
+use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};
+
+
+#[derive(Error, Debug, Clone)]
+pub enum ParseError {
+  #[error("Could not tokenize {0:?}")]
+  Lex(Vec<Simple<char>>),
+  #[error("Could not parse {0:#?}")]
+  Ast(Vec<Simple<Lexeme>>)
+}
+
+pub fn parse<'a, Iter, S, Op>(ops: &[Op], stream: S) -> Result<Vec<FileEntry>, ParseError>
+where
+  Op: 'a + AsRef<str> + Clone,
+  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
+  S: Into<Stream<'a, char, Range<usize>, Iter>> {
+  let lexed = lexer(ops).parse(stream).map_err(ParseError::Lex)?;
+  println!("Lexed:\n{:?}", lexed);
+  let parsr = line_parser().then_ignore(end());
+  let (parsed_lines, errors_per_line) = lexed.0.into_iter().filter_map(|v| {
+    // Find the first invalid position for Stream::from_iter
+    let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
+    // Stream expects tuples, lexer outputs structs
+    let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
+    Some(parsr.parse(Stream::from_iter(end..end+1, tuples)))
+    //                                 ^^^^^^^^^^
+    // I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
+    // end of input should make little difference
+  }).map(|res| match res {
+    Ok(r) => (Some(r), vec![]),
+    Err(e) => (None, e)
+  }).unzip::<_, _, Vec<_>, Vec<_>>();
+  let total_err = errors_per_line.into_iter()
+    .map(Vec::into_iter).flatten()
+    .collect::<Vec<_>>();
+  if total_err.len() > 0 { Err(ParseError::Ast(total_err)) }
+  else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
+}
+
+pub fn reparse<'a, Iter, S, Op>(ops: &[Op], stream: S, pre: &Vec<FileEntry>)
+-> Result<Vec<FileEntry>, ParseError>
+where
+  Op: 'a + AsRef<str> + Clone,
+  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
+  S: Into<Stream<'a, char, Range<usize>, Iter>> {
+  let result = parse(ops, stream)?;
+  Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
+    if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
+      if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
+        *source = s2.clone()
+      } else {
+        panic!("Preparse and reparse received different row types!")
+      }
+    }
+    output
+  }).collect())
+}
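On the `end..end+1` question flagged in the comment above: chumsky's `Stream::from_iter` takes an explicit end-of-input span as its first argument so that errors raised at EOF have somewhere to point. A standalone sketch with toy tokens (not the project's `Lexeme` type):

```rust
use std::ops::Range;
use chumsky::{prelude::*, Stream};

// Toy reproduction of the Stream::from_iter call in parse(): the first
// argument is the span reported for end-of-input errors, which is why it
// sits just past the last real token.
fn demo() {
    let tokens: Vec<(char, Range<usize>)> = vec![('a', 0..1), ('b', 1..2)];
    let eoi = tokens.last().map(|(_, span)| span.end).unwrap_or(0);
    let parser = just::<char, _, Simple<char>>('a')
        .then(just('b'))
        .then_ignore(end());
    // `eoi..eoi + 1` plays the same role as `end..end+1` above.
    assert!(parser.parse(Stream::from_iter(eoi..eoi + 1, tokens.into_iter())).is_ok());
}
```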
@@ -1,34 +0,0 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
use super::{expression, number::float_parser};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Rule {
|
||||
pub source: expression::Expr,
|
||||
pub priority: f64,
|
||||
pub target: expression::Expr
|
||||
}
|
||||
|
||||
/// Parses a substitution rule of the forms
|
||||
///
|
||||
/// ```orchid
|
||||
/// main = \x. ...
|
||||
/// $a + $b = (add $a $b)
|
||||
/// (foo bar baz) =1.1=> (foo 1 e)
|
||||
/// reee =2=> shadow_reee
|
||||
/// shadow_reee =0.9=> reee
|
||||
/// ```
|
||||
/// TBD whether this disables reee in the specified range or loops forever
|
||||
pub fn rule_parser<'a, T: 'a + AsRef<str> + Clone>(
|
||||
pattern_ops: &[T],
|
||||
ops: &[T]
|
||||
) -> impl Parser<char, Rule, Error = Simple<char>> + 'a {
|
||||
expression::expression_parser(pattern_ops).padded()
|
||||
.then_ignore(just('='))
|
||||
.then(
|
||||
float_parser().then_ignore(just("=>"))
|
||||
.or_not().map(|prio| prio.unwrap_or(0.0))
|
||||
).then(expression::expression_parser(ops).padded())
|
||||
.map(|((source, priority), target)| Rule { source, priority, target })
|
||||
.labelled("rule")
|
||||
}
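The `=`/`=N=>` priority syntax this deleted parser handled now lives in the lexer. For reference, a rough standalone equivalent of the old defaulting behaviour; the names here are illustrative, not the project's API:

```rust
use chumsky::prelude::*;

// Sketch of the deleted rule arrow: `=` optionally followed by a float
// priority and `=>`; a bare `=` defaults to priority 0, mirroring the
// `prio.unwrap_or(0.0)` in the removed rule_parser above.
fn arrow_parser() -> impl Parser<char, f64, Error = Simple<char>> {
    let float = text::int(10)
        .then(just('.').ignore_then(text::digits(10)).or_not())
        .map(|(int, frac): (String, Option<String>)| match frac {
            Some(frac) => format!("{}.{}", int, frac).parse::<f64>().unwrap(),
            None => int.parse::<f64>().unwrap(),
        });
    just('=')
        .ignore_then(float.then_ignore(just("=>")).or_not())
        .map(|prio| prio.unwrap_or(0.0))
}
```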
@@ -1,15 +1,13 @@
 use std::collections::HashSet;
-use std::fs::File;
 use std::iter;
 
-use crate::{enum_parser, Expr, Clause};
+use crate::{enum_parser, expression::{Expr, Clause, Rule}};
 use crate::utils::BoxedIter;
 
 use super::expression::xpr_parser;
 use super::import;
 use super::import::import_parser;
 use super::lexer::Lexeme;
 use super::name;
 use chumsky::{Parser, prelude::*};
 use ordered_float::NotNan;
-
@@ -18,8 +16,7 @@ use ordered_float::NotNan;
 pub enum FileEntry {
   Import(Vec<import::Import>),
   Comment(String),
-  Rule(Vec<Expr>, NotNan<f64>, Vec<Expr>),
-  Export(Vec<Expr>, NotNan<f64>, Vec<Expr>)
+  Rule(Rule, bool)
 }
 
 /// Recursively iterate through all "names" in an expression. It also finds a lot of things that
@@ -70,49 +67,21 @@ pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
       println!("{:?} could not yield an export", s); e
     })
     .ignore_then(rule_parser())
-    .map(|(lhs, prio, rhs)| FileEntry::Export(lhs, prio, rhs)),
+    .map(|(source, prio, target)| FileEntry::Rule(Rule{source, prio, target}, true)),
     // This could match almost anything so it has to go last
-    rule_parser().map(|(lhs, prio, rhs)| FileEntry::Rule(lhs, prio, rhs)),
+    rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{source, prio, target}, false)),
   ))
 }
 
 /// Collect all exported names (and a lot of other words) from a file
 pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
   src.iter().flat_map(|ent| match ent {
-    FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())) as BoxedIter<&Expr>,
+    FileEntry::Rule(Rule{source, target, ..}, true) =>
+      Box::new(source.iter().chain(target.iter())) as BoxedIter<&Expr>,
     _ => Box::new(iter::empty())
   }).map(find_all_names).flatten().collect()
 }
 
-
-// #[allow(dead_code)]
-/// Collect all operators defined in a file (and some other words)
-fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
-  let all_names:HashSet<&Vec<String>> = src.iter().flat_map(|ent| match ent {
-    FileEntry::Rule(s, _, d) =>
-      if exported_only {Box::new(iter::empty()) as BoxedIter<&Expr>}
-      else {Box::new(s.iter().chain(d.iter()))}
-    FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())),
-    _ => Box::new(iter::empty())
-  }).map(find_all_names).flatten().collect();
-  // Dedupe stage of dubious value; collecting into a hashset may take longer than
-  // handling duplicates would with a file of sensible size.
-  all_names.into_iter()
-    .filter_map(|name|
-      // If it's namespaced, it's imported.
-      if name.len() == 1 && name::is_op(&name[0]) {Some(&name[0])}
-      else {None}
-    ).collect()
-}
-
-// #[allow(dead_code)]
-/// Collect all operators from a file
-pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
-// #[allow(dead_code)]
-/// Collect exported operators from a file (plus some extra)
-pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
-
 
 /// Summarize all imports from a file in a single list of qualified names
 pub fn imports<'a, 'b, I>(
   src: I
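To make the new `FileEntry` shape concrete, a hypothetical consumer, assuming the `Rule` fields this commit uses (`source`, `prio`, `target`):

```rust
// Hypothetical: the bool on FileEntry::Rule now marks whether the rule is
// exported, replacing the old separate Export variant.
fn describe(entry: &FileEntry) -> String {
    match entry {
        FileEntry::Import(imports) => format!("{} import(s)", imports.len()),
        FileEntry::Comment(text) => format!("comment: {}", text),
        FileEntry::Rule(rule, true) => format!("exported rule, priority {}", rule.prio),
        FileEntry::Rule(rule, false) => format!("private rule, priority {}", rule.prio),
    }
}
```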