forked from Orchid/orchid
redid the parser, patched up the project too.
This commit is contained in:
13
src/parse/comment.rs
Normal file
13
src/parse/comment.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
pub use chumsky::{self, prelude::*, Parser};

/// Parses Lua-style comments
///
/// Accepts either a block comment `--[ ... ]--` or a line comment `--` running
/// up to (but not consuming) the next newline or end of input, and yields the
/// comment body as a `String`.
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
  choice((
    // Block form: everything between `--[` and the closing `]--`.
    just("--[").ignore_then(take_until(
      just("]--").ignored()
    )),
    // Line form: `rewind` leaves the newline in the stream so a
    // line-oriented caller can still see it; `end()` covers a comment on
    // the last line with no trailing newline.
    just("--").ignore_then(take_until(
      just("\n").rewind().ignored().or(end())
    ))
  // take_until yields (Vec<char>, terminator); drop the unit terminator and
  // collect the characters into the resulting String.
  )).map(|(vc, ())| vc).collect().labelled("comment")
}
|
||||
26
src/parse/enum_parser.rs
Normal file
26
src/parse/enum_parser.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
/// Build a chumsky parser that matches a single token of one enum variant and
/// extracts its payload.
///
/// Forms:
/// * `enum_parser!(Path::Variant | "msg")` — match `Variant`, yield its field,
///   fail with the custom message `"msg"`.
/// * `enum_parser!(Src >> Tgt; Variant)` — match `Src::Variant` and rewrap the
///   payload as `Tgt::Variant`.
/// * `enum_parser!(Src >> Tgt; A, B, C)` — choice over several such variants.
/// * `enum_parser!(Path::Variant)` — first form with a default error message.
#[macro_export]
macro_rules! enum_parser {
  ($p:path | $m:tt) => {
    {
      // filter_map inspects one token and either unwraps it or rejects it
      // with a custom error at span `s`.
      ::chumsky::prelude::filter_map(|s, l| {
        if let $p(x) = l { Ok(x) }
        else { Err(::chumsky::prelude::Simple::custom(s, $m))}
      })
    }
  };
  ($p:path >> $q:path; $i:ident) => {
    {
      // Local aliases let the shared variant ident be appended to both the
      // source and target enum paths.
      use $p as srcpath;
      use $q as tgtpath;
      enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i)
    }
  };
  ($p:path >> $q:path; $($i:ident),+) => {
    {
      ::chumsky::prelude::choice((
        $( enum_parser!($p >> $q; $i) ),+
      ))
    }
  };
  ($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) };
}
|
||||
@@ -1,86 +1,90 @@
|
||||
use std::{fmt::Debug};
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
use crate::{Clause, Expr, Literal, enum_parser};
|
||||
|
||||
use super::string;
|
||||
use super::number;
|
||||
use super::misc;
|
||||
use super::name;
|
||||
use super::{lexer::Lexeme};
|
||||
|
||||
/// An S-expression as read from a source file
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Expr {
|
||||
Num(f64),
|
||||
Int(u64),
|
||||
Char(char),
|
||||
Str(String),
|
||||
Name(Vec<String>),
|
||||
S(Vec<Expr>),
|
||||
Lambda(String, Option<Box<Expr>>, Vec<Expr>),
|
||||
Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
|
||||
|
||||
Typed(Box<Expr>, Box<Expr>)
|
||||
/// Parse an S-expression: any number of sub-expressions between a matching
/// delimiter pair `()`, `[]` or `{}`. The opening delimiter character is
/// preserved in the resulting `Clause::S`.
fn sexpr_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, b))
}
|
||||
|
||||
/// Parse a type annotation
|
||||
fn typed_parser<'a>(
|
||||
expr: Recursive<'a, char, Expr, Simple<char>>
|
||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
just(':').ignore_then(expr)
|
||||
/// Parse a lambda abstraction: `\name.body` or `\name:type.body`.
///
/// Comments may appear between any two tokens. The optional `:type` section
/// may hold several expressions and defaults to an empty Vec when absent.
/// Each occurrence of the parameter name in the body is bound via
/// `bind_parameter` before the `Clause::Lambda` is built.
fn lambda_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  just(Lexeme::BS)
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .ignore_then(enum_parser!(Lexeme::Name))
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(
    just(Lexeme::Type)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(expr.clone().repeated())
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    // The whole `:type` section is optional; None becomes an empty Vec.
    .or_not().map(Option::unwrap_or_default)
  )
  .then_ignore(just(Lexeme::name(".")))
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(expr.repeated().at_least(1))
  .map(|((name, typ), mut body): ((String, Vec<Expr>), Vec<Expr>)| {
    // Bind the parameter in the body before constructing the node.
    for ent in &mut body { ent.bind_parameter(&name) };
    Clause::Lambda(name, typ, body)
  })
}
|
||||
|
||||
fn auto_parser<P>(
|
||||
expr: P
|
||||
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
|
||||
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
|
||||
just(Lexeme::At)
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.ignore_then(enum_parser!(Lexeme::Name).or_not())
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.then(
|
||||
just(Lexeme::Type)
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.ignore_then(expr.clone().repeated())
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
)
|
||||
.then_ignore(just(Lexeme::name(".")))
|
||||
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
|
||||
.then(expr.repeated().at_least(1))
|
||||
.try_map(|((name, typ), mut body), s| if name == None && typ.is_empty() {
|
||||
Err(Simple::custom(s, "Auto without name or type has no effect"))
|
||||
} else {
|
||||
if let Some(n) = &name {
|
||||
for ent in &mut body { ent.bind_parameter(n) }
|
||||
}
|
||||
Ok(Clause::Auto(name, typ, body))
|
||||
})
|
||||
}
|
||||
|
||||
/// Parse a (possibly namespaced) name: one or more `Name` tokens joined by the
/// `::` namespace separator, with comments permitted around each separator.
fn name_parser() -> impl Parser<Lexeme, Vec<String>, Error = Simple<Lexeme>> + Clone {
  enum_parser!(Lexeme::Name).separated_by(
    enum_parser!(Lexeme::Comment).repeated()
    .then(just(Lexeme::NS))
    .then(enum_parser!(Lexeme::Comment).repeated())
  ).at_least(1)
}
|
||||
|
||||
/// Parse an expression without a type annotation
|
||||
fn untyped_xpr_parser<'a>(
|
||||
expr: Recursive<'a, char, Expr, Simple<char>>,
|
||||
ops: &[&'a str]
|
||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
// basic S-expression rule
|
||||
let sexpr = expr.clone()
|
||||
.repeated()
|
||||
.delimited_by(just('('), just(')'))
|
||||
.map(Expr::S);
|
||||
// Blocks
|
||||
// can and therefore do match everything up to the closing paren
|
||||
// \name. body
|
||||
// \name:type. body
|
||||
let lambda = just('\\')
|
||||
.ignore_then(text::ident())
|
||||
.then(typed_parser(expr.clone()).or_not())
|
||||
.then_ignore(just('.'))
|
||||
.then(expr.clone().repeated().at_least(1))
|
||||
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
|
||||
// @name. body
|
||||
// @name:type. body
|
||||
// @:type. body
|
||||
let auto = just('@')
|
||||
.ignore_then(text::ident().or_not())
|
||||
.then(typed_parser(expr.clone()).or_not())
|
||||
.then_ignore(just('.'))
|
||||
.then(expr.clone().repeated().at_least(1))
|
||||
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
|
||||
choice((
|
||||
number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence
|
||||
number::float_parser().map(Expr::Num),
|
||||
string::char_parser().map(Expr::Char),
|
||||
string::str_parser().map(Expr::Str),
|
||||
name::name_parser(ops).map(Expr::Name), // includes namespacing
|
||||
sexpr,
|
||||
lambda,
|
||||
auto
|
||||
)).padded()
|
||||
}
|
||||
|
||||
/// Parse any expression with a type annotation, surrounded by comments
|
||||
pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
// This approach to parsing comments is ugly and error-prone,
|
||||
// but I don't have a lot of other ideas
|
||||
return recursive(|expr| {
|
||||
return misc::comment_parser().or_not().ignore_then(
|
||||
untyped_xpr_parser(expr.clone(), &ops)
|
||||
.then(typed_parser(expr).or_not())
|
||||
.map(|(val, t)| match t {
|
||||
Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
|
||||
None => val
|
||||
})
|
||||
).then_ignore(misc::comment_parser().or_not())
|
||||
/// Parse one expression from the token stream into an [Expr]: a clause plus an
/// optional `:type` annotation. Comments on either side of the clause are
/// consumed and discarded.
pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
  recursive(|expr| {
    let clause =
      enum_parser!(Lexeme::Comment).repeated()
      .ignore_then(choice((
        // Literal lexemes are lifted directly into Literal variants.
        enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Clause::Literal),
        name_parser().map(Clause::Name),
        sexpr_parser(expr.clone()),
        lambda_parser(expr.clone()),
        auto_parser(expr.clone())
      ))).then_ignore(enum_parser!(Lexeme::Comment).repeated());
    // An optional `:type` suffix attaches a type expression to the clause.
    clause.clone().then(
      just(Lexeme::Type)
      .ignore_then(expr.clone()).or_not()
    )
    .map(|(val, typ)| Expr(val, typ.map(Box::new)))
  })
}
|
||||
@@ -1,7 +1,9 @@
|
||||
use std::iter;
|
||||
|
||||
use chumsky::{Parser, prelude::*};
|
||||
use super::name;
|
||||
use crate::{enum_parser, utils::BoxedIter};
|
||||
|
||||
use super::lexer::Lexeme;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Import {
|
||||
@@ -9,15 +11,10 @@ pub struct Import {
|
||||
pub name: Option<String>
|
||||
}
|
||||
|
||||
|
||||
pub type BoxedStrIter = Box<dyn Iterator<Item = String>>;
|
||||
pub type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
|
||||
|
||||
/// initialize a Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>
|
||||
/// with a single element.
|
||||
fn init_table(name: String) -> BoxedStrIterIter {
|
||||
// I'm not confident at all that this is a good approach.
|
||||
Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter))
|
||||
/// initialize a BoxedIter<BoxedIter<String>> with a single element.
|
||||
fn init_table(name: String) -> BoxedIter<'static, BoxedIter<'static, String>> {
|
||||
// I'm not at all confident that this is a good approach.
|
||||
Box::new(iter::once(Box::new(iter::once(name)) as BoxedIter<String>))
|
||||
}
|
||||
|
||||
/// Parse an import command
|
||||
@@ -25,29 +22,38 @@ fn init_table(name: String) -> BoxedStrIterIter {
|
||||
/// and the delimiters are plain parentheses. Namespaces should preferably contain
|
||||
/// crossplatform filename-legal characters but the symbols are explicitly allowed
|
||||
/// to go wild. There's a blacklist in [name]
|
||||
pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
||||
pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> {
|
||||
// TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
|
||||
recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
|
||||
name::modname_parser()
|
||||
.padded()
|
||||
.then_ignore(just("::"))
|
||||
.repeated()
|
||||
recursive(|expr: Recursive<Lexeme, BoxedIter<BoxedIter<String>>, Simple<Lexeme>>| {
|
||||
enum_parser!(Lexeme::Name)
|
||||
.separated_by(just(Lexeme::NS))
|
||||
.then(
|
||||
choice((
|
||||
expr.clone()
|
||||
.separated_by(just(','))
|
||||
.delimited_by(just('('), just(')'))
|
||||
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
|
||||
// Each expr returns a list of imports, flatten those into a common list
|
||||
just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped
|
||||
name::modname_parser().map(init_table) // Just a name, wrapped
|
||||
)).padded()
|
||||
).map(|(pre, post)| {
|
||||
Box::new(post.map(move |el| {
|
||||
Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
|
||||
})) as BoxedStrIterIter
|
||||
just(Lexeme::NS)
|
||||
.ignore_then(
|
||||
choice((
|
||||
expr.clone()
|
||||
.separated_by(just(Lexeme::name(",")))
|
||||
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
|
||||
.map(|v| Box::new(v.into_iter().flatten()) as BoxedIter<BoxedIter<String>>)
|
||||
.labelled("import group"),
|
||||
// Each expr returns a list of imports, flatten those into a common list
|
||||
just(Lexeme::name("*")).map(|_| init_table("*".to_string()))
|
||||
.labelled("wildcard import"), // Just a *, wrapped
|
||||
enum_parser!(Lexeme::Name).map(init_table)
|
||||
.labelled("import terminal") // Just a name, wrapped
|
||||
))
|
||||
).or_not()
|
||||
)
|
||||
.map(|(name, opt_post): (Vec<String>, Option<BoxedIter<BoxedIter<String>>>)| -> BoxedIter<BoxedIter<String>> {
|
||||
if let Some(post) = opt_post {
|
||||
Box::new(post.map(move |el| {
|
||||
Box::new(name.clone().into_iter().chain(el)) as BoxedIter<String>
|
||||
})) as BoxedIter<BoxedIter<String>>
|
||||
} else {
|
||||
Box::new(iter::once(Box::new(name.into_iter()) as BoxedIter<String>))
|
||||
}
|
||||
})
|
||||
}).padded().map(|paths| {
|
||||
}).map(|paths| {
|
||||
paths.filter_map(|namespaces| {
|
||||
let mut path: Vec<String> = namespaces.collect();
|
||||
match path.pop()?.as_str() {
|
||||
@@ -55,5 +61,5 @@ pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
||||
name => Some(Import { path, name: Some(name.to_owned()) })
|
||||
}
|
||||
}).collect()
|
||||
})
|
||||
}).labelled("import")
|
||||
}
|
||||
134
src/parse/lexer.rs
Normal file
134
src/parse/lexer.rs
Normal file
@@ -0,0 +1,134 @@
|
||||
use std::{ops::Range, iter};
|
||||
use ordered_float::NotNan;
|
||||
use chumsky::{Parser, prelude::*, text::whitespace};
|
||||
use std::fmt::Debug;
|
||||
use crate::utils::BoxedIter;
|
||||
|
||||
use super::{number, string, name, comment};
|
||||
|
||||
/// A lexeme paired with the source byte range it was read from.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Entry(pub Lexeme, pub Range<usize>);
impl Debug for Entry {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    // Positions are deliberately omitted to keep token-stream dumps short.
    write!(f, "{:?}", self.0)
    // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
  }
}
|
||||
|
||||
/// A single token produced by the lexer.
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lexeme {
  Num(NotNan<f64>),
  Int(u64),
  Char(char),
  Str(String),
  Name(String),
  /// Rule arrow `=prio=>`; carries the rule's priority.
  Rule(NotNan<f64>),
  NS, // namespace separator
  /// Opening delimiter; stores the opening character.
  LP(char),
  /// Closing delimiter; also stores the corresponding *opening* character.
  RP(char),
  BS, // Backslash
  At,
  Type, // type operator
  Comment(String)
}
|
||||
|
||||
impl Debug for Lexeme {
  /// Renders each token roughly as it appears in source text.
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    match self {
      Self::Num(n) => write!(f, "{}", n),
      Self::Int(i) => write!(f, "{}", i),
      Self::Char(c) => write!(f, "{:?}", c),
      Self::Str(s) => write!(f, "{:?}", s),
      Self::Name(name) => write!(f, "{}", name),
      Self::Rule(prio) => write!(f, "={}=>", prio),
      Self::NS => write!(f, "::"),
      Self::LP(l) => write!(f, "{}", l),
      // RP stores the opening character, so map it to its closing pair.
      Self::RP(l) => match l {
        '(' => write!(f, ")"),
        '[' => write!(f, "]"),
        '{' => write!(f, "}}"),
        _ => f.debug_tuple("RP").field(l).finish()
      },
      Self::BS => write!(f, "\\"),
      Self::At => write!(f, "@"),
      Self::Type => write!(f, ":"),
      Self::Comment(text) => write!(f, "--[{}]--", text),
    }
  }
}
|
||||
|
||||
impl Lexeme {
  /// Shorthand for building a [Lexeme::Name] from anything stringifiable.
  pub fn name<T: ToString>(n: T) -> Self {
    Lexeme::Name(n.to_string())
  }
  /// Wrap `expr` in any of the three delimiter pairs `()`, `[]` or `{}`,
  /// returning the opening delimiter character alongside the parsed value.
  pub fn paren_parser<T, P>(
    expr: P
  ) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
  where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
    choice((
      expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
        .map(|t| ('(', t)),
      expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
        .map(|t| ('[', t)),
      expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
        .map(|t| ('{', t)),
    ))
  }
}
|
||||
|
||||
fn rule_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
just('=').ignore_then(
|
||||
choice((
|
||||
none_of("-0123456789").rewind().to(NotNan::new(0f64).unwrap()),
|
||||
number::float_parser().then_ignore(just("=>"))
|
||||
)).map_err_with_span(|err, span| {
|
||||
panic!("Something's up! {:?} {}", span, err)
|
||||
})
|
||||
)
|
||||
}
|
||||
|
||||
type LexSubres<'a> = BoxedIter<'a, Entry>;
|
||||
|
||||
/// Lex the contents of one delimiter pair recursively, flanking the inner
/// token stream with `LP`/`RP` entries. Both entries record the *opening*
/// character (see [Lexeme::RP]).
fn paren_parser<'a>(
  expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
  lp: char, rp: char
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
  expr.padded().repeated()
    .map(|x| Box::new(x.into_iter().flatten()) as LexSubres)
    .delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
      Box::new(
        iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1))
          .chain(b)
          // RP deliberately carries the opening char `lp` as well.
          .chain(iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end)))
      ) as LexSubres
    })
}
|
||||
|
||||
/// Tokenize source text into lines of [Entry] values.
///
/// `ops` lists operators to recognize as names; `.` is always appended to
/// that list. The outer Vec splits on a newline followed by any whitespace,
/// the inner sequence on runs of tabs and spaces.
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Vec<Entry>>, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
  let all_ops = ops.iter().map(|o| o.as_ref().to_string())
    .chain(iter::once(".".to_string())).collect::<Vec<_>>();
  recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
    choice((
      // Bracketed regions recurse so nesting is tracked during lexing.
      paren_parser(recurse.clone(), '(', ')'),
      paren_parser(recurse.clone(), '[', ']'),
      paren_parser(recurse.clone(), '{', '}'),
      choice((
        rule_parser().map(Lexeme::Rule),
        comment::comment_parser().map(Lexeme::Comment),
        just("::").padded().to(Lexeme::NS),
        just('\\').padded().to(Lexeme::BS),
        just('@').padded().to(Lexeme::At),
        just(':').to(Lexeme::Type),
        number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence
        number::float_parser().map(Lexeme::Num),
        string::char_parser().map(Lexeme::Char),
        string::str_parser().map(Lexeme::Str),
        name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing
      )).map_with_span(|lx, span| Box::new(iter::once(Entry(lx, span))) as LexSubres)
    ))
  }).separated_by(one_of("\t ").repeated())
    .flatten().collect()
    .separated_by(just('\n').then(text::whitespace()).ignored())

}
|
||||
@@ -1,8 +0,0 @@
|
||||
pub use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
/// Parses Lua-style comments
|
||||
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||
any().repeated().delimited_by(just("--["), just("]--")).or(
|
||||
any().repeated().delimited_by(just("--"), just("\n"))
|
||||
).map(|vc| vc.iter().collect()).padded()
|
||||
}
|
||||
@@ -1,18 +1,16 @@
|
||||
mod expression;
|
||||
mod string;
|
||||
mod number;
|
||||
mod misc;
|
||||
mod import;
|
||||
mod name;
|
||||
mod substitution;
|
||||
mod lexer;
|
||||
mod comment;
|
||||
mod expression;
|
||||
mod sourcefile;
|
||||
mod import;
|
||||
mod enum_parser;
|
||||
|
||||
pub use substitution::Substitution;
|
||||
pub use expression::Expr;
|
||||
pub use expression::expression_parser;
|
||||
pub use sourcefile::FileEntry;
|
||||
pub use sourcefile::file_parser;
|
||||
pub use sourcefile::line_parser;
|
||||
pub use sourcefile::imports;
|
||||
pub use sourcefile::is_op;
|
||||
pub use sourcefile::exported_names;
|
||||
pub use import::Import;
|
||||
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
|
||||
pub use name::is_op;
|
||||
@@ -1,12 +1,14 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
/// Matches any one of the passed operators, longest-first
|
||||
fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>> {
|
||||
let mut sorted_ops = ops.to_vec();
|
||||
fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple<char>> {
|
||||
let mut sorted_ops: Vec<String> = ops.iter().map(|t| t.as_ref().to_string()).collect();
|
||||
sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
|
||||
sorted_ops.into_iter()
|
||||
.map(|op| just(op.to_string()).boxed())
|
||||
.reduce(|a, b| a.or(b).boxed()).unwrap()
|
||||
.map(|op| just(op).boxed())
|
||||
.reduce(|a, b| a.or(b).boxed())
|
||||
.unwrap_or(empty().map(|()| panic!("Empty isn't meant to match")).boxed())
|
||||
.labelled("operator").boxed()
|
||||
}
|
||||
|
||||
/// Matches anything that's allowed as an operator
|
||||
@@ -27,20 +29,31 @@ fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>>
|
||||
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
|
||||
/// common in maths so it's worth a try. Investigate.
|
||||
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
|
||||
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '.'];
|
||||
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', ','];
|
||||
filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
|
||||
.repeated().at_least(1)
|
||||
.collect()
|
||||
.labelled("modname")
|
||||
}
|
||||
|
||||
/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
|
||||
/// blacklisted character as a new operator.
|
||||
pub fn name_parser<'a>(
|
||||
ops: &[&'a str]
|
||||
) -> impl Parser<char, Vec<String>, Error = Simple<char>> + 'a {
|
||||
pub fn name_parser<'a, T: AsRef<str> + Clone>(
|
||||
ops: &[T]
|
||||
) -> impl Parser<char, String, Error = Simple<char>> + 'a {
|
||||
choice((
|
||||
op_parser(ops), // First try to parse a known operator
|
||||
text::ident(), // Failing that, parse plain text
|
||||
text::ident().labelled("plain text"), // Failing that, parse plain text
|
||||
modname_parser() // Finally parse everything until tne next terminal as a new operator
|
||||
)).padded().separated_by(just("::")).padded()
|
||||
))
|
||||
.labelled("name")
|
||||
}
|
||||
|
||||
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
/// start.
///
/// Returns false for the empty string.
pub fn is_op<T: AsRef<str>>(s: T) -> bool {
  // An operator is any non-empty string whose first char isn't alphanumeric.
  s.as_ref().chars().next().map_or(false, |x| !x.is_alphanumeric())
}
|
||||
@@ -1,4 +1,5 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
use ordered_float::NotNan;
|
||||
|
||||
fn assert_not_digit(base: u32, c: char) {
|
||||
if base > (10 + (c as u32 - 'a' as u32)) {
|
||||
@@ -51,7 +52,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
|
||||
}
|
||||
|
||||
/// returns a mapper that converts a mantissa and an exponent into a float
|
||||
fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
|
||||
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32),) -> NotNan<f64> {
|
||||
return move |(val, exp)| {
|
||||
if exp == 0 {val}
|
||||
else {val * (base as f64).powf(exp.try_into().unwrap())}
|
||||
@@ -77,32 +78,35 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
}
|
||||
|
||||
/// parse a float from dot notation
|
||||
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
fn dotted_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
uint_parser(base)
|
||||
.then_ignore(just('.'))
|
||||
.then(
|
||||
text::digits(base).then(separated_digits_parser(base))
|
||||
).map(move |(wh, (frac1, frac2))| {
|
||||
let frac = frac1 + &frac2;
|
||||
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
|
||||
let dexp = base.pow(frac.len().try_into().unwrap());
|
||||
wh as f64 + (frac_num / dexp as f64)
|
||||
just('.').ignore_then(
|
||||
text::digits(base).then(separated_digits_parser(base))
|
||||
).map(move |(frac1, frac2)| {
|
||||
let frac = frac1 + &frac2;
|
||||
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
|
||||
let dexp = base.pow(frac.len().try_into().unwrap());
|
||||
frac_num / dexp as f64
|
||||
}).or_not().map(|o| o.unwrap_or_default())
|
||||
).try_map(|(wh, f), s| {
|
||||
NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
|
||||
})
|
||||
}
|
||||
|
||||
/// parse a float from dotted and optionally also exponential notation
|
||||
fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
fn pow_float_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
assert_not_digit(base, 'p');
|
||||
dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
|
||||
}
|
||||
|
||||
/// parse a float with dotted and optionally exponential notation from a base determined by its
|
||||
/// prefix
|
||||
pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
pub fn float_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
|
||||
choice((
|
||||
just("0b").ignore_then(pow_float_parser(2)),
|
||||
just("0x").ignore_then(pow_float_parser(16)),
|
||||
just('0').ignore_then(pow_float_parser(8)),
|
||||
pow_float_parser(10),
|
||||
))
|
||||
)).labelled("float")
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ use chumsky::{self, prelude::*, Parser};
|
||||
use super::{expression, number::float_parser};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Substitution {
|
||||
pub struct Rule {
|
||||
pub source: expression::Expr,
|
||||
pub priority: f64,
|
||||
pub target: expression::Expr
|
||||
@@ -19,15 +19,16 @@ pub struct Substitution {
|
||||
/// shadow_reee =0.9=> reee
|
||||
/// ```
|
||||
/// TBD whether this disables reee in the specified range or loops forever
|
||||
pub fn substitution_parser<'a>(
|
||||
pattern_ops: &[&'a str],
|
||||
ops: &[&'a str]
|
||||
) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
|
||||
expression::expression_parser(pattern_ops)
|
||||
pub fn rule_parser<'a, T: 'a + AsRef<str> + Clone>(
|
||||
pattern_ops: &[T],
|
||||
ops: &[T]
|
||||
) -> impl Parser<char, Rule, Error = Simple<char>> + 'a {
|
||||
expression::expression_parser(pattern_ops).padded()
|
||||
.then_ignore(just('='))
|
||||
.then(
|
||||
float_parser().then_ignore(just("=>"))
|
||||
.or_not().map(|prio| prio.unwrap_or(0.0))
|
||||
).then(expression::expression_parser(ops))
|
||||
.map(|((source, priority), target)| Substitution { source, priority, target })
|
||||
).then(expression::expression_parser(ops).padded())
|
||||
.map(|((source, priority), target)| Rule { source, priority, target })
|
||||
.labelled("rule")
|
||||
}
|
||||
@@ -1,20 +1,25 @@
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::iter;
|
||||
|
||||
use super::expression::Expr;
|
||||
use crate::{enum_parser, Expr, Clause};
|
||||
use crate::utils::BoxedIter;
|
||||
|
||||
use super::expression::xpr_parser;
|
||||
use super::import;
|
||||
use super::misc;
|
||||
use super::substitution::substitution_parser;
|
||||
use super::substitution::Substitution;
|
||||
use super::import::import_parser;
|
||||
use super::lexer::Lexeme;
|
||||
use super::name;
|
||||
use chumsky::{Parser, prelude::*};
|
||||
use ordered_float::NotNan;
|
||||
|
||||
/// Anything we might encounter in a file
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum FileEntry {
|
||||
Import(Vec<import::Import>),
|
||||
Comment(String),
|
||||
Substitution(Substitution),
|
||||
Export(Substitution)
|
||||
Rule(Vec<Expr>, NotNan<f64>, Vec<Expr>),
|
||||
Export(Vec<Expr>, NotNan<f64>, Vec<Expr>)
|
||||
}
|
||||
|
||||
/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
|
||||
@@ -22,19 +27,22 @@ pub enum FileEntry {
|
||||
/// sophisticated search.
|
||||
///
|
||||
/// TODO: find a way to exclude parameters
|
||||
fn find_all_names_recur(expr: &Expr) -> Box<dyn Iterator<Item = &Vec<String>> + '_> {
|
||||
match expr {
|
||||
Expr::Auto(_, typ, body) | Expr::Lambda(_, typ, body) => Box::new(match typ {
|
||||
Some(texp) => find_all_names_recur(texp),
|
||||
None => Box::new(iter::empty())
|
||||
}.chain(body.into_iter().map(find_all_names_recur).flatten())),
|
||||
Expr::S(body) => Box::new(body.into_iter().map(find_all_names_recur).flatten()),
|
||||
Expr::Typed(val, typ) => Box::new(
|
||||
find_all_names_recur(val).chain(find_all_names_recur(typ))
|
||||
fn find_all_names_recur<'a>(expr: &'a Expr) -> BoxedIter<&'a Vec<String>> {
|
||||
let proc_clause = |clause: &'a Clause| match clause {
|
||||
Clause::Auto(_, typ, body) | Clause::Lambda(_, typ, body) => Box::new(
|
||||
typ.iter().flat_map(find_all_names_recur)
|
||||
.chain(body.iter().flat_map(find_all_names_recur))
|
||||
) as BoxedIter<&'a Vec<String>>,
|
||||
Clause::S(_, body) => Box::new(
|
||||
body.iter().flat_map(find_all_names_recur)
|
||||
),
|
||||
Expr::Name(x) => Box::new(iter::once(x)),
|
||||
Clause::Name(x) => Box::new(iter::once(x)),
|
||||
_ => Box::new(iter::empty())
|
||||
}
|
||||
};
|
||||
let Expr(val, typ) = expr;
|
||||
if let Some(t) = typ {
|
||||
Box::new(proc_clause(val).chain(find_all_names_recur(t)))
|
||||
} else { proc_clause(val) }
|
||||
}
|
||||
|
||||
/// Collect all names that occur in an expression
|
||||
@@ -42,62 +50,69 @@ fn find_all_names(expr: &Expr) -> HashSet<&Vec<String>> {
|
||||
find_all_names_recur(expr).collect()
|
||||
}
|
||||
|
||||
/// Parse a file into a list of distinctive entries
|
||||
pub fn file_parser<'a>(
|
||||
pattern_ops: &[&'a str], ops: &[&'a str]
|
||||
) -> impl Parser<char, Vec<FileEntry>, Error = Simple<char>> + 'a {
|
||||
choice((
|
||||
// In case the usercode wants to parse doc
|
||||
misc::comment_parser().map(FileEntry::Comment),
|
||||
import::import_parser().map(FileEntry::Import),
|
||||
text::keyword("export")
|
||||
.ignore_then(substitution_parser(pattern_ops, ops)).map(FileEntry::Export),
|
||||
// This could match almost anything so it has to go last
|
||||
substitution_parser(pattern_ops, ops).map(FileEntry::Substitution)
|
||||
)).padded()
|
||||
.separated_by(just('\n'))
|
||||
.then_ignore(end())
|
||||
/// Parse a rewrite rule `lhs =prio=> rhs`: both sides are expression
/// sequences and the priority comes from the [Lexeme::Rule] token between
/// them.
fn rule_parser() -> impl Parser<Lexeme, (Vec<Expr>, NotNan<f64>, Vec<Expr>), Error = Simple<Lexeme>> {
  xpr_parser().repeated()
    .then(enum_parser!(Lexeme::Rule))
    .then(xpr_parser().repeated())
    // .map(|((lhs, prio), rhs)| )
    // Flatten the nested pairs produced by `then` into one tuple.
    .map(|((a, b), c)| (a, b, c))
    .labelled("Rule")
}
|
||||
|
||||
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
|
||||
/// start.
|
||||
pub fn is_op(s: &str) -> bool {
|
||||
return match s.chars().next() {
|
||||
Some(x) => !x.is_alphanumeric(),
|
||||
None => false
|
||||
}
|
||||
pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
|
||||
choice((
|
||||
// In case the usercode wants to parse doc
|
||||
enum_parser!(Lexeme >> FileEntry; Comment),
|
||||
just(Lexeme::name("import"))
|
||||
.ignore_then(import_parser().map(FileEntry::Import))
|
||||
.then_ignore(enum_parser!(Lexeme::Comment)),
|
||||
just(Lexeme::name("export")).map_err_with_span(|e, s| {
|
||||
println!("{:?} could not yield an export", s); e
|
||||
})
|
||||
.ignore_then(rule_parser())
|
||||
.map(|(lhs, prio, rhs)| FileEntry::Export(lhs, prio, rhs)),
|
||||
// This could match almost anything so it has to go last
|
||||
rule_parser().map(|(lhs, prio, rhs)| FileEntry::Rule(lhs, prio, rhs)),
|
||||
))
|
||||
}
|
||||
|
||||
/// Collect all exported names (and a lot of other words) from a file
|
||||
pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
|
||||
src.iter().filter_map(|ent| match ent {
|
||||
FileEntry::Export(a) => Some(&a.source),
|
||||
_ => None
|
||||
src.iter().flat_map(|ent| match ent {
|
||||
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())) as BoxedIter<&Expr>,
|
||||
_ => Box::new(iter::empty())
|
||||
}).map(find_all_names).flatten().collect()
|
||||
}
|
||||
|
||||
|
||||
// #[allow(dead_code)]
|
||||
/// Collect all operators defined in a file (and some other words)
|
||||
fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
|
||||
let all_names:HashSet<&Vec<String>> = src.iter().filter_map(|ent| match ent {
|
||||
FileEntry::Substitution(a) => if exported_only {None} else {Some(&a.source)},
|
||||
FileEntry::Export(a) => Some(&a.source),
|
||||
_ => None
|
||||
let all_names:HashSet<&Vec<String>> = src.iter().flat_map(|ent| match ent {
|
||||
FileEntry::Rule(s, _, d) =>
|
||||
if exported_only {Box::new(iter::empty()) as BoxedIter<&Expr>}
|
||||
else {Box::new(s.iter().chain(d.iter()))}
|
||||
FileEntry::Export(s, _, d) => Box::new(s.iter().chain(d.iter())),
|
||||
_ => Box::new(iter::empty())
|
||||
}).map(find_all_names).flatten().collect();
|
||||
// Dedupe stage of dubious value; collecting into a hashset may take longer than
|
||||
// handling duplicates would with a file of sensible size.
|
||||
all_names.into_iter()
|
||||
.filter_map(|name|
|
||||
// If it's namespaced, it's imported.
|
||||
if name.len() == 1 && is_op(&name[0]) {Some(&name[0])}
|
||||
if name.len() == 1 && name::is_op(&name[0]) {Some(&name[0])}
|
||||
else {None}
|
||||
).collect()
|
||||
}
|
||||
|
||||
// #[allow(dead_code)]
|
||||
/// Collect all operators from a file
|
||||
pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
|
||||
// #[allow(dead_code)]
|
||||
/// Collect exported operators from a file (plus some extra)
|
||||
pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
|
||||
|
||||
|
||||
/// Summarize all imports from a file in a single list of qualified names
|
||||
pub fn imports<'a, 'b, I>(
|
||||
src: I
|
||||
|
||||
Reference in New Issue
Block a user