forked from Orchid/orchid
transfer commit
@@ -2,12 +2,12 @@ pub use chumsky::{self, prelude::*, Parser};

/// Parses Lua-style comments
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
  choice((
    just("--[").ignore_then(take_until(
      just("]--").ignored()
    )),
    just("--").ignore_then(take_until(
      just("\n").rewind().ignored().or(end())
    ))
  )).map(|(vc, ())| vc).collect().labelled("comment")
}
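A minimal usage sketch (not part of the commit) covering both comment forms:

let p = comment_parser();
// Block form: the body between --[ and ]-- is returned verbatim.
assert_eq!(p.parse("--[ hi ]--").unwrap(), " hi ");
// Line form: rewind() leaves the newline for the lexer to consume.
assert_eq!(p.parse("-- hi\n").unwrap(), " hi");
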
@@ -6,27 +6,27 @@
/// ```
#[macro_export]
macro_rules! enum_parser {
  ($p:path | $m:tt) => {
    {
      ::chumsky::prelude::filter_map(|s, l| {
        if let $p(x) = l { Ok(x) }
        else { Err(::chumsky::prelude::Simple::custom(s, $m)) }
      })
    }
  };
  ($p:path >> $q:path; $i:ident) => {
    {
      use $p as srcpath;
      use $q as tgtpath;
      enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i)
    }
  };
  ($p:path >> $q:path; $($i:ident),+) => {
    {
      ::chumsky::prelude::choice((
        $( enum_parser!($p >> $q; $i) ),+
      ))
    }
  };
  ($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) };
}
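A hedged sketch of the first macro arm against a hypothetical `Token` enum (not part of this commit); it filters one variant out of a token stream and unwraps its payload:

use chumsky::prelude::*;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum Token { Num(u64), Name(String) }

// Matches only Token::Num, yielding the inner u64; any other token fails
// with the given message. `enum_parser!(Token::Num)` would derive the message.
fn num() -> impl Parser<Token, u64, Error = Simple<Token>> {
  enum_parser!(Token::Num | "Expected a number")
}
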
@@ -8,120 +8,120 @@ use super::lexer::Lexeme;

/// Parses any number of expr wrapped in (), [] or {}
fn sexpr_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, to_mrc_slice(b)))
}

/// Parses `\name.body` or `\name:type.body` where name is any valid name and type and body are
/// both expressions. Comments are allowed and ignored everywhere in between the tokens
fn lambda_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  just(Lexeme::BS)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(enum_parser!(Lexeme::Name))
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .then(
      just(Lexeme::Type)
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .ignore_then(expr.clone().repeated())
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .or_not().map(Option::unwrap_or_default)
    )
    .then_ignore(just(Lexeme::name(".")))
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .then(expr.repeated().at_least(1))
    .map(|((name, typ), body): ((String, Vec<Expr>), Vec<Expr>)| {
      // for ent in &mut body { ent.bind_parameter(&name) };
      Clause::Lambda(name, to_mrc_slice(typ), to_mrc_slice(body))
    })
}

/// see [lambda_parser] but `@` instead of `\` and the name is optional
fn auto_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  just(Lexeme::At)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(enum_parser!(Lexeme::Name).or_not())
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .then(
      just(Lexeme::Type)
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .ignore_then(expr.clone().repeated())
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .or_not().map(Option::unwrap_or_default)
    )
    .then_ignore(just(Lexeme::name(".")))
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .then(expr.repeated().at_least(1))
    .try_map(|((name, typ), body): ((Option<String>, Vec<Expr>), Vec<Expr>), s| {
      if name.is_none() && typ.is_empty() {
        Err(Simple::custom(s, "Auto without name or type has no effect"))
      } else {
        Ok(Clause::Auto(name, to_mrc_slice(typ), to_mrc_slice(body)))
      }
    })
}

/// Parses a sequence of names separated by :: <br/>
/// Comments are allowed and ignored in between
fn name_parser() -> impl Parser<Lexeme, Vec<String>, Error = Simple<Lexeme>> + Clone {
  enum_parser!(Lexeme::Name).separated_by(
    enum_parser!(Lexeme::Comment).repeated()
      .then(just(Lexeme::NS))
      .then(enum_parser!(Lexeme::Comment).repeated())
  ).at_least(1)
}

/// Parse any legal argument name starting with a `$`
fn placeholder_parser() -> impl Parser<Lexeme, String, Error = Simple<Lexeme>> + Clone {
  enum_parser!(Lexeme::Name).try_map(|name, span| {
    name.strip_prefix('$').map(&str::to_string)
      .ok_or_else(|| Simple::custom(span, "Not a placeholder"))
  })
}
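A hedged, test-style sketch (not in the commit): token-level parsers take spanned tokens via a chumsky Stream, the same pattern parse.rs uses further down.

let toks = vec![(Lexeme::name("$body"), 0..5)];
let out = placeholder_parser()
  .parse(chumsky::Stream::from_iter(5..6, toks.into_iter()));
assert_eq!(out.unwrap(), "body".to_string());
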
/// Parse an expression
pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
  recursive(|expr| {
    let clause =
      enum_parser!(Lexeme::Comment).repeated()
      .ignore_then(choice((
        enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Clause::Literal),
        placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
        just(Lexeme::name("...")).to(true)
          .or(just(Lexeme::name("..")).to(false))
          .then(placeholder_parser())
          .then(
            just(Lexeme::Type)
              .ignore_then(enum_parser!(Lexeme::Int))
              .or_not().map(Option::unwrap_or_default)
          )
          .map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
            prio.try_into().unwrap(),
            nonzero
          ))}),
        name_parser().map(|qualified| Clause::Name {
          local: if qualified.len() == 1 {Some(qualified[0].clone())} else {None},
          qualified: to_mrc_slice(qualified)
        }),
        sexpr_parser(expr.clone()),
        lambda_parser(expr.clone()),
        auto_parser(expr.clone()),
        just(Lexeme::At).ignore_then(expr.clone()).map(|arg| {
          Clause::Explicit(Mrc::new(arg))
        })
      ))).then_ignore(enum_parser!(Lexeme::Comment).repeated());
    clause.clone().then(
      just(Lexeme::Type)
        .ignore_then(clause.clone())
        .repeated()
    )
    .map(|(val, typ)| Expr(val, to_mrc_slice(typ)))
  }).labelled("Expression")
}
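A hedged sketch of the lambda route through xpr_parser (not in the commit), feeding the tokens of `\x.x`:

let toks = vec![
  (Lexeme::BS, 0..1), (Lexeme::name("x"), 1..2),
  (Lexeme::name("."), 2..3), (Lexeme::name("x"), 3..4),
];
let out = xpr_parser().parse(chumsky::Stream::from_iter(4..5, toks.into_iter()));
// Expected shape: Ok(Expr(Clause::Lambda("x", [], [x]), []))
println!("{:?}", out);
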
@@ -9,15 +9,15 @@ use super::lexer::Lexeme;

#[derive(Debug, Clone)]
pub struct Import {
  pub path: Mrc<[String]>,
  /// If name is None, this is a wildcard import
  pub name: Option<String>
}

/// initialize a BoxedIter<BoxedIter<String>> with a single element.
fn init_table(name: String) -> BoxedIterIter<'static, String> {
  // I'm not at all confident that this is a good approach.
  box_once(box_once(name))
}

/// Parse an import command
@@ -26,44 +26,44 @@ fn init_table(name: String) -> BoxedIterIter<'static, String> {
/// crossplatform filename-legal characters but the symbols are explicitly allowed
/// to go wild. There's a blacklist in [name]
pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> {
  // TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
  recursive(|expr: Recursive<Lexeme, BoxedIterIter<String>, Simple<Lexeme>>| {
    enum_parser!(Lexeme::Name)
      .separated_by(just(Lexeme::NS))
      .then(
        just(Lexeme::NS)
          .ignore_then(
            choice((
              expr.clone()
                .separated_by(just(Lexeme::name(",")))
                .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
                .map(|v| box_flatten(v.into_iter()))
                .labelled("import group"),
              // Each expr returns a list of imports, flatten those into a common list
              just(Lexeme::name("*")).map(|_| init_table("*".to_string()))
                .labelled("wildcard import"), // Just a *, wrapped
              enum_parser!(Lexeme::Name).map(init_table)
                .labelled("import terminal") // Just a name, wrapped
            ))
          ).or_not()
      )
      .map(|(name, opt_post): (Vec<String>, Option<BoxedIterIter<String>>)| -> BoxedIterIter<String> {
        if let Some(post) = opt_post {
          Box::new(post.map(move |el| {
            box_chain!(name.clone().into_iter(), el)
          }))
        } else {
          box_once(into_boxed_iter(name))
        }
      })
  }).map(|paths| {
    paths.filter_map(|namespaces| {
      let path = to_mrc_slice(namespaces.collect_vec());
      let path_prefix = mrc_derive(&path, |p| &p[..p.len() - 1]);
      match path.last()?.as_str() {
        "*" => Some(Import { path: path_prefix, name: None }),
        name => Some(Import { path: path_prefix, name: Some(name.to_owned()) })
      }
    }).collect()
  }).labelled("import")
}
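A hedged sketch (not in the commit): the tokens of `foo::(bar, quz)` should flatten into two imports, foo::bar and foo::quz.

let toks = vec![
  (Lexeme::name("foo"), 0..3), (Lexeme::NS, 3..5), (Lexeme::LP('('), 5..6),
  (Lexeme::name("bar"), 6..9), (Lexeme::name(","), 9..10),
  (Lexeme::name("quz"), 10..13), (Lexeme::RP('('), 13..14),
];
let out = import_parser()
  .parse(chumsky::Stream::from_iter(14..15, toks.into_iter()))
  .unwrap();
assert_eq!(out.len(), 2);
assert_eq!(out[0].name.as_deref(), Some("bar"));
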
@@ -9,141 +9,141 @@ use super::{number, string, name, comment};
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Entry(pub Lexeme, pub Range<usize>);
impl Debug for Entry {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    write!(f, "{:?}", self.0)
    // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
  }
}

impl From<Entry> for (Lexeme, Range<usize>) {
  fn from(ent: Entry) -> Self {
    (ent.0, ent.1)
  }
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lexeme {
  Num(NotNan<f64>),
  Int(u64),
  Char(char),
  Str(String),
  Name(String),
  Rule(NotNan<f64>),
  NS, // namespace separator
  LP(char),
  RP(char),
  BS, // Backslash
  At,
  Type, // type operator
  Comment(String)
}

impl Debug for Lexeme {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    match self {
      Self::Num(n) => write!(f, "{}", n),
      Self::Int(i) => write!(f, "{}", i),
      Self::Char(c) => write!(f, "{:?}", c),
      Self::Str(s) => write!(f, "{:?}", s),
      Self::Name(name) => write!(f, "{}", name),
      Self::Rule(prio) => write!(f, "={}=>", prio),
      Self::NS => write!(f, "::"),
      Self::LP(l) => write!(f, "{}", l),
      Self::RP(l) => match l {
        '(' => write!(f, ")"),
        '[' => write!(f, "]"),
        '{' => write!(f, "}}"),
        _ => f.debug_tuple("RP").field(l).finish()
      },
      Self::BS => write!(f, "\\"),
      Self::At => write!(f, "@"),
      Self::Type => write!(f, ":"),
      Self::Comment(text) => write!(f, "--[{}]--", text),
    }
  }
}

impl Lexeme {
  pub fn name<T: ToString>(n: T) -> Self {
    Lexeme::Name(n.to_string())
  }
  pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
    Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
  }
  pub fn paren_parser<T, P>(
    expr: P
  ) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
  where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
    choice((
      expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
        .map(|t| ('(', t)),
      expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
        .map(|t| ('[', t)),
      expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
        .map(|t| ('{', t)),
    ))
  }
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LexedText(pub Vec<Vec<Entry>>);

impl Debug for LexedText {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    for row in &self.0 {
      for tok in row {
        tok.fmt(f)?;
        f.write_str(" ")?
      }
      f.write_str("\n")?
    }
    Ok(())
  }
}

type LexSubres<'a> = BoxedIter<'a, Entry>;

fn paren_parser<'a>(
  expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
  lp: char, rp: char
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
  expr.padded().repeated()
    .map(|x| box_flatten(x.into_iter()))
    .delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
      box_chain!(
        iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1)),
        b,
        iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end))
      )
    })
}

pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, LexedText, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
  let all_ops = ops.iter().map(|o| o.as_ref().to_string())
    .chain(iter::once(".".to_string())).collect::<Vec<_>>();
  recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
    choice((
      paren_parser(recurse.clone(), '(', ')'),
      paren_parser(recurse.clone(), '[', ']'),
      paren_parser(recurse.clone(), '{', '}'),
      choice((
        just(":=").padded().to(Lexeme::rule(0f64)),
        just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
        comment::comment_parser().map(Lexeme::Comment),
        just("::").padded().to(Lexeme::NS),
        just('\\').padded().to(Lexeme::BS),
        just('@').padded().to(Lexeme::At),
        just(':').to(Lexeme::Type),
        number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence
        number::float_parser().map(Lexeme::Num),
        string::char_parser().map(Lexeme::Char),
        string::str_parser().map(Lexeme::Str),
        name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing
      )).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres)
    ))
  }).separated_by(one_of("\t ").repeated())
    .flatten().collect()
    .separated_by(just('\n').then(text::whitespace()).ignored())
    .map(LexedText)
}
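A hedged usage sketch (not in the commit): lexing one line with a single user-defined operator.

let ops: Vec<&str> = vec!["+"];
let txt = lexer(&ops).parse("x + 1 -- note").unwrap();
// One row: Name("x"), Name("+"), Int(1), Comment(" note")
assert_eq!(txt.0[0][0].0, Lexeme::name("x"));
assert_eq!(txt.0[0][2].0, Lexeme::Int(1));
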
@@ -2,13 +2,13 @@ use chumsky::{self, prelude::*, Parser};

/// Matches any one of the passed operators, longest-first
fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple<char>> {
  let mut sorted_ops: Vec<String> = ops.iter().map(|t| t.as_ref().to_string()).collect();
  sorted_ops.sort_by_key(|op| -(op.len() as i64));
  sorted_ops.into_iter()
    .map(|op| just(op).boxed())
    .reduce(|a, b| a.or(b).boxed())
    .unwrap_or_else(|| empty().map(|()| panic!("Empty isn't meant to match")).boxed())
    .labelled("operator").boxed()
}

/// Matches anything that's allowed as an operator
@@ -30,31 +30,31 @@ fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, Stri
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
/// common in maths so it's worth a try. Investigate.
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
  let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '[', ']', '{', '}', ',', '.'];
  filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
    .repeated().at_least(1)
    .collect()
    .labelled("modname")
}

/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
/// blacklisted character as a new operator.
pub fn name_parser<'a, T: AsRef<str> + Clone>(
  ops: &[T]
) -> impl Parser<char, String, Error = Simple<char>> + 'a {
  choice((
    op_parser(ops), // First try to parse a known operator
    text::ident().labelled("plain text"), // Failing that, parse plain text
    modname_parser() // Finally parse everything until the next terminal as a new operator
  ))
  .labelled("name")
}

/// Decide if a string can be an operator. Operators can include digits and text, just not at the
/// start.
pub fn is_op<T: AsRef<str>>(s: T) -> bool {
  match s.as_ref().chars().next() {
    Some(x) => !x.is_alphanumeric(),
    None => false
  }
}
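For illustration (not in the commit):

assert!(is_op("+="));   // starts with a symbol
assert!(is_op(">>=2")); // digits allowed after the first char
assert!(!is_op("map")); // starts with a letter
assert!(!is_op(""));    // empty is not an operator
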
@@ -2,111 +2,111 @@ use chumsky::{self, prelude::*, Parser};
use ordered_float::NotNan;

fn assert_not_digit(base: u32, c: char) {
  if base > (10 + (c as u32 - 'a' as u32)) {
    panic!("The character '{}' is a digit in base ({})", c, base)
  }
}

/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
///
/// TODO: this should use separated_by and parse the leading group too
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
  just('_')
    .ignore_then(text::digits(base))
    .repeated()
    .map(|sv| sv.iter().flat_map(|s| s.chars()).collect())
}

/// parse a grouped uint
///
/// Not to be confused with [int_parser] which does a lot more
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
  text::int(base)
    .then(separated_digits_parser(base))
    .map(move |(s1, s2): (String, String)| {
      u64::from_str_radix(&(s1 + &s2), base).unwrap()
    })
}

/// parse exponent notation, or return 0 as the default exponent.
/// The exponent is always in decimal.
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
  choice((
    just('p')
      .ignore_then(text::int(10))
      .map(|s: String| s.parse().unwrap()),
    just("p-")
      .ignore_then(text::int(10))
      .map(|s: String| -s.parse::<i32>().unwrap()),
  )).or_else(|_| Ok(0))
}

/// returns a mapper that converts a mantissa and an exponent into a uint
///
/// TODO it panics if it finds a negative exponent
fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
  move |(val, exp)| {
    if exp == 0 {val}
    else {val * base.checked_pow(exp.try_into().unwrap()).unwrap()}
  }
}

/// returns a mapper that converts a mantissa and an exponent into a float
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32),) -> NotNan<f64> {
  move |(val, exp)| {
    if exp == 0 {val}
    else {val * (base as f64).powf(exp.try_into().unwrap())}
  }
}

/// parse a uint from exponential notation (panics if 'p' is a digit in base)
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
  assert_not_digit(base, 'p');
  uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
}

/// parse a uint from a base determined by its prefix or lack thereof
///
/// Not to be confused with [uint_parser] which is a component of it.
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
  choice((
    just("0b").ignore_then(pow_uint_parser(2)),
    just("0x").ignore_then(pow_uint_parser(16)),
    just('0').ignore_then(pow_uint_parser(8)),
    pow_uint_parser(10), // Dec has no prefix
  ))
}

/// parse a float from dot notation
fn dotted_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
  uint_parser(base)
    .then(
      just('.').ignore_then(
        text::digits(base).then(separated_digits_parser(base))
      ).map(move |(frac1, frac2)| {
        let frac = frac1 + &frac2;
        let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
        let dexp = base.pow(frac.len().try_into().unwrap());
        frac_num / dexp as f64
      }).or_not().map(|o| o.unwrap_or_default())
    ).try_map(|(wh, f), s| {
      NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
    })
}

/// parse a float from dotted and optionally also exponential notation
fn pow_float_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
  assert_not_digit(base, 'p');
  dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
}

/// parse a float with dotted and optionally exponential notation from a base determined by its
/// prefix
pub fn float_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
  choice((
    just("0b").ignore_then(pow_float_parser(2)),
    just("0x").ignore_then(pow_float_parser(16)),
    just('0').ignore_then(pow_float_parser(8)),
    pow_float_parser(10),
  )).labelled("float")
}
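A few hedged examples of the notations accepted above (not in the commit):

assert_eq!(int_parser().parse("0xf_ff").unwrap(), 0xfff); // grouped hex
assert_eq!(int_parser().parse("3p2").unwrap(), 300);      // 3 * 10^2
assert_eq!(float_parser().parse("1.5").unwrap().into_inner(), 1.5);
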
@@ -11,58 +11,58 @@ use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};

#[derive(Error, Debug, Clone)]
pub enum ParseError {
  #[error("Could not tokenize {0:?}")]
  Lex(Vec<Simple<char>>),
  #[error("Could not parse {0:#?}")]
  Ast(Vec<Simple<Lexeme>>)
}

pub fn parse<'a, Iter, S, Op>(ops: &[Op], stream: S) -> Result<Vec<FileEntry>, ParseError>
where
  Op: 'a + AsRef<str> + Clone,
  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
  S: Into<Stream<'a, char, Range<usize>, Iter>> {
  let lexed = lexer(ops).parse(stream).map_err(ParseError::Lex)?;
  println!("Lexed:\n{:?}", lexed);
  let LexedText(token_batchv) = lexed;
  let parsr = line_parser().then_ignore(end());
  let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
    !v.is_empty()
  }).map(|v| {
    // Find the first invalid position for Stream::from_iter
    let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
    // Stream expects tuples, lexer outputs structs
    let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
    parsr.parse(Stream::from_iter(end..end+1, tuples))
    //                            ^^^^^^^^^^
    // I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
    // end of input should make little difference
  }).map(|res| match res {
    Ok(r) => (Some(r), vec![]),
    Err(e) => (None, e)
  }).unzip::<_, _, Vec<_>, Vec<_>>();
  let total_err = errors_per_line.into_iter()
    .flat_map(Vec::into_iter)
    .collect::<Vec<_>>();
  if !total_err.is_empty() { Err(ParseError::Ast(total_err)) }
  else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}

pub fn reparse<'a, Iter, S, Op>(ops: &[Op], stream: S, pre: &[FileEntry])
-> Result<Vec<FileEntry>, ParseError>
where
  Op: 'a + AsRef<str> + Clone,
  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
  S: Into<Stream<'a, char, Range<usize>, Iter>> {
  let result = parse(ops, stream)?;
  Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
    if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
      if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
        *source = s2.clone()
      } else {
        panic!("Preparse and reparse received different row types!")
      }
    }
    output
  }).collect())
}
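A hedged end-to-end sketch (not in the commit); a &str converts into a character Stream, so it can be fed straight to parse:

let ops: Vec<&str> = vec!["+"];
match parse(&ops, "main := 1 + 1") {
  Ok(entries) => println!("{:?}", entries), // prints the lexed text, then one FileEntry::Rule
  Err(err) => eprintln!("{:?}", err),
}
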
@@ -16,50 +16,50 @@ use ordered_float::NotNan;

/// Anything we might encounter in a file
#[derive(Debug, Clone)]
pub enum FileEntry {
  Import(Vec<import::Import>),
  Comment(String),
  Rule(Rule, bool),
  Export(Vec<Vec<String>>)
}

fn visit_all_names_clause_recur<'a, F>(
  clause: &'a Clause,
  binds: Stackframe<String>,
  cb: &mut F
) where F: FnMut(&'a [String]) {
  match clause {
    Clause::Auto(name, typ, body) => {
      for x in typ.iter() {
        visit_all_names_expr_recur(x, binds.clone(), cb)
      }
      let binds_dup = binds.clone();
      let new_binds = if let Some(n) = name {
        binds_dup.push(n.to_owned())
      } else {
        binds
      };
      for x in body.iter() {
        visit_all_names_expr_recur(x, new_binds.clone(), cb)
      }
    },
    Clause::Lambda(name, typ, body) => {
      for x in typ.iter() {
        visit_all_names_expr_recur(x, binds.clone(), cb)
      }
      for x in body.iter() {
        visit_all_names_expr_recur(x, binds.push(name.to_owned()), cb)
      }
    },
    Clause::S(_, body) => for x in body.iter() {
      visit_all_names_expr_recur(x, binds.clone(), cb)
    },
    Clause::Name{ local: Some(name), qualified } => {
      if binds.iter().all(|x| x != name) {
        cb(qualified)
      }
    }
    _ => (),
  }
}

/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
@@ -68,88 +68,88 @@ fn visit_all_names_clause_recur<'a, F>(

///
/// TODO: find a way to exclude parameters
fn visit_all_names_expr_recur<'a, F>(
  expr: &'a Expr,
  binds: Stackframe<String>,
  cb: &mut F
) where F: FnMut(&'a [String]) {
  let Expr(val, typ) = expr;
  visit_all_names_clause_recur(val, binds.clone(), cb);
  for typ in typ.as_ref() {
    visit_all_names_clause_recur(typ, binds.clone(), cb);
  }
}

/// Collect all names that occur in an expression
fn find_all_names(expr: &Expr) -> HashSet<&[String]> {
  let mut ret = HashSet::new();
  visit_all_names_expr_recur(expr, Stackframe::new(String::new()), &mut |n| {
    if !n.last().unwrap().starts_with('$') {
      ret.insert(n);
    }
  });
  ret
}

fn rule_parser() -> impl Parser<Lexeme, (Vec<Expr>, NotNan<f64>, Vec<Expr>), Error = Simple<Lexeme>> {
  xpr_parser().repeated()
    .then(enum_parser!(Lexeme::Rule))
    .then(xpr_parser().repeated())
    // .map(|((lhs, prio), rhs)| )
    .map(|((a, b), c)| (a, b, c))
    .labelled("Rule")
}

pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
  choice((
    // In case the usercode wants to parse doc
    enum_parser!(Lexeme >> FileEntry; Comment),
    just(Lexeme::name("import"))
      .ignore_then(import_parser().map(FileEntry::Import))
      .then_ignore(enum_parser!(Lexeme::Comment)),
    just(Lexeme::name("export")).map_err_with_span(|e, s| {
      println!("{:?} could not yield an export", s); e
    }).ignore_then(
      just(Lexeme::NS).ignore_then(
        enum_parser!(Lexeme::Name).map(|n| vec![n])
          .separated_by(just(Lexeme::name(",")))
          .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
      ).map(FileEntry::Export)
    ).or(rule_parser().map(|(source, prio, target)| {
      FileEntry::Rule(Rule {
        source: to_mrc_slice(source),
        prio,
        target: to_mrc_slice(target)
      }, true)
    })),
    // This could match almost anything so it has to go last
    rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{
      source: to_mrc_slice(source),
      prio,
      target: to_mrc_slice(target)
    }, false)),
  ))
}

/// Collect all exported names (and a lot of other words) from a file
pub fn exported_names(src: &[FileEntry]) -> HashSet<&[String]> {
  src.iter().flat_map(|ent| match ent {
    FileEntry::Rule(Rule{source, target, ..}, true) =>
      box_chain!(source.iter(), target.iter()),
    _ => box_empty()
  }).flat_map(find_all_names).chain(
    src.iter().filter_map(|ent| {
      if let FileEntry::Export(names) = ent {Some(names.iter())} else {None}
    }).flatten().map(Vec::as_slice)
  ).collect()
}

/// Summarize all imports from a file in a single list of qualified names
pub fn imports<'a, 'b, I>(
  src: I
) -> impl Iterator<Item = &'b import::Import> + 'a
where I: Iterator<Item = &'b FileEntry> + 'a {
  src.filter_map(|ent| match ent {
    FileEntry::Import(impv) => Some(impv.iter()),
    _ => None
  }).flatten()
}
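A hedged sketch (not in the commit) of the two collectors above, assuming `entries: Vec<FileEntry>` came out of parse.rs:

let imported: Vec<&import::Import> = imports(entries.iter()).collect();
let exported: HashSet<&[String]> = exported_names(&entries);
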
@@ -2,45 +2,45 @@ use chumsky::{self, prelude::*, Parser};

/// Parses a text character that is not the specified delimiter
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
  // Copied directly from Chumsky's JSON example.
  let escape = just('\\').ignore_then(
    just('\\')
      .or(just('/'))
      .or(just('"'))
      .or(just('b').to('\x08'))
      .or(just('f').to('\x0C'))
      .or(just('n').to('\n'))
      .or(just('r').to('\r'))
      .or(just('t').to('\t'))
      .or(just('u').ignore_then(
        filter(|c: &char| c.is_ascii_hexdigit())
          .repeated()
          .exactly(4)
          .collect::<String>()
          .validate(|digits, span, emit| {
            char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
              .unwrap_or_else(|| {
                emit(Simple::custom(span, "invalid unicode character"));
                '\u{FFFD}' // unicode replacement character
              })
          }),
      )),
  );
  filter(move |&c| c != '\\' && c != delim).or(escape)
}

/// Parse a character literal between single quotes
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
  just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
}

/// Parse a string between double quotes
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
  just('"')
    .ignore_then(
      text_parser('"').map(Some)
        .or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
        .repeated()
    ).then_ignore(just('"'))
    .flatten().collect()
}
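Hedged examples (not in the commit):

assert_eq!(char_parser().parse("'a'").unwrap(), 'a');
assert_eq!(str_parser().parse("\"one\\ntwo\"").unwrap(), "one\ntwo");
assert_eq!(str_parser().parse("\"a\\u0041\"").unwrap(), "aA");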