transfer commit

2023-02-03 14:40:34 +00:00
parent a500f8b87a
commit 3c63cae242
63 changed files with 3227 additions and 2850 deletions

View File

@@ -2,12 +2,12 @@ pub use chumsky::{self, prelude::*, Parser};
/// Parses Lua-style comments
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
  choice((
    just("--[").ignore_then(take_until(
      just("]--").ignored()
    )),
    just("--").ignore_then(take_until(
      just("\n").rewind().ignored().or(end())
    ))
  )).map(|(vc, ())| vc).collect().labelled("comment")
}
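
A minimal usage sketch (assuming a chumsky 0.8-style Parser::parse; the inputs are illustrative, not from the commit):

  assert_eq!(
    comment_parser().parse("-- line comment"),
    Ok(" line comment".to_string())
  );
  assert_eq!(
    comment_parser().parse("--[ block ]--"),
    Ok(" block ".to_string())
  );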

View File

@@ -6,27 +6,27 @@
/// ```
#[macro_export]
macro_rules! enum_parser {
  ($p:path | $m:tt) => {
    {
      ::chumsky::prelude::filter_map(|s, l| {
        if let $p(x) = l { Ok(x) }
        else { Err(::chumsky::prelude::Simple::custom(s, $m)) }
      })
    }
  };
  ($p:path >> $q:path; $i:ident) => {
    {
      use $p as srcpath;
      use $q as tgtpath;
      enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i)
    }
  };
  ($p:path >> $q:path; $($i:ident),+) => {
    {
      ::chumsky::prelude::choice((
        $( enum_parser!($p >> $q; $i) ),+
      ))
    }
  };
  ($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) };
}
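
A sketch of the first arm in use, with a hypothetical token enum (Tok is illustrative, not from this codebase; assumes chumsky 0.8):

  #[derive(Clone, Debug, PartialEq, Eq, Hash)]
  enum Tok { Int(u64), Name(String) }

  // Succeeds on Tok::Int and yields its payload; any other variant is an error.
  fn int_tok() -> impl ::chumsky::Parser<Tok, u64, Error = ::chumsky::error::Simple<Tok>> {
    enum_parser!(Tok::Int)
  }
  // Applied to a token stream, Tok::Int(7) yields Ok(7) and Tok::Name(_) fails.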

View File

@@ -8,120 +8,120 @@ use super::lexer::Lexeme;
/// Parses any number of expr wrapped in (), [] or {}
fn sexpr_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, to_mrc_slice(b)))
}
/// Parses `\name.body` or `\name:type.body` where name is any valid name and type and body are
/// both expressions. Comments are allowed and ignored everywhere in between the tokens
fn lambda_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  just(Lexeme::BS)
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .ignore_then(enum_parser!(Lexeme::Name))
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(
    just(Lexeme::Type)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(expr.clone().repeated())
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .or_not().map(Option::unwrap_or_default)
  )
  .then_ignore(just(Lexeme::name(".")))
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(expr.repeated().at_least(1))
  .map(|((name, typ), body): ((String, Vec<Expr>), Vec<Expr>)| {
    // for ent in &mut body { ent.bind_parameter(&name) };
    Clause::Lambda(name, to_mrc_slice(typ), to_mrc_slice(body))
  })
}
/// See [lambda_parser], but with `@` instead of `\`, and the name is optional
fn auto_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  just(Lexeme::At)
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .ignore_then(enum_parser!(Lexeme::Name).or_not())
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(
    just(Lexeme::Type)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(expr.clone().repeated())
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .or_not().map(Option::unwrap_or_default)
  )
  .then_ignore(just(Lexeme::name(".")))
  .then_ignore(enum_parser!(Lexeme::Comment).repeated())
  .then(expr.repeated().at_least(1))
  .try_map(|((name, typ), body): ((Option<String>, Vec<Expr>), Vec<Expr>), s| {
    if name.is_none() && typ.is_empty() {
      Err(Simple::custom(s, "Auto without name or type has no effect"))
    } else {
      Ok(Clause::Auto(name, to_mrc_slice(typ), to_mrc_slice(body)))
    }
  })
}
/// Parses a sequence of names separated by :: <br/>
/// Comments are allowed and ignored in between
fn name_parser() -> impl Parser<Lexeme, Vec<String>, Error = Simple<Lexeme>> + Clone {
  enum_parser!(Lexeme::Name).separated_by(
    enum_parser!(Lexeme::Comment).repeated()
    .then(just(Lexeme::NS))
    .then(enum_parser!(Lexeme::Comment).repeated())
  ).at_least(1)
}
/// Parse any legal argument name starting with a `$`
fn placeholder_parser() -> impl Parser<Lexeme, String, Error = Simple<Lexeme>> + Clone {
  enum_parser!(Lexeme::Name).try_map(|name, span| {
    name.strip_prefix('$').map(&str::to_string)
      .ok_or_else(|| Simple::custom(span, "Not a placeholder"))
  })
}
/// Parse an expression
pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
  recursive(|expr| {
    let clause =
      enum_parser!(Lexeme::Comment).repeated()
      .ignore_then(choice((
        enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Clause::Literal),
        placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
        just(Lexeme::name("...")).to(true)
          .or(just(Lexeme::name("..")).to(false))
          .then(placeholder_parser())
          .then(
            just(Lexeme::Type)
            .ignore_then(enum_parser!(Lexeme::Int))
            .or_not().map(Option::unwrap_or_default)
          )
          .map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
            prio.try_into().unwrap(),
            nonzero
          ))}),
        name_parser().map(|qualified| Clause::Name {
          local: if qualified.len() == 1 {Some(qualified[0].clone())} else {None},
          qualified: to_mrc_slice(qualified)
        }),
        sexpr_parser(expr.clone()),
        lambda_parser(expr.clone()),
        auto_parser(expr.clone()),
        just(Lexeme::At).ignore_then(expr.clone()).map(|arg| {
          Clause::Explicit(Mrc::new(arg))
        })
      ))).then_ignore(enum_parser!(Lexeme::Comment).repeated());
    clause.clone().then(
      just(Lexeme::Type)
      .ignore_then(clause.clone())
      .repeated()
    )
    .map(|(val, typ)| Expr(val, to_mrc_slice(typ)))
  }).labelled("Expression")
}
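
xpr_parser ties the grammar's knot with recursive(). The same pattern in a self-contained toy (assumes chumsky 0.8; this is not Orchid syntax):

  fn depth() -> impl Parser<char, usize, Error = Simple<char>> {
    recursive(|expr| {
      // Each pair of parens adds one to the depth of whatever it wraps.
      expr.delimited_by(just('('), just(')'))
        .map(|d: usize| d + 1)
        .or(empty().to(0))
    })
  }
  // depth().parse("(())") == Ok(2)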

View File

@@ -9,15 +9,15 @@ use super::lexer::Lexeme;
#[derive(Debug, Clone)]
pub struct Import {
  pub path: Mrc<[String]>,
  /// If name is None, this is a wildcard import
  pub name: Option<String>
}
/// initialize a BoxedIter<BoxedIter<String>> with a single element.
fn init_table(name: String) -> BoxedIterIter<'static, String> {
  // I'm not at all confident that this is a good approach.
  box_once(box_once(name))
}
/// Parse an import command
@@ -26,44 +26,44 @@ fn init_table(name: String) -> BoxedIterIter<'static, String> {
/// cross-platform filename-legal characters, but the symbols are explicitly allowed
/// to go wild. There's a blacklist in [name]
pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> {
  // TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
  recursive(|expr: Recursive<Lexeme, BoxedIterIter<String>, Simple<Lexeme>>| {
    enum_parser!(Lexeme::Name)
    .separated_by(just(Lexeme::NS))
    .then(
      just(Lexeme::NS)
      .ignore_then(
        choice((
          expr.clone()
            .separated_by(just(Lexeme::name(",")))
            .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
            .map(|v| box_flatten(v.into_iter()))
            .labelled("import group"),
          // Each expr returns a list of imports, flatten those into a common list
          just(Lexeme::name("*")).map(|_| init_table("*".to_string()))
            .labelled("wildcard import"), // Just a *, wrapped
          enum_parser!(Lexeme::Name).map(init_table)
            .labelled("import terminal") // Just a name, wrapped
        ))
      ).or_not()
    )
    .map(|(name, opt_post): (Vec<String>, Option<BoxedIterIter<String>>)| -> BoxedIterIter<String> {
      if let Some(post) = opt_post {
        Box::new(post.map(move |el| {
          box_chain!(name.clone().into_iter(), el)
        }))
      } else {
        box_once(into_boxed_iter(name))
      }
    })
  }).map(|paths| {
    paths.filter_map(|namespaces| {
      let path = to_mrc_slice(namespaces.collect_vec());
      let path_prefix = mrc_derive(&path, |p| &p[..p.len() - 1]);
      match path.last()?.as_str() {
        "*" => Some(Import { path: path_prefix, name: None }),
        name => Some(Import { path: path_prefix, name: Some(name.to_owned()) })
      }
    }).collect()
  }).labelled("import")
}
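
The cartesian step above, isolated with plain Vecs instead of the boxed-iterator machinery (an illustrative sketch, not the actual types used):

  fn expand(prefix: Vec<&str>, leaves: Vec<Vec<&str>>) -> Vec<Vec<&str>> {
    // Prepend the shared path prefix to every import leaf.
    leaves.into_iter()
      .map(|suffix| prefix.iter().copied().chain(suffix).collect())
      .collect()
  }
  // expand(vec!["foo"], vec![vec!["bar"], vec!["baz", "*"]])
  //   == vec![vec!["foo", "bar"], vec!["foo", "baz", "*"]]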

View File

@@ -9,141 +9,141 @@ use super::{number, string, name, comment};
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Entry(pub Lexeme, pub Range<usize>);
impl Debug for Entry {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    write!(f, "{:?}", self.0)
    // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
  }
}
impl From<Entry> for (Lexeme, Range<usize>) {
  fn from(ent: Entry) -> Self {
    (ent.0, ent.1)
  }
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lexeme {
  Num(NotNan<f64>),
  Int(u64),
  Char(char),
  Str(String),
  Name(String),
  Rule(NotNan<f64>),
  NS, // namespace separator
  LP(char),
  RP(char),
  BS, // Backslash
  At,
  Type, // type operator
  Comment(String)
}
impl Debug for Lexeme {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    match self {
      Self::Num(n) => write!(f, "{}", n),
      Self::Int(i) => write!(f, "{}", i),
      Self::Char(c) => write!(f, "{:?}", c),
      Self::Str(s) => write!(f, "{:?}", s),
      Self::Name(name) => write!(f, "{}", name),
      Self::Rule(prio) => write!(f, "={}=>", prio),
      Self::NS => write!(f, "::"),
      Self::LP(l) => write!(f, "{}", l),
      Self::RP(l) => match l {
        '(' => write!(f, ")"),
        '[' => write!(f, "]"),
        '{' => write!(f, "}}"),
        _ => f.debug_tuple("RP").field(l).finish()
      },
      Self::BS => write!(f, "\\"),
      Self::At => write!(f, "@"),
      Self::Type => write!(f, ":"),
      Self::Comment(text) => write!(f, "--[{}]--", text),
    }
  }
}
impl Lexeme {
  pub fn name<T: ToString>(n: T) -> Self {
    Lexeme::Name(n.to_string())
  }
  pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
    Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
  }
  pub fn paren_parser<T, P>(
    expr: P
  ) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
  where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
    choice((
      expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
        .map(|t| ('(', t)),
      expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
        .map(|t| ('[', t)),
      expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
        .map(|t| ('{', t)),
    ))
  }
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LexedText(pub Vec<Vec<Entry>>);
impl Debug for LexedText {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    for row in &self.0 {
      for tok in row {
        tok.fmt(f)?;
        f.write_str(" ")?
      }
      f.write_str("\n")?
    }
    Ok(())
  }
}
type LexSubres<'a> = BoxedIter<'a, Entry>;
fn paren_parser<'a>(
  expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
  lp: char, rp: char
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
  expr.padded().repeated()
    .map(|x| box_flatten(x.into_iter()))
    .delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
      box_chain!(
        iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1)),
        b,
        iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end))
      )
    })
}
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, LexedText, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
  let all_ops = ops.iter().map(|o| o.as_ref().to_string())
    .chain(iter::once(".".to_string())).collect::<Vec<_>>();
  recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
    choice((
      paren_parser(recurse.clone(), '(', ')'),
      paren_parser(recurse.clone(), '[', ']'),
      paren_parser(recurse.clone(), '{', '}'),
      choice((
        just(":=").padded().to(Lexeme::rule(0f64)),
        just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
        comment::comment_parser().map(Lexeme::Comment),
        just("::").padded().to(Lexeme::NS),
        just('\\').padded().to(Lexeme::BS),
        just('@').padded().to(Lexeme::At),
        just(':').to(Lexeme::Type),
        number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence
        number::float_parser().map(Lexeme::Num),
        string::char_parser().map(Lexeme::Char),
        string::str_parser().map(Lexeme::Str),
        name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing
      )).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres)
    ))
  }).separated_by(one_of("\t ").repeated())
    .flatten().collect()
    .separated_by(just('\n').then(text::whitespace()).ignored())
    .map(LexedText)
}
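
A possible smoke test for the whole lexer (assumes chumsky 0.8; the operator list is illustrative):

  #[test]
  fn lexer_smoke() {
    let text = lexer(&["+"]).parse("x + 1 -- note\n").unwrap();
    // Debug-prints one row per source line; the comment round-trips as --[ note]--
    println!("{:?}", text);
  }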

View File

@@ -2,13 +2,13 @@ use chumsky::{self, prelude::*, Parser};
/// Matches any one of the passed operators, longest-first
fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple<char>> {
  let mut sorted_ops: Vec<String> = ops.iter().map(|t| t.as_ref().to_string()).collect();
  sorted_ops.sort_by_key(|op| -(op.len() as i64));
  sorted_ops.into_iter()
    .map(|op| just(op).boxed())
    .reduce(|a, b| a.or(b).boxed())
    .unwrap_or_else(|| empty().map(|()| panic!("Empty isn't meant to match")).boxed())
    .labelled("operator").boxed()
}
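
The longest-first ordering rests on the negated-length sort key; in isolation (sort_by_key is stable, so equal-length operators keep their given order):

  let mut ops = vec!["+", "+=", "=>"];
  ops.sort_by_key(|op| -(op.len() as i64));
  assert_eq!(ops, vec!["+=", "=>", "+"]);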
/// Matches anything that's allowed as an operator
@@ -30,31 +30,31 @@ fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, Stri
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
/// common in maths so it's worth a try. Investigate.
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
  let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '[', ']', '{', '}', ',', '.'];
  filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
    .repeated().at_least(1)
    .collect()
    .labelled("modname")
}
/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
/// blacklisted character as a new operator.
pub fn name_parser<'a, T: AsRef<str> + Clone>(
  ops: &[T]
) -> impl Parser<char, String, Error = Simple<char>> + 'a {
  choice((
    op_parser(ops), // First try to parse a known operator
    text::ident().labelled("plain text"), // Failing that, parse plain text
    modname_parser() // Finally parse everything until the next terminal as a new operator
  ))
  .labelled("name")
}
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
/// start.
pub fn is_op<T: AsRef<str>>(s: T) -> bool {
  return match s.as_ref().chars().next() {
    Some(x) => !x.is_alphanumeric(),
    None => false
  }
}
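
Hypothetical spot checks of that rule:

  assert!(is_op("+="));   // symbolic first character
  assert!(is_op("=1=>")); // digits are fine after the first character
  assert!(!is_op("x+"));  // alphanumeric start disqualifies
  assert!(!is_op(""));    // the empty string is not an operator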

View File

@@ -2,111 +2,111 @@ use chumsky::{self, prelude::*, Parser};
use ordered_float::NotNan;
fn assert_not_digit(base: u32, c: char) {
  if base > (10 + (c as u32 - 'a' as u32)) {
    panic!("The character '{}' is a digit in base ({})", c, base)
  }
}
/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
///
/// TODO: this should use separated_by and parse the leading group too
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
  just('_')
    .ignore_then(text::digits(base))
    .repeated()
    .map(|sv| sv.iter().flat_map(|s| s.chars()).collect())
}
/// parse a grouped uint
///
/// Not to be confused with [int_parser] which does a lot more
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
  text::int(base)
    .then(separated_digits_parser(base))
    .map(move |(s1, s2): (String, String)| {
      u64::from_str_radix(&(s1 + &s2), base).unwrap()
    })
}
/// parse exponent notation, or return 0 as the default exponent.
/// The exponent is always in decimal.
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
  choice((
    just('p')
      .ignore_then(text::int(10))
      .map(|s: String| s.parse().unwrap()),
    just("p-")
      .ignore_then(text::int(10))
      .map(|s: String| -s.parse::<i32>().unwrap()),
  )).or_else(|_| Ok(0))
}
/// returns a mapper that converts a mantissa and an exponent into a uint
///
/// TODO it panics if it finds a negative exponent
fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
  move |(val, exp)| {
    if exp == 0 {val}
    else {val * base.checked_pow(exp.try_into().unwrap()).unwrap()}
  }
}
/// returns a mapper that converts a mantissa and an exponent into a float
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32),) -> NotNan<f64> {
  move |(val, exp)| {
    if exp == 0 {val}
    else {val * (base as f64).powf(exp.try_into().unwrap())}
  }
}
/// parse a uint from exponential notation (panics if 'p' is a digit in base)
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
  assert_not_digit(base, 'p');
  uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
}
/// parse a uint from a base determined by its prefix or lack thereof
///
/// Not to be confused with [uint_parser] which is a component of it.
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
  choice((
    just("0b").ignore_then(pow_uint_parser(2)),
    just("0x").ignore_then(pow_uint_parser(16)),
    just('0').ignore_then(pow_uint_parser(8)),
    pow_uint_parser(10), // Dec has no prefix
  ))
}
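
A couple of spot checks of what these rules accept, underscore grouping and p exponents included (assuming chumsky 0.8's Parser::parse):

  assert_eq!(int_parser().parse("0x2_0"), Ok(32));   // hex 20, grouped
  assert_eq!(int_parser().parse("0b101p3"), Ok(40)); // 5 * 2^3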
/// parse a float from dot notation
fn dotted_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
  uint_parser(base)
    .then(
      just('.').ignore_then(
        text::digits(base).then(separated_digits_parser(base))
      ).map(move |(frac1, frac2)| {
        let frac = frac1 + &frac2;
        let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
        let dexp = base.pow(frac.len().try_into().unwrap());
        frac_num / dexp as f64
      }).or_not().map(|o| o.unwrap_or_default())
    ).try_map(|(wh, f), s| {
      NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
    })
}
/// parse a float from dotted and optionally also exponential notation
fn pow_float_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
  assert_not_digit(base, 'p');
  dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
}
/// parse a float with dotted and optionally exponential notation from a base determined by its
/// prefix
pub fn float_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
  choice((
    just("0b").ignore_then(pow_float_parser(2)),
    just("0x").ignore_then(pow_float_parser(16)),
    just('0').ignore_then(pow_float_parser(8)),
    pow_float_parser(10),
  )).labelled("float")
}
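
Spot checks for the float grammar, including a hexadecimal fraction (again assuming chumsky 0.8):

  assert_eq!(float_parser().parse("0.5").unwrap().into_inner(), 0.5);
  assert_eq!(float_parser().parse("0x1.8").unwrap().into_inner(), 1.5); // 8/16 = 0.5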

View File

@@ -11,58 +11,58 @@ use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};
#[derive(Error, Debug, Clone)]
pub enum ParseError {
#[error("Could not tokenize {0:?}")]
Lex(Vec<Simple<char>>),
#[error("Could not parse {0:#?}")]
Ast(Vec<Simple<Lexeme>>)
#[error("Could not tokenize {0:?}")]
Lex(Vec<Simple<char>>),
#[error("Could not parse {0:#?}")]
Ast(Vec<Simple<Lexeme>>)
}
pub fn parse<'a, Iter, S, Op>(ops: &[Op], stream: S) -> Result<Vec<FileEntry>, ParseError>
where
  Op: 'a + AsRef<str> + Clone,
  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
  S: Into<Stream<'a, char, Range<usize>, Iter>> {
  let lexed = lexer(ops).parse(stream).map_err(ParseError::Lex)?;
  println!("Lexed:\n{:?}", lexed);
  let LexedText(token_batchv) = lexed;
  let parsr = line_parser().then_ignore(end());
  let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
    !v.is_empty()
  }).map(|v| {
    // Find the first invalid position for Stream::from_iter
    let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
    // Stream expects tuples, lexer outputs structs
    let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
    parsr.parse(Stream::from_iter(end..end+1, tuples))
    //                            ^^^^^^^^^^
    // I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
    // end of input should make little difference
  }).map(|res| match res {
    Ok(r) => (Some(r), vec![]),
    Err(e) => (None, e)
  }).unzip::<_, _, Vec<_>, Vec<_>>();
  let total_err = errors_per_line.into_iter()
    .flat_map(Vec::into_iter)
    .collect::<Vec<_>>();
  if !total_err.is_empty() { Err(ParseError::Ast(total_err)) }
  else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}
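
The per-line error-collection shape, in isolation: each line yields either a parsed value or a batch of errors, and unzip splits the two streams (toy types, not the real FileEntry):

  let results = vec![Ok(1), Err(vec!["bad"]), Ok(3)];
  let (vals, errs): (Vec<Option<i32>>, Vec<Vec<&str>>) = results.into_iter()
    .map(|r| match r { Ok(v) => (Some(v), vec![]), Err(e) => (None, e) })
    .unzip();
  assert_eq!(errs.concat(), vec!["bad"]);
  assert_eq!(vals.into_iter().flatten().collect::<Vec<_>>(), vec![1, 3]);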
pub fn reparse<'a, Iter, S, Op>(ops: &[Op], stream: S, pre: &[FileEntry])
-> Result<Vec<FileEntry>, ParseError>
where
  Op: 'a + AsRef<str> + Clone,
  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
  S: Into<Stream<'a, char, Range<usize>, Iter>> {
  let result = parse(ops, stream)?;
  Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
    if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
      if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
        *source = s2.clone()
      } else {
        panic!("Preparse and reparse received different row types!")
      }
    }
    output
  }).collect())
}

View File

@@ -16,50 +16,50 @@ use ordered_float::NotNan;
/// Anything we might encounter in a file
#[derive(Debug, Clone)]
pub enum FileEntry {
  Import(Vec<import::Import>),
  Comment(String),
  Rule(Rule, bool),
  Export(Vec<Vec<String>>)
}
fn visit_all_names_clause_recur<'a, F>(
  clause: &'a Clause,
  binds: Stackframe<String>,
  cb: &mut F
) where F: FnMut(&'a [String]) {
  match clause {
    Clause::Auto(name, typ, body) => {
      for x in typ.iter() {
        visit_all_names_expr_recur(x, binds.clone(), cb)
      }
      let binds_dup = binds.clone();
      let new_binds = if let Some(n) = name {
        binds_dup.push(n.to_owned())
      } else {
        binds
      };
      for x in body.iter() {
        visit_all_names_expr_recur(x, new_binds.clone(), cb)
      }
    },
    Clause::Lambda(name, typ, body) => {
      for x in typ.iter() {
        visit_all_names_expr_recur(x, binds.clone(), cb)
      }
      for x in body.iter() {
        visit_all_names_expr_recur(x, binds.push(name.to_owned()), cb)
      }
    },
    Clause::S(_, body) => for x in body.iter() {
      visit_all_names_expr_recur(x, binds.clone(), cb)
    },
    Clause::Name{ local: Some(name), qualified } => {
      if binds.iter().all(|x| x != name) {
        cb(qualified)
      }
    }
    _ => (),
  }
}
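
The shadowing test at the heart of the Name arm, as a stand-alone helper (a toy slice instead of Stackframe):

  // A name counts as free iff no enclosing binder introduced it.
  fn is_free(binds: &[String], name: &str) -> bool {
    binds.iter().all(|x| x.as_str() != name)
  }
  // is_free(&["x".to_string()], "y") == true
  // is_free(&["x".to_string()], "x") == false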
/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
@@ -68,88 +68,88 @@ fn visit_all_names_clause_recur<'a, F>(
///
/// TODO: find a way to exclude parameters
fn visit_all_names_expr_recur<'a, F>(
  expr: &'a Expr,
  binds: Stackframe<String>,
  cb: &mut F
) where F: FnMut(&'a [String]) {
  let Expr(val, typ) = expr;
  visit_all_names_clause_recur(val, binds.clone(), cb);
  for typ in typ.as_ref() {
    visit_all_names_clause_recur(typ, binds.clone(), cb);
  }
}
/// Collect all names that occur in an expression
fn find_all_names(expr: &Expr) -> HashSet<&[String]> {
  let mut ret = HashSet::new();
  visit_all_names_expr_recur(expr, Stackframe::new(String::new()), &mut |n| {
    if !n.last().unwrap().starts_with('$') {
      ret.insert(n);
    }
  });
  ret
}
fn rule_parser() -> impl Parser<Lexeme, (Vec<Expr>, NotNan<f64>, Vec<Expr>), Error = Simple<Lexeme>> {
  xpr_parser().repeated()
    .then(enum_parser!(Lexeme::Rule))
    .then(xpr_parser().repeated())
    // .map(|((lhs, prio), rhs)| )
    .map(|((a, b), c)| (a, b, c))
    .labelled("Rule")
}
pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
  choice((
    // In case the user code wants to parse doc comments
    enum_parser!(Lexeme >> FileEntry; Comment),
    just(Lexeme::name("import"))
      .ignore_then(import_parser().map(FileEntry::Import))
      .then_ignore(enum_parser!(Lexeme::Comment)),
    just(Lexeme::name("export")).map_err_with_span(|e, s| {
      println!("{:?} could not yield an export", s); e
    }).ignore_then(
      just(Lexeme::NS).ignore_then(
        enum_parser!(Lexeme::Name).map(|n| vec![n])
          .separated_by(just(Lexeme::name(",")))
          .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
      ).map(FileEntry::Export)
    ).or(rule_parser().map(|(source, prio, target)| {
      FileEntry::Rule(Rule {
        source: to_mrc_slice(source),
        prio,
        target: to_mrc_slice(target)
      }, true)
    })),
    // This could match almost anything so it has to go last
    rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{
      source: to_mrc_slice(source),
      prio,
      target: to_mrc_slice(target)
    }, false)),
  ))
}
/// Collect all exported names (and a lot of other words) from a file
pub fn exported_names(src: &[FileEntry]) -> HashSet<&[String]> {
  src.iter().flat_map(|ent| match ent {
    FileEntry::Rule(Rule{source, target, ..}, true) =>
      box_chain!(source.iter(), target.iter()),
    _ => box_empty()
  }).flat_map(find_all_names).chain(
    src.iter().filter_map(|ent| {
      if let FileEntry::Export(names) = ent {Some(names.iter())} else {None}
    }).flatten().map(Vec::as_slice)
  ).collect()
}
/// Summarize all imports from a file in a single list of qualified names
pub fn imports<'a, 'b, I>(
  src: I
) -> impl Iterator<Item = &'b import::Import> + 'a
where I: Iterator<Item = &'b FileEntry> + 'a {
  src.filter_map(|ent| match ent {
    FileEntry::Import(impv) => Some(impv.iter()),
    _ => None
  }).flatten()
}

View File

@@ -2,45 +2,45 @@ use chumsky::{self, prelude::*, Parser};
/// Parses a text character that is not the specified delimiter
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
  // Copied directly from Chumsky's JSON example.
  let escape = just('\\').ignore_then(
    just('\\')
    .or(just('/'))
    .or(just('"'))
    .or(just('b').to('\x08'))
    .or(just('f').to('\x0C'))
    .or(just('n').to('\n'))
    .or(just('r').to('\r'))
    .or(just('t').to('\t'))
    .or(just('u').ignore_then(
      filter(|c: &char| c.is_ascii_hexdigit())
        .repeated()
        .exactly(4)
        .collect::<String>()
        .validate(|digits, span, emit| {
          char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
            .unwrap_or_else(|| {
              emit(Simple::custom(span, "invalid unicode character"));
              '\u{FFFD}' // unicode replacement character
            })
        }),
    )),
  );
  filter(move |&c| c != '\\' && c != delim).or(escape)
}
/// Parse a character literal between single quotes
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
  just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
}
/// Parse a string between double quotes
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
just('"')
.ignore_then(
text_parser('"').map(Some)
.or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
.repeated()
).then_ignore(just('"'))
.flatten().collect()
just('"')
.ignore_then(
text_parser('"').map(Some)
.or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
.repeated()
).then_ignore(just('"'))
.flatten().collect()
}
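
Spot checks for both literal forms, covering an escape and a backslash-newline continuation (assuming chumsky 0.8's Parser::parse):

  assert_eq!(char_parser().parse("'x'"), Ok('x'));
  assert_eq!(str_parser().parse(r#""a\nb""#), Ok("a\nb".to_string()));
  assert_eq!(str_parser().parse("\"a\\\nb\""), Ok("ab".to_string()));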