Preparation for sharing

- rustfmt
- clippy
- comments
- README
2023-05-25 19:14:24 +01:00
parent e99ade92ba
commit bc2714aad8
144 changed files with 3734 additions and 3243 deletions

View File

@@ -1,13 +1,15 @@
pub use chumsky::{self, prelude::*, Parser};
pub use chumsky::prelude::*;
pub use chumsky::{self, Parser};
use super::decls::SimpleParser;
/// Parses Lua-style comments
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
pub fn comment_parser() -> impl SimpleParser<char, String> {
choice((
just("--[").ignore_then(take_until(
just("]--").ignored()
)),
just("--").ignore_then(take_until(
just("\n").rewind().ignored().or(end())
))
)).map(|(vc, ())| vc).collect().labelled("comment")
just("--[").ignore_then(take_until(just("]--").ignored())),
just("--").ignore_then(take_until(just("\n").rewind().ignored().or(end()))),
))
.map(|(vc, ())| vc)
.collect()
.labelled("comment")
}
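The `rewind` terminator is the subtle part here: it matches the newline without consuming it, so the downstream line splitter still sees the break. A minimal self-contained sketch of the same pattern, assuming the chumsky 0.9 API this diff appears to target:

```rs
use chumsky::prelude::*;

// Line-comment parser in the same shape as `comment_parser` above:
// `take_until` gathers characters until the terminator succeeds, and
// `rewind` matches the newline without consuming it.
fn line_comment() -> impl Parser<char, String, Error = Simple<char>> {
    just("--")
        .ignore_then(take_until(just('\n').rewind().ignored().or(end())))
        .map(|(chars, ())| chars)
        .collect()
}

fn main() {
    assert_eq!(line_comment().parse("-- hi"), Ok(" hi".to_string()));
}
```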

View File

@@ -3,46 +3,53 @@ use std::rc::Rc;
use crate::interner::Interner;
/// Trait enclosing all context features
///
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub trait Context: Clone {
type Op: AsRef<str>;
fn ops<'a>(&'a self) -> &'a [Self::Op];
fn ops(&self) -> &[Self::Op];
fn file(&self) -> Rc<Vec<String>>;
fn interner<'a>(&'a self) -> &'a Interner;
fn interner(&self) -> &Interner;
}
/// Struct implementing context
///
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub struct ParsingContext<'a, Op> {
pub ops: &'a [Op],
pub interner: &'a Interner,
pub file: Rc<Vec<String>>
pub file: Rc<Vec<String>>,
}
impl<'a, Op> ParsingContext<'a, Op> {
pub fn new(ops: &'a [Op], interner: &'a Interner, file: Rc<Vec<String>>)
-> Self { Self { ops, interner, file } }
pub fn new(
ops: &'a [Op],
interner: &'a Interner,
file: Rc<Vec<String>>,
) -> Self {
Self { ops, interner, file }
}
}
impl<'a, Op> Clone for ParsingContext<'a, Op> {
fn clone(&self) -> Self {
Self {
ops: self.ops,
interner: self.interner,
file: self.file.clone()
}
Self { ops: self.ops, interner: self.interner, file: self.file.clone() }
}
}
impl<Op: AsRef<str>> Context for ParsingContext<'_, Op> {
type Op = Op;
fn interner<'a>(&'a self) -> &'a Interner { self.interner }
fn file(&self) -> Rc<Vec<String>> {self.file.clone()}
fn ops<'a>(&'a self) -> &'a [Self::Op] { self.ops }
}
fn interner(&self) -> &Interner {
self.interner
}
fn file(&self) -> Rc<Vec<String>> {
self.file.clone()
}
fn ops(&self) -> &[Self::Op] {
self.ops
}
}
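The point of the `Op` associated type is that downstream signatures shrink to `impl Context` instead of dragging a lifetime and an operator type parameter everywhere. A toy stand-in (not the crate's `Interner`-backed type) to illustrate:

```rs
use std::rc::Rc;

// Minimal mirror of the Context trait from the diff; the interner is
// omitted to keep the sketch self-contained.
trait Context: Clone {
    type Op: AsRef<str>;
    fn ops(&self) -> &[Self::Op];
    fn file(&self) -> Rc<Vec<String>>;
}

#[derive(Clone)]
struct TestCtx {
    file: Rc<Vec<String>>,
}

impl Context for TestCtx {
    type Op = &'static str;
    fn ops(&self) -> &[&'static str] {
        &["+", "-", "=>"]
    }
    fn file(&self) -> Rc<Vec<String>> {
        self.file.clone()
    }
}

// The bound stays this short no matter how Op is represented.
fn op_count(ctx: impl Context) -> usize {
    ctx.ops().len()
}

fn main() {
    let ctx = TestCtx { file: Rc::new(vec!["main".to_string()]) };
    assert_eq!(op_count(ctx), 3);
}
```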

src/parse/decls.rs Normal file (14 lines)
View File

@@ -0,0 +1,14 @@
use std::hash::Hash;
use chumsky::prelude::Simple;
use chumsky::recursive::Recursive;
use chumsky::{BoxedParser, Parser};
/// Wrapper around [Parser] with [Simple] error to avoid repeating the input
pub trait SimpleParser<I: Eq + Hash + Clone, O> =
Parser<I, O, Error = Simple<I>>;
/// Boxed version of [SimpleParser]
pub type BoxedSimpleParser<'a, I, O> = BoxedParser<'a, I, O, Simple<I>>;
/// [Recursive] specialization of [SimpleParser] to parameterize calls to
/// [chumsky::recursive::recursive]
pub type SimpleRecursive<'a, I, O> = Recursive<'a, I, O, Simple<I>>;
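Note that `trait SimpleParser<I, O> = Parser<I, O, Error = Simple<I>>` is a trait alias, which currently compiles only on nightly with `#![feature(trait_alias)]`. On stable Rust, roughly the same ergonomics come from an empty supertrait with a blanket impl; a sketch:

```rs
use std::hash::Hash;

use chumsky::prelude::Simple;
use chumsky::Parser;

// Stable-Rust approximation of the nightly alias: anything that parses
// I into O with Simple<I> errors implements SimpleParser automatically.
pub trait SimpleParser<I: Eq + Hash + Clone, O>:
    Parser<I, O, Error = Simple<I>>
{
}

impl<I: Eq + Hash + Clone, O, T> SimpleParser<I, O> for T where
    T: Parser<I, O, Error = Simple<I>>
{
}
```

Callers still reach every `Parser` method through the supertrait bound, so `impl SimpleParser<char, String>` works as a return type either way.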

View File

@@ -1,8 +1,12 @@
/// Produces filter_mapping functions for enum types:
/// ```rs
/// enum_filter!(Foo::Bar | "Some error!") // Accepts Foo::Bar(T) into T
/// enum_filter!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar"
/// enum_filter!(Foo >> Quz; Bar, Baz) // Parses Foo::Bar(T) into Quz::Bar(T) and Foo::Baz(U) into Quz::Baz(U)
/// enum_filter!(Foo::Bar | "Some error!")
/// // Foo::Bar(T) into T
/// enum_filter!(Foo::Bar)
/// // same as above but with the default error "Expected Foo::Bar"
/// enum_filter!(Foo >> Quz; Bar, Baz)
/// // Foo::Bar(T) into Quz::Bar(T)
/// // Foo::Baz(U) into Quz::Baz(U)
/// ```
#[macro_export]
macro_rules! enum_filter {
@@ -43,4 +47,4 @@ macro_rules! enum_filter {
($p:path) => {
enum_filter!($p | {concat!("Expected ", stringify!($p))})
};
}
}
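In spirit, the macro produces a fallible extractor that plugs into `filter_map`-style combinators. A hypothetical simplification of the single-variant form (the real expansion also covers the `>>` re-wrapping syntax):

```rs
#[derive(Clone)]
enum Lexeme {
    Name(String),
    Comment(String),
}

// Roughly what `enum_filter!(Lexeme::Name)` yields: Ok for the matching
// variant, the error message otherwise.
fn name_filter() -> impl Fn(Lexeme) -> Result<String, &'static str> + Clone {
    |lexeme| match lexeme {
        Lexeme::Name(n) => Ok(n),
        _ => Err("Expected Lexeme::Name"),
    }
}

fn main() {
    assert_eq!(name_filter()(Lexeme::Name("x".into())), Ok("x".to_string()));
    assert!(name_filter()(Lexeme::Comment("hi".into())).is_err());
}
```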

View File

@@ -1,107 +1,110 @@
use std::ops::Range;
use std::rc::Rc;
use chumsky::{self, prelude::*, Parser};
use crate::enum_filter;
use crate::representations::Primitive;
use crate::representations::ast::{Clause, Expr};
use crate::representations::location::Location;
use crate::interner::Token;
use chumsky::prelude::*;
use chumsky::{self, Parser};
use super::context::Context;
use super::lexer::{Lexeme, Entry, filter_map_lex};
use super::decls::SimpleParser;
use super::lexer::{filter_map_lex, Entry, Lexeme};
use crate::enum_filter;
use crate::interner::Sym;
use crate::representations::ast::{Clause, Expr};
use crate::representations::location::Location;
use crate::representations::Primitive;
/// Parses any number of expr wrapped in (), [] or {}
fn sexpr_parser(
expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone {
expr: impl SimpleParser<Entry, Expr> + Clone,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone {
let body = expr.repeated();
choice((
Lexeme::LP('(').parser().then(body.clone())
.then(Lexeme::RP('(').parser()),
Lexeme::LP('[').parser().then(body.clone())
.then(Lexeme::RP('[').parser()),
Lexeme::LP('{').parser().then(body.clone())
.then(Lexeme::RP('{').parser()),
)).map(|((lp, body), rp)| {
let Entry{lexeme, range: Range{start, ..}} = lp;
Lexeme::LP('(').parser().then(body.clone()).then(Lexeme::RP('(').parser()),
Lexeme::LP('[').parser().then(body.clone()).then(Lexeme::RP('[').parser()),
Lexeme::LP('{').parser().then(body).then(Lexeme::RP('{').parser()),
))
.map(|((lp, body), rp)| {
let Entry { lexeme, range: Range { start, .. } } = lp;
let end = rp.range.end;
let char = if let Lexeme::LP(c) = lexeme {c}
else {unreachable!("The parser only matches Lexeme::LP")};
let char = if let Lexeme::LP(c) = lexeme {
c
} else {
unreachable!("The parser only matches Lexeme::LP")
};
(Clause::S(char, Rc::new(body)), start..end)
}).labelled("S-expression")
})
.labelled("S-expression")
}
/// Parses `\name.body` or `\name:type.body` where name is any valid name
/// and type and body are both expressions. Comments are allowed
/// and ignored everywhere in between the tokens
fn lambda_parser<'a>(
expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
Lexeme::BS.parser()
.ignore_then(expr.clone())
.then_ignore(Lexeme::Name(ctx.interner().i(".")).parser())
.then(expr.repeated().at_least(1))
.map_with_span(move |(arg, body), span| {
(Clause::Lambda(Rc::new(arg), Rc::new(body)), span)
}).labelled("Lambda")
expr: impl SimpleParser<Entry, Expr> + Clone + 'a,
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone + 'a {
Lexeme::BS
.parser()
.ignore_then(expr.clone())
.then_ignore(Lexeme::Name(ctx.interner().i(".")).parser())
.then(expr.repeated().at_least(1))
.map_with_span(move |(arg, body), span| {
(Clause::Lambda(Rc::new(arg), Rc::new(body)), span)
})
.labelled("Lambda")
}
/// Parses a sequence of names separated by :: <br/>
/// Comments and line breaks are allowed and ignored in between
pub fn ns_name_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Token<Vec<Token<String>>>, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
pub fn ns_name_parser<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Sym, Range<usize>)> + Clone + 'a {
filter_map_lex(enum_filter!(Lexeme::Name))
.separated_by(Lexeme::NS.parser()).at_least(1)
.separated_by(Lexeme::NS.parser())
.at_least(1)
.map(move |elements| {
let start = elements.first().expect("can never be empty").1.start;
let end = elements.last().expect("can never be empty").1.end;
let tokens =
/*ctx.prefix().iter().copied().chain*/(
elements.iter().map(|(t, _)| *t)
).collect::<Vec<_>>();
let tokens = (elements.iter().map(|(t, _)| *t)).collect::<Vec<_>>();
(ctx.interner().i(&tokens), start..end)
}).labelled("Namespaced name")
})
.labelled("Namespaced name")
}
pub fn namelike_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
pub fn namelike_parser<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone + 'a {
choice((
filter_map_lex(enum_filter!(Lexeme::PH))
.map(|(ph, range)| (Clause::Placeh(ph), range)),
ns_name_parser(ctx)
.map(|(token, range)| (Clause::Name(token), range)),
ns_name_parser(ctx).map(|(token, range)| (Clause::Name(token), range)),
))
}
pub fn clause_parser<'a>(
expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
expr: impl SimpleParser<Entry, Expr> + Clone + 'a,
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone + 'a {
choice((
filter_map_lex(enum_filter!(Lexeme >> Primitive; Literal))
.map(|(p, s)| (Clause::P(p), s)).labelled("Literal"),
.map(|(p, s)| (Clause::P(p), s))
.labelled("Literal"),
sexpr_parser(expr.clone()),
lambda_parser(expr.clone(), ctx.clone()),
lambda_parser(expr, ctx.clone()),
namelike_parser(ctx),
)).labelled("Clause")
))
.labelled("Clause")
}
/// Parse an expression
pub fn xpr_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Expr, Error = Simple<Entry>> + 'a
{
pub fn xpr_parser<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Expr> + 'a {
recursive(move |expr| {
clause_parser(expr, ctx.clone())
.map(move |(value, range)| {
Expr{
value: value.clone(),
location: Location::Range { file: ctx.file(), range }
}
clause_parser(expr, ctx.clone()).map(move |(value, range)| Expr {
value,
location: Location::Range { file: ctx.file(), range },
})
}).labelled("Expression")
}
})
.labelled("Expression")
}
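Everything above hangs off chumsky's `recursive`, which ties the knot so a clause can contain expressions that contain clauses. A stripped-down sketch of the same shape on plain characters rather than this crate's `Entry` tokens:

```rs
use chumsky::prelude::*;

// An expression is an alphanumeric atom or a parenthesised list of
// expressions; `recursive` hands the parser a handle to itself.
fn expr() -> impl Parser<char, String, Error = Simple<char>> {
    recursive(|expr| {
        let atom = filter(|c: &char| c.is_alphanumeric())
            .repeated()
            .at_least(1)
            .collect::<String>();
        let sexpr = expr
            .repeated()
            .delimited_by(just('('), just(')'))
            .map(|v: Vec<String>| format!("({})", v.join(" ")));
        atom.or(sexpr).padded()
    })
}

fn main() {
    assert_eq!(expr().parse("(a (b c))"), Ok("(a (b c))".to_string()));
}
```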

View File

@@ -1,58 +1,59 @@
use std::fmt::Debug;
use chumsky::{prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::Parser;
use thiserror::Error;
use crate::representations::sourcefile::{FileEntry};
use crate::parse::sourcefile::split_lines;
use super::context::Context;
use super::{lexer, line_parser, Entry};
use crate::parse::sourcefile::split_lines;
use crate::representations::sourcefile::FileEntry;
#[derive(Error, Debug, Clone)]
pub enum ParseError {
#[error("Could not tokenize {0:?}")]
Lex(Vec<Simple<char>>),
#[error("Could not parse {:?} on line {}", .0.first().unwrap().1.span(), .0.first().unwrap().0)]
Ast(Vec<(usize, Simple<Entry>)>)
#[error(
"Could not parse {:?} on line {}",
.0.first().unwrap().1.span(),
.0.first().unwrap().0
)]
Ast(Vec<(usize, Simple<Entry>)>),
}
/// All the data required for parsing
/// Parse a string of code into a collection of module elements:
/// imports, exports, comments, declarations, etc.
///
///
/// Notice that because the lexer splits operators based on the provided
/// list, the output will only be correct if the operator list already
/// contains all operators defined or imported by this module.
pub fn parse<'a>(data: &str, ctx: impl Context)
-> Result<Vec<FileEntry>, ParseError>
{
pub fn parse(
data: &str,
ctx: impl Context,
) -> Result<Vec<FileEntry>, ParseError> {
// TODO: wrap `i`, `ops` and `prefix` in a parsing context
let lexie = lexer(ctx.clone());
let token_batchv = lexie.parse(data).map_err(ParseError::Lex)?;
// println!("Lexed:\n{}", LexedText(token_batchv.clone()).bundle(ctx.interner()));
// println!("Lexed:\n{:?}", token_batchv.clone());
let parsr = line_parser(ctx).then_ignore(end());
let (parsed_lines, errors_per_line) = split_lines(&token_batchv)
.enumerate()
.map(|(i, entv)| (i,
entv.iter()
.filter(|e| !e.is_filler())
.cloned()
.collect::<Vec<_>>()
))
.filter(|(_, l)| l.len() > 0)
.map(|(i, entv)| {
(i, entv.iter().filter(|e| !e.is_filler()).cloned().collect::<Vec<_>>())
})
.filter(|(_, l)| !l.is_empty())
.map(|(i, l)| (i, parsr.parse(l)))
.map(|(i, res)| match res {
Ok(r) => (Some(r), (i, vec![])),
Err(e) => (None, (i, e))
}).unzip::<_, _, Vec<_>, Vec<_>>();
let total_err = errors_per_line.into_iter()
Err(e) => (None, (i, e)),
})
.unzip::<_, _, Vec<_>, Vec<_>>();
let total_err = errors_per_line
.into_iter()
.flat_map(|(i, v)| v.into_iter().map(move |e| (i, e)))
.collect::<Vec<_>>();
if !total_err.is_empty() { Err(ParseError::Ast(total_err)) }
else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
if !total_err.is_empty() {
Err(ParseError::Ast(total_err))
} else {
Ok(parsed_lines.into_iter().map(Option::unwrap).collect())
}
}
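The bookkeeping at the end is the part worth internalizing: every line yields either a parsed entry or an indexed error list, `unzip` splits the two streams, and the whole run succeeds only if the flattened error list is empty. The same pattern on plain data, as a sketch:

```rs
// Each line parses to a value or to (line index, errors); errors from
// all lines are flattened before deciding overall success, mirroring
// the unzip/flat_map structure of `parse` above.
fn parse_all(lines: &[&str]) -> Result<Vec<i64>, Vec<(usize, String)>> {
    let (ok, errs): (Vec<_>, Vec<_>) = lines
        .iter()
        .enumerate()
        .map(|(i, l)| match l.parse::<i64>() {
            Ok(v) => (Some(v), (i, vec![])),
            Err(e) => (None, (i, vec![e.to_string()])),
        })
        .unzip();
    let total: Vec<_> = errs
        .into_iter()
        .flat_map(|(i, v)| v.into_iter().map(move |e| (i, e)))
        .collect();
    if total.is_empty() {
        Ok(ok.into_iter().map(Option::unwrap).collect())
    } else {
        Err(total)
    }
}

fn main() {
    assert_eq!(parse_all(&["1", "2"]), Ok(vec![1, 2]));
    assert!(parse_all(&["1", "x"]).is_err());
}
```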

View File

@@ -1,16 +1,20 @@
use chumsky::{Parser, prelude::*};
use chumsky::prelude::*;
use chumsky::Parser;
use itertools::Itertools;
use super::context::Context;
use super::decls::{SimpleParser, SimpleRecursive};
use super::lexer::{filter_map_lex, Lexeme};
use super::Entry;
use crate::interner::Tok;
use crate::representations::sourcefile::Import;
use crate::utils::iter::{box_once, box_flatten, into_boxed_iter, BoxedIterIter};
use crate::interner::Token;
use crate::utils::iter::{
box_flatten, box_once, into_boxed_iter, BoxedIterIter,
};
use crate::{box_chain, enum_filter};
use super::Entry;
use super::context::Context;
use super::lexer::{Lexeme, filter_map_lex};
/// initialize a BoxedIter<BoxedIter<String>> with a single element.
fn init_table(name: Token<String>) -> BoxedIterIter<'static, Token<String>> {
fn init_table(name: Tok<String>) -> BoxedIterIter<'static, Tok<String>> {
// I'm not at all confident that this is a good approach.
box_once(box_once(name))
}
@@ -21,56 +25,74 @@ fn init_table(name: Token<String>) -> BoxedIterIter<'static, Token<String>> {
/// preferably contain crossplatform filename-legal characters but the
/// symbols are explicitly allowed to go wild.
/// There's a blacklist in [name]
pub fn import_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Vec<Import>, Error = Simple<Entry>> + 'a
{
pub fn import_parser<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Vec<Import>> + 'a {
// TODO: this algorithm isn't cache friendly and copies a lot
recursive({
let ctx = ctx.clone();
move |expr:Recursive<Entry, BoxedIterIter<Token<String>>, Simple<Entry>>| {
filter_map_lex(enum_filter!(Lexeme::Name)).map(|(t, _)| t)
.separated_by(Lexeme::NS.parser())
.then(
Lexeme::NS.parser()
.ignore_then(
choice((
expr.clone()
.separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
.delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
.map(|v| box_flatten(v.into_iter()))
.labelled("import group"),
// Each expr returns a list of imports, flatten into common list
Lexeme::Name(ctx.interner().i("*")).parser()
.map(move |_| init_table(ctx.interner().i("*")))
.labelled("wildcard import"), // Just a *, wrapped
filter_map_lex(enum_filter!(Lexeme::Name))
.map(|(t, _)| init_table(t))
.labelled("import terminal") // Just a name, wrapped
))
).or_not()
)
.map(|(name, opt_post): (Vec<Token<String>>, Option<BoxedIterIter<Token<String>>>)|
-> BoxedIterIter<Token<String>> {
if let Some(post) = opt_post {
Box::new(post.map(move |el| {
box_chain!(name.clone().into_iter(), el)
}))
} else {
box_once(into_boxed_iter(name))
}
})
move |expr: SimpleRecursive<Entry, BoxedIterIter<Tok<String>>>| {
filter_map_lex(enum_filter!(Lexeme::Name))
.map(|(t, _)| t)
.separated_by(Lexeme::NS.parser())
.then(
Lexeme::NS
.parser()
.ignore_then(choice((
expr
.clone()
.separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
.delimited_by(
Lexeme::LP('(').parser(),
Lexeme::RP('(').parser(),
)
.map(|v| box_flatten(v.into_iter()))
.labelled("import group"),
// Each expr returns a list of imports, flatten into common list
Lexeme::Name(ctx.interner().i("*"))
.parser()
.map(move |_| init_table(ctx.interner().i("*")))
.labelled("wildcard import"), // Just a *, wrapped
filter_map_lex(enum_filter!(Lexeme::Name))
.map(|(t, _)| init_table(t))
.labelled("import terminal"), // Just a name, wrapped
)))
.or_not(),
)
.map(
|(name, opt_post): (
Vec<Tok<String>>,
Option<BoxedIterIter<Tok<String>>>,
)|
-> BoxedIterIter<Tok<String>> {
if let Some(post) = opt_post {
Box::new(
post.map(move |el| box_chain!(name.clone().into_iter(), el)),
)
} else {
box_once(into_boxed_iter(name))
}
},
)
}
}).map(move |paths| {
paths.filter_map(|namespaces| {
let mut path = namespaces.collect_vec();
let name = path.pop()?;
Some(Import {
path: ctx.interner().i(&path),
name: {
if name == ctx.interner().i("*") { None }
else { Some(name) }
}
})
.map(move |paths| {
paths
.filter_map(|namespaces| {
let mut path = namespaces.collect_vec();
let name = path.pop()?;
Some(Import {
path: ctx.interner().i(&path),
name: {
if name == ctx.interner().i("*") {
None
} else {
Some(name)
}
},
})
})
}).collect()
}).labelled("import")
.collect()
})
.labelled("import")
}
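However the iterators are nested, the net effect is one flat path per leaf of the import group. A sketch of that expansion with ordinary vectors in place of `BoxedIterIter` (hypothetical helper, not from the crate):

```rs
// `import foo::(bar, baz::*)` pairs the prefix ["foo"] with each branch:
// every leaf of the group becomes one full path.
fn expand(prefix: &[&str], branches: &[Vec<&str>]) -> Vec<Vec<String>> {
    branches
        .iter()
        .map(|b| prefix.iter().chain(b).map(|s| s.to_string()).collect())
        .collect()
}

fn main() {
    assert_eq!(
        expand(&["foo"], &[vec!["bar"], vec!["baz", "*"]]),
        vec![
            vec!["foo".to_string(), "bar".to_string()],
            vec!["foo".to_string(), "baz".to_string(), "*".to_string()],
        ]
    );
}
```

A trailing `*` is then turned into `name: None` by the `filter_map` above, which is how wildcard imports are represented.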

View File

@@ -1,31 +1,36 @@
use std::fmt;
use std::ops::Range;
use chumsky::prelude::*;
use chumsky::text::keyword;
use chumsky::{Parser, Span};
use ordered_float::NotNan;
use chumsky::{Parser, prelude::*, text::keyword, Span};
use crate::ast::{Placeholder, PHClass};
use crate::representations::Literal;
use crate::interner::{Token, InternedDisplay, Interner};
use super::context::Context;
use super::placeholder;
use super::{number, string, name, comment};
use super::decls::SimpleParser;
use super::{comment, name, number, placeholder, string};
use crate::ast::{PHClass, Placeholder};
use crate::interner::{InternedDisplay, Interner, Tok};
use crate::representations::Literal;
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Entry{
pub struct Entry {
pub lexeme: Lexeme,
pub range: Range<usize>
pub range: Range<usize>,
}
impl Entry {
pub fn is_filler(&self) -> bool {
matches!(self.lexeme, Lexeme::Comment(_))
|| matches!(self.lexeme, Lexeme::BR)
|| matches!(self.lexeme, Lexeme::BR)
}
}
impl InternedDisplay for Entry {
fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
fn fmt_i(
&self,
f: &mut std::fmt::Formatter<'_>,
i: &Interner,
) -> std::fmt::Result {
self.lexeme.fmt_i(f, i)
}
}
@@ -40,21 +45,24 @@ impl Span for Entry {
type Context = Lexeme;
type Offset = usize;
fn context(&self) -> Self::Context {self.lexeme.clone()}
fn start(&self) -> Self::Offset {self.range.start()}
fn end(&self) -> Self::Offset {self.range.end()}
fn context(&self) -> Self::Context {
self.lexeme.clone()
}
fn start(&self) -> Self::Offset {
self.range.start()
}
fn end(&self) -> Self::Offset {
self.range.end()
}
fn new(context: Self::Context, range: Range<Self::Offset>) -> Self {
Self{
lexeme: context,
range
}
Self { lexeme: context, range }
}
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Lexeme {
Literal(Literal),
Name(Token<String>),
Name(Tok<String>),
Rule(NotNan<f64>),
/// Walrus operator (formerly shorthand macro)
Const,
@@ -74,11 +82,15 @@ pub enum Lexeme {
Export,
Import,
Namespace,
PH(Placeholder)
PH(Placeholder),
}
impl InternedDisplay for Lexeme {
fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
fn fmt_i(
&self,
f: &mut std::fmt::Formatter<'_>,
i: &Interner,
) -> std::fmt::Result {
match self {
Self::Literal(l) => write!(f, "{:?}", l),
Self::Name(token) => write!(f, "{}", i.r(*token)),
@@ -90,9 +102,9 @@ impl InternedDisplay for Lexeme {
'(' => write!(f, ")"),
'[' => write!(f, "]"),
'{' => write!(f, "}}"),
_ => f.debug_tuple("RP").field(l).finish()
_ => f.debug_tuple("RP").field(l).finish(),
},
Self::BR => write!(f, "\n"),
Self::BR => writeln!(f),
Self::BS => write!(f, "\\"),
Self::At => write!(f, "@"),
Self::Type => write!(f, ":"),
@@ -103,27 +115,30 @@ impl InternedDisplay for Lexeme {
Self::PH(Placeholder { name, class }) => match *class {
PHClass::Scalar => write!(f, "${}", i.r(*name)),
PHClass::Vec { nonzero, prio } => {
if nonzero {write!(f, "...")}
else {write!(f, "..")}?;
if nonzero {
write!(f, "...")
} else {
write!(f, "..")
}?;
write!(f, "${}", i.r(*name))?;
if prio != 0 {write!(f, ":{}", prio)?;};
if prio != 0 {
write!(f, ":{}", prio)?;
};
Ok(())
}
}
},
},
}
}
}
impl Lexeme {
pub fn rule(prio: impl Into<f64>) -> Self {
Lexeme::Rule(
NotNan::new(prio.into())
.expect("Rule priority cannot be NaN")
)
Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
}
pub fn parser<E: chumsky::Error<Entry>>(self)
-> impl Parser<Entry, Entry, Error = E> + Clone {
pub fn parser<E: chumsky::Error<Entry>>(
self,
) -> impl Parser<Entry, Entry, Error = E> + Clone {
filter(move |ent: &Entry| ent.lexeme == self)
}
}
@@ -141,16 +156,14 @@ impl InternedDisplay for LexedText {
}
}
fn paren_parser(lp: char, rp: char)
-> impl Parser<char, Lexeme, Error=Simple<char>>
{
just(lp).to(Lexeme::LP(lp))
.or(just(rp).to(Lexeme::RP(lp)))
fn paren_parser(lp: char, rp: char) -> impl SimpleParser<char, Lexeme> {
just(lp).to(Lexeme::LP(lp)).or(just(rp).to(Lexeme::RP(lp)))
}
pub fn literal_parser() -> impl Parser<char, Literal, Error = Simple<char>> {
pub fn literal_parser() -> impl SimpleParser<char, Literal> {
choice((
number::int_parser().map(Literal::Uint), // all ints are valid floats so it takes precedence
// all ints are valid floats so it takes precedence
number::int_parser().map(Literal::Uint),
number::float_parser().map(Literal::Num),
string::char_parser().map(Literal::Char),
string::str_parser().map(Literal::Str),
@@ -159,10 +172,12 @@ pub fn literal_parser() -> impl Parser<char, Literal, Error = Simple<char>> {
pub static BASE_OPS: &[&str] = &[",", ".", "..", "..."];
pub fn lexer<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
{
let all_ops = ctx.ops().iter()
pub fn lexer<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<char, Vec<Entry>> + 'a {
let all_ops = ctx
.ops()
.iter()
.map(|op| op.as_ref())
.chain(BASE_OPS.iter().cloned())
.map(str::to_string)
@@ -175,7 +190,10 @@ pub fn lexer<'a>(ctx: impl Context + 'a)
paren_parser('[', ']'),
paren_parser('{', '}'),
just(":=").to(Lexeme::Const),
just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
just("=")
.ignore_then(number::float_parser())
.then_ignore(just("=>"))
.map(Lexeme::rule),
comment::comment_parser().map(Lexeme::Comment),
just("::").to(Lexeme::NS),
just('\\').to(Lexeme::BS),
@@ -184,20 +202,18 @@ pub fn lexer<'a>(ctx: impl Context + 'a)
just('\n').to(Lexeme::BR),
placeholder::placeholder_parser(ctx.clone()).map(Lexeme::PH),
literal_parser().map(Lexeme::Literal),
name::name_parser(&all_ops).map(move |n| {
Lexeme::Name(ctx.interner().i(&n))
})
name::name_parser(&all_ops)
.map(move |n| Lexeme::Name(ctx.interner().i(&n))),
))
.map_with_span(|lexeme, range| Entry{ lexeme, range })
.padded_by(one_of(" \t").repeated())
.repeated()
.then_ignore(end())
.map_with_span(|lexeme, range| Entry { lexeme, range })
.padded_by(one_of(" \t").repeated())
.repeated()
.then_ignore(end())
}
pub fn filter_map_lex<'a, O, M: ToString>(
f: impl Fn(Lexeme) -> Result<O, M> + Clone + 'a
) -> impl Parser<Entry, (O, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
f: impl Fn(Lexeme) -> Result<O, M> + Clone + 'a,
) -> impl SimpleParser<Entry, (O, Range<usize>)> + Clone + 'a {
filter_map(move |s: Range<usize>, e: Entry| {
let out = f(e.lexeme).map_err(|msg| Simple::custom(s.clone(), msg))?;
Ok((out, s))
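`filter_map_lex` is a thin lift of chumsky's `filter_map`: an accepted lexeme comes back paired with the span it occupied, which is what the AST later stores as a `Location`. The same idea on chars, assuming chumsky 0.9's `filter_map(|span, token| ...)` signature:

```rs
use std::ops::Range;

use chumsky::prelude::*;

// A fallible conversion lifted into a parser that also returns the
// matched span, like filter_map_lex above does for lexemes.
fn digit_value() -> impl Parser<char, (u32, Range<usize>), Error = Simple<char>> {
    filter_map(|span: Range<usize>, c: char| {
        c.to_digit(10)
            .map(|d| (d, span.clone()))
            .ok_or_else(|| Simple::custom(span, "expected a digit"))
    })
}

fn main() {
    assert_eq!(digit_value().parse("7"), Ok((7, 0..1)));
}
```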

View File

@@ -1,19 +1,20 @@
mod string;
mod number;
mod name;
mod lexer;
mod comment;
mod expression;
mod sourcefile;
mod import;
mod parse;
mod enum_filter;
mod placeholder;
mod context;
mod decls;
mod enum_filter;
mod expression;
mod facade;
mod import;
mod lexer;
mod name;
mod number;
mod placeholder;
mod sourcefile;
mod string;
pub use sourcefile::line_parser;
pub use lexer::{lexer, Lexeme, Entry};
pub use context::ParsingContext;
pub use facade::{parse, ParseError};
pub use lexer::{lexer, Entry, Lexeme};
pub use name::is_op;
pub use parse::{parse, ParseError};
pub use number::{float_parser, int_parser};
pub use context::ParsingContext;
pub use sourcefile::line_parser;

View File

@@ -1,22 +1,28 @@
use chumsky::{self, prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::{self, Parser};
use super::decls::{BoxedSimpleParser, SimpleParser};
/// Matches any one of the passed operators, preferring longer ones
fn op_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> BoxedParser<'a, char, String, Simple<char>>
{
let mut sorted_ops: Vec<String> = ops.iter()
.map(|t| t.as_ref().to_string()).collect();
fn op_parser<'a>(
ops: &[impl AsRef<str> + Clone],
) -> BoxedSimpleParser<'a, char, String> {
let mut sorted_ops: Vec<String> =
ops.iter().map(|t| t.as_ref().to_string()).collect();
sorted_ops.sort_by_key(|op| -(op.len() as i64));
sorted_ops.into_iter()
sorted_ops
.into_iter()
.map(|op| just(op).boxed())
.reduce(|a, b| a.or(b).boxed())
.unwrap_or_else(|| {
empty().map(|()| panic!("Empty isn't meant to match")).boxed()
}).labelled("operator").boxed()
})
.labelled("operator")
.boxed()
}
/// Characters that cannot be parsed as part of an operator
///
///
/// The initial operator list overrides this.
static NOT_NAME_CHAR: &[char] = &[
':', // used for namespacing and type annotations
@@ -28,35 +34,34 @@ static NOT_NAME_CHAR: &[char] = &[
];
/// Matches anything that's allowed as an operator
///
///
/// FIXME: `@name` without a dot should be parsed correctly for overrides.
/// Could be an operator but then parametrics should take precedence,
/// which might break stuff. investigate.
///
///
/// TODO: `'` could work as an operator whenever it isn't closed.
/// It's common in maths so it's worth a try
///
///
/// TODO: `.` could possibly be parsed as an operator in some contexts.
/// This operator is very common in maths so it's worth a try.
/// Investigate.
pub fn modname_parser<'a>()
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
pub fn modname_parser<'a>() -> impl SimpleParser<char, String> + 'a {
filter(move |c| !NOT_NAME_CHAR.contains(c) && !c.is_whitespace())
.repeated().at_least(1)
.repeated()
.at_least(1)
.collect()
.labelled("modname")
}
/// Parse an operator or name. Failing both, parse everything up to
/// the next whitespace or blacklisted character as a new operator.
pub fn name_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
pub fn name_parser<'a>(
ops: &[impl AsRef<str> + Clone],
) -> impl SimpleParser<char, String> + 'a {
choice((
op_parser(ops), // First try to parse a known operator
text::ident().labelled("plain text"), // Failing that, parse plain text
modname_parser() // Finally parse everything until the next forbidden char
modname_parser(), // Finally parse everything until the next forbidden char
))
.labelled("name")
}
@@ -65,7 +70,7 @@ pub fn name_parser<'a>(ops: &[impl AsRef<str> + Clone])
/// and text, just not at the start.
pub fn is_op(s: impl AsRef<str>) -> bool {
return match s.as_ref().chars().next() {
Some(x) => !x.is_alphanumeric(),
None => false
}
Some(x) => !x.is_alphanumeric(),
None => false,
};
}
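The negated-length sort in `op_parser` is what makes maximal munch work: `or` tries alternatives in order, so if `=` were tried before `=>`, the longer operator could never match. A two-line demonstration:

```rs
fn main() {
    // Sort longest-first before folding the operators into an `or` chain.
    let mut ops = vec!["=", "=>", "==>"];
    ops.sort_by_key(|op| -(op.len() as i64));
    assert_eq!(ops, ["==>", "=>", "="]);
}
```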

View File

@@ -1,6 +1,9 @@
use chumsky::{self, prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::{self, Parser};
use ordered_float::NotNan;
use super::decls::SimpleParser;
fn assert_not_digit(base: u32, c: char) {
if base > (10 + (c as u32 - 'a' as u32)) {
panic!("The character '{}' is a digit in base ({})", c, base)
@@ -8,9 +11,9 @@ fn assert_not_digit(base: u32, c: char) {
}
/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
///
///
/// TODO: this should use separated_by and parse the leading group too
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
fn separated_digits_parser(base: u32) -> impl SimpleParser<char, String> {
just('_')
.ignore_then(text::digits(base))
.repeated()
@@ -18,57 +21,62 @@ fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simpl
}
/// parse a grouped uint
///
///
/// Not to be confused with [int_parser] which does a lot more
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
text::int(base)
.then(separated_digits_parser(base))
.map(move |(s1, s2): (String, String)| {
fn uint_parser(base: u32) -> impl SimpleParser<char, u64> {
text::int(base).then(separated_digits_parser(base)).map(
move |(s1, s2): (String, String)| {
u64::from_str_radix(&(s1 + &s2), base).unwrap()
})
},
)
}
/// parse exponent notation, or return 0 as the default exponent.
/// The exponent is always in decimal.
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
/// The exponent is always in decimal.
fn pow_parser() -> impl SimpleParser<char, i32> {
choice((
just('p')
.ignore_then(text::int(10))
.map(|s: String| s.parse().unwrap()),
just('p').ignore_then(text::int(10)).map(|s: String| s.parse().unwrap()),
just("p-")
.ignore_then(text::int(10))
.map(|s: String| -s.parse::<i32>().unwrap()),
)).or_else(|_| Ok(0))
))
.or_else(|_| Ok(0))
}
/// returns a mapper that converts a mantissa and an exponent into a uint
///
///
/// TODO it panics if it finds a negative exponent
fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
fn nat2u(base: u64) -> impl Fn((u64, i32)) -> u64 {
move |(val, exp)| {
if exp == 0 {val}
else {val * base.checked_pow(exp.try_into().unwrap()).unwrap()}
if exp == 0 {
val
} else {
val * base.checked_pow(exp.try_into().unwrap()).unwrap()
}
}
}
/// returns a mapper that converts a mantissa and an exponent into a float
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32),) -> NotNan<f64> {
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32)) -> NotNan<f64> {
move |(val, exp)| {
if exp == 0 {val}
else {val * (base as f64).powf(exp.try_into().unwrap())}
if exp == 0 {
val
} else {
val * (base as f64).powf(exp.try_into().unwrap())
}
}
}
/// parse a uint from exponential notation (panics if 'p' is a digit in base)
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
fn pow_uint_parser(base: u32) -> impl SimpleParser<char, u64> {
assert_not_digit(base, 'p');
uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
}
/// parse a uint from a base determined by its prefix or lack thereof
///
///
/// Not to be confused with [uint_parser] which is a component of it.
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
pub fn int_parser() -> impl SimpleParser<char, u64> {
choice((
just("0b").ignore_then(pow_uint_parser(2)),
just("0x").ignore_then(pow_uint_parser(16)),
@@ -78,35 +86,40 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
}
/// parse a float from dot notation
fn dotted_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
fn dotted_parser(base: u32) -> impl SimpleParser<char, NotNan<f64>> {
uint_parser(base)
.then(
just('.').ignore_then(
text::digits(base).then(separated_digits_parser(base))
).map(move |(frac1, frac2)| {
let frac = frac1 + &frac2;
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
let dexp = base.pow(frac.len().try_into().unwrap());
frac_num / dexp as f64
}).or_not().map(|o| o.unwrap_or_default())
).try_map(|(wh, f), s| {
NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
})
.then(
just('.')
.ignore_then(text::digits(base).then(separated_digits_parser(base)))
.map(move |(frac1, frac2)| {
let frac = frac1 + &frac2;
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
let dexp = base.pow(frac.len().try_into().unwrap());
frac_num / dexp as f64
})
.or_not()
.map(|o| o.unwrap_or_default()),
)
.try_map(|(wh, f), s| {
NotNan::new(wh as f64 + f)
.map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
})
}
/// parse a float from dotted and optionally also exponential notation
fn pow_float_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
fn pow_float_parser(base: u32) -> impl SimpleParser<char, NotNan<f64>> {
assert_not_digit(base, 'p');
dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
}
/// parse a float with dotted and optionally exponential notation from a base determined by its
/// prefix
pub fn float_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
/// parse a float with dotted and optionally exponential notation from a base
/// determined by its prefix
pub fn float_parser() -> impl SimpleParser<char, NotNan<f64>> {
choice((
just("0b").ignore_then(pow_float_parser(2)),
just("0x").ignore_then(pow_float_parser(16)),
just('0').ignore_then(pow_float_parser(8)),
pow_float_parser(10),
)).labelled("float")
))
.labelled("float")
}
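The exponent notation these parsers accept keeps the exponent in decimal regardless of the mantissa's base. A worked instance of `nat2u` (shape copied from the diff): `0b101p2` lexes to mantissa 5 in base 2 with exponent 2, combining as 5 * 2^2 = 20.

```rs
// Mantissa/exponent combination as in nat2u; panics on overflow or a
// negative exponent, matching the TODO noted above.
fn nat2u(base: u64) -> impl Fn((u64, i32)) -> u64 {
    move |(val, exp)| {
        if exp == 0 {
            val
        } else {
            val * base.checked_pow(exp.try_into().unwrap()).unwrap()
        }
    }
}

fn main() {
    assert_eq!(nat2u(2)((5, 2)), 20); // 0b101p2
    assert_eq!(nat2u(16)((0xff, 1)), 0xff0); // 0xffp1
}
```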

View File

@@ -1,16 +1,18 @@
use chumsky::{Parser, prelude::*};
use chumsky::prelude::*;
use chumsky::Parser;
use crate::ast::{Placeholder, PHClass};
use super::context::Context;
use super::decls::SimpleParser;
use super::number::int_parser;
use crate::ast::{PHClass, Placeholder};
use super::{number::int_parser, context::Context};
pub fn placeholder_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Placeholder, Error = Simple<char>> + 'a
{
pub fn placeholder_parser(
ctx: impl Context,
) -> impl SimpleParser<char, Placeholder> {
choice((
just("...").to(Some(true)),
just("..").to(Some(false)),
empty().to(None)
empty().to(None),
))
.then(just("$").ignore_then(text::ident()))
.then(just(":").ignore_then(int_parser()).or_not())
@@ -19,12 +21,10 @@ pub fn placeholder_parser<'a>(ctx: impl Context + 'a)
if let Some(nonzero) = vec_nonzero {
let prio = vec_prio.unwrap_or_default();
Ok(Placeholder { name, class: PHClass::Vec { nonzero, prio } })
} else if vec_prio.is_some() {
Err(Simple::custom(span, "Scalar placeholders have no priority"))
} else {
if vec_prio.is_some() {
Err(Simple::custom(span, "Scalar placeholders have no priority"))
} else {
Ok(Placeholder { name, class: PHClass::Scalar })
}
Ok(Placeholder { name, class: PHClass::Scalar })
}
})
}
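The refactor above collapses `else { if ... }` into `else if`, which is clippy's `collapsible_else_if` lint. The decision table itself is easy to check in isolation; a local mirror of `PHClass` (names assumed from the diff):

```rs
#[derive(Debug, PartialEq)]
enum PHClass {
    Scalar,
    Vec { nonzero: bool, prio: u64 },
}

// Same branch structure as the validate closure above: a priority is
// only legal on vectorial placeholders.
fn classify(
    vec_nonzero: Option<bool>,
    vec_prio: Option<u64>,
) -> Result<PHClass, &'static str> {
    if let Some(nonzero) = vec_nonzero {
        Ok(PHClass::Vec { nonzero, prio: vec_prio.unwrap_or_default() })
    } else if vec_prio.is_some() {
        Err("Scalar placeholders have no priority")
    } else {
        Ok(PHClass::Scalar)
    }
}

fn main() {
    assert_eq!(classify(None, None), Ok(PHClass::Scalar)); // $x
    assert_eq!(
        classify(Some(true), Some(2)),
        Ok(PHClass::Vec { nonzero: true, prio: 2 }) // ...$x:2
    );
    assert!(classify(None, Some(2)).is_err()); // $x:2 is rejected
}
```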

View File

@@ -1,55 +1,67 @@
use std::iter;
use std::rc::Rc;
use crate::representations::location::Location;
use crate::representations::sourcefile::{FileEntry, Member};
use crate::enum_filter;
use crate::ast::{Rule, Constant, Expr, Clause};
use crate::interner::Token;
use super::Entry;
use super::context::Context;
use super::expression::xpr_parser;
use super::import::import_parser;
use super::lexer::{Lexeme, filter_map_lex};
use chumsky::{Parser, prelude::*};
use chumsky::prelude::*;
use chumsky::Parser;
use itertools::Itertools;
fn rule_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Rule, Error = Simple<Entry>> + 'a
{
xpr_parser(ctx.clone()).repeated().at_least(1)
use super::context::Context;
use super::decls::{SimpleParser, SimpleRecursive};
use super::expression::xpr_parser;
use super::import::import_parser;
use super::lexer::{filter_map_lex, Lexeme};
use super::Entry;
use crate::ast::{Clause, Constant, Expr, Rule};
use crate::enum_filter;
use crate::representations::location::Location;
use crate::representations::sourcefile::{FileEntry, Member, Namespace};
fn rule_parser<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Rule> + 'a {
xpr_parser(ctx.clone())
.repeated()
.at_least(1)
.then(filter_map_lex(enum_filter!(Lexeme::Rule)))
.then(xpr_parser(ctx).repeated().at_least(1))
.map(|((s, (prio, _)), t)| Rule{
.map(|((s, (prio, _)), t)| Rule {
source: Rc::new(s),
prio,
target: Rc::new(t)
}).labelled("Rule")
target: Rc::new(t),
})
.labelled("Rule")
}
fn const_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Constant, Error = Simple<Entry>> + 'a
{
fn const_parser<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Constant> + 'a {
filter_map_lex(enum_filter!(Lexeme::Name))
.then_ignore(Lexeme::Const.parser())
.then(xpr_parser(ctx.clone()).repeated().at_least(1))
.map(move |((name, _), value)| Constant{
.map(move |((name, _), value)| Constant {
name,
value: if let Ok(ex) = value.iter().exactly_one() { ex.clone() }
else {
let start = value.first().expect("value cannot be empty")
.location.range().expect("all locations in parsed source are known")
value: if let Ok(ex) = value.iter().exactly_one() {
ex.clone()
} else {
let start = value
.first()
.expect("value cannot be empty")
.location
.range()
.expect("all locations in parsed source are known")
.start;
let end = value.last().expect("asserted right above")
.location.range().expect("all locations in parsed source are known")
let end = value
.last()
.expect("asserted right above")
.location
.range()
.expect("all locations in parsed source are known")
.end;
Expr{
Expr {
location: Location::Range { file: ctx.file(), range: start..end },
value: Clause::S('(', Rc::new(value))
value: Clause::S('(', Rc::new(value)),
}
}
},
})
}
@@ -60,56 +72,61 @@ pub fn collect_errors<T, E: chumsky::Error<T>>(e: Vec<E>) -> E {
}
fn namespace_parser<'a>(
line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
) -> impl Parser<Entry, (Token<String>, Vec<FileEntry>), Error = Simple<Entry>> + 'a {
Lexeme::Namespace.parser()
.ignore_then(filter_map_lex(enum_filter!(Lexeme::Name)))
.then(
any().repeated().delimited_by(
Lexeme::LP('(').parser(),
Lexeme::RP('(').parser()
).try_map(move |body, _| {
split_lines(&body)
.map(|l| line.parse(l))
.collect::<Result<Vec<_>,_>>()
.map_err(collect_errors)
})
).map(move |((name, _), body)| {
(name, body)
})
line: impl SimpleParser<Entry, FileEntry> + 'a,
) -> impl SimpleParser<Entry, Namespace> + 'a {
Lexeme::Namespace
.parser()
.ignore_then(filter_map_lex(enum_filter!(Lexeme::Name)))
.then(
any()
.repeated()
.delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
.try_map(move |body, _| {
split_lines(&body)
.map(|l| line.parse(l))
.collect::<Result<Vec<_>, _>>()
.map_err(collect_errors)
}),
)
.map(move |((name, _), body)| Namespace { name, body })
}
fn member_parser<'a>(
line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
ctx: impl Context + 'a
) -> impl Parser<Entry, Member, Error = Simple<Entry>> + 'a {
line: impl SimpleParser<Entry, FileEntry> + 'a,
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Member> + 'a {
choice((
namespace_parser(line)
.map(|(name, body)| Member::Namespace(name, body)),
namespace_parser(line).map(Member::Namespace),
rule_parser(ctx.clone()).map(Member::Rule),
const_parser(ctx).map(Member::Constant),
))
}
pub fn line_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a
{
recursive(|line: Recursive<Entry, FileEntry, Simple<Entry>>| {
pub fn line_parser<'a>(
ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, FileEntry> + 'a {
recursive(|line: SimpleRecursive<Entry, FileEntry>| {
choice((
// In case the usercode wants to parse doc
filter_map_lex(enum_filter!(Lexeme >> FileEntry; Comment)).map(|(ent, _)| ent),
filter_map_lex(enum_filter!(Lexeme >> FileEntry; Comment))
.map(|(ent, _)| ent),
// plain old imports
Lexeme::Import.parser()
Lexeme::Import
.parser()
.ignore_then(import_parser(ctx.clone()).map(FileEntry::Import)),
Lexeme::Export.parser().ignore_then(choice((
// token collection
Lexeme::NS.parser().ignore_then(
filter_map_lex(enum_filter!(Lexeme::Name)).map(|(e, _)| e)
.separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
.delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
).map(FileEntry::Export),
Lexeme::NS
.parser()
.ignore_then(
filter_map_lex(enum_filter!(Lexeme::Name))
.map(|(e, _)| e)
.separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
.delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser()),
)
.map(FileEntry::Export),
// public declaration
member_parser(line.clone(), ctx.clone()).map(FileEntry::Exported)
member_parser(line.clone(), ctx.clone()).map(FileEntry::Exported),
))),
// This could match almost anything so it has to go last
member_parser(line, ctx).map(FileEntry::Internal),
@@ -123,13 +140,13 @@ pub fn split_lines(data: &[Entry]) -> impl Iterator<Item = &[Entry]> {
let mut finished = false;
iter::from_fn(move || {
let mut paren_count = 0;
while let Some((i, Entry{ lexeme, .. })) = source.next() {
for (i, Entry { lexeme, .. }) in source.by_ref() {
match lexeme {
Lexeme::LP(_) => paren_count += 1,
Lexeme::RP(_) => paren_count -= 1,
Lexeme::BR if paren_count == 0 => {
let begin = last_slice;
last_slice = i+1;
last_slice = i + 1;
return Some(&data[begin..i]);
},
_ => (),
@@ -138,8 +155,9 @@ pub fn split_lines(data: &[Entry]) -> impl Iterator<Item = &[Entry]> {
// Include last line even without trailing newline
if !finished {
finished = true;
return Some(&data[last_slice..])
return Some(&data[last_slice..]);
}
None
}).filter(|s| s.len() > 0)
})
.filter(|s| !s.is_empty())
}
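`split_lines` treats a `BR` as a boundary only at parenthesis depth zero, so a grouped expression may span physical lines. A simplified, runnable shape of that rule (`Tok` stands in for the crate's `Entry`):

```rs
enum Tok {
    LP,
    RP,
    BR,
    Word,
}

// Count logical lines: a BR inside parens does not end a line.
fn count_lines(toks: &[Tok]) -> usize {
    let mut depth = 0usize;
    let mut lines = 1;
    for t in toks {
        match t {
            Tok::LP => depth += 1,
            Tok::RP => depth = depth.saturating_sub(1),
            Tok::BR if depth == 0 => lines += 1,
            _ => (),
        }
    }
    lines
}

fn main() {
    use Tok::*;
    // `a (b <BR> c) <BR> d` is two logical lines: the first BR is grouped.
    assert_eq!(count_lines(&[Word, LP, Word, BR, Word, RP, BR, Word]), 2);
}
```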

View File

@@ -1,7 +1,10 @@
use chumsky::{self, prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::{self, Parser};
use super::decls::SimpleParser;
/// Parses a text character that is not the specified delimiter
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
fn text_parser(delim: char) -> impl SimpleParser<char, char> {
// Copied directly from Chumsky's JSON example.
let escape = just('\\').ignore_then(
just('\\')
@@ -12,35 +15,39 @@ fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
.or(just('n').to('\n'))
.or(just('r').to('\r'))
.or(just('t').to('\t'))
.or(just('u').ignore_then(
filter(|c: &char| c.is_ascii_hexdigit())
.repeated()
.exactly(4)
.collect::<String>()
.validate(|digits, span, emit| {
char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
.unwrap_or_else(|| {
emit(Simple::custom(span, "invalid unicode character"));
'\u{FFFD}' // unicode replacement character
})
}),
)),
.or(
just('u').ignore_then(
filter(|c: &char| c.is_ascii_hexdigit())
.repeated()
.exactly(4)
.collect::<String>()
.validate(|digits, span, emit| {
char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
.unwrap_or_else(|| {
emit(Simple::custom(span, "invalid unicode character"));
'\u{FFFD}' // unicode replacement character
})
}),
),
),
);
filter(move |&c| c != '\\' && c != delim).or(escape)
}
/// Parse a character literal between single quotes
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
pub fn char_parser() -> impl SimpleParser<char, char> {
just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
}
/// Parse a string between double quotes
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
pub fn str_parser() -> impl SimpleParser<char, String> {
just('"')
.ignore_then(
text_parser('"').map(Some)
.ignore_then(
text_parser('"').map(Some)
.or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
.repeated()
).then_ignore(just('"'))
.flatten().collect()
.repeated(),
)
.then_ignore(just('"'))
.flatten()
.collect()
}
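The `Some`/`None` choreography exists so a backslash-newline pair can be spliced out of string literals: ordinary characters map to `Some`, the escaped newline to `None`, and `flatten` drops the `None`s. A reduced sketch assuming chumsky 0.9 (escape handling omitted):

```rs
use chumsky::prelude::*;

// Characters become Some(c); "\<newline>" becomes None and vanishes
// when the Vec<Option<char>> is flattened into a String.
fn spliced() -> impl Parser<char, String, Error = Simple<char>> {
    just('"')
        .ignore_then(
            filter(|&c: &char| c != '"' && c != '\\')
                .map(Some)
                .or(just("\\\n").map(|_| None))
                .repeated(),
        )
        .then_ignore(just('"'))
        .flatten()
        .collect()
}

fn main() {
    assert_eq!(spliced().parse("\"ab\\\ncd\""), Ok("abcd".to_string()));
}
```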