bug fixes and performance improvements

2023-05-07 22:35:38 +01:00
parent f3ce910f66
commit a604e40bad
167 changed files with 5965 additions and 4229 deletions

src/parse/context.rs (new file, 48 lines)

@@ -0,0 +1,48 @@
use std::rc::Rc;
use crate::interner::Interner;
/// Trait enclosing all context features
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub trait Context: Clone {
type Op: AsRef<str>;
fn ops<'a>(&'a self) -> &'a [Self::Op];
fn file(&self) -> Rc<Vec<String>>;
fn interner<'a>(&'a self) -> &'a Interner;
}
/// Struct implementing context
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub struct ParsingContext<'a, Op> {
pub ops: &'a [Op],
pub interner: &'a Interner,
pub file: Rc<Vec<String>>
}
impl<'a, Op> ParsingContext<'a, Op> {
pub fn new(ops: &'a [Op], interner: &'a Interner, file: Rc<Vec<String>>)
-> Self { Self { ops, interner, file } }
}
impl<'a, Op> Clone for ParsingContext<'a, Op> {
fn clone(&self) -> Self {
Self {
ops: self.ops,
interner: self.interner,
file: self.file.clone()
}
}
}
impl<Op: AsRef<str>> Context for ParsingContext<'_, Op> {
type Op = Op;
fn interner<'a>(&'a self) -> &'a Interner { self.interner }
fn file(&self) -> Rc<Vec<String>> {self.file.clone()}
fn ops<'a>(&'a self) -> &'a [Self::Op] { self.ops }
}
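
For orientation, a minimal sketch of wiring up a `ParsingContext` from the pieces above; the operator list and file path below are invented for illustration, and only `ParsingContext::new` from this file is assumed.

```rust
use std::rc::Rc;
use crate::interner::Interner;
use crate::parse::ParsingContext;

// Hypothetical setup; `ops` and the file path are placeholders.
fn example_ctx<'a>(interner: &'a Interner) -> ParsingContext<'a, &'static str> {
    let ops: &'static [&'static str] = &["+", "==", ","];
    let file = Rc::new(vec!["examples".to_string(), "demo".to_string()]);
    ParsingContext::new(ops, interner, file)
}
```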

src/parse/enum_filter.rs (new file, 46 lines)

@@ -0,0 +1,46 @@
/// Produces filter_map functions for enum types:
/// ```rs
/// enum_filter!(Foo::Bar | "Some error!") // Matches Foo::Bar(T) and extracts T
/// enum_filter!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar"
/// enum_filter!(Foo >> Quz; Bar, Baz) // Maps Foo::Bar(T) to Quz::Bar(T) and Foo::Baz(U) to Quz::Baz(U)
/// ```
#[macro_export]
macro_rules! enum_filter {
($p:path | $m:tt) => {
{
|l| {
if let $p(x) = l { Ok(x) }
else { Err($m) }
}
}
};
($p:path >> $q:path; $i:ident | $m:tt) => {
{
use $p as srcpath;
use $q as tgtpath;
let base = enum_filter!(srcpath::$i | $m);
move |l| base(l).map(tgtpath::$i)
}
};
($p:path >> $q:path; $i:ident) => {
enum_filter!($p >> $q; $i | {concat!("Expected ", stringify!($i))})
};
($p:path >> $q:path; $($i:ident),+ | $m:tt) => {
{
use $p as srcpath;
use $q as tgtpath;
|l| match l {
$( srcpath::$i(x) => Ok(tgtpath::$i(x)), )+
_ => Err($m)
}
}
};
($p:path >> $q:path; $($i:ident),+) => {
enum_filter!($p >> $q; $($i),+ | {
concat!("Expected one of ", $(stringify!($i), " "),+)
})
};
($p:path) => {
enum_filter!($p | {concat!("Expected ", stringify!($p))})
};
}
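
As a quick illustration of what the macro expands to, a toy enum pair (both enums are hypothetical, not part of the crate):

```rust
use crate::enum_filter;

// Hypothetical enums for demonstration only.
enum Token { Name(String), Number(u64) }
enum Lit { Number(u64) }

fn demo() {
    // Extracts the payload of Token::Name, or fails with the default message.
    let name = enum_filter!(Token::Name);
    assert!(matches!(name(Token::Name("x".into())), Ok(s) if s == "x"));
    assert!(name(Token::Number(1)).is_err());
    // Re-wraps Token::Number(u64) as Lit::Number(u64).
    let lit = enum_filter!(Token >> Lit; Number);
    assert!(matches!(lit(Token::Number(7)), Ok(Lit::Number(7))));
}
```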

src/parse/enum_parser.rs (deleted)

@@ -1,32 +0,0 @@
/// Produces parsers for tokenized sequences of enum types:
/// ```rs
/// enum_parser!(Foo::Bar | "Some error!") // Parses Foo::Bar(T) into T
/// enum_parser!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar"
/// enum_parser!(Foo >> Quz; Bar, Baz) // Parses Foo::Bar(T) into Quz::Bar(T) and Foo::Baz(U) into Quz::Baz(U)
/// ```
#[macro_export]
macro_rules! enum_parser {
($p:path | $m:tt) => {
{
::chumsky::prelude::filter_map(|s, l| {
if let $p(x) = l { Ok(x) }
else { Err(::chumsky::prelude::Simple::custom(s, $m))}
})
}
};
($p:path >> $q:path; $i:ident) => {
{
use $p as srcpath;
use $q as tgtpath;
enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i)
}
};
($p:path >> $q:path; $($i:ident),+) => {
{
::chumsky::prelude::choice((
$( enum_parser!($p >> $q; $i) ),+
))
}
};
($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) };
}

src/parse/expression.rs

@@ -1,155 +1,107 @@
use std::ops::Range;
use std::rc::Rc;
use chumsky::{self, prelude::*, Parser};
use lasso::Spur;
use crate::enum_parser;
use crate::representations::Primitive;
use crate::representations::{Literal, ast::{Clause, Expr}};
use super::lexer::Lexeme;
use crate::enum_filter;
use crate::representations::Primitive;
use crate::representations::ast::{Clause, Expr};
use crate::representations::location::Location;
use crate::interner::Token;
use super::context::Context;
use super::lexer::{Lexeme, Entry, filter_map_lex};
/// Parses any number of expr wrapped in (), [] or {}
fn sexpr_parser<P>(
expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
Lexeme::paren_parser(expr.repeated())
.map(|(del, b)| Clause::S(del, Rc::new(b)))
fn sexpr_parser(
expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone {
let body = expr.repeated();
choice((
Lexeme::LP('(').parser().then(body.clone())
.then(Lexeme::RP('(').parser()),
Lexeme::LP('[').parser().then(body.clone())
.then(Lexeme::RP('[').parser()),
Lexeme::LP('{').parser().then(body.clone())
.then(Lexeme::RP('{').parser()),
)).map(|((lp, body), rp)| {
let Entry{lexeme, range: Range{start, ..}} = lp;
let end = rp.range.end;
let char = if let Lexeme::LP(c) = lexeme {c}
else {unreachable!("The parser only matches Lexeme::LP")};
(Clause::S(char, Rc::new(body)), start..end)
}).labelled("S-expression")
}
/// Parses `\name.body` or `\name:type.body` where name is any valid name
/// and type and body are both expressions. Comments are allowed
/// and ignored everywhere in between the tokens
fn lambda_parser<'a, P, F>(
expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
F: Fn(&str) -> Spur + 'a {
just(Lexeme::BS)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(namelike_parser(intern))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(
just(Lexeme::Type)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(expr.clone().repeated())
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.or_not().map(Option::unwrap_or_default)
)
.then_ignore(just(Lexeme::name(".")))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
fn lambda_parser<'a>(
expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
Lexeme::BS.parser()
.ignore_then(expr.clone())
.then_ignore(Lexeme::Name(ctx.interner().i(".")).parser())
.then(expr.repeated().at_least(1))
.map(|((name, typ), body): ((Clause, Vec<Expr>), Vec<Expr>)| {
Clause::Lambda(Rc::new(name), Rc::new(typ), Rc::new(body))
})
}
/// see [lambda_parser] but `@` instead of `\` and the name is optional
fn auto_parser<'a, P, F>(
expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
F: Fn(&str) -> Spur + 'a {
just(Lexeme::At)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(namelike_parser(intern).or_not())
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(
just(Lexeme::Type)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(expr.clone().repeated())
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.or_not().map(Option::unwrap_or_default)
)
.then_ignore(just(Lexeme::name(".")))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(expr.repeated().at_least(1))
.try_map(|((name, typ), body): ((Option<Clause>, Vec<Expr>), Vec<Expr>), s| {
if name.is_none() && typ.is_empty() {
Err(Simple::custom(s, "Auto without name or type has no effect"))
} else {
Ok(Clause::Auto(name.map(Rc::new), Rc::new(typ), Rc::new(body)))
}
})
.map_with_span(move |(arg, body), span| {
(Clause::Lambda(Rc::new(arg), Rc::new(body)), span)
}).labelled("Lambda")
}
/// Parses a sequence of names separated by :: <br/>
/// Comments are allowed and ignored in between
pub fn ns_name_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Vec<Spur>, Error = Simple<Lexeme>> + Clone + 'a
where F: Fn(&str) -> Spur + 'a {
enum_parser!(Lexeme::Name)
.map(|s| intern(&s))
.separated_by(
enum_parser!(Lexeme::Comment).repeated()
.then(just(Lexeme::NS))
.then(enum_parser!(Lexeme::Comment).repeated())
).at_least(1)
/// Comments and line breaks are allowed and ignored in between
pub fn ns_name_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Token<Vec<Token<String>>>, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
filter_map_lex(enum_filter!(Lexeme::Name))
.separated_by(Lexeme::NS.parser()).at_least(1)
.map(move |elements| {
let start = elements.first().expect("can never be empty").1.start;
let end = elements.last().expect("can never be empty").1.end;
let tokens =
/*ctx.prefix().iter().copied().chain*/(
elements.iter().map(|(t, _)| *t)
).collect::<Vec<_>>();
(ctx.interner().i(&tokens), start..end)
}).labelled("Namespaced name")
}
/// Parse any legal argument name starting with a `$`
fn placeholder_parser() -> impl Parser<Lexeme, String, Error = Simple<Lexeme>> + Clone {
enum_parser!(Lexeme::Name).try_map(|name, span| {
name.strip_prefix('$').map(&str::to_string)
.ok_or_else(|| Simple::custom(span, "Not a placeholder"))
})
}
pub fn namelike_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where F: Fn(&str) -> Spur + 'a {
pub fn namelike_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
choice((
just(Lexeme::name("...")).to(true)
.or(just(Lexeme::name("..")).to(false))
.then(placeholder_parser())
.then(
just(Lexeme::Type)
.ignore_then(enum_parser!(Lexeme::Uint))
.or_not().map(Option::unwrap_or_default)
)
.map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
prio.try_into().unwrap(),
nonzero
))}),
ns_name_parser(intern)
.map(|qualified| Clause::Name(Rc::new(qualified))),
filter_map_lex(enum_filter!(Lexeme::PH))
.map(|(ph, range)| (Clause::Placeh(ph), range)),
ns_name_parser(ctx)
.map(|(token, range)| (Clause::Name(token), range)),
))
}
pub fn clause_parser<'a, P, F>(
expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
F: Fn(&str) -> Spur + 'a {
enum_parser!(Lexeme::Comment).repeated()
.ignore_then(choice((
enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str)
.map(Primitive::Literal).map(Clause::P),
placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
namelike_parser(intern),
pub fn clause_parser<'a>(
expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
choice((
filter_map_lex(enum_filter!(Lexeme >> Primitive; Literal))
.map(|(p, s)| (Clause::P(p), s)).labelled("Literal"),
sexpr_parser(expr.clone()),
lambda_parser(expr.clone(), intern),
auto_parser(expr.clone(), intern),
just(Lexeme::At).ignore_then(expr.clone()).map(|arg| {
Clause::Explicit(Rc::new(arg))
})
))).then_ignore(enum_parser!(Lexeme::Comment).repeated())
lambda_parser(expr.clone(), ctx.clone()),
namelike_parser(ctx),
)).labelled("Clause")
}
/// Parse an expression
pub fn xpr_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
recursive(|expr| {
let clause = clause_parser(expr, intern);
clause.clone().then(
just(Lexeme::Type)
.ignore_then(clause.clone())
.repeated()
)
.map(|(val, typ)| Expr(val, Rc::new(typ)))
pub fn xpr_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Expr, Error = Simple<Entry>> + 'a
{
recursive(move |expr| {
clause_parser(expr, ctx.clone())
.map(move |(value, range)| {
Expr{
value: value.clone(),
location: Location::Range { file: ctx.file(), range }
}
})
}).labelled("Expression")
}
}
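
To make the new return shape concrete, this is roughly what `xpr_parser` yields for the source `\x. x`, following the constructors used above (field contents illustrative):

```rust
// Expr {
//   value: Clause::Lambda(
//     Rc::new(/* argument */ Expr { value: Clause::Name(x_token), .. }),
//     Rc::new(/* body */ vec![Expr { value: Clause::Name(x_token), .. }]),
//   ),
//   // The range spans from the `\` to the end of the body.
//   location: Location::Range { file, range },
// }
```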

src/parse/import.rs

@@ -1,16 +1,16 @@
use std::rc::Rc;
use chumsky::{Parser, prelude::*};
use itertools::Itertools;
use lasso::Spur;
use crate::representations::sourcefile::Import;
use crate::utils::iter::{box_once, box_flatten, into_boxed_iter, BoxedIterIter};
use crate::{enum_parser, box_chain};
use crate::interner::Token;
use crate::{box_chain, enum_filter};
use super::lexer::Lexeme;
use super::Entry;
use super::context::Context;
use super::lexer::{Lexeme, filter_map_lex};
/// initialize a BoxedIter<BoxedIter<String>> with a single element.
fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> {
fn init_table(name: Token<String>) -> BoxedIterIter<'static, Token<String>> {
// I'm not at all confident that this is a good approach.
box_once(box_once(name))
}
@@ -21,51 +21,54 @@ fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> {
/// preferably contain cross-platform filename-legal characters but the
/// symbols are explicitly allowed to go wild.
/// There's a blacklist in [name]
pub fn import_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
let globstar = intern("*");
pub fn import_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Vec<Import>, Error = Simple<Entry>> + 'a
{
// TODO: this algorithm isn't cache-friendly and copies a lot
recursive(move |expr:Recursive<Lexeme, BoxedIterIter<Spur>, Simple<Lexeme>>| {
enum_parser!(Lexeme::Name).map(|s| intern(s.as_str()))
.separated_by(just(Lexeme::NS))
.then(
just(Lexeme::NS)
.ignore_then(
choice((
expr.clone()
.separated_by(just(Lexeme::name(",")))
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
.map(|v| box_flatten(v.into_iter()))
.labelled("import group"),
// Each expr returns a list of imports, flatten into common list
just(Lexeme::name("*")).map(move |_| init_table(globstar))
.labelled("wildcard import"), // Just a *, wrapped
enum_parser!(Lexeme::Name)
.map(|s| init_table(intern(s.as_str())))
.labelled("import terminal") // Just a name, wrapped
))
).or_not()
)
.map(|(name, opt_post): (Vec<Spur>, Option<BoxedIterIter<Spur>>)|
-> BoxedIterIter<Spur> {
if let Some(post) = opt_post {
Box::new(post.map(move |el| {
box_chain!(name.clone().into_iter(), el)
}))
} else {
box_once(into_boxed_iter(name))
}
})
recursive({
let ctx = ctx.clone();
move |expr:Recursive<Entry, BoxedIterIter<Token<String>>, Simple<Entry>>| {
filter_map_lex(enum_filter!(Lexeme::Name)).map(|(t, _)| t)
.separated_by(Lexeme::NS.parser())
.then(
Lexeme::NS.parser()
.ignore_then(
choice((
expr.clone()
.separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
.delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
.map(|v| box_flatten(v.into_iter()))
.labelled("import group"),
// Each expr returns a list of imports, flatten into common list
Lexeme::Name(ctx.interner().i("*")).parser()
.map(move |_| init_table(ctx.interner().i("*")))
.labelled("wildcard import"), // Just a *, wrapped
filter_map_lex(enum_filter!(Lexeme::Name))
.map(|(t, _)| init_table(t))
.labelled("import terminal") // Just a name, wrapped
))
).or_not()
)
.map(|(name, opt_post): (Vec<Token<String>>, Option<BoxedIterIter<Token<String>>>)|
-> BoxedIterIter<Token<String>> {
if let Some(post) = opt_post {
Box::new(post.map(move |el| {
box_chain!(name.clone().into_iter(), el)
}))
} else {
box_once(into_boxed_iter(name))
}
})
}
}).map(move |paths| {
paths.filter_map(|namespaces| {
let mut path = namespaces.collect_vec();
let name = path.pop()?;
Some(Import {
path: Rc::new(path),
path: ctx.interner().i(&path),
name: {
if name == globstar { None }
else { Some(name.to_owned()) }
if name == ctx.interner().i("*") { None }
else { Some(name) }
}
})
}).collect()
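
The recursion above flattens nested import groups into one path per import; below is a self-contained toy of the same idea (the `Tree` type and the input are invented for illustration, and a trailing `*` is what later becomes `Import { name: None, .. }`):

```rust
#[derive(Debug)]
enum Tree { Leaf(&'static str), Group(&'static str, Vec<Tree>) }

// Depth-first flattening: every leaf yields the path of group names above it.
fn flatten(t: &Tree, prefix: Vec<&'static str>, out: &mut Vec<Vec<&'static str>>) {
    match t {
        Tree::Leaf(name) => {
            let mut path = prefix;
            path.push(*name);
            out.push(path);
        }
        Tree::Group(name, subs) => {
            let mut base = prefix;
            base.push(*name);
            for s in subs { flatten(s, base.clone(), out); }
        }
    }
}

fn main() {
    // Models `import foo::(bar, baz::*)`
    let tree = Tree::Group("foo", vec![
        Tree::Leaf("bar"),
        Tree::Group("baz", vec![Tree::Leaf("*")]),
    ]);
    let mut out = Vec::new();
    flatten(&tree, Vec::new(), &mut out);
    assert_eq!(out, vec![vec!["foo", "bar"], vec!["foo", "baz", "*"]]);
}
```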

src/parse/lexer.rs

@@ -1,53 +1,88 @@
use std::{ops::Range, iter, fmt};
use ordered_float::NotNan;
use chumsky::{Parser, prelude::*};
use std::fmt::Debug;
use crate::{utils::{BoxedIter, iter::{box_once, box_flatten}}, box_chain};
use std::fmt;
use std::ops::Range;
use ordered_float::NotNan;
use chumsky::{Parser, prelude::*, text::keyword, Span};
use crate::ast::{Placeholder, PHClass};
use crate::representations::Literal;
use crate::interner::{Token, InternedDisplay, Interner};
use super::context::Context;
use super::placeholder;
use super::{number, string, name, comment};
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Entry(pub Lexeme, pub Range<usize>);
impl Debug for Entry {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?}", self.0)
// f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Entry{
pub lexeme: Lexeme,
pub range: Range<usize>
}
impl Entry {
pub fn is_filler(&self) -> bool {
matches!(self.lexeme, Lexeme::Comment(_))
|| matches!(self.lexeme, Lexeme::BR)
}
}
impl InternedDisplay for Entry {
fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
self.lexeme.fmt_i(f, i)
}
}
impl From<Entry> for (Lexeme, Range<usize>) {
fn from(ent: Entry) -> Self {
(ent.0, ent.1)
(ent.lexeme, ent.range)
}
}
#[derive(Clone, PartialEq, Eq, Hash)]
impl Span for Entry {
type Context = Lexeme;
type Offset = usize;
fn context(&self) -> Self::Context {self.lexeme.clone()}
fn start(&self) -> Self::Offset {self.range.start}
fn end(&self) -> Self::Offset {self.range.end}
fn new(context: Self::Context, range: Range<Self::Offset>) -> Self {
Self{
lexeme: context,
range
}
}
}
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Lexeme {
Num(NotNan<f64>),
Uint(u64),
Char(char),
Str(String),
Name(String),
Literal(Literal),
Name(Token<String>),
Rule(NotNan<f64>),
NS, // namespace separator
/// Walrus operator (formerly shorthand macro)
Const,
/// Line break
BR,
/// Namespace separator
NS,
/// Left paren
LP(char),
/// Right paren
RP(char),
BS, // Backslash
/// Backslash
BS,
At,
Type, // type operator
Comment(String),
Export,
Import,
Namespace,
PH(Placeholder)
}
impl Debug for Lexeme {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
impl InternedDisplay for Lexeme {
fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
match self {
Self::Num(n) => write!(f, "{}", n),
Self::Uint(i) => write!(f, "{}", i),
Self::Char(c) => write!(f, "{:?}", c),
Self::Str(s) => write!(f, "{:?}", s),
Self::Name(name) => write!(f, "{}", name),
Self::Literal(l) => write!(f, "{:?}", l),
Self::Name(token) => write!(f, "{}", i.r(*token)),
Self::Const => write!(f, ":="),
Self::Rule(prio) => write!(f, "={}=>", prio),
Self::NS => write!(f, "::"),
Self::LP(l) => write!(f, "{}", l),
@@ -57,102 +92,114 @@ impl Debug for Lexeme {
'{' => write!(f, "}}"),
_ => f.debug_tuple("RP").field(l).finish()
},
Self::BR => write!(f, "\n"),
Self::BS => write!(f, "\\"),
Self::At => write!(f, "@"),
Self::Type => write!(f, ":"),
Self::Comment(text) => write!(f, "--[{}]--", text),
Self::Export => write!(f, "export"),
Self::Import => write!(f, "import"),
Self::Namespace => write!(f, "namespace"),
Self::PH(Placeholder { name, class }) => match *class {
PHClass::Scalar => write!(f, "${}", i.r(*name)),
PHClass::Vec { nonzero, prio } => {
if nonzero {write!(f, "...")}
else {write!(f, "..")}?;
write!(f, "${}", i.r(*name))?;
if prio != 0 {write!(f, ":{}", prio)?;};
Ok(())
}
}
}
}
}
impl Lexeme {
pub fn name<T: ToString>(n: T) -> Self {
Lexeme::Name(n.to_string())
pub fn rule(prio: impl Into<f64>) -> Self {
Lexeme::Rule(
NotNan::new(prio.into())
.expect("Rule priority cannot be NaN")
)
}
pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
}
pub fn paren_parser<T, P>(
expr: P
) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
choice((
expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
.map(|t| ('(', t)),
expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
.map(|t| ('[', t)),
expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
.map(|t| ('{', t)),
))
pub fn parser<E: chumsky::Error<Entry>>(self)
-> impl Parser<Entry, Entry, Error = E> + Clone {
filter(move |ent: &Entry| ent.lexeme == self)
}
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LexedText(pub Vec<Vec<Entry>>);
pub struct LexedText(pub Vec<Entry>);
impl Debug for LexedText {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for row in &self.0 {
for tok in row {
tok.fmt(f)?;
f.write_str(" ")?
}
f.write_str("\n")?
impl InternedDisplay for LexedText {
fn fmt_i(&self, f: &mut fmt::Formatter<'_>, i: &Interner) -> fmt::Result {
for tok in self.0.iter() {
tok.fmt_i(f, i)?;
f.write_str(" ")?
}
Ok(())
}
}
type LexSubres<'a> = BoxedIter<'a, Entry>;
fn paren_parser<'a>(
expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
lp: char, rp: char
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
expr.padded().repeated()
.map(|x| box_flatten(x.into_iter()))
.delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
box_chain!(
iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1)),
b,
iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end))
)
})
fn paren_parser(lp: char, rp: char)
-> impl Parser<char, Lexeme, Error=Simple<char>>
{
just(lp).to(Lexeme::LP(lp))
.or(just(rp).to(Lexeme::RP(lp)))
}
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
let all_ops = ops.iter().map(|o| o.as_ref().to_string())
.chain([",", ".", "..", "..."].into_iter().map(str::to_string))
pub fn literal_parser() -> impl Parser<char, Literal, Error = Simple<char>> {
choice((
number::int_parser().map(Literal::Uint), // all ints are valid floats so it takes precedence
number::float_parser().map(Literal::Num),
string::char_parser().map(Literal::Char),
string::str_parser().map(Literal::Str),
))
}
pub static BASE_OPS: &[&str] = &[",", ".", "..", "..."];
pub fn lexer<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
{
let all_ops = ctx.ops().iter()
.map(|op| op.as_ref())
.chain(BASE_OPS.iter().cloned())
.map(str::to_string)
.collect::<Vec<_>>();
just("export").padded().to(Lexeme::Export)
.or(just("import").padded().to(Lexeme::Import))
.or_not().then(recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
choice((
paren_parser(recurse.clone(), '(', ')'),
paren_parser(recurse.clone(), '[', ']'),
paren_parser(recurse.clone(), '{', '}'),
choice((
just(":=").padded().to(Lexeme::rule(0f64)),
just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
comment::comment_parser().map(Lexeme::Comment),
just("::").padded().to(Lexeme::NS),
just('\\').padded().to(Lexeme::BS),
just('@').padded().to(Lexeme::At),
just(':').to(Lexeme::Type),
number::int_parser().map(Lexeme::Uint), // all ints are valid floats so it takes precedence
number::float_parser().map(Lexeme::Num),
string::char_parser().map(Lexeme::Char),
string::str_parser().map(Lexeme::Str),
name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing
)).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres)
))
}).separated_by(one_of("\t ").repeated())
.flatten().collect())
.map(|(prefix, rest): (Option<Lexeme>, Vec<Entry>)| {
prefix.into_iter().map(|l| Entry(l, 0..6)).chain(rest.into_iter()).collect()
})
.then_ignore(text::whitespace()).then_ignore(end())
choice((
keyword("export").to(Lexeme::Export),
keyword("module").to(Lexeme::Namespace),
keyword("import").to(Lexeme::Import),
paren_parser('(', ')'),
paren_parser('[', ']'),
paren_parser('{', '}'),
just(":=").to(Lexeme::Const),
just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
comment::comment_parser().map(Lexeme::Comment),
just("::").to(Lexeme::NS),
just('\\').to(Lexeme::BS),
just('@').to(Lexeme::At),
just(':').to(Lexeme::Type),
just('\n').to(Lexeme::BR),
placeholder::placeholder_parser(ctx.clone()).map(Lexeme::PH),
literal_parser().map(Lexeme::Literal),
name::name_parser(&all_ops).map(move |n| {
Lexeme::Name(ctx.interner().i(&n))
})
))
.map_with_span(|lexeme, range| Entry{ lexeme, range })
.padded_by(one_of(" \t").repeated())
.repeated()
.then_ignore(end())
}
pub fn filter_map_lex<'a, O, M: ToString>(
f: impl Fn(Lexeme) -> Result<O, M> + Clone + 'a
) -> impl Parser<Entry, (O, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
filter_map(move |s: Range<usize>, e: Entry| {
let out = f(e.lexeme).map_err(|msg| Simple::custom(s.clone(), msg))?;
Ok((out, s))
})
}
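
A sketch of how these lexer-level helpers compose downstream, mirroring `const_parser` later in this commit (assumes the crate's `Lexeme`, `Entry` and `Token` plus chumsky in scope):

```rust
use chumsky::prelude::*;
use crate::enum_filter;
use crate::interner::Token;

// Recognises `name :=`, yielding the interned name and discarding its span.
fn const_head() -> impl Parser<Entry, Token<String>, Error = Simple<Entry>> + Clone {
    filter_map_lex(enum_filter!(Lexeme::Name))
        .map(|(name, _range)| name)
        .then_ignore(Lexeme::Const.parser())
}
```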

src/parse/mod.rs

@@ -6,11 +6,14 @@ mod comment;
mod expression;
mod sourcefile;
mod import;
mod enum_parser;
mod parse;
mod enum_filter;
mod placeholder;
mod context;
pub use sourcefile::line_parser;
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
pub use lexer::{lexer, Lexeme, Entry};
pub use name::is_op;
pub use parse::{parse, reparse, ParseError};
pub use number::{float_parser, int_parser};
pub use parse::{parse, ParseError};
pub use number::{float_parser, int_parser};
pub use context::ParsingContext;

src/parse/name.rs

@@ -1,58 +1,69 @@
use chumsky::{self, prelude::*, Parser};
/// Matches any one of the passed operators, longest-first
fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple<char>> {
let mut sorted_ops: Vec<String> = ops.iter().map(|t| t.as_ref().to_string()).collect();
/// Matches any one of the passed operators, preferring longer ones
fn op_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> BoxedParser<'a, char, String, Simple<char>>
{
let mut sorted_ops: Vec<String> = ops.iter()
.map(|t| t.as_ref().to_string()).collect();
sorted_ops.sort_by_key(|op| -(op.len() as i64));
sorted_ops.into_iter()
.map(|op| just(op).boxed())
.reduce(|a, b| a.or(b).boxed())
.unwrap_or_else(|| empty().map(|()| panic!("Empty isn't meant to match")).boxed())
.labelled("operator").boxed()
.unwrap_or_else(|| {
empty().map(|()| panic!("Empty isn't meant to match")).boxed()
}).labelled("operator").boxed()
}
/// Characters that cannot be parsed as part of an operator
///
/// The initial operator list overrides this.
static NOT_NAME_CHAR: &[char] = &[
':', // used for namespacing and type annotations
'\\', '@', // parametric expression starters
'"', '\'', // parsed as primitives and therefore would never match
'(', ')', '[', ']', '{', '}', // must be strictly balanced
'.', // Argument-body separator in parametrics
',', // used in imports
];
/// Matches anything that's allowed as an operator
///
/// Blacklist rationale:
/// - `:` is used for namespacing and type annotations, both are distinguished from operators
/// - `\` and `@` are parametric expression starters
/// - `"` and `'` are read as primitives and would never match.
/// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming.
/// - `.` is the discriminator for parametrics.
/// - ',' is always a standalone single operator, so it can never be part of a name
/// FIXME: `@name` without a dot should be parsed correctly for overrides.
/// Could be an operator but then parametrics should take precedence,
/// which might break stuff. investigate.
///
/// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but
/// then parametrics should take precedence, which might break stuff. investigate.
/// TODO: `'` could work as an operator whenever it isn't closed.
/// It's common in maths so it's worth a try
///
/// TODO: `'` could work as an operator whenever it isn't closed. It's common in maths so it's
/// worth a try
///
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
/// common in maths so it's worth a try. Investigate.
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '[', ']', '{', '}', ',', '.'];
filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
/// TODO: `.` could possibly be parsed as an operator in some contexts.
/// This operator is very common in maths so it's worth a try.
/// Investigate.
pub fn modname_parser<'a>()
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
filter(move |c| !NOT_NAME_CHAR.contains(c) && !c.is_whitespace())
.repeated().at_least(1)
.collect()
.labelled("modname")
}
/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
/// blacklisted character as a new operator.
pub fn name_parser<'a, T: AsRef<str> + Clone>(
ops: &[T]
) -> impl Parser<char, String, Error = Simple<char>> + 'a {
/// Parse an operator or name. Failing both, parse everything up to
/// the next whitespace or blacklisted character as a new operator.
pub fn name_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
choice((
op_parser(ops), // First try to parse a known operator
text::ident().labelled("plain text"), // Failing that, parse plain text
modname_parser() // Finally parse everything until the next terminal as a new operator
modname_parser() // Finally parse everything until the next forbidden char
))
.labelled("name")
}
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
/// start.
pub fn is_op<T: AsRef<str>>(s: T) -> bool {
/// Decide if a string can be an operator. Operators can include digits
/// and text, just not at the start.
pub fn is_op(s: impl AsRef<str>) -> bool {
return match s.as_ref().chars().next() {
Some(x) => !x.is_alphanumeric(),
None => false

View File

@@ -67,7 +67,7 @@ fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
/// parse an uint from a base determined by its prefix or lack thereof
///
/// Not to be convused with [uint_parser] which is a component of it.
/// Not to be confused with [uint_parser] which is a component of it.
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
choice((
just("0b").ignore_then(pow_uint_parser(2)),

src/parse/parse.rs

@@ -1,75 +1,58 @@
use std::{ops::Range, fmt::Debug};
use std::fmt::Debug;
use chumsky::{prelude::{Simple, end}, Stream, Parser};
use itertools::Itertools;
use lasso::Spur;
use chumsky::{prelude::*, Parser};
use thiserror::Error;
use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}, representations::sourcefile::FileEntry};
use crate::representations::sourcefile::{FileEntry};
use crate::parse::sourcefile::split_lines;
use super::{Lexeme, lexer, line_parser, LexerEntry};
use super::context::Context;
use super::{lexer, line_parser, Entry};
#[derive(Error, Debug, Clone)]
pub enum ParseError {
#[error("Could not tokenize {0:?}")]
Lex(Vec<Simple<char>>),
#[error("Could not parse {0:#?}")]
Ast(Vec<Simple<Lexeme>>)
#[error("Could not parse {:?} on line {}", .0.first().unwrap().1.span(), .0.first().unwrap().0)]
Ast(Vec<(usize, Simple<Entry>)>)
}
pub fn parse<'a, Op, F>(
ops: &[Op], data: &str, intern: &F
) -> Result<Vec<FileEntry>, ParseError>
where
Op: 'a + AsRef<str> + Clone,
F: Fn(&str) -> Spur
/// All the data required for parsing
/// Parse a string of code into a collection of module elements;
/// imports, exports, comments, declarations, etc.
///
/// Notice that because the lexer splits operators based on the provided
/// list, the output will only be correct if the operator list already
/// contains all operators defined or imported by this module.
pub fn parse<'a>(data: &str, ctx: impl Context)
-> Result<Vec<FileEntry>, ParseError>
{
let lexie = lexer(ops);
let token_batchv = split_lines(data).map(|line| {
lexie.parse(line).map_err(ParseError::Lex)
}).collect::<Result<Vec<_>, _>>()?;
println!("Lexed:\n{:?}", LexedText(token_batchv.clone()));
let parsr = line_parser(intern).then_ignore(end());
let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
!v.is_empty()
}).map(|v| {
// Find the first invalid position for Stream::for_iter
let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
// Stream expects tuples, lexer outputs structs
let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
parsr.parse(Stream::from_iter(end..end+1, tuples))
// ^^^^^^^^^^
// I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
// end of input should make little difference
}).map(|res| match res {
Ok(r) => (Some(r), vec![]),
Err(e) => (None, e)
}).unzip::<_, _, Vec<_>, Vec<_>>();
// TODO: wrap `i`, `ops` and `prefix` in a parsing context
let lexie = lexer(ctx.clone());
let token_batchv = lexie.parse(data).map_err(ParseError::Lex)?;
// println!("Lexed:\n{}", LexedText(token_batchv.clone()).bundle(ctx.interner()));
// println!("Lexed:\n{:?}", token_batchv.clone());
let parsr = line_parser(ctx).then_ignore(end());
let (parsed_lines, errors_per_line) = split_lines(&token_batchv)
.enumerate()
.map(|(i, entv)| (i,
entv.iter()
.filter(|e| !e.is_filler())
.cloned()
.collect::<Vec<_>>()
))
.filter(|(_, l)| l.len() > 0)
.map(|(i, l)| (i, parsr.parse(l)))
.map(|(i, res)| match res {
Ok(r) => (Some(r), (i, vec![])),
Err(e) => (None, (i, e))
}).unzip::<_, _, Vec<_>, Vec<_>>();
let total_err = errors_per_line.into_iter()
.flat_map(Vec::into_iter)
.flat_map(|(i, v)| v.into_iter().map(move |e| (i, e)))
.collect::<Vec<_>>();
if !total_err.is_empty() { Err(ParseError::Ast(total_err)) }
else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}
pub fn reparse<'a, Op, F>(
ops: &[Op], data: &str, pre: &[FileEntry], intern: &F
)
-> Result<Vec<FileEntry>, ParseError>
where
Op: 'a + AsRef<str> + Clone,
F: Fn(&str) -> Spur
{
let result = parse(ops, data, intern)?;
Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
*source = s2.clone()
} else {
panic!("Preparse and reparse received different row types!")
}
}
output
}).collect())
}
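
A hedged sketch of consuming this API; `ParseError::Ast` pairs each error with its line index as defined above (imports from this module are assumed):

```rust
// Illustrative error handling around `parse`.
fn report(data: &str, ctx: impl Context) {
    match parse(data, ctx) {
        Ok(entries) => println!("parsed {} entries", entries.len()),
        Err(ParseError::Lex(errs)) => eprintln!("tokenizer errors: {:?}", errs),
        Err(ParseError::Ast(errs)) =>
            for (line, err) in errs {
                eprintln!("parse error on line {}: {:?}", line, err);
            },
    }
}
```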

src/parse/placeholder.rs (new file, 30 lines)

@@ -0,0 +1,30 @@
use chumsky::{Parser, prelude::*};
use crate::ast::{Placeholder, PHClass};
use super::{number::int_parser, context::Context};
pub fn placeholder_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Placeholder, Error = Simple<char>> + 'a
{
choice((
just("...").to(Some(true)),
just("..").to(Some(false)),
empty().to(None)
))
.then(just("$").ignore_then(text::ident()))
.then(just(":").ignore_then(int_parser()).or_not())
.try_map(move |((vec_nonzero, name), vec_prio), span| {
let name = ctx.interner().i(&name);
if let Some(nonzero) = vec_nonzero {
let prio = vec_prio.unwrap_or_default();
Ok(Placeholder { name, class: PHClass::Vec { nonzero, prio } })
} else {
if vec_prio.is_some() {
Err(Simple::custom(span, "Scalar placeholders have no priority"))
} else {
Ok(Placeholder { name, class: PHClass::Scalar })
}
}
})
}
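
Derived directly from the `try_map` branch above, the accepted forms are:

```rust
// $x       => Placeholder { name: "x", class: PHClass::Scalar }
// ..$xs    => PHClass::Vec { nonzero: false, prio: 0 }
// ...$xs   => PHClass::Vec { nonzero: true,  prio: 0 }
// ..$xs:2  => PHClass::Vec { nonzero: false, prio: 2 }
// $x:1     => rejected: "Scalar placeholders have no priority"
```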

src/parse/sourcefile.rs

@@ -1,81 +1,139 @@
use std::iter;
use std::rc::Rc;
use crate::representations::sourcefile::FileEntry;
use crate::enum_parser;
use crate::ast::{Expr, Rule};
use crate::representations::location::Location;
use crate::representations::sourcefile::{FileEntry, Member};
use crate::enum_filter;
use crate::ast::{Rule, Constant, Expr, Clause};
use crate::interner::Token;
use super::expression::{xpr_parser, ns_name_parser};
use super::Entry;
use super::context::Context;
use super::expression::xpr_parser;
use super::import::import_parser;
use super::lexer::Lexeme;
use chumsky::{Parser, prelude::*};
use lasso::Spur;
use ordered_float::NotNan;
use super::lexer::{Lexeme, filter_map_lex};
fn rule_parser<'a, F>(intern: &'a F) -> impl Parser<Lexeme, (
Vec<Expr>, NotNan<f64>, Vec<Expr>
), Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
xpr_parser(intern).repeated()
.then(enum_parser!(Lexeme::Rule))
.then(xpr_parser(intern).repeated())
.map(|((a, b), c)| (a, b, c))
.labelled("Rule")
use chumsky::{Parser, prelude::*};
use itertools::Itertools;
fn rule_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Rule, Error = Simple<Entry>> + 'a
{
xpr_parser(ctx.clone()).repeated().at_least(1)
.then(filter_map_lex(enum_filter!(Lexeme::Rule)))
.then(xpr_parser(ctx).repeated().at_least(1))
.map(|((s, (prio, _)), t)| Rule{
source: Rc::new(s),
prio,
target: Rc::new(t)
}).labelled("Rule")
}
pub fn line_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
fn const_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Constant, Error = Simple<Entry>> + 'a
{
filter_map_lex(enum_filter!(Lexeme::Name))
.then_ignore(Lexeme::Const.parser())
.then(xpr_parser(ctx.clone()).repeated().at_least(1))
.map(move |((name, _), value)| Constant{
name,
value: if let Ok(ex) = value.iter().exactly_one() { ex.clone() }
else {
let start = value.first().expect("value cannot be empty")
.location.range().expect("all locations in parsed source are known")
.start;
let end = value.last().expect("asserted right above")
.location.range().expect("all locations in parsed source are known")
.end;
Expr{
location: Location::Range { file: ctx.file(), range: start..end },
value: Clause::S('(', Rc::new(value))
}
}
})
}
pub fn collect_errors<T, E: chumsky::Error<T>>(e: Vec<E>) -> E {
e.into_iter()
.reduce(chumsky::Error::merge)
.expect("Error list must be non_enmpty")
}
fn namespace_parser<'a>(
line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
) -> impl Parser<Entry, (Token<String>, Vec<FileEntry>), Error = Simple<Entry>> + 'a {
Lexeme::Namespace.parser()
.ignore_then(filter_map_lex(enum_filter!(Lexeme::Name)))
.then(
any().repeated().delimited_by(
Lexeme::LP('{').parser(),
Lexeme::RP('{').parser()
).try_map(move |body, _| {
split_lines(&body)
.map(|l| line.parse(l))
.collect::<Result<Vec<_>,_>>()
.map_err(collect_errors)
})
).map(move |((name, _), body)| {
(name, body)
})
}
fn member_parser<'a>(
line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
ctx: impl Context + 'a
) -> impl Parser<Entry, Member, Error = Simple<Entry>> + 'a {
choice((
// In case the usercode wants to parse doc
enum_parser!(Lexeme >> FileEntry; Comment),
just(Lexeme::Import)
.ignore_then(import_parser(intern).map(FileEntry::Import))
.then_ignore(enum_parser!(Lexeme::Comment).or_not()),
just(Lexeme::Export).map_err_with_span(|e, s| {
println!("{:?} could not yield an export", s); e
}).ignore_then(
just(Lexeme::NS).ignore_then(
ns_name_parser(intern).map(Rc::new)
.separated_by(just(Lexeme::name(",")))
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
).map(FileEntry::Export)
.or(rule_parser(intern).map(|(source, prio, target)| {
FileEntry::Rule(Rule {
source: Rc::new(source),
prio,
target: Rc::new(target)
}, true)
}))
),
// This could match almost anything so it has to go last
rule_parser(intern).map(|(source, prio, target)| {
FileEntry::Rule(Rule{
source: Rc::new(source),
prio,
target: Rc::new(target)
}, false)
}),
namespace_parser(line)
.map(|(name, body)| Member::Namespace(name, body)),
rule_parser(ctx.clone()).map(Member::Rule),
const_parser(ctx).map(Member::Constant),
))
}
pub fn split_lines(data: &str) -> impl Iterator<Item = &str> {
let mut source = data.char_indices();
pub fn line_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a
{
recursive(|line: Recursive<Entry, FileEntry, Simple<Entry>>| {
choice((
// In case the usercode wants to parse doc
filter_map_lex(enum_filter!(Lexeme >> FileEntry; Comment)).map(|(ent, _)| ent),
// plain old imports
Lexeme::Import.parser()
.ignore_then(import_parser(ctx.clone()).map(FileEntry::Import)),
Lexeme::Export.parser().ignore_then(choice((
// token collection
Lexeme::NS.parser().ignore_then(
filter_map_lex(enum_filter!(Lexeme::Name)).map(|(e, _)| e)
.separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
.delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
).map(FileEntry::Export),
// public declaration
member_parser(line.clone(), ctx.clone()).map(FileEntry::Exported)
))),
// This could match almost anything so it has to go last
member_parser(line, ctx).map(FileEntry::Internal),
))
})
}
pub fn split_lines(data: &[Entry]) -> impl Iterator<Item = &[Entry]> {
let mut source = data.iter().enumerate();
let mut last_slice = 0;
iter::from_fn(move || {
let mut paren_count = 0;
while let Some((i, c)) = source.next() {
match c {
'(' | '{' | '[' => paren_count += 1,
')' | '}' | ']' => paren_count -= 1,
'\n' if paren_count == 0 => {
while let Some((i, Entry{ lexeme, .. })) = source.next() {
match lexeme {
Lexeme::LP(_) => paren_count += 1,
Lexeme::RP(_) => paren_count -= 1,
Lexeme::BR if paren_count == 0 => {
let begin = last_slice;
last_slice = i;
last_slice = i+1;
return Some(&data[begin..i]);
},
_ => (),
}
}
None
})
}
}).filter(|s| s.len() > 0)
}
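
Finally, a self-contained model of `split_lines`: it breaks at `BR` tokens only while the paren depth is zero. The token type here is simplified for illustration; note that the real version emits no trailing segment and instead relies on the lexer producing a final `BR`.

```rust
#[derive(Debug)]
enum Tok { LP, RP, BR, Word(&'static str) }

fn split(data: &[Tok]) -> Vec<&[Tok]> {
    let mut out: Vec<&[Tok]> = Vec::new();
    let (mut depth, mut start) = (0i32, 0usize);
    for (i, t) in data.iter().enumerate() {
        match t {
            Tok::LP => depth += 1,
            Tok::RP => depth -= 1,
            Tok::BR if depth == 0 => { out.push(&data[start..i]); start = i + 1; }
            _ => (),
        }
    }
    out.push(&data[start..]); // simplification: also emit the tail
    out.into_iter().filter(|s| !s.is_empty()).collect()
}

fn main() {
    use Tok::*;
    // The BR between the parens does not split the line.
    let toks = [Word("a"), LP, BR, RP, BR, Word("b"), BR];
    assert_eq!(split(&toks).len(), 2);
}
```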