bug fixes and performance improvements
src/parse/context.rs (new file, 48 lines)
@@ -0,0 +1,48 @@
use std::rc::Rc;

use crate::interner::Interner;

/// Trait enclosing all context features
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub trait Context: Clone {
  type Op: AsRef<str>;

  fn ops<'a>(&'a self) -> &'a [Self::Op];
  fn file(&self) -> Rc<Vec<String>>;
  fn interner<'a>(&'a self) -> &'a Interner;
}

/// Struct implementing context
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub struct ParsingContext<'a, Op> {
  pub ops: &'a [Op],
  pub interner: &'a Interner,
  pub file: Rc<Vec<String>>
}

impl<'a, Op> ParsingContext<'a, Op> {
  pub fn new(ops: &'a [Op], interner: &'a Interner, file: Rc<Vec<String>>)
  -> Self { Self { ops, interner, file } }
}

impl<'a, Op> Clone for ParsingContext<'a, Op> {
  fn clone(&self) -> Self {
    Self {
      ops: self.ops,
      interner: self.interner,
      file: self.file.clone()
    }
  }
}

impl<Op: AsRef<str>> Context for ParsingContext<'_, Op> {
  type Op = Op;

  fn interner<'a>(&'a self) -> &'a Interner { self.interner }
  fn file(&self) -> Rc<Vec<String>> { self.file.clone() }
  fn ops<'a>(&'a self) -> &'a [Self::Op] { self.ops }
}
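The point of the associated-type trick above is easiest to see in two signatures. A minimal sketch, assuming it sits in the same module as `Context` with `lasso::Spur` in scope (both function names are invented for illustration):

// Old style: every resource is its own generic parameter.
fn with_generics<'a, Op: AsRef<str>, F: Fn(&str) -> Spur>(
  ops: &'a [Op], intern: &'a F, file: Rc<Vec<String>>
) { /* ... */ }

// New style: one bound covers all resources, and extending Context
// later costs no downstream signature changes.
fn with_context(ctx: impl Context) {
  let _ops = ctx.ops();      // &[impl AsRef<str>]
  let _file = ctx.file();    // Rc<Vec<String>>
  let _int = ctx.interner(); // &Interner
}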
src/parse/enum_filter.rs (new file, 46 lines)
@@ -0,0 +1,46 @@
/// Produces filter_map functions for enum types:
/// ```rs
/// enum_filter!(Foo::Bar | "Some error!") // Maps Foo::Bar(T) to T
/// enum_filter!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar"
/// enum_filter!(Foo >> Quz; Bar, Baz) // Maps Foo::Bar(T) to Quz::Bar(T) and Foo::Baz(U) to Quz::Baz(U)
/// ```
#[macro_export]
macro_rules! enum_filter {
  ($p:path | $m:tt) => {
    {
      |l| {
        if let $p(x) = l { Ok(x) }
        else { Err($m) }
      }
    }
  };
  ($p:path >> $q:path; $i:ident | $m:tt) => {
    {
      use $p as srcpath;
      use $q as tgtpath;
      let base = enum_filter!(srcpath::$i | $m);
      move |l| base(l).map(tgtpath::$i)
    }
  };
  ($p:path >> $q:path; $i:ident) => {
    enum_filter!($p >> $q; $i | {concat!("Expected ", stringify!($i))})
  };
  ($p:path >> $q:path; $($i:ident),+ | $m:tt) => {
    {
      use $p as srcpath;
      use $q as tgtpath;
      |l| match l {
        $( srcpath::$i(x) => Ok(tgtpath::$i(x)), )+
        _ => Err($m)
      }
    }
  };
  ($p:path >> $q:path; $($i:ident),+) => {
    enum_filter!($p >> $q; $($i),+ | {
      concat!("Expected one of ", $(stringify!($i), " "),+)
    })
  };
  ($p:path) => {
    enum_filter!($p | {concat!("Expected ", stringify!($p))})
  };
}
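A hedged usage sketch of the first arm, with an invented `Tok` enum (not from this commit):

#[derive(Debug, PartialEq)]
enum Tok { Num(u64), Name(String) }

fn demo() {
  // Unwraps the payload of the named variant, or yields the message.
  let num = enum_filter!(Tok::Num | "Expected a number");
  assert_eq!(num(Tok::Num(3)), Ok(3));
  assert!(num(Tok::Name("x".into())).is_err());
}

The `>>` arms do the same and then re-wrap the payload in the target enum, which is how the parsers below convert `Lexeme::Literal` into `Primitive::Literal` without spelling out the payload type.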
src/parse/enum_parser.rs (deleted)
@@ -1,32 +0,0 @@
/// Produces parsers for tokenized sequences of enum types:
/// ```rs
/// enum_parser!(Foo::Bar | "Some error!") // Parses Foo::Bar(T) into T
/// enum_parser!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar"
/// enum_parser!(Foo >> Quz; Bar, Baz) // Parses Foo::Bar(T) into Quz::Bar(T) and Foo::Baz(U) into Quz::Baz(U)
/// ```
#[macro_export]
macro_rules! enum_parser {
  ($p:path | $m:tt) => {
    {
      ::chumsky::prelude::filter_map(|s, l| {
        if let $p(x) = l { Ok(x) }
        else { Err(::chumsky::prelude::Simple::custom(s, $m)) }
      })
    }
  };
  ($p:path >> $q:path; $i:ident) => {
    {
      use $p as srcpath;
      use $q as tgtpath;
      enum_parser!(srcpath::$i | (concat!("Expected ", stringify!($i)))).map(tgtpath::$i)
    }
  };
  ($p:path >> $q:path; $($i:ident),+) => {
    {
      ::chumsky::prelude::choice((
        $( enum_parser!($p >> $q; $i) ),+
      ))
    }
  };
  ($p:path) => { enum_parser!($p | (concat!("Expected ", stringify!($p)))) };
}
src/parse/expression.rs
@@ -1,155 +1,107 @@
use std::ops::Range;
use std::rc::Rc;

use chumsky::{self, prelude::*, Parser};
use lasso::Spur;
use crate::enum_parser;
use crate::representations::Primitive;
use crate::representations::{Literal, ast::{Clause, Expr}};

use super::lexer::Lexeme;
use crate::enum_filter;
use crate::representations::Primitive;
use crate::representations::ast::{Clause, Expr};
use crate::representations::location::Location;
use crate::interner::Token;

use super::context::Context;
use super::lexer::{Lexeme, Entry, filter_map_lex};

/// Parses any number of expr wrapped in (), [] or {}
fn sexpr_parser<P>(
  expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
  Lexeme::paren_parser(expr.repeated())
    .map(|(del, b)| Clause::S(del, Rc::new(b)))
fn sexpr_parser(
  expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone {
  let body = expr.repeated();
  choice((
    Lexeme::LP('(').parser().then(body.clone())
      .then(Lexeme::RP('(').parser()),
    Lexeme::LP('[').parser().then(body.clone())
      .then(Lexeme::RP('[').parser()),
    Lexeme::LP('{').parser().then(body.clone())
      .then(Lexeme::RP('{').parser()),
  )).map(|((lp, body), rp)| {
    let Entry{lexeme, range: Range{start, ..}} = lp;
    let end = rp.range.end;
    let char = if let Lexeme::LP(c) = lexeme {c}
    else {unreachable!("The parser only matches Lexeme::LP")};
    (Clause::S(char, Rc::new(body)), start..end)
  }).labelled("S-expression")
}

/// Parses `\name.body` or `\name:type.body` where name is any valid name
/// and type and body are both expressions. Comments are allowed
/// and ignored everywhere in between the tokens
fn lambda_parser<'a, P, F>(
  expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
  P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
  F: Fn(&str) -> Spur + 'a {
  just(Lexeme::BS)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(namelike_parser(intern))
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .then(
      just(Lexeme::Type)
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .ignore_then(expr.clone().repeated())
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .or_not().map(Option::unwrap_or_default)
    )
    .then_ignore(just(Lexeme::name(".")))
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
fn lambda_parser<'a>(
  expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
  ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
  Lexeme::BS.parser()
    .ignore_then(expr.clone())
    .then_ignore(Lexeme::Name(ctx.interner().i(".")).parser())
    .then(expr.repeated().at_least(1))
    .map(|((name, typ), body): ((Clause, Vec<Expr>), Vec<Expr>)| {
      Clause::Lambda(Rc::new(name), Rc::new(typ), Rc::new(body))
    })
}

/// see [lambda_parser] but `@` instead of `\` and the name is optional
fn auto_parser<'a, P, F>(
  expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
  P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
  F: Fn(&str) -> Spur + 'a {
  just(Lexeme::At)
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .ignore_then(namelike_parser(intern).or_not())
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .then(
      just(Lexeme::Type)
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .ignore_then(expr.clone().repeated())
        .then_ignore(enum_parser!(Lexeme::Comment).repeated())
        .or_not().map(Option::unwrap_or_default)
    )
    .then_ignore(just(Lexeme::name(".")))
    .then_ignore(enum_parser!(Lexeme::Comment).repeated())
    .then(expr.repeated().at_least(1))
    .try_map(|((name, typ), body): ((Option<Clause>, Vec<Expr>), Vec<Expr>), s| {
      if name.is_none() && typ.is_empty() {
        Err(Simple::custom(s, "Auto without name or type has no effect"))
      } else {
        Ok(Clause::Auto(name.map(Rc::new), Rc::new(typ), Rc::new(body)))
      }
    })
    .map_with_span(move |(arg, body), span| {
      (Clause::Lambda(Rc::new(arg), Rc::new(body)), span)
    }).labelled("Lambda")
}

/// Parses a sequence of names separated by :: <br/>
/// Comments are allowed and ignored in between
pub fn ns_name_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Vec<Spur>, Error = Simple<Lexeme>> + Clone + 'a
where F: Fn(&str) -> Spur + 'a {
  enum_parser!(Lexeme::Name)
    .map(|s| intern(&s))
    .separated_by(
      enum_parser!(Lexeme::Comment).repeated()
        .then(just(Lexeme::NS))
        .then(enum_parser!(Lexeme::Comment).repeated())
    ).at_least(1)
/// Comments and line breaks are allowed and ignored in between
pub fn ns_name_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Token<Vec<Token<String>>>, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
  filter_map_lex(enum_filter!(Lexeme::Name))
    .separated_by(Lexeme::NS.parser()).at_least(1)
    .map(move |elements| {
      let start = elements.first().expect("can never be empty").1.start;
      let end = elements.last().expect("can never be empty").1.end;
      let tokens =
        /*ctx.prefix().iter().copied().chain*/(
          elements.iter().map(|(t, _)| *t)
        ).collect::<Vec<_>>();
      (ctx.interner().i(&tokens), start..end)
    }).labelled("Namespaced name")
}

/// Parse any legal argument name starting with a `$`
fn placeholder_parser() -> impl Parser<Lexeme, String, Error = Simple<Lexeme>> + Clone {
  enum_parser!(Lexeme::Name).try_map(|name, span| {
    name.strip_prefix('$').map(&str::to_string)
      .ok_or_else(|| Simple::custom(span, "Not a placeholder"))
  })
}

pub fn namelike_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where F: Fn(&str) -> Spur + 'a {
pub fn namelike_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
  choice((
    just(Lexeme::name("...")).to(true)
      .or(just(Lexeme::name("..")).to(false))
      .then(placeholder_parser())
      .then(
        just(Lexeme::Type)
          .ignore_then(enum_parser!(Lexeme::Uint))
          .or_not().map(Option::unwrap_or_default)
      )
      .map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
        prio.try_into().unwrap(),
        nonzero
      ))}),
    ns_name_parser(intern)
      .map(|qualified| Clause::Name(Rc::new(qualified))),
    filter_map_lex(enum_filter!(Lexeme::PH))
      .map(|(ph, range)| (Clause::Placeh(ph), range)),
    ns_name_parser(ctx)
      .map(|(token, range)| (Clause::Name(token), range)),
  ))
}

pub fn clause_parser<'a, P, F>(
  expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
  P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
  F: Fn(&str) -> Spur + 'a {
  enum_parser!(Lexeme::Comment).repeated()
    .ignore_then(choice((
      enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str)
        .map(Primitive::Literal).map(Clause::P),
      placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
      namelike_parser(intern),
pub fn clause_parser<'a>(
  expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
  ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
  choice((
    filter_map_lex(enum_filter!(Lexeme >> Primitive; Literal))
      .map(|(p, s)| (Clause::P(p), s)).labelled("Literal"),
    sexpr_parser(expr.clone()),
    lambda_parser(expr.clone(), intern),
    auto_parser(expr.clone(), intern),
    just(Lexeme::At).ignore_then(expr.clone()).map(|arg| {
      Clause::Explicit(Rc::new(arg))
    })
  ))).then_ignore(enum_parser!(Lexeme::Comment).repeated())
    lambda_parser(expr.clone(), ctx.clone()),
    namelike_parser(ctx),
  )).labelled("Clause")
}

/// Parse an expression
pub fn xpr_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
  recursive(|expr| {
    let clause = clause_parser(expr, intern);
    clause.clone().then(
      just(Lexeme::Type)
        .ignore_then(clause.clone())
        .repeated()
    )
    .map(|(val, typ)| Expr(val, Rc::new(typ)))
pub fn xpr_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Expr, Error = Simple<Entry>> + 'a
{
  recursive(move |expr| {
    clause_parser(expr, ctx.clone())
      .map(move |(value, range)| {
        Expr{
          value: value.clone(),
          location: Location::Range { file: ctx.file(), range }
        }
      })
  }).labelled("Expression")
}
}
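A hedged sketch of how the rewritten pieces chain together, following the pattern in parse.rs below (the operator list, interner handle and source line are invented):

use chumsky::Parser;

fn demo(interner: &Interner) {
  let ops: Vec<String> = vec![];
  let ctx = ParsingContext::new(&ops, interner, Rc::new(vec!["demo".into()]));
  // Stage 1: lex characters into spanned Entry tokens.
  let entries = lexer(ctx.clone()).parse(r"\x. x").unwrap();
  // Stage 2: parse the Entry stream; spans become Location::Range values.
  let expr = xpr_parser(ctx).parse(entries).unwrap();
  let _ = expr.location; // every parsed node now knows its file and range
}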
src/parse/import.rs
@@ -1,16 +1,16 @@
use std::rc::Rc;

use chumsky::{Parser, prelude::*};
use itertools::Itertools;
use lasso::Spur;
use crate::representations::sourcefile::Import;
use crate::utils::iter::{box_once, box_flatten, into_boxed_iter, BoxedIterIter};
use crate::{enum_parser, box_chain};
use crate::interner::Token;
use crate::{box_chain, enum_filter};

use super::lexer::Lexeme;
use super::Entry;
use super::context::Context;
use super::lexer::{Lexeme, filter_map_lex};

/// initialize a BoxedIter<BoxedIter<String>> with a single element.
fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> {
fn init_table(name: Token<String>) -> BoxedIterIter<'static, Token<String>> {
  // I'm not at all confident that this is a good approach.
  box_once(box_once(name))
}
@@ -21,51 +21,54 @@ fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> {
/// preferably contain crossplatform filename-legal characters but the
/// symbols are explicitly allowed to go wild.
/// There's a blacklist in [name]
pub fn import_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
  let globstar = intern("*");
pub fn import_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Vec<Import>, Error = Simple<Entry>> + 'a
{
  // TODO: this algorithm isn't cache friendly and copies a lot
  recursive(move |expr:Recursive<Lexeme, BoxedIterIter<Spur>, Simple<Lexeme>>| {
    enum_parser!(Lexeme::Name).map(|s| intern(s.as_str()))
      .separated_by(just(Lexeme::NS))
      .then(
        just(Lexeme::NS)
          .ignore_then(
            choice((
              expr.clone()
                .separated_by(just(Lexeme::name(",")))
                .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
                .map(|v| box_flatten(v.into_iter()))
                .labelled("import group"),
              // Each expr returns a list of imports, flatten into common list
              just(Lexeme::name("*")).map(move |_| init_table(globstar))
                .labelled("wildcard import"), // Just a *, wrapped
              enum_parser!(Lexeme::Name)
                .map(|s| init_table(intern(s.as_str())))
                .labelled("import terminal") // Just a name, wrapped
            ))
          ).or_not()
      )
      .map(|(name, opt_post): (Vec<Spur>, Option<BoxedIterIter<Spur>>)|
      -> BoxedIterIter<Spur> {
        if let Some(post) = opt_post {
          Box::new(post.map(move |el| {
            box_chain!(name.clone().into_iter(), el)
          }))
        } else {
          box_once(into_boxed_iter(name))
        }
      })
  recursive({
    let ctx = ctx.clone();
    move |expr:Recursive<Entry, BoxedIterIter<Token<String>>, Simple<Entry>>| {
      filter_map_lex(enum_filter!(Lexeme::Name)).map(|(t, _)| t)
        .separated_by(Lexeme::NS.parser())
        .then(
          Lexeme::NS.parser()
            .ignore_then(
              choice((
                expr.clone()
                  .separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
                  .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
                  .map(|v| box_flatten(v.into_iter()))
                  .labelled("import group"),
                // Each expr returns a list of imports, flatten into common list
                Lexeme::Name(ctx.interner().i("*")).parser()
                  .map(move |_| init_table(ctx.interner().i("*")))
                  .labelled("wildcard import"), // Just a *, wrapped
                filter_map_lex(enum_filter!(Lexeme::Name))
                  .map(|(t, _)| init_table(t))
                  .labelled("import terminal") // Just a name, wrapped
              ))
            ).or_not()
        )
        .map(|(name, opt_post): (Vec<Token<String>>, Option<BoxedIterIter<Token<String>>>)|
        -> BoxedIterIter<Token<String>> {
          if let Some(post) = opt_post {
            Box::new(post.map(move |el| {
              box_chain!(name.clone().into_iter(), el)
            }))
          } else {
            box_once(into_boxed_iter(name))
          }
        })
    }
  }).map(move |paths| {
    paths.filter_map(|namespaces| {
      let mut path = namespaces.collect_vec();
      let name = path.pop()?;
      Some(Import {
        path: Rc::new(path),
        path: ctx.interner().i(&path),
        name: {
          if name == globstar { None }
          else { Some(name.to_owned()) }
          if name == ctx.interner().i("*") { None }
          else { Some(name) }
        }
      })
    }).collect()
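For orientation, a hedged trace of the data flow through import_parser (the import line is an invented Orchid-style example, not from this commit):

// Hypothetical source:  import foo::(bar, baz::*)
// The recursive `expr` yields one name-path iterator per leaf:
//   ["foo", "bar"]  and  ["foo", "baz", "*"]
// The trailing .map then pops the last segment off as the import name:
//   Import { path: i(["foo"]),        name: Some("bar") }
//   Import { path: i(["foo", "baz"]), name: None }  // `*` becomes None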
src/parse/lexer.rs
@@ -1,53 +1,88 @@
use std::{ops::Range, iter, fmt};
use ordered_float::NotNan;
use chumsky::{Parser, prelude::*};
use std::fmt::Debug;
use crate::{utils::{BoxedIter, iter::{box_once, box_flatten}}, box_chain};
use std::fmt;
use std::ops::Range;

use ordered_float::NotNan;
use chumsky::{Parser, prelude::*, text::keyword, Span};

use crate::ast::{Placeholder, PHClass};
use crate::representations::Literal;
use crate::interner::{Token, InternedDisplay, Interner};

use super::context::Context;
use super::placeholder;
use super::{number, string, name, comment};

#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Entry(pub Lexeme, pub Range<usize>);
impl Debug for Entry {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
    write!(f, "{:?}", self.0)
    // f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Entry{
  pub lexeme: Lexeme,
  pub range: Range<usize>
}
impl Entry {
  pub fn is_filler(&self) -> bool {
    matches!(self.lexeme, Lexeme::Comment(_))
    || matches!(self.lexeme, Lexeme::BR)
  }
}

impl InternedDisplay for Entry {
  fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
    self.lexeme.fmt_i(f, i)
  }
}

impl From<Entry> for (Lexeme, Range<usize>) {
  fn from(ent: Entry) -> Self {
    (ent.0, ent.1)
    (ent.lexeme, ent.range)
  }
}

#[derive(Clone, PartialEq, Eq, Hash)]
impl Span for Entry {
  type Context = Lexeme;
  type Offset = usize;

  fn context(&self) -> Self::Context {self.lexeme.clone()}
  fn start(&self) -> Self::Offset {self.range.start()}
  fn end(&self) -> Self::Offset {self.range.end()}
  fn new(context: Self::Context, range: Range<Self::Offset>) -> Self {
    Self{
      lexeme: context,
      range
    }
  }
}

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Lexeme {
  Num(NotNan<f64>),
  Uint(u64),
  Char(char),
  Str(String),
  Name(String),
  Literal(Literal),
  Name(Token<String>),
  Rule(NotNan<f64>),
  NS, // namespace separator
  /// Walrus operator (formerly shorthand macro)
  Const,
  /// Line break
  BR,
  /// Namespace separator
  NS,
  /// Left paren
  LP(char),
  /// Right paren
  RP(char),
  BS, // Backslash
  /// Backslash
  BS,
  At,
  Type, // type operator
  Comment(String),
  Export,
  Import,
  Namespace,
  PH(Placeholder)
}

impl Debug for Lexeme {
  fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
impl InternedDisplay for Lexeme {
  fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
    match self {
      Self::Num(n) => write!(f, "{}", n),
      Self::Uint(i) => write!(f, "{}", i),
      Self::Char(c) => write!(f, "{:?}", c),
      Self::Str(s) => write!(f, "{:?}", s),
      Self::Name(name) => write!(f, "{}", name),
      Self::Literal(l) => write!(f, "{:?}", l),
      Self::Name(token) => write!(f, "{}", i.r(*token)),
      Self::Const => write!(f, ":="),
      Self::Rule(prio) => write!(f, "={}=>", prio),
      Self::NS => write!(f, "::"),
      Self::LP(l) => write!(f, "{}", l),
@@ -57,102 +92,114 @@ impl Debug for Lexeme {
      '{' => write!(f, "}}"),
      _ => f.debug_tuple("RP").field(l).finish()
      },
      Self::BR => write!(f, "\n"),
      Self::BS => write!(f, "\\"),
      Self::At => write!(f, "@"),
      Self::Type => write!(f, ":"),
      Self::Comment(text) => write!(f, "--[{}]--", text),
      Self::Export => write!(f, "export"),
      Self::Import => write!(f, "import"),
      Self::Namespace => write!(f, "namespace"),
      Self::PH(Placeholder { name, class }) => match *class {
        PHClass::Scalar => write!(f, "${}", i.r(*name)),
        PHClass::Vec { nonzero, prio } => {
          if nonzero {write!(f, "...")}
          else {write!(f, "..")}?;
          write!(f, "${}", i.r(*name))?;
          if prio != 0 {write!(f, ":{}", prio)?;};
          Ok(())
        }
      }
    }
  }
}

impl Lexeme {
  pub fn name<T: ToString>(n: T) -> Self {
    Lexeme::Name(n.to_string())
  pub fn rule(prio: impl Into<f64>) -> Self {
    Lexeme::Rule(
      NotNan::new(prio.into())
        .expect("Rule priority cannot be NaN")
    )
  }
  pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
    Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
  }
  pub fn paren_parser<T, P>(
    expr: P
  ) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
  where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
    choice((
      expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
        .map(|t| ('(', t)),
      expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
        .map(|t| ('[', t)),
      expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
        .map(|t| ('{', t)),
    ))

  pub fn parser<E: chumsky::Error<Entry>>(self)
  -> impl Parser<Entry, Entry, Error = E> + Clone {
    filter(move |ent: &Entry| ent.lexeme == self)
  }
}

#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LexedText(pub Vec<Vec<Entry>>);
pub struct LexedText(pub Vec<Entry>);

impl Debug for LexedText {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    for row in &self.0 {
      for tok in row {
        tok.fmt(f)?;
        f.write_str(" ")?
      }
      f.write_str("\n")?
impl InternedDisplay for LexedText {
  fn fmt_i(&self, f: &mut fmt::Formatter<'_>, i: &Interner) -> fmt::Result {
    for tok in self.0.iter() {
      tok.fmt_i(f, i)?;
      f.write_str(" ")?
    }
    Ok(())
  }
}

type LexSubres<'a> = BoxedIter<'a, Entry>;

fn paren_parser<'a>(
  expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
  lp: char, rp: char
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
  expr.padded().repeated()
    .map(|x| box_flatten(x.into_iter()))
    .delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
      box_chain!(
        iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1)),
        b,
        iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end))
      )
    })
fn paren_parser(lp: char, rp: char)
-> impl Parser<char, Lexeme, Error=Simple<char>>
{
  just(lp).to(Lexeme::LP(lp))
    .or(just(rp).to(Lexeme::RP(lp)))
}

pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
  let all_ops = ops.iter().map(|o| o.as_ref().to_string())
    .chain([",", ".", "..", "..."].into_iter().map(str::to_string))
pub fn literal_parser() -> impl Parser<char, Literal, Error = Simple<char>> {
  choice((
    number::int_parser().map(Literal::Uint), // all ints are valid floats so it takes precedence
    number::float_parser().map(Literal::Num),
    string::char_parser().map(Literal::Char),
    string::str_parser().map(Literal::Str),
  ))
}

pub static BASE_OPS: &[&str] = &[",", ".", "..", "..."];

pub fn lexer<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
{
  let all_ops = ctx.ops().iter()
    .map(|op| op.as_ref())
    .chain(BASE_OPS.iter().cloned())
    .map(str::to_string)
    .collect::<Vec<_>>();
  just("export").padded().to(Lexeme::Export)
    .or(just("import").padded().to(Lexeme::Import))
    .or_not().then(recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
      choice((
        paren_parser(recurse.clone(), '(', ')'),
        paren_parser(recurse.clone(), '[', ']'),
        paren_parser(recurse.clone(), '{', '}'),
        choice((
          just(":=").padded().to(Lexeme::rule(0f64)),
          just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
          comment::comment_parser().map(Lexeme::Comment),
          just("::").padded().to(Lexeme::NS),
          just('\\').padded().to(Lexeme::BS),
          just('@').padded().to(Lexeme::At),
          just(':').to(Lexeme::Type),
          number::int_parser().map(Lexeme::Uint), // all ints are valid floats so it takes precedence
          number::float_parser().map(Lexeme::Num),
          string::char_parser().map(Lexeme::Char),
          string::str_parser().map(Lexeme::Str),
          name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing
        )).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres)
      ))
    }).separated_by(one_of("\t ").repeated())
    .flatten().collect())
    .map(|(prefix, rest): (Option<Lexeme>, Vec<Entry>)| {
      prefix.into_iter().map(|l| Entry(l, 0..6)).chain(rest.into_iter()).collect()
    })
    .then_ignore(text::whitespace()).then_ignore(end())
  choice((
    keyword("export").to(Lexeme::Export),
    keyword("module").to(Lexeme::Namespace),
    keyword("import").to(Lexeme::Import),
    paren_parser('(', ')'),
    paren_parser('[', ']'),
    paren_parser('{', '}'),
    just(":=").to(Lexeme::Const),
    just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
    comment::comment_parser().map(Lexeme::Comment),
    just("::").to(Lexeme::NS),
    just('\\').to(Lexeme::BS),
    just('@').to(Lexeme::At),
    just(':').to(Lexeme::Type),
    just('\n').to(Lexeme::BR),
    placeholder::placeholder_parser(ctx.clone()).map(Lexeme::PH),
    literal_parser().map(Lexeme::Literal),
    name::name_parser(&all_ops).map(move |n| {
      Lexeme::Name(ctx.interner().i(&n))
    })
  ))
  .map_with_span(|lexeme, range| Entry{ lexeme, range })
  .padded_by(one_of(" \t").repeated())
  .repeated()
  .then_ignore(end())
}

pub fn filter_map_lex<'a, O, M: ToString>(
  f: impl Fn(Lexeme) -> Result<O, M> + Clone + 'a
) -> impl Parser<Entry, (O, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
  filter_map(move |s: Range<usize>, e: Entry| {
    let out = f(e.lexeme).map_err(|msg| Simple::custom(s.clone(), msg))?;
    Ok((out, s))
  })
}
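A rough sketch of what the new lexer emits for one invented input line (spans approximate):

// Input (hypothetical):  export foo := 42
// Output, as (lexeme, range) entries:
//   (Export, 0..6) (Name("foo"), 7..10) (Const, 11..13) (Literal(Uint(42)), 14..16)
// Line breaks lex to BR entries; Entry::is_filler later lets the parser
// drop BR and Comment tokens before parsing each line.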
src/parse/mod.rs
@@ -6,11 +6,14 @@ mod comment;
mod expression;
mod sourcefile;
mod import;
mod enum_parser;
mod parse;
mod enum_filter;
mod placeholder;
mod context;

pub use sourcefile::line_parser;
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
pub use lexer::{lexer, Lexeme, Entry};
pub use name::is_op;
pub use parse::{parse, reparse, ParseError};
pub use number::{float_parser, int_parser};
pub use parse::{parse, ParseError};
pub use number::{float_parser, int_parser};
pub use context::ParsingContext;
src/parse/name.rs
@@ -1,58 +1,69 @@
use chumsky::{self, prelude::*, Parser};

/// Matches any one of the passed operators, longest-first
fn op_parser<'a, T: AsRef<str> + Clone>(ops: &[T]) -> BoxedParser<'a, char, String, Simple<char>> {
  let mut sorted_ops: Vec<String> = ops.iter().map(|t| t.as_ref().to_string()).collect();
/// Matches any one of the passed operators, preferring longer ones
fn op_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> BoxedParser<'a, char, String, Simple<char>>
{
  let mut sorted_ops: Vec<String> = ops.iter()
    .map(|t| t.as_ref().to_string()).collect();
  sorted_ops.sort_by_key(|op| -(op.len() as i64));
  sorted_ops.into_iter()
    .map(|op| just(op).boxed())
    .reduce(|a, b| a.or(b).boxed())
    .unwrap_or_else(|| empty().map(|()| panic!("Empty isn't meant to match")).boxed())
    .labelled("operator").boxed()
    .unwrap_or_else(|| {
      empty().map(|()| panic!("Empty isn't meant to match")).boxed()
    }).labelled("operator").boxed()
}

/// Characters that cannot be parsed as part of an operator
///
/// The initial operator list overrides this.
static NOT_NAME_CHAR: &[char] = &[
  ':', // used for namespacing and type annotations
  '\\', '@', // parametric expression starters
  '"', '\'', // parsed as primitives and therefore would never match
  '(', ')', '[', ']', '{', '}', // must be strictly balanced
  '.', // Argument-body separator in parametrics
  ',', // used in imports
];

/// Matches anything that's allowed as an operator
///
/// Blacklist rationale:
/// - `:` is used for namespacing and type annotations, both are distinguished from operators
/// - `\` and `@` are parametric expression starters
/// - `"` and `'` are read as primitives and would never match.
/// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming.
/// - `.` is the discriminator for parametrics.
/// - ',' is always a standalone single operator, so it can never be part of a name
/// FIXME: `@name` without a dot should be parsed correctly for overrides.
/// Could be an operator but then parametrics should take precedence,
/// which might break stuff. investigate.
///
/// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but
/// then parametrics should take precedence, which might break stuff. investigate.
/// TODO: `'` could work as an operator whenever it isn't closed.
/// It's common in maths so it's worth a try
///
/// TODO: `'` could work as an operator whenever it isn't closed. It's common in maths so it's
/// worth a try
///
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
/// common in maths so it's worth a try. Investigate.
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
  let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '[', ']', '{', '}', ',', '.'];
  filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
/// TODO: `.` could possibly be parsed as an operator in some contexts.
/// This operator is very common in maths so it's worth a try.
/// Investigate.
pub fn modname_parser<'a>()
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
  filter(move |c| !NOT_NAME_CHAR.contains(c) && !c.is_whitespace())
    .repeated().at_least(1)
    .collect()
    .labelled("modname")
}

/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
/// blacklisted character as a new operator.
pub fn name_parser<'a, T: AsRef<str> + Clone>(
  ops: &[T]
) -> impl Parser<char, String, Error = Simple<char>> + 'a {
/// Parse an operator or name. Failing both, parse everything up to
/// the next whitespace or blacklisted character as a new operator.
pub fn name_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
  choice((
    op_parser(ops), // First try to parse a known operator
    text::ident().labelled("plain text"), // Failing that, parse plain text
    modname_parser() // Finally parse everything until the next terminal as a new operator
    modname_parser() // Finally parse everything until the next forbidden char
  ))
  .labelled("name")
}

/// Decide if a string can be an operator. Operators can include digits and text, just not at the
/// start.
pub fn is_op<T: AsRef<str>>(s: T) -> bool {
/// Decide if a string can be an operator. Operators can include digits
/// and text, just not at the start.
pub fn is_op(s: impl AsRef<str>) -> bool {
  return match s.as_ref().chars().next() {
    Some(x) => !x.is_alphanumeric(),
    None => false
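Why the descending-length sort matters: with overlapping operators, the longer one must be tried first or it can never match. A hedged sketch that would live in the same module, since op_parser is private (the operator strings are invented):

fn demo() {
  // Sorted descending, "==" is attempted before "=", so the composite
  // operator survives; unsorted, just("=") would claim the first char.
  let p = op_parser(&["=", "=="]);
  assert_eq!(p.parse("==").unwrap(), "==");
}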
src/parse/number.rs
@@ -67,7 +67,7 @@ fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {

/// parse an uint from a base determined by its prefix or lack thereof
///
/// Not to be convused with [uint_parser] which is a component of it.
/// Not to be confused with [uint_parser] which is a component of it.
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
  choice((
    just("0b").ignore_then(pow_uint_parser(2)),
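The prefix dispatch visible in this truncated hunk suggests inputs like the following; only the 0b arm is shown in the diff, the unprefixed row is an assumption about a fallback arm:

// "0b1010" -> 10   (binary, via pow_uint_parser(2))
// "42"     -> 42   (no prefix: presumably a base-10 fallback arm)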
src/parse/parse.rs
@@ -1,75 +1,58 @@
use std::{ops::Range, fmt::Debug};
use std::fmt::Debug;

use chumsky::{prelude::{Simple, end}, Stream, Parser};
use itertools::Itertools;
use lasso::Spur;
use chumsky::{prelude::*, Parser};
use thiserror::Error;

use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}, representations::sourcefile::FileEntry};
use crate::representations::sourcefile::{FileEntry};
use crate::parse::sourcefile::split_lines;

use super::{Lexeme, lexer, line_parser, LexerEntry};
use super::context::Context;
use super::{lexer, line_parser, Entry};

#[derive(Error, Debug, Clone)]
pub enum ParseError {
  #[error("Could not tokenize {0:?}")]
  Lex(Vec<Simple<char>>),
  #[error("Could not parse {0:#?}")]
  Ast(Vec<Simple<Lexeme>>)
  #[error("Could not parse {:?} on line {}", .0.first().unwrap().1.span(), .0.first().unwrap().0)]
  Ast(Vec<(usize, Simple<Entry>)>)
}

pub fn parse<'a, Op, F>(
  ops: &[Op], data: &str, intern: &F
) -> Result<Vec<FileEntry>, ParseError>
where
  Op: 'a + AsRef<str> + Clone,
  F: Fn(&str) -> Spur
/// All the data required for parsing

/// Parse a string of code into a collection of module elements;
/// imports, exports, comments, declarations, etc.
///
/// Notice that because the lexer splits operators based on the provided
/// list, the output will only be correct if the operator list already
/// contains all operators defined or imported by this module.
pub fn parse<'a>(data: &str, ctx: impl Context)
-> Result<Vec<FileEntry>, ParseError>
{
  let lexie = lexer(ops);
  let token_batchv = split_lines(data).map(|line| {
    lexie.parse(line).map_err(ParseError::Lex)
  }).collect::<Result<Vec<_>, _>>()?;
  println!("Lexed:\n{:?}", LexedText(token_batchv.clone()));
  let parsr = line_parser(intern).then_ignore(end());
  let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
    !v.is_empty()
  }).map(|v| {
    // Find the first invalid position for Stream::for_iter
    let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
    // Stream expects tuples, lexer outputs structs
    let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
    parsr.parse(Stream::from_iter(end..end+1, tuples))
    //                            ^^^^^^^^^^
    // I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
    // end of input should make little difference
  }).map(|res| match res {
    Ok(r) => (Some(r), vec![]),
    Err(e) => (None, e)
  }).unzip::<_, _, Vec<_>, Vec<_>>();
  // TODO: wrap `i`, `ops` and `prefix` in a parsing context
  let lexie = lexer(ctx.clone());
  let token_batchv = lexie.parse(data).map_err(ParseError::Lex)?;
  // println!("Lexed:\n{}", LexedText(token_batchv.clone()).bundle(ctx.interner()));
  // println!("Lexed:\n{:?}", token_batchv.clone());
  let parsr = line_parser(ctx).then_ignore(end());
  let (parsed_lines, errors_per_line) = split_lines(&token_batchv)
    .enumerate()
    .map(|(i, entv)| (i,
      entv.iter()
        .filter(|e| !e.is_filler())
        .cloned()
        .collect::<Vec<_>>()
    ))
    .filter(|(_, l)| l.len() > 0)
    .map(|(i, l)| (i, parsr.parse(l)))
    .map(|(i, res)| match res {
      Ok(r) => (Some(r), (i, vec![])),
      Err(e) => (None, (i, e))
    }).unzip::<_, _, Vec<_>, Vec<_>>();
  let total_err = errors_per_line.into_iter()
    .flat_map(Vec::into_iter)
    .flat_map(|(i, v)| v.into_iter().map(move |e| (i, e)))
    .collect::<Vec<_>>();
  if !total_err.is_empty() { Err(ParseError::Ast(total_err)) }
  else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}

pub fn reparse<'a, Op, F>(
  ops: &[Op], data: &str, pre: &[FileEntry], intern: &F
)
-> Result<Vec<FileEntry>, ParseError>
where
  Op: 'a + AsRef<str> + Clone,
  F: Fn(&str) -> Spur
{
  let result = parse(ops, data, intern)?;
  Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
    if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
      if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
        *source = s2.clone()
      } else {
        panic!("Preparse and reparse received different row types!")
      }
    }
    output
  }).collect())
}
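The reshaped error type keeps a line index alongside each chumsky error, so reports can name the offending line. A hedged illustration of the shape (values invented):

// A failure on the third line surfaces roughly as
//   ParseError::Ast(vec![(2, simple_error)])  // 0-based line index
// and the #[error] attribute prints the span of the first entry.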
src/parse/placeholder.rs (new file, 30 lines)
@@ -0,0 +1,30 @@
use chumsky::{Parser, prelude::*};

use crate::ast::{Placeholder, PHClass};

use super::{number::int_parser, context::Context};

pub fn placeholder_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Placeholder, Error = Simple<char>> + 'a
{
  choice((
    just("...").to(Some(true)),
    just("..").to(Some(false)),
    empty().to(None)
  ))
  .then(just("$").ignore_then(text::ident()))
  .then(just(":").ignore_then(int_parser()).or_not())
  .try_map(move |((vec_nonzero, name), vec_prio), span| {
    let name = ctx.interner().i(&name);
    if let Some(nonzero) = vec_nonzero {
      let prio = vec_prio.unwrap_or_default();
      Ok(Placeholder { name, class: PHClass::Vec { nonzero, prio } })
    } else {
      if vec_prio.is_some() {
        Err(Simple::custom(span, "Scalar placeholders have no priority"))
      } else {
        Ok(Placeholder { name, class: PHClass::Scalar })
      }
    }
  })
}
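For reference, the three surface forms this accepts and the one it rejects (input strings invented, mappings read off the code above):

// "$x"       -> Placeholder { name: x,  class: PHClass::Scalar }
// "..$xs"    -> Placeholder { name: xs, class: PHClass::Vec { nonzero: false, prio: 0 } }
// "...$xs:2" -> Placeholder { name: xs, class: PHClass::Vec { nonzero: true,  prio: 2 } }
// "$x:1"     -> error: "Scalar placeholders have no priority"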
src/parse/sourcefile.rs
@@ -1,81 +1,139 @@
use std::iter;
use std::rc::Rc;

use crate::representations::sourcefile::FileEntry;
use crate::enum_parser;
use crate::ast::{Expr, Rule};
use crate::representations::location::Location;
use crate::representations::sourcefile::{FileEntry, Member};
use crate::enum_filter;
use crate::ast::{Rule, Constant, Expr, Clause};
use crate::interner::Token;

use super::expression::{xpr_parser, ns_name_parser};
use super::Entry;
use super::context::Context;
use super::expression::xpr_parser;
use super::import::import_parser;
use super::lexer::Lexeme;
use chumsky::{Parser, prelude::*};
use lasso::Spur;
use ordered_float::NotNan;
use super::lexer::{Lexeme, filter_map_lex};

fn rule_parser<'a, F>(intern: &'a F) -> impl Parser<Lexeme, (
  Vec<Expr>, NotNan<f64>, Vec<Expr>
), Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
  xpr_parser(intern).repeated()
    .then(enum_parser!(Lexeme::Rule))
    .then(xpr_parser(intern).repeated())
    .map(|((a, b), c)| (a, b, c))
    .labelled("Rule")
use chumsky::{Parser, prelude::*};
use itertools::Itertools;

fn rule_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Rule, Error = Simple<Entry>> + 'a
{
  xpr_parser(ctx.clone()).repeated().at_least(1)
    .then(filter_map_lex(enum_filter!(Lexeme::Rule)))
    .then(xpr_parser(ctx).repeated().at_least(1))
    .map(|((s, (prio, _)), t)| Rule{
      source: Rc::new(s),
      prio,
      target: Rc::new(t)
    }).labelled("Rule")
}

pub fn line_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
fn const_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Constant, Error = Simple<Entry>> + 'a
{
  filter_map_lex(enum_filter!(Lexeme::Name))
    .then_ignore(Lexeme::Const.parser())
    .then(xpr_parser(ctx.clone()).repeated().at_least(1))
    .map(move |((name, _), value)| Constant{
      name,
      value: if let Ok(ex) = value.iter().exactly_one() { ex.clone() }
      else {
        let start = value.first().expect("value cannot be empty")
          .location.range().expect("all locations in parsed source are known")
          .start;
        let end = value.last().expect("asserted right above")
          .location.range().expect("all locations in parsed source are known")
          .end;
        Expr{
          location: Location::Range { file: ctx.file(), range: start..end },
          value: Clause::S('(', Rc::new(value))
        }
      }
    })
}

pub fn collect_errors<T, E: chumsky::Error<T>>(e: Vec<E>) -> E {
  e.into_iter()
    .reduce(chumsky::Error::merge)
    .expect("Error list must be non-empty")
}

fn namespace_parser<'a>(
  line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
) -> impl Parser<Entry, (Token<String>, Vec<FileEntry>), Error = Simple<Entry>> + 'a {
  Lexeme::Namespace.parser()
    .ignore_then(filter_map_lex(enum_filter!(Lexeme::Name)))
    .then(
      any().repeated().delimited_by(
        Lexeme::LP('{').parser(),
        Lexeme::RP('{').parser()
      ).try_map(move |body, _| {
        split_lines(&body)
          .map(|l| line.parse(l))
          .collect::<Result<Vec<_>,_>>()
          .map_err(collect_errors)
      })
    ).map(move |((name, _), body)| {
      (name, body)
    })
}

fn member_parser<'a>(
  line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
  ctx: impl Context + 'a
) -> impl Parser<Entry, Member, Error = Simple<Entry>> + 'a {
  choice((
    // In case the usercode wants to parse doc
    enum_parser!(Lexeme >> FileEntry; Comment),
    just(Lexeme::Import)
      .ignore_then(import_parser(intern).map(FileEntry::Import))
      .then_ignore(enum_parser!(Lexeme::Comment).or_not()),
    just(Lexeme::Export).map_err_with_span(|e, s| {
      println!("{:?} could not yield an export", s); e
    }).ignore_then(
      just(Lexeme::NS).ignore_then(
        ns_name_parser(intern).map(Rc::new)
          .separated_by(just(Lexeme::name(",")))
          .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
      ).map(FileEntry::Export)
      .or(rule_parser(intern).map(|(source, prio, target)| {
        FileEntry::Rule(Rule {
          source: Rc::new(source),
          prio,
          target: Rc::new(target)
        }, true)
      }))
    ),
    // This could match almost anything so it has to go last
    rule_parser(intern).map(|(source, prio, target)| {
      FileEntry::Rule(Rule{
        source: Rc::new(source),
        prio,
        target: Rc::new(target)
      }, false)
    }),
    namespace_parser(line)
      .map(|(name, body)| Member::Namespace(name, body)),
    rule_parser(ctx.clone()).map(Member::Rule),
    const_parser(ctx).map(Member::Constant),
  ))
}

pub fn split_lines(data: &str) -> impl Iterator<Item = &str> {
  let mut source = data.char_indices();
pub fn line_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a
{
  recursive(|line: Recursive<Entry, FileEntry, Simple<Entry>>| {
    choice((
      // In case the usercode wants to parse doc
      filter_map_lex(enum_filter!(Lexeme >> FileEntry; Comment)).map(|(ent, _)| ent),
      // plain old imports
      Lexeme::Import.parser()
        .ignore_then(import_parser(ctx.clone()).map(FileEntry::Import)),
      Lexeme::Export.parser().ignore_then(choice((
        // token collection
        Lexeme::NS.parser().ignore_then(
          filter_map_lex(enum_filter!(Lexeme::Name)).map(|(e, _)| e)
            .separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
            .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
        ).map(FileEntry::Export),
        // public declaration
        member_parser(line.clone(), ctx.clone()).map(FileEntry::Exported)
      ))),
      // This could match almost anything so it has to go last
      member_parser(line, ctx).map(FileEntry::Internal),
    ))
  })
}

pub fn split_lines(data: &[Entry]) -> impl Iterator<Item = &[Entry]> {
  let mut source = data.iter().enumerate();
  let mut last_slice = 0;
  iter::from_fn(move || {
    let mut paren_count = 0;
    while let Some((i, c)) = source.next() {
      match c {
        '(' | '{' | '[' => paren_count += 1,
        ')' | '}' | ']' => paren_count -= 1,
        '\n' if paren_count == 0 => {
    while let Some((i, Entry{ lexeme, .. })) = source.next() {
      match lexeme {
        Lexeme::LP(_) => paren_count += 1,
        Lexeme::RP(_) => paren_count -= 1,
        Lexeme::BR if paren_count == 0 => {
          let begin = last_slice;
          last_slice = i;
          last_slice = i+1;
          return Some(&data[begin..i]);
        },
        _ => (),
      }
    }
    None
  })
}
  }).filter(|s| s.len() > 0)
}
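A hedged trace of the new split_lines on an invented token stream (BR marks line breaks):

// Tokens:  a := ( BR b ) BR c := d
// The BR inside the parens is seen at paren_count == 1 and skipped;
// the one after `)` is a split point, so the iterator yields:
//   [a := ( BR b )]   and   [c := d]
// The inner BR stays in its slice; parse() above drops it later
// via Entry::is_filler before handing the line to line_parser.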