forked from Orchid/orchid
Preparation for sharing
- rustfmt - clippy - comments - README
@@ -1,13 +1,15 @@
pub use chumsky::{self, prelude::*, Parser};
pub use chumsky::prelude::*;
pub use chumsky::{self, Parser};

use super::decls::SimpleParser;

/// Parses Lua-style comments
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
pub fn comment_parser() -> impl SimpleParser<char, String> {
  choice((
    just("--[").ignore_then(take_until(
      just("]--").ignored()
    )),
    just("--").ignore_then(take_until(
      just("\n").rewind().ignored().or(end())
    ))
  )).map(|(vc, ())| vc).collect().labelled("comment")
    just("--[").ignore_then(take_until(just("]--").ignored())),
    just("--").ignore_then(take_until(just("\n").rewind().ignored().or(end()))),
  ))
  .map(|(vc, ())| vc)
  .collect()
  .labelled("comment")
}
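As a quick sanity check, here is the same grammar as a self-contained snippet (an editor's sketch, not part of the commit; assumes chumsky 0.8 with `take_until`/`rewind`):

```rust
use chumsky::prelude::*;

// The comment grammar above, with the SimpleParser alias expanded.
fn comment() -> impl Parser<char, String, Error = Simple<char>> {
  choice((
    // `--[ ... ]--` block comment; take_until also consumes the terminator
    just("--[").ignore_then(take_until(just("]--").ignored())),
    // `--` line comment; rewind leaves the newline for the lexer's BR rule
    just("--").ignore_then(take_until(just("\n").rewind().ignored().or(end()))),
  ))
  .map(|(chars, ())| chars)
  .collect()
}

fn main() {
  assert_eq!(comment().parse("-- hi"), Ok(" hi".to_string()));
  assert_eq!(comment().parse("--[ a\nb ]--"), Ok(" a\nb ".to_string()));
}
```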
@@ -3,46 +3,53 @@ use std::rc::Rc;
use crate::interner::Interner;

/// Trait enclosing all context features
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub trait Context: Clone {
  type Op: AsRef<str>;

  fn ops<'a>(&'a self) -> &'a [Self::Op];
  fn ops(&self) -> &[Self::Op];
  fn file(&self) -> Rc<Vec<String>>;
  fn interner<'a>(&'a self) -> &'a Interner;
  fn interner(&self) -> &Interner;
}

/// Struct implementing context
///
/// Hiding type parameters in associated types allows for simpler
/// parser definitions
pub struct ParsingContext<'a, Op> {
  pub ops: &'a [Op],
  pub interner: &'a Interner,
  pub file: Rc<Vec<String>>
  pub file: Rc<Vec<String>>,
}

impl<'a, Op> ParsingContext<'a, Op> {
  pub fn new(ops: &'a [Op], interner: &'a Interner, file: Rc<Vec<String>>)
  -> Self { Self { ops, interner, file } }
  pub fn new(
    ops: &'a [Op],
    interner: &'a Interner,
    file: Rc<Vec<String>>,
  ) -> Self {
    Self { ops, interner, file }
  }
}

impl<'a, Op> Clone for ParsingContext<'a, Op> {
  fn clone(&self) -> Self {
    Self {
      ops: self.ops,
      interner: self.interner,
      file: self.file.clone()
    }
    Self { ops: self.ops, interner: self.interner, file: self.file.clone() }
  }
}

impl<Op: AsRef<str>> Context for ParsingContext<'_, Op> {
  type Op = Op;

  fn interner<'a>(&'a self) -> &'a Interner { self.interner }
  fn file(&self) -> Rc<Vec<String>> {self.file.clone()}
  fn ops<'a>(&'a self) -> &'a [Self::Op] { self.ops }
}
  fn interner(&self) -> &Interner {
    self.interner
  }
  fn file(&self) -> Rc<Vec<String>> {
    self.file.clone()
  }
  fn ops(&self) -> &[Self::Op] {
    self.ops
  }
}
src/parse/decls.rs (new file)
@@ -0,0 +1,14 @@
use std::hash::Hash;

use chumsky::prelude::Simple;
use chumsky::recursive::Recursive;
use chumsky::{BoxedParser, Parser};

/// Wrapper around [Parser] with [Simple] error to avoid repeating the input
pub trait SimpleParser<I: Eq + Hash + Clone, O> =
  Parser<I, O, Error = Simple<I>>;
/// Boxed version of [SimpleParser]
pub type BoxedSimpleParser<'a, I, O> = BoxedParser<'a, I, O, Simple<I>>;
/// [Recursive] specialization of [SimpleParser] to parameterize calls to
/// [chumsky::recursive::recursive]
pub type SimpleRecursive<'a, I, O> = Recursive<'a, I, O, Simple<I>>;
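Note that `trait SimpleParser<...> = ...` is a trait alias, a nightly-only feature at the time of writing (`#![feature(trait_alias)]`). A minimal sketch of the pattern, with a hypothetical `digits` parser to show how it shortens signatures:

```rust
#![feature(trait_alias)] // nightly-only when this was written
use std::hash::Hash;

use chumsky::prelude::*;

trait SimpleParser<I: Eq + Hash + Clone, O> = Parser<I, O, Error = Simple<I>>;

// Without the alias this would be: impl Parser<char, String, Error = Simple<char>>
fn digits() -> impl SimpleParser<char, String> {
  filter(|c: &char| c.is_ascii_digit()).repeated().at_least(1).collect()
}

fn main() {
  assert_eq!(digits().parse("42"), Ok("42".to_string()));
}
```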
@@ -1,8 +1,12 @@
/// Produces filter_mapping functions for enum types:
/// ```rs
/// enum_filter!(Foo::Bar | "Some error!") // Accepts Foo::Bar(T) into T
/// enum_filter!(Foo::Bar) // same as above but with the default error "Expected Foo::Bar"
/// enum_filter!(Foo >> Quz; Bar, Baz) // Parses Foo::Bar(T) into Quz::Bar(T) and Foo::Baz(U) into Quz::Baz(U)
/// enum_filter!(Foo::Bar | "Some error!")
/// // Foo::Bar(T) into T
/// enum_filter!(Foo::Bar)
/// // same as above but with the default error "Expected Foo::Bar"
/// enum_filter!(Foo >> Quz; Bar, Baz)
/// // Foo::Bar(T) into Quz::Bar(T)
/// // Foo::Baz(U) into Quz::Baz(U)
/// ```
#[macro_export]
macro_rules! enum_filter {
@@ -43,4 +47,4 @@ macro_rules! enum_filter {
  ($p:path) => {
    enum_filter!($p | {concat!("Expected ", stringify!($p))})
  };
}
}
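For intuition, the single-variant form produces roughly the following filter_map closure (a hand-written sketch over a made-up `Foo` enum, not the actual macro expansion):

```rust
#[derive(Clone, Debug, PartialEq)]
enum Foo {
  Bar(u64),
  Baz(char),
}

// Roughly what enum_filter!(Foo::Bar | "Some error!") yields: a cloneable
// closure for chumsky's filter_map that unwraps one variant or errors.
fn bar_filter() -> impl Fn(Foo) -> Result<u64, &'static str> + Clone {
  |lexeme| match lexeme {
    Foo::Bar(x) => Ok(x),
    _ => Err("Some error!"),
  }
}

fn main() {
  assert_eq!(bar_filter()(Foo::Bar(3)), Ok(3));
  assert!(bar_filter()(Foo::Baz('c')).is_err());
}
```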
@@ -1,107 +1,110 @@
use std::ops::Range;
use std::rc::Rc;

use chumsky::{self, prelude::*, Parser};

use crate::enum_filter;
use crate::representations::Primitive;
use crate::representations::ast::{Clause, Expr};
use crate::representations::location::Location;
use crate::interner::Token;
use chumsky::prelude::*;
use chumsky::{self, Parser};

use super::context::Context;
use super::lexer::{Lexeme, Entry, filter_map_lex};
use super::decls::SimpleParser;
use super::lexer::{filter_map_lex, Entry, Lexeme};
use crate::enum_filter;
use crate::interner::Sym;
use crate::representations::ast::{Clause, Expr};
use crate::representations::location::Location;
use crate::representations::Primitive;

/// Parses any number of expr wrapped in (), [] or {}
fn sexpr_parser(
  expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone {
  expr: impl SimpleParser<Entry, Expr> + Clone,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone {
  let body = expr.repeated();
  choice((
    Lexeme::LP('(').parser().then(body.clone())
      .then(Lexeme::RP('(').parser()),
    Lexeme::LP('[').parser().then(body.clone())
      .then(Lexeme::RP('[').parser()),
    Lexeme::LP('{').parser().then(body.clone())
      .then(Lexeme::RP('{').parser()),
  )).map(|((lp, body), rp)| {
    let Entry{lexeme, range: Range{start, ..}} = lp;
    Lexeme::LP('(').parser().then(body.clone()).then(Lexeme::RP('(').parser()),
    Lexeme::LP('[').parser().then(body.clone()).then(Lexeme::RP('[').parser()),
    Lexeme::LP('{').parser().then(body).then(Lexeme::RP('{').parser()),
  ))
  .map(|((lp, body), rp)| {
    let Entry { lexeme, range: Range { start, .. } } = lp;
    let end = rp.range.end;
    let char = if let Lexeme::LP(c) = lexeme {c}
    else {unreachable!("The parser only matches Lexeme::LP")};
    let char = if let Lexeme::LP(c) = lexeme {
      c
    } else {
      unreachable!("The parser only matches Lexeme::LP")
    };
    (Clause::S(char, Rc::new(body)), start..end)
  }).labelled("S-expression")
  })
  .labelled("S-expression")
}

/// Parses `\name.body` or `\name:type.body` where name is any valid name
/// and type and body are both expressions. Comments are allowed
/// and ignored everywhere in between the tokens
fn lambda_parser<'a>(
  expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
  ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
  Lexeme::BS.parser()
    .ignore_then(expr.clone())
    .then_ignore(Lexeme::Name(ctx.interner().i(".")).parser())
    .then(expr.repeated().at_least(1))
    .map_with_span(move |(arg, body), span| {
      (Clause::Lambda(Rc::new(arg), Rc::new(body)), span)
    }).labelled("Lambda")
  expr: impl SimpleParser<Entry, Expr> + Clone + 'a,
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone + 'a {
  Lexeme::BS
    .parser()
    .ignore_then(expr.clone())
    .then_ignore(Lexeme::Name(ctx.interner().i(".")).parser())
    .then(expr.repeated().at_least(1))
    .map_with_span(move |(arg, body), span| {
      (Clause::Lambda(Rc::new(arg), Rc::new(body)), span)
    })
    .labelled("Lambda")
}

/// Parses a sequence of names separated by :: <br/>
/// Comments and line breaks are allowed and ignored in between
pub fn ns_name_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Token<Vec<Token<String>>>, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
pub fn ns_name_parser<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Sym, Range<usize>)> + Clone + 'a {
  filter_map_lex(enum_filter!(Lexeme::Name))
    .separated_by(Lexeme::NS.parser()).at_least(1)
    .separated_by(Lexeme::NS.parser())
    .at_least(1)
    .map(move |elements| {
      let start = elements.first().expect("can never be empty").1.start;
      let end = elements.last().expect("can never be empty").1.end;
      let tokens =
        /*ctx.prefix().iter().copied().chain*/(
          elements.iter().map(|(t, _)| *t)
        ).collect::<Vec<_>>();
      let tokens = (elements.iter().map(|(t, _)| *t)).collect::<Vec<_>>();
      (ctx.interner().i(&tokens), start..end)
    }).labelled("Namespaced name")
    })
    .labelled("Namespaced name")
}

pub fn namelike_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a
{
pub fn namelike_parser<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone + 'a {
  choice((
    filter_map_lex(enum_filter!(Lexeme::PH))
      .map(|(ph, range)| (Clause::Placeh(ph), range)),
    ns_name_parser(ctx)
      .map(|(token, range)| (Clause::Name(token), range)),
    ns_name_parser(ctx).map(|(token, range)| (Clause::Name(token), range)),
  ))
}

pub fn clause_parser<'a>(
  expr: impl Parser<Entry, Expr, Error = Simple<Entry>> + Clone + 'a,
  ctx: impl Context + 'a
) -> impl Parser<Entry, (Clause, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
  expr: impl SimpleParser<Entry, Expr> + Clone + 'a,
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, (Clause, Range<usize>)> + Clone + 'a {
  choice((
    filter_map_lex(enum_filter!(Lexeme >> Primitive; Literal))
      .map(|(p, s)| (Clause::P(p), s)).labelled("Literal"),
      .map(|(p, s)| (Clause::P(p), s))
      .labelled("Literal"),
    sexpr_parser(expr.clone()),
    lambda_parser(expr.clone(), ctx.clone()),
    lambda_parser(expr, ctx.clone()),
    namelike_parser(ctx),
  )).labelled("Clause")
  ))
  .labelled("Clause")
}

/// Parse an expression
pub fn xpr_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Expr, Error = Simple<Entry>> + 'a
{
pub fn xpr_parser<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Expr> + 'a {
  recursive(move |expr| {
    clause_parser(expr, ctx.clone())
      .map(move |(value, range)| {
        Expr{
          value: value.clone(),
          location: Location::Range { file: ctx.file(), range }
        }
    clause_parser(expr, ctx.clone()).map(move |(value, range)| Expr {
      value,
      location: Location::Range { file: ctx.file(), range },
    })
  }).labelled("Expression")
}
  })
  .labelled("Expression")
}
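A hedged usage sketch (a fragment, not part of the commit): given a `Context` impl `ctx`, an expression is parsed from lexed entries with filler stripped, the way the facade does it.

```rust
// `\x. add x 1` is one lambda expression whose body is [add, x, 1].
let tokens = lexer(ctx.clone()).parse(r"\x. add x 1").unwrap();
let entries: Vec<_> =
  tokens.into_iter().filter(|e| !e.is_filler()).collect();
let expr = xpr_parser(ctx).parse(entries).unwrap();
// expr.value should match Clause::Lambda(..)
```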
@@ -1,58 +1,59 @@
use std::fmt::Debug;

use chumsky::{prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::Parser;
use thiserror::Error;

use crate::representations::sourcefile::{FileEntry};
use crate::parse::sourcefile::split_lines;

use super::context::Context;
use super::{lexer, line_parser, Entry};

use crate::parse::sourcefile::split_lines;
use crate::representations::sourcefile::FileEntry;

#[derive(Error, Debug, Clone)]
pub enum ParseError {
  #[error("Could not tokenize {0:?}")]
  Lex(Vec<Simple<char>>),
  #[error("Could not parse {:?} on line {}", .0.first().unwrap().1.span(), .0.first().unwrap().0)]
  Ast(Vec<(usize, Simple<Entry>)>)
  #[error(
    "Could not parse {:?} on line {}",
    .0.first().unwrap().1.span(),
    .0.first().unwrap().0
  )]
  Ast(Vec<(usize, Simple<Entry>)>),
}

/// All the data required for parsing

/// Parse a string of code into a collection of module elements;
/// imports, exports, comments, declarations, etc.
///
/// Notice that because the lexer splits operators based on the provided
/// list, the output will only be correct if the operator list already
/// contains all operators defined or imported by this module.
pub fn parse<'a>(data: &str, ctx: impl Context)
-> Result<Vec<FileEntry>, ParseError>
{
pub fn parse(
  data: &str,
  ctx: impl Context,
) -> Result<Vec<FileEntry>, ParseError> {
  // TODO: wrap `i`, `ops` and `prefix` in a parsing context
  let lexie = lexer(ctx.clone());
  let token_batchv = lexie.parse(data).map_err(ParseError::Lex)?;
  // println!("Lexed:\n{}", LexedText(token_batchv.clone()).bundle(ctx.interner()));
  // println!("Lexed:\n{:?}", token_batchv.clone());
  let parsr = line_parser(ctx).then_ignore(end());
  let (parsed_lines, errors_per_line) = split_lines(&token_batchv)
    .enumerate()
    .map(|(i, entv)| (i,
      entv.iter()
        .filter(|e| !e.is_filler())
        .cloned()
        .collect::<Vec<_>>()
    ))
    .filter(|(_, l)| l.len() > 0)
    .map(|(i, entv)| {
      (i, entv.iter().filter(|e| !e.is_filler()).cloned().collect::<Vec<_>>())
    })
    .filter(|(_, l)| !l.is_empty())
    .map(|(i, l)| (i, parsr.parse(l)))
    .map(|(i, res)| match res {
      Ok(r) => (Some(r), (i, vec![])),
      Err(e) => (None, (i, e))
    }).unzip::<_, _, Vec<_>, Vec<_>>();
  let total_err = errors_per_line.into_iter()
      Err(e) => (None, (i, e)),
    })
    .unzip::<_, _, Vec<_>, Vec<_>>();
  let total_err = errors_per_line
    .into_iter()
    .flat_map(|(i, v)| v.into_iter().map(move |e| (i, e)))
    .collect::<Vec<_>>();
  if !total_err.is_empty() { Err(ParseError::Ast(total_err)) }
  else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
  if !total_err.is_empty() {
    Err(ParseError::Ast(total_err))
  } else {
    Ok(parsed_lines.into_iter().map(Option::unwrap).collect())
  }
}
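A hedged sketch of driving the facade (assumes the crate's `Interner` plus the `ParsingContext` and `parse` exports shown elsewhere in this commit; the operator list and file name are placeholders):

```rust
use std::rc::Rc;

fn parse_demo(interner: &Interner) {
  // The list must already contain every operator the module defines or imports.
  let ops = ["+", "-"];
  let file = Rc::new(vec!["demo".to_string()]);
  let ctx = ParsingContext::new(&ops, interner, file);
  match parse("export main := 42\n", ctx) {
    Ok(entries) => println!("parsed {} line(s)", entries.len()),
    Err(err) => eprintln!("{err}"), // ParseError derives Display via thiserror
  }
}
```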
@@ -1,16 +1,20 @@
use chumsky::{Parser, prelude::*};
use chumsky::prelude::*;
use chumsky::Parser;
use itertools::Itertools;

use super::context::Context;
use super::decls::{SimpleParser, SimpleRecursive};
use super::lexer::{filter_map_lex, Lexeme};
use super::Entry;
use crate::interner::Tok;
use crate::representations::sourcefile::Import;
use crate::utils::iter::{box_once, box_flatten, into_boxed_iter, BoxedIterIter};
use crate::interner::Token;
use crate::utils::iter::{
  box_flatten, box_once, into_boxed_iter, BoxedIterIter,
};
use crate::{box_chain, enum_filter};

use super::Entry;
use super::context::Context;
use super::lexer::{Lexeme, filter_map_lex};

/// initialize a BoxedIter<BoxedIter<String>> with a single element.
fn init_table(name: Token<String>) -> BoxedIterIter<'static, Token<String>> {
fn init_table(name: Tok<String>) -> BoxedIterIter<'static, Tok<String>> {
  // I'm not at all confident that this is a good approach.
  box_once(box_once(name))
}
@@ -21,56 +25,74 @@ fn init_table(name: Token<String>) -> BoxedIterIter<'static, Token<String>> {
/// preferably contain cross-platform filename-legal characters but the
/// symbols are explicitly allowed to go wild.
/// There's a blacklist in [name]
pub fn import_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Vec<Import>, Error = Simple<Entry>> + 'a
{
pub fn import_parser<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Vec<Import>> + 'a {
  // TODO: this algorithm isn't cache friendly and copies a lot
  recursive({
    let ctx = ctx.clone();
    move |expr:Recursive<Entry, BoxedIterIter<Token<String>>, Simple<Entry>>| {
      filter_map_lex(enum_filter!(Lexeme::Name)).map(|(t, _)| t)
        .separated_by(Lexeme::NS.parser())
        .then(
          Lexeme::NS.parser()
            .ignore_then(
              choice((
                expr.clone()
                  .separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
                  .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
                  .map(|v| box_flatten(v.into_iter()))
                  .labelled("import group"),
                // Each expr returns a list of imports, flatten into common list
                Lexeme::Name(ctx.interner().i("*")).parser()
                  .map(move |_| init_table(ctx.interner().i("*")))
                  .labelled("wildcard import"), // Just a *, wrapped
                filter_map_lex(enum_filter!(Lexeme::Name))
                  .map(|(t, _)| init_table(t))
                  .labelled("import terminal") // Just a name, wrapped
              ))
            ).or_not()
        )
        .map(|(name, opt_post): (Vec<Token<String>>, Option<BoxedIterIter<Token<String>>>)|
        -> BoxedIterIter<Token<String>> {
          if let Some(post) = opt_post {
            Box::new(post.map(move |el| {
              box_chain!(name.clone().into_iter(), el)
            }))
          } else {
            box_once(into_boxed_iter(name))
          }
        })
    move |expr: SimpleRecursive<Entry, BoxedIterIter<Tok<String>>>| {
      filter_map_lex(enum_filter!(Lexeme::Name))
        .map(|(t, _)| t)
        .separated_by(Lexeme::NS.parser())
        .then(
          Lexeme::NS
            .parser()
            .ignore_then(choice((
              expr
                .clone()
                .separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
                .delimited_by(
                  Lexeme::LP('(').parser(),
                  Lexeme::RP('(').parser(),
                )
                .map(|v| box_flatten(v.into_iter()))
                .labelled("import group"),
              // Each expr returns a list of imports, flatten into common list
              Lexeme::Name(ctx.interner().i("*"))
                .parser()
                .map(move |_| init_table(ctx.interner().i("*")))
                .labelled("wildcard import"), // Just a *, wrapped
              filter_map_lex(enum_filter!(Lexeme::Name))
                .map(|(t, _)| init_table(t))
                .labelled("import terminal"), // Just a name, wrapped
            )))
            .or_not(),
        )
        .map(
          |(name, opt_post): (
            Vec<Tok<String>>,
            Option<BoxedIterIter<Tok<String>>>,
          )|
           -> BoxedIterIter<Tok<String>> {
            if let Some(post) = opt_post {
              Box::new(
                post.map(move |el| box_chain!(name.clone().into_iter(), el)),
              )
            } else {
              box_once(into_boxed_iter(name))
            }
          },
        )
    }
  }).map(move |paths| {
    paths.filter_map(|namespaces| {
      let mut path = namespaces.collect_vec();
      let name = path.pop()?;
      Some(Import {
        path: ctx.interner().i(&path),
        name: {
          if name == ctx.interner().i("*") { None }
          else { Some(name) }
        }
      })
  .map(move |paths| {
    paths
      .filter_map(|namespaces| {
        let mut path = namespaces.collect_vec();
        let name = path.pop()?;
        Some(Import {
          path: ctx.interner().i(&path),
          name: {
            if name == ctx.interner().i("*") {
              None
            } else {
              Some(name)
            }
          },
        })
      })
  }).collect()
  }).labelled("import")
      .collect()
  })
  .labelled("import")
}
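An illustration of the tree syntax this accepts (a fragment assuming a `Context` impl `ctx` and the lexer from this commit; `import_parser` sees the tokens after the `import` keyword):

```rust
// `import std::(io, text::*)` yields the cross product of the branches.
let tokens = lexer(ctx.clone()).parse("std::(io, text::*)").unwrap();
let imports = import_parser(ctx).parse(tokens).unwrap();
// Expected, roughly:
//   Import { path: std, name: Some(io) }
//   Import { path: std::text, name: None } // wildcard
assert_eq!(imports.len(), 2);
```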
@@ -1,31 +1,36 @@
use std::fmt;
use std::ops::Range;

use chumsky::prelude::*;
use chumsky::text::keyword;
use chumsky::{Parser, Span};
use ordered_float::NotNan;
use chumsky::{Parser, prelude::*, text::keyword, Span};

use crate::ast::{Placeholder, PHClass};
use crate::representations::Literal;
use crate::interner::{Token, InternedDisplay, Interner};

use super::context::Context;
use super::placeholder;
use super::{number, string, name, comment};
use super::decls::SimpleParser;
use super::{comment, name, number, placeholder, string};
use crate::ast::{PHClass, Placeholder};
use crate::interner::{InternedDisplay, Interner, Tok};
use crate::representations::Literal;

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Entry{
pub struct Entry {
  pub lexeme: Lexeme,
  pub range: Range<usize>
  pub range: Range<usize>,
}
impl Entry {
  pub fn is_filler(&self) -> bool {
    matches!(self.lexeme, Lexeme::Comment(_))
      || matches!(self.lexeme, Lexeme::BR)
  }
}

impl InternedDisplay for Entry {
  fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
  fn fmt_i(
    &self,
    f: &mut std::fmt::Formatter<'_>,
    i: &Interner,
  ) -> std::fmt::Result {
    self.lexeme.fmt_i(f, i)
  }
}
@@ -40,21 +45,24 @@ impl Span for Entry {
  type Context = Lexeme;
  type Offset = usize;

  fn context(&self) -> Self::Context {self.lexeme.clone()}
  fn start(&self) -> Self::Offset {self.range.start()}
  fn end(&self) -> Self::Offset {self.range.end()}
  fn context(&self) -> Self::Context {
    self.lexeme.clone()
  }
  fn start(&self) -> Self::Offset {
    self.range.start()
  }
  fn end(&self) -> Self::Offset {
    self.range.end()
  }
  fn new(context: Self::Context, range: Range<Self::Offset>) -> Self {
    Self{
      lexeme: context,
      range
    }
    Self { lexeme: context, range }
  }
}

#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum Lexeme {
  Literal(Literal),
  Name(Token<String>),
  Name(Tok<String>),
  Rule(NotNan<f64>),
  /// Walrus operator (formerly shorthand macro)
  Const,
@@ -74,11 +82,15 @@ pub enum Lexeme {
  Export,
  Import,
  Namespace,
  PH(Placeholder)
  PH(Placeholder),
}

impl InternedDisplay for Lexeme {
  fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
  fn fmt_i(
    &self,
    f: &mut std::fmt::Formatter<'_>,
    i: &Interner,
  ) -> std::fmt::Result {
    match self {
      Self::Literal(l) => write!(f, "{:?}", l),
      Self::Name(token) => write!(f, "{}", i.r(*token)),
@@ -90,9 +102,9 @@ impl InternedDisplay for Lexeme {
        '(' => write!(f, ")"),
        '[' => write!(f, "]"),
        '{' => write!(f, "}}"),
        _ => f.debug_tuple("RP").field(l).finish()
        _ => f.debug_tuple("RP").field(l).finish(),
      },
      Self::BR => write!(f, "\n"),
      Self::BR => writeln!(f),
      Self::BS => write!(f, "\\"),
      Self::At => write!(f, "@"),
      Self::Type => write!(f, ":"),
@@ -103,27 +115,30 @@ impl InternedDisplay for Lexeme {
      Self::PH(Placeholder { name, class }) => match *class {
        PHClass::Scalar => write!(f, "${}", i.r(*name)),
        PHClass::Vec { nonzero, prio } => {
          if nonzero {write!(f, "...")}
          else {write!(f, "..")}?;
          if nonzero {
            write!(f, "...")
          } else {
            write!(f, "..")
          }?;
          write!(f, "${}", i.r(*name))?;
          if prio != 0 {write!(f, ":{}", prio)?;};
          if prio != 0 {
            write!(f, ":{}", prio)?;
          };
          Ok(())
        }
      }
        },
      },
    }
  }
}

impl Lexeme {
  pub fn rule(prio: impl Into<f64>) -> Self {
    Lexeme::Rule(
      NotNan::new(prio.into())
        .expect("Rule priority cannot be NaN")
    )
    Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
  }

  pub fn parser<E: chumsky::Error<Entry>>(self)
  -> impl Parser<Entry, Entry, Error = E> + Clone {
  pub fn parser<E: chumsky::Error<Entry>>(
    self,
  ) -> impl Parser<Entry, Entry, Error = E> + Clone {
    filter(move |ent: &Entry| ent.lexeme == self)
  }
}
@@ -141,16 +156,14 @@ impl InternedDisplay for LexedText {
  }
}

fn paren_parser(lp: char, rp: char)
-> impl Parser<char, Lexeme, Error=Simple<char>>
{
  just(lp).to(Lexeme::LP(lp))
    .or(just(rp).to(Lexeme::RP(lp)))
fn paren_parser(lp: char, rp: char) -> impl SimpleParser<char, Lexeme> {
  just(lp).to(Lexeme::LP(lp)).or(just(rp).to(Lexeme::RP(lp)))
}

pub fn literal_parser() -> impl Parser<char, Literal, Error = Simple<char>> {
pub fn literal_parser() -> impl SimpleParser<char, Literal> {
  choice((
    number::int_parser().map(Literal::Uint), // all ints are valid floats so it takes precedence
    // all ints are valid floats so it takes precedence
    number::int_parser().map(Literal::Uint),
    number::float_parser().map(Literal::Num),
    string::char_parser().map(Literal::Char),
    string::str_parser().map(Literal::Str),
@@ -159,10 +172,12 @@ pub fn literal_parser() -> impl Parser<char, Literal, Error = Simple<char>> {

pub static BASE_OPS: &[&str] = &[",", ".", "..", "..."];

pub fn lexer<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
{
  let all_ops = ctx.ops().iter()
pub fn lexer<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<char, Vec<Entry>> + 'a {
  let all_ops = ctx
    .ops()
    .iter()
    .map(|op| op.as_ref())
    .chain(BASE_OPS.iter().cloned())
    .map(str::to_string)
@@ -175,7 +190,10 @@ pub fn lexer<'a>(ctx: impl Context + 'a)
    paren_parser('[', ']'),
    paren_parser('{', '}'),
    just(":=").to(Lexeme::Const),
    just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
    just("=")
      .ignore_then(number::float_parser())
      .then_ignore(just("=>"))
      .map(Lexeme::rule),
    comment::comment_parser().map(Lexeme::Comment),
    just("::").to(Lexeme::NS),
    just('\\').to(Lexeme::BS),
@@ -184,20 +202,18 @@ pub fn lexer<'a>(ctx: impl Context + 'a)
    just('\n').to(Lexeme::BR),
    placeholder::placeholder_parser(ctx.clone()).map(Lexeme::PH),
    literal_parser().map(Lexeme::Literal),
    name::name_parser(&all_ops).map(move |n| {
      Lexeme::Name(ctx.interner().i(&n))
    })
    name::name_parser(&all_ops)
      .map(move |n| Lexeme::Name(ctx.interner().i(&n))),
  ))
  .map_with_span(|lexeme, range| Entry{ lexeme, range })
  .padded_by(one_of(" \t").repeated())
  .repeated()
  .then_ignore(end())
  .map_with_span(|lexeme, range| Entry { lexeme, range })
  .padded_by(one_of(" \t").repeated())
  .repeated()
  .then_ignore(end())
}

pub fn filter_map_lex<'a, O, M: ToString>(
  f: impl Fn(Lexeme) -> Result<O, M> + Clone + 'a
) -> impl Parser<Entry, (O, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
  f: impl Fn(Lexeme) -> Result<O, M> + Clone + 'a,
) -> impl SimpleParser<Entry, (O, Range<usize>)> + Clone + 'a {
  filter_map(move |s: Range<usize>, e: Entry| {
    let out = f(e.lexeme).map_err(|msg| Simple::custom(s.clone(), msg))?;
    Ok((out, s))
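A hedged sketch of what the lexer emits (a fragment; needs a `Context` impl `ctx`):

```rust
// `x := 2` becomes Name("x"), Const, Literal(Uint(2)), each with its range.
let entries = lexer(ctx).parse("x := 2").unwrap();
assert!(matches!(entries[1].lexeme, Lexeme::Const));
assert_eq!(entries[1].range, 2..4);
```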
@@ -1,19 +1,20 @@
mod string;
mod number;
mod name;
mod lexer;
mod comment;
mod expression;
mod sourcefile;
mod import;
mod parse;
mod enum_filter;
mod placeholder;
mod context;
mod decls;
mod enum_filter;
mod expression;
mod facade;
mod import;
mod lexer;
mod name;
mod number;
mod placeholder;
mod sourcefile;
mod string;

pub use sourcefile::line_parser;
pub use lexer::{lexer, Lexeme, Entry};
pub use context::ParsingContext;
pub use facade::{parse, ParseError};
pub use lexer::{lexer, Entry, Lexeme};
pub use name::is_op;
pub use parse::{parse, ParseError};
pub use number::{float_parser, int_parser};
pub use context::ParsingContext;
pub use sourcefile::line_parser;
@@ -1,22 +1,28 @@
use chumsky::{self, prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::{self, Parser};

use super::decls::{BoxedSimpleParser, SimpleParser};

/// Matches any one of the passed operators, preferring longer ones
fn op_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> BoxedParser<'a, char, String, Simple<char>>
{
  let mut sorted_ops: Vec<String> = ops.iter()
    .map(|t| t.as_ref().to_string()).collect();
fn op_parser<'a>(
  ops: &[impl AsRef<str> + Clone],
) -> BoxedSimpleParser<'a, char, String> {
  let mut sorted_ops: Vec<String> =
    ops.iter().map(|t| t.as_ref().to_string()).collect();
  sorted_ops.sort_by_key(|op| -(op.len() as i64));
  sorted_ops.into_iter()
  sorted_ops
    .into_iter()
    .map(|op| just(op).boxed())
    .reduce(|a, b| a.or(b).boxed())
    .unwrap_or_else(|| {
      empty().map(|()| panic!("Empty isn't meant to match")).boxed()
    }).labelled("operator").boxed()
    })
    .labelled("operator")
    .boxed()
}

/// Characters that cannot be parsed as part of an operator
///
/// The initial operator list overrides this.
static NOT_NAME_CHAR: &[char] = &[
  ':', // used for namespacing and type annotations
@@ -28,35 +34,34 @@ static NOT_NAME_CHAR: &[char] = &[
];

/// Matches anything that's allowed as an operator
///
/// FIXME: `@name` without a dot should be parsed correctly for overrides.
/// Could be an operator but then parametrics should take precedence,
/// which might break stuff. investigate.
///
/// TODO: `'` could work as an operator whenever it isn't closed.
/// It's common in maths so it's worth a try
///
/// TODO: `.` could possibly be parsed as an operator in some contexts.
/// This operator is very common in maths so it's worth a try.
/// Investigate.
pub fn modname_parser<'a>()
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
pub fn modname_parser<'a>() -> impl SimpleParser<char, String> + 'a {
  filter(move |c| !NOT_NAME_CHAR.contains(c) && !c.is_whitespace())
    .repeated().at_least(1)
    .repeated()
    .at_least(1)
    .collect()
    .labelled("modname")
}

/// Parse an operator or name. Failing both, parse everything up to
/// the next whitespace or blacklisted character as a new operator.
pub fn name_parser<'a>(ops: &[impl AsRef<str> + Clone])
-> impl Parser<char, String, Error = Simple<char>> + 'a
{
pub fn name_parser<'a>(
  ops: &[impl AsRef<str> + Clone],
) -> impl SimpleParser<char, String> + 'a {
  choice((
    op_parser(ops), // First try to parse a known operator
    text::ident().labelled("plain text"), // Failing that, parse plain text
    modname_parser() // Finally parse everything until the next forbidden char
    modname_parser(), // Finally parse everything until the next forbidden char
  ))
  .labelled("name")
}
@@ -65,7 +70,7 @@ pub fn name_parser<'a>(ops: &[impl AsRef<str> + Clone])
/// and text, just not at the start.
pub fn is_op(s: impl AsRef<str>) -> bool {
  return match s.as_ref().chars().next() {
    Some(x) => !x.is_alphanumeric(),
    None => false
  }
    Some(x) => !x.is_alphanumeric(),
    None => false,
  };
}
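Quick checks of the classification rule above (assuming this module's `is_op`):

```rust
fn main() {
  assert!(is_op("+")); // first char is not alphanumeric
  assert!(is_op("=>"));
  assert!(!is_op("map")); // names start with an alphanumeric char
  assert!(!is_op("")); // the empty string is not an operator
}
```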
@@ -1,6 +1,9 @@
use chumsky::{self, prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::{self, Parser};
use ordered_float::NotNan;

use super::decls::SimpleParser;

fn assert_not_digit(base: u32, c: char) {
  if base > (10 + (c as u32 - 'a' as u32)) {
    panic!("The character '{}' is a digit in base ({})", c, base)
@@ -8,9 +11,9 @@ fn assert_not_digit(base: u32, c: char) {
}

/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
///
/// TODO: this should use separated_by and parse the leading group too
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
fn separated_digits_parser(base: u32) -> impl SimpleParser<char, String> {
  just('_')
    .ignore_then(text::digits(base))
    .repeated()
@@ -18,57 +21,62 @@ fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simpl
}

/// parse a grouped uint
///
/// Not to be confused with [int_parser] which does a lot more
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
  text::int(base)
    .then(separated_digits_parser(base))
    .map(move |(s1, s2): (String, String)| {
fn uint_parser(base: u32) -> impl SimpleParser<char, u64> {
  text::int(base).then(separated_digits_parser(base)).map(
    move |(s1, s2): (String, String)| {
      u64::from_str_radix(&(s1 + &s2), base).unwrap()
    })
    },
  )
}

/// parse exponent notation, or return 0 as the default exponent.
/// The exponent is always in decimal.
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
/// The exponent is always in decimal.
fn pow_parser() -> impl SimpleParser<char, i32> {
  choice((
    just('p')
      .ignore_then(text::int(10))
      .map(|s: String| s.parse().unwrap()),
    just('p').ignore_then(text::int(10)).map(|s: String| s.parse().unwrap()),
    just("p-")
      .ignore_then(text::int(10))
      .map(|s: String| -s.parse::<i32>().unwrap()),
  )).or_else(|_| Ok(0))
  ))
  .or_else(|_| Ok(0))
}

/// returns a mapper that converts a mantissa and an exponent into a uint
///
/// TODO it panics if it finds a negative exponent
fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
fn nat2u(base: u64) -> impl Fn((u64, i32)) -> u64 {
  move |(val, exp)| {
    if exp == 0 {val}
    else {val * base.checked_pow(exp.try_into().unwrap()).unwrap()}
    if exp == 0 {
      val
    } else {
      val * base.checked_pow(exp.try_into().unwrap()).unwrap()
    }
  }
}

/// returns a mapper that converts a mantissa and an exponent into a float
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32),) -> NotNan<f64> {
fn nat2f(base: u64) -> impl Fn((NotNan<f64>, i32)) -> NotNan<f64> {
  move |(val, exp)| {
    if exp == 0 {val}
    else {val * (base as f64).powf(exp.try_into().unwrap())}
    if exp == 0 {
      val
    } else {
      val * (base as f64).powf(exp.try_into().unwrap())
    }
  }
}

/// parse a uint from exponential notation (panics if 'p' is a digit in base)
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
fn pow_uint_parser(base: u32) -> impl SimpleParser<char, u64> {
  assert_not_digit(base, 'p');
  uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
}

/// parse a uint from a base determined by its prefix or lack thereof
///
/// Not to be confused with [uint_parser] which is a component of it.
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
pub fn int_parser() -> impl SimpleParser<char, u64> {
  choice((
    just("0b").ignore_then(pow_uint_parser(2)),
    just("0x").ignore_then(pow_uint_parser(16)),
@@ -78,35 +86,40 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
}

/// parse a float from dot notation
fn dotted_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
fn dotted_parser(base: u32) -> impl SimpleParser<char, NotNan<f64>> {
  uint_parser(base)
    .then(
      just('.').ignore_then(
        text::digits(base).then(separated_digits_parser(base))
      ).map(move |(frac1, frac2)| {
        let frac = frac1 + &frac2;
        let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
        let dexp = base.pow(frac.len().try_into().unwrap());
        frac_num / dexp as f64
      }).or_not().map(|o| o.unwrap_or_default())
    ).try_map(|(wh, f), s| {
      NotNan::new(wh as f64 + f).map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
    })
    .then(
      just('.')
        .ignore_then(text::digits(base).then(separated_digits_parser(base)))
        .map(move |(frac1, frac2)| {
          let frac = frac1 + &frac2;
          let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
          let dexp = base.pow(frac.len().try_into().unwrap());
          frac_num / dexp as f64
        })
        .or_not()
        .map(|o| o.unwrap_or_default()),
    )
    .try_map(|(wh, f), s| {
      NotNan::new(wh as f64 + f)
        .map_err(|_| Simple::custom(s, "Float literal evaluates to NaN"))
    })
}

/// parse a float from dotted and optionally also exponential notation
fn pow_float_parser(base: u32) -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
fn pow_float_parser(base: u32) -> impl SimpleParser<char, NotNan<f64>> {
  assert_not_digit(base, 'p');
  dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
}

/// parse a float with dotted and optionally exponential notation from a base determined by its
/// prefix
pub fn float_parser() -> impl Parser<char, NotNan<f64>, Error = Simple<char>> {
/// parse a float with dotted and optionally exponential notation from a base
/// determined by its prefix
pub fn float_parser() -> impl SimpleParser<char, NotNan<f64>> {
  choice((
    just("0b").ignore_then(pow_float_parser(2)),
    just("0x").ignore_then(pow_float_parser(16)),
    just('0').ignore_then(pow_float_parser(8)),
    pow_float_parser(10),
  )).labelled("float")
  ))
  .labelled("float")
}
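A few hedged checks of the notation described above (assuming this module's `int_parser`/`float_parser`; the trailing `end()` makes leftover input an error):

```rust
use chumsky::prelude::*;

fn main() {
  assert_eq!(int_parser().then_ignore(end()).parse("0xff"), Ok(255));
  assert_eq!(int_parser().then_ignore(end()).parse("1_000_000"), Ok(1_000_000));
  // Binary 0.1 is 0.5 and the decimal exponent after `p` scales by base^exp.
  let f = float_parser().then_ignore(end()).parse("0b0.1p2").unwrap();
  assert_eq!(f.into_inner(), 2.0);
}
```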
@@ -1,16 +1,18 @@
use chumsky::{Parser, prelude::*};
use chumsky::prelude::*;
use chumsky::Parser;

use crate::ast::{Placeholder, PHClass};
use super::context::Context;
use super::decls::SimpleParser;
use super::number::int_parser;
use crate::ast::{PHClass, Placeholder};

use super::{number::int_parser, context::Context};

pub fn placeholder_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<char, Placeholder, Error = Simple<char>> + 'a
{
pub fn placeholder_parser(
  ctx: impl Context,
) -> impl SimpleParser<char, Placeholder> {
  choice((
    just("...").to(Some(true)),
    just("..").to(Some(false)),
    empty().to(None)
    empty().to(None),
  ))
  .then(just("$").ignore_then(text::ident()))
  .then(just(":").ignore_then(int_parser()).or_not())
@@ -19,12 +21,10 @@ pub fn placeholder_parser<'a>(ctx: impl Context + 'a)
    if let Some(nonzero) = vec_nonzero {
      let prio = vec_prio.unwrap_or_default();
      Ok(Placeholder { name, class: PHClass::Vec { nonzero, prio } })
    } else if vec_prio.is_some() {
      Err(Simple::custom(span, "Scalar placeholders have no priority"))
    } else {
      if vec_prio.is_some() {
        Err(Simple::custom(span, "Scalar placeholders have no priority"))
      } else {
        Ok(Placeholder { name, class: PHClass::Scalar })
      }
      Ok(Placeholder { name, class: PHClass::Scalar })
    }
  })
}
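A hedged fragment showing the accepted shapes (needs a `Context` impl `ctx`):

```rust
// `...$body:2` is a nonzero vectorial placeholder with priority 2.
let ph = placeholder_parser(ctx.clone()).parse("...$body:2").unwrap();
assert!(matches!(ph.class, PHClass::Vec { nonzero: true, prio: 2 }));
// `$name` is scalar; `$name:1` would be rejected with a custom error.
let scalar = placeholder_parser(ctx).parse("$name").unwrap();
assert!(matches!(scalar.class, PHClass::Scalar));
```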
@@ -1,55 +1,67 @@
use std::iter;
use std::rc::Rc;

use crate::representations::location::Location;
use crate::representations::sourcefile::{FileEntry, Member};
use crate::enum_filter;
use crate::ast::{Rule, Constant, Expr, Clause};
use crate::interner::Token;

use super::Entry;
use super::context::Context;
use super::expression::xpr_parser;
use super::import::import_parser;
use super::lexer::{Lexeme, filter_map_lex};

use chumsky::{Parser, prelude::*};
use chumsky::prelude::*;
use chumsky::Parser;
use itertools::Itertools;

fn rule_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Rule, Error = Simple<Entry>> + 'a
{
  xpr_parser(ctx.clone()).repeated().at_least(1)
use super::context::Context;
use super::decls::{SimpleParser, SimpleRecursive};
use super::expression::xpr_parser;
use super::import::import_parser;
use super::lexer::{filter_map_lex, Lexeme};
use super::Entry;
use crate::ast::{Clause, Constant, Expr, Rule};
use crate::enum_filter;
use crate::representations::location::Location;
use crate::representations::sourcefile::{FileEntry, Member, Namespace};

fn rule_parser<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Rule> + 'a {
  xpr_parser(ctx.clone())
    .repeated()
    .at_least(1)
    .then(filter_map_lex(enum_filter!(Lexeme::Rule)))
    .then(xpr_parser(ctx).repeated().at_least(1))
    .map(|((s, (prio, _)), t)| Rule{
    .map(|((s, (prio, _)), t)| Rule {
      source: Rc::new(s),
      prio,
      target: Rc::new(t)
    }).labelled("Rule")
      target: Rc::new(t),
    })
    .labelled("Rule")
}

fn const_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, Constant, Error = Simple<Entry>> + 'a
{
fn const_parser<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Constant> + 'a {
  filter_map_lex(enum_filter!(Lexeme::Name))
    .then_ignore(Lexeme::Const.parser())
    .then(xpr_parser(ctx.clone()).repeated().at_least(1))
    .map(move |((name, _), value)| Constant{
    .map(move |((name, _), value)| Constant {
      name,
      value: if let Ok(ex) = value.iter().exactly_one() { ex.clone() }
      else {
        let start = value.first().expect("value cannot be empty")
          .location.range().expect("all locations in parsed source are known")
      value: if let Ok(ex) = value.iter().exactly_one() {
        ex.clone()
      } else {
        let start = value
          .first()
          .expect("value cannot be empty")
          .location
          .range()
          .expect("all locations in parsed source are known")
          .start;
        let end = value.last().expect("asserted right above")
          .location.range().expect("all locations in parsed source are known")
        let end = value
          .last()
          .expect("asserted right above")
          .location
          .range()
          .expect("all locations in parsed source are known")
          .end;
        Expr{
        Expr {
          location: Location::Range { file: ctx.file(), range: start..end },
          value: Clause::S('(', Rc::new(value))
          value: Clause::S('(', Rc::new(value)),
        }
      }
      },
    })
}

@@ -60,56 +72,61 @@ pub fn collect_errors<T, E: chumsky::Error<T>>(e: Vec<E>) -> E {
}

fn namespace_parser<'a>(
  line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
) -> impl Parser<Entry, (Token<String>, Vec<FileEntry>), Error = Simple<Entry>> + 'a {
  Lexeme::Namespace.parser()
    .ignore_then(filter_map_lex(enum_filter!(Lexeme::Name)))
    .then(
      any().repeated().delimited_by(
        Lexeme::LP('(').parser(),
        Lexeme::RP('(').parser()
      ).try_map(move |body, _| {
        split_lines(&body)
          .map(|l| line.parse(l))
          .collect::<Result<Vec<_>,_>>()
          .map_err(collect_errors)
      })
    ).map(move |((name, _), body)| {
      (name, body)
    })
  line: impl SimpleParser<Entry, FileEntry> + 'a,
) -> impl SimpleParser<Entry, Namespace> + 'a {
  Lexeme::Namespace
    .parser()
    .ignore_then(filter_map_lex(enum_filter!(Lexeme::Name)))
    .then(
      any()
        .repeated()
        .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
        .try_map(move |body, _| {
          split_lines(&body)
            .map(|l| line.parse(l))
            .collect::<Result<Vec<_>, _>>()
            .map_err(collect_errors)
        }),
    )
    .map(move |((name, _), body)| Namespace { name, body })
}

fn member_parser<'a>(
  line: impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a,
  ctx: impl Context + 'a
) -> impl Parser<Entry, Member, Error = Simple<Entry>> + 'a {
  line: impl SimpleParser<Entry, FileEntry> + 'a,
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, Member> + 'a {
  choice((
    namespace_parser(line)
      .map(|(name, body)| Member::Namespace(name, body)),
    namespace_parser(line).map(Member::Namespace),
    rule_parser(ctx.clone()).map(Member::Rule),
    const_parser(ctx).map(Member::Constant),
  ))
}

pub fn line_parser<'a>(ctx: impl Context + 'a)
-> impl Parser<Entry, FileEntry, Error = Simple<Entry>> + 'a
{
  recursive(|line: Recursive<Entry, FileEntry, Simple<Entry>>| {
pub fn line_parser<'a>(
  ctx: impl Context + 'a,
) -> impl SimpleParser<Entry, FileEntry> + 'a {
  recursive(|line: SimpleRecursive<Entry, FileEntry>| {
    choice((
      // In case the usercode wants to parse doc
      filter_map_lex(enum_filter!(Lexeme >> FileEntry; Comment)).map(|(ent, _)| ent),
      filter_map_lex(enum_filter!(Lexeme >> FileEntry; Comment))
        .map(|(ent, _)| ent),
      // plain old imports
      Lexeme::Import.parser()
      Lexeme::Import
        .parser()
        .ignore_then(import_parser(ctx.clone()).map(FileEntry::Import)),
      Lexeme::Export.parser().ignore_then(choice((
        // token collection
        Lexeme::NS.parser().ignore_then(
          filter_map_lex(enum_filter!(Lexeme::Name)).map(|(e, _)| e)
            .separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
            .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser())
        ).map(FileEntry::Export),
        Lexeme::NS
          .parser()
          .ignore_then(
            filter_map_lex(enum_filter!(Lexeme::Name))
              .map(|(e, _)| e)
              .separated_by(Lexeme::Name(ctx.interner().i(",")).parser())
              .delimited_by(Lexeme::LP('(').parser(), Lexeme::RP('(').parser()),
          )
          .map(FileEntry::Export),
        // public declaration
        member_parser(line.clone(), ctx.clone()).map(FileEntry::Exported)
        member_parser(line.clone(), ctx.clone()).map(FileEntry::Exported),
      ))),
      // This could match almost anything so it has to go last
      member_parser(line, ctx).map(FileEntry::Internal),
@@ -123,13 +140,13 @@ pub fn split_lines(data: &[Entry]) -> impl Iterator<Item = &[Entry]> {
  let mut finished = false;
  iter::from_fn(move || {
    let mut paren_count = 0;
    while let Some((i, Entry{ lexeme, .. })) = source.next() {
    for (i, Entry { lexeme, .. }) in source.by_ref() {
      match lexeme {
        Lexeme::LP(_) => paren_count += 1,
        Lexeme::RP(_) => paren_count -= 1,
        Lexeme::BR if paren_count == 0 => {
          let begin = last_slice;
          last_slice = i+1;
          last_slice = i + 1;
          return Some(&data[begin..i]);
        },
        _ => (),
@@ -138,8 +155,9 @@ pub fn split_lines(data: &[Entry]) -> impl Iterator<Item = &[Entry]> {
    // Include last line even without trailing newline
    if !finished {
      finished = true;
      return Some(&data[last_slice..])
      return Some(&data[last_slice..]);
    }
    None
  }).filter(|s| s.len() > 0)
  })
  .filter(|s| !s.is_empty())
}
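A hedged sketch of the line-splitting behaviour (a fragment; needs a `Context` impl `ctx`): breaks inside parentheses do not split the line.

```rust
let tokens = lexer(ctx).parse("a := (\n  1\n)\nb := 2\n").unwrap();
let lines: Vec<_> = split_lines(&tokens).collect();
assert_eq!(lines.len(), 2); // `a := (...)` and `b := 2`
```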
@@ -1,7 +1,10 @@
use chumsky::{self, prelude::*, Parser};
use chumsky::prelude::*;
use chumsky::{self, Parser};

use super::decls::SimpleParser;

/// Parses a text character that is not the specified delimiter
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
fn text_parser(delim: char) -> impl SimpleParser<char, char> {
  // Copied directly from Chumsky's JSON example.
  let escape = just('\\').ignore_then(
    just('\\')
@@ -12,35 +15,39 @@ fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
      .or(just('n').to('\n'))
      .or(just('r').to('\r'))
      .or(just('t').to('\t'))
      .or(just('u').ignore_then(
        filter(|c: &char| c.is_ascii_hexdigit())
          .repeated()
          .exactly(4)
          .collect::<String>()
          .validate(|digits, span, emit| {
            char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
              .unwrap_or_else(|| {
                emit(Simple::custom(span, "invalid unicode character"));
                '\u{FFFD}' // unicode replacement character
              })
          }),
      )),
      .or(
        just('u').ignore_then(
          filter(|c: &char| c.is_ascii_hexdigit())
            .repeated()
            .exactly(4)
            .collect::<String>()
            .validate(|digits, span, emit| {
              char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
                .unwrap_or_else(|| {
                  emit(Simple::custom(span, "invalid unicode character"));
                  '\u{FFFD}' // unicode replacement character
                })
            }),
        ),
      ),
  );
  filter(move |&c| c != '\\' && c != delim).or(escape)
}

/// Parse a character literal between single quotes
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
pub fn char_parser() -> impl SimpleParser<char, char> {
  just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
}

/// Parse a string between double quotes
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
pub fn str_parser() -> impl SimpleParser<char, String> {
  just('"')
    .ignore_then(
      text_parser('"').map(Some)
    .ignore_then(
      text_parser('"').map(Some)
        .or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
        .repeated()
    ).then_ignore(just('"'))
    .flatten().collect()
        .repeated(),
    )
    .then_ignore(just('"'))
    .flatten()
    .collect()
}
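Quick hedged checks of the escape handling above (assuming this module's `char_parser`/`str_parser`):

```rust
fn main() {
  assert_eq!(char_parser().parse("'a'"), Ok('a'));
  assert_eq!(str_parser().parse(r#""a\nb""#), Ok("a\nb".to_string()));
  // A backslash-newline pair inside a string is dropped entirely.
  assert_eq!(str_parser().parse("\"one\\\ntwo\""), Ok("onetwo".to_string()));
}
```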