forked from Orchid/orchid
bug fixes and performance improvements
This commit is contained in:
@@ -1,53 +1,88 @@
|
||||
use std::{ops::Range, iter, fmt};
|
||||
use ordered_float::NotNan;
|
||||
use chumsky::{Parser, prelude::*};
|
||||
use std::fmt::Debug;
|
||||
use crate::{utils::{BoxedIter, iter::{box_once, box_flatten}}, box_chain};
|
||||
use std::fmt;
|
||||
use std::ops::Range;
|
||||
|
||||
use ordered_float::NotNan;
|
||||
use chumsky::{Parser, prelude::*, text::keyword, Span};
|
||||
|
||||
use crate::ast::{Placeholder, PHClass};
|
||||
use crate::representations::Literal;
|
||||
use crate::interner::{Token, InternedDisplay, Interner};
|
||||
|
||||
use super::context::Context;
|
||||
use super::placeholder;
|
||||
use super::{number, string, name, comment};
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Entry(pub Lexeme, pub Range<usize>);
|
||||
impl Debug for Entry {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{:?}", self.0)
|
||||
// f.debug_tuple("Entry").field(&self.0).field(&self.1).finish()
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct Entry{
|
||||
pub lexeme: Lexeme,
|
||||
pub range: Range<usize>
|
||||
}
|
||||
impl Entry {
|
||||
pub fn is_filler(&self) -> bool {
|
||||
matches!(self.lexeme, Lexeme::Comment(_))
|
||||
|| matches!(self.lexeme, Lexeme::BR)
|
||||
}
|
||||
}
|
||||
|
||||
impl InternedDisplay for Entry {
|
||||
fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
|
||||
self.lexeme.fmt_i(f, i)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Entry> for (Lexeme, Range<usize>) {
|
||||
fn from(ent: Entry) -> Self {
|
||||
(ent.0, ent.1)
|
||||
(ent.lexeme, ent.range)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
impl Span for Entry {
|
||||
type Context = Lexeme;
|
||||
type Offset = usize;
|
||||
|
||||
fn context(&self) -> Self::Context {self.lexeme.clone()}
|
||||
fn start(&self) -> Self::Offset {self.range.start()}
|
||||
fn end(&self) -> Self::Offset {self.range.end()}
|
||||
fn new(context: Self::Context, range: Range<Self::Offset>) -> Self {
|
||||
Self{
|
||||
lexeme: context,
|
||||
range
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum Lexeme {
|
||||
Num(NotNan<f64>),
|
||||
Uint(u64),
|
||||
Char(char),
|
||||
Str(String),
|
||||
Name(String),
|
||||
Literal(Literal),
|
||||
Name(Token<String>),
|
||||
Rule(NotNan<f64>),
|
||||
NS, // namespace separator
|
||||
/// Walrus operator (formerly shorthand macro)
|
||||
Const,
|
||||
/// Line break
|
||||
BR,
|
||||
/// Namespace separator
|
||||
NS,
|
||||
/// Left paren
|
||||
LP(char),
|
||||
/// Right paren
|
||||
RP(char),
|
||||
BS, // Backslash
|
||||
/// Backslash
|
||||
BS,
|
||||
At,
|
||||
Type, // type operator
|
||||
Comment(String),
|
||||
Export,
|
||||
Import,
|
||||
Namespace,
|
||||
PH(Placeholder)
|
||||
}
|
||||
|
||||
impl Debug for Lexeme {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
impl InternedDisplay for Lexeme {
|
||||
fn fmt_i(&self, f: &mut std::fmt::Formatter<'_>, i: &Interner) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Num(n) => write!(f, "{}", n),
|
||||
Self::Uint(i) => write!(f, "{}", i),
|
||||
Self::Char(c) => write!(f, "{:?}", c),
|
||||
Self::Str(s) => write!(f, "{:?}", s),
|
||||
Self::Name(name) => write!(f, "{}", name),
|
||||
Self::Literal(l) => write!(f, "{:?}", l),
|
||||
Self::Name(token) => write!(f, "{}", i.r(*token)),
|
||||
Self::Const => write!(f, ":="),
|
||||
Self::Rule(prio) => write!(f, "={}=>", prio),
|
||||
Self::NS => write!(f, "::"),
|
||||
Self::LP(l) => write!(f, "{}", l),
|
||||
@@ -57,102 +92,114 @@ impl Debug for Lexeme {
|
||||
'{' => write!(f, "}}"),
|
||||
_ => f.debug_tuple("RP").field(l).finish()
|
||||
},
|
||||
Self::BR => write!(f, "\n"),
|
||||
Self::BS => write!(f, "\\"),
|
||||
Self::At => write!(f, "@"),
|
||||
Self::Type => write!(f, ":"),
|
||||
Self::Comment(text) => write!(f, "--[{}]--", text),
|
||||
Self::Export => write!(f, "export"),
|
||||
Self::Import => write!(f, "import"),
|
||||
Self::Namespace => write!(f, "namespace"),
|
||||
Self::PH(Placeholder { name, class }) => match *class {
|
||||
PHClass::Scalar => write!(f, "${}", i.r(*name)),
|
||||
PHClass::Vec { nonzero, prio } => {
|
||||
if nonzero {write!(f, "...")}
|
||||
else {write!(f, "..")}?;
|
||||
write!(f, "${}", i.r(*name))?;
|
||||
if prio != 0 {write!(f, ":{}", prio)?;};
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Lexeme {
|
||||
pub fn name<T: ToString>(n: T) -> Self {
|
||||
Lexeme::Name(n.to_string())
|
||||
pub fn rule(prio: impl Into<f64>) -> Self {
|
||||
Lexeme::Rule(
|
||||
NotNan::new(prio.into())
|
||||
.expect("Rule priority cannot be NaN")
|
||||
)
|
||||
}
|
||||
pub fn rule<T>(prio: T) -> Self where T: Into<f64> {
|
||||
Lexeme::Rule(NotNan::new(prio.into()).expect("Rule priority cannot be NaN"))
|
||||
}
|
||||
pub fn paren_parser<T, P>(
|
||||
expr: P
|
||||
) -> impl Parser<Lexeme, (char, T), Error = Simple<Lexeme>> + Clone
|
||||
where P: Parser<Lexeme, T, Error = Simple<Lexeme>> + Clone {
|
||||
choice((
|
||||
expr.clone().delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
|
||||
.map(|t| ('(', t)),
|
||||
expr.clone().delimited_by(just(Lexeme::LP('[')), just(Lexeme::RP('[')))
|
||||
.map(|t| ('[', t)),
|
||||
expr.delimited_by(just(Lexeme::LP('{')), just(Lexeme::RP('{')))
|
||||
.map(|t| ('{', t)),
|
||||
))
|
||||
|
||||
pub fn parser<E: chumsky::Error<Entry>>(self)
|
||||
-> impl Parser<Entry, Entry, Error = E> + Clone {
|
||||
filter(move |ent: &Entry| ent.lexeme == self)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct LexedText(pub Vec<Vec<Entry>>);
|
||||
pub struct LexedText(pub Vec<Entry>);
|
||||
|
||||
impl Debug for LexedText {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
for row in &self.0 {
|
||||
for tok in row {
|
||||
tok.fmt(f)?;
|
||||
f.write_str(" ")?
|
||||
}
|
||||
f.write_str("\n")?
|
||||
impl InternedDisplay for LexedText {
|
||||
fn fmt_i(&self, f: &mut fmt::Formatter<'_>, i: &Interner) -> fmt::Result {
|
||||
for tok in self.0.iter() {
|
||||
tok.fmt_i(f, i)?;
|
||||
f.write_str(" ")?
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
type LexSubres<'a> = BoxedIter<'a, Entry>;
|
||||
|
||||
fn paren_parser<'a>(
|
||||
expr: Recursive<'a, char, LexSubres<'a>, Simple<char>>,
|
||||
lp: char, rp: char
|
||||
) -> impl Parser<char, LexSubres<'a>, Error=Simple<char>> + 'a {
|
||||
expr.padded().repeated()
|
||||
.map(|x| box_flatten(x.into_iter()))
|
||||
.delimited_by(just(lp), just(rp)).map_with_span(move |b, s| {
|
||||
box_chain!(
|
||||
iter::once(Entry(Lexeme::LP(lp), s.start..s.start+1)),
|
||||
b,
|
||||
iter::once(Entry(Lexeme::RP(lp), s.end-1..s.end))
|
||||
)
|
||||
})
|
||||
fn paren_parser(lp: char, rp: char)
|
||||
-> impl Parser<char, Lexeme, Error=Simple<char>>
|
||||
{
|
||||
just(lp).to(Lexeme::LP(lp))
|
||||
.or(just(rp).to(Lexeme::RP(lp)))
|
||||
}
|
||||
|
||||
pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
|
||||
where T: AsRef<str> + Clone {
|
||||
let all_ops = ops.iter().map(|o| o.as_ref().to_string())
|
||||
.chain([",", ".", "..", "..."].into_iter().map(str::to_string))
|
||||
pub fn literal_parser() -> impl Parser<char, Literal, Error = Simple<char>> {
|
||||
choice((
|
||||
number::int_parser().map(Literal::Uint), // all ints are valid floats so it takes precedence
|
||||
number::float_parser().map(Literal::Num),
|
||||
string::char_parser().map(Literal::Char),
|
||||
string::str_parser().map(Literal::Str),
|
||||
))
|
||||
}
|
||||
|
||||
/// Operator strings that `lexer` chains onto the operators provided by its
/// context before building the name parser.
pub static BASE_OPS: &[&str] = &[",", ".", "..", "..."];
|
||||
|
||||
pub fn lexer<'a>(ctx: impl Context + 'a)
|
||||
-> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
|
||||
{
|
||||
let all_ops = ctx.ops().iter()
|
||||
.map(|op| op.as_ref())
|
||||
.chain(BASE_OPS.iter().cloned())
|
||||
.map(str::to_string)
|
||||
.collect::<Vec<_>>();
|
||||
just("export").padded().to(Lexeme::Export)
|
||||
.or(just("import").padded().to(Lexeme::Import))
|
||||
.or_not().then(recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
|
||||
choice((
|
||||
paren_parser(recurse.clone(), '(', ')'),
|
||||
paren_parser(recurse.clone(), '[', ']'),
|
||||
paren_parser(recurse.clone(), '{', '}'),
|
||||
choice((
|
||||
just(":=").padded().to(Lexeme::rule(0f64)),
|
||||
just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
|
||||
comment::comment_parser().map(Lexeme::Comment),
|
||||
just("::").padded().to(Lexeme::NS),
|
||||
just('\\').padded().to(Lexeme::BS),
|
||||
just('@').padded().to(Lexeme::At),
|
||||
just(':').to(Lexeme::Type),
|
||||
number::int_parser().map(Lexeme::Uint), // all ints are valid floats so it takes precedence
|
||||
number::float_parser().map(Lexeme::Num),
|
||||
string::char_parser().map(Lexeme::Char),
|
||||
string::str_parser().map(Lexeme::Str),
|
||||
name::name_parser(&all_ops).map(Lexeme::Name), // includes namespacing
|
||||
)).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres)
|
||||
))
|
||||
}).separated_by(one_of("\t ").repeated())
|
||||
.flatten().collect())
|
||||
.map(|(prefix, rest): (Option<Lexeme>, Vec<Entry>)| {
|
||||
prefix.into_iter().map(|l| Entry(l, 0..6)).chain(rest.into_iter()).collect()
|
||||
})
|
||||
.then_ignore(text::whitespace()).then_ignore(end())
|
||||
choice((
|
||||
keyword("export").to(Lexeme::Export),
|
||||
keyword("module").to(Lexeme::Namespace),
|
||||
keyword("import").to(Lexeme::Import),
|
||||
paren_parser('(', ')'),
|
||||
paren_parser('[', ']'),
|
||||
paren_parser('{', '}'),
|
||||
just(":=").to(Lexeme::Const),
|
||||
just("=").ignore_then(number::float_parser()).then_ignore(just("=>")).map(Lexeme::rule),
|
||||
comment::comment_parser().map(Lexeme::Comment),
|
||||
just("::").to(Lexeme::NS),
|
||||
just('\\').to(Lexeme::BS),
|
||||
just('@').to(Lexeme::At),
|
||||
just(':').to(Lexeme::Type),
|
||||
just('\n').to(Lexeme::BR),
|
||||
placeholder::placeholder_parser(ctx.clone()).map(Lexeme::PH),
|
||||
literal_parser().map(Lexeme::Literal),
|
||||
name::name_parser(&all_ops).map(move |n| {
|
||||
Lexeme::Name(ctx.interner().i(&n))
|
||||
})
|
||||
))
|
||||
.map_with_span(|lexeme, range| Entry{ lexeme, range })
|
||||
.padded_by(one_of(" \t").repeated())
|
||||
.repeated()
|
||||
.then_ignore(end())
|
||||
}
|
||||
|
||||
|
||||
pub fn filter_map_lex<'a, O, M: ToString>(
|
||||
f: impl Fn(Lexeme) -> Result<O, M> + Clone + 'a
|
||||
) -> impl Parser<Entry, (O, Range<usize>), Error = Simple<Entry>> + Clone + 'a {
|
||||
filter_map(move |s: Range<usize>, e: Entry| {
|
||||
let out = f(e.lexeme).map_err(|msg| Simple::custom(s.clone(), msg))?;
|
||||
Ok((out, s))
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user