forked from Orchid/orchid
September-October commit
- manual parser - stl refinements - all language constructs are now Send
This commit is contained in:
@@ -1,24 +1,26 @@
|
||||
use std::fmt::{self, Display};
|
||||
use std::fmt::Display;
|
||||
use std::ops::Range;
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
|
||||
use chumsky::prelude::*;
|
||||
use chumsky::text::keyword;
|
||||
use chumsky::Parser;
|
||||
use itertools::Itertools;
|
||||
use ordered_float::NotNan;
|
||||
|
||||
use super::LexerPlugin;
|
||||
use super::context::Context;
|
||||
use super::decls::SimpleParser;
|
||||
use super::number::print_nat16;
|
||||
use super::{comment, name, number, placeholder, string};
|
||||
use super::errors::{FloatPlacehPrio, NoCommentEnd};
|
||||
use super::numeric::{parse_num, print_nat16, numstart};
|
||||
use crate::ast::{PHClass, Placeholder};
|
||||
use crate::error::{ProjectResult, ProjectError};
|
||||
use crate::foreign::Atom;
|
||||
use crate::interner::Tok;
|
||||
use crate::parse::operators::operators_parser;
|
||||
use crate::representations::Literal;
|
||||
use crate::{Interner, Location, VName};
|
||||
use crate::parse::numeric::{numchar, lex_numeric};
|
||||
use crate::parse::string::lex_string;
|
||||
use crate::systems::stl::Numeric;
|
||||
use crate::utils::pure_seq::next;
|
||||
use crate::utils::unwrap_or;
|
||||
use crate::{Location, VName};
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Entry {
|
||||
pub lexeme: Lexeme,
|
||||
pub location: Location,
|
||||
@@ -32,14 +34,15 @@ impl Entry {
|
||||
|
||||
#[must_use]
|
||||
pub fn is_keyword(&self) -> bool {
|
||||
matches!(
|
||||
self.lexeme,
|
||||
Lexeme::Const
|
||||
| Lexeme::Export
|
||||
| Lexeme::Import
|
||||
| Lexeme::Macro
|
||||
| Lexeme::Module
|
||||
)
|
||||
false
|
||||
// matches!(
|
||||
// self.lexeme,
|
||||
// Lexeme::Const
|
||||
// | Lexeme::Export
|
||||
// | Lexeme::Import
|
||||
// | Lexeme::Macro
|
||||
// | Lexeme::Module
|
||||
// )
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
@@ -51,9 +54,13 @@ impl Entry {
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn file(&self) -> Rc<VName> {
|
||||
pub fn file(&self) -> Arc<VName> {
|
||||
self.location.file().expect("An Entry can only have a range location")
|
||||
}
|
||||
|
||||
fn new(location: Location, lexeme: Lexeme) -> Self {
|
||||
Self { lexeme, location }
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Entry {
|
||||
@@ -66,9 +73,9 @@ impl AsRef<Location> for Entry {
|
||||
fn as_ref(&self) -> &Location { &self.location }
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Lexeme {
|
||||
Literal(Literal),
|
||||
Atom(Atom),
|
||||
Name(Tok<String>),
|
||||
Arrow(NotNan<f64>),
|
||||
/// Walrus operator (formerly shorthand macro)
|
||||
@@ -86,20 +93,19 @@ pub enum Lexeme {
|
||||
At,
|
||||
// Dot,
|
||||
Type, // type operator
|
||||
Comment(Rc<String>),
|
||||
Export,
|
||||
Import,
|
||||
Module,
|
||||
Macro,
|
||||
Const,
|
||||
Operators(Rc<VName>),
|
||||
Comment(Arc<String>),
|
||||
// Export,
|
||||
// Import,
|
||||
// Module,
|
||||
// Macro,
|
||||
// Const,
|
||||
Placeh(Placeholder),
|
||||
}
|
||||
|
||||
impl Display for Lexeme {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Literal(l) => write!(f, "{:?}", l),
|
||||
Self::Atom(a) => write!(f, "{a:?}"),
|
||||
Self::Name(token) => write!(f, "{}", **token),
|
||||
Self::Walrus => write!(f, ":="),
|
||||
Self::Arrow(prio) => write!(f, "={}=>", print_nat16(*prio)),
|
||||
@@ -116,14 +122,11 @@ impl Display for Lexeme {
|
||||
Self::At => write!(f, "@"),
|
||||
Self::Type => write!(f, ":"),
|
||||
Self::Comment(text) => write!(f, "--[{}]--", text),
|
||||
Self::Export => write!(f, "export"),
|
||||
Self::Import => write!(f, "import"),
|
||||
Self::Module => write!(f, "module"),
|
||||
Self::Const => write!(f, "const"),
|
||||
Self::Macro => write!(f, "macro"),
|
||||
Self::Operators(ops) => {
|
||||
write!(f, "operators[{}]", Interner::extern_all(ops).join(" "))
|
||||
},
|
||||
// Self::Export => write!(f, "export"),
|
||||
// Self::Import => write!(f, "import"),
|
||||
// Self::Module => write!(f, "module"),
|
||||
// Self::Const => write!(f, "const"),
|
||||
// Self::Macro => write!(f, "macro"),
|
||||
Self::Placeh(Placeholder { name, class }) => match *class {
|
||||
PHClass::Scalar => write!(f, "${}", **name),
|
||||
PHClass::Vec { nonzero, prio } => {
|
||||
@@ -147,97 +150,192 @@ impl Lexeme {
|
||||
)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn parser<E: chumsky::Error<Entry>>(
|
||||
self,
|
||||
) -> impl Parser<Entry, Entry, Error = E> + Clone {
|
||||
filter(move |ent: &Entry| ent.lexeme == self)
|
||||
pub fn strict_eq(&self, other: &Self) -> bool {
|
||||
match (self, other) {
|
||||
(Self::Arrow(f1), Self::Arrow(f2)) => f1 == f2,
|
||||
(Self::At, Self::At) | (Self::BR, Self::BR) => true,
|
||||
(Self::BS, Self::BS) /*| (Self::Const, Self::Const)*/ => true,
|
||||
// (Self::Export, Self::Export) | (Self::Import, Self::Import) => true,
|
||||
// (Self::Macro, Self::Macro) | (Self::Module, Self::Module) => true,
|
||||
(Self::NS, Self::NS) | (Self::Type, Self::Type) => true,
|
||||
(Self::Walrus, Self::Walrus) => true,
|
||||
(Self::Atom(a1), Self::Atom(a2)) => a1.0.strict_eq(&a2.0),
|
||||
(Self::Comment(c1), Self::Comment(c2)) => c1 == c2,
|
||||
(Self::LP(p1), Self::LP(p2)) | (Self::RP(p1), Self::RP(p2)) => p1 == p2,
|
||||
(Self::Name(n1), Self::Name(n2)) => n1 == n2,
|
||||
(Self::Placeh(ph1), Self::Placeh(ph2)) => ph1 == ph2,
|
||||
(_, _) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct LexedText(pub Vec<Entry>);
|
||||
#[allow(unused)]
|
||||
pub fn format(lexed: &[Entry]) -> String { lexed.iter().join(" ") }
|
||||
|
||||
impl Display for LexedText {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.0.iter().join(" "))
|
||||
/// Whether `c` may appear inside a name: any alphanumeric character or an
/// underscore.
pub fn namechar(c: char) -> bool {
  matches!(c, '_') || c.is_alphanumeric()
}
|
||||
/// Whether `c` may begin a name: any alphabetic character or an underscore.
pub fn namestart(c: char) -> bool {
  matches!(c, '_') || c.is_alphabetic()
}
|
||||
pub fn opchar(c: char) -> bool {
|
||||
!namestart(c) && !numstart(c) && !c.is_whitespace() && !"()[]{},".contains(c)
|
||||
}
|
||||
|
||||
/// Split `s` at the first character rejected by `pred`.
///
/// Returns the longest prefix whose characters all satisfy `pred`, followed
/// by the remainder. If every character is accepted the remainder is empty.
pub fn split_filter(
  s: &str,
  mut pred: impl FnMut(char) -> bool,
) -> (&str, &str) {
  match s.find(|c: char| !pred(c)) {
    Some(boundary) => s.split_at(boundary),
    None => (s, ""),
  }
}
|
||||
|
||||
fn lit_table() -> impl IntoIterator<Item = (&'static str, Lexeme)> {
|
||||
[
|
||||
("\\", Lexeme::BS),
|
||||
("@", Lexeme::At),
|
||||
("(", Lexeme::LP('(')),
|
||||
("[", Lexeme::LP('[')),
|
||||
("{", Lexeme::LP('{')),
|
||||
(")", Lexeme::RP('(')),
|
||||
("]", Lexeme::RP('[')),
|
||||
("}", Lexeme::RP('{')),
|
||||
("\n", Lexeme::BR),
|
||||
(":=", Lexeme::Walrus),
|
||||
("::", Lexeme::NS),
|
||||
(":", Lexeme::Type),
|
||||
]
|
||||
}
|
||||
|
||||
/// Lexer plugins that are always available; [lex] tries them after the
/// plugins supplied by the context.
static BUILTIN_ATOMS: &[&dyn LexerPlugin] = &[&lex_string, &lex_numeric];
|
||||
|
||||
pub fn lex(
|
||||
mut tokens: Vec<Entry>,
|
||||
mut data: &str,
|
||||
ctx: &impl Context,
|
||||
) -> ProjectResult<Vec<Entry>> {
|
||||
let mut prev_len = data.len() + 1;
|
||||
'tail:loop {
|
||||
if prev_len == data.len() {
|
||||
panic!("got stuck at {data:?}, parsed {:?}", tokens.last().unwrap());
|
||||
}
|
||||
prev_len = data.len();
|
||||
data = data.trim_start_matches(|c: char| c.is_whitespace() && c != '\n');
|
||||
let (head, _) = match next(data.chars()) {
|
||||
Some((h, t)) => (h, t.as_str()),
|
||||
None => return Ok(tokens),
|
||||
};
|
||||
for lexer in ctx.lexers().iter().chain(BUILTIN_ATOMS.iter()) {
|
||||
if let Some(res) = lexer(data, ctx) {
|
||||
let (atom, tail) = res?;
|
||||
if tail.len() == data.len() {
|
||||
panic!("lexer plugin consumed 0 characters")
|
||||
}
|
||||
let loc = ctx.location(data.len() - tail.len(), tail);
|
||||
tokens.push(Entry::new(loc, Lexeme::Atom(atom)));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
}
|
||||
for (prefix, lexeme) in lit_table() {
|
||||
if let Some(tail) = data.strip_prefix(prefix) {
|
||||
tokens.push(Entry::new(ctx.location(prefix.len(), tail), lexeme.clone()));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(tail) = data.strip_prefix(',') {
|
||||
let lexeme = Lexeme::Name(ctx.interner().i(","));
|
||||
tokens.push(Entry::new(ctx.location(1, tail), lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
if let Some(tail) = data.strip_prefix("--[") {
|
||||
let (note, tail) = (tail.split_once("]--"))
|
||||
.ok_or_else(|| NoCommentEnd(ctx.location(tail.len(), "")).rc())?;
|
||||
let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
|
||||
let location = ctx.location(note.len() + 3, tail);
|
||||
tokens.push(Entry::new(location, lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
if let Some(tail) = data.strip_prefix("--") {
|
||||
let (note, tail) = split_filter(tail, |c| c != '\n');
|
||||
let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
|
||||
let location = ctx.location(note.len(), tail);
|
||||
tokens.push(Entry::new(location, lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
if let Some(tail) = data.strip_prefix('=') {
|
||||
if tail.chars().next().map_or(false, numstart) {
|
||||
let (num, post_num) = split_filter(tail, numchar);
|
||||
if let Some(tail) = post_num.strip_prefix("=>") {
|
||||
let lexeme = Lexeme::Arrow(parse_num(num).map_err(|e| e.into_proj(num.len(), post_num, ctx))?.as_float());
|
||||
let location = ctx.location(num.len() + 3, tail);
|
||||
tokens.push(Entry::new(location, lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
}
|
||||
}
|
||||
// todo: parse placeholders, don't forget vectorials!
|
||||
if let Some(tail) = data.strip_prefix('$') {
|
||||
let (name, tail) = split_filter(tail, namechar);
|
||||
if !name.is_empty() {
|
||||
let name = ctx.interner().i(name);
|
||||
let location = ctx.location(name.len() + 1, tail);
|
||||
let lexeme = Lexeme::Placeh(Placeholder { name, class: PHClass::Scalar });
|
||||
tokens.push(Entry::new(location, lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
}
|
||||
if let Some(vec) = data.strip_prefix("..") {
|
||||
let (nonzero, tail) =
|
||||
vec.strip_prefix('.').map_or((false, vec), |t| (true, t));
|
||||
if let Some(tail) = tail.strip_prefix('$') {
|
||||
let (name, tail) = split_filter(tail, namechar);
|
||||
if !name.is_empty() {
|
||||
let (prio, priolen, tail) = tail
|
||||
.strip_prefix(':')
|
||||
.map(|tail| split_filter(tail, numchar))
|
||||
.filter(|(num, _)| !num.is_empty())
|
||||
.map(|(num_str, tail)| {
|
||||
parse_num(num_str)
|
||||
.map_err(|e| e.into_proj(num_str.len(), tail, ctx))
|
||||
.and_then(|num| {
|
||||
Ok(unwrap_or!(num => Numeric::Uint; {
|
||||
return Err(FloatPlacehPrio(ctx.location(num_str.len(), tail)).rc())
|
||||
}))
|
||||
})
|
||||
.map(|p| (p, num_str.len() + 1, tail))
|
||||
})
|
||||
.unwrap_or(Ok((0, 0, tail)))?;
|
||||
let byte_len = if nonzero { 4 } else { 3 } + priolen + name.len();
|
||||
let name = ctx.interner().i(name);
|
||||
let class = PHClass::Vec { nonzero, prio };
|
||||
let lexeme = Lexeme::Placeh(Placeholder { name, class });
|
||||
tokens.push(Entry::new(ctx.location(byte_len, tail), lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
}
|
||||
}
|
||||
if namestart(head) {
|
||||
let (name, tail) = split_filter(data, namechar);
|
||||
if !name.is_empty() {
|
||||
let lexeme = Lexeme::Name(ctx.interner().i(name));
|
||||
tokens.push(Entry::new(ctx.location(name.len(), tail), lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
}
|
||||
if opchar(head) {
|
||||
let (name, tail) = split_filter(data, opchar);
|
||||
if !name.is_empty() {
|
||||
let lexeme = Lexeme::Name(ctx.interner().i(name));
|
||||
tokens.push(Entry::new(ctx.location(name.len(), tail), lexeme));
|
||||
data = tail;
|
||||
continue 'tail;
|
||||
}
|
||||
}
|
||||
unreachable!(r#"opchar is pretty much defined as "not namechar" "#)
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn paren_parser(lp: char, rp: char) -> impl SimpleParser<char, Lexeme> {
|
||||
just(lp).to(Lexeme::LP(lp)).or(just(rp).to(Lexeme::RP(lp)))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn literal_parser<'a>(
|
||||
ctx: impl Context + 'a,
|
||||
) -> impl SimpleParser<char, Literal> + 'a {
|
||||
choice((
|
||||
// all ints are valid floats so it takes precedence
|
||||
number::int_parser().map(Literal::Uint),
|
||||
number::float_parser().map(Literal::Num),
|
||||
string::str_parser()
|
||||
.map(move |s| Literal::Str(ctx.interner().i(&s).into())),
|
||||
))
|
||||
}
|
||||
|
||||
/// Operator spellings that [lexer] adds to the operators reported by the
/// context before building the name parser.
pub static BASE_OPS: &[&str] = &[",", ".", "..", "...", "*"];
|
||||
|
||||
#[must_use]
|
||||
pub fn lexer<'a>(
|
||||
ctx: impl Context + 'a,
|
||||
source: Rc<String>,
|
||||
) -> impl SimpleParser<char, Vec<Entry>> + 'a {
|
||||
let all_ops = ctx
|
||||
.ops()
|
||||
.iter()
|
||||
.map(|op| op.as_ref())
|
||||
.chain(BASE_OPS.iter().cloned())
|
||||
.map(str::to_string)
|
||||
.collect::<Vec<_>>();
|
||||
choice((
|
||||
keyword("export").to(Lexeme::Export),
|
||||
keyword("module").to(Lexeme::Module),
|
||||
keyword("import").to(Lexeme::Import),
|
||||
keyword("macro").to(Lexeme::Macro),
|
||||
keyword("const").to(Lexeme::Const),
|
||||
operators_parser({
|
||||
let ctx = ctx.clone();
|
||||
move |s| ctx.interner().i(&s)
|
||||
})
|
||||
.map(|v| Lexeme::Operators(Rc::new(v))),
|
||||
paren_parser('(', ')'),
|
||||
paren_parser('[', ']'),
|
||||
paren_parser('{', '}'),
|
||||
just(":=").to(Lexeme::Walrus),
|
||||
just("=")
|
||||
.ignore_then(number::float_parser())
|
||||
.then_ignore(just("=>"))
|
||||
.map(Lexeme::rule),
|
||||
comment::comment_parser().map(|s| Lexeme::Comment(Rc::new(s))),
|
||||
placeholder::placeholder_parser(ctx.clone()).map(Lexeme::Placeh),
|
||||
just("::").to(Lexeme::NS),
|
||||
just('\\').to(Lexeme::BS),
|
||||
just('@').to(Lexeme::At),
|
||||
just(':').to(Lexeme::Type),
|
||||
just('\n').to(Lexeme::BR),
|
||||
// just('.').to(Lexeme::Dot),
|
||||
literal_parser(ctx.clone()).map(Lexeme::Literal),
|
||||
name::name_parser(&all_ops).map({
|
||||
let ctx = ctx.clone();
|
||||
move |n| Lexeme::Name(ctx.interner().i(&n))
|
||||
}),
|
||||
))
|
||||
.map_with_span(move |lexeme, range| Entry {
|
||||
lexeme,
|
||||
location: Location::Range {
|
||||
range,
|
||||
file: ctx.file(),
|
||||
source: source.clone(),
|
||||
},
|
||||
})
|
||||
.padded_by(one_of(" \t").repeated())
|
||||
.repeated()
|
||||
.then_ignore(end())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user