Transfer commit

2023-03-21 19:36:40 +00:00
parent 180ebb56fa
commit f3ce910f66
63 changed files with 1410 additions and 1023 deletions

View File

@@ -1,9 +1,10 @@
use std::rc::Rc;
use chumsky::{self, prelude::*, Parser};
use mappable_rc::Mrc;
use lasso::Spur;
use crate::enum_parser;
use crate::representations::Primitive;
use crate::representations::{Literal, ast::{Clause, Expr}};
use crate::utils::to_mrc_slice;
use super::lexer::Lexeme;
@@ -12,18 +13,22 @@ fn sexpr_parser<P>(
expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
Lexeme::paren_parser(expr.repeated()).map(|(del, b)| Clause::S(del, to_mrc_slice(b)))
Lexeme::paren_parser(expr.repeated())
.map(|(del, b)| Clause::S(del, Rc::new(b)))
}
/// Parses `\name.body` or `\name:type.body` where name is any valid name and type and body are
/// both expressions. Comments are allowed and ignored everywhere in between the tokens
fn lambda_parser<P>(
expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
/// Parses `\name.body` or `\name:type.body` where name is any valid name
/// and type and body are both expressions. Comments are allowed
/// and ignored everywhere in between the tokens
fn lambda_parser<'a, P, F>(
expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
F: Fn(&str) -> Spur + 'a {
just(Lexeme::BS)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(enum_parser!(Lexeme::Name))
.ignore_then(namelike_parser(intern))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(
just(Lexeme::Type)
@@ -35,20 +40,21 @@ where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
.then_ignore(just(Lexeme::name(".")))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(expr.repeated().at_least(1))
.map(|((name, typ), body): ((String, Vec<Expr>), Vec<Expr>)| {
// for ent in &mut body { ent.bind_parameter(&name) };
Clause::Lambda(name, to_mrc_slice(typ), to_mrc_slice(body))
.map(|((name, typ), body): ((Clause, Vec<Expr>), Vec<Expr>)| {
Clause::Lambda(Rc::new(name), Rc::new(typ), Rc::new(body))
})
}
/// see [lambda_parser] but `@` instead of `\` and the name is optional
fn auto_parser<P>(
expr: P
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone
where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
fn auto_parser<'a, P, F>(
expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
F: Fn(&str) -> Spur + 'a {
just(Lexeme::At)
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.ignore_then(enum_parser!(Lexeme::Name).or_not())
.ignore_then(namelike_parser(intern).or_not())
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(
just(Lexeme::Type)
@@ -60,23 +66,27 @@ where P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone {
.then_ignore(just(Lexeme::name(".")))
.then_ignore(enum_parser!(Lexeme::Comment).repeated())
.then(expr.repeated().at_least(1))
.try_map(|((name, typ), body): ((Option<String>, Vec<Expr>), Vec<Expr>), s| {
.try_map(|((name, typ), body): ((Option<Clause>, Vec<Expr>), Vec<Expr>), s| {
if name.is_none() && typ.is_empty() {
Err(Simple::custom(s, "Auto without name or type has no effect"))
} else {
Ok(Clause::Auto(name, to_mrc_slice(typ), to_mrc_slice(body)))
} else {
Ok(Clause::Auto(name.map(Rc::new), Rc::new(typ), Rc::new(body)))
}
})
}
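
// Hedged illustration of the surface syntax described by the two doc comments
// above; Orchid-style source strings, not parsed here. The exact spellings are
// an assumption based on the comments, not something this commit tests.
const BINDER_EXAMPLES: &[&str] = &[
    r"\x.x",      // lambda_parser: untyped, \name.body
    r"\x:T.x",    // lambda_parser: typed, \name:type.body
    r"@t:T. t",   // auto_parser: like lambda but with @, here named and typed
    r"@:T. f x",  // auto_parser: name omitted; omitting both name and type is an error
];
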
/// Parses a sequence of names separated by :: <br/>
/// Comments are allowed and ignored in between
fn name_parser() -> impl Parser<Lexeme, Vec<String>, Error = Simple<Lexeme>> + Clone {
enum_parser!(Lexeme::Name).separated_by(
enum_parser!(Lexeme::Comment).repeated()
.then(just(Lexeme::NS))
.then(enum_parser!(Lexeme::Comment).repeated())
).at_least(1)
pub fn ns_name_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Vec<Spur>, Error = Simple<Lexeme>> + Clone + 'a
where F: Fn(&str) -> Spur + 'a {
enum_parser!(Lexeme::Name)
.map(|s| intern(&s))
.separated_by(
enum_parser!(Lexeme::Comment).repeated()
.then(just(Lexeme::NS))
.then(enum_parser!(Lexeme::Comment).repeated())
).at_least(1)
}
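
// Minimal sketch of what ns_name_parser yields for "foo::bar::baz", using
// lasso directly and skipping the lexer; `segments` is a hypothetical helper,
// not part of this commit.
use lasso::{Rodeo, Spur};

fn segments(rodeo: &mut Rodeo, path: &str) -> Vec<Spur> {
    path.split("::").map(|seg| rodeo.get_or_intern(seg)).collect()
}
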
/// Parse any legal argument name starting with a `$`
@@ -87,42 +97,59 @@ fn placeholder_parser() -> impl Parser<Lexeme, String, Error = Simple<Lexeme>> +
})
}
pub fn namelike_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where F: Fn(&str) -> Spur + 'a {
choice((
just(Lexeme::name("...")).to(true)
.or(just(Lexeme::name("..")).to(false))
.then(placeholder_parser())
.then(
just(Lexeme::Type)
.ignore_then(enum_parser!(Lexeme::Uint))
.or_not().map(Option::unwrap_or_default)
)
.map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
prio.try_into().unwrap(),
nonzero
))}),
ns_name_parser(intern)
.map(|qualified| Clause::Name(Rc::new(qualified))),
))
}
pub fn clause_parser<'a, P, F>(
expr: P, intern: &'a F
) -> impl Parser<Lexeme, Clause, Error = Simple<Lexeme>> + Clone + 'a
where
P: Parser<Lexeme, Expr, Error = Simple<Lexeme>> + Clone + 'a,
F: Fn(&str) -> Spur + 'a {
enum_parser!(Lexeme::Comment).repeated()
.ignore_then(choice((
enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str)
.map(Primitive::Literal).map(Clause::P),
placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
namelike_parser(intern),
sexpr_parser(expr.clone()),
lambda_parser(expr.clone(), intern),
auto_parser(expr.clone(), intern),
just(Lexeme::At).ignore_then(expr.clone()).map(|arg| {
Clause::Explicit(Rc::new(arg))
})
))).then_ignore(enum_parser!(Lexeme::Comment).repeated())
}
/// Parse an expression
pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
pub fn xpr_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
recursive(|expr| {
let clause =
enum_parser!(Lexeme::Comment).repeated()
.ignore_then(choice((
enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str).map(Primitive::Literal).map(Clause::P),
placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
just(Lexeme::name("...")).to(true)
.or(just(Lexeme::name("..")).to(false))
.then(placeholder_parser())
.then(
just(Lexeme::Type)
.ignore_then(enum_parser!(Lexeme::Uint))
.or_not().map(Option::unwrap_or_default)
)
.map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
prio.try_into().unwrap(),
nonzero
))}),
name_parser().map(|qualified| Clause::Name {
local: if qualified.len() == 1 {Some(qualified[0].clone())} else {None},
qualified: to_mrc_slice(qualified)
}),
sexpr_parser(expr.clone()),
lambda_parser(expr.clone()),
auto_parser(expr.clone()),
just(Lexeme::At).ignore_then(expr.clone()).map(|arg| {
Clause::Explicit(Mrc::new(arg))
})
))).then_ignore(enum_parser!(Lexeme::Comment).repeated());
let clause = clause_parser(expr, intern);
clause.clone().then(
just(Lexeme::Type)
.ignore_then(clause.clone())
.repeated()
)
.map(|(val, typ)| Expr(val, to_mrc_slice(typ)))
.map(|(val, typ)| Expr(val, Rc::new(typ)))
}).labelled("Expression")
}
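
// The recursive() combinator above ties clause_parser and xpr_parser into one
// self-referential grammar. A minimal standalone sketch of the same pattern,
// on chars rather than Lexemes, with the chumsky API as used in this commit:
use chumsky::prelude::*;

fn nested_int() -> impl Parser<char, i64, Error = Simple<char>> {
    recursive(|expr| {
        // an integer, or the whole expression again inside parentheses
        text::int(10).map(|s: String| s.parse().unwrap())
            .or(expr.delimited_by(just('('), just(')')))
    })
}
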

View File

@@ -1,34 +1,33 @@
use std::rc::Rc;
use chumsky::{Parser, prelude::*};
use itertools::Itertools;
use mappable_rc::Mrc;
use lasso::Spur;
use crate::representations::sourcefile::Import;
use crate::utils::iter::{box_once, box_flatten, into_boxed_iter, BoxedIterIter};
use crate::utils::{to_mrc_slice, mrc_derive};
use crate::{enum_parser, box_chain};
use super::lexer::Lexeme;
#[derive(Debug, Clone)]
pub struct Import {
pub path: Mrc<[String]>,
/// If name is None, this is a wildcard import
pub name: Option<String>
}
/// initialize a BoxedIter<BoxedIter<String>> with a single element.
fn init_table(name: String) -> BoxedIterIter<'static, String> {
fn init_table(name: Spur) -> BoxedIterIter<'static, Spur> {
// I'm not at all confident that this is a good approach.
box_once(box_once(name))
}
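
// Conceptual stand-in for init_table with the project's aliases expanded,
// assuming BoxedIterIter<'a, T> boxes an iterator of boxed iterators of T
// (hypothetical spelled-out version, not part of this commit):
fn init_table_sketch<T: 'static>(
    x: T
) -> Box<dyn Iterator<Item = Box<dyn Iterator<Item = T>>>> {
    Box::new(std::iter::once(
        Box::new(std::iter::once(x)) as Box<dyn Iterator<Item = T>>
    ))
}
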
/// Parse an import command
/// Syntax is same as Rust's `use` except the verb is import, no trailing semi
/// and the delimiters are plain parentheses. Namespaces should preferably contain
/// crossplatform filename-legal characters but the symbols are explicitly allowed
/// to go wild. There's a blacklist in [name]
pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> {
// TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
recursive(|expr: Recursive<Lexeme, BoxedIterIter<String>, Simple<Lexeme>>| {
enum_parser!(Lexeme::Name)
/// Syntax is same as Rust's `use` except the verb is import, no trailing
/// semi and the delimiters are plain parentheses. Namespaces should
/// preferably contain crossplatform filename-legal characters but the
/// symbols are explicitly allowed to go wild.
/// There's a blacklist in [name]
pub fn import_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
let globstar = intern("*");
// TODO: this algorithm isn't cache friendly and copies a lot
recursive(move |expr:Recursive<Lexeme, BoxedIterIter<Spur>, Simple<Lexeme>>| {
enum_parser!(Lexeme::Name).map(|s| intern(s.as_str()))
.separated_by(just(Lexeme::NS))
.then(
just(Lexeme::NS)
@@ -39,15 +38,17 @@ pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
.map(|v| box_flatten(v.into_iter()))
.labelled("import group"),
// Each expr returns a list of imports, flatten those into a common list
just(Lexeme::name("*")).map(|_| init_table("*".to_string()))
// Each expr returns a list of imports, flatten into common list
just(Lexeme::name("*")).map(move |_| init_table(globstar))
.labelled("wildcard import"), // Just a *, wrapped
enum_parser!(Lexeme::Name).map(init_table)
enum_parser!(Lexeme::Name)
.map(|s| init_table(intern(s.as_str())))
.labelled("import terminal") // Just a name, wrapped
))
).or_not()
)
.map(|(name, opt_post): (Vec<String>, Option<BoxedIterIter<String>>)| -> BoxedIterIter<String> {
.map(|(name, opt_post): (Vec<Spur>, Option<BoxedIterIter<Spur>>)|
-> BoxedIterIter<Spur> {
if let Some(post) = opt_post {
Box::new(post.map(move |el| {
box_chain!(name.clone().into_iter(), el)
@@ -56,14 +57,17 @@ pub fn import_parser() -> impl Parser<Lexeme, Vec<Import>, Error = Simple<Lexeme
box_once(into_boxed_iter(name))
}
})
}).map(|paths| {
}).map(move |paths| {
paths.filter_map(|namespaces| {
let path = to_mrc_slice(namespaces.collect_vec());
let path_prefix = mrc_derive(&path, |p| &p[..p.len() - 1]);
match path.last()?.as_str() {
"*" => Some(Import { path: path_prefix, name: None }),
name => Some(Import { path: path_prefix, name: Some(name.to_owned()) })
}
let mut path = namespaces.collect_vec();
let name = path.pop()?;
Some(Import {
path: Rc::new(path),
name: {
if name == globstar { None }
else { Some(name.to_owned()) }
}
})
}).collect()
}).labelled("import")
}
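
// Runnable mini-version of the filter_map step above: pop the final path
// segment; the globstar segment means a wildcard import, i.e. name = None.
// Plain Strings stand in for interned Spurs (hedged sketch, not this commit):
fn to_import(mut path: Vec<String>) -> Option<(Vec<String>, Option<String>)> {
    let name = path.pop()?;
    Some((path, if name == "*" { None } else { Some(name) }))
}
// ["foo", "*"]   -> (["foo"], None)         i.e. import foo::*
// ["foo", "bar"] -> (["foo"], Some("bar"))  i.e. import foo::bar
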

View File

@@ -9,12 +9,8 @@ mod import;
mod enum_parser;
mod parse;
pub use sourcefile::FileEntry;
pub use sourcefile::line_parser;
pub use sourcefile::imports;
pub use sourcefile::exported_names;
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
pub use name::is_op;
pub use parse::{parse, reparse, ParseError};
pub use import::Import;
pub use number::{float_parser, int_parser};

View File

@@ -2,11 +2,12 @@ use std::{ops::Range, fmt::Debug};
use chumsky::{prelude::{Simple, end}, Stream, Parser};
use itertools::Itertools;
use lasso::Spur;
use thiserror::Error;
use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}};
use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}, representations::sourcefile::FileEntry};
use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};
use super::{Lexeme, lexer, line_parser, LexerEntry};
#[derive(Error, Debug, Clone)]
@@ -17,14 +18,19 @@ pub enum ParseError {
Ast(Vec<Simple<Lexeme>>)
}
pub fn parse<'a, Op>(ops: &[Op], data: &str) -> Result<Vec<FileEntry>, ParseError>
where Op: 'a + AsRef<str> + Clone {
pub fn parse<'a, Op, F>(
ops: &[Op], data: &str, intern: &F
) -> Result<Vec<FileEntry>, ParseError>
where
Op: 'a + AsRef<str> + Clone,
F: Fn(&str) -> Spur
{
let lexie = lexer(ops);
let token_batchv = split_lines(data).map(|line| {
lexie.parse(line).map_err(ParseError::Lex)
}).collect::<Result<Vec<_>, _>>()?;
println!("Lexed:\n{:?}", LexedText(token_batchv.clone()));
let parsr = line_parser().then_ignore(end());
let parsr = line_parser(intern).then_ignore(end());
let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
!v.is_empty()
}).map(|v| {
@@ -47,10 +53,15 @@ where Op: 'a + AsRef<str> + Clone {
else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}
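
// One way a caller could satisfy the new F: Fn(&str) -> Spur bound: a
// lasso::Rodeo behind a RefCell, since get_or_intern takes &mut self.
// Hedged sketch; the real call site is outside this commit's hunks.
use std::cell::RefCell;
use lasso::{Rodeo, Spur};

fn parse_with_interner(ops: &[&str], data: &str) -> Result<Vec<FileEntry>, ParseError> {
    let rodeo = RefCell::new(Rodeo::default());
    let intern = |s: &str| -> Spur { rodeo.borrow_mut().get_or_intern(s) };
    parse(ops, data, &intern)
}
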
pub fn reparse<'a, Op>(ops: &[Op], data: &str, pre: &[FileEntry])
pub fn reparse<'a, Op, F>(
ops: &[Op], data: &str, pre: &[FileEntry], intern: &F
)
-> Result<Vec<FileEntry>, ParseError>
where Op: 'a + AsRef<str> + Clone {
let result = parse(ops, data)?;
where
Op: 'a + AsRef<str> + Clone,
F: Fn(&str) -> Spur
{
let result = parse(ops, data, intern)?;
Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {

View File

@@ -1,164 +1,64 @@
use std::collections::HashSet;
use std::iter;
use std::rc::Rc;
use crate::{enum_parser, box_chain};
use crate::ast::{Expr, Clause, Rule};
use crate::utils::{to_mrc_slice, one_mrc_slice};
use crate::utils::Stackframe;
use crate::utils::iter::box_empty;
use crate::representations::sourcefile::FileEntry;
use crate::enum_parser;
use crate::ast::{Expr, Rule};
use super::expression::xpr_parser;
use super::import::{self, Import};
use super::expression::{xpr_parser, ns_name_parser};
use super::import::import_parser;
use super::lexer::Lexeme;
use chumsky::{Parser, prelude::*};
use lasso::Spur;
use ordered_float::NotNan;
use lazy_static::lazy_static;
/// Anything we might encounter in a file
#[derive(Debug, Clone)]
pub enum FileEntry {
Import(Vec<import::Import>),
Comment(String),
/// The bool indicates whether the rule is exported - whether tokens uniquely defined inside it
/// should be exported
Rule(Rule, bool),
Export(Vec<Vec<String>>)
}
fn visit_all_names_clause_recur<'a, F>(
clause: &'a Clause,
binds: Stackframe<String>,
cb: &mut F
) where F: FnMut(&'a [String]) {
match clause {
Clause::Auto(name, typ, body) => {
for x in typ.iter() {
visit_all_names_expr_recur(x, binds.clone(), cb)
}
let binds_dup = binds.clone();
let new_binds = if let Some(n) = name {
binds_dup.push(n.to_owned())
} else {
binds
};
for x in body.iter() {
visit_all_names_expr_recur(x, new_binds.clone(), cb)
}
},
Clause::Lambda(name, typ, body) => {
for x in typ.iter() {
visit_all_names_expr_recur(x, binds.clone(), cb)
}
for x in body.iter() {
visit_all_names_expr_recur(x, binds.push(name.to_owned()), cb)
}
},
Clause::S(_, body) => for x in body.iter() {
visit_all_names_expr_recur(x, binds.clone(), cb)
},
Clause::Name{ local: Some(name), qualified } => {
if binds.iter().all(|x| x != name) {
cb(qualified)
}
}
_ => (),
}
}
/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
/// aren't names, such as all bound parameters. Generally speaking, this is not a very
/// sophisticated search.
///
/// TODO: find a way to exclude parameters
fn visit_all_names_expr_recur<'a, F>(
expr: &'a Expr,
binds: Stackframe<String>,
cb: &mut F
) where F: FnMut(&'a [String]) {
let Expr(val, typ) = expr;
visit_all_names_clause_recur(val, binds.clone(), cb);
for typ in typ.as_ref() {
visit_all_names_clause_recur(typ, binds.clone(), cb);
}
}
/// Collect all names that occur in an expression
fn find_all_names(expr: &Expr) -> HashSet<&[String]> {
let mut ret = HashSet::new();
visit_all_names_expr_recur(expr, Stackframe::new(String::new()), &mut |n| {
if !n.last().unwrap().starts_with('$') {
ret.insert(n);
}
});
ret
}
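
// Hedged mini-reimplementation of the Stackframe pattern the visitors above
// depend on, assuming it is an immutable linked stack on the call stack, so
// each recursive branch can push a binder without copying the whole list:
#[derive(Clone)]
struct Frame<'a, T> { item: T, prev: Option<&'a Frame<'a, T>> }

impl<'a, T: PartialEq> Frame<'a, T> {
    fn contains(&self, x: &T) -> bool {
        self.item == *x || self.prev.map_or(false, |p| p.contains(x))
    }
}
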
fn rule_parser() -> impl Parser<Lexeme, (Vec<Expr>, NotNan<f64>, Vec<Expr>), Error = Simple<Lexeme>> {
xpr_parser().repeated()
fn rule_parser<'a, F>(intern: &'a F) -> impl Parser<Lexeme, (
Vec<Expr>, NotNan<f64>, Vec<Expr>
), Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
xpr_parser(intern).repeated()
.then(enum_parser!(Lexeme::Rule))
.then(xpr_parser().repeated())
// .map(|((lhs, prio), rhs)| )
.then(xpr_parser(intern).repeated())
.map(|((a, b), c)| (a, b, c))
.labelled("Rule")
}
pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
pub fn line_parser<'a, F>(intern: &'a F)
-> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> + 'a
where F: Fn(&str) -> Spur + 'a {
choice((
// In case the usercode wants to parse doc
enum_parser!(Lexeme >> FileEntry; Comment),
just(Lexeme::Import)
.ignore_then(import_parser().map(FileEntry::Import))
.ignore_then(import_parser(intern).map(FileEntry::Import))
.then_ignore(enum_parser!(Lexeme::Comment).or_not()),
just(Lexeme::Export).map_err_with_span(|e, s| {
println!("{:?} could not yield an export", s); e
}).ignore_then(
just(Lexeme::NS).ignore_then(
enum_parser!(Lexeme::Name).map(|n| vec![n])
ns_name_parser(intern).map(Rc::new)
.separated_by(just(Lexeme::name(",")))
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
).map(FileEntry::Export)
.or(rule_parser().map(|(source, prio, target)| {
.or(rule_parser(intern).map(|(source, prio, target)| {
FileEntry::Rule(Rule {
source: to_mrc_slice(source),
source: Rc::new(source),
prio,
target: to_mrc_slice(target)
target: Rc::new(target)
}, true)
}))
),
// This could match almost anything so it has to go last
rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{
source: to_mrc_slice(source),
prio,
target: to_mrc_slice(target)
}, false)),
rule_parser(intern).map(|(source, prio, target)| {
FileEntry::Rule(Rule{
source: Rc::new(source),
prio,
target: Rc::new(target)
}, false)
}),
))
}
/// Collect all exported names (and a lot of other words) from a file
pub fn exported_names(src: &[FileEntry]) -> HashSet<&[String]> {
src.iter().flat_map(|ent| match ent {
FileEntry::Rule(Rule{source, target, ..}, true) =>
box_chain!(source.iter(), target.iter()),
_ => box_empty()
}).flat_map(find_all_names).chain(
src.iter().filter_map(|ent| {
if let FileEntry::Export(names) = ent {Some(names.iter())} else {None}
}).flatten().map(Vec::as_slice)
).collect()
}
/// Summarize all imports from a file in a single list of qualified names
pub fn imports<'a, 'b, I>(
src: I
) -> impl Iterator<Item = &'b import::Import> + 'a
where I: Iterator<Item = &'b FileEntry> + 'a {
src.filter_map(|ent| match ent {
FileEntry::Import(impv) => Some(impv.iter()),
_ => None
}).flatten()
}
pub fn split_lines(data: &str) -> impl Iterator<Item = &str> {
let mut source = data.char_indices();
let mut last_slice = 0;