Difficult ownership questions

This commit is contained in:
2022-05-30 05:21:00 +02:00
parent 1e8aa45176
commit ec1734e113
15 changed files with 441 additions and 89 deletions

View File

@@ -6,63 +6,76 @@ use super::number;
use super::misc;
use super::name;
#[derive(Debug)]
/// An S-expression as read from a source file
#[derive(Debug, Clone)]
pub enum Expr {
Num(f64),
Int(u64),
Char(char),
Str(String),
Name(String),
Name(Vec<String>),
S(Vec<Expr>),
Lambda(String, Option<Box<Expr>>, Vec<Expr>),
Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
Typed(Box<Expr>, Box<Expr>)
}
/// Parse a type annotation
fn typed_parser<'a>(
expr: Recursive<'a, char, Expr, Simple<char>>,
ops: &'a [String]
expr: Recursive<'a, char, Expr, Simple<char>>
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
just(':').ignore_then(expr)
}
/// Parse an expression without a type annotation
fn untyped_xpr_parser<'a>(
expr: Recursive<'a, char, Expr, Simple<char>>,
ops: &'a [String]
ops: &[&'a str]
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
let lambda = just('\\')
.ignore_then(name::name_parser(ops))
.then(typed_parser(expr.clone(), ops).or_not())
.then_ignore(just('.'))
.then(expr.clone().repeated().at_least(1))
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
let auto = just('@')
.ignore_then(name::name_parser(ops).or_not())
.then(typed_parser(expr.clone(), ops).or_not())
.then_ignore(just('.'))
.then(expr.clone().repeated().at_least(1))
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
// basic S-expression rule
let sexpr = expr.clone()
.repeated()
.delimited_by(just('('), just(')'))
.map(Expr::S);
// Blocks
// can and therefore do match everything up to the closing paren
// \name. body
// \name:type. body
let lambda = just('\\')
.ignore_then(text::ident())
.then(typed_parser(expr.clone()).or_not())
.then_ignore(just('.'))
.then(expr.clone().repeated().at_least(1))
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
// @name. body
// @name:type. body
// @:type. body
let auto = just('@')
.ignore_then(text::ident().or_not())
.then(typed_parser(expr.clone()).or_not())
.then_ignore(just('.'))
.then(expr.clone().repeated().at_least(1))
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
choice((
number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence
number::float_parser().map(Expr::Num),
number::int_parser().map(Expr::Int),
string::char_parser().map(Expr::Char),
string::str_parser().map(Expr::Str),
name::name_parser(ops).map(Expr::Name),
name::name_parser(ops).map(Expr::Name), // includes namespacing
sexpr,
lambda,
auto
)).padded()
}
pub fn expression_parser(ops: &[String]) -> impl Parser<char, Expr, Error = Simple<char>> + '_ {
/// Parse any expression with a type annotation, surrounded by comments
pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
// This approach to parsing comments is ugly and error-prone,
// but I don't have a lot of other ideas
return recursive(|expr| {
return misc::comment_parser().or_not().ignore_then(
untyped_xpr_parser(expr.clone(), &ops)
.then(typed_parser(expr, ops).or_not())
.then(typed_parser(expr).or_not())
.map(|(val, t)| match t {
Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
None => val

View File

@@ -1,57 +1,58 @@
use chumsky::{Parser, prelude::*, text::Character};
use std::iter;
use chumsky::{Parser, prelude::*};
use super::name;
enum Import {
Name(Vec<String>, String),
All(Vec<String>)
}
fn prefix(pre: Vec<String>, im: Import) -> Import {
match im {
Import::Name(ns, name) => Import::Name(
pre.into_iter().chain(ns.into_iter()).collect(),
name
),
Import::All(ns) => Import::All(
pre.into_iter().chain(ns.into_iter()).collect()
)
}
/// A single imported item, as produced by `import_parser`
#[derive(Debug, Clone)]
pub struct Import {
/// Every segment of the imported path except the last one
pub path: Vec<String>,
/// The last segment of the imported path, or `None` if that segment was the `*` wildcard
pub name: Option<String>
}
type BoxedStrIter = Box<dyn Iterator<Item = String>>;
type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
pub type BoxedStrIter = Box<dyn Iterator<Item = String>>;
pub type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
/// initialize a Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>
/// with a single element.
fn init_table(name: String) -> BoxedStrIterIter {
Box::new(vec![Box::new(vec![name].into_iter()) as BoxedStrIter].into_iter())
// I'm not confident at all that this is a good approach.
Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter))
}
/// Parse an import command
/// Syntax is same as Rust's `use` except the verb is import, no trailing semi
/// and the delimiters are plain parentheses. Namespaces should preferably contain
/// crossplatform filename-legal characters but the symbols are explicitly allowed
/// to go wild. There's a blacklist in [name]
pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
// TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
name::modname_parser()
.padded()
.then_ignore(just("::"))
.repeated()
.then(
choice((
expr.clone()
.separated_by(just(','))
.delimited_by(just('('), just(')'))
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
just("*").map(|s| init_table(s.to_string())),
name::modname_parser().map(init_table)
)).padded()
).map(|(pre, post)| {
Box::new(post.map(move |el| {
Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
})) as BoxedStrIterIter
})
.padded()
.then_ignore(just("::"))
.repeated()
.then(
choice((
expr.clone()
.separated_by(just(','))
.delimited_by(just('('), just(')'))
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
// Each expr returns a list of imports, flatten those into a common list
just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped
name::modname_parser().map(init_table) // Just a name, wrapped
)).padded()
).map(|(pre, post)| {
Box::new(post.map(move |el| {
Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
})) as BoxedStrIterIter
})
}).padded().map(|paths| {
paths.filter_map(|namespaces| {
let mut path: Vec<String> = namespaces.collect();
match path.pop()?.as_str() {
"*" => Some(Import::All(path)),
name => Some(Import::Name(path, name.to_owned()))
"*" => Some(Import { path, name: None }),
name => Some(Import { path, name: Some(name.to_owned()) })
}
}).collect()
})

View File

@@ -1,5 +1,6 @@
pub use chumsky::{self, prelude::*, Parser};
/// Parses Lua-style comments
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
any().repeated().delimited_by(just("--["), just("]--")).or(
any().repeated().delimited_by(just("--"), just("\n"))

View File

@@ -5,5 +5,12 @@ mod misc;
mod import;
mod name;
mod substitution;
mod sourcefile;
pub use expression::Expr;
pub use expression::expression_parser;
pub use sourcefile::FileEntry;
pub use sourcefile::file_parser;
pub use sourcefile::imports;
pub use sourcefile::exported_names;
pub use import::Import;

View File

@@ -1,28 +1,46 @@
use chumsky::{self, prelude::*, Parser};
fn op_parser_recur<'a, 'b>(ops: &'a [String]) -> BoxedParser<'b, char, String, Simple<char>> {
if ops.len() == 1 { just(ops[0].clone()).boxed() }
else { just(ops[0].clone()).or(op_parser_recur(&ops[1..])).boxed() }
}
fn op_parser(ops: &[String]) -> BoxedParser<char, String, Simple<char>> {
/// Matches any one of the passed operators, longest-first
fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>> {
let mut sorted_ops = ops.to_vec();
sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
op_parser_recur(&sorted_ops)
sorted_ops.into_iter()
.map(|op| just(op.to_string()).boxed())
.reduce(|a, b| a.or(b).boxed()).unwrap()
}
pub fn modname_parser() -> impl Parser<char, String, Error = Simple<char>> {
let not_name_char: Vec<char> = vec![':', '\\', '"', '\'', '(', ')', '.'];
/// Matches anything that's allowed as an operator
///
/// Blacklist rationale:
/// - `:` is used for namespacing and type annotations, both are distinguished from operators
/// - `\` and `@` are parametric expression starters
/// - `"` and `'` are read as primitives and would never match.
/// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming.
/// - `.` is the discriminator for parametrics.
///
/// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but
/// then parametrics should take precedence, which might break stuff. investigate.
///
/// TODO: `'` could work as an operator whenever it isn't closed. It's common in maths so it's
/// worth a try
///
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
/// common in maths so it's worth a try. Investigate.
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
    // Characters that can never be part of a name; the rationale for each one is given in the
    // doc comment of this function.
    const BLACKLIST: [char; 8] = [':', '\\', '@', '"', '\'', '(', ')', '.'];
    // A name is a nonempty run of characters that are neither whitespace nor blacklisted
    filter(|c: &char| !(c.is_whitespace() || BLACKLIST.contains(c)))
        .repeated().at_least(1)
        .collect()
}
pub fn name_parser<'a>(ops: &'a [String]) -> impl Parser<char, String, Error = Simple<char>> + 'a {
/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
/// blacklisted character as a new operator.
pub fn name_parser<'a>(
ops: &[&'a str]
) -> impl Parser<char, Vec<String>, Error = Simple<char>> + 'a {
choice((
op_parser(ops), // First try to parse a known operator
text::ident(), // Failing that, parse plain text
// Finally parse everything until tne next terminal as a new operator
modname_parser()
)).padded()
modname_parser() // Finally parse everything until tne next terminal as a new operator
)).padded().separated_by(just("::")).padded()
}

View File

@@ -6,6 +6,9 @@ fn assert_not_digit(base: u32, c: char) {
}
}
/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
///
/// TODO: this should use separated_by and parse the leading group too
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
just('_')
.ignore_then(text::digits(base))
@@ -13,6 +16,9 @@ fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simpl
.map(|sv| sv.iter().map(|s| s.chars()).flatten().collect())
}
/// parse a grouped uint
///
/// Not to be confused with [int_parser] which does a lot more
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
text::int(base)
.then(separated_digits_parser(base))
@@ -21,6 +27,8 @@ fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
})
}
/// parse exponent notation, or return 0 as the default exponent.
/// The exponent is always in decimal.
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
return choice((
just('p')
@@ -32,6 +40,9 @@ fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
)).or_else(|_| Ok(0))
}
/// returns a mapper that converts a mantissa and an exponent into an uint
///
/// TODO it panics if it finds a negative exponent
fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
return move |(val, exp)| {
if exp == 0 {val}
@@ -39,6 +50,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
};
}
/// returns a mapper that converts a mantissa and an exponent into a float
fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
return move |(val, exp)| {
if exp == 0 {val}
@@ -46,11 +58,15 @@ fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
}
}
/// Parse an uint given as a mantissa in `base`, optionally followed by an exponent.
///
/// The check on 'p' runs when the parser is built: 'p' must not be a digit in `base`
/// (the original note says this panics otherwise).
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
    assert_not_digit(base, 'p');
    let mantissa = uint_parser(base);
    let exponent = pow_parser();
    // Combine mantissa and exponent into the final integer
    mantissa.then(exponent).map(nat2u(u64::from(base)))
}
/// parse an uint from a base determined by its prefix or lack thereof
///
/// Not to be confused with [uint_parser] which is a component of it.
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
choice((
just("0b").ignore_then(pow_uint_parser(2)),
@@ -60,6 +76,7 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
))
}
/// parse a float from dot notation
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
uint_parser(base)
.then_ignore(just('.'))
@@ -73,11 +90,14 @@ fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
})
}
/// Parse a float given in dot notation in `base`, optionally followed by an exponent.
///
/// The check on 'p' runs when the parser is built, before any input is read.
fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
    assert_not_digit(base, 'p');
    let mantissa = dotted_parser(base);
    let exponent = pow_parser();
    // Combine mantissa and exponent into the final float
    mantissa.then(exponent).map(nat2f(u64::from(base)))
}
/// parse a float with dotted and optionally exponential notation from a base determined by its
/// prefix
pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
choice((
just("0b").ignore_then(pow_float_parser(2)),

107
src/parse/sourcefile.rs Normal file
View File

@@ -0,0 +1,107 @@
use std::collections::HashSet;
use std::iter;
use super::expression::Expr;
use super::import;
use super::misc;
use super::substitution::substitution_parser;
use super::substitution::Substitution;
use chumsky::{Parser, prelude::*};
/// Anything we might encounter in a file
///
/// [file_parser] produces one of these for every entry it recognizes.
#[derive(Debug, Clone)]
pub enum FileEntry {
/// The list of imports produced by one run of the import parser
Import(Vec<import::Import>),
/// The output of the comment parser
Comment(String),
/// A substitution rule that was not preceded by the `export` keyword
Substitution(Substitution),
/// A substitution rule that was preceded by the `export` keyword
Export(Substitution)
}
/// Walk an expression tree and yield the payload of every `Expr::Name` found in it.
///
/// The search is purely syntactic: the binders of lambdas and autos are not tracked, so every
/// use of a bound parameter in a body or type annotation is reported like any other name.
///
/// TODO: find a way to exclude parameters
fn find_all_names_recur(expr: &Expr) -> Box<dyn Iterator<Item = &Vec<String>> + '_> {
    match expr {
        Expr::Name(name) => Box::new(iter::once(name)),
        Expr::S(items) => Box::new(items.iter().flat_map(find_all_names_recur)),
        Expr::Typed(value, annotation) => {
            // Names of the value come first, then those of its type
            let in_value = find_all_names_recur(value);
            let in_annotation = find_all_names_recur(annotation);
            Box::new(in_value.chain(in_annotation))
        }
        Expr::Lambda(_, annotation, body) | Expr::Auto(_, annotation, body) => {
            // The optional type annotation is searched first, then every expression of the body
            let in_annotation = annotation.iter().flat_map(|typ| find_all_names_recur(typ));
            let in_body = body.iter().flat_map(find_all_names_recur);
            Box::new(in_annotation.chain(in_body))
        }
        // Every other kind of expression contributes no names
        _ => Box::new(iter::empty())
    }
}
/// Gather every name that occurs in an expression into a set, dropping duplicates
fn find_all_names(expr: &Expr) -> HashSet<&Vec<String>> {
    let mut names = HashSet::new();
    names.extend(find_all_names_recur(expr));
    names
}
/// Parse a whole file into the list of entries it contains.
///
/// `pattern_ops` and `ops` are both handed on unchanged to the substitution parser. The input
/// must be consumed up to its end for the parse to succeed.
///
/// NOTE(review): `padded()` presumably consumes newlines as well, in which case the `'\n'`
/// separator may never get a chance to match between two entries. Verify against chumsky.
pub fn file_parser<'a>(
    pattern_ops: &[&'a str], ops: &[&'a str]
) -> impl Parser<char, Vec<FileEntry>, Error = Simple<char>> + 'a {
    // In case the usercode wants to parse doc
    let comment = misc::comment_parser().map(FileEntry::Comment);
    let import = import::import_parser().map(FileEntry::Import);
    let export = text::keyword("export")
        .ignore_then(substitution_parser(pattern_ops, ops))
        .map(FileEntry::Export);
    // This could match almost anything so it has to go last
    let substitution = substitution_parser(pattern_ops, ops).map(FileEntry::Substitution);
    choice((comment, import, export, substitution))
        .padded()
        .separated_by(just('\n'))
        .then_ignore(end())
}
/// Decide whether a string can be an operator.
///
/// Only the first character is inspected: it must exist and must not be alphanumeric. Digits
/// and text are therefore fine anywhere after the start. The empty string is not an operator.
fn is_op(s: &str) -> bool {
    matches!(s.chars().next(), Some(first) if !first.is_alphanumeric())
}
/// Collect all exported names (and a lot of other words) from a file
pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
src.iter().filter_map(|ent| match ent {
FileEntry::Export(a) => Some(&a.source),
_ => None
}).map(find_all_names).flatten().collect()
}
/// Collect all operators defined in a file (and some other words)
///
/// Looks at the source pattern of every `Export` entry and, unless `exported_only` is set, of
/// every `Substitution` entry too. Of the names found there, the single-segment ones accepted by
/// [is_op] are returned. The order of the result is unspecified because the names pass through
/// a `HashSet`.
fn defined_ops(src: &[FileEntry], exported_only: bool) -> Vec<&String> {
    let all_names: HashSet<&Vec<String>> = src.iter()
        .filter_map(|entry| match entry {
            FileEntry::Substitution(rule) if !exported_only => Some(&rule.source),
            FileEntry::Export(rule) => Some(&rule.source),
            _ => None
        })
        .flat_map(find_all_names)
        .collect();
    // Dedupe stage of dubious value; collecting into a hashset may take longer than
    // handling duplicates would with a file of sensible size.
    all_names.into_iter()
        .filter_map(|name| match name.as_slice() {
            // If it's namespaced, it's imported.
            [op] if is_op(op) => Some(op),
            _ => None
        })
        .collect()
}
/// Collect all operators from a file
pub fn all_ops(src: &[FileEntry]) -> Vec<&String> { defined_ops(src, false) }
/// Collect exported operators from a file (plus some extra)
pub fn exported_ops(src: &[FileEntry]) -> Vec<&String> { defined_ops(src, true) }
/// Summarize all imports from a file in a single list of qualified names
///
/// The imports of every `Import` entry are concatenated in file order; all other entries are
/// ignored. Takes a slice rather than `&Vec`, so existing `&Vec<FileEntry>` arguments still
/// coerce.
pub fn imports(src: &[FileEntry]) -> Vec<&import::Import> {
    src.iter()
        .filter_map(|entry| match entry {
            FileEntry::Import(list) => Some(list),
            _ => None
        })
        .flatten()
        .collect()
}

View File

@@ -1,6 +1,8 @@
use chumsky::{self, prelude::*, Parser};
/// Parses a text character that is not the specified delimiter
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
// Copied directly from Chumsky's JSON example.
let escape = just('\\').ignore_then(
just('\\')
.or(just('/'))
@@ -27,15 +29,17 @@ fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
filter(move |&c| c != '\\' && c != delim).or(escape)
}
/// Parse a character literal: a single text character enclosed in single quotes
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
    const QUOTE: char = '\'';
    let opening = just(QUOTE);
    let closing = just(QUOTE);
    // Only the character between the quotes is kept
    opening.ignore_then(text_parser(QUOTE)).then_ignore(closing)
}
/// Parse a string between double quotes
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
just('"')
.ignore_then(
text_parser('"').map(Some)
.or(just("\\\n").map(|_| None))
.or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
.repeated()
).then_ignore(just('"'))
.flatten().collect()

View File

@@ -2,16 +2,28 @@ use chumsky::{self, prelude::*, Parser};
use super::{expression, number::float_parser};
#[derive(Debug, Clone)]
pub struct Substitution {
source: expression::Expr,
priority: f64,
target: expression::Expr
pub source: expression::Expr,
pub priority: f64,
pub target: expression::Expr
}
pub fn substitutionParser<'a>(
ops: &'a [String]
/// Parses a substitution rule of the forms
///
/// ```orchid
/// main = \x. ...
/// $a + $b = (add $a $b)
/// (foo bar baz) =1.1=> (foo 1 e)
/// reee =2=> shadow_reee
/// shadow_reee =0.9=> reee
/// ```
/// TBD whether this disables reee in the specified range or loops forever
pub fn substitution_parser<'a>(
pattern_ops: &[&'a str],
ops: &[&'a str]
) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
expression::expression_parser(ops)
expression::expression_parser(pattern_ops)
.then_ignore(just('='))
.then(
float_parser().then_ignore(just("=>"))