Difficult ownership questions
This commit is contained in:
18
README.md
18
README.md
@@ -262,14 +262,14 @@ the purposes of substitution.
|
|||||||
This is very far away so I don't want to make promises, but I have some
|
This is very far away so I don't want to make promises, but I have some
|
||||||
ideas.
|
ideas.
|
||||||
|
|
||||||
[ ] early execution of functions on any subset of their arguments where it
|
- [ ] early execution of functions on any subset of their arguments where
|
||||||
could provide substantial speedup
|
it could provide substantial speedup
|
||||||
[ ] tracking copies of expressions and evaluating them only once
|
- [ ] tracking copies of expressions and evaluating them only once
|
||||||
[ ] Many cases of single recursion converted to loops
|
- [ ] Many cases of single recursion converted to loops
|
||||||
[ ] tail recursion
|
- [ ] tail recursion
|
||||||
[ ] 2 distinct loops where the tail doesn't use the arguments
|
- [ ] 2 distinct loops where the tail doesn't use the arguments
|
||||||
[ ] reorder operations to favour this scenario
|
- [ ] reorder operations to favour this scenario
|
||||||
[ ] reactive calculation of values that are deemed to be read more often
|
- [ ] reactive calculation of values that are deemed to be read more often
|
||||||
than written
|
than written
|
||||||
[ ] automatic profiling based on performance metrics generated by debug
|
- [ ] automatic profiling based on performance metrics generated by debug
|
||||||
builds
|
builds
|
||||||
@@ -3,12 +3,14 @@ use std::io::{self, Read};
|
|||||||
use chumsky::{Parser, prelude::*};
|
use chumsky::{Parser, prelude::*};
|
||||||
|
|
||||||
mod parse;
|
mod parse;
|
||||||
|
mod project;
|
||||||
|
mod utils;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let mut input = String::new();
|
let mut input = String::new();
|
||||||
let mut stdin = io::stdin();
|
let mut stdin = io::stdin();
|
||||||
stdin.read_to_string(&mut input).unwrap();
|
stdin.read_to_string(&mut input).unwrap();
|
||||||
let ops: Vec<String> = vec!["$", "."].iter().map(|&s| s.to_string()).collect();
|
let ops: Vec<&str> = vec!["$", "."];
|
||||||
let output = parse::expression_parser(&ops).then_ignore(end()).parse(input);
|
let output = parse::expression_parser(&ops).then_ignore(end()).parse(input);
|
||||||
println!("\nParsed:\n{:?}", output);
|
println!("\nParsed:\n{:?}", output);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,63 +6,76 @@ use super::number;
|
|||||||
use super::misc;
|
use super::misc;
|
||||||
use super::name;
|
use super::name;
|
||||||
|
|
||||||
#[derive(Debug)]
|
/// An S-expression as read from a source file
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
pub enum Expr {
|
pub enum Expr {
|
||||||
Num(f64),
|
Num(f64),
|
||||||
Int(u64),
|
Int(u64),
|
||||||
Char(char),
|
Char(char),
|
||||||
Str(String),
|
Str(String),
|
||||||
Name(String),
|
Name(Vec<String>),
|
||||||
S(Vec<Expr>),
|
S(Vec<Expr>),
|
||||||
Lambda(String, Option<Box<Expr>>, Vec<Expr>),
|
Lambda(String, Option<Box<Expr>>, Vec<Expr>),
|
||||||
Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
|
Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
|
||||||
Typed(Box<Expr>, Box<Expr>)
|
Typed(Box<Expr>, Box<Expr>)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a type annotation
|
||||||
fn typed_parser<'a>(
|
fn typed_parser<'a>(
|
||||||
expr: Recursive<'a, char, Expr, Simple<char>>,
|
expr: Recursive<'a, char, Expr, Simple<char>>
|
||||||
ops: &'a [String]
|
|
||||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||||
just(':').ignore_then(expr)
|
just(':').ignore_then(expr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse an expression without a type annotation
|
||||||
fn untyped_xpr_parser<'a>(
|
fn untyped_xpr_parser<'a>(
|
||||||
expr: Recursive<'a, char, Expr, Simple<char>>,
|
expr: Recursive<'a, char, Expr, Simple<char>>,
|
||||||
ops: &'a [String]
|
ops: &[&'a str]
|
||||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||||
let lambda = just('\\')
|
// basic S-expression rule
|
||||||
.ignore_then(name::name_parser(ops))
|
|
||||||
.then(typed_parser(expr.clone(), ops).or_not())
|
|
||||||
.then_ignore(just('.'))
|
|
||||||
.then(expr.clone().repeated().at_least(1))
|
|
||||||
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
|
|
||||||
let auto = just('@')
|
|
||||||
.ignore_then(name::name_parser(ops).or_not())
|
|
||||||
.then(typed_parser(expr.clone(), ops).or_not())
|
|
||||||
.then_ignore(just('.'))
|
|
||||||
.then(expr.clone().repeated().at_least(1))
|
|
||||||
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
|
|
||||||
let sexpr = expr.clone()
|
let sexpr = expr.clone()
|
||||||
.repeated()
|
.repeated()
|
||||||
.delimited_by(just('('), just(')'))
|
.delimited_by(just('('), just(')'))
|
||||||
.map(Expr::S);
|
.map(Expr::S);
|
||||||
|
// Blocks
|
||||||
|
// can and therefore do match everything up to the closing paren
|
||||||
|
// \name. body
|
||||||
|
// \name:type. body
|
||||||
|
let lambda = just('\\')
|
||||||
|
.ignore_then(text::ident())
|
||||||
|
.then(typed_parser(expr.clone()).or_not())
|
||||||
|
.then_ignore(just('.'))
|
||||||
|
.then(expr.clone().repeated().at_least(1))
|
||||||
|
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
|
||||||
|
// @name. body
|
||||||
|
// @name:type. body
|
||||||
|
// @:type. body
|
||||||
|
let auto = just('@')
|
||||||
|
.ignore_then(text::ident().or_not())
|
||||||
|
.then(typed_parser(expr.clone()).or_not())
|
||||||
|
.then_ignore(just('.'))
|
||||||
|
.then(expr.clone().repeated().at_least(1))
|
||||||
|
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
|
||||||
choice((
|
choice((
|
||||||
|
number::int_parser().map(Expr::Int), // all ints are valid floats so it takes precedence
|
||||||
number::float_parser().map(Expr::Num),
|
number::float_parser().map(Expr::Num),
|
||||||
number::int_parser().map(Expr::Int),
|
|
||||||
string::char_parser().map(Expr::Char),
|
string::char_parser().map(Expr::Char),
|
||||||
string::str_parser().map(Expr::Str),
|
string::str_parser().map(Expr::Str),
|
||||||
name::name_parser(ops).map(Expr::Name),
|
name::name_parser(ops).map(Expr::Name), // includes namespacing
|
||||||
sexpr,
|
sexpr,
|
||||||
lambda,
|
lambda,
|
||||||
auto
|
auto
|
||||||
)).padded()
|
)).padded()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn expression_parser(ops: &[String]) -> impl Parser<char, Expr, Error = Simple<char>> + '_ {
|
/// Parse any expression with an optional type annotation, surrounded by comments
|
||||||
|
pub fn expression_parser<'a>(ops: &[&'a str]) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||||
|
// This approach to parsing comments is ugly and error-prone,
|
||||||
|
// but I don't have a lot of other ideas
|
||||||
return recursive(|expr| {
|
return recursive(|expr| {
|
||||||
return misc::comment_parser().or_not().ignore_then(
|
return misc::comment_parser().or_not().ignore_then(
|
||||||
untyped_xpr_parser(expr.clone(), &ops)
|
untyped_xpr_parser(expr.clone(), &ops)
|
||||||
.then(typed_parser(expr, ops).or_not())
|
.then(typed_parser(expr).or_not())
|
||||||
.map(|(val, t)| match t {
|
.map(|(val, t)| match t {
|
||||||
Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
|
Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
|
||||||
None => val
|
None => val
|
||||||
|
|||||||
@@ -1,32 +1,32 @@
|
|||||||
use chumsky::{Parser, prelude::*, text::Character};
|
use std::iter;
|
||||||
|
|
||||||
|
use chumsky::{Parser, prelude::*};
|
||||||
use super::name;
|
use super::name;
|
||||||
|
|
||||||
enum Import {
|
#[derive(Debug, Clone)]
|
||||||
Name(Vec<String>, String),
|
pub struct Import {
|
||||||
All(Vec<String>)
|
pub path: Vec<String>,
|
||||||
}
|
pub name: Option<String>
|
||||||
|
|
||||||
fn prefix(pre: Vec<String>, im: Import) -> Import {
|
|
||||||
match im {
|
|
||||||
Import::Name(ns, name) => Import::Name(
|
|
||||||
pre.into_iter().chain(ns.into_iter()).collect(),
|
|
||||||
name
|
|
||||||
),
|
|
||||||
Import::All(ns) => Import::All(
|
|
||||||
pre.into_iter().chain(ns.into_iter()).collect()
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
type BoxedStrIter = Box<dyn Iterator<Item = String>>;
|
pub type BoxedStrIter = Box<dyn Iterator<Item = String>>;
|
||||||
type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
|
pub type BoxedStrIterIter = Box<dyn Iterator<Item = BoxedStrIter>>;
|
||||||
|
|
||||||
|
/// initialize a Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>
|
||||||
|
/// with a single element.
|
||||||
fn init_table(name: String) -> BoxedStrIterIter {
|
fn init_table(name: String) -> BoxedStrIterIter {
|
||||||
Box::new(vec![Box::new(vec![name].into_iter()) as BoxedStrIter].into_iter())
|
// I'm not confident at all that this is a good approach.
|
||||||
|
Box::new(iter::once(Box::new(iter::once(name)) as BoxedStrIter))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse an import command
|
||||||
|
/// Syntax is same as Rust's `use` except the verb is import, no trailing semi
|
||||||
|
/// and the delimiters are plain parentheses. Namespaces should preferably contain
|
||||||
|
/// cross-platform filename-legal characters but the symbols are explicitly allowed
|
||||||
|
/// to go wild. There's a blacklist in [name]
|
||||||
pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
||||||
|
// TODO: this algorithm isn't cache friendly, copies a lot and is generally pretty bad.
|
||||||
recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
|
recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
|
||||||
name::modname_parser()
|
name::modname_parser()
|
||||||
.padded()
|
.padded()
|
||||||
@@ -38,8 +38,9 @@ pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
|||||||
.separated_by(just(','))
|
.separated_by(just(','))
|
||||||
.delimited_by(just('('), just(')'))
|
.delimited_by(just('('), just(')'))
|
||||||
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
|
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
|
||||||
just("*").map(|s| init_table(s.to_string())),
|
// Each expr returns a list of imports, flatten those into a common list
|
||||||
name::modname_parser().map(init_table)
|
just("*").map(|s| init_table(s.to_string())), // Just a *, wrapped
|
||||||
|
name::modname_parser().map(init_table) // Just a name, wrapped
|
||||||
)).padded()
|
)).padded()
|
||||||
).map(|(pre, post)| {
|
).map(|(pre, post)| {
|
||||||
Box::new(post.map(move |el| {
|
Box::new(post.map(move |el| {
|
||||||
@@ -50,8 +51,8 @@ pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
|||||||
paths.filter_map(|namespaces| {
|
paths.filter_map(|namespaces| {
|
||||||
let mut path: Vec<String> = namespaces.collect();
|
let mut path: Vec<String> = namespaces.collect();
|
||||||
match path.pop()?.as_str() {
|
match path.pop()?.as_str() {
|
||||||
"*" => Some(Import::All(path)),
|
"*" => Some(Import { path, name: None }),
|
||||||
name => Some(Import::Name(path, name.to_owned()))
|
name => Some(Import { path, name: Some(name.to_owned()) })
|
||||||
}
|
}
|
||||||
}).collect()
|
}).collect()
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
pub use chumsky::{self, prelude::*, Parser};
|
pub use chumsky::{self, prelude::*, Parser};
|
||||||
|
|
||||||
|
/// Parses Lua-style comments
|
||||||
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||||
any().repeated().delimited_by(just("--["), just("]--")).or(
|
any().repeated().delimited_by(just("--["), just("]--")).or(
|
||||||
any().repeated().delimited_by(just("--"), just("\n"))
|
any().repeated().delimited_by(just("--"), just("\n"))
|
||||||
|
|||||||
@@ -5,5 +5,12 @@ mod misc;
|
|||||||
mod import;
|
mod import;
|
||||||
mod name;
|
mod name;
|
||||||
mod substitution;
|
mod substitution;
|
||||||
|
mod sourcefile;
|
||||||
|
|
||||||
|
pub use expression::Expr;
|
||||||
pub use expression::expression_parser;
|
pub use expression::expression_parser;
|
||||||
|
pub use sourcefile::FileEntry;
|
||||||
|
pub use sourcefile::file_parser;
|
||||||
|
pub use sourcefile::imports;
|
||||||
|
pub use sourcefile::exported_names;
|
||||||
|
pub use import::Import;
|
||||||
@@ -1,28 +1,46 @@
|
|||||||
use chumsky::{self, prelude::*, Parser};
|
use chumsky::{self, prelude::*, Parser};
|
||||||
|
|
||||||
fn op_parser_recur<'a, 'b>(ops: &'a [String]) -> BoxedParser<'b, char, String, Simple<char>> {
|
/// Matches any one of the passed operators, longest-first
|
||||||
if ops.len() == 1 { just(ops[0].clone()).boxed() }
|
fn op_parser<'a>(ops: &[&'a str]) -> BoxedParser<'a, char, String, Simple<char>> {
|
||||||
else { just(ops[0].clone()).or(op_parser_recur(&ops[1..])).boxed() }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn op_parser(ops: &[String]) -> BoxedParser<char, String, Simple<char>> {
|
|
||||||
let mut sorted_ops = ops.to_vec();
|
let mut sorted_ops = ops.to_vec();
|
||||||
sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
|
sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
|
||||||
op_parser_recur(&sorted_ops)
|
sorted_ops.into_iter()
|
||||||
|
.map(|op| just(op.to_string()).boxed())
|
||||||
|
.reduce(|a, b| a.or(b).boxed()).unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn modname_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
/// Matches anything that's allowed as an operator
|
||||||
let not_name_char: Vec<char> = vec![':', '\\', '"', '\'', '(', ')', '.'];
|
///
|
||||||
|
/// Blacklist rationale:
|
||||||
|
/// - `:` is used for namespacing and type annotations, both are distinguished from operators
|
||||||
|
/// - `\` and `@` are parametric expression starters
|
||||||
|
/// - `"` and `'` are read as primitives and would never match.
|
||||||
|
/// - `(` and `)` are strictly balanced and this must remain the case for automation and streaming.
|
||||||
|
/// - `.` is the discriminator for parametrics.
|
||||||
|
///
|
||||||
|
/// FIXME: `@name` without a dot should be parsed correctly for overrides. Could be an operator but
|
||||||
|
/// then parametrics should take precedence, which might break stuff. investigate.
|
||||||
|
///
|
||||||
|
/// TODO: `'` could work as an operator whenever it isn't closed. It's common in maths so it's
|
||||||
|
/// worth a try
|
||||||
|
///
|
||||||
|
/// TODO: `.` could possibly be parsed as an operator depending on context. This operator is very
|
||||||
|
/// common in maths so it's worth a try. Investigate.
|
||||||
|
pub fn modname_parser<'a>() -> impl Parser<char, String, Error = Simple<char>> + 'a {
|
||||||
|
let not_name_char: Vec<char> = vec![':', '\\', '@', '"', '\'', '(', ')', '.'];
|
||||||
filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
|
filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
|
||||||
.repeated().at_least(1)
|
.repeated().at_least(1)
|
||||||
.collect()
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn name_parser<'a>(ops: &'a [String]) -> impl Parser<char, String, Error = Simple<char>> + 'a {
|
/// Parse an operator or name. Failing both, parse everything up to the next whitespace or
|
||||||
|
/// blacklisted character as a new operator.
|
||||||
|
pub fn name_parser<'a>(
|
||||||
|
ops: &[&'a str]
|
||||||
|
) -> impl Parser<char, Vec<String>, Error = Simple<char>> + 'a {
|
||||||
choice((
|
choice((
|
||||||
op_parser(ops), // First try to parse a known operator
|
op_parser(ops), // First try to parse a known operator
|
||||||
text::ident(), // Failing that, parse plain text
|
text::ident(), // Failing that, parse plain text
|
||||||
// Finally parse everything until the next terminal as a new operator
|
modname_parser() // Finally parse everything until the next terminal as a new operator
|
||||||
modname_parser()
|
)).padded().separated_by(just("::")).padded()
|
||||||
)).padded()
|
|
||||||
}
|
}
|
||||||
@@ -6,6 +6,9 @@ fn assert_not_digit(base: u32, c: char) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse an arbitrarily grouped sequence of digits starting with an underscore.
|
||||||
|
///
|
||||||
|
/// TODO: this should use separated_by and parse the leading group too
|
||||||
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
|
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
|
||||||
just('_')
|
just('_')
|
||||||
.ignore_then(text::digits(base))
|
.ignore_then(text::digits(base))
|
||||||
@@ -13,6 +16,9 @@ fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simpl
|
|||||||
.map(|sv| sv.iter().map(|s| s.chars()).flatten().collect())
|
.map(|sv| sv.iter().map(|s| s.chars()).flatten().collect())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// parse a grouped uint
|
||||||
|
///
|
||||||
|
/// Not to be confused with [int_parser] which does a lot more
|
||||||
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
||||||
text::int(base)
|
text::int(base)
|
||||||
.then(separated_digits_parser(base))
|
.then(separated_digits_parser(base))
|
||||||
@@ -21,6 +27,8 @@ fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// parse exponent notation, or return 0 as the default exponent.
|
||||||
|
/// The exponent is always in decimal.
|
||||||
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
|
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
|
||||||
return choice((
|
return choice((
|
||||||
just('p')
|
just('p')
|
||||||
@@ -32,6 +40,9 @@ fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
|
|||||||
)).or_else(|_| Ok(0))
|
)).or_else(|_| Ok(0))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// returns a mapper that converts a mantissa and an exponent into an uint
|
||||||
|
///
|
||||||
|
/// TODO it panics if it finds a negative exponent
|
||||||
fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
|
fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
|
||||||
return move |(val, exp)| {
|
return move |(val, exp)| {
|
||||||
if exp == 0 {val}
|
if exp == 0 {val}
|
||||||
@@ -39,6 +50,7 @@ fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// returns a mapper that converts a mantissa and an exponent into a float
|
||||||
fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
|
fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
|
||||||
return move |(val, exp)| {
|
return move |(val, exp)| {
|
||||||
if exp == 0 {val}
|
if exp == 0 {val}
|
||||||
@@ -46,11 +58,15 @@ fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// parse an uint from exponential notation (panics if 'p' is a digit in base)
|
||||||
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
||||||
assert_not_digit(base, 'p');
|
assert_not_digit(base, 'p');
|
||||||
uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
|
uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// parse an uint from a base determined by its prefix or lack thereof
|
||||||
|
///
|
||||||
|
/// Not to be confused with [uint_parser] which is a component of it.
|
||||||
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
|
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
|
||||||
choice((
|
choice((
|
||||||
just("0b").ignore_then(pow_uint_parser(2)),
|
just("0b").ignore_then(pow_uint_parser(2)),
|
||||||
@@ -60,6 +76,7 @@ pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// parse a float from dot notation
|
||||||
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||||
uint_parser(base)
|
uint_parser(base)
|
||||||
.then_ignore(just('.'))
|
.then_ignore(just('.'))
|
||||||
@@ -73,11 +90,14 @@ fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// parse a float from dotted and optionally also exponential notation
|
||||||
fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||||
assert_not_digit(base, 'p');
|
assert_not_digit(base, 'p');
|
||||||
dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
|
dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// parse a float with dotted and optionally exponential notation from a base determined by its
|
||||||
|
/// prefix
|
||||||
pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
|
pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
|
||||||
choice((
|
choice((
|
||||||
just("0b").ignore_then(pow_float_parser(2)),
|
just("0b").ignore_then(pow_float_parser(2)),
|
||||||
|
|||||||
107
src/parse/sourcefile.rs
Normal file
107
src/parse/sourcefile.rs
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
use std::iter;
|
||||||
|
|
||||||
|
use super::expression::Expr;
|
||||||
|
use super::import;
|
||||||
|
use super::misc;
|
||||||
|
use super::substitution::substitution_parser;
|
||||||
|
use super::substitution::Substitution;
|
||||||
|
use chumsky::{Parser, prelude::*};
|
||||||
|
|
||||||
|
/// Anything we might encounter in a file
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum FileEntry {
|
||||||
|
Import(Vec<import::Import>),
|
||||||
|
Comment(String),
|
||||||
|
Substitution(Substitution),
|
||||||
|
Export(Substitution)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Recursively iterate through all "names" in an expression. It also finds a lot of things that
|
||||||
|
/// aren't names, such as all bound parameters. Generally speaking, this is not a very
|
||||||
|
/// sophisticated search.
|
||||||
|
///
|
||||||
|
/// TODO: find a way to exclude parameters
|
||||||
|
fn find_all_names_recur(expr: &Expr) -> Box<dyn Iterator<Item = &Vec<String>> + '_> {
|
||||||
|
match expr {
|
||||||
|
Expr::Auto(_, typ, body) | Expr::Lambda(_, typ, body) => Box::new(match typ {
|
||||||
|
Some(texp) => find_all_names_recur(texp),
|
||||||
|
None => Box::new(iter::empty())
|
||||||
|
}.chain(body.into_iter().map(find_all_names_recur).flatten())),
|
||||||
|
Expr::S(body) => Box::new(body.into_iter().map(find_all_names_recur).flatten()),
|
||||||
|
Expr::Typed(val, typ) => Box::new(
|
||||||
|
find_all_names_recur(val).chain(find_all_names_recur(typ))
|
||||||
|
),
|
||||||
|
Expr::Name(x) => Box::new(iter::once(x)),
|
||||||
|
_ => Box::new(iter::empty())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect all names that occur in an expression
|
||||||
|
fn find_all_names(expr: &Expr) -> HashSet<&Vec<String>> {
|
||||||
|
find_all_names_recur(expr).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a file into a list of distinctive entries
|
||||||
|
pub fn file_parser<'a>(
|
||||||
|
pattern_ops: &[&'a str], ops: &[&'a str]
|
||||||
|
) -> impl Parser<char, Vec<FileEntry>, Error = Simple<char>> + 'a {
|
||||||
|
choice((
|
||||||
|
// In case the usercode wants to parse doc
|
||||||
|
misc::comment_parser().map(FileEntry::Comment),
|
||||||
|
import::import_parser().map(FileEntry::Import),
|
||||||
|
text::keyword("export")
|
||||||
|
.ignore_then(substitution_parser(pattern_ops, ops)).map(FileEntry::Export),
|
||||||
|
// This could match almost anything so it has to go last
|
||||||
|
substitution_parser(pattern_ops, ops).map(FileEntry::Substitution)
|
||||||
|
)).padded()
|
||||||
|
.separated_by(just('\n'))
|
||||||
|
.then_ignore(end())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decide if a string can be an operator. Operators can include digits and text, just not at the
|
||||||
|
/// start.
|
||||||
|
fn is_op(s: &str) -> bool {
|
||||||
|
return match s.chars().next() {
|
||||||
|
Some(x) => !x.is_alphanumeric(),
|
||||||
|
None => false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect all exported names (and a lot of other words) from a file
|
||||||
|
pub fn exported_names(src: &Vec<FileEntry>) -> HashSet<&Vec<String>> {
|
||||||
|
src.iter().filter_map(|ent| match ent {
|
||||||
|
FileEntry::Export(a) => Some(&a.source),
|
||||||
|
_ => None
|
||||||
|
}).map(find_all_names).flatten().collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect all operators defined in a file (and some other words)
|
||||||
|
fn defined_ops(src: &Vec<FileEntry>, exported_only: bool) -> Vec<&String> {
|
||||||
|
let all_names:HashSet<&Vec<String>> = src.iter().filter_map(|ent| match ent {
|
||||||
|
FileEntry::Substitution(a) => if exported_only {None} else {Some(&a.source)},
|
||||||
|
FileEntry::Export(a) => Some(&a.source),
|
||||||
|
_ => None
|
||||||
|
}).map(find_all_names).flatten().collect();
|
||||||
|
// Dedupe stage of dubious value; collecting into a hashset may take longer than
|
||||||
|
// handling duplicates would with a file of sensible size.
|
||||||
|
all_names.into_iter()
|
||||||
|
.filter_map(|name|
|
||||||
|
// If it's namespaced, it's imported.
|
||||||
|
if name.len() == 1 && is_op(&name[0]) {Some(&name[0])}
|
||||||
|
else {None}
|
||||||
|
).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Collect all operators from a file
|
||||||
|
pub fn all_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, false) }
|
||||||
|
/// Collect exported operators from a file (plus some extra)
|
||||||
|
pub fn exported_ops(src: &Vec<FileEntry>) -> Vec<&String> { defined_ops(src, true) }
|
||||||
|
|
||||||
|
/// Summarize all imports from a file in a single list of qualified names
|
||||||
|
pub fn imports(src: &Vec<FileEntry>) -> Vec<&import::Import> {
|
||||||
|
src.into_iter().filter_map(|ent| match ent {
|
||||||
|
FileEntry::Import(impv) => Some(impv.iter()),
|
||||||
|
_ => None
|
||||||
|
}).flatten().collect()
|
||||||
|
}
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
use chumsky::{self, prelude::*, Parser};
|
use chumsky::{self, prelude::*, Parser};
|
||||||
|
|
||||||
|
/// Parses a text character that is not the specified delimiter
|
||||||
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
|
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
|
||||||
|
// Copied directly from Chumsky's JSON example.
|
||||||
let escape = just('\\').ignore_then(
|
let escape = just('\\').ignore_then(
|
||||||
just('\\')
|
just('\\')
|
||||||
.or(just('/'))
|
.or(just('/'))
|
||||||
@@ -27,15 +29,17 @@ fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
|
|||||||
filter(move |&c| c != '\\' && c != delim).or(escape)
|
filter(move |&c| c != '\\' && c != delim).or(escape)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a character literal between single quotes
|
||||||
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
|
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
|
||||||
just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
|
just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Parse a string between double quotes
|
||||||
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||||
just('"')
|
just('"')
|
||||||
.ignore_then(
|
.ignore_then(
|
||||||
text_parser('"').map(Some)
|
text_parser('"').map(Some)
|
||||||
.or(just("\\\n").map(|_| None))
|
.or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored.
|
||||||
.repeated()
|
.repeated()
|
||||||
).then_ignore(just('"'))
|
).then_ignore(just('"'))
|
||||||
.flatten().collect()
|
.flatten().collect()
|
||||||
|
|||||||
@@ -2,16 +2,28 @@ use chumsky::{self, prelude::*, Parser};
|
|||||||
|
|
||||||
use super::{expression, number::float_parser};
|
use super::{expression, number::float_parser};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
pub struct Substitution {
|
pub struct Substitution {
|
||||||
source: expression::Expr,
|
pub source: expression::Expr,
|
||||||
priority: f64,
|
pub priority: f64,
|
||||||
target: expression::Expr
|
pub target: expression::Expr
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn substitutionParser<'a>(
|
/// Parses a substitution rule of the forms
|
||||||
ops: &'a [String]
|
///
|
||||||
|
/// ```orchid
|
||||||
|
/// main = \x. ...
|
||||||
|
/// $a + $b = (add $a $b)
|
||||||
|
/// (foo bar baz) =1.1=> (foo 1 e)
|
||||||
|
/// reee =2=> shadow_reee
|
||||||
|
/// shadow_reee =0.9=> reee
|
||||||
|
/// ```
|
||||||
|
/// TBD whether this disables reee in the specified range or loops forever
|
||||||
|
pub fn substitution_parser<'a>(
|
||||||
|
pattern_ops: &[&'a str],
|
||||||
|
ops: &[&'a str]
|
||||||
) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
|
) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
|
||||||
expression::expression_parser(ops)
|
expression::expression_parser(pattern_ops)
|
||||||
.then_ignore(just('='))
|
.then_ignore(just('='))
|
||||||
.then(
|
.then(
|
||||||
float_parser().then_ignore(just("=>"))
|
float_parser().then_ignore(just("=>"))
|
||||||
|
|||||||
53
src/project/mod.rs
Normal file
53
src/project/mod.rs
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
mod resolve_names;
|
||||||
|
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Project {
|
||||||
|
pub modules: HashMap<Vec<String>, Module>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Export {
|
||||||
|
isSymbol: bool,
|
||||||
|
subpaths: HashMap<String, Export>
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Module {
|
||||||
|
pub substitutions: Vec<Substitution>,
|
||||||
|
pub exports: HashMap<String, Export>,
|
||||||
|
pub all_ops: Vec<String>
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Substitution {
|
||||||
|
pub source: Expr,
|
||||||
|
pub priority: f64,
|
||||||
|
pub target: Expr
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum Literal {
|
||||||
|
Num(f64),
|
||||||
|
Int(u64),
|
||||||
|
Char(char),
|
||||||
|
Str(String),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum Token {
|
||||||
|
Literal(Literal),
|
||||||
|
Name(String),
|
||||||
|
Bound,
|
||||||
|
S(Vec<Expr>),
|
||||||
|
Lambda(Vec<Vec<usize>>, Option<Box<Expr>>, Vec<Expr>),
|
||||||
|
Auto(Option<Vec<Vec<usize>>>, Option<Box<Expr>>, Vec<Expr>)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Expr {
|
||||||
|
pub token: Token,
|
||||||
|
pub typ: Box<Expr>
|
||||||
|
}
|
||||||
87
src/project/resolve_names.rs
Normal file
87
src/project/resolve_names.rs
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use chumsky::{Parser, prelude::Simple};
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
use crate::parse::{self, file_parser, exported_names, FileEntry};
|
||||||
|
use crate::utils::Cache;
|
||||||
|
|
||||||
|
/// What the module loader callback found at a given path.
#[derive(Clone, Debug)]
pub enum Loaded {
    /// A module, carried as a string that is handed to the file parser.
    Module(String),
    /// A namespace, carried as a list of names.
    Namespace(Vec<String>),
}
|
||||||
|
|
||||||
|
#[derive(Error, Debug)]
|
||||||
|
pub enum ParseError {
|
||||||
|
#[error("Not found: {0}")]
|
||||||
|
NotFound(String),
|
||||||
|
#[error("Failed to parse {file}: {errors:?}")]
|
||||||
|
Syntax {
|
||||||
|
file: String,
|
||||||
|
errors: Vec<Simple<char>>
|
||||||
|
},
|
||||||
|
#[error("Expected {0}, found {1}")]
|
||||||
|
Mismatch(String, String),
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ParseError {
|
||||||
|
pub fn not_found(name: &str) -> ParseError { ParseError::NotFound(name.to_string()) }
|
||||||
|
pub fn syntax(file: &str, errors: Vec<Simple<char>>) -> ParseError {
|
||||||
|
ParseError::Syntax { file: file.to_string(), errors }
|
||||||
|
}
|
||||||
|
pub fn mismatch(expected: &str, found: &str) -> ParseError {
|
||||||
|
ParseError::Mismatch(expected.to_string(), found.to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Loading a module:
|
||||||
|
// 1. [X] Parse the imports
|
||||||
|
// 2. [ ] Build a mapping of all imported symbols to full paths
|
||||||
|
// -> [X] Parse the exported symbols from all imported modules
|
||||||
|
// 3. [ ] Parse everything using the full list of operators
|
||||||
|
// 4. [ ] Traverse and remap elements
|
||||||
|
|
||||||
|
pub fn load_project<F>(
|
||||||
|
mut load_mod: F, prelude: &[&str], entry: &str
|
||||||
|
) -> Result<super::Project, ParseError>
|
||||||
|
where F: FnMut(&[&str]) -> Option<Loaded> {
|
||||||
|
let preparser = file_parser(prelude, &[]);
|
||||||
|
let mut loaded = Cache::new(|path: &[&str]| load_mod(path));
|
||||||
|
let mut preparsed = Cache::new(|path: &[&str]| {
|
||||||
|
loaded.get(path).as_ref().map(|loaded| match loaded {
|
||||||
|
Loaded::Module(source) => Some(preparser.parse(source.as_str()).ok()?),
|
||||||
|
_ => return None
|
||||||
|
}).flatten()
|
||||||
|
});
|
||||||
|
let exports = Cache::new(|path: &[&str]| loaded.get(path).map(|data| {
|
||||||
|
match data {
|
||||||
|
Loaded::Namespace(names) => Some(names),
|
||||||
|
Loaded::Module(source) => preparsed.get(path).map(|data| {
|
||||||
|
exported_names(&data).into_iter().map(|n| n[0]).collect()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}).flatten());
|
||||||
|
let imports = Cache::new(|path: &[&str]| preparsed.get(path).map(|data| {
|
||||||
|
data.iter().filter_map(|ent| match ent {
|
||||||
|
FileEntry::Import(imp) => Some(imp),
|
||||||
|
_ => None
|
||||||
|
}).flatten().collect::<Vec<_>>()
|
||||||
|
}));
|
||||||
|
// let main = preparsed.get(&[entry]);
|
||||||
|
// for imp in parse::imports(main) {
|
||||||
|
// if !modules.contains_key(&imp.path) {
|
||||||
|
// if modules[&imp.path]
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// let mut project = super::Project {
|
||||||
|
// modules: HashMap::new()
|
||||||
|
// };
|
||||||
|
|
||||||
|
|
||||||
|
// Some(project)
|
||||||
|
todo!("Finish this function")
|
||||||
|
}
|
||||||
25
src/utils/cache.rs
Normal file
25
src/utils/cache.rs
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
use std::{collections::HashMap, hash::Hash};
|
||||||
|
|
||||||
|
/// Caches the return values of a closure in a hash map.
///
/// The first [`Cache::get`] for a given input calls the closure and stores the
/// result; later calls with an equal input return the stored value without
/// calling the closure again. The closure should therefore be free of
/// observable side effects.
///
/// Inspired by the closure_cacher crate.
pub struct Cache<I, O, F> {
    /// Results computed so far, keyed by input.
    store: HashMap<I, O>,
    /// The function being cached.
    closure: F,
}

impl<I, O, F> Cache<I, O, F>
where
    F: FnMut(I) -> O,
    I: Eq + Hash + Copy,
{
    /// Wraps `closure` in an empty cache.
    pub fn new(closure: F) -> Self {
        Self { store: HashMap::new(), closure }
    }

    /// Returns the cached output for `i`, computing and storing it first if
    /// it is not in the cache yet.
    pub fn get(&mut self, i: I) -> &O {
        // Borrow the closure on its own so that it can be called while the
        // map entry is borrowed from `self.store`.
        let closure = &mut self.closure;
        // `I: Copy` lets the same input serve as the map key and as the
        // closure argument without requiring anything more of `I`.
        self.store.entry(i).or_insert_with(|| closure(i))
    }

    /// Forgets the output stored for the given input.
    ///
    /// Returns `true` if there was an entry to remove.
    pub fn drop(&mut self, i: &I) -> bool {
        self.store.remove(i).is_some()
    }
}
|
||||||
2
src/utils/mod.rs
Normal file
2
src/utils/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
mod cache;
|
||||||
|
pub use cache::Cache;
|
||||||
Reference in New Issue
Block a user