Bunch of improvements
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
use std::io::{self, Read};
|
||||
|
||||
use chumsky::Parser;
|
||||
use chumsky::{Parser, prelude::*};
|
||||
|
||||
mod parse;
|
||||
|
||||
@@ -8,6 +8,7 @@ fn main() {
|
||||
let mut input = String::new();
|
||||
let mut stdin = io::stdin();
|
||||
stdin.read_to_string(&mut input).unwrap();
|
||||
let output = parse::parser().parse(input);
|
||||
let ops: Vec<String> = vec!["$", "."].iter().map(|&s| s.to_string()).collect();
|
||||
let output = parse::expression_parser(&ops).then_ignore(end()).parse(input);
|
||||
println!("\nParsed:\n{:?}", output);
|
||||
}
|
||||
|
||||
143
src/parse.rs
143
src/parse.rs
@@ -1,143 +0,0 @@
|
||||
use std::fmt::Debug;
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
/// A node of the parsed syntax tree.
///
/// `Clone` and `PartialEq` are derived so trees can be duplicated and compared
/// (e.g. in tests) without hand-written traversal code.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
    /// Floating-point literal.
    Num(f64),
    /// Unsigned integer literal.
    Int(u64),
    /// Character literal.
    Char(char),
    /// String literal.
    Str(String),
    /// Identifier.
    Name(String),
    /// Parenthesised sequence of expressions.
    S(Vec<Expr>),
    /// Lambda: the bound name followed by its (non-empty) body.
    Lambda(String, Vec<Expr>),
}
|
||||
|
||||
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
text::int(base).map(move |s: String| u64::from_str_radix(&s, base).unwrap())
|
||||
}
|
||||
|
||||
fn e_parser() -> impl Parser<char, i32, Error = Simple<char>> {
|
||||
return choice((
|
||||
just('e')
|
||||
.ignore_then(text::int(10))
|
||||
.map(|s: String| s.parse().unwrap()),
|
||||
just("e-")
|
||||
.ignore_then(text::int(10))
|
||||
.map(|s: String| -s.parse::<i32>().unwrap()),
|
||||
empty().map(|()| 0)
|
||||
))
|
||||
}
|
||||
|
||||
/// Builds a closure that scales an integer mantissa by a power of `base`.
///
/// The closure takes `(val, exp)` and returns `val * base^exp`; an exponent of
/// `0` returns `val` unchanged.
///
/// # Panics
/// The closure panics when `exp` is negative or when the result does not fit
/// in a `u64`. The multiplication is checked explicitly so that overflow is
/// detected in release builds too instead of silently wrapping.
fn nat2u(base: u64) -> impl Fn((u64, i32)) -> u64 {
    move |(val, exp)| {
        if exp == 0 {
            return val;
        }
        let exp = u32::try_from(exp).expect("integer literals cannot have a negative exponent");
        base.checked_pow(exp)
            .and_then(|scale| val.checked_mul(scale))
            .expect("integer literal with exponent overflows u64")
    }
}
|
||||
|
||||
/// Builds a closure that scales a floating-point mantissa by a power of `base`.
///
/// The closure takes `(mantissa, exponent)` and returns
/// `mantissa * base^exponent`; a zero exponent returns the mantissa untouched.
fn nat2f(base: u64) -> impl Fn((f64, i32)) -> f64 {
    move |(mantissa, exponent)| match exponent {
        0 => mantissa,
        _ => mantissa * (base as f64).powf(f64::from(exponent)),
    }
}
|
||||
|
||||
fn e_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
if base > 14 {panic!("exponential in base that uses the digit 'e' is ambiguous")}
|
||||
uint_parser(base).then(e_parser()).map(nat2u(base.into()))
|
||||
}
|
||||
|
||||
fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
choice((
|
||||
just("0b").ignore_then(e_uint_parser(2)),
|
||||
just("0x").ignore_then(uint_parser(16)),
|
||||
just('0').ignore_then(e_uint_parser(8)),
|
||||
e_uint_parser(10), // Dec has no prefix
|
||||
))
|
||||
}
|
||||
|
||||
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
uint_parser(base)
|
||||
.then_ignore(just('.'))
|
||||
.then(text::digits(base))
|
||||
.map(move |(wh, frac)| {
|
||||
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
|
||||
let dexp = base.pow(frac.len().try_into().unwrap());
|
||||
wh as f64 + (frac_num / dexp as f64)
|
||||
})
|
||||
}
|
||||
|
||||
fn e_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
if base > 14 {panic!("exponential in base that uses the digit 'e' is ambiguous")}
|
||||
dotted_parser(base).then(e_parser()).map(nat2f(base.into()))
|
||||
}
|
||||
|
||||
fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
choice((
|
||||
just("0b").ignore_then(e_float_parser(2)),
|
||||
just("0x").ignore_then(dotted_parser(16)),
|
||||
just('0').ignore_then(e_float_parser(8)),
|
||||
e_float_parser(10),
|
||||
))
|
||||
}
|
||||
|
||||
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
|
||||
let escape = just('\\').ignore_then(
|
||||
just('\\')
|
||||
.or(just('/'))
|
||||
.or(just('"'))
|
||||
.or(just('b').to('\x08'))
|
||||
.or(just('f').to('\x0C'))
|
||||
.or(just('n').to('\n'))
|
||||
.or(just('r').to('\r'))
|
||||
.or(just('t').to('\t'))
|
||||
.or(just('u').ignore_then(
|
||||
filter(|c: &char| c.is_digit(16))
|
||||
.repeated()
|
||||
.exactly(4)
|
||||
.collect::<String>()
|
||||
.validate(|digits, span, emit| {
|
||||
char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
|
||||
.unwrap_or_else(|| {
|
||||
emit(Simple::custom(span, "invalid unicode character"));
|
||||
'\u{FFFD}' // unicode replacement character
|
||||
})
|
||||
}),
|
||||
)),
|
||||
);
|
||||
filter(move |&c| c != '\\' && c != delim).or(escape)
|
||||
}
|
||||
|
||||
fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
|
||||
just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
|
||||
}
|
||||
|
||||
fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||
just('"')
|
||||
.ignore_then(text_parser('"').repeated())
|
||||
.then_ignore(just('"'))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn parser() -> impl Parser<char, Expr, Error = Simple<char>> {
|
||||
return recursive(|expr| {
|
||||
let lambda = just('\\')
|
||||
.ignore_then(text::ident())
|
||||
.then_ignore(just('.'))
|
||||
.then(expr.clone().repeated().at_least(1))
|
||||
.map(|(name, body)| Expr::Lambda(name, body));
|
||||
let sexpr = expr.clone()
|
||||
.repeated()
|
||||
.delimited_by(just('('), just(')'))
|
||||
.map(Expr::S);
|
||||
choice((
|
||||
float_parser().map(Expr::Num),
|
||||
int_parser().map(Expr::Int),
|
||||
char_parser().map(Expr::Char),
|
||||
str_parser().map(Expr::Str),
|
||||
text::ident().map(Expr::Name),
|
||||
sexpr,
|
||||
lambda
|
||||
)).padded()
|
||||
}).then_ignore(end())
|
||||
}
|
||||
72
src/parse/expression.rs
Normal file
72
src/parse/expression.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
use std::{fmt::Debug};
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
use super::string;
|
||||
use super::number;
|
||||
use super::misc;
|
||||
use super::name;
|
||||
|
||||
/// A node of the parsed syntax tree.
///
/// `Clone` and `PartialEq` are derived so trees can be duplicated and compared
/// (e.g. in tests) without hand-written traversal code.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
    /// Floating-point literal.
    Num(f64),
    /// Unsigned integer literal.
    Int(u64),
    /// Character literal.
    Char(char),
    /// String literal.
    Str(String),
    /// Identifier or operator name.
    Name(String),
    /// Parenthesised sequence of expressions.
    S(Vec<Expr>),
    /// `\name[:type].body`: bound name, optional type annotation, non-empty body.
    Lambda(String, Option<Box<Expr>>, Vec<Expr>),
    /// `@[name][:type].body`: optional name, optional type annotation, non-empty body.
    Auto(Option<String>, Option<Box<Expr>>, Vec<Expr>),
    /// `value:type`: an expression together with its type annotation.
    Typed(Box<Expr>, Box<Expr>),
}
|
||||
|
||||
fn typed_parser<'a>(
|
||||
expr: Recursive<'a, char, Expr, Simple<char>>,
|
||||
ops: &'a [String]
|
||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
just(':').ignore_then(expr)
|
||||
}
|
||||
|
||||
fn untyped_xpr_parser<'a>(
|
||||
expr: Recursive<'a, char, Expr, Simple<char>>,
|
||||
ops: &'a [String]
|
||||
) -> impl Parser<char, Expr, Error = Simple<char>> + 'a {
|
||||
let lambda = just('\\')
|
||||
.ignore_then(name::name_parser(ops))
|
||||
.then(typed_parser(expr.clone(), ops).or_not())
|
||||
.then_ignore(just('.'))
|
||||
.then(expr.clone().repeated().at_least(1))
|
||||
.map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body));
|
||||
let auto = just('@')
|
||||
.ignore_then(name::name_parser(ops).or_not())
|
||||
.then(typed_parser(expr.clone(), ops).or_not())
|
||||
.then_ignore(just('.'))
|
||||
.then(expr.clone().repeated().at_least(1))
|
||||
.map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body));
|
||||
let sexpr = expr.clone()
|
||||
.repeated()
|
||||
.delimited_by(just('('), just(')'))
|
||||
.map(Expr::S);
|
||||
choice((
|
||||
number::float_parser().map(Expr::Num),
|
||||
number::int_parser().map(Expr::Int),
|
||||
string::char_parser().map(Expr::Char),
|
||||
string::str_parser().map(Expr::Str),
|
||||
name::name_parser(ops).map(Expr::Name),
|
||||
sexpr,
|
||||
lambda,
|
||||
auto
|
||||
)).padded()
|
||||
}
|
||||
|
||||
pub fn expression_parser(ops: &[String]) -> impl Parser<char, Expr, Error = Simple<char>> + '_ {
|
||||
return recursive(|expr| {
|
||||
return misc::comment_parser().or_not().ignore_then(
|
||||
untyped_xpr_parser(expr.clone(), &ops)
|
||||
.then(typed_parser(expr, ops).or_not())
|
||||
.map(|(val, t)| match t {
|
||||
Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)),
|
||||
None => val
|
||||
})
|
||||
).then_ignore(misc::comment_parser().or_not())
|
||||
})
|
||||
}
|
||||
58
src/parse/import.rs
Normal file
58
src/parse/import.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
use chumsky::{Parser, prelude::*, text::Character};
|
||||
use super::name;
|
||||
|
||||
/// A single import produced by `import_parser`.
///
/// Public because the public `import_parser` function exposes it in its return
/// type; `Debug`, `Clone` and `PartialEq` are derived so results can be
/// printed and compared.
#[derive(Debug, Clone, PartialEq)]
pub enum Import {
    /// A named item: the namespace path leading to it, then the item's name.
    Name(Vec<String>, String),
    /// A `*` import of everything under the given namespace path.
    All(Vec<String>),
}
|
||||
|
||||
fn prefix(pre: Vec<String>, im: Import) -> Import {
|
||||
match im {
|
||||
Import::Name(ns, name) => Import::Name(
|
||||
pre.into_iter().chain(ns.into_iter()).collect(),
|
||||
name
|
||||
),
|
||||
Import::All(ns) => Import::All(
|
||||
pre.into_iter().chain(ns.into_iter()).collect()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// A boxed, type-erased iterator over `String`s; `import_parser` collects each
/// one into the segments of a single import path.
type BoxedStrIter = Box<dyn Iterator<Item = String>>;
|
||||
/// A boxed, type-erased iterator of string iterators, i.e. a lazily built list
/// of paths where each path is itself an iterator over its segments.
type BoxedStrIterIter = Box<dyn Iterator<Item = Box<dyn Iterator<Item = String>>>>;
|
||||
|
||||
fn init_table(name: String) -> BoxedStrIterIter {
|
||||
Box::new(vec![Box::new(vec![name].into_iter()) as BoxedStrIter].into_iter())
|
||||
}
|
||||
|
||||
pub fn import_parser() -> impl Parser<char, Vec<Import>, Error = Simple<char>> {
|
||||
recursive(|expr: Recursive<char, BoxedStrIterIter, Simple<char>>| {
|
||||
name::modname_parser()
|
||||
.padded()
|
||||
.then_ignore(just("::"))
|
||||
.repeated()
|
||||
.then(
|
||||
choice((
|
||||
expr.clone()
|
||||
.separated_by(just(','))
|
||||
.delimited_by(just('('), just(')'))
|
||||
.map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter),
|
||||
just("*").map(|s| init_table(s.to_string())),
|
||||
name::modname_parser().map(init_table)
|
||||
)).padded()
|
||||
).map(|(pre, post)| {
|
||||
Box::new(post.map(move |el| {
|
||||
Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter
|
||||
})) as BoxedStrIterIter
|
||||
})
|
||||
}).padded().map(|paths| {
|
||||
paths.filter_map(|namespaces| {
|
||||
let mut path: Vec<String> = namespaces.collect();
|
||||
match path.pop()?.as_str() {
|
||||
"*" => Some(Import::All(path)),
|
||||
name => Some(Import::Name(path, name.to_owned()))
|
||||
}
|
||||
}).collect()
|
||||
})
|
||||
}
|
||||
7
src/parse/misc.rs
Normal file
7
src/parse/misc.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
pub use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
pub fn comment_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||
any().repeated().delimited_by(just("--["), just("]--")).or(
|
||||
any().repeated().delimited_by(just("--"), just("\n"))
|
||||
).map(|vc| vc.iter().collect()).padded()
|
||||
}
|
||||
9
src/parse/mod.rs
Normal file
9
src/parse/mod.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
mod expression;
|
||||
mod string;
|
||||
mod number;
|
||||
mod misc;
|
||||
mod import;
|
||||
mod name;
|
||||
mod substitution;
|
||||
|
||||
pub use expression::expression_parser;
|
||||
28
src/parse/name.rs
Normal file
28
src/parse/name.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
fn op_parser_recur<'a, 'b>(ops: &'a [String]) -> BoxedParser<'b, char, String, Simple<char>> {
|
||||
if ops.len() == 1 { just(ops[0].clone()).boxed() }
|
||||
else { just(ops[0].clone()).or(op_parser_recur(&ops[1..])).boxed() }
|
||||
}
|
||||
|
||||
fn op_parser(ops: &[String]) -> BoxedParser<char, String, Simple<char>> {
|
||||
let mut sorted_ops = ops.to_vec();
|
||||
sorted_ops.sort_by(|a, b| b.len().cmp(&a.len()));
|
||||
op_parser_recur(&sorted_ops)
|
||||
}
|
||||
|
||||
pub fn modname_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||
let not_name_char: Vec<char> = vec![':', '\\', '"', '\'', '(', ')', '.'];
|
||||
filter(move |c| !not_name_char.contains(c) && !c.is_whitespace())
|
||||
.repeated().at_least(1)
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn name_parser<'a>(ops: &'a [String]) -> impl Parser<char, String, Error = Simple<char>> + 'a {
|
||||
choice((
|
||||
op_parser(ops), // First try to parse a known operator
|
||||
text::ident(), // Failing that, parse plain text
|
||||
// Finally parse everything until tne next terminal as a new operator
|
||||
modname_parser()
|
||||
)).padded()
|
||||
}
|
||||
88
src/parse/number.rs
Normal file
88
src/parse/number.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
/// Asserts that `c` is not a valid digit in `base`.
///
/// Used to make sure a marker character (such as the exponent marker) cannot
/// be confused with a digit of the number it follows.
///
/// The check uses `char::is_digit`, so it is correct for every character. The
/// previous arithmetic (`c as u32 - 'a' as u32`) underflowed for any character
/// below `'a'`, such as decimal digits and uppercase letters.
///
/// # Panics
/// Panics if `c` is a digit in `base`, or if `base` is greater than 36 (the
/// largest radix `char::is_digit` supports).
fn assert_not_digit(base: u32, c: char) {
    if c.is_digit(base) {
        panic!("The character '{}' is a digit in base ({})", c, base)
    }
}
|
||||
|
||||
fn separated_digits_parser(base: u32) -> impl Parser<char, String, Error = Simple<char>> {
|
||||
just('_')
|
||||
.ignore_then(text::digits(base))
|
||||
.repeated()
|
||||
.map(|sv| sv.iter().map(|s| s.chars()).flatten().collect())
|
||||
}
|
||||
|
||||
fn uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
text::int(base)
|
||||
.then(separated_digits_parser(base))
|
||||
.map(move |(s1, s2): (String, String)| {
|
||||
u64::from_str_radix(&(s1 + &s2), base).unwrap()
|
||||
})
|
||||
}
|
||||
|
||||
fn pow_parser() -> impl Parser<char, i32, Error = Simple<char>> {
|
||||
return choice((
|
||||
just('p')
|
||||
.ignore_then(text::int(10))
|
||||
.map(|s: String| s.parse().unwrap()),
|
||||
just("p-")
|
||||
.ignore_then(text::int(10))
|
||||
.map(|s: String| -s.parse::<i32>().unwrap()),
|
||||
)).or_else(|_| Ok(0))
|
||||
}
|
||||
|
||||
/// Builds a closure that scales an integer mantissa by a power of `base`.
///
/// The closure takes `(val, exp)` and returns `val * base^exp`; an exponent of
/// `0` returns `val` unchanged.
///
/// # Panics
/// The closure panics when `exp` is negative or when the result does not fit
/// in a `u64`. The multiplication is checked explicitly so that overflow is
/// detected in release builds too instead of silently wrapping.
fn nat2u(base: u64) -> impl Fn((u64, i32)) -> u64 {
    move |(val, exp)| {
        if exp == 0 {
            return val;
        }
        let exp = u32::try_from(exp).expect("integer literals cannot have a negative exponent");
        base.checked_pow(exp)
            .and_then(|scale| val.checked_mul(scale))
            .expect("integer literal with exponent overflows u64")
    }
}
|
||||
|
||||
/// Builds a closure that scales a floating-point mantissa by a power of `base`.
///
/// The closure takes `(mantissa, exponent)` and returns
/// `mantissa * base^exponent`; a zero exponent returns the mantissa untouched.
fn nat2f(base: u64) -> impl Fn((f64, i32)) -> f64 {
    move |(mantissa, exponent)| match exponent {
        0 => mantissa,
        _ => mantissa * (base as f64).powf(f64::from(exponent)),
    }
}
|
||||
|
||||
fn pow_uint_parser(base: u32) -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
assert_not_digit(base, 'p');
|
||||
uint_parser(base).then(pow_parser()).map(nat2u(base.into()))
|
||||
}
|
||||
|
||||
pub fn int_parser() -> impl Parser<char, u64, Error = Simple<char>> {
|
||||
choice((
|
||||
just("0b").ignore_then(pow_uint_parser(2)),
|
||||
just("0x").ignore_then(pow_uint_parser(16)),
|
||||
just('0').ignore_then(pow_uint_parser(8)),
|
||||
pow_uint_parser(10), // Dec has no prefix
|
||||
))
|
||||
}
|
||||
|
||||
fn dotted_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
uint_parser(base)
|
||||
.then_ignore(just('.'))
|
||||
.then(
|
||||
text::digits(base).then(separated_digits_parser(base))
|
||||
).map(move |(wh, (frac1, frac2))| {
|
||||
let frac = frac1 + &frac2;
|
||||
let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64;
|
||||
let dexp = base.pow(frac.len().try_into().unwrap());
|
||||
wh as f64 + (frac_num / dexp as f64)
|
||||
})
|
||||
}
|
||||
|
||||
fn pow_float_parser(base: u32) -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
assert_not_digit(base, 'p');
|
||||
dotted_parser(base).then(pow_parser()).map(nat2f(base.into()))
|
||||
}
|
||||
|
||||
pub fn float_parser() -> impl Parser<char, f64, Error = Simple<char>> {
|
||||
choice((
|
||||
just("0b").ignore_then(pow_float_parser(2)),
|
||||
just("0x").ignore_then(pow_float_parser(16)),
|
||||
just('0').ignore_then(pow_float_parser(8)),
|
||||
pow_float_parser(10),
|
||||
))
|
||||
}
|
||||
42
src/parse/string.rs
Normal file
42
src/parse/string.rs
Normal file
@@ -0,0 +1,42 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
fn text_parser(delim: char) -> impl Parser<char, char, Error = Simple<char>> {
|
||||
let escape = just('\\').ignore_then(
|
||||
just('\\')
|
||||
.or(just('/'))
|
||||
.or(just('"'))
|
||||
.or(just('b').to('\x08'))
|
||||
.or(just('f').to('\x0C'))
|
||||
.or(just('n').to('\n'))
|
||||
.or(just('r').to('\r'))
|
||||
.or(just('t').to('\t'))
|
||||
.or(just('u').ignore_then(
|
||||
filter(|c: &char| c.is_digit(16))
|
||||
.repeated()
|
||||
.exactly(4)
|
||||
.collect::<String>()
|
||||
.validate(|digits, span, emit| {
|
||||
char::from_u32(u32::from_str_radix(&digits, 16).unwrap())
|
||||
.unwrap_or_else(|| {
|
||||
emit(Simple::custom(span, "invalid unicode character"));
|
||||
'\u{FFFD}' // unicode replacement character
|
||||
})
|
||||
}),
|
||||
)),
|
||||
);
|
||||
filter(move |&c| c != '\\' && c != delim).or(escape)
|
||||
}
|
||||
|
||||
pub fn char_parser() -> impl Parser<char, char, Error = Simple<char>> {
|
||||
just('\'').ignore_then(text_parser('\'')).then_ignore(just('\''))
|
||||
}
|
||||
|
||||
pub fn str_parser() -> impl Parser<char, String, Error = Simple<char>> {
|
||||
just('"')
|
||||
.ignore_then(
|
||||
text_parser('"').map(Some)
|
||||
.or(just("\\\n").map(|_| None))
|
||||
.repeated()
|
||||
).then_ignore(just('"'))
|
||||
.flatten().collect()
|
||||
}
|
||||
21
src/parse/substitution.rs
Normal file
21
src/parse/substitution.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
use chumsky::{self, prelude::*, Parser};
|
||||
|
||||
use super::{expression, number::float_parser};
|
||||
|
||||
pub struct Substitution {
|
||||
source: expression::Expr,
|
||||
priority: f64,
|
||||
target: expression::Expr
|
||||
}
|
||||
|
||||
pub fn substitutionParser<'a>(
|
||||
ops: &'a [String]
|
||||
) -> impl Parser<char, Substitution, Error = Simple<char>> + 'a {
|
||||
expression::expression_parser(ops)
|
||||
.then_ignore(just('='))
|
||||
.then(
|
||||
float_parser().then_ignore(just("=>"))
|
||||
.or_not().map(|prio| prio.unwrap_or(0.0))
|
||||
).then(expression::expression_parser(ops))
|
||||
.map(|((source, priority), target)| Substitution { source, priority, target })
|
||||
}
|
||||
Reference in New Issue
Block a user