diff --git a/Cargo.lock b/Cargo.lock index 0a2bd0d..ba42476 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -54,6 +54,17 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "getrandom" version = "0.2.6" @@ -82,6 +93,7 @@ name = "orchid" version = "0.1.0" dependencies = [ "chumsky", + "derivative", "thiserror", ] diff --git a/Cargo.toml b/Cargo.toml index 54f3296..90e40b3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,4 +7,5 @@ edition = "2021" [dependencies] thiserror = "1.0" -chumsky = "0.8" \ No newline at end of file +chumsky = "0.8" +derivative = "2.2" \ No newline at end of file diff --git a/README.md b/README.md index 7cea890..034f194 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,274 @@ -Orchid will be a functional language with a powerful macro language and -optimizer. Further explanation and demos coming soon! \ No newline at end of file +Orchid will be a compiled functional language with a powerful macro +language and optimizer. + +# Examples + +Hello World in Orchid +```orchid +import std::io::(println, out) + +main = println out "Hello World!" +``` + +Basic command line calculator +```orchid +import std::io::(readln, printf, in, out) + +main = ( + readln in >>= int |> \a. + readln in >>= \op. + readln in >>= int |> \b. + printf out "the result is {}\n", [match op ( + "+" => a + b, + "-" => a - b, + "*" => a * b, + "/" => a / b + )] +) +``` + +Grep +```orchid +import std::io::(readln, println, in, out, getarg) + +main = loop \r. ( + readln in >>= \line. 
+ if (substring (getarg 1) line) + then (println out line >>= r) + else r +) +``` + +Filter through an arbitrary collection +```orchid +filter = @C:Type -> Type. @:Map C. @T. @U. \f:T -> U. \coll:C T. ( + coll >> \el. if (f el) then (Some el) else Nil +):(C U) +``` + +# Explanation + +This explanation is not a tutorial. It follows a constructive order, +gradually introducing language features to better demonstrate their +purpose. It also assumes that the reader is familiar with functional +programming. + +## Lambda calculus recap + +The language is almost entirely based on lambda calculus, so everything +is immutable and evaluation is lazy. The following is an anonymous +function that takes an integer argument and multiplies it by 2: + +```orchid +\x:int. imul 2 x +``` + +Multiple parameters are represented using currying, so the above is +equivalent to + +```orchid +imul 2 +``` + +Recursion is accomplished using the Y combinator (called `loop`), which +is a function that takes a function as its single parameter and applies +it to itself. A naive implementation of `imul` might look like this. + +```orchid +\a:int.\b:int. loop \r. (\i. + ifthenelse (ieq i 0) + b + (iadd b (r (isub i 1))) +) a +``` + +`ifthenelse` takes a boolean as its first parameter and selects one of the +following two expressions (of identical type) accordingly. `ieq`, `iadd` +and `isub` are self-explanatory. + +## Auto parameters (generics, polymorphism) + +Although I didn't specify the type of `i` in the above example, it is +known at compile time because the recursion is applied to b which is an +integer. I could have omitted the second argument but then I would have +had to specify `i`'s type as an integer, because for plain lambda +expressions all types have to be statically known at compile time. To +achieve polymorphism, one parametric tool is available, called auto +parameters. 
An auto parameter is a placeholder filled in during +compilation, syntactically remarkably similar to lambda expressions: + +```orchid +@T. --[ body of expression referencing T ]-- +``` + +Autos have two closely related uses. First, they are used to represent +generic type parameters. If an auto is used as the type of an argument +or some other subexpression that can be trivially deduced from the calling +context, it is filled in. + +The second usage of autos is for constraints, if they have a type that +references another auto. Because these parameters are filled in by the +compiler, referencing them is equivalent to the statement that a default +value assignable to the specified type exists. Default values are declared +explicitly and identified by their type, where that type itself may be +parametric and may specify its own constraints which are resolved +recursively. If the referenced default is itself a useful value or +function you can give it a name and use it as such, but you can also omit +the name, using the default as a hint to the compiler to be able to call +functions that also have defaults of the same types, or possibly other +types whose defaults have implementations based on your defaults. + +For a demonstration, here's a sample implementation of the Option monad. +```orchid +--[[ The definition of Monad ]]-- +Bind = \M:Type -> Type. @T -> @U -> (T -> M U) -> M T -> M U +Return = \M:Type -> Type. @T -> T -> M T +Monad = \M:Type -> Type. ( + @:Bind M. + @:Return M. + 0 --[ Note that empty expressions are forbidden so those that exist + purely for their constraints should return a nondescript constant + that is likely to raise a type error when used by mistake, such as + zero ]-- +) + +--[[ The definition of Option ]]-- +export Option = \T:Type. @U -> U -> (T -> U) -> U +--[ Constructors ]-- +export Some = @T. \data:T. ( \default. \map. map data ):(Option T) +export None = @T. ( \default. \map. 
+ default ):(Option T) +--[ Implement Monad ]-- +default returnOption = Some:(Return Option) +default bindOption = ( @T:Type. @U:Type. + \f:T -> U. \opt:Option T. opt None f +):(Bind Option) +--[ Sample function that works on unknown monad to demonstrate HKTs. + Turns (Option (M T)) into (M (Option T)), "raising" the unknown monad + out of the Option ]-- +export raise = @M:Type -> Type. @T:Type. @:Monad M. \opt:Option (M T). ( + opt (return None) (\m. bind m (\x. Some x)) +):(M (Option T)) +``` + +Defaults may be defined in any module that also defines at least one of +the types in the definition, which includes both the type of the +expression and the types of its auto parameters. They always have a name, +which can be used to override known defaults with which your definition +may overlap. For example, if addition is defined elementwise for all +applicative functors, the author of List might want concatenation to +take precedence in the case where all element types match. Notice how +Add has three arguments: two are the types of the operands and one is +the type of the result: + +```orchid +default concatListAdd replacing elementwiseAdd = @T. ( + ... +):(Add (List T) (List T) (List T)) +``` + +For completeness' sake, the original definition might look like this: + +```orchid +default elementwiseAdd = @C:Type -> Type. @T. @U. @V. @:(Applicative C). @:(Add T U V). ( + ... +):(Add (C T) (C U) (C V)) +``` + +With the use of autos, here's what the recursive multiplication +implementation looks like: + +```orchid +default iterativeMultiply = @T. @:(Add T T T). ( + \a:int.\b:T. loop \r. (\i. 
+ ifthenelse (ieq i 0) + b + (add b (r (isub i 1))) -- notice how iadd is now add + ) a +):(Multiply T int T) +``` + +This could then be applied to any type that's closed under addition + +```orchid +aroundTheWorldLyrics = ( + mult 18 (add (mult 4 "Around the World\n") "\n") +) +``` + +## Preprocessor + +The above code samples have one notable difference from the Examples +section above; they're ugly and hard to read. The solution to this is a +powerful preprocessor which is used internally to define all sorts of +syntax sugar from operators to complex syntax patterns and even pattern +matching, and can also be used to define custom syntax. The preprocessor +executes substitution rules on the S-tree which have a real-numbered +priority and an internal order of resolution. + +In the following example, seq matches a list of arbitrary tokens and its +parameter is the order of resolution. The order can be used for example to +make sure that `if a then b else if c then d else e` becomes +`(ifthenelse a b (ifthenelse c d e))` and not +`(ifthenelse a b if) c then d else e`. It's worth highlighting here that +preprocessing works on the typeless AST and matchers are constructed +using inclusion rather than exclusion, so it would not be possible to +selectively allow the above example without enforcing that if-statements +are searched back-to-front. If order is still a problem, you can always +parenthesize problematic expressions. + +```orchid +(...$pre:(seq 2) if $1 then $2 else $3 ...$post:(seq 1)) =2=> ( + ...$pre + (ifthenelse $1 $2 $3) + ...$post +) +$a + $b =10=> (add $a $b) +$a == $b =5=> (eq $a $b) +$a - $b =10=> (sub $a $b) +``` + +The recursive multiplication function now looks like this + +```orchid +default iterativeMultiply = @T. @:(Add T T T). ( + \a:int.\b:T. loop \r. (\i. 
+ if (i == 0) then b + else (b + (r (i - 1))) + ) a +):(Multiply T int T) +``` + +### Traversal using carriages + +While it may not be immediately apparent, these substitution rules are +actually Turing complete. They can be used quite intuitively to traverse +the token tree with unique "carriage" symbols that move according to their +environment and can carry structured data payloads. + +TODO: carriage example + +# Module system + +Files are the smallest unit of namespacing, automatically grouped into +folders and forming a tree the leaves of which are the actual symbols. An +exported symbol is a name referenced in an exported substitution pattern +or assigned to an exported function. Imported symbols are considered +identical to the same symbol directly imported from the same module for +the purposes of substitution. + +# Optimization + +This is very far away so I don't want to make promises, but I have some +ideas. + +[ ] early execution of functions on any subset of their arguments where it + could provide substantial speedup +[ ] tracking copies of expressions and evaluating them only once +[ ] Many cases of single recursion converted to loops + [ ] tail recursion + [ ] 2 distinct loops where the tail doesn't use the arguments + [ ] reorder operations to favour this scenario +[ ] reactive calculation of values that are deemed to be read more often + than written +[ ] automatic profiling based on performance metrics generated by debug + builds \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 9dd3550..e0e31ab 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ use std::io::{self, Read}; -use chumsky::Parser; +use chumsky::{Parser, prelude::*}; mod parse; @@ -8,6 +8,7 @@ fn main() { let mut input = String::new(); let mut stdin = io::stdin(); stdin.read_to_string(&mut input).unwrap(); - let output = parse::parser().parse(input); + let ops: Vec = vec!["$", "."].iter().map(|&s| s.to_string()).collect(); + let output = 
parse::expression_parser(&ops).then_ignore(end()).parse(input); println!("\nParsed:\n{:?}", output); } diff --git a/src/parse.rs b/src/parse.rs deleted file mode 100644 index eab8242..0000000 --- a/src/parse.rs +++ /dev/null @@ -1,143 +0,0 @@ -use std::fmt::Debug; -use chumsky::{self, prelude::*, Parser}; - -#[derive(Debug)] -pub enum Expr { - Num(f64), - Int(u64), - Char(char), - Str(String), - Name(String), - S(Vec), - Lambda(String, Vec) -} - -fn uint_parser(base: u32) -> impl Parser> { - text::int(base).map(move |s: String| u64::from_str_radix(&s, base).unwrap()) -} - -fn e_parser() -> impl Parser> { - return choice(( - just('e') - .ignore_then(text::int(10)) - .map(|s: String| s.parse().unwrap()), - just("e-") - .ignore_then(text::int(10)) - .map(|s: String| -s.parse::().unwrap()), - empty().map(|()| 0) - )) -} - -fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 { - return move |(val, exp)| { - if exp == 0 {val} - else {val * base.checked_pow(exp.try_into().unwrap()).unwrap()} - }; -} - -fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 { - return move |(val, exp)| { - if exp == 0 {val} - else {val * (base as f64).powf(exp.try_into().unwrap())} - } -} - -fn e_uint_parser(base: u32) -> impl Parser> { - if base > 14 {panic!("exponential in base that uses the digit 'e' is ambiguous")} - uint_parser(base).then(e_parser()).map(nat2u(base.into())) -} - -fn int_parser() -> impl Parser> { - choice(( - just("0b").ignore_then(e_uint_parser(2)), - just("0x").ignore_then(uint_parser(16)), - just('0').ignore_then(e_uint_parser(8)), - e_uint_parser(10), // Dec has no prefix - )) -} - -fn dotted_parser(base: u32) -> impl Parser> { - uint_parser(base) - .then_ignore(just('.')) - .then(text::digits(base)) - .map(move |(wh, frac)| { - let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64; - let dexp = base.pow(frac.len().try_into().unwrap()); - wh as f64 + (frac_num / dexp as f64) - }) -} - -fn e_float_parser(base: u32) -> impl Parser> { - if base > 14 
{panic!("exponential in base that uses the digit 'e' is ambiguous")} - dotted_parser(base).then(e_parser()).map(nat2f(base.into())) -} - -fn float_parser() -> impl Parser> { - choice(( - just("0b").ignore_then(e_float_parser(2)), - just("0x").ignore_then(dotted_parser(16)), - just('0').ignore_then(e_float_parser(8)), - e_float_parser(10), - )) -} - -fn text_parser(delim: char) -> impl Parser> { - let escape = just('\\').ignore_then( - just('\\') - .or(just('/')) - .or(just('"')) - .or(just('b').to('\x08')) - .or(just('f').to('\x0C')) - .or(just('n').to('\n')) - .or(just('r').to('\r')) - .or(just('t').to('\t')) - .or(just('u').ignore_then( - filter(|c: &char| c.is_digit(16)) - .repeated() - .exactly(4) - .collect::() - .validate(|digits, span, emit| { - char::from_u32(u32::from_str_radix(&digits, 16).unwrap()) - .unwrap_or_else(|| { - emit(Simple::custom(span, "invalid unicode character")); - '\u{FFFD}' // unicode replacement character - }) - }), - )), - ); - filter(move |&c| c != '\\' && c != delim).or(escape) -} - -fn char_parser() -> impl Parser> { - just('\'').ignore_then(text_parser('\'')).then_ignore(just('\'')) -} - -fn str_parser() -> impl Parser> { - just('"') - .ignore_then(text_parser('"').repeated()) - .then_ignore(just('"')) - .collect() -} - -pub fn parser() -> impl Parser> { - return recursive(|expr| { - let lambda = just('\\') - .ignore_then(text::ident()) - .then_ignore(just('.')) - .then(expr.clone().repeated().at_least(1)) - .map(|(name, body)| Expr::Lambda(name, body)); - let sexpr = expr.clone() - .repeated() - .delimited_by(just('('), just(')')) - .map(Expr::S); - choice(( - float_parser().map(Expr::Num), - int_parser().map(Expr::Int), - char_parser().map(Expr::Char), - str_parser().map(Expr::Str), - text::ident().map(Expr::Name), - sexpr, - lambda - )).padded() - }).then_ignore(end()) -} \ No newline at end of file diff --git a/src/parse/expression.rs b/src/parse/expression.rs new file mode 100644 index 0000000..35da5a2 --- /dev/null +++ 
b/src/parse/expression.rs @@ -0,0 +1,72 @@ +use std::{fmt::Debug}; +use chumsky::{self, prelude::*, Parser}; + +use super::string; +use super::number; +use super::misc; +use super::name; + +#[derive(Debug)] +pub enum Expr { + Num(f64), + Int(u64), + Char(char), + Str(String), + Name(String), + S(Vec), + Lambda(String, Option>, Vec), + Auto(Option, Option>, Vec), + Typed(Box, Box) +} + +fn typed_parser<'a>( + expr: Recursive<'a, char, Expr, Simple>, + ops: &'a [String] +) -> impl Parser> + 'a { + just(':').ignore_then(expr) +} + +fn untyped_xpr_parser<'a>( + expr: Recursive<'a, char, Expr, Simple>, + ops: &'a [String] +) -> impl Parser> + 'a { + let lambda = just('\\') + .ignore_then(name::name_parser(ops)) + .then(typed_parser(expr.clone(), ops).or_not()) + .then_ignore(just('.')) + .then(expr.clone().repeated().at_least(1)) + .map(|((name, t), body)| Expr::Lambda(name, t.map(Box::new), body)); + let auto = just('@') + .ignore_then(name::name_parser(ops).or_not()) + .then(typed_parser(expr.clone(), ops).or_not()) + .then_ignore(just('.')) + .then(expr.clone().repeated().at_least(1)) + .map(|((name, t), body)| Expr::Auto(name, t.map(Box::new), body)); + let sexpr = expr.clone() + .repeated() + .delimited_by(just('('), just(')')) + .map(Expr::S); + choice(( + number::float_parser().map(Expr::Num), + number::int_parser().map(Expr::Int), + string::char_parser().map(Expr::Char), + string::str_parser().map(Expr::Str), + name::name_parser(ops).map(Expr::Name), + sexpr, + lambda, + auto + )).padded() +} + +pub fn expression_parser(ops: &[String]) -> impl Parser> + '_ { + return recursive(|expr| { + return misc::comment_parser().or_not().ignore_then( + untyped_xpr_parser(expr.clone(), &ops) + .then(typed_parser(expr, ops).or_not()) + .map(|(val, t)| match t { + Some(typ) => Expr::Typed(Box::new(val), Box::new(typ)), + None => val + }) + ).then_ignore(misc::comment_parser().or_not()) + }) +} \ No newline at end of file diff --git a/src/parse/import.rs b/src/parse/import.rs 
new file mode 100644 index 0000000..57d0c75 --- /dev/null +++ b/src/parse/import.rs @@ -0,0 +1,58 @@ +use chumsky::{Parser, prelude::*, text::Character}; +use super::name; + +enum Import { + Name(Vec, String), + All(Vec) +} + +fn prefix(pre: Vec, im: Import) -> Import { + match im { + Import::Name(ns, name) => Import::Name( + pre.into_iter().chain(ns.into_iter()).collect(), + name + ), + Import::All(ns) => Import::All( + pre.into_iter().chain(ns.into_iter()).collect() + ) + } +} + + +type BoxedStrIter = Box>; +type BoxedStrIterIter = Box>; + +fn init_table(name: String) -> BoxedStrIterIter { + Box::new(vec![Box::new(vec![name].into_iter()) as BoxedStrIter].into_iter()) +} + +pub fn import_parser() -> impl Parser, Error = Simple> { + recursive(|expr: Recursive>| { + name::modname_parser() + .padded() + .then_ignore(just("::")) + .repeated() + .then( + choice(( + expr.clone() + .separated_by(just(',')) + .delimited_by(just('('), just(')')) + .map(|v| Box::new(v.into_iter().flatten()) as BoxedStrIterIter), + just("*").map(|s| init_table(s.to_string())), + name::modname_parser().map(init_table) + )).padded() + ).map(|(pre, post)| { + Box::new(post.map(move |el| { + Box::new(pre.clone().into_iter().chain(el)) as BoxedStrIter + })) as BoxedStrIterIter + }) + }).padded().map(|paths| { + paths.filter_map(|namespaces| { + let mut path: Vec = namespaces.collect(); + match path.pop()?.as_str() { + "*" => Some(Import::All(path)), + name => Some(Import::Name(path, name.to_owned())) + } + }).collect() + }) +} \ No newline at end of file diff --git a/src/parse/misc.rs b/src/parse/misc.rs new file mode 100644 index 0000000..fbe905d --- /dev/null +++ b/src/parse/misc.rs @@ -0,0 +1,7 @@ +pub use chumsky::{self, prelude::*, Parser}; + +pub fn comment_parser() -> impl Parser> { + any().repeated().delimited_by(just("--["), just("]--")).or( + any().repeated().delimited_by(just("--"), just("\n")) + ).map(|vc| vc.iter().collect()).padded() +} diff --git a/src/parse/mod.rs 
b/src/parse/mod.rs new file mode 100644 index 0000000..1d7b7f7 --- /dev/null +++ b/src/parse/mod.rs @@ -0,0 +1,9 @@ +mod expression; +mod string; +mod number; +mod misc; +mod import; +mod name; +mod substitution; + +pub use expression::expression_parser; diff --git a/src/parse/name.rs b/src/parse/name.rs new file mode 100644 index 0000000..c8dfa67 --- /dev/null +++ b/src/parse/name.rs @@ -0,0 +1,28 @@ +use chumsky::{self, prelude::*, Parser}; + +fn op_parser_recur<'a, 'b>(ops: &'a [String]) -> BoxedParser<'b, char, String, Simple> { + if ops.len() == 1 { just(ops[0].clone()).boxed() } + else { just(ops[0].clone()).or(op_parser_recur(&ops[1..])).boxed() } +} + +fn op_parser(ops: &[String]) -> BoxedParser> { + let mut sorted_ops = ops.to_vec(); + sorted_ops.sort_by(|a, b| b.len().cmp(&a.len())); + op_parser_recur(&sorted_ops) +} + +pub fn modname_parser() -> impl Parser> { + let not_name_char: Vec = vec![':', '\\', '"', '\'', '(', ')', '.']; + filter(move |c| !not_name_char.contains(c) && !c.is_whitespace()) + .repeated().at_least(1) + .collect() +} + +pub fn name_parser<'a>(ops: &'a [String]) -> impl Parser> + 'a { + choice(( + op_parser(ops), // First try to parse a known operator + text::ident(), // Failing that, parse plain text + // Finally parse everything until tne next terminal as a new operator + modname_parser() + )).padded() +} \ No newline at end of file diff --git a/src/parse/number.rs b/src/parse/number.rs new file mode 100644 index 0000000..dde4e54 --- /dev/null +++ b/src/parse/number.rs @@ -0,0 +1,88 @@ +use chumsky::{self, prelude::*, Parser}; + +fn assert_not_digit(base: u32, c: char) { + if base > (10 + (c as u32 - 'a' as u32)) { + panic!("The character '{}' is a digit in base ({})", c, base) + } +} + +fn separated_digits_parser(base: u32) -> impl Parser> { + just('_') + .ignore_then(text::digits(base)) + .repeated() + .map(|sv| sv.iter().map(|s| s.chars()).flatten().collect()) +} + +fn uint_parser(base: u32) -> impl Parser> { + text::int(base) + 
.then(separated_digits_parser(base)) + .map(move |(s1, s2): (String, String)| { + u64::from_str_radix(&(s1 + &s2), base).unwrap() + }) +} + +fn pow_parser() -> impl Parser> { + return choice(( + just('p') + .ignore_then(text::int(10)) + .map(|s: String| s.parse().unwrap()), + just("p-") + .ignore_then(text::int(10)) + .map(|s: String| -s.parse::().unwrap()), + )).or_else(|_| Ok(0)) +} + +fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 { + return move |(val, exp)| { + if exp == 0 {val} + else {val * base.checked_pow(exp.try_into().unwrap()).unwrap()} + }; +} + +fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 { + return move |(val, exp)| { + if exp == 0 {val} + else {val * (base as f64).powf(exp.try_into().unwrap())} + } +} + +fn pow_uint_parser(base: u32) -> impl Parser> { + assert_not_digit(base, 'p'); + uint_parser(base).then(pow_parser()).map(nat2u(base.into())) +} + +pub fn int_parser() -> impl Parser> { + choice(( + just("0b").ignore_then(pow_uint_parser(2)), + just("0x").ignore_then(pow_uint_parser(16)), + just('0').ignore_then(pow_uint_parser(8)), + pow_uint_parser(10), // Dec has no prefix + )) +} + +fn dotted_parser(base: u32) -> impl Parser> { + uint_parser(base) + .then_ignore(just('.')) + .then( + text::digits(base).then(separated_digits_parser(base)) + ).map(move |(wh, (frac1, frac2))| { + let frac = frac1 + &frac2; + let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64; + let dexp = base.pow(frac.len().try_into().unwrap()); + wh as f64 + (frac_num / dexp as f64) + }) +} + +fn pow_float_parser(base: u32) -> impl Parser> { + assert_not_digit(base, 'p'); + dotted_parser(base).then(pow_parser()).map(nat2f(base.into())) +} + +pub fn float_parser() -> impl Parser> { + choice(( + just("0b").ignore_then(pow_float_parser(2)), + just("0x").ignore_then(pow_float_parser(16)), + just('0').ignore_then(pow_float_parser(8)), + pow_float_parser(10), + )) +} diff --git a/src/parse/string.rs b/src/parse/string.rs new file mode 100644 index 
0000000..b74014d --- /dev/null +++ b/src/parse/string.rs @@ -0,0 +1,42 @@ +use chumsky::{self, prelude::*, Parser}; + +fn text_parser(delim: char) -> impl Parser> { + let escape = just('\\').ignore_then( + just('\\') + .or(just('/')) + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + .or(just('u').ignore_then( + filter(|c: &char| c.is_digit(16)) + .repeated() + .exactly(4) + .collect::() + .validate(|digits, span, emit| { + char::from_u32(u32::from_str_radix(&digits, 16).unwrap()) + .unwrap_or_else(|| { + emit(Simple::custom(span, "invalid unicode character")); + '\u{FFFD}' // unicode replacement character + }) + }), + )), + ); + filter(move |&c| c != '\\' && c != delim).or(escape) +} + +pub fn char_parser() -> impl Parser> { + just('\'').ignore_then(text_parser('\'')).then_ignore(just('\'')) +} + +pub fn str_parser() -> impl Parser> { + just('"') + .ignore_then( + text_parser('"').map(Some) + .or(just("\\\n").map(|_| None)) + .repeated() + ).then_ignore(just('"')) + .flatten().collect() +} \ No newline at end of file diff --git a/src/parse/substitution.rs b/src/parse/substitution.rs new file mode 100644 index 0000000..ccc60c8 --- /dev/null +++ b/src/parse/substitution.rs @@ -0,0 +1,21 @@ +use chumsky::{self, prelude::*, Parser}; + +use super::{expression, number::float_parser}; + +pub struct Substitution { + source: expression::Expr, + priority: f64, + target: expression::Expr +} + +pub fn substitutionParser<'a>( + ops: &'a [String] +) -> impl Parser> + 'a { + expression::expression_parser(ops) + .then_ignore(just('=')) + .then( + float_parser().then_ignore(just("=>")) + .or_not().map(|prio| prio.unwrap_or(0.0)) + ).then(expression::expression_parser(ops)) + .map(|((source, priority), target)| Substitution { source, priority, target }) +}