commit 9a553b7b6806d6bde1b096049ecc4f0fb57d940a Author: Lawrence Bethlenfalvy Date: Wed May 25 02:07:49 2022 +0200 Initial commit First prototype parser ready diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..0a2bd0d --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,162 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "ahash" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8fd72866655d1904d6b0997d0b07ba561047d070fbe29de039031c641b61217" +dependencies = [ + "const-random", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chumsky" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d02796e4586c6c41aeb68eae9bfb4558a522c35f1430c14b40136c3706e09e4" +dependencies = [ + "ahash", +] + +[[package]] +name = "const-random" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f590d95d011aa80b063ffe3253422ed5aa462af4e9867d43ce8337562bac77c4" +dependencies = [ + "const-random-macro", + "proc-macro-hack", +] + +[[package]] +name = "const-random-macro" +version = "0.1.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615f6e27d000a2bffbc7f2f6a8669179378fa27ee4d0a509e985dfc0a7defb40" +dependencies = [ + "getrandom", + "lazy_static", + "proc-macro-hack", + "tiny-keccak", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "orchid" +version = "0.1.0" +dependencies = [ + "chumsky", + "thiserror", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro2" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "syn" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "unicode-ident" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..54f3296 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "orchid" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +thiserror = "1.0" +chumsky = "0.8" \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..7cea890 --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +Orchid will be a functional language with a powerful macro language and +optimizer. Further explanation and demos coming soon! \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..9dd3550 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,13 @@ +use std::io::{self, Read}; + +use chumsky::Parser; + +mod parse; + +fn main() { + let mut input = String::new(); + let mut stdin = io::stdin(); + stdin.read_to_string(&mut input).unwrap(); + let output = parse::parser().parse(input); + println!("\nParsed:\n{:?}", output); +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..eab8242 --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,143 @@ +use std::fmt::Debug; +use chumsky::{self, prelude::*, Parser}; + +#[derive(Debug)] +pub enum Expr { + Num(f64), + Int(u64), + Char(char), + Str(String), + Name(String), + S(Vec), + Lambda(String, Vec) +} + +fn uint_parser(base: u32) -> impl Parser> { + text::int(base).map(move |s: String| u64::from_str_radix(&s, base).unwrap()) +} + +fn e_parser() -> impl Parser> { + return choice(( + just('e') + .ignore_then(text::int(10)) + .map(|s: String| s.parse().unwrap()), + just("e-") + .ignore_then(text::int(10)) + .map(|s: String| -s.parse::().unwrap()), + empty().map(|()| 0) + )) +} + +fn nat2u(base: u64) -> impl Fn((u64, i32),) -> u64 { + return move |(val, exp)| { + if exp == 0 {val} + else {val * base.checked_pow(exp.try_into().unwrap()).unwrap()} + }; +} + +fn nat2f(base: u64) -> impl Fn((f64, i32),) -> f64 { + return move |(val, exp)| { + if exp == 0 {val} + else {val * (base as f64).powf(exp.try_into().unwrap())} + } +} + +fn e_uint_parser(base: u32) -> impl Parser> { + if base > 14 {panic!("exponential in base that uses the digit 'e' is ambiguous")} + uint_parser(base).then(e_parser()).map(nat2u(base.into())) +} + +fn int_parser() -> impl Parser> { + choice(( + just("0b").ignore_then(e_uint_parser(2)), + just("0x").ignore_then(uint_parser(16)), + just('0').ignore_then(e_uint_parser(8)), + e_uint_parser(10), // Dec has no prefix + )) +} + +fn dotted_parser(base: u32) -> impl Parser> { + uint_parser(base) + .then_ignore(just('.')) + .then(text::digits(base)) + .map(move |(wh, frac)| { + let frac_num = u64::from_str_radix(&frac, base).unwrap() as f64; + let dexp = base.pow(frac.len().try_into().unwrap()); + wh as f64 + (frac_num / dexp as f64) + }) +} + +fn e_float_parser(base: u32) -> impl Parser> { + if base > 14 {panic!("exponential in base that uses the digit 'e' is ambiguous")} + dotted_parser(base).then(e_parser()).map(nat2f(base.into())) +} + +fn float_parser() -> impl Parser> { + choice(( + just("0b").ignore_then(e_float_parser(2)), + just("0x").ignore_then(dotted_parser(16)), + just('0').ignore_then(e_float_parser(8)), + e_float_parser(10), + )) +} + +fn text_parser(delim: char) -> impl Parser> { + let escape = just('\\').ignore_then( + just('\\') + .or(just('/')) + .or(just('"')) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + .or(just('u').ignore_then( + filter(|c: &char| c.is_digit(16)) + .repeated() + .exactly(4) + .collect::() + .validate(|digits, span, emit| { + char::from_u32(u32::from_str_radix(&digits, 16).unwrap()) + .unwrap_or_else(|| { + emit(Simple::custom(span, "invalid unicode character")); + '\u{FFFD}' // unicode replacement character + }) + }), + )), + ); + filter(move |&c| c != '\\' && c != delim).or(escape) +} + +fn char_parser() -> impl Parser> { + just('\'').ignore_then(text_parser('\'')).then_ignore(just('\'')) +} + +fn str_parser() -> impl Parser> { + just('"') + .ignore_then(text_parser('"').repeated()) + .then_ignore(just('"')) + .collect() +} + +pub fn parser() -> impl Parser> { + return recursive(|expr| { + let lambda = just('\\') + .ignore_then(text::ident()) + .then_ignore(just('.')) + .then(expr.clone().repeated().at_least(1)) + .map(|(name, body)| Expr::Lambda(name, body)); + let sexpr = expr.clone() + .repeated() + .delimited_by(just('('), just(')')) + .map(Expr::S); + choice(( + float_parser().map(Expr::Num), + int_parser().map(Expr::Int), + char_parser().map(Expr::Char), + str_parser().map(Expr::Str), + text::ident().map(Expr::Name), + sexpr, + lambda + )).padded() + }).then_ignore(end()) +} \ No newline at end of file