From 751a02a1ece4c322d67044dd44567e013bdddb80 Mon Sep 17 00:00:00 2001 From: Lawrence Bethlenfalvy Date: Sun, 2 Jul 2023 23:56:54 +0100 Subject: [PATCH] Basic string and binary processing - strings are now made of graphemes - char is no longer a literal type - preliminary binary support - added implicit extraction methods for primitives - added explicit extraction method for atoms Nothing has been tested yet --- Cargo.lock | 14 +++ Cargo.toml | 2 + ROADMAP.md | 5 + src/foreign_macros/atomic_impl.rs | 1 - src/foreign_macros/define_fn.rs | 7 +- src/foreign_macros/write_fn_step.rs | 9 +- src/parse/lexer.rs | 1 - src/parse/name.rs | 2 +- src/parse/string.rs | 7 +- src/representations/literal.rs | 8 -- src/stl/bin.rs | 171 ++++++++++++++++++++++++++++ src/stl/bool.rs | 25 ++-- src/stl/codegen.rs | 46 ++++++++ src/stl/conv.rs | 11 +- src/stl/{litconv.rs => inspect.rs} | 49 +++++++- src/stl/io/command.rs | 2 + src/stl/io/panic.rs | 2 +- src/stl/io/print.rs | 2 +- src/stl/mk_stl.rs | 3 +- src/stl/mod.rs | 4 +- src/stl/num.rs | 2 +- src/stl/str.orc | 11 +- src/stl/str.rs | 83 +++++++++++--- src/utils/iter_find.rs | 47 ++++++++ src/utils/mod.rs | 2 + 25 files changed, 440 insertions(+), 76 deletions(-) create mode 100644 ROADMAP.md create mode 100644 src/stl/bin.rs create mode 100644 src/stl/codegen.rs rename src/stl/{litconv.rs => inspect.rs} (52%) create mode 100644 src/utils/iter_find.rs diff --git a/Cargo.lock b/Cargo.lock index 7589a66..299d79b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -412,8 +412,10 @@ dependencies = [ "ordered-float", "paste", "rust-embed", + "take_mut", "thiserror", "trait-set", + "unicode-segmentation", ] [[package]] @@ -615,6 +617,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "take_mut" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" + [[package]] name = "thiserror" version = "1.0.40" @@ -658,6 +666,12 @@ version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "utf8parse" version = "0.2.1" diff --git a/Cargo.toml b/Cargo.toml index 9498940..e29c0c7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,3 +33,5 @@ trait-set = "0.3" paste = "1.0" rust-embed = { version = "6.6", features = ["include-exclude"] } duplicate = "1.0.0" +take_mut = "0.2.2" +unicode-segmentation = "1.10.1" diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..ea9893d --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,5 @@ +# IO + +All IO is event-based via callbacks. +Driven streams such as stdin expose single-fire events for the results of functions such as "read until terminator" or "read N bytes". +Network IO exposes repeated events such as "connect", "message", etc. \ No newline at end of file diff --git a/src/foreign_macros/atomic_impl.rs b/src/foreign_macros/atomic_impl.rs index 1d538fb..b417054 100644 --- a/src/foreign_macros/atomic_impl.rs +++ b/src/foreign_macros/atomic_impl.rs @@ -63,7 +63,6 @@ use crate::Primitive; /// atomic_redirect!(InternalToString, expr_inst); /// atomic_impl!(InternalToString, |Self { expr_inst }: &Self, _|{ /// with_lit(expr_inst, |l| Ok(match l { -/// Literal::Char(c) => c.to_string(), /// Literal::Uint(i) => i.to_string(), /// Literal::Num(n) => n.to_string(), /// Literal::Str(s) => s.clone(), diff --git a/src/foreign_macros/define_fn.rs b/src/foreign_macros/define_fn.rs index ac54aef..3ebdf4b 100644 --- a/src/foreign_macros/define_fn.rs +++ b/src/foreign_macros/define_fn.rs @@ -65,7 +65,6 @@ use crate::write_fn_step; /// /// Convert a literal to a string using Rust's conversions for floats, /// /// chars and uints respectively /// ToString = |x| with_lit(x, |l| Ok(match l { -/// Literal::Char(c) => c.to_string(), /// Literal::Uint(i) => i.to_string(), /// Literal::Num(n) => n.to_string(), /// Literal::Str(s) => s.clone(), @@ -147,12 +146,12 @@ macro_rules! define_fn { $crate::write_fn_step!( $name { - $( $arg_prev:ident : $typ_prev:ty ),* + $( $arg_prev : $typ_prev ),* } - [< $name $arg0:upper >] + [< $name $arg0:camel >] where $arg0:$typ0 $( = $xname => $parse0 )? ; ); - $crate::define_fn!(@MIDDLE $xname [< $name $arg0:upper >] ($body) + $crate::define_fn!(@MIDDLE $xname [< $name $arg0:camel >] ($body) ( $( ($arg_prev: $typ_prev) )* ($arg0: $typ0) diff --git a/src/foreign_macros/write_fn_step.rs b/src/foreign_macros/write_fn_step.rs index 82e1420..f3a91b4 100644 --- a/src/foreign_macros/write_fn_step.rs +++ b/src/foreign_macros/write_fn_step.rs @@ -18,7 +18,10 @@ use crate::interpreted::ExprInst; /// discussed below. The newly bound names (here `s` and `i` before `=`) can /// also receive type annotations. /// -/// ``` +/// ```no_run +/// // FIXME this is a very old example that wouldn't compile now +/// use unicode_segmentation::UnicodeSegmentation; +/// /// use orchidlang::{write_fn_step, Literal, Primitive}; /// use orchidlang::interpreted::Clause; /// use orchidlang::stl::litconv::{with_str, with_uint}; @@ -36,8 +39,8 @@ use crate::interpreted::ExprInst; /// CharAt0 { s: String } /// i = x => with_uint(x, Ok); /// { -/// if let Some(c) = s.chars().nth(*i as usize) { -/// Ok(Clause::P(Primitive::Literal(Literal::Char(c)))) +/// if let Some(c) = s.graphemes(true).nth(*i as usize) { +/// Ok(Literal::Char(c.to_string()).into()) /// } else { /// RuntimeError::fail( /// "Character index out of bounds".to_string(), diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs index c1e7728..706ff23 100644 --- a/src/parse/lexer.rs +++ b/src/parse/lexer.rs @@ -161,7 +161,6 @@ pub fn literal_parser() -> impl SimpleParser { // all ints are valid floats so it takes precedence number::int_parser().map(Literal::Uint), number::float_parser().map(Literal::Num), - string::char_parser().map(Literal::Char), string::str_parser().map(Literal::Str), )) } diff --git a/src/parse/name.rs b/src/parse/name.rs index ffab6c2..6ecd0c9 100644 --- a/src/parse/name.rs +++ b/src/parse/name.rs @@ -27,7 +27,7 @@ fn op_parser<'a>( pub static NOT_NAME_CHAR: &[char] = &[ ':', // used for namespacing and type annotations '\\', '@', // parametric expression starters - '"', '\'', // parsed as primitives and therefore would never match + '"', // parsed as primitive and therefore would never match '(', ')', '[', ']', '{', '}', // must be strictly balanced '.', // Argument-body separator in parametrics ',', // used in imports diff --git a/src/parse/string.rs b/src/parse/string.rs index 2f512c3..5230c88 100644 --- a/src/parse/string.rs +++ b/src/parse/string.rs @@ -34,17 +34,12 @@ fn text_parser(delim: char) -> impl SimpleParser { filter(move |&c| c != '\\' && c != delim).or(escape) } -/// Parse a character literal between single quotes -pub fn char_parser() -> impl SimpleParser { - just('\'').ignore_then(text_parser('\'')).then_ignore(just('\'')) -} - /// Parse a string between double quotes pub fn str_parser() -> impl SimpleParser { just('"') .ignore_then( text_parser('"').map(Some) - .or(just("\\\n").map(|_| None)) // Newlines preceded by backslashes are ignored. + .or(just("\\\n").then(just(' ').or(just('\t')).repeated()).map(|_| None)) // Newlines preceded by backslashes are ignored along with all following indentation. .repeated(), ) .then_ignore(just('"')) diff --git a/src/representations/literal.rs b/src/representations/literal.rs index ebc03b9..ec77a6a 100644 --- a/src/representations/literal.rs +++ b/src/representations/literal.rs @@ -10,8 +10,6 @@ pub enum Literal { Num(NotNan), /// An unsigned integer; a size, index or pointer Uint(u64), - /// A single utf-8 codepoint - Char(char), /// A utf-8 character sequence Str(String), } @@ -21,7 +19,6 @@ impl Debug for Literal { match self { Self::Num(arg0) => write!(f, "{:?}", arg0), Self::Uint(arg0) => write!(f, "{:?}", arg0), - Self::Char(arg0) => write!(f, "{:?}", arg0), Self::Str(arg0) => write!(f, "{:?}", arg0), } } @@ -37,11 +34,6 @@ impl From for Literal { Self::Uint(value) } } -impl From for Literal { - fn from(value: char) -> Self { - Self::Char(value) - } -} impl From for Literal { fn from(value: String) -> Self { Self::Str(value) diff --git a/src/stl/bin.rs b/src/stl/bin.rs new file mode 100644 index 0000000..cd323c3 --- /dev/null +++ b/src/stl/bin.rs @@ -0,0 +1,171 @@ +use std::fmt::Debug; +use std::rc::Rc; + +use itertools::Itertools; + +use super::codegen::{orchid_opt, tuple}; +use super::inspect::{with_atom, with_uint}; +use super::{RuntimeError, Boolean}; +use crate::foreign::ExternError; +use crate::interpreted::ExprInst; +use crate::utils::{iter_find, unwrap_or}; +use crate::{atomic_inert, define_fn, ConstTree, Interner, Literal}; + +/// A block of binary data +#[derive(Clone)] +pub struct Binary(pub Rc>); +atomic_inert!(Binary); + +impl TryFrom<&ExprInst> for Binary { + type Error = Rc; + + fn try_from(value: &ExprInst) -> Result { + with_atom(value, "a blob", |a: &Binary| Ok(a.clone())) + } +} + +impl Debug for Binary { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut iter = self.0.iter().copied(); + f.write_str("Binary")?; + for mut chunk in iter.by_ref().take(32).chunks(4).into_iter() { + let a = chunk.next().expect("Chunks cannot be empty"); + let b = unwrap_or!(chunk.next(); return write!(f, "{a:02x}")); + let c = unwrap_or!(chunk.next(); return write!(f, "{a:02x}{b:02x}")); + let d = + unwrap_or!(chunk.next(); return write!(f, "{a:02x}{b:02x}{c:02x}")); + write!(f, "{a:02x}{b:02x}{c:02x}{d:02x}")? + } + if iter.next().is_some() { write!(f, "...") } else { Ok(()) } + } +} + +define_fn! {expr=x in + /// Convert a number into a binary blob + pub FromNum { + size: u64, + is_little_endian: Boolean, + data: u64 + } => { + if size > &8 { + RuntimeError::fail( + "more than 8 bytes requested".to_string(), + "converting number to binary" + )? + } + let bytes = if is_little_endian.0 { + data.to_le_bytes()[0..*size as usize].to_vec() + } else { + data.to_be_bytes()[8 - *size as usize..].to_vec() + }; + Ok(Binary(Rc::new(bytes)).to_atom_cls()) + } +} + +define_fn! {expr=x in + /// Read a number from a binary blob + pub GetNum { + buf: Binary, + loc: u64, + size: u64, + is_little_endian: Boolean + } => { + if buf.0.len() < (loc + size) as usize { + RuntimeError::fail( + "section out of range".to_string(), + "reading number from binary data" + )? + } + if 8 < *size { + RuntimeError::fail( + "more than 8 bytes provided".to_string(), + "reading number from binary data" + )? + } + let mut data = [0u8; 8]; + let section = &buf.0[*loc as usize..(loc + size) as usize]; + let num = if is_little_endian.0 { + data[0..*size as usize].copy_from_slice(section); + u64::from_le_bytes(data) + } else { + data[8 - *size as usize..].copy_from_slice(section); + u64::from_be_bytes(data) + }; + Ok(Literal::Uint(num).into()) + } +} + +define_fn! {expr=x in + /// Append two binary data blocks + pub Concatenate { a: Binary, b: Binary } => { + let data = a.0.iter().chain(b.0.iter()).copied().collect(); + Ok(Binary(Rc::new(data)).to_atom_cls()) + } +} + +define_fn! {expr=x in + /// Extract a subsection of the binary data + pub Slice { + s: Binary, + i: u64 as with_uint(x, Ok), + len: u64 as with_uint(x, Ok) + } => { + if i + len < s.0.len() as u64 { + RuntimeError::fail( + "Byte index out of bounds".to_string(), + "indexing binary" + )? + } + let data = s.0[*i as usize..*i as usize + *len as usize].to_vec(); + Ok(Binary(Rc::new(data)).to_atom_cls()) + } +} + +define_fn! {expr=x in + /// Return the index where the first argument first contains the second, + /// if any + pub Find { haystack: Binary, needle: Binary } => { + let found = iter_find(haystack.0.iter(), needle.0.iter()); + Ok(orchid_opt(found.map(|x| Literal::Uint(x as u64).into()))) + } +} + +define_fn! {expr=x in + /// Split binary data block into two smaller blocks + pub Split { + bin: Binary, + i: u64 as with_uint(x, Ok) + } => { + if bin.0.len() < *i as usize { + RuntimeError::fail( + "Byte index out of bounds".to_string(), + "splitting binary" + )? + } + let (asl, bsl) = bin.0.split_at(*i as usize); + Ok(tuple(vec![ + Binary(Rc::new(asl.to_vec())).to_atom_cls().into(), + Binary(Rc::new(bsl.to_vec())).to_atom_cls().into(), + ])) + } +} + +define_fn! { + /// Detect the number of bytes in the binary data block + pub Size = |x: &ExprInst| { + Ok(Literal::Uint(Binary::try_from(x)?.0.len() as u64).into()) + } +} + +pub fn bin(i: &Interner) -> ConstTree { + ConstTree::tree([( + i.i("bin"), + ConstTree::tree([ + (i.i("concat"), ConstTree::xfn(Concatenate)), + (i.i("slice"), ConstTree::xfn(Slice)), + (i.i("find"), ConstTree::xfn(Find)), + (i.i("split"), ConstTree::xfn(Split)), + (i.i("size"), ConstTree::xfn(Size)) + ]), + )]) +} diff --git a/src/stl/bool.rs b/src/stl/bool.rs index 2cf65b9..a7a3091 100644 --- a/src/stl/bool.rs +++ b/src/stl/bool.rs @@ -1,13 +1,13 @@ use std::rc::Rc; -use crate::foreign::Atom; +use crate::foreign::ExternError; use crate::interner::Interner; use crate::representations::interpreted::{Clause, ExprInst}; -use crate::representations::Primitive; -use crate::stl::litconv::with_lit; use crate::stl::AssertionError; use crate::{atomic_inert, define_fn, ConstTree, Literal, PathSet}; +use super::inspect::with_atom; + /// Booleans exposed to Orchid #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct Boolean(pub bool); @@ -20,32 +20,21 @@ impl From for Boolean { } impl TryFrom<&ExprInst> for Boolean { - type Error = (); + type Error = Rc; fn try_from(value: &ExprInst) -> Result { - let expr = value.expr(); - if let Clause::P(Primitive::Atom(Atom(a))) = &expr.clause { - if let Some(b) = a.as_any().downcast_ref::() { - return Ok(*b); - } - } - Err(()) + with_atom(value, "a boolean", |b| Ok(*b)) } } define_fn! {expr=x in /// Compares the inner values if /// - /// - both values are char, /// - both are string, /// - both are either uint or num - Equals { - a: Literal as with_lit(x, |l| Ok(l.clone())), - b: Literal as with_lit(x, |l| Ok(l.clone())) - } => Ok(Boolean::from(match (a, b) { - (Literal::Char(c1), Literal::Char(c2)) => c1 == c2, - (Literal::Num(n1), Literal::Num(n2)) => n1 == n2, + Equals { a: Literal, b: Literal } => Ok(Boolean::from(match (a, b) { (Literal::Str(s1), Literal::Str(s2)) => s1 == s2, + (Literal::Num(n1), Literal::Num(n2)) => n1 == n2, (Literal::Uint(i1), Literal::Uint(i2)) => i1 == i2, (Literal::Num(n1), Literal::Uint(u1)) => *n1 == (*u1 as f64), (Literal::Uint(u1), Literal::Num(n1)) => *n1 == (*u1 as f64), diff --git a/src/stl/codegen.rs b/src/stl/codegen.rs new file mode 100644 index 0000000..cc67388 --- /dev/null +++ b/src/stl/codegen.rs @@ -0,0 +1,46 @@ +//! Utilities for generating Orchid code in Rust + +use std::rc::Rc; + +use crate::interpreted::{Clause, ExprInst}; +use crate::{PathSet, Side}; + +/// Convert a rust Option into an Orchid Option +pub fn orchid_opt(x: Option) -> Clause { + if let Some(x) = x { some(x) } else { none() } +} + +/// Constructs an instance of the orchid value Some wrapping the given +/// [ExprInst] +fn some(x: ExprInst) -> Clause { + Clause::Lambda { + args: None, + body: Clause::Lambda { + args: Some(PathSet { steps: Rc::new(vec![Side::Left]), next: None }), + body: Clause::Apply { f: Clause::LambdaArg.wrap(), x }.wrap(), + } + .wrap(), + } +} + +/// Constructs an instance of the orchid value None +fn none() -> Clause { + Clause::Lambda { + args: Some(PathSet { steps: Rc::new(vec![]), next: None }), + body: Clause::Lambda { args: None, body: Clause::LambdaArg.wrap() }.wrap(), + } +} + +/// Define a clause that can be called with a callback and passes the provided +/// values to the callback in order. +pub fn tuple(data: Vec) -> Clause { + Clause::Lambda { + args: Some(PathSet { + next: None, + steps: Rc::new(data.iter().map(|_| Side::Left).collect()), + }), + body: data + .into_iter() + .fold(Clause::LambdaArg.wrap(), |f, x| Clause::Apply { f, x }.wrap()), + } +} diff --git a/src/stl/conv.rs b/src/stl/conv.rs index 74b0325..ea5c421 100644 --- a/src/stl/conv.rs +++ b/src/stl/conv.rs @@ -1,7 +1,7 @@ use chumsky::Parser; use ordered_float::NotNan; -use super::litconv::with_lit; +use super::inspect::with_lit; use super::{ArithmeticError, AssertionError}; use crate::foreign::ExternError; use crate::interner::Interner; @@ -20,10 +20,6 @@ define_fn! { Literal::Num(n) => Ok(*n), Literal::Uint(i) => NotNan::new(*i as f64) .map_err(|_| ArithmeticError::NaN.into_extern()), - Literal::Char(char) => char - .to_digit(10) - .map(|i| NotNan::new(i as f64).expect("u32 to f64 yielded NaN")) - .ok_or_else(|| AssertionError::ext(x.clone(), "is not a decimal digit")), }).map(|nn| Literal::Num(nn).into()) } @@ -39,10 +35,6 @@ define_fn! { )), Literal::Num(n) => Ok(n.floor() as u64), Literal::Uint(i) => Ok(*i), - Literal::Char(char) => char - .to_digit(10) - .map(u64::from) - .ok_or(AssertionError::ext(x.clone(), "is not a decimal digit")), }).map(|u| Literal::Uint(u).into()) } @@ -50,7 +42,6 @@ define_fn! { /// Convert a literal to a string using Rust's conversions for floats, chars and /// uints respectively ToString = |x| with_lit(x, |l| Ok(match l { - Literal::Char(c) => c.to_string(), Literal::Uint(i) => i.to_string(), Literal::Num(n) => n.to_string(), Literal::Str(s) => s.clone(), diff --git a/src/stl/litconv.rs b/src/stl/inspect.rs similarity index 52% rename from src/stl/litconv.rs rename to src/stl/inspect.rs index 6cc185e..e4e0c8e 100644 --- a/src/stl/litconv.rs +++ b/src/stl/inspect.rs @@ -3,9 +3,11 @@ use std::rc::Rc; use super::assertion_error::AssertionError; -use crate::foreign::ExternError; +use crate::foreign::{ExternError, Atomic}; +use crate::interpreted::Clause; use crate::representations::interpreted::ExprInst; use crate::representations::Literal; +use crate::Primitive; /// Tries to cast the [ExprInst] as a [Literal], calls the provided function on /// it if successful. Returns a generic [AssertionError] if not. @@ -45,3 +47,48 @@ pub fn with_uint( } }) } + +/// Tries to cast the [ExprInst] into the specified atom type. Throws an +/// assertion error if unsuccessful, or calls the provided function on the +/// extracted atomic type. +pub fn with_atom( + x: &ExprInst, + inexact_typename: &'static str, + predicate: impl FnOnce(&T) -> Result>, +) -> Result> { + x.inspect(|c| { + if let Clause::P(Primitive::Atom(a)) = c { + a.try_cast() + .map(predicate) + .unwrap_or_else(|| AssertionError::fail(x.clone(), inexact_typename)) + } else { + AssertionError::fail(x.clone(), "an atom") + } + }) +} + +// ######## Automatically ######## + +impl TryFrom<&ExprInst> for Literal { + type Error = Rc; + + fn try_from(value: &ExprInst) -> Result { + with_lit(value, |l| Ok(l.clone())) + } +} + +impl TryFrom<&ExprInst> for String { + type Error = Rc; + + fn try_from(value: &ExprInst) -> Result { + with_str(value, |s| Ok(s.clone())) + } +} + +impl TryFrom<&ExprInst> for u64 { + type Error = Rc; + + fn try_from(value: &ExprInst) -> Result { + with_uint(value, Ok) + } +} \ No newline at end of file diff --git a/src/stl/io/command.rs b/src/stl/io/command.rs index 5d8033b..26d4b6e 100644 --- a/src/stl/io/command.rs +++ b/src/stl/io/command.rs @@ -7,6 +7,8 @@ use crate::representations::interpreted::{Clause, ExprInst}; use crate::representations::{Literal, Primitive}; use crate::utils::unwrap_or; + + /// An IO command to be handled by the host application. #[derive(Clone, Debug)] pub enum IO { diff --git a/src/stl/io/panic.rs b/src/stl/io/panic.rs index f8aab8e..5eda39d 100644 --- a/src/stl/io/panic.rs +++ b/src/stl/io/panic.rs @@ -1,6 +1,6 @@ use std::fmt::Display; -use super::super::litconv::with_str; +use super::super::inspect::with_str; use crate::define_fn; use crate::foreign::ExternError; diff --git a/src/stl/io/print.rs b/src/stl/io/print.rs index 35620a3..1823c65 100644 --- a/src/stl/io/print.rs +++ b/src/stl/io/print.rs @@ -1,6 +1,6 @@ use std::fmt::Debug; -use super::super::litconv::with_str; +use super::super::inspect::with_str; use super::command::IO; use crate::foreign::{Atomic, AtomicResult, AtomicReturn}; use crate::interpreter::Context; diff --git a/src/stl/mk_stl.rs b/src/stl/mk_stl.rs index b0aa4e5..5c240f7 100644 --- a/src/stl/mk_stl.rs +++ b/src/stl/mk_stl.rs @@ -1,6 +1,7 @@ use hashbrown::HashMap; use rust_embed::RustEmbed; +use super::bin::bin; use super::bool::bool; use super::conv::conv; use super::io::io; @@ -35,7 +36,7 @@ pub fn mk_stl(i: &Interner, options: StlOptions) -> ProjectTree { let const_tree = from_const_tree( HashMap::from([( i.i("std"), - io(i, options.impure) + conv(i) + bool(i) + str(i) + num(i), + io(i, options.impure) + conv(i) + bool(i) + str(i) + num(i) + bin(i), )]), &[i.i("std")], ); diff --git a/src/stl/mod.rs b/src/stl/mod.rs index 4ab7137..abf31a2 100644 --- a/src/stl/mod.rs +++ b/src/stl/mod.rs @@ -4,11 +4,13 @@ mod assertion_error; mod bool; mod conv; mod io; -pub mod litconv; +pub mod inspect; mod mk_stl; mod num; mod runtime_error; mod str; +pub mod codegen; +mod bin; pub use arithmetic_error::ArithmeticError; pub use assertion_error::AssertionError; diff --git a/src/stl/num.rs b/src/stl/num.rs index a79cbc5..d26f544 100644 --- a/src/stl/num.rs +++ b/src/stl/num.rs @@ -2,7 +2,7 @@ use std::rc::Rc; use ordered_float::NotNan; -use super::litconv::with_lit; +use super::inspect::with_lit; use super::{ArithmeticError, AssertionError}; use crate::foreign::ExternError; use crate::representations::interpreted::{Clause, ExprInst}; diff --git a/src/stl/str.orc b/src/stl/str.orc index 5dfc4df..6f1f314 100644 --- a/src/stl/str.orc +++ b/src/stl/str.orc @@ -1 +1,10 @@ -export ...$a ++ ...$b =0x4p36=> (concatenate (...$a) (...$b)) \ No newline at end of file +import super::(proc::*, bool::*, io::panic) + +export ...$a ++ ...$b =0x4p36=> (concat (...$a) (...$b)) + +export char_at := \s.\i. do{ + let slc = slice s i 1; + if len slc == 1 + then slc + else panic "Character index out of bounds" +} \ No newline at end of file diff --git a/src/stl/str.rs b/src/stl/str.rs index 22b536f..41d83ce 100644 --- a/src/stl/str.rs +++ b/src/stl/str.rs @@ -1,38 +1,87 @@ -use super::litconv::{with_str, with_uint}; +use unicode_segmentation::UnicodeSegmentation; + +use super::codegen::{orchid_opt, tuple}; +use super::inspect::with_str; use super::RuntimeError; use crate::interner::Interner; +use crate::utils::iter_find; use crate::{define_fn, ConstTree, Literal}; define_fn! {expr=x in /// Append a string to another - pub Concatenate { - a: String as with_str(x, |s| Ok(s.clone())), - b: String as with_str(x, |s| Ok(s.clone())) - } => Ok(Literal::Str(a.to_owned() + b).into()) + pub Concatenate { a: String, b: String } + => Ok(Literal::Str(a.to_owned() + b).into()) } define_fn! {expr=x in - pub CharAt { - s: String as with_str(x, |s| Ok(s.clone())), - i: u64 as with_uint(x, Ok) - } => { - if let Some(c) = s.chars().nth(*i as usize) { - Ok(Literal::Char(c).into()) + pub Slice { s: String, i: u64, len: u64 } => { + let graphs = s.graphemes(true); + if *i == 0 { + Ok(Literal::Str(graphs.take(*len as usize).collect()).into()) } else { - RuntimeError::fail( - "Character index out of bounds".to_string(), - "indexing string", - )? + let mut prefix = graphs.skip(*i as usize - 1); + if prefix.next().is_none() { + RuntimeError::fail( + "Character index out of bounds".to_string(), + "indexing string", + ) + } else { + let mut count = 0; + let ret = prefix + .take(*len as usize) + .map(|x| { count+=1; x }) + .collect(); + if count == *len { + Ok(Literal::Str(ret).into()) + } else { + RuntimeError::fail( + "Character index out of bounds".to_string(), + "indexing string" + ) + } + } } } } +define_fn! {expr=x in + pub Find { haystack: String, needle: String } => { + let found = iter_find(haystack.graphemes(true), needle.graphemes(true)); + Ok(orchid_opt(found.map(|x| Literal::Uint(x as u64).into()))) + } +} + +define_fn! {expr=x in + pub Split { s: String, i: u64 } => { + let mut graphs = s.graphemes(true); + let a = graphs.by_ref().take(*i as usize).collect::(); + let b = graphs.collect::(); + Ok(tuple(vec![a.into(), b.into()])) + } +} + +define_fn! { + pub Len = |x| with_str(x, |s| { + Ok(Literal::Uint(s.graphemes(true).count() as u64).into()) + }) +} + +define_fn! { + pub Size = |x| with_str(x, |s| { + Ok(Literal::Uint(s.as_bytes().len() as u64).into()) + }) +} + pub fn str(i: &Interner) -> ConstTree { ConstTree::tree([( i.i("str"), ConstTree::tree([ - (i.i("concatenate"), ConstTree::xfn(Concatenate)), - (i.i("char_at"), ConstTree::xfn(CharAt)), + (i.i("concat"), ConstTree::xfn(Concatenate)), + (i.i("slice"), ConstTree::xfn(Slice)), + (i.i("find"), ConstTree::xfn(Find)), + (i.i("split"), ConstTree::xfn(Split)), + (i.i("len"), ConstTree::xfn(Len)), + (i.i("size"), ConstTree::xfn(Size)), ]), )]) } diff --git a/src/utils/iter_find.rs b/src/utils/iter_find.rs new file mode 100644 index 0000000..552965e --- /dev/null +++ b/src/utils/iter_find.rs @@ -0,0 +1,47 @@ +/// Check if the finite sequence produced by a clonable iterator (`haystack`) +/// contains the finite sequence produced by another clonable iterator +/// (`needle`) +pub fn iter_find( + mut haystack: impl Iterator + Clone, + needle: impl Iterator + Clone, +) -> Option { + let mut start = 0; + loop { + match iter_starts_with(haystack.clone(), needle.clone()) { + ISWResult::StartsWith => return Some(start), + ISWResult::Shorter => return None, + ISWResult::Difference => (), + } + haystack.next(); + start += 1; + } +} + +/// Value returned by iter_starts_with +enum ISWResult { + /// The first iterator starts with the second + StartsWith, + /// The values of the two iterators differ + Difference, + /// The first iterator ends before the second + Shorter, +} + +/// Checks that an iterator starts with another +fn iter_starts_with( + mut a: impl Iterator, + b: impl Iterator, +) -> ISWResult { + // if a starts with b then for every element in b + for item in b { + // a has to contain the same element + if let Some(comp) = a.next() { + if item != comp { + return ISWResult::Difference; + } + } else { + return ISWResult::Shorter; + } + } + ISWResult::StartsWith +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index bae24df..0961a70 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -8,6 +8,7 @@ mod split_max_prefix; mod string_from_charset; mod substack; mod unwrap_or; +mod iter_find; pub use cache::Cache; pub use print_nname::sym2string; @@ -21,3 +22,4 @@ pub(crate) use unwrap_or::unwrap_or; pub mod iter; pub use iter::BoxedIter; pub use string_from_charset::string_from_charset; +pub use iter_find::iter_find;