Some checks failed
Rust / build (push) Failing after 3m52s
terrified to start testing
168 lines
6.1 KiB
Rust
168 lines
6.1 KiB
Rust
use std::rc::Rc;
|
|
|
|
use orchid_base::{fmt, is, mk_errv, sym};
|
|
use orchid_extension::gen_expr::{call, new_atom};
|
|
use orchid_extension::tree::{GenMember, comments, fun, prefix};
|
|
use orchid_extension::{Expr, ForeignAtom, exec, get_arg};
|
|
use unicode_segmentation::UnicodeSegmentation;
|
|
|
|
use super::str_atom::StrAtom;
|
|
use crate::std::protocol::types::{get_impl, proto};
|
|
use crate::std::string::to_string::ToStringMethod;
|
|
use crate::{Int, OrcOpt, OrcString, Tpl};
|
|
|
|
pub fn gen_str_lib() -> Vec<GenMember> {
|
|
prefix("std", [comments(
|
|
["There are two string types, IntStr and Str. Literals are always IntStr, which are quick to \
|
|
equality-compare but may leak, so you can't generally create them at runtime.\n\n\
|
|
All functions here operate on Unicode graphemes. This essentially means that letters with \
|
|
added diacritics and Mandarin multi-codepoint characters are treated as a single character."],
|
|
prefix("string", [
|
|
comments(
|
|
["Concatenate two strings", "|type: Str -> Str -> Str|"],
|
|
fun(true, "concat", async |left: OrcString, right: OrcString| {
|
|
new_atom(StrAtom::new(Rc::new(
|
|
left.get_string().await.to_string() + &right.get_string().await,
|
|
)))
|
|
}),
|
|
),
|
|
comments(
|
|
[
|
|
"Find the size of a string in bytes. Strings are stored in UTF-8. \
|
|
This should be used to determine the computational resource utilization of strings. \
|
|
It should not be used to determine whether to truncate text.",
|
|
"|type: Str -> Int|",
|
|
],
|
|
fun(true, "size", async |s: OrcString| Int(s.get_string().await.len().try_into().unwrap())),
|
|
),
|
|
comments(
|
|
[
|
|
"Find the number of characters in a string. This can be used for example to \
|
|
truncate text. It should not be used to limit the size of messages for security purposes.",
|
|
"|type: Str -> Int|",
|
|
],
|
|
fun(true, "len", async |s: OrcString| {
|
|
Int(s.get_string().await.graphemes(true).count().try_into().unwrap())
|
|
}),
|
|
),
|
|
comments(
|
|
[
|
|
"Takes a string, a start and a length in graphemes. \
|
|
Slices out the specified subsection of the string.",
|
|
"|type: Str -> Int -> Int -> Str|",
|
|
],
|
|
fun(true, "slice", async |s: OrcString, Int(start): Int, Int(len): Int| {
|
|
let str = s.get_string().await;
|
|
if len <= 0 {
|
|
return Ok(new_atom(StrAtom::new(Rc::default())));
|
|
}
|
|
let mut substr_iter = str.graphemes(true).skip(start.try_into().unwrap());
|
|
let new_str: String =
|
|
substr_iter.by_ref().take(usize::try_from(len).unwrap() - 1).collect();
|
|
let Some(s) = substr_iter.next() else {
|
|
let str_len = str.graphemes(true).count();
|
|
return Err(mk_errv(
|
|
is("Index out of bounds").await,
|
|
format!("Tried to select grapheme {start}+{len} from string that only has {str_len}"),
|
|
[get_arg(0).pos().await, get_arg(1).pos().await, get_arg(2).pos().await],
|
|
));
|
|
};
|
|
Ok(new_atom(StrAtom::new(Rc::new(new_str + s))))
|
|
}),
|
|
),
|
|
comments(
|
|
[
|
|
"If the first string contains the second then returns the index.",
|
|
"|type: Str -> Str -> std::option Int|",
|
|
],
|
|
fun(true, "find", async |haystack: OrcString, needle: OrcString| {
|
|
let haystack_str = haystack.get_string().await;
|
|
let needle_str = needle.get_string().await;
|
|
let mut haystack_graphs = haystack_str.graphemes(true);
|
|
let mut index = 0;
|
|
loop {
|
|
let mut needle_graphs = needle_str.graphemes(true);
|
|
// check that all chars are equal
|
|
if haystack_graphs.clone().zip(needle_graphs.by_ref()).all(|(l, r)| l == r) {
|
|
// if we exhausted the haystack but not the needle, we can't succeed
|
|
if needle_graphs.next().is_some() {
|
|
break;
|
|
}
|
|
return OrcOpt(Some(Int(index)));
|
|
}
|
|
if haystack_graphs.next().is_none() {
|
|
break;
|
|
}
|
|
index += 1;
|
|
}
|
|
OrcOpt(None)
|
|
}),
|
|
),
|
|
comments(
|
|
[
|
|
"Splits the string into two substrings at the nth grapheme.",
|
|
"|type: Str -> Int -> std::tuple Str Str|",
|
|
],
|
|
fun(true, "split", async |s: OrcString, i: Int| {
|
|
let str = s.get_string().await;
|
|
let Some((i, _)) = str.grapheme_indices(true).nth(i.0.try_into().unwrap()) else {
|
|
let len = str.graphemes(true).count();
|
|
return Err(mk_errv(
|
|
is("Index out of bounds").await,
|
|
format!("Tried to split string at {}, it only has {} graphemes", i.0, len),
|
|
[get_arg(0).pos().await, get_arg(1).pos().await],
|
|
));
|
|
};
|
|
let (left, right) = str.split_at(i);
|
|
Ok(Tpl((
|
|
new_atom(StrAtom::new(Rc::new(left.to_string()))),
|
|
new_atom(StrAtom::new(Rc::new(right.to_string()))),
|
|
)))
|
|
}),
|
|
),
|
|
comments(
|
|
["Returns the nth grapheme.", "|type: Str -> Int -> Str|"],
|
|
fun(true, "char_at", async |s: OrcString, i: Int| {
|
|
let str = s.get_string().await;
|
|
let Some(s) = str.graphemes(true).nth(i.0.try_into().unwrap()) else {
|
|
let len = str.graphemes(true).count();
|
|
return Err(mk_errv(
|
|
is("Index out of bounds").await,
|
|
format!("Tried to read grapheme {} from string, it only has {}", i.0, len),
|
|
[get_arg(0).pos().await, get_arg(1).pos().await],
|
|
));
|
|
};
|
|
Ok(new_atom(StrAtom::new(Rc::new(s.to_string()))))
|
|
}),
|
|
),
|
|
comments(
|
|
[
|
|
"Converts a value to string. This function is used in interpolation. \
|
|
It supports the std::string::to_string protocol in Orchid, \
|
|
the std::string::to_string request in Rust, \
|
|
and expression debug printing as a fallback (print_atom for Atomic implementors in Rust).\n\n\
|
|
This function is infallible.",
|
|
"|type: any -> Str|",
|
|
],
|
|
fun(true, "to_str", async |input: Expr| {
|
|
exec(async move |mut h| {
|
|
if let Ok(atom) = h.exec::<ForeignAtom>(input.clone()).await {
|
|
if let Some(str) = atom.call(ToStringMethod).await {
|
|
return new_atom(StrAtom::new(Rc::new(str)));
|
|
}
|
|
let proto_ref = sym!(std::string::to_string::__protocol_tag__);
|
|
let proto = h.exec(proto_ref).await.expect("This protocol is defined in this system");
|
|
if let Ok(cb) = get_impl(atom.clone(), proto).await {
|
|
return call(cb, atom).await;
|
|
}
|
|
}
|
|
return new_atom(StrAtom::new(Rc::new(fmt(&input).await)));
|
|
})
|
|
.await
|
|
}),
|
|
),
|
|
proto(true, "to_string").finish(),
|
|
]),
|
|
)])
|
|
}
|