Files
orchid/orchid-std/src/std/string/str_lib.rs
Lawrence Bethlenfalvy 09cfcb1839 partway towards commands
I got very confused and started mucking about with "spawn" when in fact all I needed was the "inline" extension type in orcx that allows the interpreter to expose custom constants.
2026-03-13 16:48:42 +01:00

171 lines
6.2 KiB
Rust

use std::rc::Rc;
use orchid_base::{fmt, is, mk_errv, sym};
use orchid_extension::ForeignAtom;
use orchid_extension::coroutine_exec::exec;
use orchid_extension::expr::Expr;
use orchid_extension::func_atom::get_arg;
use orchid_extension::gen_expr::{call, new_atom};
use orchid_extension::tree::{GenMember, comments, fun, prefix};
use unicode_segmentation::UnicodeSegmentation;
use super::str_atom::StrAtom;
use crate::std::protocol::types::{get_impl, proto};
use crate::std::string::to_string::ToStringMethod;
use crate::{Int, OrcOpt, OrcString, Tpl};
/// Builds the members of the `std::string` module: `concat`, `size`, `len`, `slice`, `find`,
/// `split`, `char_at`, `to_str` and the `to_string` protocol.
///
/// All index- and length-based functions count Unicode grapheme clusters, not bytes or code
/// points. Indices come from Orchid code, so they are never trusted: a negative index or one that
/// cannot be represented as `usize` is reported through the same "Index out of bounds" error as
/// an index past the end of the string, instead of panicking.
pub fn gen_str_lib() -> Vec<GenMember> {
  prefix("std", [comments(
    ["There are two string types, IntStr and Str. Literals are always IntStr, which are quick to \
      equality-compare but may leak, so you can't generally create them at runtime.\n\n\
      All functions here operate on Unicode graphemes. This essentially means that letters with \
      added diacritics and Mandarin multi-codepoint characters are treated as a single character."],
    prefix("string", [
      comments(
        ["Concatenate two strings", "|type: Str -> Str -> Str|"],
        fun(true, "concat", async |left: OrcString, right: OrcString| {
          new_atom(StrAtom::new(Rc::new(
            left.get_string().await.to_string() + &right.get_string().await,
          )))
        }),
      ),
      comments(
        [
          "Find the size of a string in bytes. Strings are stored in UTF-8. \
          This should be used to determine the computational resource utilization of strings. \
          It should not be used to determine whether to truncate text.",
          "|type: Str -> Int|",
        ],
        fun(true, "size", async |s: OrcString| Int(s.get_string().await.len().try_into().unwrap())),
      ),
      comments(
        [
          "Find the number of characters in a string. This can be used for example to \
          truncate text. It should not be used to limit the size of messages for security purposes.",
          "|type: Str -> Int|",
        ],
        fun(true, "len", async |s: OrcString| {
          Int(s.get_string().await.graphemes(true).count().try_into().unwrap())
        }),
      ),
      comments(
        [
          "Takes a string, a start and a length in graphemes. \
          Slices out the specified subsection of the string.",
          "|type: Str -> Int -> Int -> Str|",
        ],
        fun(true, "slice", async |s: OrcString, Int(start): Int, Int(len): Int| {
          let str = s.get_string().await;
          // A non-positive length always selects the empty string, whatever the start is.
          if len <= 0 {
            return Ok(new_atom(StrAtom::new(Rc::default())));
          }
          // Checked conversions: a negative start (or a value that does not fit in usize) cannot
          // address any grapheme, so it falls through to the out-of-bounds error below.
          let selected = match (usize::try_from(start), usize::try_from(len)) {
            (Ok(skip), Ok(take)) => {
              let mut substr_iter = str.graphemes(true).skip(skip);
              // Collect all but the last requested grapheme, then demand the last one explicitly
              // so that a range running past the end of the string is detected.
              let head: String = substr_iter.by_ref().take(take - 1).collect();
              substr_iter.next().map(|last| head + last)
            },
            _ => None,
          };
          let Some(new_str) = selected else {
            let str_len = str.graphemes(true).count();
            return Err(mk_errv(
              is("Index out of bounds").await,
              format!("Tried to select grapheme {start}+{len} from string that only has {str_len}"),
              [get_arg(0).pos().await, get_arg(1).pos().await, get_arg(2).pos().await],
            ));
          };
          Ok(new_atom(StrAtom::new(Rc::new(new_str))))
        }),
      ),
      comments(
        [
          "If the first string contains the second then returns the index.",
          "|type: Str -> Str -> std::option Int|",
        ],
        fun(true, "find", async |haystack: OrcString, needle: OrcString| {
          let haystack_str = haystack.get_string().await;
          let needle_str = needle.get_string().await;
          // Iterator positioned at the candidate match start; advanced by one grapheme per round.
          let mut haystack_graphs = haystack_str.graphemes(true);
          let mut index = 0;
          loop {
            let mut needle_graphs = needle_str.graphemes(true);
            // check that all chars are equal
            if haystack_graphs.clone().zip(needle_graphs.by_ref()).all(|(l, r)| l == r) {
              // if we exhausted the haystack but not the needle, we can't succeed
              if needle_graphs.next().is_some() {
                break;
              }
              return OrcOpt(Some(Int(index)));
            }
            if haystack_graphs.next().is_none() {
              break;
            }
            index += 1;
          }
          OrcOpt(None)
        }),
      ),
      comments(
        [
          "Splits the string into two substrings at the nth grapheme.",
          "|type: Str -> Int -> std::tuple Str Str|",
        ],
        fun(true, "split", async |s: OrcString, i: Int| {
          let str = s.get_string().await;
          // Negative or unrepresentable indices are treated like any other out-of-range index.
          // NOTE(review): an index equal to the grapheme count is also rejected here, so the
          // string cannot be split into (whole, "") - confirm whether that is intended.
          let split_point = match usize::try_from(i.0) {
            Ok(n) => str.grapheme_indices(true).nth(n),
            Err(_) => None,
          };
          let Some((byte_idx, _)) = split_point else {
            let len = str.graphemes(true).count();
            return Err(mk_errv(
              is("Index out of bounds").await,
              format!("Tried to split string at {}, it only has {} graphemes", i.0, len),
              [get_arg(0).pos().await, get_arg(1).pos().await],
            ));
          };
          // byte_idx is the byte offset of a grapheme start, hence a valid char boundary.
          let (left, right) = str.split_at(byte_idx);
          Ok(Tpl((
            new_atom(StrAtom::new(Rc::new(left.to_string()))),
            new_atom(StrAtom::new(Rc::new(right.to_string()))),
          )))
        }),
      ),
      comments(
        ["Returns the nth grapheme.", "|type: Str -> Int -> Str|"],
        fun(true, "char_at", async |s: OrcString, i: Int| {
          let str = s.get_string().await;
          // Negative or unrepresentable indices are treated like any other out-of-range index.
          let grapheme = match usize::try_from(i.0) {
            Ok(n) => str.graphemes(true).nth(n),
            Err(_) => None,
          };
          let Some(s) = grapheme else {
            let len = str.graphemes(true).count();
            return Err(mk_errv(
              is("Index out of bounds").await,
              format!("Tried to read grapheme {} from string, it only has {}", i.0, len),
              [get_arg(0).pos().await, get_arg(1).pos().await],
            ));
          };
          Ok(new_atom(StrAtom::new(Rc::new(s.to_string()))))
        }),
      ),
      comments(
        [
          "Converts a value to string. This function is used in interpolation. \
          It supports the std::string::to_string protocol in Orchid, \
          the std::string::to_string request in Rust, \
          and expression debug printing as a fallback (print_atom for Atomic implementors in Rust).\n\n\
          This function is infallible.",
          "|type: any -> Str|",
        ],
        fun(true, "to_str", async |input: Expr| {
          exec(async move |mut h| {
            if let Ok(atom) = h.exec::<ForeignAtom>(input.clone()).await {
              // First choice: the atom answers the Rust-side ToStringMethod request.
              if let Some(str) = atom.call(ToStringMethod).await {
                return new_atom(StrAtom::new(Rc::new(str)));
              }
              // Second choice: an implementation of the to_string protocol registered for the atom.
              let proto_ref = sym!(std::string::to_string::__protocol_tag__);
              let proto = h.exec(proto_ref).await.expect("This protocol is defined in this system");
              if let Ok(cb) = get_impl(atom.clone(), proto).await {
                return call(cb, atom).await;
              }
            }
            // Fallback: format the input expression itself.
            new_atom(StrAtom::new(Rc::new(fmt(&input).await)))
          })
          .await
        }),
      ),
      proto(true, "to_string").finish(),
    ]),
  )])
}