use std::rc::Rc; use orchid_base::{fmt, is, mk_errv, sym}; use orchid_extension::gen_expr::{call, new_atom}; use orchid_extension::tree::{GenMember, comments, fun, prefix}; use orchid_extension::{Expr, ForeignAtom, exec, get_arg}; use unicode_segmentation::UnicodeSegmentation; use super::str_atom::StrAtom; use crate::std::protocol::types::{get_impl, proto}; use crate::std::string::to_string::ToStringMethod; use crate::{Int, OrcOpt, OrcString, Tpl}; pub fn gen_str_lib() -> Vec { prefix("std", [comments( ["There are two string types, IntStr and Str. Literals are always IntStr, which are quick to \ equality-compare but may leak, so you can't generally create them at runtime.\n\n\ All functions here operate on Unicode graphemes. This essentially means that letters with \ added diacritics and Mandarin multi-codepoint characters are treated as a single character."], prefix("string", [ comments( ["Concatenate two strings", "|type: Str -> Str -> Str|"], fun(true, "concat", async |left: OrcString, right: OrcString| { new_atom(StrAtom::new(Rc::new( left.get_string().await.to_string() + &right.get_string().await, ))) }), ), comments( [ "Find the size of a string in bytes. Strings are stored in UTF-8. \ This should be used to determine the computational resource utilization of strings. \ It should not be used to determine whether to truncate text.", "|type: Str -> Int|", ], fun(true, "size", async |s: OrcString| Int(s.get_string().await.len().try_into().unwrap())), ), comments( [ "Find the number of characters in a string. This can be used for example to \ truncate text. It should not be used to limit the size of messages for security purposes.", "|type: Str -> Int|", ], fun(true, "len", async |s: OrcString| { Int(s.get_string().await.graphemes(true).count().try_into().unwrap()) }), ), comments( [ "Takes a string, a start and a length in graphemes. \ Slices out the specified subsection of the string.", "|type: Str -> Int -> Int -> Str|", ], fun(true, "slice", async |s: OrcString, Int(start): Int, Int(len): Int| { let str = s.get_string().await; if len <= 0 { return Ok(new_atom(StrAtom::new(Rc::default()))); } let mut substr_iter = str.graphemes(true).skip(start.try_into().unwrap()); let new_str: String = substr_iter.by_ref().take(usize::try_from(len).unwrap() - 1).collect(); let Some(s) = substr_iter.next() else { let str_len = str.graphemes(true).count(); return Err(mk_errv( is("Index out of bounds").await, format!("Tried to select grapheme {start}+{len} from string that only has {str_len}"), [get_arg(0).pos().await, get_arg(1).pos().await, get_arg(2).pos().await], )); }; Ok(new_atom(StrAtom::new(Rc::new(new_str + s)))) }), ), comments( [ "If the first string contains the second then returns the index.", "|type: Str -> Str -> std::option Int|", ], fun(true, "find", async |haystack: OrcString, needle: OrcString| { let haystack_str = haystack.get_string().await; let needle_str = needle.get_string().await; let mut haystack_graphs = haystack_str.graphemes(true); let mut index = 0; loop { let mut needle_graphs = needle_str.graphemes(true); // check that all chars are equal if haystack_graphs.clone().zip(needle_graphs.by_ref()).all(|(l, r)| l == r) { // if we exhausted the haystack but not the needle, we can't succeed if needle_graphs.next().is_some() { break; } return OrcOpt(Some(Int(index))); } if haystack_graphs.next().is_none() { break; } index += 1; } OrcOpt(None) }), ), comments( [ "Splits the string into two substrings at the nth grapheme.", "|type: Str -> Int -> std::tuple Str Str|", ], fun(true, "split", async |s: OrcString, i: Int| { let str = s.get_string().await; let Some((i, _)) = str.grapheme_indices(true).nth(i.0.try_into().unwrap()) else { let len = str.graphemes(true).count(); return Err(mk_errv( is("Index out of bounds").await, format!("Tried to split string at {}, it only has {} graphemes", i.0, len), [get_arg(0).pos().await, get_arg(1).pos().await], )); }; let (left, right) = str.split_at(i); Ok(Tpl(( new_atom(StrAtom::new(Rc::new(left.to_string()))), new_atom(StrAtom::new(Rc::new(right.to_string()))), ))) }), ), comments( ["Returns the nth grapheme.", "|type: Str -> Int -> Str|"], fun(true, "char_at", async |s: OrcString, i: Int| { let str = s.get_string().await; let Some(s) = str.graphemes(true).nth(i.0.try_into().unwrap()) else { let len = str.graphemes(true).count(); return Err(mk_errv( is("Index out of bounds").await, format!("Tried to read grapheme {} from string, it only has {}", i.0, len), [get_arg(0).pos().await, get_arg(1).pos().await], )); }; Ok(new_atom(StrAtom::new(Rc::new(s.to_string())))) }), ), comments( [ "Converts a value to string. This function is used in interpolation. \ It supports the std::string::to_string protocol in Orchid, \ the std::string::to_string request in Rust, \ and expression debug printing as a fallback (print_atom for Atomic implementors in Rust).\n\n\ This function is infallible.", "|type: any -> Str|", ], fun(true, "to_str", async |input: Expr| { exec(async move |mut h| { if let Ok(atom) = h.exec::(input.clone()).await { if let Some(str) = atom.call(ToStringMethod).await { return new_atom(StrAtom::new(Rc::new(str))); } let proto_ref = sym!(std::string::to_string::__protocol_tag__); let proto = h.exec(proto_ref).await.expect("This protocol is defined in this system"); if let Ok(cb) = get_impl(atom.clone(), proto).await { return call(cb, atom).await; } } return new_atom(StrAtom::new(Rc::new(fmt(&input).await))); }) .await }), ), proto(true, "to_string").finish(), ]), )]) }