This commit is contained in:
@@ -1,50 +1,174 @@
|
||||
use std::rc::Rc;
|
||||
|
||||
use orchid_base::error::mk_errv;
|
||||
use orchid_base::format::fmt;
|
||||
use orchid_base::interner::is;
|
||||
use orchid_base::sym;
|
||||
use orchid_extension::atom::ForeignAtom;
|
||||
use orchid_extension::conv::ToExpr;
|
||||
use orchid_extension::coroutine_exec::exec;
|
||||
use orchid_extension::expr::Expr;
|
||||
use orchid_extension::gen_expr::{call, sym_ref};
|
||||
use orchid_extension::func_atom::get_arg;
|
||||
use orchid_extension::gen_expr::{call, new_atom, sym_ref};
|
||||
use orchid_extension::tree::{GenMember, comments, fun, prefix};
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
use super::str_atom::StrAtom;
|
||||
use crate::OrcString;
|
||||
use crate::std::protocol::types::{get_impl, proto};
|
||||
use crate::std::string::to_string::ToStringMethod;
|
||||
use crate::{Int, OrcOpt, OrcString, Tpl};
|
||||
|
||||
pub fn gen_str_lib() -> Vec<GenMember> {
|
||||
prefix("std::string", [
|
||||
comments(
|
||||
["Concatenate two strings"],
|
||||
fun(true, "concat", async |left: OrcString, right: OrcString| {
|
||||
StrAtom::new(Rc::new(left.get_string().await.to_string() + &right.get_string().await))
|
||||
}),
|
||||
),
|
||||
comments(
|
||||
["Converts a value to string. This function is used in interpolation. \
|
||||
It supports the std::string::to_string protocol in Orchid, \
|
||||
the std::string::to_string request in Rust, \
|
||||
and expression debug printing as a fallback (print_atom for Atomic implementors in Rust).\n\n\
|
||||
This function is infallible."],
|
||||
fun(true, "to_str", async |input: Expr| {
|
||||
exec(async move |mut h| {
|
||||
if let Ok(atom) = h.exec::<ForeignAtom>(input.clone()).await {
|
||||
if let Some(str) = atom.request(ToStringMethod).await {
|
||||
return StrAtom::new(Rc::new(str)).to_gen().await;
|
||||
}
|
||||
let proto_ref = sym_ref(sym!(std::string::to_string::__protocol_tag__));
|
||||
let proto = h.exec(proto_ref).await.expect("This protocol is defined in this system");
|
||||
if let Ok(cb) = get_impl(atom.clone(), proto).await {
|
||||
return call(cb.to_gen().await, [atom.to_gen().await]).to_gen().await;
|
||||
}
|
||||
prefix("std", [comments(
|
||||
["There are two string types, IntStr and Str. Literals are always IntStr, which are quick to \
|
||||
equality-compare but may leak, so you can't generally create them at runtime.\n\n\
|
||||
All functions here operate on Unicode graphemes. This essentially means that letters with \
|
||||
added diacritics and Mandarin multi-codepoint characters are treated as a single character."],
|
||||
prefix("string", [
|
||||
comments(
|
||||
["Concatenate two strings", "|type: Str -> Str -> Str|"],
|
||||
// `concat`: builds a new owned string holding `left` followed by `right`
// and wraps it in a fresh `StrAtom`.
fun(true, "concat", async |left: OrcString, right: OrcString| {
    // Start from an owned copy of the left operand, then append the right one in place.
    let mut joined = left.get_string().await.to_string();
    joined.push_str(&right.get_string().await);
    new_atom(StrAtom::new(Rc::new(joined)))
}),
|
||||
),
|
||||
comments(
|
||||
[
|
||||
"Find the size of a string in bytes. Strings are stored in UTF-8. \
|
||||
This should be used to determine the computational resource utilization of strings. \
|
||||
It should not be used to determine whether to truncate text.",
|
||||
"|type: Str -> Int|",
|
||||
],
|
||||
// `size`: length of the string in bytes, as reported by `len()` on the underlying text.
fun(true, "size", async |s: OrcString| {
    let byte_count = s.get_string().await.len();
    // The conversion into the `Int` payload is unwrapped, exactly as before.
    Int(byte_count.try_into().unwrap())
}),
|
||||
),
|
||||
comments(
|
||||
[
|
||||
"Find the number of characters in a string. This can be used for example to \
|
||||
truncate text. It should not be used to limit the size of messages for security purposes.",
|
||||
"|type: Str -> Int|",
|
||||
],
|
||||
// `len`: number of extended grapheme clusters in the string.
fun(true, "len", async |s: OrcString| {
    let text = s.get_string().await;
    // `graphemes(true)` iterates extended grapheme clusters; count them all.
    let grapheme_count = text.graphemes(true).count();
    Int(grapheme_count.try_into().unwrap())
}),
|
||||
),
|
||||
comments(
|
||||
[
|
||||
"Takes a string, a start and a length in graphemes. \
|
||||
Slices out the specified subsection of the string.",
|
||||
"|type: Str -> Int -> Int -> Str|",
|
||||
],
|
||||
fun(true, "slice", async |s: OrcString, Int(start): Int, Int(len): Int| {
|
||||
let str = s.get_string().await;
|
||||
if len <= 0 {
|
||||
return Ok(new_atom(StrAtom::new(Rc::default())));
|
||||
}
|
||||
return StrAtom::new(Rc::new(fmt(&input).await)).to_gen().await;
|
||||
})
|
||||
.await
|
||||
}),
|
||||
),
|
||||
proto(true, "to_string").finish(),
|
||||
])
|
||||
let mut substr_iter = str.graphemes(true).skip(start.try_into().unwrap());
|
||||
let new_str: String =
|
||||
substr_iter.by_ref().take(usize::try_from(len).unwrap() - 1).collect();
|
||||
let Some(s) = substr_iter.next() else {
|
||||
let str_len = str.graphemes(true).count();
|
||||
return Err(mk_errv(
|
||||
is("Index out of bounds").await,
|
||||
format!("Tried to select grapheme {start}+{len} from string that only has {str_len}"),
|
||||
[get_arg(0).pos().await, get_arg(1).pos().await, get_arg(2).pos().await],
|
||||
));
|
||||
};
|
||||
Ok(new_atom(StrAtom::new(Rc::new(new_str + s))))
|
||||
}),
|
||||
),
|
||||
comments(
|
||||
[
|
||||
"If the first string contains the second then returns the index.",
|
||||
"|type: Str -> Str -> std::option Int|",
|
||||
],
|
||||
// `find`: grapheme-wise substring search. Yields the grapheme index of the first position
// in `haystack` where `needle` occurs, or an empty option when it does not occur.
fun(true, "find", async |haystack: OrcString, needle: OrcString| {
    let hay = haystack.get_string().await;
    let pat = needle.get_string().await;
    // `remaining` always starts at the candidate position currently being tested.
    let mut remaining = hay.graphemes(true);
    let mut offset = 0;
    loop {
        let mut pat_graphs = pat.graphemes(true);
        // Compare the candidate window against the needle pairwise. `by_ref` keeps the
        // needle iterator usable afterwards so we can tell whether it was fully consumed.
        let window_agrees = remaining.clone().zip(pat_graphs.by_ref()).all(|(h, p)| h == p);
        if window_agrees {
            if pat_graphs.next().is_none() {
                return OrcOpt(Some(Int(offset)));
            }
            // The haystack ran out before the needle did, so no later position can match.
            break;
        }
        // Slide the window forward by one grapheme, stopping at the end of the haystack.
        match remaining.next() {
            Some(_) => offset += 1,
            None => break,
        }
    }
    OrcOpt(None)
}),
|
||||
),
|
||||
comments(
|
||||
[
|
||||
"Splits the string into two substrings at the nth grapheme.",
|
||||
"|type: Str -> Int -> std::tuple Str Str|",
|
||||
],
|
||||
// `split`: cuts the string in two immediately before the grapheme at index `i`.
//
// Returns a tuple of two new string atoms (left part, right part). Fails with an
// "Index out of bounds" error when `i` does not address an existing grapheme.
fun(true, "split", async |s: OrcString, i: Int| {
    let str = s.get_string().await;
    // An index that cannot be represented as `usize` (e.g. a negative one) can never address
    // a grapheme. Treat it like any other out-of-range index instead of panicking on the
    // conversion, so bad input from user code surfaces as a regular error.
    let cut = usize::try_from(i.0)
        .ok()
        .and_then(|n| str.grapheme_indices(true).nth(n))
        .map(|(byte_pos, _)| byte_pos);
    let Some(cut) = cut else {
        let len = str.graphemes(true).count();
        return Err(mk_errv(
            is("Index out of bounds").await,
            format!("Tried to split string at {}, it only has {} graphemes", i.0, len),
            [get_arg(0).pos().await, get_arg(1).pos().await],
        ));
    };
    // NOTE(review): an index equal to the grapheme count is still rejected above, so an empty
    // right half cannot be produced - confirm whether that is intended.
    // `cut` is the byte offset of a grapheme start, which is what `split_at` expects.
    let (left, right) = str.split_at(cut);
    Ok(Tpl((
        new_atom(StrAtom::new(Rc::new(left.to_string()))),
        new_atom(StrAtom::new(Rc::new(right.to_string()))),
    )))
}),
|
||||
),
|
||||
comments(
|
||||
["Returns the nth grapheme.", "|type: Str -> Int -> Str|"],
|
||||
// `char_at`: returns the grapheme at index `i` as a new string atom.
//
// Fails with an "Index out of bounds" error when `i` does not address an existing grapheme.
fun(true, "char_at", async |s: OrcString, i: Int| {
    let str = s.get_string().await;
    // An index that cannot be represented as `usize` (e.g. a negative one) can never address
    // a grapheme. Fold it into the out-of-bounds error instead of panicking on the
    // conversion, so bad input from user code surfaces as a regular error.
    let picked = usize::try_from(i.0).ok().and_then(|n| str.graphemes(true).nth(n));
    let Some(grapheme) = picked else {
        let len = str.graphemes(true).count();
        return Err(mk_errv(
            is("Index out of bounds").await,
            format!("Tried to read grapheme {} from string, it only has {}", i.0, len),
            [get_arg(0).pos().await, get_arg(1).pos().await],
        ));
    };
    Ok(new_atom(StrAtom::new(Rc::new(grapheme.to_string()))))
}),
|
||||
),
|
||||
comments(
|
||||
[
|
||||
"Converts a value to string. This function is used in interpolation. \
|
||||
It supports the std::string::to_string protocol in Orchid, \
|
||||
the std::string::to_string request in Rust, \
|
||||
and expression debug printing as a fallback (print_atom for Atomic implementors in Rust).\n\n\
|
||||
This function is infallible.",
|
||||
"|type: any -> Str|",
|
||||
],
|
||||
// `to_str`: converts an arbitrary expression to a string atom. Tries, in order:
//   1. the `ToStringMethod` request on a foreign atom,
//   2. an implementation of the `std::string::to_string` protocol for that atom,
//   3. the formatted representation of the input expression.
fun(true, "to_str", async |input: Expr| {
    exec(async move |mut h| {
        if let Ok(foreign) = h.exec::<ForeignAtom>(input.clone()).await {
            // First choice: the atom answers the to-string request directly.
            if let Some(text) = foreign.request(ToStringMethod).await {
                return new_atom(StrAtom::new(Rc::new(text)));
            }
            // Second choice: look up a protocol implementation and call it on the atom.
            let tag_ref = sym_ref(sym!(std::string::to_string::__protocol_tag__));
            let tag = h.exec(tag_ref).await.expect("This protocol is defined in this system");
            if let Ok(handler) = get_impl(foreign.clone(), tag).await {
                let callee = handler.to_gen().await;
                let argument = foreign.to_gen().await;
                return call(callee, [argument]).to_gen().await;
            }
        }
        // Fallback: format the original expression.
        new_atom(StrAtom::new(Rc::new(fmt(&input).await)))
    })
    .await
}),
|
||||
),
|
||||
proto(true, "to_string").finish(),
|
||||
]),
|
||||
)])
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user