Files
orchid/orchid-base/src/format.rs
Lawrence Bethlenfalvy 09cfcb1839 partway towards commands
I got very confused and started mucking about with "spawn" when in fact all I needed was the "inline" extension type in orcx that allows the interpreter to expose custom constants.
2026-03-13 16:48:42 +01:00

384 lines
13 KiB
Rust

use std::borrow::Borrow;
use std::cmp::Ordering;
use std::convert::Infallible;
use std::future::Future;
use std::iter;
use std::marker::PhantomData;
use std::rc::Rc;
use std::str::FromStr;
use futures::future::join_all;
use itertools::{Itertools, chain};
use never::Never;
use regex::Regex;
use crate::{api, match_mapping};
/// A unit of formattable text where the formatter must make a single choice:
/// which of its [Variants] to render.
///
/// Converting from string types via [Into::into] keeps the text verbatim,
/// while [str::parse] treats it as a template and resolves placeholders and
/// escape sequences.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
#[must_use]
pub struct FmtUnit {
/// Sub-units, addressed by index from the [FmtElement::Sub] slots of the
/// variants
pub subs: Vec<FmtUnit>,
/// Parsed text templates for how to render this text
pub variants: Rc<Variants>,
}
impl FmtUnit {
    /// Pair a set of templates with the subunits their slots refer to
    pub fn new(variants: Rc<Variants>, subs: impl IntoIterator<Item = FmtUnit>) -> Self {
        let subs = subs.into_iter().collect();
        Self { subs, variants }
    }
    /// Deserialize from message
    pub fn from_api(api: &api::FormattingUnit) -> Self {
        let subs = api.subs.iter().map(Self::from_api).collect();
        let variants = api.variants.iter().map(|var| {
            let elements = var.elements.iter().map(FmtElement::from_api).collect();
            FmtVariant { bounded: var.bounded, elements }
        });
        Self { subs, variants: Rc::new(Variants(variants.collect())) }
    }
    /// Serialize into message. String interner IDs used in the structure must
    /// remain valid.
    pub fn to_api(&self) -> api::FormattingUnit {
        let subs = self.subs.iter().map(Self::to_api).collect();
        let variants = self.variants.0.iter().map(|var| {
            let elements = var.elements.iter().map(FmtElement::to_api).collect();
            api::FormattingVariant { bounded: var.bounded, elements }
        });
        api::FormattingUnit { subs, variants: variants.collect() }
    }
    /// Shorthand for a variable-length list that can be formatted in exactly one
    /// way: `head`, then the items of `seq` separated by `delim`, then `tail`.
    /// `seq_bnd` is forwarded to [Variants::sequence].
    pub fn sequence(
        head: &str,
        delim: &str,
        tail: &str,
        seq_bnd: bool,
        seq: impl IntoIterator<Item = FmtUnit>,
    ) -> Self {
        let items: Vec<FmtUnit> = seq.into_iter().collect();
        // The template needs one slot per item, so the items are counted first
        let template = Variants::default().sequence(items.len(), head, delim, tail, seq_bnd);
        template.units_own(items)
    }
}
/// Anything that converts into [Variants] becomes a leaf unit without
/// subunits
impl<T> From<T> for FmtUnit
where Variants: From<T>
{
    fn from(value: T) -> Self {
        let variants = Rc::new(Variants::from(value));
        Self { subs: Vec::new(), variants }
    }
}
/// Parses the string as a single bounded template for a unit without
/// subunits. Never fails.
impl FromStr for FmtUnit {
    type Err = Infallible;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let variants = Variants::default().bounded(s);
        Ok(Self { subs: Vec::new(), variants: Rc::new(variants) })
    }
}
/// A single element of a format string. Composes into [FmtVariant]
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub enum FmtElement {
/// a reference to an interpolable subunit in the enclosing [FmtUnit]
Sub {
/// Index into [FmtUnit::subs]
slot: u32,
/// Restriction on which [FmtVariant] of the subunit may fill the slot. As
/// consumed by [take_first], `Some(true)` admits any variant of the
/// subunit, `Some(false)` admits only variants whose [FmtVariant::bounded]
/// flag is set, and `None` inherits the flag that was given for the
/// enclosing unit, which is meant for a slot at the very end of the format
/// string.
///
/// NOTE(review): this comment previously described `Some(true)` and
/// `Some(false)` the other way around, which does not match [take_first] -
/// confirm which reading is intended.
bounded: Option<bool>,
},
/// a string snippet
String(Rc<String>),
/// an indented block
Indent(Vec<FmtElement>),
}
impl FmtElement {
    /// Create a plain string snippet
    pub fn str(s: &'_ str) -> Self {
        let text = s.to_string();
        Self::String(Rc::new(text))
    }
    /// Create a slot for a subunit
    pub fn sub(slot: u32, bounded: Option<bool>) -> Self {
        Self::Sub { slot, bounded }
    }
    /// Create a slot marked `Some(true)`. [take_first] lets such a slot render
    /// any variant of the subunit.
    pub fn bounded(i: u32) -> Self {
        Self::sub(i, Some(true))
    }
    /// Create a slot marked `Some(false)`. [take_first] only renders bounded
    /// variants of the subunit in such a slot.
    pub fn unbounded(i: u32) -> Self {
        Self::sub(i, Some(false))
    }
    /// Create an end slot bounded by the enclosing unit if that is bounded
    pub fn last(i: u32) -> Self {
        Self::sub(i, None)
    }
    /// Create a sequence of `len` slots numbered from 0. All but the last are
    /// created with [FmtElement::unbounded], the last one carries the specified
    /// boundedness.
    ///
    /// # Panics
    ///
    /// if `len` does not fit in a `u32`
    pub fn sequence(len: usize, bounded: Option<bool>) -> Vec<Self> {
        let count: u32 = len.try_into().unwrap();
        // An empty sequence has no final slot to apply `bounded` to
        let Some(last) = count.checked_sub(1) else { return Vec::new() };
        (0..last).map(Self::unbounded).chain([Self::sub(last, bounded)]).collect()
    }
    /// Decode from a message
    pub fn from_api(api: &api::FormattingElement) -> Self {
        match_mapping!(api, api::FormattingElement => FmtElement {
            Indent(v => v.iter().map(FmtElement::from_api).collect()),
            String(s => Rc::new(s.clone())),
            Sub{ *slot, *bounded },
        })
    }
    /// Encode to message
    pub fn to_api(&self) -> api::FormattingElement {
        match_mapping!(self, FmtElement => api::FormattingElement {
            Indent(v => v.iter().map(FmtElement::to_api).collect()),
            String(s => s.to_string()),
            Sub{ *slot, *bounded },
        })
    }
}
/// A particular way in which a value may be formatted in text.
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub struct FmtVariant {
/// Whether this representation has an intrinsic end marker (`true`) or it
/// needs the parent to provide one (`false`)
pub bounded: bool,
/// Template string syntax elements, rendered in order and concatenated
pub elements: Vec<FmtElement>,
}
/// Represents a collection of formatting strings for the same set of parameters
/// from which the formatter can choose within their associated constraints.
///
/// Format strings may contain the following placeholders, where `0` stands for
/// any decimal parameter index:
///
/// - `{0b}` can be replaced by any variant of the parameter.
/// - `{0}` can only be replaced by a bounded variant of the parameter
/// - `{0l}` causes the current end restriction to be applied to the parameter.
///   This is to be used if the parameter is at the very end of the variant.
///
/// `{{` and `}}` stand for literal braces. Lines that start with more tab
/// characters than their surroundings are grouped into [FmtElement::Indent]
/// blocks.
#[derive(Clone, Debug, Hash, PartialEq, Eq, Default)]
pub struct Variants(pub Vec<FmtVariant>);
impl Variants {
fn parse_phs(s: &'_ str) -> Vec<FmtElement> {
let re = Regex::new(r"(?<tpl>\{\d+?[bl]?\})|(\{\{)|(\}\})").unwrap();
let matches = re.captures_iter(s);
let slots = matches.into_iter().filter_map(|m| m.name("tpl")).map(|tpl| {
let no_opencurly = tpl.as_str().strip_prefix("{").expect("required by regex");
let maybe_dash = no_opencurly.strip_suffix("}").expect("required by regex");
// we know it's not empty
let last_char = maybe_dash.as_bytes()[maybe_dash.len() - 1] as char;
let (num, bounded) = if !last_char.is_ascii_digit() {
let bounded = match last_char {
'b' => Some(true),
'l' => None,
_ => panic!("Invalid modifier char"),
};
(&maybe_dash[0..maybe_dash.len() - 1], bounded)
} else {
(maybe_dash, Some(false))
};
let idx = num.parse::<u32>().expect("Decimal digits required by regex");
(tpl.range(), idx, bounded)
});
(iter::once(None).chain(slots.into_iter().map(Some)).chain([None]).tuple_windows())
.flat_map(|(l, r)| {
let string = match (l, &r) {
(None, Some((r, ..))) => &s[..r.start],
(Some((r1, ..)), Some((r2, ..))) => &s[r1.end..r2.start],
(Some((r, ..)), None) => &s[r.end..],
(None, None) => s,
};
let str_item = FmtElement::String(Rc::new(string.replace("{{", "{").replace("}}", "}")));
match r {
None => itertools::Either::Left([str_item]),
Some((_, idx, bounded)) =>
itertools::Either::Right([str_item, FmtElement::Sub { slot: idx, bounded }]),
}
.into_iter()
})
.coalesce(|left, right| match (left, right) {
(FmtElement::String(left), FmtElement::String(right)) =>
Ok(FmtElement::String(Rc::new(left.to_string() + right.as_str()))),
tuple => Err(tuple),
})
.collect_vec()
}
fn parse(s: &'_ str) -> Vec<FmtElement> {
let mut lines = s.lines();
let Some(mut cur) = lines.next() else { return vec![] };
return indent_blk(&mut cur, &mut lines, 0);
fn indent_blk<'a>(
cur: &mut &'a str,
lines: &mut impl Iterator<Item = &'a str>,
blk_lv: usize,
) -> Vec<FmtElement> {
let mut out = Vec::new();
loop {
let line_lv = cur.chars().take_while(|c| *c == '\t').count();
match line_lv.cmp(&blk_lv) {
Ordering::Greater => out.push(FmtElement::Indent(indent_blk(cur, lines, blk_lv + 1))),
Ordering::Equal => out.extend(Variants::parse_phs(&cur[blk_lv..])),
Ordering::Less => return out,
}
match lines.next() {
Some(line) => *cur = line,
None => return out,
}
}
}
}
fn add(&mut self, bounded: bool, s: &'_ str) {
self.0.push(FmtVariant { bounded, elements: Self::parse(s) })
}
/// This option is available in all positions.
/// See [Variants] for a description of the format strings
pub fn bounded(mut self, s: &'_ str) -> Self {
self.add(true, s);
self
}
/// This option is only available in positions immediately preceding the end
/// of the sequence or a parenthesized subsequence.
/// See [Variants] for a description of the format strings
pub fn unbounded(mut self, s: &'_ str) -> Self {
self.add(false, s);
self
}
/// Produces formatting options for `len` parameters separated by `delim`.
/// `seq_bnd` indicates whether `delim` and `tail` can unambiguously indicate
/// the end of a subsequence. For consistency, the stricter of the two is
/// expected to be used
pub fn sequence(
mut self,
len: usize,
head: &str,
delim: &str,
tail: &str,
seq_bnd: bool,
) -> Self {
let seq = chain!(
[FmtElement::str(head)],
Itertools::intersperse(
FmtElement::sequence(len, Some(seq_bnd)).into_iter(),
FmtElement::str(delim),
),
[FmtElement::str(tail)],
);
self.0.push(FmtVariant { bounded: true, elements: seq.collect_vec() });
self
}
/// Pair the slots with subunits to produce a [FmtUnit]
pub fn units_own(self, subs: impl IntoIterator<Item = FmtUnit>) -> FmtUnit {
FmtUnit::new(Rc::new(self), subs)
}
/// Pair the slots with subunits to produce a [FmtUnit] by reference. These
/// objects should preferably be thread-locally cached whenever possible.
pub fn units(self: &Rc<Self>, subs: impl IntoIterator<Item = FmtUnit>) -> FmtUnit {
FmtUnit::new(self.clone(), subs)
}
}
/// The string is taken verbatim as a single bounded variant; it is not parsed
/// as a format string
impl From<Rc<String>> for Variants {
    fn from(value: Rc<String>) -> Self {
        let literal = FmtVariant { bounded: true, elements: vec![FmtElement::String(value)] };
        Self(vec![literal])
    }
}
/// Verbatim text, see the conversion from `Rc<String>`
impl From<String> for Variants {
    fn from(value: String) -> Self {
        let shared = Rc::new(value);
        Self::from(shared)
    }
}
/// Verbatim text, see the conversion from `String`
impl From<&str> for Variants {
    fn from(value: &str) -> Self {
        let owned = value.to_string();
        Self::from(owned)
    }
}
/// Parses the string as a format string for a single bounded variant. Never
/// fails.
impl FromStr for Variants {
    type Err = Infallible;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let parsed = Self::default().bounded(s);
        Ok(parsed)
    }
}
/// Indent the continuation lines of `s`: every `\n` is followed by `indent`
/// tab characters. Text before the first line break is left untouched.
fn indent_str(s: &str, indent: u16) -> String {
    let mut line_break = String::from("\n");
    line_break.push_str(&"\t".repeat(usize::from(indent)));
    s.replace('\n', &line_break)
}
/// Render template `elements` to text, filling each slot with the rendering
/// that [take_first] produces for the matching entry of `values`.
///
/// - `indent` is the nesting depth of the elements. Line breaks inside string
///   snippets and inside rendered subunits are followed by this many tabs, and
///   every [FmtElement::Indent] block is rendered one level deeper.
/// - `last_bounded` is the restriction used for slots that do not specify
///   their own.
///
/// # Panics
///
/// if a slot refers to an index that is not present in `values`
fn fill_slots<'a>(
    elements: impl IntoIterator<Item = &'a FmtElement>,
    values: &[FmtUnit],
    indent: u16,
    last_bounded: bool,
) -> String {
    elements
        .into_iter()
        .map(|el| match el {
            FmtElement::String(s) => indent_str(s, indent),
            FmtElement::Sub { slot, bounded } => {
                // Name the offending slot rather than failing with a bare
                // index-out-of-bounds
                let value = values.get(*slot as usize).unwrap_or_else(|| {
                    panic!("Slot {slot} is out of range for {} subunits", values.len())
                });
                // The subunit is rendered on its own from depth 0 and re-indented
                // as a whole
                indent_str(&take_first(value, bounded.unwrap_or(last_bounded)), indent)
            },
            FmtElement::Indent(elements) => fill_slots(elements, values, indent + 1, last_bounded),
        })
        .collect()
}
/// The simplest possible print strategy: render the first admissible variant.
///
/// If `bounded` is set then every variant is admissible, otherwise only the
/// variants whose [FmtVariant::bounded] flag is set. The same flag is also
/// applied to slots that inherit their restriction from the enclosing unit.
///
/// # Panics
///
/// if the unit has no admissible variant
pub fn take_first(unit: &FmtUnit, bounded: bool) -> String {
    let mut candidates = unit.variants.0.iter();
    let chosen = candidates.find(|variant| bounded || variant.bounded).expect("No bounded variant!");
    fill_slots(&chosen.elements, &unit.subs, 0, bounded)
}
pub async fn take_first_fmt(v: &(impl Format + ?Sized)) -> String {
take_first(&v.print(&FmtCtxImpl { _foo: PhantomData }).await, false)
}
/// Placeholder [FmtCtx] that carries no settings. Construct it with
/// [Default::default] if you need one.
#[derive(Default)]
pub struct FmtCtxImpl<'a> {
// Zero-sized marker whose only effect is to give the struct a lifetime
// parameter
_foo: PhantomData<&'a ()>,
}
/// Additional settings to the formatter. Implemented by [FmtCtxImpl]. Currently
/// not in use: the trait does not have any members.
pub trait FmtCtx {}
impl FmtCtx for FmtCtxImpl<'_> {}
/// A value that can be formatted into a string with multiple possible forms
pub trait Format {
/// Describe this value as a [FmtUnit]. The returned future may borrow both
/// the value and the context `c`.
#[must_use]
fn print<'a>(&'a self, c: &'a (impl FmtCtx + ?Sized + 'a)) -> impl Future<Output = FmtUnit> + 'a;
}
// The empty match only type-checks because `Never` has no values, so this
// method can never actually run; the impl exists so that `Never` can stand in
// wherever a `Format` type is required.
impl Format for Never {
async fn print<'a>(&'a self, _c: &'a (impl FmtCtx + ?Sized + 'a)) -> FmtUnit { match *self {} }
}
/// Format a value with the default strategy, which is currently
/// [take_first_fmt]
pub async fn fmt(v: &(impl Format + ?Sized)) -> String {
    take_first_fmt(v).await
}
/// Format every item of a sequence with the default strategy, which is
/// currently [take_first_fmt]. The items are all awaited together and the
/// strings are returned in the order of the input.
pub async fn fmt_v<F: Format + ?Sized>(
    v: impl IntoIterator<Item: Borrow<F>>,
) -> impl Iterator<Item = String> {
    let pending = v.into_iter().map(|f| async move { take_first_fmt(f.borrow()).await });
    let rendered = join_all(pending).await;
    rendered.into_iter()
}
#[cfg(test)]
mod test {
    use std::rc::Rc;

    use crate::format::{FmtElement, FmtUnit, FmtVariant, Variants, take_first};

    /// Escaped braces around a placeholder are parsed into literal braces, and
    /// the resulting template renders with a verbatim subunit in the slot
    #[test]
    fn variants_parse_test() {
        let text = |s: &str| FmtElement::String(Rc::new(s.to_string()));

        // Parsing
        let vars = Rc::new(Variants::default().bounded("({{{0}}})"));
        let template = FmtVariant {
            bounded: true,
            elements: vec![text("({"), FmtElement::Sub { slot: 0, bounded: Some(false) }, text("})")],
        };
        let expected_vars = Rc::new(Variants(vec![template]));
        assert_eq!(vars.as_ref(), expected_vars.as_ref());

        // Pairing with a subunit converted from a plain string
        let unit = vars.units(["1".into()]);
        let one = FmtUnit {
            subs: vec![],
            variants: Rc::new(Variants(vec![FmtVariant { bounded: true, elements: vec![text("1")] }])),
        };
        assert_eq!(unit, FmtUnit { subs: vec![one], variants: expected_vars });

        // Rendering
        let str = take_first(&unit, true);
        assert_eq!(str, "({1})");
    }
}