Interning Orchid string literals

This commit is contained in:
2023-08-19 14:35:24 +01:00
parent 0b887ced70
commit 6693d93944
10 changed files with 96 additions and 29 deletions

View File

@@ -185,12 +185,12 @@ fn paren_parser(lp: char, rp: char) -> impl SimpleParser<char, Lexeme> {
just(lp).to(Lexeme::LP(lp)).or(just(rp).to(Lexeme::RP(lp))) just(lp).to(Lexeme::LP(lp)).or(just(rp).to(Lexeme::RP(lp)))
} }
pub fn literal_parser() -> impl SimpleParser<char, Literal> { pub fn literal_parser<'a>(ctx: impl Context + 'a) -> impl SimpleParser<char, Literal> + 'a {
choice(( choice((
// all ints are valid floats so it takes precedence // all ints are valid floats so it takes precedence
number::int_parser().map(Literal::Uint), number::int_parser().map(Literal::Uint),
number::float_parser().map(Literal::Num), number::float_parser().map(Literal::Num),
string::str_parser().map(Literal::Str), string::str_parser().map(move |s| Literal::Str(ctx.interner().i(&s).into())),
)) ))
} }
@@ -229,7 +229,7 @@ pub fn lexer<'a>(
just(':').to(Lexeme::Type), just(':').to(Lexeme::Type),
just('\n').to(Lexeme::BR), just('\n').to(Lexeme::BR),
just('.').to(Lexeme::Dot), just('.').to(Lexeme::Dot),
literal_parser().map(Lexeme::Literal), literal_parser(ctx.clone()).map(Lexeme::Literal),
name::name_parser(&all_ops).map({ name::name_parser(&all_ops).map({
let ctx = ctx.clone(); let ctx = ctx.clone();
move |n| Lexeme::Name(ctx.interner().i(&n)) move |n| Lexeme::Name(ctx.interner().i(&n))

View File

@@ -2,6 +2,8 @@ use std::fmt::Debug;
use ordered_float::NotNan; use ordered_float::NotNan;
use super::OrcString;
/// Exact values read from the AST which have a shared meaning recognized by all /// Exact values read from the AST which have a shared meaning recognized by all
/// external functions /// external functions
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
@@ -11,7 +13,7 @@ pub enum Literal {
/// An unsigned integer; a size, index or pointer /// An unsigned integer; a size, index or pointer
Uint(u64), Uint(u64),
/// A utf-8 character sequence /// A utf-8 character sequence
Str(String), Str(OrcString),
} }
impl Debug for Literal { impl Debug for Literal {
@@ -36,6 +38,6 @@ impl From<u64> for Literal {
} }
impl From<String> for Literal { impl From<String> for Literal {
fn from(value: String) -> Self { fn from(value: String) -> Self {
Self::Str(value) Self::Str(value.into())
} }
} }

View File

@@ -13,8 +13,10 @@ pub mod primitive;
pub mod project; pub mod project;
pub mod sourcefile; pub mod sourcefile;
pub mod tree; pub mod tree;
mod string;
pub use const_tree::{from_const_tree, ConstTree}; pub use const_tree::{from_const_tree, ConstTree};
pub use string::OrcString;
pub use literal::Literal; pub use literal::Literal;
pub use location::Location; pub use location::Location;
pub use namelike::{NameLike, Sym, VName}; pub use namelike::{NameLike, Sym, VName};

View File

@@ -0,0 +1,55 @@
use std::hash::Hash;
use std::ops::Deref;
use std::rc::Rc;
use crate::Tok;
/// String value used by Orchid string literals.
///
/// Equality and hashing are implemented by hand further down in this file
/// rather than derived, so only `Clone`, `Debug` and `Eq` are derived here.
#[derive(Clone, Debug, Eq)]
pub enum OrcString {
/// A string held as an interner token; produced by the
/// `From<Tok<String>>` conversion.
Interned(Tok<String>),
/// A reference-counted owned string; produced by the `From<String>`
/// conversion.
Runtime(Rc<String>),
}
impl OrcString {
    /// Returns an owned copy of the text held by this string.
    ///
    /// A new `String` is allocated on every call, for either variant.
    pub fn get_string(&self) -> String {
        let text: &str = self.as_str();
        String::from(text)
    }
}
impl Deref for OrcString {
    type Target = String;

    /// Borrows the underlying `String`, whichever variant holds it.
    fn deref(&self) -> &Self::Target {
        // Both arms rely on deref coercion from the wrapper to `&String`.
        match self {
            Self::Runtime(shared) => shared,
            Self::Interned(token) => token,
        }
    }
}
impl Hash for OrcString {
    /// Hashes the text content only, so the variant does not influence the
    /// resulting hash.
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        Hash::hash(self.as_str(), state)
    }
}
impl From<String> for OrcString {
fn from(value: String) -> Self {
Self::Runtime(Rc::new(value))
}
}
impl From<Tok<String>> for OrcString {
fn from(value: Tok<String>) -> Self {
Self::Interned(value)
}
}
impl PartialEq for OrcString {
    /// Compares two strings for equality.
    ///
    /// Two interned strings are compared through their tokens; every other
    /// combination is compared by text content.
    fn eq(&self, other: &Self) -> bool {
        if let (Self::Interned(lhs), Self::Interned(rhs)) = (self, other) {
            // Both sides are tokens: defer to the token's own equality.
            return lhs == rhs;
        }
        // At least one side is a runtime string, so compare the contents.
        self.as_str() == other.as_str()
    }
}

View File

@@ -8,7 +8,7 @@ use super::assertion_error::AssertionError;
use crate::foreign::{Atomic, ExternError}; use crate::foreign::{Atomic, ExternError};
use crate::interpreted::Clause; use crate::interpreted::Clause;
use crate::representations::interpreted::ExprInst; use crate::representations::interpreted::ExprInst;
use crate::representations::Literal; use crate::representations::{Literal, OrcString};
use crate::Primitive; use crate::Primitive;
/// Tries to cast the [ExprInst] as a [Literal], calls the provided function on /// Tries to cast the [ExprInst] as a [Literal], calls the provided function on
@@ -25,7 +25,7 @@ pub fn with_lit<T>(
/// Like [with_lit] but also unwraps [Literal::Str] /// Like [with_lit] but also unwraps [Literal::Str]
pub fn with_str<T>( pub fn with_str<T>(
x: &ExprInst, x: &ExprInst,
predicate: impl FnOnce(&String) -> Result<T, Rc<dyn ExternError>>, predicate: impl FnOnce(&OrcString) -> Result<T, Rc<dyn ExternError>>,
) -> Result<T, Rc<dyn ExternError>> { ) -> Result<T, Rc<dyn ExternError>> {
with_lit(x, |l| { with_lit(x, |l| {
if let Literal::Str(s) = l { if let Literal::Str(s) = l {
@@ -93,7 +93,7 @@ impl TryFrom<&ExprInst> for Literal {
} }
} }
impl TryFrom<&ExprInst> for String { impl TryFrom<&ExprInst> for OrcString {
type Error = Rc<dyn ExternError>; type Error = Rc<dyn ExternError>;
fn try_from(value: &ExprInst) -> Result<Self, Self::Error> { fn try_from(value: &ExprInst) -> Result<Self, Self::Error> {

View File

@@ -4,6 +4,7 @@ use super::instances::{
}; };
use crate::foreign::cps_box::init_cps; use crate::foreign::cps_box::init_cps;
use crate::foreign::{Atom, Atomic}; use crate::foreign::{Atom, Atomic};
use crate::representations::OrcString;
use crate::systems::stl::Binary; use crate::systems::stl::Binary;
use crate::systems::RuntimeError; use crate::systems::RuntimeError;
use crate::{ast, define_fn, ConstTree, Interner, Primitive}; use crate::{ast, define_fn, ConstTree, Interner, Primitive};
@@ -53,9 +54,9 @@ define_fn! {
define_fn! { define_fn! {
WriteStr { WriteStr {
stream: SinkHandle, stream: SinkHandle,
string: String string: OrcString
} => Ok(init_cps(3, IOCmdHandlePack { } => Ok(init_cps(3, IOCmdHandlePack {
cmd: WriteCmd::WStr(string.clone()), cmd: WriteCmd::WStr(string.get_string()),
handle: *stream, handle: *stream,
})) }))
} }

View File

@@ -105,7 +105,7 @@ impl IOHandler<ReadCmd> for (ExprInst, ExprInst) {
ReadResult::RBin(_, Ok(bytes)) => ReadResult::RBin(_, Ok(bytes)) =>
call(succ, vec![Binary(Arc::new(bytes)).atom_cls().wrap()]).wrap(), call(succ, vec![Binary(Arc::new(bytes)).atom_cls().wrap()]).wrap(),
ReadResult::RStr(_, Ok(text)) => ReadResult::RStr(_, Ok(text)) =>
call(succ, vec![Literal::Str(text).into()]).wrap(), call(succ, vec![Literal::Str(text.into()).into()]).wrap(),
} }
} }
} }

View File

@@ -4,6 +4,7 @@ use ordered_float::NotNan;
use super::ArithmeticError; use super::ArithmeticError;
use crate::foreign::ExternError; use crate::foreign::ExternError;
use crate::interner::Interner; use crate::interner::Interner;
use crate::interpreted::Clause;
use crate::parse::{float_parser, int_parser}; use crate::parse::{float_parser, int_parser};
use crate::systems::cast_exprinst::with_lit; use crate::systems::cast_exprinst::with_lit;
use crate::systems::AssertionError; use crate::systems::AssertionError;
@@ -43,10 +44,10 @@ define_fn! {
/// Convert a literal to a string using Rust's conversions for floats, chars and /// Convert a literal to a string using Rust's conversions for floats, chars and
/// uints respectively /// uints respectively
ToString = |x| with_lit(x, |l| Ok(match l { ToString = |x| with_lit(x, |l| Ok(match l {
Literal::Uint(i) => i.to_string(), Literal::Uint(i) => Literal::Str(i.to_string().into()),
Literal::Num(n) => n.to_string(), Literal::Num(n) => Literal::Str(n.to_string().into()),
Literal::Str(s) => s.clone(), s@Literal::Str(_) => s.clone(),
})).map(|s| Literal::Str(s).into()) })).map(Clause::from)
} }
pub fn conv(i: &Interner) -> ConstTree { pub fn conv(i: &Interner) -> ConstTree {

View File

@@ -1,4 +1,5 @@
use std::fmt::Display; use std::fmt::Display;
use std::rc::Rc;
use crate::foreign::ExternError; use crate::foreign::ExternError;
use crate::systems::cast_exprinst::with_str; use crate::systems::cast_exprinst::with_str;
@@ -6,7 +7,7 @@ use crate::{define_fn, ConstTree, Interner};
/// An unrecoverable error in Orchid land. Because Orchid is lazy, this only /// An unrecoverable error in Orchid land. Because Orchid is lazy, this only
/// invalidates expressions that reference the one that generated it. /// invalidates expressions that reference the one that generated it.
pub struct OrchidPanic(String); pub struct OrchidPanic(Rc<String>);
impl Display for OrchidPanic { impl Display for OrchidPanic {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -18,7 +19,10 @@ impl ExternError for OrchidPanic {}
define_fn! { define_fn! {
/// Takes a message, returns an [ExternError] unconditionally. /// Takes a message, returns an [ExternError] unconditionally.
Panic = |x| with_str(x, |s| Err(OrchidPanic(s.clone()).into_extern())) Panic = |x| with_str(x, |s| {
let msg = Rc::new(s.get_string());
Err(OrchidPanic(msg).into_extern())
})
} }
pub fn panic(i: &Interner) -> ConstTree { pub fn panic(i: &Interner) -> ConstTree {

View File

@@ -1,6 +1,7 @@
use unicode_segmentation::UnicodeSegmentation; use unicode_segmentation::UnicodeSegmentation;
use crate::interner::Interner; use crate::interner::Interner;
use crate::representations::OrcString;
use crate::systems::cast_exprinst::with_str; use crate::systems::cast_exprinst::with_str;
use crate::systems::codegen::{orchid_opt, tuple}; use crate::systems::codegen::{orchid_opt, tuple};
use crate::systems::RuntimeError; use crate::systems::RuntimeError;
@@ -9,15 +10,16 @@ use crate::{define_fn, ConstTree, Literal};
define_fn! {expr=x in define_fn! {expr=x in
/// Append a string to another /// Append a string to another
pub Concatenate { a: String, b: String } pub Concatenate { a: OrcString, b: OrcString }
=> Ok(Literal::Str(a.to_owned() + b).into()) => Ok(Literal::Str((a.get_string() + b.as_str()).into()).into())
} }
define_fn! {expr=x in define_fn! {expr=x in
pub Slice { s: String, i: u64, len: u64 } => { pub Slice { s: OrcString, i: u64, len: u64 } => {
let graphs = s.graphemes(true); let graphs = s.as_str().graphemes(true);
if *i == 0 { if *i == 0 {
Ok(Literal::Str(graphs.take(*len as usize).collect()).into()) let orc_str = graphs.take(*len as usize).collect::<String>().into();
Ok(Literal::Str(orc_str).into())
} else { } else {
let mut prefix = graphs.skip(*i as usize - 1); let mut prefix = graphs.skip(*i as usize - 1);
if prefix.next().is_none() { if prefix.next().is_none() {
@@ -27,10 +29,9 @@ define_fn! {expr=x in
) )
} else { } else {
let mut count = 0; let mut count = 0;
let ret = prefix let ret = (prefix.take(*len as usize))
.take(*len as usize)
.map(|x| { count+=1; x }) .map(|x| { count+=1; x })
.collect(); .collect::<String>().into();
if count == *len { if count == *len {
Ok(Literal::Str(ret).into()) Ok(Literal::Str(ret).into())
} else { } else {
@@ -45,15 +46,16 @@ define_fn! {expr=x in
} }
define_fn! {expr=x in define_fn! {expr=x in
pub Find { haystack: String, needle: String } => { pub Find { haystack: OrcString, needle: OrcString } => {
let found = iter_find(haystack.graphemes(true), needle.graphemes(true)); let haystack_graphs = haystack.as_str().graphemes(true);
let found = iter_find(haystack_graphs, needle.as_str().graphemes(true));
Ok(orchid_opt(found.map(|x| Literal::Uint(x as u64).into()))) Ok(orchid_opt(found.map(|x| Literal::Uint(x as u64).into())))
} }
} }
define_fn! {expr=x in define_fn! {expr=x in
pub Split { s: String, i: u64 } => { pub Split { s: OrcString, i: u64 } => {
let mut graphs = s.graphemes(true); let mut graphs = s.as_str().graphemes(true);
let a = graphs.by_ref().take(*i as usize).collect::<String>(); let a = graphs.by_ref().take(*i as usize).collect::<String>();
let b = graphs.collect::<String>(); let b = graphs.collect::<String>();
Ok(tuple(vec![a.into(), b.into()])) Ok(tuple(vec![a.into(), b.into()]))