Files
orchid/orchid-extension/src/lexer.rs
2026-03-27 23:50:58 +01:00

139 lines
5.1 KiB
Rust

use std::fmt;
use std::fmt::Debug;
use std::future::Future;
use std::ops::RangeInclusive;
use futures::FutureExt;
use futures::future::LocalBoxFuture;
use orchid_base::{IStr, OrcErrv, OrcRes, Pos, SrcRange, Sym, is, mk_errv};
use crate::tree::GenTokTree;
use crate::{BorrowedExprStore, PTokTree, api, request};
pub(crate) async fn ekey_cascade() -> IStr { is("An error cascading from a recursive call").await }
pub(crate) async fn ekey_not_applicable() -> IStr {
is("Pseudo-error to communicate that the current branch in a dispatch doesn't apply").await
}
/// Placeholder message carried by the sentinel errors ([err_cascade] and
/// [err_not_applicable]); these are control-flow signals for the host, not
/// errors meant to be shown to a user.
// NB: the `\` line-continuation escape removes the newline AND the next
// line's leading whitespace, so the space must appear before the backslash.
// The previous text rendered as "…extension library.it should not…".
const MSG_INTERNAL_ERROR: &str = "This error is a sentinel for the extension library. \
  It should not be emitted by the extension.";
/// Build the sentinel error used when a recursive sub-lex call has already
/// reported its own failure, so no additional diagnostic should be shown.
pub(crate) async fn err_cascade() -> OrcErrv {
  let key = ekey_cascade().await;
  mk_errv(key, MSG_INTERNAL_ERROR, [Pos::None])
}
/// Return this error from a lexer that has decided the current piece of
/// syntax is not for it. It is not reported if some other lexer accepts the
/// snippet, or if the matched syntax otherwise remains valid until runtime.
pub async fn err_not_applicable() -> OrcErrv {
  let key = ekey_not_applicable().await;
  mk_errv(key, MSG_INTERNAL_ERROR, [Pos::None])
}
/// Object passed to lexers for recursion and position-related convenience
/// methods
/// Object passed to lexers for recursion and position-related convenience
/// methods
pub struct LexContext<'a> {
  /// Full text of the source file being lexed; offsets are relative to it.
  pub text: &'a IStr,
  /// Handle identifying the ongoing parse when recursing into the host.
  pub id: api::ParsId,
  // NOTE(review): appears to be the snippet's start offset within `text`;
  // it is stored by `new` but not read anywhere in this file — confirm.
  pub pos: u32,
  // Store backing expression tokens produced by recursive sub-lexing.
  pub(crate) exprs: &'a BorrowedExprStore,
  // Logical path of the source file; exposed through `src()`.
  pub(crate) src: Sym,
}
impl<'a> LexContext<'a> {
  /// Assemble a context from its parts; called by the extension runtime.
  pub(crate) fn new(
    exprs: &'a BorrowedExprStore,
    text: &'a IStr,
    id: api::ParsId,
    pos: u32,
    src: Sym,
  ) -> Self {
    Self { text, id, pos, exprs, src }
  }

  /// The logical path of the source file, also the path of the file's root
  /// module
  pub fn src(&self) -> &Sym { &self.src }

  /// Lex an interpolated expression of some kind
  ///
  /// This function returns [PTokTree] because it can never return
  /// [orchid_base::Token::NewExpr]. You can use
  /// [crate::parser::p_tree2gen] to convert this to [crate::tree::GenTokTree]
  /// for embedding in the return value.
  pub async fn recurse(&self, tail: &'a str) -> OrcRes<(&'a str, PTokTree)> {
    let sub = api::SubLex { pos: self.pos(tail), id: self.id };
    // A `None` reply means the host already recorded the failure; signal it
    // upward with the cascade sentinel rather than a fresh diagnostic.
    let lexed = match request(sub).await {
      Some(lexed) => lexed,
      None => return Err(err_cascade().await),
    };
    // `from_api` wants `&mut` access to a copy of the store reference.
    let mut store = self.exprs;
    let tree = PTokTree::from_api(lexed.tree, &mut store, &mut (), &self.src).await;
    Ok((&self.text[lexed.pos as usize..], tree))
  }

  /// Find the index of a cursor given the remaining, not-yet-consumed text
  pub fn pos(&self, tail: &'a str) -> u32 {
    let consumed = self.text.len() - tail.len();
    consumed as u32
  }

  /// Convenience method to find the source position of a token given the text
  /// it was found in and the text after it was parsed.
  pub fn pos_tt(&self, tail_with: &'a str, tail_without: &'a str) -> SrcRange {
    let range = self.pos(tail_with)..self.pos(tail_without);
    SrcRange::new(range, &self.src)
  }

  /// Convenience method to find the source position of a token given its length
  /// and the remaining text afterwards. The length can be any number type but
  /// must convert to a u32 without errors
  pub fn pos_lt(&self, len: impl TryInto<u32, Error: fmt::Debug>, tail: &'a str) -> SrcRange {
    let end = self.pos(tail);
    let start = end - len.try_into().unwrap();
    SrcRange::new(start..end, &self.src)
  }
}
/// One or more tokens returned by the parser. In practice, [GenTokTree],
/// `Vec<GenTokTree>`, or `[GenTokTree; usize]`
pub trait LexedData {
/// Collect the lexed token(s) into a uniform `Vec` so the type-erased
/// [DynLexer] wrapper can forward them to the host.
fn into_vec(self) -> Vec<GenTokTree>;
}
// A single token becomes a one-element vector.
impl LexedData for GenTokTree {
  fn into_vec(self) -> Vec<GenTokTree> { Vec::from([self]) }
}
// A vector of tokens is already in canonical form; identity conversion.
impl LexedData for Vec<GenTokTree> {
fn into_vec(self) -> Vec<GenTokTree> { self }
}
// A fixed-size array of tokens is moved into a vector.
impl<const N: usize> LexedData for [GenTokTree; N] {
  // `Vec::from` moves the N owned elements; the previous `self.to_vec()`
  // went through the slice path and cloned every element needlessly.
  fn into_vec(self) -> Vec<GenTokTree> { Vec::from(self) }
}
/// A lexer plugin to extend the syntax of Orchid
// NOTE(review): the `Default` bound presumably lets the host instantiate the
// lexer; not demonstrated in this file — confirm against the registration
// path in [crate::System].
pub trait Lexer: Debug + Send + Sync + Sized + Default + 'static {
/// As an optimization, your lexer will only receive snippets that start with
/// a character included in one of these ranges. If you have a multi-character
/// discriminator, include all possible starting chars in this and return
/// [err_not_applicable] if the entire discriminator was not found
const CHAR_FILTER: &'static [RangeInclusive<char>];
/// Attempt to lex some custom syntax from the start of the tail string.
/// Return the remaining text and the lexed tokens.
fn lex<'a>(
tail: &'a str,
lctx: &'a LexContext<'a>,
) -> impl Future<Output = OrcRes<(&'a str, impl LexedData)>>;
}
/// Type-erased [Lexer]
// Object-safe mirror of [Lexer]: associated const becomes a method, the
// `impl Future` return becomes a boxed future, and the tokens are collapsed
// to `Vec<GenTokTree>` so the trait can be used as `dyn DynLexer`.
pub trait DynLexer: Debug + Send + Sync + 'static {
/// Type-erased [Lexer::CHAR_FILTER]
fn char_filter(&self) -> &'static [RangeInclusive<char>];
/// Type-erased [Lexer::lex]
fn lex<'a>(
&self,
tail: &'a str,
ctx: &'a LexContext<'a>,
) -> LocalBoxFuture<'a, OrcRes<(&'a str, Vec<GenTokTree>)>>;
}
// Blanket adapter: every statically-typed [Lexer] is usable as a [DynLexer].
impl<T: Lexer> DynLexer for T {
  fn char_filter(&self) -> &'static [RangeInclusive<char>] { T::CHAR_FILTER }

  fn lex<'a>(
    &self,
    tail: &'a str,
    ctx: &'a LexContext<'a>,
  ) -> LocalBoxFuture<'a, OrcRes<(&'a str, Vec<GenTokTree>)>> {
    async move {
      // Delegate to the concrete lexer, then flatten its token payload.
      let (rest, data) = T::lex(tail, ctx).await?;
      Ok((rest, data.into_vec()))
    }
    .boxed_local()
  }
}
/// Type-erased instance of a lexer that is returned by
/// [crate::System::lexers]
// `&'static` works because lexers are stateless plugin singletons registered
// for the lifetime of the extension.
pub type LexerObj = &'static dyn DynLexer;