use std::fmt; use std::fmt::Debug; use std::future::Future; use std::ops::RangeInclusive; use futures::FutureExt; use futures::future::LocalBoxFuture; use orchid_base::{IStr, OrcErrv, OrcRes, Pos, SrcRange, Sym, is, mk_errv}; use crate::tree::GenTokTree; use crate::{BorrowedExprStore, PTokTree, api, request}; pub(crate) async fn ekey_cascade() -> IStr { is("An error cascading from a recursive call").await } pub(crate) async fn ekey_not_applicable() -> IStr { is("Pseudo-error to communicate that the current branch in a dispatch doesn't apply").await } const MSG_INTERNAL_ERROR: &str = "This error is a sentinel for the extension library.\ it should not be emitted by the extension."; pub(crate) async fn err_cascade() -> OrcErrv { mk_errv(ekey_cascade().await, MSG_INTERNAL_ERROR, [Pos::None]) } /// Return this error if your lexer can determine that it is not applicable to /// this piece of syntax. This error will not be raised if another lexer /// matches, or if the piece of matched syntax is found to be valid until /// runtime pub async fn err_not_applicable() -> OrcErrv { mk_errv(ekey_not_applicable().await, MSG_INTERNAL_ERROR, [Pos::None]) } /// Object passed to lexers for recursion and position-related convenience /// methods pub struct LexContext<'a> { pub(crate) exprs: &'a BorrowedExprStore, pub text: &'a IStr, pub id: api::ParsId, pub pos: u32, pub(crate) src: Sym, } impl<'a> LexContext<'a> { pub(crate) fn new( exprs: &'a BorrowedExprStore, text: &'a IStr, id: api::ParsId, pos: u32, src: Sym, ) -> Self { Self { exprs, id, pos, src, text } } /// The logical path of the source file, also the path of the file's root /// module pub fn src(&self) -> &Sym { &self.src } /// Lex an interpolated expression of some kind /// /// This function returns [PTokTree] because it can never return /// [orchid_base::Token::NewExpr]. You can use /// [crate::parser::p_tree2gen] to convert this to [crate::tree::GenTokTree] /// for embedding in the return value. pub async fn recurse(&self, tail: &'a str) -> OrcRes<(&'a str, PTokTree)> { let start = self.pos(tail); let Some(lx) = request(api::SubLex { pos: start, id: self.id }).await else { return Err(err_cascade().await); }; let tree = PTokTree::from_api(lx.tree, &mut { self.exprs }, &mut (), &self.src).await; Ok((&self.text[lx.pos as usize..], tree)) } /// Find the index of a cursor given the remaining, not-yet-consumed text pub fn pos(&self, tail: &'a str) -> u32 { (self.text.len() - tail.len()) as u32 } /// Convenience method to find the source position of a token given the text /// it was found in and the text after it was parsed. pub fn pos_tt(&self, tail_with: &'a str, tail_without: &'a str) -> SrcRange { SrcRange::new(self.pos(tail_with)..self.pos(tail_without), &self.src) } /// Convenience method to find the source position of a token given its length /// and the remaining text afterwards. The length can be any number type but /// must convert to a u32 without errors pub fn pos_lt(&self, len: impl TryInto, tail: &'a str) -> SrcRange { SrcRange::new(self.pos(tail) - len.try_into().unwrap()..self.pos(tail), &self.src) } } /// One or more tokens returned by the parser. In practice, [GenTokTree], /// `Vec`, or `[GenTokTree; usize]` pub trait LexedData { fn into_vec(self) -> Vec; } impl LexedData for GenTokTree { fn into_vec(self) -> Vec { vec![self] } } impl LexedData for Vec { fn into_vec(self) -> Vec { self } } impl LexedData for [GenTokTree; N] { fn into_vec(self) -> Vec { self.to_vec() } } /// A lexer plugin to extend the syntax of Orchid pub trait Lexer: Debug + Send + Sync + Sized + Default + 'static { /// As an optimization, your lexer will only receive snippets that start with /// a character included in one of these ranges. If you have a multi-character /// discriminator, include all possible starting chars in this and return /// [err_not_applicable] if the entire discriminator was not found const CHAR_FILTER: &'static [RangeInclusive]; /// Attempt to lex some custom syntax from the start of the tail string. /// Return the remaining text and the lexed tokens. fn lex<'a>( tail: &'a str, lctx: &'a LexContext<'a>, ) -> impl Future>; } /// Type-erased [Lexer] pub trait DynLexer: Debug + Send + Sync + 'static { /// Type-erased [Lexer::CHAR_FILTER] fn char_filter(&self) -> &'static [RangeInclusive]; /// Type-erased [Lexer::lex] fn lex<'a>( &self, tail: &'a str, ctx: &'a LexContext<'a>, ) -> LocalBoxFuture<'a, OrcRes<(&'a str, Vec)>>; } impl DynLexer for T { fn char_filter(&self) -> &'static [RangeInclusive] { T::CHAR_FILTER } fn lex<'a>( &self, tail: &'a str, ctx: &'a LexContext<'a>, ) -> LocalBoxFuture<'a, OrcRes<(&'a str, Vec)>> { async { T::lex(tail, ctx).await.map(|(s, d)| (s, d.into_vec())) }.boxed_local() } } /// Type-erased instance of a lexer that is returned by /// [crate::System::lexers] pub type LexerObj = &'static dyn DynLexer;