// Forked from Orchid/orchid.
// Listing metadata: 139 lines, 5.1 KiB, Rust.
use std::fmt;
|
|
use std::fmt::Debug;
|
|
use std::future::Future;
|
|
use std::ops::RangeInclusive;
|
|
|
|
use futures::FutureExt;
|
|
use futures::future::LocalBoxFuture;
|
|
use orchid_base::{IStr, OrcErrv, OrcRes, Pos, SrcRange, Sym, is, mk_errv};
|
|
|
|
use crate::tree::GenTokTree;
|
|
use crate::{BorrowedExprStore, PTokTree, api, request};
|
|
|
|
/// Interned key of the sentinel error built by [err_cascade].
pub(crate) async fn ekey_cascade() -> IStr {
    const KEY: &str = "An error cascading from a recursive call";
    is(KEY).await
}
|
|
pub(crate) async fn ekey_not_applicable() -> IStr {
|
|
is("Pseudo-error to communicate that the current branch in a dispatch doesn't apply").await
|
|
}
|
|
/// Message attached to the sentinel errors built by [err_cascade] and
/// [err_not_applicable].
///
/// A `\` at the end of a line inside a string literal swallows the newline
/// and all leading whitespace of the next line, so the space separating the
/// two sentences has to sit *before* the backslash.
const MSG_INTERNAL_ERROR: &str = "This error is a sentinel for the extension library. \
    It should not be emitted by the extension.";
|
|
|
|
pub(crate) async fn err_cascade() -> OrcErrv {
|
|
mk_errv(ekey_cascade().await, MSG_INTERNAL_ERROR, [Pos::None])
|
|
}
|
|
|
|
/// Return this error if your lexer can determine that it is not applicable to
|
|
/// this piece of syntax. This error will not be raised if another lexer
|
|
/// matches, or if the piece of matched syntax is found to be valid until
|
|
/// runtime
|
|
pub async fn err_not_applicable() -> OrcErrv {
|
|
mk_errv(ekey_not_applicable().await, MSG_INTERNAL_ERROR, [Pos::None])
|
|
}
|
|
|
|
/// Object passed to lexers for recursion and position-related convenience
|
|
/// methods
|
|
pub struct LexContext<'a> {
|
|
pub(crate) exprs: &'a BorrowedExprStore,
|
|
pub text: &'a IStr,
|
|
pub id: api::ParsId,
|
|
pub pos: u32,
|
|
pub(crate) src: Sym,
|
|
}
|
|
impl<'a> LexContext<'a> {
|
|
pub(crate) fn new(
|
|
exprs: &'a BorrowedExprStore,
|
|
text: &'a IStr,
|
|
id: api::ParsId,
|
|
pos: u32,
|
|
src: Sym,
|
|
) -> Self {
|
|
Self { exprs, id, pos, src, text }
|
|
}
|
|
/// The logical path of the source file, also the path of the file's root
|
|
/// module
|
|
pub fn src(&self) -> &Sym { &self.src }
|
|
/// Lex an interpolated expression of some kind
|
|
///
|
|
/// This function returns [PTokTree] because it can never return
|
|
/// [orchid_base::Token::NewExpr]. You can use
|
|
/// [crate::parser::p_tree2gen] to convert this to [crate::tree::GenTokTree]
|
|
/// for embedding in the return value.
|
|
pub async fn recurse(&self, tail: &'a str) -> OrcRes<(&'a str, PTokTree)> {
|
|
let start = self.pos(tail);
|
|
let Some(lx) = request(api::SubLex { pos: start, id: self.id }).await else {
|
|
return Err(err_cascade().await);
|
|
};
|
|
let tree = PTokTree::from_api(lx.tree, &mut { self.exprs }, &mut (), &self.src).await;
|
|
Ok((&self.text[lx.pos as usize..], tree))
|
|
}
|
|
/// Find the index of a cursor given the remaining, not-yet-consumed text
|
|
pub fn pos(&self, tail: &'a str) -> u32 { (self.text.len() - tail.len()) as u32 }
|
|
/// Convenience method to find the source position of a token given the text
|
|
/// it was found in and the text after it was parsed.
|
|
pub fn pos_tt(&self, tail_with: &'a str, tail_without: &'a str) -> SrcRange {
|
|
SrcRange::new(self.pos(tail_with)..self.pos(tail_without), &self.src)
|
|
}
|
|
/// Convenience method to find the source position of a token given its length
|
|
/// and the remaining text afterwards. The length can be any number type but
|
|
/// must convert to a u32 without errors
|
|
pub fn pos_lt(&self, len: impl TryInto<u32, Error: fmt::Debug>, tail: &'a str) -> SrcRange {
|
|
SrcRange::new(self.pos(tail) - len.try_into().unwrap()..self.pos(tail), &self.src)
|
|
}
|
|
}
|
|
|
|
/// One or more tokens returned by the parser. In practice, [GenTokTree],
|
|
/// `Vec<GenTokTree>`, or `[GenTokTree; usize]`
|
|
pub trait LexedData {
|
|
fn into_vec(self) -> Vec<GenTokTree>;
|
|
}
|
|
impl LexedData for GenTokTree {
    /// A single token becomes a one-element vector.
    fn into_vec(self) -> Vec<GenTokTree> {
        Vec::from([self])
    }
}
|
|
impl LexedData for Vec<GenTokTree> {
    /// A vector of tokens is already in the target shape and is returned
    /// unchanged.
    fn into_vec(self) -> Vec<GenTokTree> {
        self
    }
}
|
|
impl<const N: usize> LexedData for [GenTokTree; N] {
    /// Move the array's tokens into a vector, preserving their order.
    ///
    /// The array is owned here, so its elements are moved rather than cloned
    /// (which is what `to_vec` would do).
    fn into_vec(self) -> Vec<GenTokTree> {
        Vec::from(self)
    }
}
|
|
|
|
/// A lexer plugin to extend the syntax of Orchid
|
|
pub trait Lexer: Debug + Send + Sync + Sized + Default + 'static {
|
|
/// As an optimization, your lexer will only receive snippets that start with
|
|
/// a character included in one of these ranges. If you have a multi-character
|
|
/// discriminator, include all possible starting chars in this and return
|
|
/// [err_not_applicable] if the entire discriminator was not found
|
|
const CHAR_FILTER: &'static [RangeInclusive<char>];
|
|
/// Attempt to lex some custom syntax from the start of the tail string.
|
|
/// Return the remaining text and the lexed tokens.
|
|
fn lex<'a>(
|
|
tail: &'a str,
|
|
lctx: &'a LexContext<'a>,
|
|
) -> impl Future<Output = OrcRes<(&'a str, impl LexedData)>>;
|
|
}
|
|
|
|
/// Type-erased [Lexer]
|
|
pub trait DynLexer: Debug + Send + Sync + 'static {
|
|
/// Type-erased [Lexer::CHAR_FILTER]
|
|
fn char_filter(&self) -> &'static [RangeInclusive<char>];
|
|
/// Type-erased [Lexer::lex]
|
|
fn lex<'a>(
|
|
&self,
|
|
tail: &'a str,
|
|
ctx: &'a LexContext<'a>,
|
|
) -> LocalBoxFuture<'a, OrcRes<(&'a str, Vec<GenTokTree>)>>;
|
|
}
|
|
|
|
impl<T: Lexer> DynLexer for T {
|
|
fn char_filter(&self) -> &'static [RangeInclusive<char>] { T::CHAR_FILTER }
|
|
fn lex<'a>(
|
|
&self,
|
|
tail: &'a str,
|
|
ctx: &'a LexContext<'a>,
|
|
) -> LocalBoxFuture<'a, OrcRes<(&'a str, Vec<GenTokTree>)>> {
|
|
async { T::lex(tail, ctx).await.map(|(s, d)| (s, d.into_vec())) }.boxed_local()
|
|
}
|
|
}
|
|
|
|
/// Type-erased instance of a lexer that is returned by
|
|
/// [crate::System::lexers]
|
|
pub type LexerObj = &'static dyn DynLexer;
|