use std::rc::Rc; use futures::FutureExt; use futures::lock::Mutex; use orchid_base::clone; use orchid_base::error::{OrcErrv, OrcRes, mk_errv}; use orchid_base::interner::Tok; use orchid_base::location::SrcRange; use orchid_base::name::Sym; use orchid_base::parse::{name_char, name_start, op_char, unrep_space}; use orchid_base::tokens::PARENS; use orchid_base::tree::recur; use crate::api; use crate::ctx::Ctx; use crate::expr::{Expr, ExprParseCtx}; use crate::expr_store::ExprStore; use crate::parsed::{ParsTok, ParsTokTree, tt_to_api}; use crate::system::System; pub struct LexCtx<'a> { pub systems: &'a [System], pub source: &'a Tok, pub path: &'a Sym, pub tail: &'a str, pub sub_trees: &'a mut Vec, pub ctx: &'a Ctx, } impl<'a> LexCtx<'a> { #[must_use] pub fn push<'b>(&'b mut self, pos: u32) -> LexCtx<'b> where 'a: 'b { LexCtx { source: self.source, path: self.path, tail: &self.source[pos as usize..], systems: self.systems, sub_trees: &mut *self.sub_trees, ctx: self.ctx, } } #[must_use] pub fn get_pos(&self) -> u32 { self.end_pos() - self.tail.len() as u32 } #[must_use] pub fn end_pos(&self) -> u32 { self.source.len() as u32 } pub fn set_pos(&mut self, pos: u32) { self.tail = &self.source[pos as usize..] } pub fn push_pos(&mut self, delta: u32) { self.set_pos(self.get_pos() + delta) } pub fn set_tail(&mut self, tail: &'a str) { self.tail = tail } #[must_use] pub fn strip_prefix(&mut self, tgt: &str) -> bool { if let Some(src) = self.tail.strip_prefix(tgt) { self.tail = src; return true; } false } #[must_use] pub async fn ser_subtree(&mut self, subtree: ParsTokTree, exprs: ExprStore) -> api::TokenTree { tt_to_api(&mut { exprs }, subtree).await } #[must_use] pub async fn des_subtree(&mut self, tree: &api::TokenTree, exprs: ExprStore) -> ParsTokTree { ParsTokTree::from_api( tree, &mut { exprs }, &mut ExprParseCtx { ctx: self.ctx, exprs: &self.ctx.common_exprs }, self.path, &self.ctx.i, ) .await } #[must_use] pub fn strip_char(&mut self, tgt: char) -> bool { if let Some(src) = self.tail.strip_prefix(tgt) { self.tail = src; return true; } false } pub fn trim(&mut self, filter: impl Fn(char) -> bool) { self.tail = self.tail.trim_start_matches(filter); } pub fn trim_ws(&mut self) { self.trim(|c| c.is_whitespace() && !"\r\n".contains(c)) } #[must_use] pub fn get_start_matches(&mut self, filter: impl Fn(char) -> bool) -> &'a str { let rest = self.tail.trim_start_matches(filter); let matches = &self.tail[..self.tail.len() - rest.len()]; self.tail = rest; matches } } pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes { let start = ctx.get_pos(); assert!( !ctx.tail.is_empty() && !ctx.tail.starts_with(unrep_space), "Lexing empty string or whitespace to token!\n\ Invocations of lex_tok should check for empty string" ); let tok = if ctx.strip_prefix("\r\n") || ctx.strip_prefix("\r") || ctx.strip_prefix("\n") { ParsTok::BR } else if let Some(tail) = (ctx.tail.starts_with(name_start).then_some(ctx.tail)) .and_then(|t| t.trim_start_matches(name_char).strip_prefix("::")) { let name = &ctx.tail[..ctx.tail.len() - tail.len() - "::".len()]; ctx.set_tail(tail); let body = lex_once(ctx).boxed_local().await?; ParsTok::NS(ctx.ctx.i.i(name).await, Box::new(body)) } else if ctx.strip_prefix("--[") { let Some((cmt, tail)) = ctx.tail.split_once("]--") else { return Err(mk_errv( ctx.ctx.i.i("Unterminated block comment").await, "This block comment has no ending ]--", [SrcRange::new(start..start + 3, ctx.path)], )); }; ctx.set_tail(tail); ParsTok::Comment(Rc::new(cmt.to_string())) } else if let Some(tail) = ctx.tail.strip_prefix("--").filter(|t| !t.starts_with(op_char)) { let end = tail.find(['\n', '\r']).map_or(tail.len(), |n| n - 1); ctx.push_pos(end as u32); ParsTok::Comment(Rc::new(tail[2..end].to_string())) } else if let Some(tail) = ctx.tail.strip_prefix('\\').filter(|t| t.starts_with(name_start)) { // fanciness like \$placeh in templates is resolved in the macro engine. ctx.set_tail(tail); let arg = lex_once(ctx).boxed_local().await?; ctx.trim_ws(); ParsTok::LambdaHead(Box::new(arg)) } else if let Some((lp, rp, paren)) = PARENS.iter().find(|(lp, ..)| ctx.strip_char(*lp)) { let mut body = Vec::new(); ctx.trim_ws(); while !ctx.strip_char(*rp) { if ctx.tail.is_empty() { return Err(mk_errv( ctx.ctx.i.i("unclosed paren").await, format!("this {lp} has no matching {rp}"), [SrcRange::new(start..start + 1, ctx.path)], )); } body.push(lex_once(ctx).boxed_local().await?); ctx.trim_ws(); } ParsTok::S(*paren, body) } else { for sys in ctx.systems { let mut errors = Vec::new(); if ctx.tail.starts_with(|c| sys.can_lex(c)) { let (source, pos, path) = (ctx.source.clone(), ctx.get_pos(), ctx.path.clone()); let ctx_lck = &Mutex::new(&mut *ctx); let errors_lck = &Mutex::new(&mut errors); let temp_store = sys.ext().exprs().derive(true); let temp_store_cb = temp_store.clone(); let lx = sys .lex(source, path, pos, |pos| { clone!(temp_store_cb); async move { let mut ctx_g = ctx_lck.lock().await; match lex_once(&mut ctx_g.push(pos)).boxed_local().await { Ok(t) => Some(api::SubLexed { pos: t.sr.end(), tree: ctx_g.ser_subtree(t, temp_store_cb.clone()).await, }), Err(e) => { errors_lck.lock().await.push(e); None }, } } }) .await; match lx { Err(e) => return Err( errors.into_iter().fold(OrcErrv::from_api(&e, &ctx.ctx.i).await, |a, b| a + b), ), Ok(Some(lexed)) => { ctx.set_pos(lexed.pos); let lexed_tree = ctx.des_subtree(&lexed.expr, temp_store).await; let stable_tree = recur(lexed_tree, &|tt, r| { if let ParsTok::NewExpr(expr) = tt.tok { return ParsTok::Handle(expr).at(tt.sr); } r(tt) }); return Ok(stable_tree); }, Ok(None) => match errors.into_iter().reduce(|a, b| a + b) { Some(errors) => return Err(errors), None => continue, }, } } } if ctx.tail.starts_with(name_start) { ParsTok::Name(ctx.ctx.i.i(ctx.get_start_matches(name_char)).await) } else if ctx.tail.starts_with(op_char) { ParsTok::Name(ctx.ctx.i.i(ctx.get_start_matches(op_char)).await) } else { return Err(mk_errv( ctx.ctx.i.i("Unrecognized character").await, "The following syntax is meaningless.", [SrcRange::new(start..start + 1, ctx.path)], )); } }; Ok(ParsTokTree { tok, sr: SrcRange::new(start..ctx.get_pos(), ctx.path) }) } pub async fn lex( text: Tok, path: Sym, systems: &[System], ctx: &Ctx, ) -> OrcRes> { let mut sub_trees = Vec::new(); let mut ctx = LexCtx { source: &text, sub_trees: &mut sub_trees, tail: &text[..], systems, path: &path, ctx }; let mut tokv = Vec::new(); ctx.trim(unrep_space); while !ctx.tail.is_empty() { tokv.push(lex_once(&mut ctx).await?); ctx.trim(unrep_space); } Ok(tokv) }