use std::collections::VecDeque; use std::ops::Range; use futures::FutureExt; use futures::lock::Mutex; use orchid_base::{ IStr, OrcErrv, OrcRes, PARENS, SrcRange, Sym, clone, is, mk_errv, name_char, name_start, op_char, report, unrep_space, }; use crate::api; use crate::ctx::Ctx; use crate::expr::{Expr, ExprFromApiCtx}; use crate::expr_store::ExprStore; use crate::parsed::{ParsTok, ParsTokTree, tt_to_api}; use crate::system::System; pub struct LexCtx<'a> { pub systems: &'a [System], pub source: &'a IStr, pub path: &'a Sym, pub tail: &'a str, pub sub_trees: &'a mut Vec, pub ctx: &'a Ctx, pub produced: &'a mut VecDeque, } impl<'a> LexCtx<'a> { #[must_use] pub fn sub<'b>(&'b mut self, pos: u32, produced: &'b mut VecDeque) -> LexCtx<'b> where 'a: 'b { LexCtx { source: self.source, path: self.path, tail: &self.source[pos as usize..], systems: self.systems, sub_trees: &mut *self.sub_trees, ctx: self.ctx, produced, } } #[must_use] pub fn get_pos(&self) -> u32 { self.end_pos() - self.tail.len() as u32 } #[must_use] pub fn end_pos(&self) -> u32 { self.source.len() as u32 } pub fn set_pos(&mut self, pos: u32) { self.tail = &self.source[pos as usize..] } pub fn push_pos(&mut self, delta: u32) { self.set_pos(self.get_pos() + delta) } pub fn set_tail(&mut self, tail: &'a str) { self.tail = tail } pub fn pos_from(&self, tail: &'a str) -> u32 { (self.source.len() - tail.len()) as u32 } #[must_use] pub fn strip_prefix(&mut self, tgt: &str) -> bool { if let Some(src) = self.tail.strip_prefix(tgt) { self.tail = src; return true; } false } #[must_use] pub async fn ser_subtree(&mut self, subtree: ParsTokTree, exprs: ExprStore) -> api::TokenTree { tt_to_api(&mut { exprs }, subtree).await } #[must_use] pub async fn des_subtree( &mut self, tree: api::TokenTree, sys: api::SysId, exprs: ExprStore, ) -> ParsTokTree { let mut cx = ExprFromApiCtx { ctx: self.ctx.clone(), sys }; ParsTokTree::from_api(tree, &mut { exprs }, &mut cx, self.path).await } #[must_use] pub fn strip_char(&mut self, tgt: char) -> bool { if let Some(src) = self.tail.strip_prefix(tgt) { self.tail = src; return true; } false } pub fn trim(&mut self, filter: impl Fn(char) -> bool) { self.tail = self.tail.trim_start_matches(filter); } pub fn trim_ws(&mut self) { self.trim(|c| c.is_whitespace() && !"\r\n".contains(c)) } #[must_use] pub fn get_start_matches(&mut self, filter: impl Fn(char) -> bool) -> &'a str { let rest = self.tail.trim_start_matches(filter); let matches = &self.tail[..self.tail.len() - rest.len()]; self.tail = rest; matches } pub fn pop_char(&mut self) -> Option { let mut chars = self.tail.chars(); let ret = chars.next()?; self.tail = chars.as_str(); Some(ret) } pub fn sr_to(&self, start: u32) -> SrcRange { self.sr(start..self.get_pos()) } pub fn sr(&self, range: Range) -> SrcRange { SrcRange::new(range, self.path) } } pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes { ctx.trim(unrep_space); if ctx.tail.is_empty() { return Ok(false); } let start = ctx.get_pos(); let tok = if ctx.strip_prefix("\r\n") || ctx.strip_prefix("\r") || ctx.strip_prefix("\n") { ParsTok::BR } else if let Some(tail) = (ctx.tail.starts_with(name_start).then_some(ctx.tail)) .and_then(|t| t.trim_start_matches(name_char).strip_prefix("::")) { let name = &ctx.tail[..ctx.tail.len() - tail.len() - "::".len()]; let mut produced = VecDeque::new(); let mut sub_cx = ctx.sub(ctx.pos_from(tail), &mut produced); if !lex_once(&mut sub_cx).boxed_local().await? { return Err(mk_errv( is("Unexpected end of source text").await, ":: cannot be the last token", [SrcRange::new(start..ctx.get_pos(), ctx.path)], )); } let pos = sub_cx.get_pos(); ctx.set_pos(pos); let body = produced.pop_front().expect("lex_once returned true"); ctx.produced.extend(produced.into_iter()); ParsTok::NS(is(name).await, Box::new(body)) } else if ctx.strip_prefix("--[") { let Some((cmt, tail)) = ctx.tail.split_once("]--") else { return Err(mk_errv( is("Unterminated block comment").await, "This block comment has no ending ]--", [SrcRange::new(start..start + 3, ctx.path)], )); }; ctx.set_tail(tail); ParsTok::Comment(is(cmt).await) } else if let Some(tail) = ctx.tail.strip_prefix("--").filter(|t| !t.starts_with(op_char)) { let end = tail.find(['\n', '\r']).map_or(tail.len(), |n| n - 1); ctx.push_pos(end as u32); ParsTok::Comment(is(&tail[2..end]).await) } else if let Some(tail) = ctx.tail.strip_prefix('\\').filter(|t| t.starts_with(name_start)) { // fanciness like \$placeh in templates is resolved in the macro engine. let start = ctx.get_pos(); let mut produced = VecDeque::new(); let mut sub_cx = ctx.sub(ctx.pos_from(tail), &mut produced); if !lex_once(&mut sub_cx).boxed_local().await? { return Err(mk_errv( is("Unexpected end of file").await, "Expected a lambda argument and body", [SrcRange::new(start..ctx.get_pos(), ctx.path)], )); } let pos = sub_cx.get_pos(); ctx.set_pos(pos); let arg = produced.pop_front().expect("lex_once returned true"); ctx.produced.extend(produced); ctx.trim_ws(); ParsTok::LambdaHead(Box::new(arg)) } else if let Some((lp, rp, paren)) = PARENS.iter().find(|(lp, ..)| ctx.strip_char(*lp)) { let mut body = VecDeque::new(); ctx.trim_ws(); while !ctx.strip_char(*rp) { let mut sub_cx = ctx.sub(ctx.get_pos(), &mut body); if !lex_once(&mut sub_cx).boxed_local().await? { return Err(mk_errv( is("unclosed paren").await, format!("this {lp} has no matching {rp}"), [SrcRange::new(start..start + 1, ctx.path)], )); } let pos = sub_cx.get_pos(); ctx.set_pos(pos); ctx.trim_ws(); } ParsTok::S(*paren, body.into_iter().collect()) } else if let Some(res) = sys_lex(ctx).await { let token = res?; ctx.produced.extend(token); return Ok(true); } else if ctx.tail.starts_with(name_start) { ParsTok::Name(is(ctx.get_start_matches(name_char)).await) } else if ctx.tail.starts_with(op_char) { let whole_tail = ctx.tail; ctx.pop_char().expect("The above check would have failed"); let mut tail_after_op = ctx.tail; let mut lookahead = Vec::new(); while !ctx.tail.is_empty() && ctx.tail.starts_with(op_char) { match sys_lex(ctx).await { None => { ctx.pop_char(); tail_after_op = ctx.tail; }, Some(sys_res) => { match sys_res { Err(e) => report(e), Ok(tokv) => lookahead = tokv, } break; }, } } let op_str = &whole_tail[0..whole_tail.len() - tail_after_op.len()]; ctx.produced.push_back(ParsTok::Name(is(op_str).await).at(ctx.sr_to(start))); ctx.produced.extend(lookahead); return Ok(true); } else { return Err(mk_errv( is("Unrecognized character").await, "The following syntax is meaningless.", [SrcRange::new(start..start + 1, ctx.path)], )); }; ctx.produced.push_back(ParsTokTree { tok, sr: ctx.sr_to(start) }); Ok(true) } /// Parse one token via any of the systems, if we can /// /// This function never writes lookahead pub async fn sys_lex(ctx: &mut LexCtx<'_>) -> Option>> { for sys in ctx.systems { let mut errors = Vec::new(); if ctx.tail.starts_with(|c| sys.can_lex(c)) { let (source, pos, path) = (ctx.source.clone(), ctx.get_pos(), ctx.path.clone()); let temp_store = ctx.ctx.exprs.derive(); let ctx_lck = &Mutex::new(&mut *ctx); let errors_lck = &Mutex::new(&mut errors); let temp_store_cb = temp_store.clone(); let lx = sys .lex(source, path, pos, |pos| { clone!(temp_store_cb); async move { let mut ctx_g = ctx_lck.lock().await; let mut produced = VecDeque::new(); let mut sub_cx = ctx_g.sub(pos, &mut produced); let lex_res = lex_once(&mut sub_cx).boxed_local().await; let pos1 = sub_cx.get_pos(); ctx_g.set_pos(pos1); match lex_res { Ok(false) => { errors_lck.lock().await.push(mk_errv( is("End of file").await, "Unexpected end of source text", [ctx_g.sr_to(pos)], )); None }, Ok(true) => { let tok = produced.pop_front().unwrap(); Some(api::SubLexed { pos: tok.sr.end(), tree: ctx_g.ser_subtree(tok, temp_store_cb.clone()).await, }) }, Err(e) => { errors_lck.lock().await.push(e); None }, } } }) .await; match lx { Err(e) => { return Some(Err(errors.into_iter().fold(OrcErrv::from_api(e).await, |a, b| a + b))); }, Ok(Some(lexed)) => { ctx.set_pos(lexed.pos); let mut stable_trees = Vec::new(); for tok in lexed.expr { let tree = ctx.des_subtree(tok, sys.id(), temp_store.clone()).await; stable_trees.push(tree.recur(&|tt, r| { if let ParsTok::NewExpr(expr) = tt.tok { return ParsTok::Handle(expr).at(tt.sr); } r(tt) })); } return Some(Ok(stable_trees)); }, Ok(None) => match errors.into_iter().reduce(|a, b| a + b) { Some(errors) => return Some(Err(errors)), None => continue, }, } } } None } pub async fn lex(text: IStr, path: Sym, systems: &[System], ctx: &Ctx) -> OrcRes> { let mut sub_trees = Vec::new(); let mut produced = VecDeque::new(); let mut ctx = LexCtx { source: &text, sub_trees: &mut sub_trees, tail: &text[..], systems, path: &path, ctx, produced: &mut produced, }; ctx.trim(unrep_space); while lex_once(&mut ctx).await? { ctx.trim(unrep_space); } Ok(produced.into()) }