Files
orchid/orchid-host/src/lex.rs
Lawrence Bethlenfalvy 9b4c7fa7d7
Some checks failed
Rust / build (push) Has been cancelled
partway through fixes, macro system needs redesign
2026-04-08 18:02:20 +02:00

310 lines
9.5 KiB
Rust

use std::collections::VecDeque;
use std::ops::Range;
use futures::FutureExt;
use futures::lock::Mutex;
use orchid_base::{
IStr, OrcErrv, OrcRes, PARENS, SrcRange, Sym, clone, is, mk_errv, name_char, name_start, op_char,
report, unrep_space,
};
use crate::api;
use crate::ctx::Ctx;
use crate::expr::{Expr, ExprFromApiCtx};
use crate::expr_store::ExprStore;
use crate::parsed::{ParsTok, ParsTokTree, tt_to_api};
use crate::system::System;
/// Cursor plus shared state used while lexing a single source text.
///
/// All positions handled by this type are byte offsets into `source`; the current position is
/// implied by how much of `source` is still left in `tail`.
pub struct LexCtx<'a> {
    /// The complete text being lexed.
    pub source: &'a IStr,
    /// The part of `source` that has not been consumed yet.
    pub tail: &'a str,
    /// Path attached to every source range this context creates.
    pub path: &'a Sym,
    /// Systems whose lexers are offered the input, in order, by `sys_lex`.
    pub systems: &'a [System],
    /// Host context; its expression store is used when exchanging token trees with systems.
    pub ctx: &'a Ctx,
    /// Expressions shared with every child context.
    /// NOTE(review): only forwarded by `sub` in this file - confirm who reads it.
    pub sub_trees: &'a mut Vec<Expr>,
    /// Queue that lexed tokens are appended to.
    pub produced: &'a mut VecDeque<ParsTokTree>,
}
impl<'a> LexCtx<'a> {
    /// Build a child context over the same source that starts at byte offset `pos` and appends
    /// its tokens to `produced` instead of this context's queue.
    ///
    /// Everything else (source, path, systems, sub-trees and host context) is shared with `self`.
    #[must_use]
    pub fn sub<'b>(
        &'b mut self,
        pos: u32,
        produced: &'b mut VecDeque<ParsTokTree>,
    ) -> LexCtx<'b>
    where
        'a: 'b,
    {
        let rest = &self.source[pos as usize..];
        LexCtx {
            systems: self.systems,
            source: self.source,
            path: self.path,
            tail: rest,
            sub_trees: &mut *self.sub_trees,
            ctx: self.ctx,
            produced,
        }
    }

    /// Byte offset of the current position, derived from the length of the unread tail.
    #[must_use]
    pub fn get_pos(&self) -> u32 {
        let total = self.end_pos();
        total - self.tail.len() as u32
    }

    /// Byte offset one past the end of the source.
    #[must_use]
    pub fn end_pos(&self) -> u32 {
        let len = self.source.len();
        len as u32
    }

    /// Move the current position to byte offset `pos`.
    pub fn set_pos(&mut self, pos: u32) {
        let offset = pos as usize;
        self.tail = &self.source[offset..];
    }

    /// Advance the current position by `delta` bytes.
    pub fn push_pos(&mut self, delta: u32) {
        let target = self.get_pos() + delta;
        self.set_pos(target);
    }

    /// Replace the unread tail directly. `tail` is expected to be a suffix of the source, since
    /// positions are computed from its length.
    pub fn set_tail(&mut self, tail: &'a str) {
        self.tail = tail;
    }

    /// Byte offset at which `tail` would start if it is a suffix of the source.
    pub fn pos_from(&self, tail: &'a str) -> u32 {
        let consumed = self.source.len() - tail.len();
        consumed as u32
    }

    /// Consume `tgt` if the unread text starts with it; reports whether anything was consumed.
    #[must_use]
    pub fn strip_prefix(&mut self, tgt: &str) -> bool {
        match self.tail.strip_prefix(tgt) {
            Some(rest) => {
                self.tail = rest;
                true
            },
            None => false,
        }
    }

    /// Convert a token tree into its API representation, using `exprs` as the expression store.
    #[must_use]
    pub async fn ser_subtree(&mut self, subtree: ParsTokTree, exprs: ExprStore) -> api::TokenTree {
        tt_to_api(&mut { exprs }, subtree).await
    }

    /// Convert an API token tree received from system `sys` back into a host token tree, using
    /// `exprs` as the expression store and this context's path.
    #[must_use]
    pub async fn des_subtree(
        &mut self,
        tree: api::TokenTree,
        sys: api::SysId,
        exprs: ExprStore,
    ) -> ParsTokTree {
        let mut from_api_cx = ExprFromApiCtx { ctx: self.ctx.clone(), sys };
        ParsTokTree::from_api(tree, &mut { exprs }, &mut from_api_cx, self.path).await
    }

    /// Consume the character `tgt` if it is next; reports whether anything was consumed.
    #[must_use]
    pub fn strip_char(&mut self, tgt: char) -> bool {
        match self.tail.strip_prefix(tgt) {
            Some(rest) => {
                self.tail = rest;
                true
            },
            None => false,
        }
    }

    /// Skip every leading character for which `filter` returns true.
    pub fn trim(&mut self, filter: impl Fn(char) -> bool) {
        let rest = self.tail.trim_start_matches(filter);
        self.tail = rest;
    }

    /// Skip leading whitespace, but stop at `\r` and `\n`.
    pub fn trim_ws(&mut self) {
        let inline_space = |c: char| !"\r\n".contains(c) && c.is_whitespace();
        self.trim(inline_space);
    }

    /// Consume and return the longest prefix whose characters all satisfy `filter`.
    #[must_use]
    pub fn get_start_matches(&mut self, filter: impl Fn(char) -> bool) -> &'a str {
        let before = self.tail;
        let rest = before.trim_start_matches(filter);
        let (matched, _) = before.split_at(before.len() - rest.len());
        self.tail = rest;
        matched
    }

    /// Consume and return the next character, or `None` at the end of the source.
    pub fn pop_char(&mut self) -> Option<char> {
        let first = self.tail.chars().next()?;
        self.tail = &self.tail[first.len_utf8()..];
        Some(first)
    }

    /// Source range from `start` up to the current position.
    pub fn sr_to(&self, start: u32) -> SrcRange {
        let end = self.get_pos();
        self.sr(start..end)
    }

    /// Source range over `range` within this context's path.
    pub fn sr(&self, range: Range<u32>) -> SrcRange {
        SrcRange::new(range, self.path)
    }
}
/// Lex one token starting at the current position of `ctx`.
///
/// Leading `unrep_space` characters are skipped first. The following forms are then tried in
/// order: line breaks, `name::` namespace prefixes, `--[ ... ]--` block comments, `--` line
/// comments, `\` lambda heads, parenthesized groups, the systems' lexers (see [`sys_lex`]),
/// plain names, and finally operators.
///
/// Lexed tokens are appended to `ctx.produced`. Most branches append exactly one token; the
/// system and operator branches append whatever the system returned as well.
///
/// # Returns
/// `Ok(false)` if nothing but skippable space was left, `Ok(true)` otherwise.
///
/// # Errors
/// Returns an error for an unterminated block comment, an unclosed paren, a `::` or `\` that is
/// followed by the end of the input, a character that starts no known token, and for any error
/// propagated from a nested `lex_once` or from a system lexer.
pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes<bool> {
    ctx.trim(unrep_space);
    if ctx.tail.is_empty() {
        return Ok(false);
    }
    let start = ctx.get_pos();
    let tok = if ctx.strip_prefix("\r\n") || ctx.strip_prefix("\r") || ctx.strip_prefix("\n") {
        ParsTok::BR
    } else if let Some(tail) = (ctx.tail.starts_with(name_start).then_some(ctx.tail))
        .and_then(|t| t.trim_start_matches(name_char).strip_prefix("::"))
    {
        // `tail` starts right after the `::`, so the name is everything before the separator.
        let name = &ctx.tail[..ctx.tail.len() - tail.len() - "::".len()];
        let mut produced = VecDeque::new();
        let mut sub_cx = ctx.sub(ctx.pos_from(tail), &mut produced);
        if !lex_once(&mut sub_cx).boxed_local().await? {
            return Err(mk_errv(
                is("Unexpected end of source text").await,
                ":: cannot be the last token",
                [SrcRange::new(start..ctx.get_pos(), ctx.path)],
            ));
        }
        let pos = sub_cx.get_pos();
        ctx.set_pos(pos);
        // The first token is the namespaced body; anything after it is passed through.
        let body = produced.pop_front().expect("lex_once returned true");
        ctx.produced.extend(produced);
        ParsTok::NS(is(name).await, Box::new(body))
    } else if ctx.strip_prefix("--[") {
        let Some((cmt, tail)) = ctx.tail.split_once("]--") else {
            return Err(mk_errv(
                is("Unterminated block comment").await,
                "This block comment has no ending ]--",
                [SrcRange::new(start..start + 3, ctx.path)],
            ));
        };
        ctx.set_tail(tail);
        ParsTok::Comment(is(cmt).await)
    } else if let Some(tail) = ctx.tail.strip_prefix("--").filter(|t| !t.starts_with(op_char)) {
        // `tail` already excludes the leading `--`. The comment text runs up to, but does not
        // include, the line break (or the end of the input), and lexing resumes at that line
        // break so that it is still emitted as its own token.
        let end = tail.find(['\n', '\r']).unwrap_or(tail.len());
        let (cmt, rest) = tail.split_at(end);
        ctx.set_tail(rest);
        ParsTok::Comment(is(cmt).await)
    } else if let Some(tail) = ctx.tail.strip_prefix('\\').filter(|t| t.starts_with(name_start)) {
        // fanciness like \$placeh in templates is resolved in the macro engine.
        let mut produced = VecDeque::new();
        let mut sub_cx = ctx.sub(ctx.pos_from(tail), &mut produced);
        if !lex_once(&mut sub_cx).boxed_local().await? {
            return Err(mk_errv(
                is("Unexpected end of file").await,
                "Expected a lambda argument and body",
                [SrcRange::new(start..ctx.get_pos(), ctx.path)],
            ));
        }
        let pos = sub_cx.get_pos();
        ctx.set_pos(pos);
        let arg = produced.pop_front().expect("lex_once returned true");
        ctx.produced.extend(produced);
        ctx.trim_ws();
        ParsTok::LambdaHead(Box::new(arg))
    } else if let Some((lp, rp, paren)) = PARENS.iter().find(|(lp, ..)| ctx.strip_char(*lp)) {
        // Collect tokens into `body` until the matching closing character is reached.
        let mut body = VecDeque::new();
        ctx.trim_ws();
        while !ctx.strip_char(*rp) {
            let mut sub_cx = ctx.sub(ctx.get_pos(), &mut body);
            if !lex_once(&mut sub_cx).boxed_local().await? {
                return Err(mk_errv(
                    is("unclosed paren").await,
                    format!("this {lp} has no matching {rp}"),
                    [SrcRange::new(start..start + 1, ctx.path)],
                ));
            }
            let pos = sub_cx.get_pos();
            ctx.set_pos(pos);
            ctx.trim_ws();
        }
        ParsTok::S(*paren, body.into_iter().collect())
    } else if let Some(res) = sys_lex(ctx).await {
        let token = res?;
        ctx.produced.extend(token);
        return Ok(true);
    } else if ctx.tail.starts_with(name_start) {
        ParsTok::Name(is(ctx.get_start_matches(name_char)).await)
    } else if ctx.tail.starts_with(op_char) {
        let whole_tail = ctx.tail;
        ctx.pop_char().expect("The above check would have failed");
        let mut tail_after_op = ctx.tail;
        let mut lookahead = Vec::new();
        // Extend the operator one character at a time, but stop as soon as a system recognizes
        // a token starting at the current character; that token becomes lookahead.
        while ctx.tail.starts_with(op_char) {
            match sys_lex(ctx).await {
                None => {
                    ctx.pop_char();
                    tail_after_op = ctx.tail;
                },
                Some(sys_res) => {
                    match sys_res {
                        Err(e) => report(e),
                        Ok(tokv) => lookahead = tokv,
                    }
                    break;
                },
            }
        }
        let op_str = &whole_tail[0..whole_tail.len() - tail_after_op.len()];
        // The range is computed from the operator's own length: if a system lexed a lookahead
        // token, `ctx` is already positioned after that token.
        let op_sr = ctx.sr(start..start + op_str.len() as u32);
        ctx.produced.push_back(ParsTok::Name(is(op_str).await).at(op_sr));
        ctx.produced.extend(lookahead);
        return Ok(true);
    } else {
        return Err(mk_errv(
            is("Unrecognized character").await,
            "The following syntax is meaningless.",
            [SrcRange::new(start..start + 1, ctx.path)],
        ));
    };
    ctx.produced.push_back(ParsTokTree { tok, sr: ctx.sr_to(start) });
    Ok(true)
}
/// Offer the input at the current position to the systems' lexers, in order, and return the
/// first definite result.
///
/// A system is only consulted if its `can_lex` accepts the next character. While lexing, a
/// system may call back into the host to lex a token at a position of its choosing; that
/// callback runs [`lex_once`] on a child context and hands the first token it produced back to
/// the system in API form.
///
/// This function never writes to `ctx.produced`; the lexed tokens are returned to the caller.
///
/// # Returns
/// - `None` if no system lexed anything. The position of `ctx` is then the same as on entry.
/// - `Some(Ok(tokens))` if a system lexed something. `ctx` is moved to the end position the
///   system reported, and `NewExpr` tokens in the result are replaced by `Handle` tokens.
/// - `Some(Err(_))` if a system failed, or if it declined after one of its callbacks failed.
///   The position of `ctx` is unspecified in this case.
pub async fn sys_lex(ctx: &mut LexCtx<'_>) -> Option<OrcRes<Vec<ParsTokTree>>> {
    for sys in ctx.systems {
        // Errors raised inside the sub-lexing callback while this system is running.
        let mut errors = Vec::new();
        if ctx.tail.starts_with(|c| sys.can_lex(c)) {
            let (source, start, path) = (ctx.source.clone(), ctx.get_pos(), ctx.path.clone());
            let temp_store = ctx.ctx.exprs.derive();
            // The callback needs mutable access to both `ctx` and `errors`; the locks let the
            // closure hand that access to each future it creates.
            let ctx_lck = &Mutex::new(&mut *ctx);
            let errors_lck = &Mutex::new(&mut errors);
            let temp_store_cb = temp_store.clone();
            let lx = sys
                .lex(source, path, start, |pos| {
                    clone!(temp_store_cb);
                    async move {
                        let mut ctx_g = ctx_lck.lock().await;
                        let mut produced = VecDeque::new();
                        let mut sub_cx = ctx_g.sub(pos, &mut produced);
                        let lex_res = lex_once(&mut sub_cx).boxed_local().await;
                        let pos1 = sub_cx.get_pos();
                        ctx_g.set_pos(pos1);
                        match lex_res {
                            Ok(false) => {
                                errors_lck.lock().await.push(mk_errv(
                                    is("End of file").await,
                                    "Unexpected end of source text",
                                    [ctx_g.sr_to(pos)],
                                ));
                                None
                            },
                            Ok(true) => {
                                // Only the first token is handed to the system, together with
                                // the end of that token; the rest of `produced` is discarded.
                                let tok = produced.pop_front().unwrap();
                                Some(api::SubLexed {
                                    pos: tok.sr.end(),
                                    tree: ctx_g.ser_subtree(tok, temp_store_cb.clone()).await,
                                })
                            },
                            Err(e) => {
                                errors_lck.lock().await.push(e);
                                None
                            },
                        }
                    }
                })
                .await;
            match lx {
                Err(e) => {
                    return Some(Err(
                        errors.into_iter().fold(OrcErrv::from_api(e).await, |a, b| a + b),
                    ));
                },
                Ok(Some(lexed)) => {
                    ctx.set_pos(lexed.pos);
                    let mut stable_trees = Vec::new();
                    for tok in lexed.expr {
                        let tree = ctx.des_subtree(tok, sys.id(), temp_store.clone()).await;
                        stable_trees.push(tree.recur(&|tt, r| {
                            if let ParsTok::NewExpr(expr) = tt.tok {
                                return ParsTok::Handle(expr).at(tt.sr);
                            }
                            r(tt)
                        }));
                    }
                    return Some(Ok(stable_trees));
                },
                Ok(None) => match errors.into_iter().reduce(|a, b| a + b) {
                    Some(errors) => return Some(Err(errors)),
                    None => {
                        // The system declined, but its callbacks may already have moved `ctx`.
                        // Put the position back so the next system, or the caller's fallback
                        // lexers, see the same input this system saw.
                        ctx.set_pos(start);
                        continue;
                    },
                },
            }
        }
    }
    None
}
/// Lex the whole of `text` into a flat list of top-level token trees.
///
/// `path` is attached to the source ranges of the tokens, `systems` are the systems whose lexers
/// may take part, and `ctx` is the host context. Lexing stops at the first error, which is
/// returned as-is.
pub async fn lex(
    text: IStr,
    path: Sym,
    systems: &[System],
    ctx: &Ctx,
) -> OrcRes<Vec<ParsTokTree>> {
    let mut tokens = VecDeque::new();
    let mut exprs = Vec::new();
    let mut cursor = LexCtx {
        systems,
        source: &text,
        path: &path,
        tail: &text[..],
        sub_trees: &mut exprs,
        ctx,
        produced: &mut tokens,
    };
    // Skip unrepresented space before every token, and stop once `lex_once` reports that the
    // input is exhausted.
    loop {
        cursor.trim(unrep_space);
        if !lex_once(&mut cursor).await? {
            break;
        }
    }
    Ok(Vec::from(tokens))
}