use std::rc::Rc;

use futures::FutureExt;
use futures::lock::Mutex;
use orchid_base::clone;
use orchid_base::error::{OrcErrv, OrcRes, mk_errv};
use orchid_base::interner::Tok;
use orchid_base::location::SrcRange;
use orchid_base::name::Sym;
use orchid_base::parse::{name_char, name_start, op_char, unrep_space};
use orchid_base::tokens::PARENS;
use orchid_base::tree::recur;

use crate::api;
use crate::ctx::Ctx;
use crate::expr::{Expr, ExprParseCtx};
use crate::expr_store::ExprStore;
use crate::parsed::{ParsTok, ParsTokTree, tt_to_api};
use crate::system::System;

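/// Shared lexer state: the interned source text, the remaining unlexed `tail`,
/// and the systems that may contribute their own tokens.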
pub struct LexCtx<'a> {
  pub systems: &'a [System],
  pub source: &'a Tok<String>,
  pub path: &'a Sym,
  pub tail: &'a str,
  pub sub_trees: &'a mut Vec<Expr>,
  pub ctx: &'a Ctx,
}
impl<'a> LexCtx<'a> {
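  /// Reborrow this context with the tail reset to byte offset `pos` of the source.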
  #[must_use]
  pub fn push<'b>(&'b mut self, pos: u32) -> LexCtx<'b>
  where 'a: 'b {
    LexCtx {
      source: self.source,
      path: self.path,
      tail: &self.source[pos as usize..],
      systems: self.systems,
      sub_trees: &mut *self.sub_trees,
      ctx: self.ctx,
    }
  }
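  // Cursor helpers: positions are byte offsets into `source`, derived from `tail`.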
  #[must_use]
  pub fn get_pos(&self) -> u32 { self.end_pos() - self.tail.len() as u32 }
  #[must_use]
  pub fn end_pos(&self) -> u32 { self.source.len() as u32 }
  pub fn set_pos(&mut self, pos: u32) { self.tail = &self.source[pos as usize..] }
  pub fn push_pos(&mut self, delta: u32) { self.set_pos(self.get_pos() + delta) }
  pub fn set_tail(&mut self, tail: &'a str) { self.tail = tail }
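  /// Consume `tgt` from the front of the tail, returning whether it matched.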
  #[must_use]
  pub fn strip_prefix(&mut self, tgt: &str) -> bool {
    if let Some(src) = self.tail.strip_prefix(tgt) {
      self.tail = src;
      return true;
    }
    false
  }
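  /// Convert a lexed subtree to its API form with [tt_to_api], backed by `exprs`.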
  #[must_use]
  pub async fn ser_subtree(&mut self, subtree: ParsTokTree, exprs: ExprStore) -> api::TokenTree {
    tt_to_api(&mut { exprs }, subtree).await
  }
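  /// Inverse of [Self::ser_subtree]: rebuild a [ParsTokTree] from its API form.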
  #[must_use]
  pub async fn des_subtree(&mut self, tree: &api::TokenTree, exprs: ExprStore) -> ParsTokTree {
    ParsTokTree::from_api(
      tree,
      &mut { exprs },
      &mut ExprParseCtx { ctx: self.ctx, exprs: &self.ctx.common_exprs },
      self.path,
      &self.ctx.i,
    )
    .await
  }
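  /// Like [Self::strip_prefix], but for a single character.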
  #[must_use]
  pub fn strip_char(&mut self, tgt: char) -> bool {
    if let Some(src) = self.tail.strip_prefix(tgt) {
      self.tail = src;
      return true;
    }
    false
  }
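  /// Drop leading characters matching `filter`. [Self::trim_ws] spares `\r` and
  /// `\n` so that line breaks still reach the lexer as [ParsTok::BR].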
  pub fn trim(&mut self, filter: impl Fn(char) -> bool) {
    self.tail = self.tail.trim_start_matches(filter);
  }
  pub fn trim_ws(&mut self) { self.trim(|c| c.is_whitespace() && !"\r\n".contains(c)) }
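  /// Consume and return the longest prefix whose characters all match `filter`.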
  #[must_use]
  pub fn get_start_matches(&mut self, filter: impl Fn(char) -> bool) -> &'a str {
    let rest = self.tail.trim_start_matches(filter);
    let matches = &self.tail[..self.tail.len() - rest.len()];
    self.tail = rest;
    matches
  }
}

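/// Lex a single token tree from the front of `ctx.tail`. The tail must be
/// non-empty and must not start with insignificant whitespace.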
pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes<ParsTokTree> {
  let start = ctx.get_pos();
  assert!(
    !ctx.tail.is_empty() && !ctx.tail.starts_with(unrep_space),
    "Lexing empty string or whitespace to token!\n\
    Invocations of lex_once should check for empty string"
  );
  let tok = if ctx.strip_prefix("\r\n") || ctx.strip_prefix("\r") || ctx.strip_prefix("\n") {
    ParsTok::BR
  } else if let Some(tail) = (ctx.tail.starts_with(name_start).then_some(ctx.tail))
    .and_then(|t| t.trim_start_matches(name_char).strip_prefix("::"))
  {
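    // Namespaced name: everything before the :: is the prefix, and the
    // qualified token after it is lexed recursively.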
    let name = &ctx.tail[..ctx.tail.len() - tail.len() - "::".len()];
    ctx.set_tail(tail);
    let body = lex_once(ctx).boxed_local().await?;
    ParsTok::NS(ctx.ctx.i.i(name).await, Box::new(body))
  } else if ctx.strip_prefix("--[") {
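    // Block comment: everything up to the matching ]-- is comment text.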
    let Some((cmt, tail)) = ctx.tail.split_once("]--") else {
      return Err(mk_errv(
        ctx.ctx.i.i("Unterminated block comment").await,
        "This block comment has no ending ]--",
        [SrcRange::new(start..start + 3, ctx.path)],
      ));
    };
    ctx.set_tail(tail);
    ParsTok::Comment(Rc::new(cmt.to_string()))
  } else if let Some(tail) = ctx.tail.strip_prefix("--").filter(|t| !t.starts_with(op_char)) {
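    // Line comment: runs to the end of the line; the line break itself stays in
    // the tail so it is lexed separately as a BR token.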
    let end = tail.find(['\n', '\r']).unwrap_or(tail.len());
    ctx.push_pos(end as u32 + 2);
    ParsTok::Comment(Rc::new(tail[..end].to_string()))
  } else if let Some(tail) = ctx.tail.strip_prefix('\\').filter(|t| t.starts_with(name_start)) {
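    // A backslash introduces a lambda head; its argument is lexed as one token tree.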
    // fanciness like \$placeh in templates is resolved in the macro engine.
    ctx.set_tail(tail);
    let arg = lex_once(ctx).boxed_local().await?;
    ctx.trim_ws();
    ParsTok::LambdaHead(Box::new(arg))
  } else if let Some((lp, rp, paren)) = PARENS.iter().find(|(lp, ..)| ctx.strip_char(*lp)) {
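    // Bracketed group: lex token trees until the matching right paren.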
    let mut body = Vec::new();
    ctx.trim_ws();
    while !ctx.strip_char(*rp) {
      if ctx.tail.is_empty() {
        return Err(mk_errv(
          ctx.ctx.i.i("unclosed paren").await,
          format!("this {lp} has no matching {rp}"),
          [SrcRange::new(start..start + 1, ctx.path)],
        ));
      }
      body.push(lex_once(ctx).boxed_local().await?);
      ctx.trim_ws();
    }
    ParsTok::S(*paren, body)
  } else {
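    // Fallback: offer the input to system (extension) lexers first, then try
    // plain names and operators.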
    for sys in ctx.systems {
      let mut errors = Vec::new();
      if ctx.tail.starts_with(|c| sys.can_lex(c)) {
        let (source, pos, path) = (ctx.source.clone(), ctx.get_pos(), ctx.path.clone());
        let ctx_lck = &Mutex::new(&mut *ctx);
        let errors_lck = &Mutex::new(&mut errors);
        let temp_store = sys.ext().exprs().derive(true);
        let temp_store_cb = temp_store.clone();
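        // The callback lets the extension hand control back to this lexer for
        // subexpressions; successful subtrees are serialized through the
        // temporary store and failures are collected in `errors`.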
        let lx = sys
          .lex(source, path, pos, |pos| {
            clone!(temp_store_cb);
            async move {
              let mut ctx_g = ctx_lck.lock().await;
              match lex_once(&mut ctx_g.push(pos)).boxed_local().await {
                Ok(t) => Some(api::SubLexed {
                  pos: t.sr.end(),
                  tree: ctx_g.ser_subtree(t, temp_store_cb.clone()).await,
                }),
                Err(e) => {
                  errors_lck.lock().await.push(e);
                  None
                },
              }
            }
          })
          .await;
        match lx {
          Err(e) =>
            return Err(
              errors.into_iter().fold(OrcErrv::from_api(&e, &ctx.ctx.i).await, |a, b| a + b),
            ),
          Ok(Some(lexed)) => {
            ctx.set_pos(lexed.pos);
            let lexed_tree = ctx.des_subtree(&lexed.expr, temp_store).await;
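            // Rewrite NewExpr leaves returned by the extension into Handle tokens
            // before handing the tree back to the caller.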
            let stable_tree = recur(lexed_tree, &|tt, r| {
              if let ParsTok::NewExpr(expr) = tt.tok {
                return ParsTok::Handle(expr).at(tt.sr);
              }
              r(tt)
            });
            return Ok(stable_tree);
          },
          Ok(None) => match errors.into_iter().reduce(|a, b| a + b) {
            Some(errors) => return Err(errors),
            None => continue,
          },
        }
      }
    }
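    // No system claimed this input; lex a plain name or operator token.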
    if ctx.tail.starts_with(name_start) {
      ParsTok::Name(ctx.ctx.i.i(ctx.get_start_matches(name_char)).await)
    } else if ctx.tail.starts_with(op_char) {
      ParsTok::Name(ctx.ctx.i.i(ctx.get_start_matches(op_char)).await)
    } else {
      return Err(mk_errv(
        ctx.ctx.i.i("Unrecognized character").await,
        "The following syntax is meaningless.",
        [SrcRange::new(start..start + 1, ctx.path)],
      ));
    }
  };
  Ok(ParsTokTree { tok, sr: SrcRange::new(start..ctx.get_pos(), ctx.path) })
}

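/// Lex a whole source file into a flat sequence of token trees, skipping
/// insignificant whitespace between tokens.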
pub async fn lex(
  text: Tok<String>,
  path: Sym,
  systems: &[System],
  ctx: &Ctx,
) -> OrcRes<Vec<ParsTokTree>> {
  let mut sub_trees = Vec::new();
  let mut ctx =
    LexCtx { source: &text, sub_trees: &mut sub_trees, tail: &text[..], systems, path: &path, ctx };
  let mut tokv = Vec::new();
  ctx.trim(unrep_space);
  while !ctx.tail.is_empty() {
    tokv.push(lex_once(&mut ctx).await?);
    ctx.trim(unrep_space);
  }
  Ok(tokv)
}