Changes in api and upwards
- Removed out-of-stack error reporting - Revised module system to match previous Orchid system - Errors are now in a Vec everywhere - Implemented atoms and lexer - Started implementation of line parser - Tree is now ephemeral to avoid copying Atoms held inside - Moved numbers into std and the shared parser into base - Started implementation of Commands
This commit is contained in:
@@ -1,44 +1,70 @@
|
||||
use orchid_api::tree::Paren;
|
||||
use orchid_base::intern;
|
||||
use orchid_base::interner::Tok;
|
||||
use orchid_base::location::Pos;
|
||||
use orchid_base::number::parse_num;
|
||||
use std::num::NonZeroU64;
|
||||
|
||||
use crate::extension::System;
|
||||
use crate::results::{mk_err, num_to_err, OwnedResult};
|
||||
use hashbrown::HashMap;
|
||||
use orchid_api::parser::SubLexed;
|
||||
use orchid_api::system::SysId;
|
||||
use orchid_api::tree::{Paren, Token, TokenTree, TreeTicket};
|
||||
use orchid_base::error::OwnedError;
|
||||
use orchid_base::intern;
|
||||
use orchid_base::interner::{deintern, intern, Tok};
|
||||
use orchid_base::location::Pos;
|
||||
use orchid_base::tokens::OwnedPh;
|
||||
|
||||
use crate::extension::{AtomHand, System};
|
||||
use crate::results::{mk_err, OwnedResult};
|
||||
use crate::tree::{OwnedTok, OwnedTokTree};
|
||||
|
||||
pub struct LexCtx<'a> {
|
||||
pub systems: &'a [System],
|
||||
pub source: Tok<String>,
|
||||
pub src: &'a str,
|
||||
pub source: &'a Tok<String>,
|
||||
pub tail: &'a str,
|
||||
pub sub_trees: &'a mut HashMap<TreeTicket, OwnedTokTree>,
|
||||
}
|
||||
impl<'a> LexCtx<'a> {
|
||||
pub fn get_pos(&self) -> u32 { self.source.len() as u32 - self.src.len() as u32 }
|
||||
pub fn push<'b>(&'b mut self, pos: u32) -> LexCtx<'b>
|
||||
where 'a: 'b {
|
||||
LexCtx {
|
||||
source: self.source,
|
||||
tail: &self.source[pos as usize..],
|
||||
systems: self.systems,
|
||||
sub_trees: &mut *self.sub_trees,
|
||||
}
|
||||
}
|
||||
pub fn get_pos(&self) -> u32 { self.end_pos() - self.tail.len() as u32 }
|
||||
pub fn end_pos(&self) -> u32 { self.source.len() as u32 }
|
||||
pub fn set_pos(&mut self, pos: u32) { self.tail = &self.source[pos as usize..] }
|
||||
pub fn push_pos(&mut self, delta: u32) { self.set_pos(self.get_pos() + delta) }
|
||||
pub fn set_tail(&mut self, tail: &'a str) { self.tail = tail }
|
||||
pub fn strip_prefix(&mut self, tgt: &str) -> bool {
|
||||
if let Some(src) = self.src.strip_prefix(tgt) {
|
||||
self.src = src;
|
||||
if let Some(src) = self.tail.strip_prefix(tgt) {
|
||||
self.tail = src;
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
pub fn add_subtree(&mut self, subtree: OwnedTokTree) -> TreeTicket {
|
||||
let next_idx = TreeTicket(NonZeroU64::new(self.sub_trees.len() as u64 + 1).unwrap());
|
||||
self.sub_trees.insert(next_idx, subtree);
|
||||
next_idx
|
||||
}
|
||||
pub fn rm_subtree(&mut self, ticket: TreeTicket) -> OwnedTokTree {
|
||||
self.sub_trees.remove(&ticket).unwrap()
|
||||
}
|
||||
pub fn strip_char(&mut self, tgt: char) -> bool {
|
||||
if let Some(src) = self.src.strip_prefix(tgt) {
|
||||
self.src = src;
|
||||
if let Some(src) = self.tail.strip_prefix(tgt) {
|
||||
self.tail = src;
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
pub fn trim(&mut self, filter: impl Fn(char) -> bool) {
|
||||
self.src = self.src.trim_start_matches(filter);
|
||||
}
|
||||
pub fn trim_ws(&mut self, br: bool) {
|
||||
self.trim(|c| c.is_whitespace() && br || !"\r\n".contains(c))
|
||||
self.tail = self.tail.trim_start_matches(filter);
|
||||
}
|
||||
pub fn trim_ws(&mut self) { self.trim(|c| c.is_whitespace() && !"\r\n".contains(c)) }
|
||||
pub fn get_start_matches(&mut self, filter: impl Fn(char) -> bool) -> &'a str {
|
||||
let rest = self.src.trim_start_matches(filter);
|
||||
let matches = &self.src[..self.src.len() - rest.len()];
|
||||
self.src = rest;
|
||||
let rest = self.tail.trim_start_matches(filter);
|
||||
let matches = &self.tail[..self.tail.len() - rest.len()];
|
||||
self.tail = rest;
|
||||
matches
|
||||
}
|
||||
}
|
||||
@@ -46,73 +72,129 @@ impl<'a> LexCtx<'a> {
|
||||
const PARENS: &[(char, char, Paren)] =
|
||||
&[('(', ')', Paren::Round), ('[', ']', Paren::Square), ('{', '}', Paren::Curly)];
|
||||
|
||||
pub fn lex_tok(ctx: &mut LexCtx, br: bool) -> OwnedResult<OwnedTokTree> {
|
||||
pub fn lex_once(ctx: &mut LexCtx) -> OwnedResult<OwnedTokTree> {
|
||||
let start = ctx.get_pos();
|
||||
assert!(
|
||||
!ctx.src.is_empty() && !ctx.src.starts_with(char::is_whitespace),
|
||||
"Lexing empty string or whitespace to token! Invocations of lex_tok should check for empty string"
|
||||
!ctx.tail.is_empty() && !ctx.tail.starts_with(unrep_space),
|
||||
"Lexing empty string or whitespace to token!\n\
|
||||
Invocations of lex_tok should check for empty string"
|
||||
);
|
||||
for (open, close, paren) in PARENS {
|
||||
let paren_pos = ctx.get_pos();
|
||||
if ctx.strip_char(*open) {
|
||||
let mut body = Vec::new();
|
||||
return loop {
|
||||
ctx.trim_ws(true);
|
||||
if ctx.strip_char(*close) {
|
||||
break Ok(OwnedTokTree {
|
||||
tok: OwnedTok::S(paren.clone(), body),
|
||||
range: paren_pos..ctx.get_pos(),
|
||||
});
|
||||
} else if ctx.src.is_empty() {
|
||||
return Err(vec![mk_err(
|
||||
intern!(str: "unclosed paren"),
|
||||
format!("this {open} has no matching {close}"),
|
||||
[Pos::Range(paren_pos..paren_pos + 1).into()],
|
||||
)]);
|
||||
}
|
||||
body.push(lex_tok(ctx, true)?);
|
||||
};
|
||||
}
|
||||
}
|
||||
if ctx.strip_char('\\') {
|
||||
let bs_pos = ctx.get_pos() - 1;
|
||||
let tok = if ctx.strip_prefix("\r\n") || ctx.strip_prefix("\r") || ctx.strip_prefix("\n") {
|
||||
OwnedTok::BR
|
||||
} else if ctx.strip_prefix("::") {
|
||||
OwnedTok::NS
|
||||
} else if ctx.strip_prefix("--[") {
|
||||
let (cmt, tail) = ctx.tail.split_once("]--").ok_or_else(|| {
|
||||
vec![mk_err(
|
||||
intern!(str: "Unterminated block comment"),
|
||||
"This block comment has no ending ]--",
|
||||
[Pos::Range(start..start + 3).into()],
|
||||
)]
|
||||
})?;
|
||||
ctx.set_tail(tail);
|
||||
OwnedTok::Comment(cmt.to_string())
|
||||
} else if let Some(tail) = ctx.tail.strip_prefix("--").filter(|t| !t.starts_with(op_char)) {
|
||||
let end = tail.find(['\n', '\r']).map_or(tail.len(), |n| n - 1);
|
||||
ctx.push_pos(end as u32);
|
||||
OwnedTok::Comment(tail[2..end].to_string())
|
||||
} else if ctx.strip_char('\\') {
|
||||
let mut arg = Vec::new();
|
||||
loop {
|
||||
ctx.trim_ws(true);
|
||||
if ctx.strip_char('.') {
|
||||
break;
|
||||
} else if ctx.src.is_empty() {
|
||||
ctx.trim_ws();
|
||||
while !ctx.strip_char('.') {
|
||||
if ctx.tail.is_empty() {
|
||||
return Err(vec![mk_err(
|
||||
intern!(str: "Unclosed lambda"),
|
||||
"Lambdae started with \\ should separate arguments from body with .",
|
||||
[Pos::Range(bs_pos..bs_pos + 1).into()],
|
||||
[Pos::Range(start..start + 1).into()],
|
||||
)]);
|
||||
}
|
||||
arg.push(lex_tok(ctx, true)?);
|
||||
arg.push(lex_once(ctx)?);
|
||||
ctx.trim_ws();
|
||||
}
|
||||
OwnedTok::Lambda(arg)
|
||||
} else if let Some((lp, rp, paren)) = PARENS.iter().find(|(lp, ..)| ctx.strip_char(*lp)) {
|
||||
let mut body = Vec::new();
|
||||
return loop {
|
||||
ctx.trim_ws(br);
|
||||
let pos_before_end = ctx.get_pos();
|
||||
if !br && ctx.strip_char('\n')
|
||||
|| PARENS.iter().any(|(_, e, _)| ctx.strip_char(*e))
|
||||
|| ctx.src.is_empty()
|
||||
{
|
||||
break Ok(OwnedTokTree { tok: OwnedTok::Lambda(arg, body), range: bs_pos..pos_before_end });
|
||||
ctx.trim_ws();
|
||||
while !ctx.strip_char(*rp) {
|
||||
if ctx.tail.is_empty() {
|
||||
return Err(vec![mk_err(
|
||||
intern!(str: "unclosed paren"),
|
||||
format!("this {lp} has no matching {rp}"),
|
||||
[Pos::Range(start..start + 1).into()],
|
||||
)]);
|
||||
}
|
||||
body.push(lex_tok(ctx, br)?);
|
||||
};
|
||||
}
|
||||
if ctx.src.starts_with(char::is_numeric) {
|
||||
let num_pos = ctx.get_pos();
|
||||
let num_str = ctx.get_start_matches(|c| c.is_alphanumeric() || "._".contains(c));
|
||||
return Ok(OwnedTokTree {
|
||||
range: num_pos..ctx.get_pos(),
|
||||
tok: match parse_num(num_str) {
|
||||
Err(e) => OwnedTok::Bottom(num_to_err(e, num_pos)),
|
||||
Ok(v) => todo!(),
|
||||
},
|
||||
});
|
||||
}
|
||||
for sys in ctx.systems {}
|
||||
todo!()
|
||||
body.push(lex_once(ctx)?);
|
||||
ctx.trim_ws();
|
||||
}
|
||||
OwnedTok::S(paren.clone(), body)
|
||||
} else {
|
||||
for sys in ctx.systems {
|
||||
let mut errors = Vec::new();
|
||||
if ctx.tail.starts_with(|c| sys.can_lex(c)) {
|
||||
let lexed = sys.lex(ctx.source.clone(), ctx.get_pos(), |pos| {
|
||||
let mut sub_ctx = ctx.push(pos);
|
||||
let ott = lex_once(&mut sub_ctx).inspect_err(|e| errors.extend(e.iter().cloned())).ok()?;
|
||||
Some(SubLexed { pos: sub_ctx.get_pos(), ticket: sub_ctx.add_subtree(ott) })
|
||||
});
|
||||
match lexed {
|
||||
Ok(None) if errors.is_empty() => continue,
|
||||
Ok(None) => return Err(errors),
|
||||
Err(e) => return Err(e.into_iter().map(|e| OwnedError::from_api(&e)).collect()),
|
||||
Ok(Some(lexed)) => {
|
||||
ctx.set_pos(lexed.pos);
|
||||
return Ok(tt_to_owned(&lexed.expr, sys.id(), ctx))
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
if ctx.tail.starts_with(name_start) {
|
||||
OwnedTok::Name(intern(ctx.get_start_matches(name_char)))
|
||||
} else if ctx.tail.starts_with(op_char) {
|
||||
OwnedTok::Name(intern(ctx.get_start_matches(op_char)))
|
||||
} else {
|
||||
return Err(vec![mk_err(
|
||||
intern!(str: "Unrecognized character"),
|
||||
"The following syntax is meaningless.",
|
||||
[Pos::Range(start..start + 1).into()],
|
||||
)]);
|
||||
}
|
||||
};
|
||||
Ok(OwnedTokTree { tok, range: start..ctx.get_pos() })
|
||||
}
|
||||
|
||||
fn name_start(c: char) -> bool { c.is_alphabetic() || c == '_' }
|
||||
fn name_char(c: char) -> bool { name_start(c) || c.is_numeric() }
|
||||
fn op_char(c: char) -> bool { !name_char(c) && !c.is_whitespace() && !"()[]{}:\\".contains(c) }
|
||||
fn unrep_space(c: char) -> bool { c.is_whitespace() && !"\r\n".contains(c) }
|
||||
|
||||
fn tt_to_owned(api: &TokenTree, sys: SysId, ctx: &mut LexCtx<'_>) -> OwnedTokTree {
|
||||
let tok = match &api.token {
|
||||
Token::Atom(atom) => OwnedTok::Atom(AtomHand::from_api(atom.clone().associate(sys))),
|
||||
Token::Ph(ph) => OwnedTok::Ph(OwnedPh::from_api(ph.clone())),
|
||||
Token::Bottom(err) => OwnedTok::Bottom(err.iter().map(OwnedError::from_api).collect()),
|
||||
Token::Lambda(arg) => OwnedTok::Lambda(arg.iter().map(|t| tt_to_owned(t, sys, ctx)).collect()),
|
||||
Token::Name(name) => OwnedTok::Name(deintern(*name)),
|
||||
Token::S(p, b) => OwnedTok::S(p.clone(), b.iter().map(|t| tt_to_owned(t, sys, ctx)).collect()),
|
||||
Token::Slot(id) => return ctx.rm_subtree(*id),
|
||||
Token::BR => OwnedTok::BR,
|
||||
Token::NS => OwnedTok::NS,
|
||||
};
|
||||
OwnedTokTree { range: api.range.clone(), tok }
|
||||
}
|
||||
|
||||
pub fn lex(text: Tok<String>, systems: &[System]) -> OwnedResult<Vec<OwnedTokTree>> {
|
||||
let mut sub_trees = HashMap::new();
|
||||
let mut ctx = LexCtx {
|
||||
source: &text,
|
||||
sub_trees: &mut sub_trees,
|
||||
tail: &text[..],
|
||||
systems,
|
||||
};
|
||||
let mut tokv = Vec::new();
|
||||
ctx.trim(unrep_space);
|
||||
while !ctx.tail.is_empty() {
|
||||
tokv.push(lex_once(&mut ctx)?);
|
||||
ctx.trim(unrep_space);
|
||||
}
|
||||
Ok(tokv)
|
||||
}
|
||||
Reference in New Issue
Block a user