From 1a25f5247144a1c47a21cefda21a75e38e9ebae5 Mon Sep 17 00:00:00 2001 From: Lawrence Bethlenfalvy Date: Wed, 30 Apr 2025 22:13:09 +0200 Subject: [PATCH] Commit pending merge --- orchid-api/src/lexer.rs | 4 +- orchid-api/src/location.rs | 2 - orchid-api/src/parser.rs | 4 ++ orchid-base/src/location.rs | 22 +++++------ orchid-base/src/number.rs | 12 ++++-- orchid-base/src/parse.rs | 55 +++++++++++++++----------- orchid-base/src/tree.rs | 53 ++++++++++++++----------- orchid-extension/src/entrypoint.rs | 10 +++-- orchid-extension/src/lexer.rs | 13 +++--- orchid-extension/src/tree.rs | 6 +-- orchid-host/src/dealias.rs | 14 +++---- orchid-host/src/extension.rs | 7 +++- orchid-host/src/lex.rs | 30 +++++++++----- orchid-host/src/parse.rs | 36 ++++++++--------- orchid-host/src/parsed.rs | 22 ++++++----- orchid-host/src/tree.rs | 29 ++++++++++++++ orchid-std/src/macros/rule/build.rs | 4 +- orchid-std/src/std/string/str_lexer.rs | 4 +- 18 files changed, 198 insertions(+), 129 deletions(-) diff --git a/orchid-api/src/lexer.rs b/orchid-api/src/lexer.rs index 8a7647c..7e00bbd 100644 --- a/orchid-api/src/lexer.rs +++ b/orchid-api/src/lexer.rs @@ -3,7 +3,7 @@ use std::ops::RangeInclusive; use orchid_api_derive::{Coding, Hierarchy}; use orchid_api_traits::Request; -use crate::{ExtHostReq, HostExtReq, OrcResult, ParsId, SysId, TStr, TokenTree}; +use crate::{ExtHostReq, HostExtReq, OrcResult, ParsId, SysId, TStr, TStrv, TokenTree}; /// - All ranges contain at least one character /// - All ranges are in increasing characeter order @@ -18,6 +18,8 @@ pub struct LexExpr { pub id: ParsId, pub text: TStr, pub pos: u32, + /// Source root module path + pub src: TStrv, } impl Request for LexExpr { type Response = Option>; diff --git a/orchid-api/src/location.rs b/orchid-api/src/location.rs index 55cd13d..361368b 100644 --- a/orchid-api/src/location.rs +++ b/orchid-api/src/location.rs @@ -17,8 +17,6 @@ pub enum Location { Gen(CodeGenInfo), /// Range and file SourceRange(SourceRange), - /// Range only, file implied. Most notably used by parsers - Range(Range), } #[derive(Clone, Debug, Coding)] diff --git a/orchid-api/src/parser.rs b/orchid-api/src/parser.rs index 7102d65..3d0781b 100644 --- a/orchid-api/src/parser.rs +++ b/orchid-api/src/parser.rs @@ -13,7 +13,11 @@ pub struct ParsId(pub NonZeroU64); #[extends(HostExtReq)] pub struct ParseLine { pub sys: SysId, + /// The immediately enclosing module path pub module: TStrv, + /// The root module path for the snipppet of source code, prefix of + /// [ParseLine#module] + pub src: TStrv, pub comments: Vec, pub exported: bool, pub line: Vec, diff --git a/orchid-base/src/location.rs b/orchid-base/src/location.rs index 16b002e..82143c4 100644 --- a/orchid-base/src/location.rs +++ b/orchid-base/src/location.rs @@ -23,15 +23,13 @@ pub enum Pos { Inherit, Gen(CodeGenInfo), /// Range and file - SourceRange(SourceRange), - /// Range only, file implied. Most notably used by parsers - Range(Range), + SrcRange(SrcRange), } impl Pos { pub fn pretty_print(&self, get_src: &mut impl GetSrc) -> String { match self { Self::Gen(g) => g.to_string(), - Self::SourceRange(sr) => sr.pretty_print(&get_src(&sr.path)), + Self::SrcRange(sr) => sr.pretty_print(&get_src(&sr.path)), // Can't pretty print partial and meta-location other => format!("{other:?}"), } @@ -39,17 +37,17 @@ impl Pos { pub async fn from_api(api: &api::Location, i: &Interner) -> Self { match_mapping!(api, api::Location => Pos { None, Inherit, SlotTarget, - Range(r.clone()), Gen(cgi => CodeGenInfo::from_api(cgi, i).await), - SourceRange(sr => SourceRange::from_api(sr, i).await) + } { + api::Location::SourceRange(sr) => Self::SrcRange(SrcRange::from_api(sr, i).await) }) } pub fn to_api(&self) -> api::Location { match_mapping!(self, Pos => api::Location { None, Inherit, SlotTarget, - Range(r.clone()), Gen(cgi.to_api()), - SourceRange(sr.to_api()), + } { + Self::SrcRange(sr) => api::Location::SourceRange(sr.to_api()), }) } } @@ -57,12 +55,12 @@ impl Pos { /// Exact source code location. Includes where the code was loaded from, what /// the original source code was, and a byte range. #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct SourceRange { +pub struct SrcRange { pub(crate) path: Sym, pub(crate) range: Range, } -impl SourceRange { - pub fn new(range: &Range, path: &Sym) -> Self { +impl SrcRange { + pub fn new(range: Range, path: &Sym) -> Self { Self { range: range.clone(), path: path.clone() } } /// Create a dud [SourceRange] for testing. Its value is unspecified and @@ -77,7 +75,7 @@ impl SourceRange { /// 0-based index of last byte + 1 pub fn end(&self) -> u32 { self.range.end } /// Syntactic location - pub fn pos(&self) -> Pos { Pos::SourceRange(self.clone()) } + pub fn pos(&self) -> Pos { Pos::SrcRange(self.clone()) } /// Transform the numeric byte range pub fn map_range(&self, map: impl FnOnce(Range) -> Range) -> Self { Self { range: map(self.range()), path: self.path() } diff --git a/orchid-base/src/number.rs b/orchid-base/src/number.rs index a902a79..7792342 100644 --- a/orchid-base/src/number.rs +++ b/orchid-base/src/number.rs @@ -5,7 +5,8 @@ use ordered_float::NotNan; use crate::error::{OrcErr, mk_err}; use crate::interner::Interner; -use crate::location::Pos; +use crate::location::SrcRange; +use crate::name::Sym; /// A number, either floating point or unsigned int, parsed by Orchid. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] @@ -54,7 +55,12 @@ pub struct NumError { pub kind: NumErrorKind, } -pub async fn num_to_err(NumError { kind, range }: NumError, offset: u32, i: &Interner) -> OrcErr { +pub async fn num_to_err( + NumError { kind, range }: NumError, + offset: u32, + source: &Sym, + i: &Interner, +) -> OrcErr { mk_err( i.i("Failed to parse number").await, match kind { @@ -62,7 +68,7 @@ pub async fn num_to_err(NumError { kind, range }: NumError, offset: u32, i: &Int NumErrorKind::InvalidDigit => "non-digit character encountered", NumErrorKind::Overflow => "The number being described is too large or too accurate", }, - [Pos::Range(offset + range.start as u32..offset + range.end as u32).into()], + [SrcRange::new(offset + range.start as u32..offset + range.end as u32, source).pos().into()], ) } diff --git a/orchid-base/src/parse.rs b/orchid-base/src/parse.rs index a07fa61..c41c83c 100644 --- a/orchid-base/src/parse.rs +++ b/orchid-base/src/parse.rs @@ -1,6 +1,6 @@ use std::fmt::{self, Display}; use std::iter; -use std::ops::{Deref, Range}; +use std::ops::Deref; use futures::FutureExt; use futures::future::join_all; @@ -10,9 +10,9 @@ use crate::api; use crate::error::{OrcRes, Reporter, mk_err, mk_errv}; use crate::format::fmt; use crate::interner::{Interner, Tok}; -use crate::location::Pos; -use crate::name::VPath; -use crate::tree::{ExprRepr, ExtraTok, Paren, TokTree, Token}; +use crate::location::{Pos, SrcRange}; +use crate::name::{VName, VPath}; +use crate::tree::{ExprRepr, ExtraTok, Paren, TokTree, Token, ttv_range}; pub trait ParseCtx { fn i(&self) -> &Interner; @@ -58,10 +58,7 @@ where pub fn get(self, idx: u32) -> Option<&'a TokTree> { self.cur.get(idx as usize) } pub fn len(self) -> u32 { self.cur.len() as u32 } pub fn prev(self) -> &'a TokTree { self.prev } - pub fn pos(self) -> Range { - (self.cur.first().map(|f| f.range.start..self.cur.last().unwrap().range.end)) - .unwrap_or(self.prev.range.clone()) - } + pub fn sr(self) -> SrcRange { ttv_range(self.cur).unwrap_or_else(|| self.prev.sr.clone()) } pub fn pop_front(self) -> Option<(&'a TokTree, Self)> { self.cur.first().map(|r| (r, self.split_at(1).1)) } @@ -107,7 +104,7 @@ pub fn strip_fluff(tt: &TokTree) -> Option Token::S(*p, b.iter().filter_map(strip_fluff).collect()), t => t.clone(), }; - Some(TokTree { tok, range: tt.range.clone() }) + Some(TokTree { tok, sr: tt.sr.clone() }) } #[derive(Clone, Debug)] @@ -116,6 +113,7 @@ pub struct Comment { pub range: Range, } impl Comment { + // XXX: which of these four are actually used? pub async fn from_api(c: &api::Comment, i: &Interner) -> Self { Self { text: i.ex(c.text).await, range: c.range.clone() } } @@ -170,10 +168,11 @@ pub async fn try_pop_no_fluff<'a, A: ExprRepr, X: ExtraTok>( ) -> ParseRes<'a, &'a TokTree, A, X> { match snip.skip_fluff().pop_front() { Some((output, tail)) => Ok(Parsed { output, tail }), - None => Err(mk_errv(ctx.i().i("Unexpected end").await, "Pattern ends abruptly", [Pos::Range( - snip.pos(), - ) - .into()])), + None => Err(mk_errv( + ctx.i().i("Unexpected end").await, + "Line ends abruptly; more tokens were expected", + [snip.sr().pos().into()], + )), } } @@ -185,7 +184,7 @@ pub async fn expect_end( Some(surplus) => Err(mk_errv( ctx.i().i("Extra code after end of line").await, "Code found after the end of the line", - [Pos::Range(surplus.range.clone()).into()], + [surplus.sr.pos().into()], )), None => Ok(()), } @@ -202,7 +201,7 @@ pub async fn expect_tok<'a, A: ExprRepr, X: ExtraTok>( t => Err(mk_errv( ctx.i().i("Expected specific keyword").await, format!("Expected {tok} but found {:?}", fmt(t, ctx.i()).await), - [Pos::Range(head.range.clone()).into()], + [head.sr.pos().into()], )), } } @@ -217,12 +216,12 @@ pub type ParseRes<'a, T, H, X> = OrcRes>; pub async fn parse_multiname<'a, A: ExprRepr, X: ExtraTok>( ctx: &impl ParseCtx, tail: Snippet<'a, A, X>, -) -> ParseRes<'a, Vec<(Import, Pos)>, A, X> { +) -> ParseRes<'a, Vec, A, X> { let Some((tt, tail)) = tail.skip_fluff().pop_front() else { return Err(mk_errv( ctx.i().i("Expected token").await, "Expected a name, a parenthesized list of names, or a globstar.", - [Pos::Range(tail.pos()).into()], + [tail.sr().pos().into()], )); }; let ret = rec(tt, ctx).await; @@ -230,8 +229,8 @@ pub async fn parse_multiname<'a, A: ExprRepr, X: ExtraTok>( pub async fn rec( tt: &TokTree, ctx: &impl ParseCtx, - ) -> OrcRes>, Option>, Pos)>> { - let ttpos = Pos::Range(tt.range.clone()); + ) -> OrcRes>, Option>, SrcRange)>> { + let ttpos = tt.sr.pos(); match &tt.tok { Token::NS(ns, body) => { if !ns.starts_with(name_start) { @@ -247,7 +246,7 @@ pub async fn parse_multiname<'a, A: ExprRepr, X: ExtraTok>( Token::Name(ntok) => { let n = ntok; let nopt = Some(n.clone()); - Ok(vec![(vec![], nopt, Pos::Range(tt.range.clone()))]) + Ok(vec![(vec![], nopt, tt.sr.clone())]) }, Token::S(Paren::Round, b) => { let mut o = Vec::new(); @@ -272,19 +271,29 @@ pub async fn parse_multiname<'a, A: ExprRepr, X: ExtraTok>( } ret.map(|output| { let output = (output.into_iter()) - .map(|(p, name, pos)| (Import { path: VPath::new(p.into_iter().rev()), name }, pos)) + .map(|(p, name, sr)| Import { path: VPath::new(p.into_iter().rev()), name, sr }) .collect_vec(); Parsed { output, tail } }) } -/// A compound name, possibly ending with a globstar +/// A compound name, possibly ending with a globstar. It cannot be just a +/// globstar; either the name has to be known or the path has to be non-empty. #[derive(Debug, Clone)] pub struct Import { pub path: VPath, pub name: Option>, + pub sr: SrcRange, +} +impl Import { + /// Most specific concrete path + pub fn mspath(self) -> VName { + match self.name { + Some(n) => self.path.name_with_suffix(n), + None => self.path.into_name().expect("Import cannot be empty"), + } + } } - impl Display for Import { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}::{}", self.path.iter().join("::"), self.name.as_ref().map_or("*", |t| t.as_str())) diff --git a/orchid-base/src/tree.rs b/orchid-base/src/tree.rs index 4f72992..ea1a4f0 100644 --- a/orchid-base/src/tree.rs +++ b/orchid-base/src/tree.rs @@ -2,7 +2,6 @@ use std::borrow::Borrow; use std::fmt::{self, Debug, Display}; use std::future::Future; use std::marker::PhantomData; -use std::ops::Range; use std::rc::Rc; use async_stream::stream; @@ -16,7 +15,8 @@ use trait_set::trait_set; use crate::error::OrcErrv; use crate::format::{FmtCtx, FmtUnit, Format, Variants}; use crate::interner::{Interner, Tok}; -use crate::location::Pos; +use crate::location::{Pos, SrcRange}; +use crate::name::Sym; use crate::parse::Snippet; use crate::{api, match_mapping, tl_cache}; @@ -26,7 +26,7 @@ pub trait TokenVariant: Format + Clone + fmt:: fn from_api( api: &ApiEquiv, ctx: &mut Self::FromApiCtx<'_>, - pos: Pos, + pos: SrcRange, i: &Interner, ) -> impl Future; fn into_api(self, ctx: &mut Self::ToApiCtx<'_>) -> impl Future; @@ -34,7 +34,7 @@ pub trait TokenVariant: Format + Clone + fmt:: impl TokenVariant for Never { type FromApiCtx<'a> = (); type ToApiCtx<'a> = (); - async fn from_api(_: &T, _: &mut Self::FromApiCtx<'_>, _: Pos, _: &Interner) -> Self { + async fn from_api(_: &T, _: &mut Self::FromApiCtx<'_>, _: SrcRange, _: &Interner) -> Self { panic!("Cannot deserialize Never") } async fn into_api(self, _: &mut Self::ToApiCtx<'_>) -> T { match self {} } @@ -55,7 +55,7 @@ pub fn recur( tt: TokTree, f: &impl Fn(TokTree, &dyn RecurCB) -> TokTree, ) -> TokTree { - f(tt, &|TokTree { range, tok }| { + f(tt, &|TokTree { sr: range, tok }| { let tok = match tok { tok @ (Token::BR | Token::Bottom(_) | Token::Comment(_) | Token::Name(_)) => tok, tok @ (Token::Handle(_) | Token::NewExpr(_)) => tok, @@ -64,7 +64,7 @@ pub fn recur( Token::LambdaHead(arg.into_iter().map(|tt| recur(tt, f)).collect_vec()), Token::S(p, b) => Token::S(p, b.into_iter().map(|tt| recur(tt, f)).collect_vec()), }; - TokTree { range, tok } + TokTree { sr: range, tok } }) } @@ -94,28 +94,33 @@ impl Display for TokHandle<'_> { #[derive(Clone, Debug)] pub struct TokTree { pub tok: Token, - pub range: Range, + /// The protocol has a Range because these are always transmitted in the + /// context of a given snippet, but internal logic and error reporting is + /// easier if the in-memory representation also includes the snippet path. + pub sr: SrcRange, } impl TokTree { pub async fn from_api( tt: &api::TokenTree, hctx: &mut H::FromApiCtx<'_>, xctx: &mut X::FromApiCtx<'_>, + src: &Sym, i: &Interner, ) -> Self { + let pos = SrcRange::new(tt.range.clone(), src); let tok = match_mapping!(&tt.token, api::Token => Token:: { BR, NS(n => Tok::from_api(*n, i).await, - b => Box::new(Self::from_api(b, hctx, xctx, i).boxed_local().await)), + b => Box::new(Self::from_api(b, hctx, xctx, src, i).boxed_local().await)), Bottom(e => OrcErrv::from_api(e, i).await), - LambdaHead(arg => ttv_from_api(arg, hctx, xctx, i).await), + LambdaHead(arg => ttv_from_api(arg, hctx, xctx, src, i).await), Name(n => Tok::from_api(*n, i).await), - S(*par, b => ttv_from_api(b, hctx, xctx, i).await), + S(*par, b => ttv_from_api(b, hctx, xctx, src, i).await), Comment(c.clone()), - NewExpr(expr => X::from_api(expr, xctx, Pos::Range(tt.range.clone()), i).await), - Handle(tk => H::from_api(tk, hctx, Pos::Range(tt.range.clone()), i).await) + NewExpr(expr => X::from_api(expr, xctx, pos.clone(), i).await), + Handle(tk => H::from_api(tk, hctx, pos.clone(), i).await) }); - Self { range: tt.range.clone(), tok } + Self { sr: pos, tok } } pub async fn into_api( @@ -134,7 +139,7 @@ impl TokTree { Handle(hand.into_api(hctx).await), NewExpr(expr.into_api(xctx).await), }); - api::TokenTree { range: self.range.clone(), token } + api::TokenTree { range: self.sr.range.clone(), token } } pub fn is_kw(&self, tk: Tok) -> bool { self.tok.is_kw(tk) } @@ -152,8 +157,9 @@ impl TokTree { } pub fn is_fluff(&self) -> bool { matches!(self.tok, Token::Comment(_) | Token::BR) } pub fn lambda(arg: Vec, mut body: Vec) -> Self { - let arg_range = ttv_range(&arg); - let s_range = arg_range.start..body.last().expect("Lambda with empty body!").range.end; + let arg_range = ttv_range(&arg).expect("Lambda with empty arg!"); + let mut s_range = arg_range.clone(); + s_range.range.end = body.last().expect("Lambda with empty body!").sr.range.end; body.insert(0, Token::LambdaHead(arg).at(arg_range)); Token::S(Paren::Round, body).at(s_range) } @@ -168,11 +174,12 @@ pub async fn ttv_from_api( tokv: impl IntoIterator>, hctx: &mut H::FromApiCtx<'_>, xctx: &mut X::FromApiCtx<'_>, + src: &Sym, i: &Interner, ) -> Vec> { stream! { for tok in tokv { - yield TokTree::::from_api(tok.borrow(), hctx, xctx, i).boxed_local().await + yield TokTree::::from_api(tok.borrow(), hctx, xctx, src, i).boxed_local().await } } .collect() @@ -201,8 +208,8 @@ pub fn wrap_tokv( 0 => panic!("A tokv with no elements is illegal"), 1 => items_v.into_iter().next().unwrap(), _ => { - let range = items_v.first().unwrap().range.start..items_v.last().unwrap().range.end; - Token::S(api::Paren::Round, items_v).at(range) + let sr = ttv_range(&items_v).expect("empty handled above"); + Token::S(api::Paren::Round, items_v).at(sr) }, } } @@ -237,7 +244,7 @@ pub enum Token { Bottom(OrcErrv), } impl Token { - pub fn at(self, range: Range) -> TokTree { TokTree { range, tok: self } } + pub fn at(self, sr: SrcRange) -> TokTree { TokTree { sr, tok: self } } pub fn is_kw(&self, tk: Tok) -> bool { matches!(self, Token::Name(n) if *n == tk) } pub fn as_s(&self, par: Paren) -> Option<&[TokTree]> { match self { @@ -273,9 +280,9 @@ impl Format for Token { } } -pub fn ttv_range<'a>(ttv: &[TokTree]) -> Range { - assert!(!ttv.is_empty(), "Empty slice has no range"); - ttv.first().unwrap().range.start..ttv.last().unwrap().range.end +pub fn ttv_range<'a>(ttv: &[TokTree]) -> Option { + let range = ttv.first()?.sr.range.start..ttv.last().unwrap().sr.range.end; + Some(SrcRange { path: ttv.first().unwrap().sr.path(), range }) } pub async fn ttv_fmt<'a: 'b, 'b>( diff --git a/orchid-extension/src/entrypoint.rs b/orchid-extension/src/entrypoint.rs index 69c8339..0d62a2c 100644 --- a/orchid-extension/src/entrypoint.rs +++ b/orchid-extension/src/entrypoint.rs @@ -268,10 +268,11 @@ pub fn extension_init( let vfs = systems_g[sys_id].vfses[vfs_id].load(&path, ctx).await; hand.handle(&vfs_read, &vfs).await }, - api::HostExtReq::LexExpr(lex @ api::LexExpr { sys, text, pos, id }) => { + api::HostExtReq::LexExpr(lex @ api::LexExpr { sys, src, text, pos, id }) => { let sys_ctx = get_ctx(sys).await; let text = Tok::from_api(text, &i).await; - let ctx = LexContext { id, pos, text: &text, ctx: sys_ctx.clone() }; + let src = Sym::from_api(src, sys_ctx.i()).await; + let ctx = LexContext { id, pos, text: &text, src, ctx: sys_ctx.clone() }; let trigger_char = text.chars().nth(pos as usize).unwrap(); let err_na = err_not_applicable(&i).await; let err_cascade = err_cascade(&i).await; @@ -294,11 +295,12 @@ pub fn extension_init( hand.handle(&lex, &None).await }, api::HostExtReq::ParseLine(pline) => { - let api::ParseLine { module, exported, comments, sys, line } = &pline; + let api::ParseLine { module, src, exported, comments, sys, line } = &pline; let mut ctx = get_ctx(*sys).await; let parsers = ctx.cted().inst().dyn_parsers(); + let src = Sym::from_api(*src, ctx.i()).await; let comments = join_all(comments.iter().map(|c| Comment::from_api(c, &i))).await; - let line: Vec = ttv_from_api(line, &mut ctx, &mut (), &i).await; + let line: Vec = ttv_from_api(line, &mut ctx, &mut (), &src, &i).await; let snip = Snippet::new(line.first().expect("Empty line"), &line); let (head, tail) = snip.pop_front().unwrap(); let name = if let GenTok::Name(n) = &head.tok { n } else { panic!("No line head") }; diff --git a/orchid-extension/src/lexer.rs b/orchid-extension/src/lexer.rs index 3b57967..3649967 100644 --- a/orchid-extension/src/lexer.rs +++ b/orchid-extension/src/lexer.rs @@ -1,11 +1,12 @@ use std::future::Future; -use std::ops::{Range, RangeInclusive}; +use std::ops::RangeInclusive; use futures::FutureExt; use futures::future::LocalBoxFuture; use orchid_base::error::{OrcErr, OrcRes, mk_err}; use orchid_base::interner::{Interner, Tok}; -use orchid_base::location::Pos; +use orchid_base::location::{Pos, SrcRange}; +use orchid_base::name::Sym; use orchid_base::reqnot::Requester; use crate::api; @@ -34,6 +35,7 @@ pub struct LexContext<'a> { pub text: &'a Tok, pub id: api::ParsId, pub pos: u32, + pub src: Sym, } impl<'a> LexContext<'a> { pub async fn recurse(&self, tail: &'a str) -> OrcRes<(&'a str, GenTokTree)> { @@ -41,14 +43,15 @@ impl<'a> LexContext<'a> { let Some(lx) = self.ctx.reqnot().request(api::SubLex { pos: start, id: self.id }).await else { return Err(err_cascade(self.ctx.i()).await.into()); }; - let tree = GenTokTree::from_api(&lx.tree, &mut self.ctx.clone(), &mut (), self.ctx.i()).await; + let tree = + GenTokTree::from_api(&lx.tree, &mut self.ctx.clone(), &mut (), &self.src, self.ctx.i()).await; Ok((&self.text[lx.pos as usize..], tree)) } pub fn pos(&self, tail: &'a str) -> u32 { (self.text.len() - tail.len()) as u32 } - pub fn tok_ran(&self, len: u32, tail: &'a str) -> Range { - self.pos(tail) - len..self.pos(tail) + pub fn tok_ran(&self, len: u32, tail: &'a str) -> SrcRange { + SrcRange::new(self.pos(tail) - len..self.pos(tail), &self.src) } } diff --git a/orchid-extension/src/tree.rs b/orchid-extension/src/tree.rs index 94bf7be..973e9e9 100644 --- a/orchid-extension/src/tree.rs +++ b/orchid-extension/src/tree.rs @@ -8,7 +8,7 @@ use futures::{FutureExt, StreamExt}; use hashbrown::HashMap; use itertools::Itertools; use orchid_base::interner::{Interner, Tok}; -use orchid_base::location::Pos; +use orchid_base::location::SrcRange; use orchid_base::name::Sym; use orchid_base::reqnot::ReqHandlish; use orchid_base::tree::{TokTree, Token, TokenVariant}; @@ -32,7 +32,7 @@ impl TokenVariant for GExpr { async fn from_api( _: &api::Expression, _: &mut Self::FromApiCtx<'_>, - _: Pos, + _: SrcRange, _: &Interner, ) -> Self { panic!("Received new expression from host") @@ -47,7 +47,7 @@ impl TokenVariant for Expr { async fn from_api( api: &api::ExprTicket, ctx: &mut Self::FromApiCtx<'_>, - _: Pos, + _: SrcRange, _: &Interner, ) -> Self { // SAFETY: receiving trees from sublexers implies ownership transfer diff --git a/orchid-host/src/dealias.rs b/orchid-host/src/dealias.rs index abe5d6c..56cd4c6 100644 --- a/orchid-host/src/dealias.rs +++ b/orchid-host/src/dealias.rs @@ -1,10 +1,7 @@ -use std::rc::Rc; - use futures::FutureExt; use hashbrown::{HashMap, HashSet}; use itertools::{Either, Itertools}; use orchid_base::error::{OrcErr, Reporter, mk_err}; -use orchid_base::format::{FmtCtxImpl, Format, take_first}; use orchid_base::interner::{Interner, Tok}; use orchid_base::location::Pos; use orchid_base::name::{NameLike, Sym, VName}; @@ -125,18 +122,17 @@ pub async fn imports_to_aliases( match &item.kind { ItemKind::Import(imp) => match absolute_path(cwd, &imp.path) { Err(e) => - ctx.rep.report(e.err_obj(ctx.i, item.pos.clone(), &imp.path.iter().join("::")).await), + ctx.rep.report(e.err_obj(ctx.i, item.sr.pos(), &imp.path.iter().join("::")).await), Ok(abs_path) => { let names = match imp.name.as_ref() { Some(n) => Either::Right([n.clone()].into_iter()), - None => Either::Left( - resolv_glob(cwd, root, &abs_path, item.pos.clone(), ctx).await.into_iter(), - ), + None => + Either::Left(resolv_glob(cwd, root, &abs_path, item.sr.pos(), ctx).await.into_iter()), }; for name in names { let mut tgt = abs_path.clone().suffix([name.clone()]).to_sym(ctx.i).await; let src = Sym::new(cwd.iter().cloned().chain([name]), ctx.i).await.unwrap(); - import_locs.entry(src.clone()).or_insert(vec![]).push(item.pos.clone()); + import_locs.entry(src.clone()).or_insert(vec![]).push(item.sr.pos()); if let Some(tgt2) = alias_map.get(&tgt) { tgt = tgt2.clone(); } @@ -144,7 +140,7 @@ pub async fn imports_to_aliases( ctx.rep.report(mk_err( ctx.i.i("Circular references").await, format!("{src} circularly refers to itself"), - [item.pos.clone().into()], + [item.sr.pos().into()], )); continue; } diff --git a/orchid-host/src/extension.rs b/orchid-host/src/extension.rs index 46cd143..e109057 100644 --- a/orchid-host/src/extension.rs +++ b/orchid-host/src/extension.rs @@ -20,6 +20,7 @@ use orchid_base::clone; use orchid_base::format::{FmtCtxImpl, Format}; use orchid_base::interner::Tok; use orchid_base::logging::Logger; +use orchid_base::name::Sym; use orchid_base::reqnot::{DynRequester, ReqNot, Requester as _}; use crate::api; @@ -203,6 +204,7 @@ impl Extension { pub(crate) async fn lex_req>>( &self, source: Tok, + src: Sym, pos: u32, sys: api::SysId, mut r: impl FnMut(u32) -> F, @@ -214,8 +216,9 @@ impl Extension { self.0.lex_recur.lock().await.insert(id, req_in); // lex_recur released let (ret, ()) = join( async { - let res = - (self.reqnot()).request(api::LexExpr { id, pos, sys, text: source.to_api() }).await; + let res = (self.reqnot()) + .request(api::LexExpr { id, pos, sys, src: src.to_api(), text: source.to_api() }) + .await; // collect sender to unblock recursion handler branch before returning self.0.lex_recur.lock().await.remove(&id); res diff --git a/orchid-host/src/lex.rs b/orchid-host/src/lex.rs index 0764e50..d4d8988 100644 --- a/orchid-host/src/lex.rs +++ b/orchid-host/src/lex.rs @@ -4,7 +4,8 @@ use async_std::sync::Mutex; use futures::FutureExt; use orchid_base::error::{OrcErrv, OrcRes, mk_errv}; use orchid_base::interner::Tok; -use orchid_base::location::Pos; +use orchid_base::location::SrcRange; +use orchid_base::name::Sym; use orchid_base::parse::{name_char, name_start, op_char, unrep_space}; use orchid_base::tokens::PARENS; use orchid_base::tree::recur; @@ -18,6 +19,7 @@ use crate::system::System; pub struct LexCtx<'a> { pub systems: &'a [System], pub source: &'a Tok, + pub path: &'a Sym, pub tail: &'a str, pub sub_trees: &'a mut Vec, pub ctx: &'a Ctx, @@ -27,6 +29,7 @@ impl<'a> LexCtx<'a> { where 'a: 'b { LexCtx { source: self.source, + path: self.path, tail: &self.source[pos as usize..], systems: self.systems, sub_trees: &mut *self.sub_trees, @@ -49,7 +52,7 @@ impl<'a> LexCtx<'a> { let mut exprs = self.ctx.common_exprs.clone(); let foo = recur(subtree, &|tt, r| { if let ParsTok::NewExpr(expr) = tt.tok { - return ParsTok::Handle(expr).at(tt.range); + return ParsTok::Handle(expr).at(tt.sr); } r(tt) }); @@ -60,6 +63,7 @@ impl<'a> LexCtx<'a> { &tree, &mut self.ctx.common_exprs.clone(), &mut ExprParseCtx { ctx: self.ctx.clone(), exprs: self.ctx.common_exprs.clone() }, + self.path, &self.ctx.i, ) .await @@ -103,7 +107,7 @@ pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes { return Err(mk_errv( ctx.ctx.i.i("Unterminated block comment").await, "This block comment has no ending ]--", - [Pos::Range(start..start + 3).into()], + [SrcRange::new(start..start + 3, ctx.path).pos().into()], )); }; ctx.set_tail(tail); @@ -120,7 +124,7 @@ pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes { return Err(mk_errv( ctx.ctx.i.i("Unclosed lambda").await, "Lambdae started with \\ should separate arguments from body with .", - [Pos::Range(start..start + 1).into()], + [SrcRange::new(start..start + 1, ctx.path).pos().into()], )); } arg.push(lex_once(ctx).boxed_local().await?); @@ -135,7 +139,7 @@ pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes { return Err(mk_errv( ctx.ctx.i.i("unclosed paren").await, format!("this {lp} has no matching {rp}"), - [Pos::Range(start..start + 1).into()], + [SrcRange::new(start..start + 1, ctx.path).pos().into()], )); } body.push(lex_once(ctx).boxed_local().await?); @@ -153,7 +157,7 @@ pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes { .lex(source, pos, |pos| async move { let mut ctx_g = ctx_lck.lock().await; match lex_once(&mut ctx_g.push(pos)).boxed_local().await { - Ok(t) => Some(api::SubLexed { pos: t.range.end, tree: ctx_g.ser_subtree(t).await }), + Ok(t) => Some(api::SubLexed { pos: t.sr.end(), tree: ctx_g.ser_subtree(t).await }), Err(e) => { errors_lck.lock().await.push(e); None @@ -185,16 +189,22 @@ pub async fn lex_once(ctx: &mut LexCtx<'_>) -> OrcRes { return Err(mk_errv( ctx.ctx.i.i("Unrecognized character").await, "The following syntax is meaningless.", - [Pos::Range(start..start + 1).into()], + [SrcRange::new(start..start + 1, ctx.path).pos().into()], )); } }; - Ok(ParsTokTree { tok, range: start..ctx.get_pos() }) + Ok(ParsTokTree { tok, sr: SrcRange::new(start..ctx.get_pos(), ctx.path) }) } -pub async fn lex(text: Tok, systems: &[System], ctx: &Ctx) -> OrcRes> { +pub async fn lex( + text: Tok, + path: Sym, + systems: &[System], + ctx: &Ctx, +) -> OrcRes> { let mut sub_trees = Vec::new(); - let mut ctx = LexCtx { source: &text, sub_trees: &mut sub_trees, tail: &text[..], systems, ctx }; + let mut ctx = + LexCtx { source: &text, sub_trees: &mut sub_trees, tail: &text[..], systems, path: &path, ctx }; let mut tokv = Vec::new(); ctx.trim(unrep_space); while !ctx.tail.is_empty() { diff --git a/orchid-host/src/parse.rs b/orchid-host/src/parse.rs index 69df744..cec5b75 100644 --- a/orchid-host/src/parse.rs +++ b/orchid-host/src/parse.rs @@ -7,7 +7,6 @@ use itertools::Itertools; use orchid_base::error::{OrcRes, Reporter, mk_err, mk_errv}; use orchid_base::format::fmt; use orchid_base::interner::{Interner, Tok}; -use orchid_base::location::Pos; use orchid_base::name::Sym; use orchid_base::parse::{ Comment, Import, ParseCtx, Parsed, Snippet, expect_end, line_items, parse_multiname, @@ -25,6 +24,7 @@ type ParsSnippet<'a> = Snippet<'a, Expr, Expr>; pub struct HostParseCtxImpl<'a> { pub ctx: Ctx, + pub src: Sym, pub systems: &'a [System], pub reporter: &'a Reporter, pub interner: &'a Interner, @@ -77,19 +77,19 @@ pub async fn parse_item( expect_end(ctx, tail).await?; let mut ok = Vec::new(); for tt in body { - let pos = Pos::Range(tt.range.clone()); + let sr = tt.sr.clone(); match &tt.tok { Token::Name(n) => - ok.push(Item { comments: comments.clone(), pos, kind: ItemKind::Export(n.clone()) }), + ok.push(Item { comments: comments.clone(), sr, kind: ItemKind::Export(n.clone()) }), Token::NS(..) => ctx.reporter().report(mk_err( ctx.i().i("Compound export").await, "Cannot export compound names (names containing the :: separator)", - [pos.into()], + [sr.pos().into()], )), t => ctx.reporter().report(mk_err( ctx.i().i("Invalid export").await, format!("Invalid export target {}", fmt(t, ctx.i()).await), - [pos.into()], + [sr.pos().into()], )), } } @@ -99,14 +99,14 @@ pub async fn parse_item( Parsed { output, tail: _ } => Err(mk_errv( ctx.i().i("Malformed export").await, "`export` can either prefix other lines or list names inside ( )", - [Pos::Range(output.range.clone()).into()], + [output.sr.pos().into()], )), }, n if *n == ctx.i().i("import").await => { let imports = parse_import(ctx, postdisc).await?; - Ok(Vec::from_iter(imports.into_iter().map(|(t, pos)| Item { + Ok(Vec::from_iter(imports.into_iter().map(|t| Item { comments: comments.clone(), - pos, + sr: t.sr.clone(), kind: ItemKind::Import(t), }))) }, @@ -115,7 +115,7 @@ pub async fn parse_item( Some(_) => Err(mk_errv( ctx.i().i("Expected a line type").await, "All lines must begin with a keyword", - [Pos::Range(item.pos()).into()], + [item.sr().pos().into()], )), None => unreachable!("These lines are filtered and aggregated in earlier stages"), } @@ -124,7 +124,7 @@ pub async fn parse_item( pub async fn parse_import<'a>( ctx: &impl HostParseCtx, tail: ParsSnippet<'a>, -) -> OrcRes> { +) -> OrcRes> { let Parsed { output: imports, tail } = parse_multiname(ctx, tail).await?; expect_end(ctx, tail).await?; Ok(imports) @@ -153,10 +153,10 @@ pub async fn parse_exportable_item<'a>( return Err(mk_errv( ctx.i().i("Unrecognized line type").await, format!("Line types are: const, mod, macro, grammar, {ext_lines}"), - [Pos::Range(tail.prev().range.clone()).into()], + [tail.prev().sr.pos().into()], )); }; - Ok(vec![Item { comments, pos: Pos::Range(tail.pos()), kind }]) + Ok(vec![Item { comments, sr: tail.sr(), kind }]) } pub async fn parse_module<'a>( @@ -170,7 +170,7 @@ pub async fn parse_module<'a>( return Err(mk_errv( ctx.i().i("Missing module name").await, format!("A name was expected, {} was found", fmt(output, ctx.i()).await), - [Pos::Range(output.range.clone()).into()], + [output.sr.pos().into()], )); }, }; @@ -180,7 +180,7 @@ pub async fn parse_module<'a>( return Err(mk_errv( ctx.i().i("Expected module body").await, format!("A ( block ) was expected, {} was found", fmt(output, ctx.i()).await), - [Pos::Range(output.range.clone()).into()], + [output.sr.pos().into()], )); }; let path = path.push(name.clone()); @@ -197,7 +197,7 @@ pub async fn parse_const<'a>( return Err(mk_errv( ctx.i().i("Missing module name").await, format!("A name was expected, {} was found", fmt(output, ctx.i()).await), - [Pos::Range(output.range.clone()).into()], + [output.sr.pos().into()], )); }; let Parsed { output, tail } = try_pop_no_fluff(ctx, tail).await?; @@ -205,7 +205,7 @@ pub async fn parse_const<'a>( return Err(mk_errv( ctx.i().i("Missing = separator").await, format!("Expected = , found {}", fmt(output, ctx.i()).await), - [Pos::Range(output.range.clone()).into()], + [output.sr.pos().into()], )); } try_pop_no_fluff(ctx, tail).await?; @@ -223,11 +223,11 @@ pub async fn parse_expr( .or_else(|| tail.iter().enumerate().rev().find(|(_, tt)| !tt.is_fluff())) else { return Err(mk_errv(ctx.i().i("Empty expression").await, "Expression ends abruptly here", [ - Pos::Range(tail.pos()).into(), + tail.sr().pos().into(), ])); }; let (function, value) = tail.split_at(last_idx as u32); - let pos = Pos::Range(tail.pos()); + let pos = tail.sr().pos(); if !function.iter().all(TokTree::is_fluff) { let (f_psb, x_psb) = psb.split(); let x_expr = parse_expr(ctx, path.clone(), x_psb, value).boxed_local().await?; diff --git a/orchid-host/src/parsed.rs b/orchid-host/src/parsed.rs index 76a6f69..5090687 100644 --- a/orchid-host/src/parsed.rs +++ b/orchid-host/src/parsed.rs @@ -1,4 +1,3 @@ -use std::cell::RefCell; use std::fmt::Debug; use std::rc::Rc; @@ -12,7 +11,7 @@ use itertools::Itertools; use orchid_base::error::{OrcRes, mk_errv}; use orchid_base::format::{FmtCtx, FmtUnit, Format, Variants}; use orchid_base::interner::Tok; -use orchid_base::location::Pos; +use orchid_base::location::{Pos, SrcRange}; use orchid_base::name::{NameLike, Sym}; use orchid_base::parse::{Comment, Import}; use orchid_base::tl_cache; @@ -37,7 +36,7 @@ impl TokenVariant for Expr { async fn from_api( api: &api::ExprTicket, ctx: &mut Self::FromApiCtx<'_>, - _: Pos, + _: SrcRange, _: &orchid_base::interner::Interner, ) -> Self { let expr = ctx.get_expr(*api).expect("Dangling expr"); @@ -51,7 +50,7 @@ impl TokenVariant for Expr { async fn from_api( api: &api::Expression, ctx: &mut Self::FromApiCtx<'_>, - _: Pos, + _: SrcRange, _: &orchid_base::interner::Interner, ) -> Self { Expr::from_api(api, PathSetBuilder::new(), ctx).await @@ -76,7 +75,7 @@ impl<'a> ParsedFromApiCx<'a> { #[derive(Debug)] pub struct Item { - pub pos: Pos, + pub sr: SrcRange, pub comments: Vec, pub kind: ItemKind, } @@ -88,8 +87,9 @@ pub enum ItemKind { Import(Import), } impl ItemKind { - pub fn at(self, pos: Pos) -> Item { Item { comments: vec![], pos, kind: self } } + pub fn at(self, sr: SrcRange) -> Item { Item { comments: vec![], sr, kind: self } } } + impl Format for Item { async fn print<'a>(&'a self, c: &'a (impl FmtCtx + ?Sized + 'a)) -> FmtUnit { let comment_text = self.comments.iter().join("\n"); @@ -135,7 +135,6 @@ pub enum ParsedMemberKind { #[derive(Debug, Default)] pub struct ParsedModule { - pub imports: Vec, pub exports: Vec>, pub items: Vec, } @@ -148,7 +147,7 @@ impl ParsedModule { _ => None, }) .collect_vec(); - Self { imports: vec![], exports, items } + Self { exports, items } } pub fn merge(&mut self, other: ParsedModule) { let mut swap = ParsedModule::default(); @@ -178,6 +177,10 @@ impl ParsedModule { } Ok(cur) } + pub fn get_imports(&self) -> impl IntoIterator { + (self.items.iter()) + .filter_map(|it| if let ItemKind::Import(i) = &it.kind { Some(i) } else { None }) + } } #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub enum WalkErrorKind { @@ -192,8 +195,7 @@ pub struct WalkError { } impl Format for ParsedModule { async fn print<'a>(&'a self, c: &'a (impl FmtCtx + ?Sized + 'a)) -> FmtUnit { - let import_str = self.imports.iter().map(|i| format!("import {i}")).join("\n"); - let head_str = format!("{import_str}\nexport ::({})\n", self.exports.iter().join(", ")); + let head_str = format!("export ::({})\n", self.exports.iter().join(", ")); Variants::sequence(self.items.len() + 1, "\n", None).units( [head_str.into()].into_iter().chain(join_all(self.items.iter().map(|i| i.print(c))).await), ) diff --git a/orchid-host/src/tree.rs b/orchid-host/src/tree.rs index adf26eb..cf77c45 100644 --- a/orchid-host/src/tree.rs +++ b/orchid-host/src/tree.rs @@ -10,6 +10,7 @@ use orchid_base::name::Sym; use crate::api; use crate::ctx::Ctx; +use crate::dealias::absolute_path; use crate::expr::Expr; use crate::parsed::{ParsedMemberKind, ParsedModule}; use crate::system::System; @@ -37,6 +38,34 @@ impl Module { } Self { members } } + async fn walk(&self, mut path: impl Iterator>, ) -> &Self { todo!()} + async fn from_parsed( + parsed: &ParsedModule, + path: Sym, + parsed_root_path: Sym, + parsed_root: &ParsedModule, + root: &Module, + preload: &mut HashMap, + ) -> Self { + let mut imported_names = Vec::new(); + for import in parsed.get_imports() { + if let Some(n) = import.name.clone() { + imported_names.push(n); + continue; + } + // the path in a wildcard import has to be a module + if import.path.is_empty() { + panic!("Imported root") + } + if let Some(subpath) = import.path.strip_prefix(&parsed_root_path) { + let abs = absolute_path(&path, subpath); + // path is in parsed_root + } else { + // path is in root + } + } + todo!() + } } pub struct Member { diff --git a/orchid-std/src/macros/rule/build.rs b/orchid-std/src/macros/rule/build.rs index d38f015..7e8b5f9 100644 --- a/orchid-std/src/macros/rule/build.rs +++ b/orchid-std/src/macros/rule/build.rs @@ -110,7 +110,7 @@ mod test { use orchid_api::PhKind; use orchid_base::interner::Interner; - use orchid_base::location::SourceRange; + use orchid_base::location::SrcRange; use orchid_base::sym; use orchid_base::tokens::Paren; use orchid_base::tree::Ph; @@ -124,7 +124,7 @@ mod test { spin_on(async { let i = Interner::new_master(); let ex = |tok: MacTok| async { - MacTree { tok: Rc::new(tok), pos: SourceRange::mock(&i).await.pos() } + MacTree { tok: Rc::new(tok), pos: SrcRange::mock(&i).await.pos() } }; let pattern = vec![ ex(MacTok::Ph(Ph { diff --git a/orchid-std/src/std/string/str_lexer.rs b/orchid-std/src/std/string/str_lexer.rs index 76a27d2..2b145f1 100644 --- a/orchid-std/src/std/string/str_lexer.rs +++ b/orchid-std/src/std/string/str_lexer.rs @@ -118,8 +118,8 @@ impl Lexer for StringLexer { } let add_frag = |prev: Option>, new: GenTokTree<'a>| async { let Some(prev) = prev else { return new }; - let concat_fn = GenTok::Reference(sym!(std::string::concat; ctx.i).await) - .at(prev.range.start..prev.range.start); + let concat_fn = + GenTok::Reference(sym!(std::string::concat; ctx.i).await).at(prev.sr.start..prev.sr.start); wrap_tokv([concat_fn, prev, new]) }; loop {