in the midst of a refactor

2024-04-29 21:46:42 +02:00
parent ed0d64d52e
commit aa3f7e99ab
221 changed files with 5431 additions and 685 deletions

View File

@@ -0,0 +1,163 @@
//! Definition and implementations of the parsing context, which is used
//! throughout the parser to access the source text, the registered lexer and
//! parser plugins, and the error reporter
use std::ops::Range;
use std::sync::Arc;
use super::lex_plugin::LexerPlugin;
use super::parse_plugin::ParseLinePlugin;
use crate::error::Reporter;
use crate::location::{SourceCode, SourceRange};
use crate::utils::boxed_iter::{box_empty, BoxedIter};
use crate::utils::sequence::Sequence;
/// Trait enclosing all context features
///
/// The main implementation is [ParseCtxImpl]
pub trait ParseCtx {
/// Get an object describing the file this source code comes from
#[must_use]
fn code_info(&self) -> SourceCode;
/// Get the list of all lexer plugins
#[must_use]
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin>;
/// Get the list of all parser plugins
#[must_use]
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin>;
/// Error reporter
#[must_use]
fn reporter(&self) -> &Reporter;
/// Find our position in the text given the text we've yet to parse
#[must_use]
fn pos(&self, tail: &str) -> usize {
let tail_len = tail.len();
let source_len = self.source().len();
(self.source().len().checked_sub(tail.len())).unwrap_or_else(|| {
panic!("tail.len()={tail_len} greater than self.source().len()={source_len}; tail={tail:?}")
})
}
/// Generate a location given the length of a token and the unparsed text
/// after it. See also [ParseCtx::range_loc] if the maths gets complex.
#[must_use]
fn range(&self, len: usize, tl: &str) -> Range<usize> {
match self.pos(tl).checked_sub(len) {
Some(start) => start..self.pos(tl),
None => {
panic!("len={len} greater than tail.len()={}; tail={tl:?}", tl.len())
},
}
}
/// Create a contextful location for error reporting
#[must_use]
fn source_range(&self, len: usize, tl: &str) -> SourceRange {
self.range_loc(&self.range(len, tl))
}
/// Create a contextful location from a range directly.
#[must_use]
fn range_loc(&self, range: &Range<usize>) -> SourceRange {
SourceRange { code: self.code_info(), range: range.clone() }
}
/// Get a reference to the full source text. This should not be used for
/// position math.
#[must_use]
fn source(&self) -> Arc<String> { self.code_info().text.clone() }
}
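// Worked example of the position math above, assuming a context whose source
// text is "foo bar" (7 bytes): after lexing `foo`, the unparsed tail is
// " bar", so
//   pos(" bar")             == 7 - 4 == 3
//   range(3, " bar")        == 0..3   (the bytes of `foo`)
//   source_range(3, " bar") == the same range plus the file metadata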
impl<'a, C: ParseCtx + 'a + ?Sized> ParseCtx for &'a C {
fn reporter(&self) -> &Reporter { (*self).reporter() }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { (*self).lexers() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { (*self).line_parsers() }
fn pos(&self, tail: &str) -> usize { (*self).pos(tail) }
fn code_info(&self) -> SourceCode { (*self).code_info() }
fn source(&self) -> Arc<String> { (*self).source() }
fn range(&self, l: usize, t: &str) -> Range<usize> { (*self).range(l, t) }
}
/// Struct implementing context
#[derive(Clone)]
pub struct ParseCtxImpl<'a, 'b> {
/// File to be parsed; where it belongs in the tree and its text
pub code: SourceCode,
/// Error aggregator
pub reporter: &'b Reporter,
/// Lexer plugins for parsing custom literals
pub lexers: Sequence<'a, &'a (dyn LexerPlugin + 'a)>,
/// Parser plugins for parsing custom line structures
pub line_parsers: Sequence<'a, &'a dyn ParseLinePlugin>,
}
impl<'a, 'b> ParseCtx for ParseCtxImpl<'a, 'b> {
fn reporter(&self) -> &Reporter { self.reporter }
// Rust doesn't realize that this lifetime is covariant
#[allow(clippy::map_identity)]
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { Box::new(self.lexers.iter().map(|r| r)) }
#[allow(clippy::map_identity)]
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> {
Box::new(self.line_parsers.iter().map(|r| r))
}
fn code_info(&self) -> SourceCode { self.code.clone() }
}
/// Context instance for testing. Implicitly provides a reporter and panics if
/// any errors are reported
pub struct MockContext(pub Reporter);
impl MockContext {
/// Create a new mock
pub fn new() -> Self { Self(Reporter::new()) }
}
impl Default for MockContext {
fn default() -> Self { Self::new() }
}
impl ParseCtx for MockContext {
fn reporter(&self) -> &Reporter { &self.0 }
fn pos(&self, tail: &str) -> usize { usize::MAX / 2 - tail.len() }
// the remaining methods only need placeholder values in tests
fn code_info(&self) -> SourceCode { SourceRange::mock().code() }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { box_empty() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { box_empty() }
}
impl Drop for MockContext {
fn drop(&mut self) { self.0.assert() }
}
/// Context that assigns the same location to every subset of the source code.
/// Its main use case is to process source code that was dynamically generated
/// in response to some user code. See also [ReporterContext]
pub struct FlatLocContext<'a, C: ParseCtx + ?Sized> {
sub: &'a C,
range: &'a SourceRange,
}
impl<'a, C: ParseCtx + ?Sized> FlatLocContext<'a, C> {
/// Create a new context that will use the same provided range for every
/// parsed token
pub fn new(sub: &'a C, range: &'a SourceRange) -> Self { Self { sub, range } }
}
impl<'a, C: ParseCtx + ?Sized> ParseCtx for FlatLocContext<'a, C> {
fn reporter(&self) -> &Reporter { self.sub.reporter() }
fn pos(&self, _: &str) -> usize { 0 }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { self.sub.lexers() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { self.sub.line_parsers() }
fn code_info(&self) -> SourceCode { self.range.code() }
fn range(&self, _: usize, _: &str) -> Range<usize> { self.range.range() }
}
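// Usage sketch (`call_site_range` and `generated_src` are hypothetical): when
// lexing text generated at runtime, wrap the real context so that every
// produced token is attributed to the generating call site rather than to
// offsets within the synthetic string.
//
// let flat = FlatLocContext::new(&ctx, &call_site_range);
// let res = lex(Vec::new(), &generated_src, &flat, |_| Ok::<_, Never>(false));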
/// Context that forwards everything to a wrapped context except for error
/// reporting. See also [FlatLocContext]
pub struct ReporterContext<'a, C: ParseCtx + ?Sized> {
sub: &'a C,
reporter: &'a Reporter,
}
impl<'a, C: ParseCtx + ?Sized> ReporterContext<'a, C> {
/// Create a new context that will collect errors separately and forward
/// everything else to an enclosed context
pub fn new(sub: &'a C, reporter: &'a Reporter) -> Self { Self { sub, reporter } }
}
impl<'a, C: ParseCtx + ?Sized> ParseCtx for ReporterContext<'a, C> {
fn reporter(&self) -> &Reporter { self.reporter }
fn pos(&self, tail: &str) -> usize { self.sub.pos(tail) }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { self.sub.lexers() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { self.sub.line_parsers() }
fn code_info(&self) -> SourceCode { self.sub.code_info() }
fn range(&self, len: usize, tl: &str) -> Range<usize> { self.sub.range(len, tl) }
fn range_loc(&self, range: &Range<usize>) -> SourceRange { self.sub.range_loc(range) }
fn source(&self) -> Arc<String> { self.sub.source() }
fn source_range(&self, len: usize, tl: &str) -> SourceRange { self.sub.source_range(len, tl) }
}
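// A minimal sanity check of the mock's synthetic position scheme: tails are
// anchored near usize::MAX / 2, so the subtraction in the default `range`
// implementation cannot underflow.
#[cfg(test)]
mod test {
  use super::{MockContext, ParseCtx};

  #[test]
  fn mock_pos_math() {
    let ctx = MockContext::new();
    let range = ctx.range(2, "tail");
    assert_eq!(range.end - range.start, 2);
    // a longer tail means less text has been consumed, i.e. a smaller position
    assert!(ctx.pos("longer tail") < ctx.pos("tail"));
  }
}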

View File

@@ -0,0 +1,215 @@
//! Errors produced by the parser. Plugins are encouraged to reuse these where
//! applicable.
use intern_all::Tok;
use itertools::Itertools;
use super::context::ParseCtx;
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use crate::error::{ProjectError, ProjectErrorObj, ProjectResult};
use crate::location::{CodeOrigin, SourceRange};
use crate::parse::parsed::PType;
/// Parse error information without a location. Location data is added by the
/// parser.
pub trait ParseErrorKind: Sized + Send + Sync + 'static {
/// A general description of the error condition
const DESCRIPTION: &'static str;
/// A specific description of the error with concrete text sections
fn message(&self) -> String { Self::DESCRIPTION.to_string() }
/// Convert this error to a type-erased [ProjectError] to be handled together
/// with other Orchid errors.
fn pack(self, range: SourceRange) -> ProjectErrorObj { ParseError { kind: self, range }.pack() }
}
struct ParseError<T> {
pub range: SourceRange,
pub kind: T,
}
impl<T: ParseErrorKind> ProjectError for ParseError<T> {
const DESCRIPTION: &'static str = T::DESCRIPTION;
fn one_position(&self) -> CodeOrigin { self.range.origin() }
fn message(&self) -> String { self.kind.message() }
}
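/// Illustrative sketch of a plugin-defined error kind following the same
/// pattern. This particular kind is hypothetical and raised nowhere; only
/// [ParseErrorKind::DESCRIPTION] is mandatory, `message` defaults to it.
pub struct UnclosedString;
impl ParseErrorKind for UnclosedString {
  const DESCRIPTION: &'static str = "A string literal was never closed";
}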
/// A line does not begin with an identifying keyword. Raised on the first token
pub(super) struct LineNeedsPrefix(pub Lexeme);
impl ParseErrorKind for LineNeedsPrefix {
const DESCRIPTION: &'static str = "This linetype requires a prefix";
fn message(&self) -> String { format!("{} cannot appear at the beginning of a line", self.0) }
}
/// The line ends abruptly. Raised on the last token
pub(super) struct UnexpectedEOL(pub Lexeme);
impl ParseErrorKind for UnexpectedEOL {
const DESCRIPTION: &'static str = "The line ended abruptly";
fn message(&self) -> String {
"In Orchid, all line breaks outside parentheses start a new declaration".to_string()
}
}
/// The line should have ended. Raised on last valid or first excess token
pub(super) struct ExpectedEOL;
impl ParseErrorKind for ExpectedEOL {
const DESCRIPTION: &'static str = "Expected the end of the line";
}
/// A name was expected.
pub(super) struct ExpectedName(pub Lexeme);
impl ParseErrorKind for ExpectedName {
const DESCRIPTION: &'static str = "A name was expected";
fn message(&self) -> String { format!("Expected a name, found {}", self.0) }
}
/// Unwrap a name or operator.
pub(super) fn expect_name(
Entry { lexeme, range }: &Entry,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Tok<String>> {
match lexeme {
Lexeme::Name(n) => Ok(n.clone()),
lex => Err(ExpectedName(lex.clone()).pack(ctx.range_loc(range))),
}
}
/// A specific lexeme was expected
pub(super) struct Expected {
/// The lexemes that would have been acceptable
pub expected: Vec<Lexeme>,
/// Whether a name would also have been acceptable (multiname)
pub or_name: bool,
/// What was actually found
pub found: Lexeme,
}
impl ParseErrorKind for Expected {
const DESCRIPTION: &'static str = "A concrete token was expected";
fn message(&self) -> String {
let list = match &self.expected[..] {
&[] => return "Unsatisfiable expectation".to_string(),
[only] => only.to_string(),
[a, b] => format!("either {a} or {b}"),
[variants @ .., last] => {
format!("any of {} or {last}", variants.iter().join(", "))
},
};
let or_name = if self.or_name { " or a name" } else { "" };
format!("Expected {list}{or_name} but found {}", self.found)
}
}
/// Assert that the entry contains exactly the specified lexeme
pub(super) fn expect(l: Lexeme, e: &Entry, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<()> {
if e.lexeme.strict_eq(&l) {
return Ok(());
}
let found = e.lexeme.clone();
let kind = Expected { expected: vec![l], or_name: false, found };
Err(kind.pack(ctx.range_loc(&e.range)))
}
/// A token reserved for future use was found in the code
pub(super) struct ReservedToken(pub Lexeme);
impl ParseErrorKind for ReservedToken {
const DESCRIPTION: &'static str = "Syntax reserved for future use";
fn message(&self) -> String { format!("{} is a reserved token", self.0) }
}
/// A token was found where it doesn't belong
pub(super) struct BadTokenInRegion {
/// What was found
pub lexeme: Lexeme,
/// Human-readable name of the region where it should not appear
pub region: &'static str,
}
impl ParseErrorKind for BadTokenInRegion {
const DESCRIPTION: &'static str = "An unexpected token was found";
fn message(&self) -> String { format!("{} cannot appear in {}", self.lexeme, self.region) }
}
/// Some construct was searched but not found.
pub(super) struct NotFound(pub &'static str);
impl ParseErrorKind for NotFound {
const DESCRIPTION: &'static str = "A specific lexeme was expected";
fn message(&self) -> String { format!("{} was expected", self.0) }
}
/// :: found on its own somewhere other than a general export
pub(super) struct LeadingNS;
impl ParseErrorKind for LeadingNS {
const DESCRIPTION: &'static str = ":: can only follow a name token";
}
/// Parens don't pair up
pub(super) struct MisalignedParen(pub Lexeme);
impl ParseErrorKind for MisalignedParen {
const DESCRIPTION: &'static str = "(), [] and {} must always pair up";
fn message(&self) -> String { format!("This {} has no pair", self.0) }
}
/// Export line contains a complex name
pub(super) struct NamespacedExport;
impl ParseErrorKind for NamespacedExport {
const DESCRIPTION: &'static str = "Only local names may be exported";
}
/// Export line contains *
pub(super) struct GlobExport;
impl ParseErrorKind for GlobExport {
const DESCRIPTION: &'static str = "Globstars are not allowed in exports";
}
/// Comment never ends
pub(super) struct NoCommentEnd;
impl ParseErrorKind for NoCommentEnd {
const DESCRIPTION: &'static str = "a comment was not closed with `]--`";
}
/// A placeholder's priority is a floating point number
pub(super) struct FloatPlacehPrio;
impl ParseErrorKind for FloatPlacehPrio {
const DESCRIPTION: &'static str =
"a placeholder priority has a decimal point or a negative exponent";
}
/// A number literal decodes to NaN
pub(super) struct NaNLiteral;
impl ParseErrorKind for NaNLiteral {
const DESCRIPTION: &'static str = "float literal decoded to NaN";
}
/// A sequence of digits in a number literal overflows [usize].
pub(super) struct LiteralOverflow;
impl ParseErrorKind for LiteralOverflow {
const DESCRIPTION: &'static str = "number literal described number greater than usize::MAX";
}
/// A digit was expected but something else was found
pub(super) struct ExpectedDigit;
impl ParseErrorKind for ExpectedDigit {
const DESCRIPTION: &'static str = "expected a digit";
}
/// Expected a parenthesized block at the end of the line
pub(super) struct ExpectedBlock;
impl ParseErrorKind for ExpectedBlock {
const DESCRIPTION: &'static str = "Expected a parenthesized block";
}
/// Remove two parentheses from the ends of the cursor
pub(super) fn expect_block<'a>(
tail: Frag<'a>,
typ: PType,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Frag<'a>> {
let (lp, tail) = tail.trim().pop(ctx)?;
expect(Lexeme::LP(typ), lp, ctx)?;
let (rp, tail) = tail.pop_back(ctx)?;
expect(Lexeme::RP(typ), rp, ctx)?;
Ok(tail.trim())
}
/// A namespaced name was expected but a glob pattern or a branching multiname
/// was found.
pub(super) struct ExpectedSingleName;
impl ParseErrorKind for ExpectedSingleName {
const DESCRIPTION: &'static str = "expected a single name, no wildcards, no branches";
}

View File

@@ -0,0 +1,42 @@
//! Entrypoints to the parser that combine lexing and parsing
use never::Never;
use super::context::{FlatLocContext, ParseCtx, ReporterContext};
use super::frag::Frag;
use super::lexer::lex;
use super::sourcefile::parse_module_body;
use crate::error::Reporter;
use crate::location::SourceRange;
use crate::parse::parsed::SourceLine;
use crate::parse::sourcefile::{parse_line, split_lines};
/// Parse a file
pub fn parse_file(ctx: &impl ParseCtx) -> Vec<SourceLine> {
let tokens = lex(vec![], ctx.source().as_str(), ctx, |_| Ok::<_, Never>(false))
.unwrap_or_else(|e| match e {})
.tokens;
if tokens.is_empty() { Vec::new() } else { parse_module_body(Frag::from_slice(&tokens), ctx) }
}
/// Parse a statically defined line sequence
///
/// # Panics
///
/// On any parse error, which is why it only accepts a string literal
pub fn parse_entries(
ctx: &dyn ParseCtx,
text: &'static str,
range: SourceRange,
) -> Vec<SourceLine> {
let reporter = Reporter::new();
let flctx = FlatLocContext::new(ctx, &range);
let ctx = ReporterContext::new(&flctx, &reporter);
let res = lex(vec![], text, &ctx, |_| Ok::<_, Never>(false)).unwrap_or_else(|e| match e {});
let out = split_lines(Frag::from_slice(&res.tokens), &ctx)
.flat_map(|tokens| parse_line(tokens, &ctx).expect("pre-specified source"))
.map(|kind| kind.wrap(range.clone()))
.collect();
reporter.assert();
out
}
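// Usage sketch with a hypothetical context and source string; the `const`
// line form shown is the one rendered by [crate::parse::parsed::Constant]:
//
// let lines = parse_entries(&ctx, "const id := \\x.x", SourceRange::mock());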

View File

@@ -0,0 +1,133 @@
//! The [Frag] is the main input datastructure of parsers. Beyond the slice of
//! tokens, it contains a fallback value that can be used for error reporting if
//! the fragment is empty.
use std::ops::Range;
use super::context::ParseCtx;
use super::errors::{ExpectedEOL, NotFound, ParseErrorKind, UnexpectedEOL};
use super::lexer::{Entry, Lexeme};
use crate::error::ProjectResult;
/// Represents a slice which may or may not contain items, and a fallback entry
/// used for error reporting whenever the errant fragment is empty.
#[must_use = "fragment of code should not be discarded implicitly"]
#[derive(Clone, Copy)]
pub struct Frag<'a> {
/// Entry to place in errors if the fragment contains no tokens
pub fallback: &'a Entry,
/// Tokens to parse
pub data: &'a [Entry],
}
impl<'a> Frag<'a> {
/// Create a new fragment
pub fn new(fallback: &'a Entry, data: &'a [Entry]) -> Self { Self { fallback, data } }
/// Remove comments and line breaks from both ends of the text
pub fn trim(self) -> Self {
let Self { data, fallback } = self;
let front = data.iter().take_while(|e| e.is_filler()).count();
let (_, right) = data.split_at(front);
let back = right.iter().rev().take_while(|e| e.is_filler()).count();
let (data, _) = right.split_at(right.len() - back);
Self { fallback, data }
}
/// Discard the first entry
pub fn step(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Self> {
let Self { data, fallback: Entry { lexeme, range } } = self;
match data.split_first() {
Some((fallback, data)) => Ok(Frag { data, fallback }),
None => Err(UnexpectedEOL(lexeme.clone()).pack(ctx.range_loc(range))),
}
}
/// Get the first entry
pub fn pop(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<(&'a Entry, Self)> {
Ok((self.get(0, ctx)?, self.step(ctx)?))
}
/// Retrieve the entry at an index, or raise an error if the fragment is too
/// short.
pub fn get(self, idx: usize, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<&'a Entry> {
self.data.get(idx).ok_or_else(|| {
let entry = self.data.last().unwrap_or(self.fallback).clone();
UnexpectedEOL(entry.lexeme).pack(ctx.range_loc(&entry.range))
})
}
/// Area covered by this fragment
#[must_use]
pub fn range(self) -> Range<usize> {
self.data.first().map_or_else(
|| self.fallback.range.clone(),
|f| f.range.start..self.data.last().unwrap().range.end,
)
}
/// Find a given token, split the fragment there and read some value from the
/// separator. See also [Frag::find]
pub fn find_map<T>(
self,
msg: &'static str,
ctx: &(impl ParseCtx + ?Sized),
mut f: impl FnMut(&'a Lexeme) -> Option<T>,
) -> ProjectResult<(Self, T, Self)> {
let Self { data, fallback } = self;
let (dot_idx, output) = skip_parenthesized(data.iter())
.find_map(|(i, e)| f(&e.lexeme).map(|t| (i, t)))
.ok_or_else(|| NotFound(msg).pack(ctx.range_loc(&self.range())))?;
let (left, not_left) = data.split_at(dot_idx);
let (middle_ent, right) = not_left.split_first().unwrap();
Ok((Self::new(fallback, left), output, Self::new(middle_ent, right)))
}
/// Split the fragment at a token and return just the two sides.
/// See also [Frag::find_map].
pub fn find(
self,
descr: &'static str,
ctx: &(impl ParseCtx + ?Sized),
mut f: impl FnMut(&Lexeme) -> bool,
) -> ProjectResult<(Self, Self)> {
let (l, _, r) = self.find_map(descr, ctx, |l| Some(l).filter(|l| f(l)))?;
Ok((l, r))
}
/// Remove the last item from the fragment
pub fn pop_back(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<(&'a Entry, Self)> {
let Self { data, fallback } = self;
let (last, data) = (data.split_last())
.ok_or_else(|| UnexpectedEOL(fallback.lexeme.clone()).pack(ctx.range_loc(&fallback.range)))?;
Ok((last, Self { fallback, data }))
}
/// # Panics
///
/// If the slice is empty
pub fn from_slice(data: &'a [Entry]) -> Self {
let fallback = (data.first()).expect("Empty slice cannot be converted into a parseable");
Self { data, fallback }
}
/// Assert that the fragment is empty.
pub fn expect_empty(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<()> {
match self.data.first() {
Some(x) => Err(ExpectedEOL.pack(ctx.range_loc(&x.range))),
None => Ok(()),
}
}
}
fn skip_parenthesized<'a>(
it: impl Iterator<Item = &'a Entry>,
) -> impl Iterator<Item = (usize, &'a Entry)> {
let mut paren_lvl = 1;
it.enumerate().filter(move |(_, e)| {
match e.lexeme {
Lexeme::LP(_) => paren_lvl += 1,
Lexeme::RP(_) => paren_lvl -= 1,
_ => (),
}
paren_lvl <= 1
})
}
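// A minimal check of the skipping rule: the body of a paren pair is dropped,
// but the closing paren itself is still yielded, which lets [Frag::find]
// locate tokens at the top level only.
#[cfg(test)]
mod test {
  use super::skip_parenthesized;
  use crate::parse::lexer::{Entry, Lexeme};
  use crate::parse::parsed::PType;

  #[test]
  fn skips_parenthesized_body() {
    let e = |lexeme| Entry::new(0..0, lexeme);
    let entries = [
      e(Lexeme::BR),
      e(Lexeme::LP(PType::Par)),
      e(Lexeme::BR),
      e(Lexeme::RP(PType::Par)),
      e(Lexeme::BR),
    ];
    let indices: Vec<usize> = skip_parenthesized(entries.iter()).map(|(i, _)| i).collect();
    assert_eq!(indices, vec![0, 3, 4]);
  }
}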

View File

@@ -0,0 +1,65 @@
//! Abstractions for dynamic extensions to the lexer to parse custom literals
use dyn_clone::DynClone;
use never::Never;
use super::context::{FlatLocContext, ParseCtx};
use super::lexer::{lex, Entry, LexRes};
use crate::error::ProjectResult;
use crate::location::SourceRange;
/// Data passed to the recursive sub-lexer
pub struct LexPluginRecur<'a, 'b> {
/// Text to tokenize
pub tail: &'a str,
/// Callback that will be called between lexemes on the leftover text.
/// When it returns true, the lexer exits and leaves the remaining text for
/// you.
pub exit: &'b mut dyn for<'c> FnMut(&'c str) -> ProjectResult<bool>,
}
/// Data and actions available to a lexer plugin
pub trait LexPluginReq<'a> {
/// Text to tokenize
fn tail(&self) -> &'a str;
/// [ParseCtx] instance for calculating locations and such
fn ctx(&self) -> &dyn ParseCtx;
/// Start a child lexer that calls back between lexemes and exits on your
/// command. You can combine this with custom atoms to create holes for
/// expressions in your literals like the template strings of most languages
/// other than Rust.
fn recurse(&self, req: LexPluginRecur<'a, '_>) -> ProjectResult<LexRes<'a>>;
/// Lex an inserted piece of text, especially when translating custom syntax
/// into multiple lexemes.
///
/// # Panics
///
/// If tokenization fails
fn insert(&self, data: &str, range: SourceRange) -> Vec<Entry>;
}
/// External plugin that parses a literal into recognized Orchid lexemes, most
/// likely atoms.
pub trait LexerPlugin: Send + Sync + DynClone {
/// Run the lexer
fn lex<'a>(&self, req: &'_ dyn LexPluginReq<'a>) -> Option<ProjectResult<LexRes<'a>>>;
}
/// Implementation of [LexPluginReq]
pub struct LexPlugReqImpl<'a, 'b, TCtx: ParseCtx> {
/// Text to be lexed
pub tail: &'a str,
/// Context data
pub ctx: &'b TCtx,
}
impl<'a, 'b, TCtx: ParseCtx> LexPluginReq<'a> for LexPlugReqImpl<'a, 'b, TCtx> {
fn tail(&self) -> &'a str { self.tail }
fn ctx(&self) -> &dyn ParseCtx { self.ctx }
fn recurse(&self, req: LexPluginRecur<'a, '_>) -> ProjectResult<LexRes<'a>> {
lex(Vec::new(), req.tail, self.ctx, |s| (req.exit)(s))
}
fn insert(&self, data: &str, range: SourceRange) -> Vec<Entry> {
let ctx = FlatLocContext::new(self.ctx as &dyn ParseCtx, &range);
lex(Vec::new(), data, &ctx, |_| Ok::<_, Never>(false)).unwrap_or_else(|e| match e {}).tokens
}
}
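/// Illustrative sketch of a lexer plugin (hypothetical, registered nowhere):
/// strips a leading `#!` shebang line and produces no tokens for it. Note
/// that returning [Some] with an unchanged tail would trip the zero-progress
/// panic in the main lexer loop, so the plugin always consumes the `#!`.
#[derive(Clone)]
pub struct ShebangLexer;
impl LexerPlugin for ShebangLexer {
  fn lex<'a>(&self, req: &'_ dyn LexPluginReq<'a>) -> Option<ProjectResult<LexRes<'a>>> {
    req.tail().strip_prefix("#!").map(|rest| {
      // Consume up to and including the next line break; if there is none,
      // the whole remaining text was the shebang.
      let tail = rest.split_once('\n').map_or("", |(_, tail)| tail);
      Ok(LexRes { tail, tokens: Vec::new() })
    })
  }
}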

View File

@@ -0,0 +1,318 @@
//! Convert source text into a sequence of tokens. Newlines and comments are
//! included, but spacing is converted into numerical ranges on the elements.
//!
//! Literals lose their syntax form here and are handled in an abstract
//! representation from here on
use std::fmt;
use std::ops::Range;
use std::sync::Arc;
use intern_all::{i, Tok};
use itertools::Itertools;
use ordered_float::NotNan;
use super::context::ParseCtx;
use super::errors::{FloatPlacehPrio, NoCommentEnd};
use super::lex_plugin::LexerPlugin;
use super::numeric::{numstart, parse_num, print_nat16};
use crate::foreign::atom::AtomGenerator;
use crate::libs::std::number::Numeric;
use crate::parse::errors::ParseErrorKind;
use crate::parse::lex_plugin::LexPlugReqImpl;
use crate::parse::numeric::{numchar, NumericLexer};
use crate::parse::parsed::{PHClass, PType, Placeholder};
/// A lexeme and the location where it was found
#[derive(Clone, Debug)]
pub struct Entry {
/// the lexeme
pub lexeme: Lexeme,
/// the range in bytes
pub range: Range<usize>,
}
impl Entry {
/// Checks if the lexeme is a comment or line break
#[must_use]
pub fn is_filler(&self) -> bool { matches!(self.lexeme, Lexeme::Comment(_) | Lexeme::BR) }
/// Create a new entry
#[must_use]
pub fn new(range: Range<usize>, lexeme: Lexeme) -> Self { Self { lexeme, range } }
}
impl fmt::Display for Entry {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.lexeme.fmt(f) }
}
impl PartialEq<Lexeme> for Entry {
fn eq(&self, other: &Lexeme) -> bool { self.lexeme == *other }
}
/// A unit of syntax
#[derive(Clone, Debug, PartialEq)]
pub enum Lexeme {
/// Atoms parsed by plugins
Atom(AtomGenerator),
/// Keyword or name
Name(Tok<String>),
/// Macro operator `=N=>` where `N` is the rule priority
Arrow(NotNan<f64>),
/// `:=`
Walrus,
/// Line break
BR,
/// `::`
NS,
/// Left paren `([{`
LP(PType),
/// Right paren `)]}`
RP(PType),
/// `\`
BS,
/// `@`
At,
/// `:`
Type,
/// comment
Comment(Arc<String>),
/// placeholder in a macro.
Placeh(Placeholder),
}
impl fmt::Display for Lexeme {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Atom(a) => write!(f, "{a:?}"),
Self::Name(token) => write!(f, "{}", **token),
Self::Walrus => write!(f, ":="),
Self::Arrow(prio) => write!(f, "={}=>", print_nat16(*prio)),
Self::NS => write!(f, "::"),
Self::LP(t) => write!(f, "{}", t.l()),
Self::RP(t) => write!(f, "{}", t.r()),
Self::BR => writeln!(f),
Self::BS => write!(f, "\\"),
Self::At => write!(f, "@"),
Self::Type => write!(f, ":"),
Self::Comment(text) => write!(f, "--[{}]--", text),
Self::Placeh(ph) => write!(f, "{ph}"),
}
}
}
impl Lexeme {
/// Compare lexemes for equality. It's `strict` because for atoms it uses the
/// strict equality comparison
pub fn strict_eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Arrow(f1), Self::Arrow(f2)) => f1 == f2,
(Self::At, Self::At) | (Self::BR, Self::BR) => true,
(Self::BS, Self::BS) => true,
(Self::NS, Self::NS) | (Self::Type, Self::Type) => true,
(Self::Walrus, Self::Walrus) => true,
(Self::Atom(a1), Self::Atom(a2)) => a1.run().0.parser_eq(&*a2.run().0),
(Self::Comment(c1), Self::Comment(c2)) => c1 == c2,
(Self::LP(p1), Self::LP(p2)) | (Self::RP(p1), Self::RP(p2)) => p1 == p2,
(Self::Name(n1), Self::Name(n2)) => n1 == n2,
(Self::Placeh(ph1), Self::Placeh(ph2)) => ph1 == ph2,
(..) => false,
}
}
}
/// Data returned from the lexer
pub struct LexRes<'a> {
/// Leftover text. If the bail callback never returned true, this is empty
pub tail: &'a str,
/// Lexemes extracted from the text
pub tokens: Vec<Entry>,
}
/// Neatly format source code
#[allow(unused)]
pub fn format(lexed: &[Entry]) -> String { lexed.iter().join(" ") }
/// Character filter that can appear in a keyword or name
pub fn namechar(c: char) -> bool { c.is_alphanumeric() | (c == '_') }
/// Character filter that can start a name
pub fn namestart(c: char) -> bool { c.is_alphabetic() | (c == '_') }
/// Character filter that can appear in operators.
pub fn opchar(c: char) -> bool {
!namestart(c) && !numstart(c) && !c.is_whitespace() && !"()[]{},'\"\\".contains(c)
}
/// Split off all characters from the beginning that match a filter
pub fn split_filter(s: &str, mut pred: impl FnMut(char) -> bool) -> (&str, &str) {
s.find(|c| !pred(c)).map_or((s, ""), |i| s.split_at(i))
}
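// e.g. split_filter("123abc", |c| c.is_ascii_digit()) == ("123", "abc")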
fn lit_table() -> impl IntoIterator<Item = (&'static str, Lexeme)> {
[
("\\", Lexeme::BS),
("@", Lexeme::At),
("(", Lexeme::LP(PType::Par)),
("[", Lexeme::LP(PType::Sqr)),
("{", Lexeme::LP(PType::Curl)),
(")", Lexeme::RP(PType::Par)),
("]", Lexeme::RP(PType::Sqr)),
("}", Lexeme::RP(PType::Curl)),
("\n", Lexeme::BR),
(":=", Lexeme::Walrus),
("::", Lexeme::NS),
(":", Lexeme::Type),
]
}
static BUILTIN_ATOMS: &[&dyn LexerPlugin] = &[&NumericLexer];
/// Convert source code to a flat list of tokens. The bail callback will be
/// called between lexemes. When it returns true, the remaining text is
/// returned without processing.
pub fn lex<'a, E>(
mut tokens: Vec<Entry>,
mut data: &'a str,
ctx: &'_ impl ParseCtx,
mut bail: impl FnMut(&str) -> Result<bool, E>,
) -> Result<LexRes<'a>, E> {
let mut prev_len = data.len() + 1;
'tail: loop {
if prev_len == data.len() {
panic!("got stuck at {data:?}, parsed {:?}", tokens.last().unwrap());
}
prev_len = data.len();
data = data.trim_start_matches(|c: char| c.is_whitespace() && c != '\n');
if bail(data)? {
return Ok(LexRes { tokens, tail: data });
}
let mut chars = data.chars();
let head = match chars.next() {
None => return Ok(LexRes { tokens, tail: data }),
Some(h) => h,
};
for lexer in ctx.lexers().chain(BUILTIN_ATOMS.iter().copied()) {
let req = LexPlugReqImpl { tail: data, ctx };
if let Some(res) = lexer.lex(&req) {
let LexRes { tail, tokens: mut new_tokens } =
ctx.reporter().fallback(res, |_| LexRes { tail: "", tokens: vec![] });
// fallback: no tokens left, no additional tokens parsed
if tail.len() == data.len() {
panic!("lexer plugin consumed 0 characters")
}
tokens.append(&mut new_tokens);
data = tail;
continue 'tail;
}
}
for (prefix, lexeme) in lit_table() {
if let Some(tail) = data.strip_prefix(prefix) {
tokens.push(Entry::new(ctx.range(prefix.len(), tail), lexeme.clone()));
data = tail;
continue 'tail;
}
}
if let Some(tail) = data.strip_prefix(',') {
tokens.push(Entry::new(ctx.range(1, tail), Lexeme::Name(i!(str: ","))));
data = tail;
continue 'tail;
}
if let Some(tail) = data.strip_prefix("--[") {
let (note, tail) = tail.split_once("]--").unwrap_or_else(|| {
ctx.reporter().report(NoCommentEnd.pack(ctx.source_range(tail.len(), "")));
(tail, "") // fallback: the rest of the file is in the comment
});
let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
tokens.push(Entry::new(ctx.range(note.len() + 3, tail), lexeme));
data = tail;
continue 'tail;
}
if let Some(tail) = data.strip_prefix("--") {
let (note, tail) = split_filter(tail, |c| c != '\n');
let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
tokens.push(Entry::new(ctx.range(note.len(), tail), lexeme));
data = tail;
continue 'tail;
}
// Parse a rule arrow
if let Some(tail) = data.strip_prefix('=') {
if tail.chars().next().map_or(false, numstart) {
let (num, post_num) = split_filter(tail, numchar);
if let Some(tail) = post_num.strip_prefix("=>") {
let prio = parse_num(num).unwrap_or_else(|e| {
ctx.reporter().report(e.into_proj(num.len(), post_num, ctx));
Numeric::Uint(0)
});
let lexeme = Lexeme::Arrow(prio.as_float());
tokens.push(Entry::new(ctx.range(num.len() + 3, tail), lexeme));
data = tail;
continue 'tail;
}
}
}
// Parse scalar placeholder $_name or $name
if let Some(tail) = data.strip_prefix('$') {
let (nameonly, tail) = tail.strip_prefix('_').map_or((false, tail), |t| (true, t));
let (name, tail) = split_filter(tail, namechar);
if !name.is_empty() {
let class = if nameonly { PHClass::Name } else { PHClass::Scalar };
let lexeme = Lexeme::Placeh(Placeholder { name: i(name), class });
tokens.push(Entry::new(ctx.range(name.len() + 1, tail), lexeme));
data = tail;
continue 'tail;
}
}
// Parse vectorial placeholder. `..` or `...`, then `$name`, then an optional
// `:n` where n is a number.
if let Some(tail) = data.strip_prefix("..") {
let (nonzero, tail) = tail.strip_prefix('.').map_or((false, tail), |t| (true, t));
if let Some(tail) = tail.strip_prefix('$') {
let (name, tail) = split_filter(tail, namechar);
if !name.is_empty() {
let (prio, priolen, tail) = tail
.strip_prefix(':')
.map(|tail| split_filter(tail, numchar))
.filter(|(num, _)| !num.is_empty())
.map(|(num_str, tail)| {
let p = ctx.reporter().fallback(
parse_num(num_str).map_err(|e| e.into_proj(num_str.len(), tail, ctx)).and_then(
|num| match num {
Numeric::Uint(usize) => Ok(usize),
Numeric::Float(_) =>
Err(FloatPlacehPrio.pack(ctx.source_range(num_str.len(), tail))),
},
),
|_| 0,
);
(p, num_str.len() + 1, tail)
})
.unwrap_or((0, 0, tail));
let byte_len = if nonzero { 4 } else { 3 } + priolen + name.len();
let class = PHClass::Vec { nonzero, prio };
let lexeme = Lexeme::Placeh(Placeholder { name: i(name), class });
tokens.push(Entry::new(ctx.range(byte_len, tail), lexeme));
data = tail;
continue 'tail;
}
}
}
if namestart(head) {
let (name, tail) = split_filter(data, namechar);
if !name.is_empty() {
let lexeme = Lexeme::Name(i(name));
tokens.push(Entry::new(ctx.range(name.len(), tail), lexeme));
data = tail;
continue 'tail;
}
}
if opchar(head) {
let (name, tail) = split_filter(data, opchar);
if !name.is_empty() {
let lexeme = Lexeme::Name(i(name));
tokens.push(Entry::new(ctx.range(name.len(), tail), lexeme));
data = tail;
continue 'tail;
}
}
unreachable!(r#"opchar is pretty much defined as "not namechar" "#)
}
}
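// For illustration, the input `foo := \x. x` lexes approximately to
//   Name("foo"), Walrus, BS, Name("x"), Name("."), Name("x")
// The dot becomes an operator name via `opchar`; whitespace survives only in
// the byte ranges attached to each entry.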

View File

@@ -0,0 +1,12 @@
//! Parser, and abstractions for interacting with it from language extensions
pub mod context;
pub mod errors;
pub mod facade;
pub mod frag;
pub mod lex_plugin;
pub mod lexer;
pub mod multiname;
pub mod numeric;
pub mod parse_plugin;
pub mod parsed;
mod sourcefile;

View File

@@ -0,0 +1,146 @@
//! Parse the tree-like name sets used to represent imports
use std::collections::VecDeque;
use std::ops::Range;
use intern_all::{i, Tok};
use super::context::ParseCtx;
use super::errors::{Expected, ParseErrorKind};
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use crate::error::ProjectResult;
use crate::location::SourceRange;
use crate::name::VPath;
use crate::parse::parsed::{Import, PType};
use crate::utils::boxed_iter::{box_chain, box_once, BoxedIter};
struct Subresult {
glob: bool,
deque: VecDeque<Tok<String>>,
range: Range<usize>,
}
impl Subresult {
#[must_use]
fn new_glob(range: &Range<usize>) -> Self {
Self { glob: true, deque: VecDeque::new(), range: range.clone() }
}
#[must_use]
fn new_named(name: Tok<String>, range: &Range<usize>) -> Self {
Self { glob: false, deque: VecDeque::from([name]), range: range.clone() }
}
#[must_use]
fn push_front(mut self, name: Tok<String>) -> Self {
self.deque.push_front(name);
self
}
#[must_use]
fn finalize(self, ctx: &(impl ParseCtx + ?Sized)) -> Import {
let Self { mut deque, glob, range } = self;
debug_assert!(glob || !deque.is_empty(), "The constructors forbid this");
let name = if glob { None } else { deque.pop_back() };
let range = ctx.range_loc(&range);
Import { name, range, path: VPath(deque.into()) }
}
}
fn parse_multiname_branch<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(BoxedIter<'a, Subresult>, Frag<'a>)> {
let comma = i!(str: ",");
let (subnames, cursor) = parse_multiname_rec(cursor, ctx)?;
let (Entry { lexeme, range }, cursor) = cursor.trim().pop(ctx)?;
match &lexeme {
Lexeme::RP(PType::Par) => Ok((subnames, cursor)),
Lexeme::Name(n) if n == &comma => {
let (tail, cont) = parse_multiname_branch(cursor, ctx)?;
Ok((box_chain!(subnames, tail), cont))
},
_ => {
let expected = vec![Lexeme::Name(comma), Lexeme::RP(PType::Par)];
let err = Expected { expected, or_name: false, found: lexeme.clone() };
Err(err.pack(SourceRange { range: range.clone(), code: ctx.code_info() }))
},
}
}
fn parse_multiname_rec<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(BoxedIter<'a, Subresult>, Frag<'a>)> {
let (head, mut cursor) = cursor.trim().pop(ctx)?;
match &head.lexeme {
Lexeme::LP(PType::Par) => parse_multiname_branch(cursor, ctx),
Lexeme::LP(PType::Sqr) => {
let mut names = Vec::new();
loop {
let (Entry { lexeme, range }, tail) = cursor.trim().pop(ctx)?;
cursor = tail;
match lexeme {
Lexeme::Name(n) => names.push((n.clone(), range)),
Lexeme::RP(PType::Sqr) => break,
_ => {
let err = Expected {
expected: vec![Lexeme::RP(PType::Sqr)],
or_name: true,
found: lexeme.clone(),
};
return Err(err.pack(ctx.range_loc(range)));
},
}
}
Ok((
Box::new(
names.into_iter().map(|(name, location)| Subresult::new_named(name.clone(), location)),
),
cursor,
))
},
Lexeme::Name(n) if *n == i!(str: "*") =>
Ok((box_once(Subresult::new_glob(&head.range)), cursor)),
Lexeme::Name(n) if ![i!(str: ","), i!(str: "*")].contains(n) => {
let cursor = cursor.trim();
if cursor.get(0, ctx).map_or(false, |e| e.lexeme.strict_eq(&Lexeme::NS)) {
let cursor = cursor.step(ctx)?;
let (out, cursor) = parse_multiname_rec(cursor, ctx)?;
let out = Box::new(out.map(|sr| sr.push_front(n.clone())));
Ok((out, cursor))
} else {
Ok((box_once(Subresult::new_named(n.clone(), &head.range)), cursor))
}
},
_ => {
let expected = vec![Lexeme::LP(PType::Par)];
let err = Expected { expected, or_name: true, found: head.lexeme.clone() };
Err(err.pack(ctx.range_loc(&head.range)))
},
}
}
/// Parse a tree that describes several names. The tree can be
///
/// - name (except `,` or `*`)
/// - name (except `,` or `*`) `::` tree
/// - `(` tree `,` tree ... `)`
/// - `*` (wildcard)
/// - `[` name name ... `]` (including `,` or `*`).
///
/// Examples of valid syntax:
///
/// ```txt
/// foo
/// foo::bar::baz
/// foo::bar::(baz, quz::quux, fimble::*)
/// foo::bar::[baz quz * +]
/// ```
pub fn parse_multiname<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(Vec<Import>, Frag<'a>)> {
let (output, cont) = parse_multiname_rec(cursor, ctx)?;
Ok((output.map(|sr| sr.finalize(ctx)).collect(), cont))
}
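// A smoke test of the tree syntax described above: `foo::*` is a single glob
// import under the path `foo`.
#[cfg(test)]
mod test {
  use intern_all::i;

  use super::parse_multiname;
  use crate::parse::context::MockContext;
  use crate::parse::frag::Frag;
  use crate::parse::lexer::{Entry, Lexeme};

  #[test]
  fn parses_glob_import() {
    let entries = [
      Entry::new(0..3, Lexeme::Name(i!(str: "foo"))),
      Entry::new(3..5, Lexeme::NS),
      Entry::new(5..6, Lexeme::Name(i!(str: "*"))),
    ];
    let ctx = MockContext::new();
    let (imports, tail) = parse_multiname(Frag::from_slice(&entries), &ctx).unwrap();
    assert_eq!(imports.len(), 1);
    assert!(imports[0].name.is_none(), "a glob import has no terminal name");
    tail.expect_empty(&ctx).unwrap();
  }
}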

View File

@@ -0,0 +1,179 @@
//! Parse a float or integer. These functions are also used for the macro
//! priority numbers
use std::num::IntErrorKind;
use std::ops::Range;
use ordered_float::NotNan;
use super::context::ParseCtx;
use super::errors::{ExpectedDigit, LiteralOverflow, NaNLiteral, ParseErrorKind};
use super::lex_plugin::LexPluginReq;
#[allow(unused)] // for doc
use super::lex_plugin::LexerPlugin;
use super::lexer::{split_filter, Entry, LexRes, Lexeme};
use crate::error::{ProjectErrorObj, ProjectResult};
use crate::foreign::atom::AtomGenerator;
use crate::foreign::inert::Inert;
use crate::libs::std::number::Numeric;
/// Reasons why [parse_num] might fail. See [NumError].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NumErrorKind {
/// The literal describes [f64::NAN]
NaN,
/// Some integer appearing in the literal overflows [usize]
Overflow,
/// A character that isn't a digit in the given base was found
InvalidDigit,
}
impl NumErrorKind {
fn from_int(kind: &IntErrorKind) -> Self {
match kind {
IntErrorKind::InvalidDigit => Self::InvalidDigit,
IntErrorKind::NegOverflow | IntErrorKind::PosOverflow => Self::Overflow,
_ => panic!("Impossible error condition"),
}
}
}
/// Error produced by [parse_num]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NumError {
/// Location
pub range: Range<usize>,
/// Reason
pub kind: NumErrorKind,
}
impl NumError {
/// Convert into [ProjectErrorObj]
pub fn into_proj(
self,
len: usize,
tail: &str,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectErrorObj {
let start = ctx.source().len() - tail.len() - len + self.range.start;
let location = ctx.range_loc(&(start..start + self.range.len()));
match self.kind {
NumErrorKind::NaN => NaNLiteral.pack(location),
NumErrorKind::InvalidDigit => ExpectedDigit.pack(location),
NumErrorKind::Overflow => LiteralOverflow.pack(location),
}
}
}
/// Parse a number literal out of text
pub fn parse_num(string: &str) -> Result<Numeric, NumError> {
let overflow_err = NumError { range: 0..string.len(), kind: NumErrorKind::Overflow };
let (radix, noprefix, pos) = (string.strip_prefix("0x").map(|s| (16u8, s, 2)))
.or_else(|| string.strip_prefix("0b").map(|s| (2u8, s, 2)))
.or_else(|| string.strip_prefix("0o").map(|s| (8u8, s, 2)))
.unwrap_or((10u8, string, 0));
// fallback: no radix prefix, so base 10, the whole string, zero offset
let (base, exponent) = match noprefix.split_once('p') {
Some((b, e)) => {
let (s, d, len) = e.strip_prefix('-').map_or((1, e, 0), |ue| (-1, ue, 1));
(b, s * int_parse(d, 10, pos + b.len() + 1 + len)? as i32)
},
None => (noprefix, 0),
};
match base.split_once('.') {
None => {
let base_usize = int_parse(base, radix, pos)?;
if let Ok(pos_exp) = u32::try_from(exponent) {
if let Some(radical) = usize::from(radix).checked_pow(pos_exp) {
let number = base_usize.checked_mul(radical).ok_or(overflow_err)?;
return Ok(Numeric::Uint(number));
}
}
let f = (base_usize as f64) * (radix as f64).powi(exponent);
let err = NumError { range: 0..string.len(), kind: NumErrorKind::NaN };
Ok(Numeric::Float(NotNan::new(f).map_err(|_| err)?))
},
Some((whole, part)) => {
let whole_n = int_parse(whole, radix, pos)? as f64;
let part_n = int_parse(part, radix, pos + whole.len() + 1)? as f64;
let real_val = whole_n + (part_n / (radix as f64).powi(part.len() as i32));
let f = real_val * (radix as f64).powi(exponent);
Ok(Numeric::Float(NotNan::new(f).expect("None of the inputs are NaN")))
},
}
}
fn int_parse(s: &str, radix: u8, start: usize) -> Result<usize, NumError> {
let s = s.chars().filter(|c| *c != '_').collect::<String>();
let range = start..(start + s.len());
usize::from_str_radix(&s, radix as u32)
.map_err(|e| NumError { range, kind: NumErrorKind::from_int(e.kind()) })
}
/// Filter for characters that can appear in numbers
pub fn numchar(c: char) -> bool { c.is_alphanumeric() | "._-".contains(c) }
/// Filter for characters that can start numbers
pub fn numstart(c: char) -> bool { c.is_ascii_digit() }
/// Print a number as a base-16 floating point literal
#[must_use]
pub fn print_nat16(num: NotNan<f64>) -> String {
if *num == 0.0 {
return "0x0".to_string();
} else if num.is_infinite() {
return match num.is_sign_positive() {
true => "Infinity".to_string(),
false => "-Infinity".to_string(),
};
} else if num.is_nan() {
return "NaN".to_string();
}
let exp = num.log(16.0).floor();
let man = *num / 16_f64.powf(exp);
format!("0x{man}p{exp:.0}")
}
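// For illustration: 8.0 renders as "0x8p0" (mantissa 8, exponent 0) and 0.0
// as "0x0"; the pair is chosen so that mantissa * 16^exponent recovers the
// original value.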
/// [LexerPlugin] for a number literal
#[derive(Clone)]
pub struct NumericLexer;
impl LexerPlugin for NumericLexer {
fn lex<'b>(&self, req: &'_ dyn LexPluginReq<'b>) -> Option<ProjectResult<LexRes<'b>>> {
req.tail().chars().next().filter(|c| numstart(*c)).map(|_| {
let (num_str, tail) = split_filter(req.tail(), numchar);
let ag = match parse_num(num_str) {
Ok(Numeric::Float(f)) => AtomGenerator::cloner(Inert(f)),
Ok(Numeric::Uint(i)) => AtomGenerator::cloner(Inert(i)),
Err(e) => return Err(e.into_proj(num_str.len(), tail, req.ctx())),
};
let range = req.ctx().range(num_str.len(), tail);
let entry = Entry { lexeme: Lexeme::Atom(ag), range };
Ok(LexRes { tail, tokens: vec![entry] })
})
}
}
#[cfg(test)]
mod test {
use crate::libs::std::number::Numeric;
use crate::parse::numeric::parse_num;
#[test]
fn just_ints() {
let test = |s, n| assert_eq!(parse_num(s), Ok(Numeric::Uint(n)));
test("12345", 12345);
test("0xcafebabe", 0xcafebabe);
test("0o751", 0o751);
test("0b111000111", 0b111000111);
}
#[test]
fn decimals() {
let test = |s, n| assert_eq!(parse_num(s).map(|n| n.as_f64()), Ok(n));
test("3.1417", 3.1417);
test("3.1417", 3_f64 + 1417_f64 / 10000_f64);
test("0xf.cafe", 0xf as f64 + 0xcafe as f64 / 0x10000 as f64);
test("34p3", 34000f64);
test("0x2p3", (0x2 * 0x1000) as f64);
test("1.5p3", 1500f64);
test("0x2.5p3", (0x25 * 0x100) as f64);
}
}

View File

@@ -0,0 +1,142 @@
//! Abstractions for dynamic extensions to the parser that act across entries.
//! Macros are the primary syntax extension mechanism, but they only operate
//! within a constant and can't interfere with name resolution.
use std::ops::Range;
use dyn_clone::DynClone;
use intern_all::Tok;
use super::context::ParseCtx;
use super::errors::{expect, expect_block, expect_name};
use super::facade::parse_entries;
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use super::parsed::{Constant, Expr, ModuleBlock, PType, Rule, SourceLine, SourceLineKind};
use super::sourcefile::{
exprv_to_single, parse_const, parse_exprv, parse_line, parse_module, parse_module_body,
parse_nsname, parse_rule, split_lines,
};
use crate::error::{ProjectErrorObj, ProjectResult};
use crate::location::SourceRange;
use crate::name::VName;
use crate::utils::boxed_iter::BoxedIter;
/// Information and actions exposed to [ParseLinePlugin]. A plugin should never
/// import and call the parser directly because it might be executed in a
/// different version of the parser.
pub trait ParsePluginReq<'t> {
// ################ Frag and ParseCtx ################
/// The token sequence this parser must parse
fn frag(&self) -> Frag;
/// Get the location of a fragment
fn frag_loc(&self, f: Frag) -> SourceRange;
/// Convert a numeric byte range into a location
fn range_loc(&self, r: Range<usize>) -> SourceRange;
/// Remove the first token of the fragment
fn pop<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)>;
/// Remove the last element of the fragment
fn pop_back<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)>;
// ################ Parser states ################
/// Split up the lines in a fragment. The fragment must outlive the iterator
/// and the request itself must outlive both
fn split_lines<'a: 'b, 'b>(&'b self, f: Frag<'a>) -> BoxedIter<'b, Frag<'a>>
where 't: 'b + 'a;
/// Parse a sequence of source lines separated by line breaks
fn parse_module_body(&self, frag: Frag) -> ProjectResult<Vec<SourceLine>>;
/// Parse a single source line. This returns a vector because plugins can
/// convert a single line into multiple entries
fn parse_line(&self, frag: Frag) -> ProjectResult<Vec<SourceLineKind>>;
/// Parse a macro rule `<exprv> =prio=> <exprv>`
fn parse_rule(&self, frag: Frag) -> ProjectResult<Rule>;
/// Parse a constant declaration `<name> := <exprv>`
fn parse_const(&self, frag: Frag) -> ProjectResult<Constant>;
/// Parse a namespaced name `name::name`
fn parse_nsname<'a>(&self, f: Frag<'a>) -> ProjectResult<(VName, Frag<'a>)>;
/// Parse a module declaration. `<name> ( <module_body> )`
fn parse_module(&self, frag: Frag) -> ProjectResult<ModuleBlock>;
/// Parse a sequence of expressions. In principle, it never makes sense to
/// parse a single expression because it could always be a macro invocation.
fn parse_exprv<'a>(&self, f: Frag<'a>, p: Option<PType>) -> ProjectResult<(Vec<Expr>, Frag<'a>)>;
/// Parse a prepared string of code
fn parse_entries(&self, t: &'static str, r: SourceRange) -> Vec<SourceLine>;
/// Convert a sequence of expressions to a single one by parenthesization if
/// necessary
fn vec_to_single(&self, fallback: &Entry, v: Vec<Expr>) -> ProjectResult<Expr>;
// ################ Assertions ################
/// Unwrap a single name token or raise an error
fn expect_name(&self, entry: &Entry) -> ProjectResult<Tok<String>>;
/// Assert that the entry contains exactly the specified lexeme
fn expect(&self, l: Lexeme, e: &Entry) -> ProjectResult<()>;
/// Remove two parentheses from the ends of the cursor
fn expect_block<'a>(&self, f: Frag<'a>, p: PType) -> ProjectResult<Frag<'a>>;
/// Ensure that the fragment is empty
fn expect_empty(&self, f: Frag) -> ProjectResult<()>;
/// Report a fatal error while also producing output to be consumed by later
/// stages for improved error reporting
fn report_err(&self, e: ProjectErrorObj);
}
/// External plugin that parses an unrecognized source line into lines of
/// recognized types
pub trait ParseLinePlugin: Sync + Send + DynClone {
/// Attempt to parse a line. Returns [None] if the line isn't recognized,
/// [Some][Err] if it's recognized but incorrect.
fn parse(&self, req: &dyn ParsePluginReq) -> Option<ProjectResult<Vec<SourceLineKind>>>;
}
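/// Illustrative sketch of a line plugin (hypothetical, registered nowhere):
/// recognizes the bare line `noop` and expands it to no source lines at all.
#[derive(Clone)]
pub struct NoOpLinePlugin;
impl ParseLinePlugin for NoOpLinePlugin {
  fn parse(&self, req: &dyn ParsePluginReq) -> Option<ProjectResult<Vec<SourceLineKind>>> {
    // If the line doesn't start with the name `noop`, it isn't ours
    let (head, tail) = req.pop(req.frag()).ok()?;
    let name = req.expect_name(head).ok()?;
    if name.as_str() != "noop" {
      return None;
    }
    // It is ours; the rest of the line must be empty, and we emit nothing
    Some(req.expect_empty(tail).map(|()| Vec::new()))
  }
}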
/// Implementation of [ParsePluginReq] exposing sub-parsers and data to the
/// plugin via dynamic dispatch
pub struct ParsePlugReqImpl<'a, TCtx: ParseCtx + ?Sized> {
/// Fragment of text to be parsed by the plugin
pub frag: Frag<'a>,
/// Context for recursive commands and to expose to the plugin
pub ctx: &'a TCtx,
}
impl<'ty, TCtx: ParseCtx + ?Sized> ParsePluginReq<'ty> for ParsePlugReqImpl<'ty, TCtx> {
fn frag(&self) -> Frag { self.frag }
fn frag_loc(&self, f: Frag) -> SourceRange { self.range_loc(f.range()) }
fn range_loc(&self, r: Range<usize>) -> SourceRange { self.ctx.range_loc(&r) }
fn pop<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)> { f.pop(self.ctx) }
fn pop_back<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)> {
f.pop_back(self.ctx)
}
fn split_lines<'a: 'b, 'b>(&'b self, f: Frag<'a>) -> BoxedIter<'b, Frag<'a>>
where
'ty: 'b,
'ty: 'a,
{
Box::new(split_lines(f, self.ctx))
}
fn parse_module_body(&self, f: Frag) -> ProjectResult<Vec<SourceLine>> {
Ok(parse_module_body(f, self.ctx))
}
fn parse_line(&self, f: Frag) -> ProjectResult<Vec<SourceLineKind>> { parse_line(f, self.ctx) }
fn parse_rule(&self, f: Frag) -> ProjectResult<Rule> { parse_rule(f, self.ctx) }
fn parse_const(&self, f: Frag) -> ProjectResult<Constant> { parse_const(f, self.ctx) }
fn parse_nsname<'a>(&self, f: Frag<'a>) -> ProjectResult<(VName, Frag<'a>)> {
parse_nsname(f, self.ctx)
}
fn parse_module(&self, f: Frag) -> ProjectResult<ModuleBlock> { parse_module(f, self.ctx) }
fn parse_exprv<'a>(&self, f: Frag<'a>, p: Option<PType>) -> ProjectResult<(Vec<Expr>, Frag<'a>)> {
parse_exprv(f, p, self.ctx)
}
fn parse_entries(&self, s: &'static str, r: SourceRange) -> Vec<SourceLine> {
parse_entries(&self.ctx, s, r)
}
fn vec_to_single(&self, fb: &Entry, v: Vec<Expr>) -> ProjectResult<Expr> {
exprv_to_single(fb, v, self.ctx)
}
fn expect_name(&self, e: &Entry) -> ProjectResult<Tok<String>> { expect_name(e, self.ctx) }
fn expect(&self, l: Lexeme, e: &Entry) -> ProjectResult<()> { expect(l, e, self.ctx) }
fn expect_block<'a>(&self, f: Frag<'a>, t: PType) -> ProjectResult<Frag<'a>> {
expect_block(f, t, self.ctx)
}
fn expect_empty(&self, f: Frag) -> ProjectResult<()> { f.expect_empty(self.ctx) }
fn report_err(&self, e: ProjectErrorObj) { self.ctx.reporter().report(e) }
}

View File

@@ -0,0 +1,507 @@
//! Datastructures representing the units of macro execution
//!
//! These structures are produced by the pipeline, processed by the macro
//! executor, and then converted to other usable formats.
use std::fmt;
use std::hash::Hash;
use std::rc::Rc;
use hashbrown::HashSet;
use intern_all::Tok;
use itertools::Itertools;
use ordered_float::NotNan;
use crate::foreign::atom::AtomGenerator;
#[allow(unused)] // for doc
use crate::interpreter::nort;
use crate::location::SourceRange;
use crate::name::{Sym, VName, VPath};
use crate::parse::numeric::print_nat16;
/// A [Clause] with associated metadata
#[derive(Clone, Debug)]
pub struct Expr {
/// The actual value
pub value: Clause,
/// Information about the code that produced this value
pub range: SourceRange,
}
impl Expr {
/// Process all names with the given mapper.
/// Return a new object if anything was processed
#[must_use]
pub fn map_names(&self, pred: &mut impl FnMut(Sym) -> Option<Sym>) -> Option<Self> {
(self.value.map_names(pred)).map(|value| Self { value, range: self.range.clone() })
}
/// Visit all expressions in the tree. The search can be exited early by
/// returning [Some]
///
/// See also [crate::interpreter::nort::Expr::search_all]
pub fn search_all<T>(&self, f: &mut impl FnMut(&Self) -> Option<T>) -> Option<T> {
f(self).or_else(|| self.value.search_all(f))
}
}
/// Visit all expression sequences including this sequence itself.
pub fn search_all_slcs<T>(this: &[Expr], f: &mut impl FnMut(&[Expr]) -> Option<T>) -> Option<T> {
f(this).or_else(|| this.iter().find_map(|expr| expr.value.search_all_slcs(f)))
}
impl Expr {
/// Add the specified prefix to every Name
#[must_use]
pub fn prefix(&self, prefix: &[Tok<String>], except: &impl Fn(Tok<String>) -> bool) -> Self {
Self { value: self.value.prefix(prefix, except), range: self.range.clone() }
}
}
impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.value.fmt(f) }
}
/// Various types of placeholders
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PHClass {
/// Matches multiple tokens, lambdas or parenthesized groups
Vec {
/// If true, must match at least one clause
nonzero: bool,
/// Greediness in the allocation of tokens
prio: usize,
},
/// Matches exactly one token, lambda or parenthesized group
Scalar,
/// Matches exactly one name
Name,
}
/// Properties of a placeholder that matches unknown tokens in macros
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Placeholder {
/// Identifier to pair placeholders in the pattern and template
pub name: Tok<String>,
/// The nature of the token set matched by this placeholder
pub class: PHClass,
}
impl fmt::Display for Placeholder {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let name = &self.name;
match self.class {
PHClass::Scalar => write!(f, "${name}"),
PHClass::Name => write!(f, "$_{name}"),
PHClass::Vec { nonzero, prio } => {
if nonzero { write!(f, "...") } else { write!(f, "..") }?;
write!(f, "${name}:{prio}")
},
}
}
}
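// Rendered forms, matching the placeholder syntax accepted by the lexer:
//   Scalar             -> $name
//   Name               -> $_name
//   Vec (may be empty) -> ..$name:prio
//   Vec (nonzero)      -> ...$name:prio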
/// Different types of brackets supported by Orchid
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum PType {
/// ()
Par,
/// []
Sqr,
/// {}
Curl,
}
impl PType {
/// Left paren character for this paren type
pub fn l(self) -> char {
match self {
PType::Curl => '{',
PType::Par => '(',
PType::Sqr => '[',
}
}
/// Right paren character for this paren type
pub fn r(self) -> char {
match self {
PType::Curl => '}',
PType::Par => ')',
PType::Sqr => ']',
}
}
}
/// An S-expression as read from a source file
#[derive(Debug, Clone)]
pub enum Clause {
/// An opaque non-callable value, eg. a file handle
Atom(AtomGenerator),
/// A c-style name or an operator, eg. `+`, `i`, `foo::bar`
Name(Sym),
/// A parenthesized expression
/// eg. `(print out "hello")`, `[1, 2, 3]`, `{Some(t) => t}`
S(PType, Rc<Vec<Expr>>),
/// A function expression, eg. `\x. x + 1`
Lambda(Rc<Vec<Expr>>, Rc<Vec<Expr>>),
/// A placeholder for macros, eg. `$name`, `...$body`, `...$lhs:1`
Placeh(Placeholder),
}
impl Clause {
/// Extract the expressions from an auto, lambda or S
#[must_use]
pub fn body(&self) -> Option<Rc<Vec<Expr>>> {
match self {
Self::Lambda(_, body) | Self::S(_, body) => Some(body.clone()),
_ => None,
}
}
/// Convert with identical meaning
#[must_use]
pub fn into_expr(self, range: SourceRange) -> Expr {
if let Self::S(PType::Par, body) = &self {
if let [wrapped] = &body[..] {
return wrapped.clone();
}
}
Expr { value: self, range }
}
/// Convert with identical meaning
#[must_use]
pub fn from_exprs(exprs: &[Expr]) -> Option<Self> {
match exprs {
[] => None,
[only] => Some(only.value.clone()),
_ => Some(Self::S(PType::Par, Rc::new(exprs.to_vec()))),
}
}
/// Convert with identical meaning
#[must_use]
pub fn from_exprv(exprv: &Rc<Vec<Expr>>) -> Option<Clause> {
if exprv.len() < 2 { Self::from_exprs(exprv) } else { Some(Self::S(PType::Par, exprv.clone())) }
}
/// Collect all names that appear in this expression.
/// NOTICE: this isn't the total set of unbound names, it's mostly useful to
/// make weak statements for optimization.
#[must_use]
pub fn collect_names(&self) -> HashSet<Sym> {
if let Self::Name(n) = self {
return HashSet::from([n.clone()]);
}
let mut glossary = HashSet::new();
let result = self.search_all(&mut |e| {
if let Clause::Name(n) = &e.value {
glossary.insert(n.clone());
}
None::<()>
});
assert!(result.is_none(), "Callback never returns Some");
glossary
}
/// Process all names with the given mapper.
/// Return a new object if anything was processed
#[must_use]
pub fn map_names(&self, pred: &mut impl FnMut(Sym) -> Option<Sym>) -> Option<Self> {
match self {
Clause::Atom(_) | Clause::Placeh(_) => None,
Clause::Name(name) => pred(name.clone()).map(Clause::Name),
Clause::S(c, body) => {
let mut any_some = false;
let new_body = body
.iter()
.map(|e| {
let val = e.map_names(pred);
any_some |= val.is_some();
val.unwrap_or_else(|| e.clone())
})
.collect();
if any_some { Some(Clause::S(*c, Rc::new(new_body))) } else { None }
},
Clause::Lambda(arg, body) => {
let mut any_some = false;
let new_arg = (arg.iter())
.map(|e| {
let val = e.map_names(pred);
any_some |= val.is_some();
val.unwrap_or_else(|| e.clone())
})
.collect();
let new_body = (body.iter())
.map(|e| {
let val = e.map_names(pred);
any_some |= val.is_some();
val.unwrap_or_else(|| e.clone())
})
.collect();
if any_some { Some(Clause::Lambda(Rc::new(new_arg), Rc::new(new_body))) } else { None }
},
}
}
/// Pair of [Expr::search_all]
pub fn search_all<T>(&self, f: &mut impl FnMut(&Expr) -> Option<T>) -> Option<T> {
match self {
Clause::Lambda(arg, body) =>
arg.iter().chain(body.iter()).find_map(|expr| expr.search_all(f)),
Clause::Name(_) | Clause::Atom(_) | Clause::Placeh(_) => None,
Clause::S(_, body) => body.iter().find_map(|expr| expr.search_all(f)),
}
}
/// Visit all expression sequences. Most useful when looking for some pattern
pub fn search_all_slcs<T>(&self, f: &mut impl FnMut(&[Expr]) -> Option<T>) -> Option<T> {
match self {
Clause::Lambda(arg, body) => search_all_slcs(arg, f).or_else(|| search_all_slcs(body, f)),
Clause::Name(_) | Clause::Atom(_) | Clause::Placeh(_) => None,
Clause::S(_, body) => search_all_slcs(body, f),
}
}
/// Generate a parenthesized expression sequence
pub fn s(delimiter: char, body: impl IntoIterator<Item = Self>, range: SourceRange) -> Self {
let ptype = match delimiter {
'(' => PType::Par,
'[' => PType::Sqr,
'{' => PType::Curl,
_ => panic!("not an opening paren"),
};
let body = body.into_iter().map(|it| it.into_expr(range.clone())).collect();
Self::S(ptype, Rc::new(body))
}
}
impl Clause {
/// Add the specified prefix to every Name
#[must_use]
pub fn prefix(&self, prefix: &[Tok<String>], except: &impl Fn(Tok<String>) -> bool) -> Self {
self
.map_names(&mut |name| match except(name[0].clone()) {
true => None,
false => {
let prefixed = prefix.iter().cloned().chain(name.iter()).collect::<Vec<_>>();
Some(Sym::from_tok(name.tok().interner().i(&prefixed)).unwrap())
},
})
.unwrap_or_else(|| self.clone())
}
}
impl fmt::Display for Clause {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Atom(a) => write!(f, "{a:?}"),
Self::Name(name) => write!(f, "{}", name),
Self::S(t, items) => {
let body = items.iter().join(" ");
write!(f, "{}{body}{}", t.l(), t.r())
},
Self::Lambda(arg, body) => {
let args = arg.iter().join(" ");
let bodys = body.iter().join(" ");
write!(f, "\\{args}.{bodys}")
},
Self::Placeh(ph) => ph.fmt(f),
}
}
}
/// A substitution rule as loaded from source
#[derive(Debug, Clone)]
pub struct Rule {
/// Expressions on the left side of the arrow
pub pattern: Vec<Expr>,
/// Priority number written inside the arrow
pub prio: NotNan<f64>,
/// Expressions on the right side of the arrow
pub template: Vec<Expr>,
}
impl fmt::Display for Rule {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"rule {} ={}=> {}",
self.pattern.iter().join(" "),
print_nat16(self.prio),
self.template.iter().join(" ")
)
}
}
/// A named constant
#[derive(Debug, Clone)]
pub struct Constant {
/// Used to reference the constant
pub name: Tok<String>,
/// The constant value inserted where the name is found
pub value: Expr,
}
impl fmt::Display for Constant {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "const {} := {}", *self.name, self.value)
}
}
/// An import pointing at another module, either specifying the symbol to be
/// imported or importing all available symbols with a globstar (*)
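///
/// A sketch of the two surface forms this can represent:
///
/// ```ignore
/// import self::collections::list
/// import super::prelude::*
/// ```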
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Import {
/// Import path, a sequence of module names. Can either start with
///
/// - `self` to reference the current module
/// - any number of `super` to reference the parent module of the implied
/// `self`
/// - a root name
pub path: VPath,
/// If name is None, this is a wildcard import
pub name: Option<Tok<String>>,
/// Location of the final name segment, which uniquely identifies this name
pub range: SourceRange,
}
impl Import {
/// Constructor
pub fn new(
path: impl IntoIterator<Item = Tok<String>>,
name: Option<Tok<String>>,
range: SourceRange,
) -> Self {
let path = VPath(path.into_iter().collect());
assert!(name.is_some() || !path.0.is_empty(), "import * not allowed");
Self { range, name, path }
}
/// Get the preload target space for this import - the prefix below
/// which all files should be included in the compilation
///
/// Returns the path if this is a glob import, or the path plus the
/// name if this is a specific import
#[must_use]
pub fn nonglob_path(&self) -> VName {
VName::new(self.path.0.iter().chain(&self.name).cloned())
.expect("Everything import (`import *`) not allowed")
}
}
impl fmt::Display for Import {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.name {
None => write!(f, "{}::*", self.path),
Some(n) => write!(f, "{}::{}", self.path, n),
}
}
}
/// A namespace block
#[derive(Debug, Clone)]
pub struct ModuleBlock {
/// Name prefixed to all names in the block
pub name: Tok<String>,
/// Prefixed entries
pub body: Vec<SourceLine>,
}
impl fmt::Display for ModuleBlock {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let bodys = self.body.iter().map(|e| e.to_string()).join("\n");
write!(f, "module {} {{\n{}\n}}", self.name, bodys)
}
}
/// See [Member]
#[derive(Debug, Clone)]
pub enum MemberKind {
/// A substitution rule. Rules apply even when they're not in scope, if the
/// absolute names are present, e.g. because they're produced by other rules
Rule(Rule),
/// A constant (or function) associated with a name
Constant(Constant),
/// A prefixed set of other entries
Module(ModuleBlock),
}
impl MemberKind {
/// Convert to [SourceLine]
pub fn into_line(self, exported: bool, range: SourceRange) -> SourceLine {
SourceLineKind::Member(Member { exported, kind: self }).wrap(range)
}
}
impl fmt::Display for MemberKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Constant(c) => c.fmt(f),
Self::Module(m) => m.fmt(f),
Self::Rule(r) => r.fmt(f),
}
}
}
/// Things that may be prefixed with an export;
/// see [MemberKind]
#[derive(Debug, Clone)]
pub struct Member {
/// What kind of member this is
pub kind: MemberKind,
/// Whether this member is exported or not
pub exported: bool,
}
impl fmt::Display for Member {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self { exported: true, kind } => write!(f, "export {kind}"),
Self { exported: false, kind } => write!(f, "{kind}"),
}
}
}
/// See [SourceLine]
#[derive(Debug, Clone)]
pub enum SourceLineKind {
/// Imports one or all names in a module
Import(Vec<Import>),
/// Comments are kept here in case dev tooling wants to parse documentation
Comment(String),
/// An element with visibility information
Member(Member),
/// A list of tokens exported explicitly. This can also create new exported
/// tokens that the local module doesn't actually define a role for
Export(Vec<(Tok<String>, SourceRange)>),
}
impl SourceLineKind {
/// Wrap in a [SourceLine] at the given location
pub fn wrap(self, range: SourceRange) -> SourceLine { SourceLine { kind: self, range } }
}
impl fmt::Display for SourceLineKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Comment(s) => write!(f, "--[{s}]--"),
Self::Export(s) => {
write!(f, "export ::({})", s.iter().map(|t| &**t.0).join(", "))
},
Self::Member(member) => write!(f, "{member}"),
Self::Import(i) => {
write!(f, "import ({})", i.iter().map(|i| i.to_string()).join(", "))
},
}
}
}
/// Anything the parser might encounter in a file. See [SourceLineKind]
#[derive(Debug, Clone)]
pub struct SourceLine {
/// What we encountered
pub kind: SourceLineKind,
/// Where we encountered it.
pub range: SourceRange,
}
impl fmt::Display for SourceLine {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.kind.fmt(f) }
}

View File

@@ -0,0 +1,313 @@
//! Internal states of the parser.
use std::iter;
use std::rc::Rc;
use intern_all::i;
use itertools::Itertools;
use super::context::ParseCtx;
use super::errors::{
expect, expect_block, expect_name, BadTokenInRegion, ExpectedSingleName, GlobExport, LeadingNS,
MisalignedParen, NamespacedExport, ParseErrorKind, ReservedToken, UnexpectedEOL,
};
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use super::multiname::parse_multiname;
use super::parse_plugin::ParsePlugReqImpl;
use crate::error::ProjectResult;
use crate::name::VName;
use crate::parse::parsed::{
Clause, Constant, Expr, Import, Member, MemberKind, ModuleBlock, PType, Rule, SourceLine,
SourceLineKind,
};
use crate::sym;
/// Split the fragment at each line break outside parentheses
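///
/// A sketch: the token stream of
///
/// ```ignore
/// a b (c
///   d)
/// e
/// ```
///
/// yields the fragments `a b (c d)` and `e`. A line that is fully wrapped
/// in `( )` is also unwrapped by the final mapping step.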
pub fn split_lines<'a>(
module: Frag<'a>,
ctx: &'a (impl ParseCtx + ?Sized),
) -> impl Iterator<Item = Frag<'a>> {
let mut source = module.data.iter().enumerate();
let mut fallback = module.fallback;
let mut last_slice = 0;
let mut finished = false;
iter::from_fn(move || {
let mut paren_count = 0;
for (i, Entry { lexeme, .. }) in source.by_ref() {
match lexeme {
Lexeme::LP(_) => paren_count += 1,
Lexeme::RP(_) => paren_count -= 1,
Lexeme::BR if paren_count == 0 => {
let begin = last_slice;
last_slice = i + 1;
let cur_prev = fallback;
fallback = &module.data[i];
return Some(Frag::new(cur_prev, &module.data[begin..i]));
},
_ => (),
}
}
// Include last line even without trailing newline
if !finished {
finished = true;
return Some(Frag::new(fallback, &module.data[last_slice..]));
}
None
})
.map(Frag::trim)
.map(|s| {
match s.pop(ctx).and_then(|(f, i)| i.pop_back(ctx).map(|(l, i)| (&f.lexeme, i, &l.lexeme))) {
Ok((Lexeme::LP(PType::Par), inner, Lexeme::RP(PType::Par))) => inner.trim(),
_ => s,
}
})
.filter(|l| !l.data.is_empty())
}
/// Parse linebreak-separated entries
pub fn parse_module_body(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> Vec<SourceLine> {
let mut lines = Vec::new();
for l in split_lines(cursor, ctx) {
let kinds = ctx.reporter().fallback(parse_line(l, ctx), |_| vec![]);
let r = ctx.range_loc(&l.range());
lines.extend(kinds.into_iter().map(|kind| SourceLine { range: r.clone(), kind }));
}
lines
}
/// Parse a single, possibly exported entry
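///
/// A sketch of an accepted line; the keywords dispatched on below are
/// `export`, `const`, `macro`, `module` and `import`:
///
/// ```ignore
/// export const greet := print "hello"
/// ```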
pub fn parse_line(
cursor: Frag<'_>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Vec<SourceLineKind>> {
let req = ParsePlugReqImpl { ctx, frag: cursor };
for line_parser in ctx.line_parsers() {
if let Some(result) = line_parser.parse(&req) {
return result;
}
}
let head = cursor.get(0, ctx)?;
match &head.lexeme {
Lexeme::Comment(cmt) => cmt.strip_prefix('|').and_then(|c| c.strip_suffix('|')).map_or_else(
|| parse_line(cursor.step(ctx)?, ctx),
|cmt| Ok(vec![SourceLineKind::Comment(cmt.to_string())]),
),
Lexeme::BR => parse_line(cursor.step(ctx)?, ctx),
Lexeme::Name(n) if **n == "export" =>
parse_export_line(cursor.step(ctx)?, ctx).map(|k| vec![k]),
Lexeme::Name(n) if ["const", "macro", "module"].contains(&n.as_str()) => {
let member = Member { exported: false, kind: parse_member(cursor, ctx)? };
Ok(vec![SourceLineKind::Member(member)])
},
Lexeme::Name(n) if **n == "import" => {
let (imports, cont) = parse_multiname(cursor.step(ctx)?, ctx)?;
cont.expect_empty(ctx)?;
Ok(vec![SourceLineKind::Import(imports)])
},
lexeme => {
let lexeme = lexeme.clone();
Err(BadTokenInRegion { lexeme, region: "start of line" }.pack(ctx.range_loc(&head.range)))
},
}
}
fn parse_export_line(
cursor: Frag<'_>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<SourceLineKind> {
let cursor = cursor.trim();
let head = cursor.get(0, ctx)?;
match &head.lexeme {
Lexeme::NS => {
let (names, cont) = parse_multiname(cursor.step(ctx)?, ctx)?;
cont.expect_empty(ctx)?;
let names = (names.into_iter())
.map(|Import { name, path, range }| match name {
Some(n) if path.is_empty() => Ok((n, range)),
Some(_) => Err(NamespacedExport.pack(range)),
None => Err(GlobExport.pack(range)),
})
.collect::<Result<Vec<_>, _>>()?;
Ok(SourceLineKind::Export(names))
},
Lexeme::Name(n) if ["const", "macro", "module"].contains(&n.as_str()) =>
Ok(SourceLineKind::Member(Member { kind: parse_member(cursor, ctx)?, exported: true })),
lexeme => {
let lexeme = lexeme.clone();
let err = BadTokenInRegion { lexeme, region: "exported line" };
Err(err.pack(ctx.range_loc(&head.range)))
},
}
}
fn parse_member(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<MemberKind> {
let (typemark, cursor) = cursor.trim().pop(ctx)?;
match &typemark.lexeme {
Lexeme::Name(n) if **n == "const" => {
let constant = parse_const(cursor, ctx)?;
Ok(MemberKind::Constant(constant))
},
Lexeme::Name(n) if **n == "macro" => {
let rule = parse_rule(cursor, ctx)?;
Ok(MemberKind::Rule(rule))
},
Lexeme::Name(n) if **n == "module" => {
let module = parse_module(cursor, ctx)?;
Ok(MemberKind::Module(module))
},
lexeme => {
let lexeme = lexeme.clone();
let err = BadTokenInRegion { lexeme, region: "member type" };
Err(err.pack(ctx.range_loc(&typemark.range)))
},
}
}
/// Parse a macro rule
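///
/// A sketch of the fragment shape it expects, with the `macro` keyword
/// already consumed by `parse_member` (`$x` is a placeholder):
///
/// ```ignore
/// (square $x) =100=> (mul $x $x)
/// ```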
pub fn parse_rule(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Rule> {
let (pattern, prio, template) = cursor.find_map("arrow", ctx, |a| match a {
Lexeme::Arrow(p) => Some(*p),
_ => None,
})?;
let (pattern, _) = parse_exprv(pattern, None, ctx)?;
let (template, _) = parse_exprv(template, None, ctx)?;
Ok(Rule { pattern, prio, template })
}
/// Parse a constant declaration
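///
/// A sketch of the fragment shape it expects, with the `const` keyword
/// already consumed by `parse_member`:
///
/// ```ignore
/// main := print "Hello World!"
/// ```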
pub fn parse_const(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Constant> {
let (name_ent, cursor) = cursor.trim().pop(ctx)?;
let name = expect_name(name_ent, ctx)?;
let (walrus_ent, cursor) = cursor.trim().pop(ctx)?;
expect(Lexeme::Walrus, walrus_ent, ctx)?;
let value = ctx.reporter().fallback(
parse_exprv(cursor, None, ctx).and_then(|(body, _)| exprv_to_single(walrus_ent, body, ctx)),
|_| Clause::Name(sym!(__syntax_error__)).into_expr(ctx.range_loc(&cursor.range())),
);
Ok(Constant { name, value })
}
/// Parse a namespaced name. TODO: use this for modules
pub fn parse_nsname<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(VName, Frag<'a>)> {
let (name, tail) = parse_multiname(cursor, ctx)?;
match name.into_iter().exactly_one() {
Ok(Import { name: Some(name), path, .. }) =>
Ok((VName::new([name]).unwrap().prefix(path), tail)),
Err(_) | Ok(Import { name: None, .. }) => {
let range = cursor.data[0].range.start..tail.data[0].range.end;
Err(ExpectedSingleName.pack(ctx.range_loc(&range)))
},
}
}
/// Parse a submodule declaration
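///
/// A sketch of the fragment shape it expects, with the `module` keyword
/// already consumed by `parse_member`; note that the body block is
/// parenthesized:
///
/// ```ignore
/// greeting (
///   export const greet := print "hello"
/// )
/// ```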
pub fn parse_module(
cursor: Frag<'_>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<ModuleBlock> {
let (name_ent, cursor) = cursor.trim().pop(ctx)?;
let name = expect_name(name_ent, ctx)?;
let body = expect_block(cursor, PType::Par, ctx)?;
Ok(ModuleBlock { name, body: parse_module_body(body, ctx) })
}
/// Parse a sequence of expressions
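///
/// A sketch of the constructs handled below: `::`-separated names, atoms,
/// placeholders, parenthesized groups and `\arg. body` lambdas:
///
/// ```ignore
/// \x. std::number::add x 1
/// ```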
pub fn parse_exprv<'a>(
mut cursor: Frag<'a>,
paren: Option<PType>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(Vec<Expr>, Frag<'a>)> {
let mut output = Vec::new();
cursor = cursor.trim();
while let Ok(current) = cursor.get(0, ctx) {
match &current.lexeme {
Lexeme::BR | Lexeme::Comment(_) => unreachable!("Fillers skipped"),
Lexeme::At | Lexeme::Type => {
let err = ReservedToken(current.lexeme.clone());
return Err(err.pack(ctx.range_loc(&current.range)));
},
Lexeme::Atom(a) => {
let value = Clause::Atom(a.clone());
output.push(Expr { value, range: ctx.range_loc(&current.range) });
cursor = cursor.step(ctx)?;
},
Lexeme::Placeh(ph) => {
output
.push(Expr { value: Clause::Placeh(ph.clone()), range: ctx.range_loc(&current.range) });
cursor = cursor.step(ctx)?;
},
Lexeme::Name(n) => {
let mut range = ctx.range_loc(&current.range);
let mut fullname = VName::new([n.clone()]).unwrap();
while cursor.get(1, ctx).is_ok_and(|e| e.lexeme.strict_eq(&Lexeme::NS)) {
let next_seg = cursor.get(2, ctx)?;
range.range.end = next_seg.range.end;
fullname = fullname.suffix([expect_name(next_seg, ctx)?]);
cursor = cursor.step(ctx)?.step(ctx)?;
}
let clause = Clause::Name(fullname.to_sym());
output.push(Expr { value: clause, range });
cursor = cursor.step(ctx)?;
},
Lexeme::NS => return Err(LeadingNS.pack(ctx.range_loc(&current.range))),
Lexeme::RP(c) => match paren {
Some(exp_c) if exp_c == *c => return Ok((output, cursor.step(ctx)?)),
_ => {
let err = MisalignedParen(current.lexeme.clone());
return Err(err.pack(ctx.range_loc(&current.range)));
},
},
Lexeme::LP(c) => {
let (result, leftover) = parse_exprv(cursor.step(ctx)?, Some(*c), ctx)?;
let range = current.range.start..leftover.fallback.range.end;
let value = Clause::S(*c, Rc::new(result));
output.push(Expr { value, range: ctx.range_loc(&range) });
cursor = leftover;
},
Lexeme::BS => {
let dot = i!(str: ".");
let (arg, body) =
(cursor.step(ctx))?.find("A '.'", ctx, |l| l.strict_eq(&Lexeme::Name(dot.clone())))?;
let (arg, _) = parse_exprv(arg, None, ctx)?;
let (body, leftover) = parse_exprv(body, paren, ctx)?;
output.push(Expr {
range: ctx.range_loc(&cursor.range()),
value: Clause::Lambda(Rc::new(arg), Rc::new(body)),
});
return Ok((output, leftover));
},
lexeme => {
let lexeme = lexeme.clone();
let err = BadTokenInRegion { lexeme, region: "expression" };
return Err(err.pack(ctx.range_loc(&current.range)));
},
}
cursor = cursor.trim();
}
Ok((output, Frag::new(cursor.fallback, &[])))
}
/// Wrap an expression list in parentheses if necessary
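///
/// A sketch of the three cases below: a single expression is returned
/// unchanged, several expressions are wrapped as `(...)`, and an empty
/// list is an [UnexpectedEOL] error.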
pub fn exprv_to_single(
fallback: &Entry,
v: Vec<Expr>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Expr> {
match v.len() {
0 => {
let err = UnexpectedEOL(fallback.lexeme.clone());
Err(err.pack(ctx.range_loc(&fallback.range)))
},
1 => Ok(v.into_iter().exactly_one().unwrap()),
_ => {
let f_range = &v.first().unwrap().range;
let l_range = &v.last().unwrap().range;
let range = f_range.map_range(|r| r.start..l_range.end());
Ok(Expr { range, value: Clause::S(PType::Par, Rc::new(v)) })
},
}
}