forked from Orchid/orchid
in midst of refactor
163
orchidlang/src/parse/context.rs
Normal file
@@ -0,0 +1,163 @@
//! Definition and implementations of the parsing context, which is used to
//! expose plugins, location math and error reporting to the parser

use std::ops::Range;
use std::sync::Arc;

use super::lex_plugin::LexerPlugin;
use super::parse_plugin::ParseLinePlugin;
use crate::error::Reporter;
use crate::location::{SourceCode, SourceRange};
use crate::utils::boxed_iter::{box_empty, BoxedIter};
use crate::utils::sequence::Sequence;

/// Trait enclosing all context features
///
/// The main implementation is [ParseCtxImpl]
pub trait ParseCtx {
  /// Get an object describing the file this source code comes from
  #[must_use]
  fn code_info(&self) -> SourceCode;
  /// Get the list of all lexer plugins
  #[must_use]
  fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin>;
  /// Get the list of all parser plugins
  #[must_use]
  fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin>;
  /// Error reporter
  #[must_use]
  fn reporter(&self) -> &Reporter;
  /// Find our position in the text given the text we've yet to parse
  #[must_use]
  fn pos(&self, tail: &str) -> usize {
    let tail_len = tail.len();
    let source_len = self.source().len();
    (self.source().len().checked_sub(tail.len())).unwrap_or_else(|| {
      panic!("tail.len()={tail_len} greater than self.source().len()={source_len}; tail={tail:?}")
    })
  }
  /// Generate a location given the length of a token and the unparsed text
  /// after it. See also [ParseCtx::range_loc] if the maths gets complex.
  #[must_use]
  fn range(&self, len: usize, tl: &str) -> Range<usize> {
    match self.pos(tl).checked_sub(len) {
      Some(start) => start..self.pos(tl),
      None => {
        panic!("len={len} greater than tail.len()={}; tail={tl:?}", tl.len())
      },
    }
  }
  /// Create a contextful location for error reporting
  #[must_use]
  fn source_range(&self, len: usize, tl: &str) -> SourceRange {
    self.range_loc(&self.range(len, tl))
  }
  /// Create a contextful location from a range directly.
  #[must_use]
  fn range_loc(&self, range: &Range<usize>) -> SourceRange {
    SourceRange { code: self.code_info(), range: range.clone() }
  }
  /// Get a reference to the full source text. This should not be used for
  /// position math.
  #[must_use]
  fn source(&self) -> Arc<String> { self.code_info().text.clone() }
}

impl<'a, C: ParseCtx + 'a + ?Sized> ParseCtx for &'a C {
  fn reporter(&self) -> &Reporter { (*self).reporter() }
  fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { (*self).lexers() }
  fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { (*self).line_parsers() }
  fn pos(&self, tail: &str) -> usize { (*self).pos(tail) }
  fn code_info(&self) -> SourceCode { (*self).code_info() }
  fn source(&self) -> Arc<String> { (*self).source() }
  fn range(&self, l: usize, t: &str) -> Range<usize> { (*self).range(l, t) }
}

/// Struct implementing context
#[derive(Clone)]
pub struct ParseCtxImpl<'a, 'b> {
  /// File to be parsed; where it belongs in the tree and its text
  pub code: SourceCode,
  /// Error aggregator
  pub reporter: &'b Reporter,
  /// Lexer plugins for parsing custom literals
  pub lexers: Sequence<'a, &'a (dyn LexerPlugin + 'a)>,
  /// Parser plugins for parsing custom line structures
  pub line_parsers: Sequence<'a, &'a dyn ParseLinePlugin>,
}
impl<'a, 'b> ParseCtx for ParseCtxImpl<'a, 'b> {
  fn reporter(&self) -> &Reporter { self.reporter }
  // Rust doesn't realize that this lifetime is covariant
  #[allow(clippy::map_identity)]
  fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { Box::new(self.lexers.iter().map(|r| r)) }
  #[allow(clippy::map_identity)]
  fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> {
    Box::new(self.line_parsers.iter().map(|r| r))
  }
  fn code_info(&self) -> SourceCode { self.code.clone() }
}

/// Context instance for testing. Implicitly provides a reporter and panics if
/// any errors are reported
pub struct MockContext(pub Reporter);
impl MockContext {
  /// Create a new mock
  pub fn new() -> Self { Self(Reporter::new()) }
}
impl Default for MockContext {
  fn default() -> Self { Self::new() }
}
impl ParseCtx for MockContext {
  fn reporter(&self) -> &Reporter { &self.0 }
  fn pos(&self, tail: &str) -> usize { usize::MAX / 2 - tail.len() }
  // these are expendable
  fn code_info(&self) -> SourceCode { SourceRange::mock().code() }
  fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { box_empty() }
  fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { box_empty() }
}
impl Drop for MockContext {
  fn drop(&mut self) { self.0.assert() }
}

/// Context that assigns the same location to every subset of the source code.
/// Its main use case is to process source code that was dynamically generated
/// in response to some user code. See also [ReporterContext]
pub struct FlatLocContext<'a, C: ParseCtx + ?Sized> {
  sub: &'a C,
  range: &'a SourceRange,
}
impl<'a, C: ParseCtx + ?Sized> FlatLocContext<'a, C> {
  /// Create a new context that will use the same provided range for every
  /// parsed token
  pub fn new(sub: &'a C, range: &'a SourceRange) -> Self { Self { sub, range } }
}
impl<'a, C: ParseCtx + ?Sized> ParseCtx for FlatLocContext<'a, C> {
  fn reporter(&self) -> &Reporter { self.sub.reporter() }
  fn pos(&self, _: &str) -> usize { 0 }
  fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { self.sub.lexers() }
  fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { self.sub.line_parsers() }
  fn code_info(&self) -> SourceCode { self.range.code() }
  fn range(&self, _: usize, _: &str) -> Range<usize> { self.range.range() }
}

/// Context that forwards everything to a wrapped context except for error
/// reporting. See also [FlatLocContext]
pub struct ReporterContext<'a, C: ParseCtx + ?Sized> {
  sub: &'a C,
  reporter: &'a Reporter,
}
impl<'a, C: ParseCtx + ?Sized> ReporterContext<'a, C> {
  /// Create a new context that will collect errors separately and forward
  /// everything else to an enclosed context
  pub fn new(sub: &'a C, reporter: &'a Reporter) -> Self { Self { sub, reporter } }
}
impl<'a, C: ParseCtx + ?Sized> ParseCtx for ReporterContext<'a, C> {
  fn reporter(&self) -> &Reporter { self.reporter }
  fn pos(&self, tail: &str) -> usize { self.sub.pos(tail) }
  fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { self.sub.lexers() }
  fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { self.sub.line_parsers() }
  fn code_info(&self) -> SourceCode { self.sub.code_info() }
  fn range(&self, len: usize, tl: &str) -> Range<usize> { self.sub.range(len, tl) }
  fn range_loc(&self, range: &Range<usize>) -> SourceRange { self.sub.range_loc(range) }
  fn source(&self) -> Arc<String> { self.sub.source() }
  fn source_range(&self, len: usize, tl: &str) -> SourceRange { self.sub.source_range(len, tl) }
}
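The tail-based position math in `pos` and `range` can be exercised with `MockContext`, which anchors positions at `usize::MAX / 2` instead of a real file, so only relative distances are meaningful. A minimal sketch (the `orchidlang::parse::context` path is assumed from this commit's layout):

```rust
use orchidlang::parse::context::{MockContext, ParseCtx};

fn main() {
  let ctx = MockContext::new();
  let base = ctx.pos("");                 // position when no text remains
  assert_eq!(ctx.pos("rest"), base - 4);  // 4 unparsed bytes remain
  // a 3-byte token immediately before the remaining text:
  assert_eq!(ctx.range(3, "rest"), base - 7..base - 4);
}
```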
215
orchidlang/src/parse/errors.rs
Normal file
@@ -0,0 +1,215 @@
//! Errors produced by the parser. Plugins are encouraged to reuse these where
//! applicable.

use intern_all::Tok;
use itertools::Itertools;

use super::context::ParseCtx;
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use crate::error::{ProjectError, ProjectErrorObj, ProjectResult};
use crate::location::{CodeOrigin, SourceRange};
use crate::parse::parsed::PType;

/// Parse error information without a location. Location data is added by the
/// parser.
pub trait ParseErrorKind: Sized + Send + Sync + 'static {
  /// A general description of the error condition
  const DESCRIPTION: &'static str;
  /// A specific description of the error with concrete text sections
  fn message(&self) -> String { Self::DESCRIPTION.to_string() }
  /// Convert this error to a type-erased [ProjectError] to be handled together
  /// with other Orchid errors.
  fn pack(self, range: SourceRange) -> ProjectErrorObj { ParseError { kind: self, range }.pack() }
}

struct ParseError<T> {
  pub range: SourceRange,
  pub kind: T,
}
impl<T: ParseErrorKind> ProjectError for ParseError<T> {
  const DESCRIPTION: &'static str = T::DESCRIPTION;
  fn one_position(&self) -> CodeOrigin { self.range.origin() }
  fn message(&self) -> String { self.kind.message() }
}

/// A line does not begin with an identifying keyword. Raised on the first token
pub(super) struct LineNeedsPrefix(pub Lexeme);
impl ParseErrorKind for LineNeedsPrefix {
  const DESCRIPTION: &'static str = "This linetype requires a prefix";
  fn message(&self) -> String { format!("{} cannot appear at the beginning of a line", self.0) }
}

/// The line ends abruptly. Raised on the last token
pub(super) struct UnexpectedEOL(pub Lexeme);
impl ParseErrorKind for UnexpectedEOL {
  const DESCRIPTION: &'static str = "The line ended abruptly";
  fn message(&self) -> String {
    "In Orchid, all line breaks outside parentheses start a new declaration".to_string()
  }
}

/// The line should have ended. Raised on last valid or first excess token
pub(super) struct ExpectedEOL;
impl ParseErrorKind for ExpectedEOL {
  const DESCRIPTION: &'static str = "Expected the end of the line";
}

/// A name was expected.
pub(super) struct ExpectedName(pub Lexeme);
impl ParseErrorKind for ExpectedName {
  const DESCRIPTION: &'static str = "A name was expected";
  fn message(&self) -> String { format!("Expected a name, found {}", self.0) }
}

/// Unwrap a name or operator.
pub(super) fn expect_name(
  Entry { lexeme, range }: &Entry,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Tok<String>> {
  match lexeme {
    Lexeme::Name(n) => Ok(n.clone()),
    lex => Err(ExpectedName(lex.clone()).pack(ctx.range_loc(range))),
  }
}

/// A specific lexeme was expected
pub(super) struct Expected {
  /// The lexemes that would have been acceptable
  pub expected: Vec<Lexeme>,
  /// Whether a name would also have been acceptable (multiname)
  pub or_name: bool,
  /// What was actually found
  pub found: Lexeme,
}
impl ParseErrorKind for Expected {
  const DESCRIPTION: &'static str = "A concrete token was expected";
  fn message(&self) -> String {
    let list = match &self.expected[..] {
      &[] => return "Unsatisfiable expectation".to_string(),
      [only] => only.to_string(),
      [a, b] => format!("either {a} or {b}"),
      [variants @ .., last] => {
        format!("any of {} or {last}", variants.iter().join(", "))
      },
    };
    let or_name = if self.or_name { " or a name" } else { "" };
    format!("Expected {list}{or_name} but found {}", self.found)
  }
}
/// Assert that the entry contains exactly the specified lexeme
pub(super) fn expect(l: Lexeme, e: &Entry, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<()> {
  if e.lexeme.strict_eq(&l) {
    return Ok(());
  }
  let found = e.lexeme.clone();
  let kind = Expected { expected: vec![l], or_name: false, found };
  Err(kind.pack(ctx.range_loc(&e.range)))
}

/// A token reserved for future use was found in the code
pub(super) struct ReservedToken(pub Lexeme);
impl ParseErrorKind for ReservedToken {
  const DESCRIPTION: &'static str = "Syntax reserved for future use";
  fn message(&self) -> String { format!("{} is a reserved token", self.0) }
}

/// A token was found where it doesn't belong
pub(super) struct BadTokenInRegion {
  /// What was found
  pub lexeme: Lexeme,
  /// Human-readable name of the region where it should not appear
  pub region: &'static str,
}
impl ParseErrorKind for BadTokenInRegion {
  const DESCRIPTION: &'static str = "An unexpected token was found";
  fn message(&self) -> String { format!("{} cannot appear in {}", self.lexeme, self.region) }
}

/// Some construct was searched but not found.
pub(super) struct NotFound(pub &'static str);
impl ParseErrorKind for NotFound {
  const DESCRIPTION: &'static str = "A specific lexeme was expected";
  fn message(&self) -> String { format!("{} was expected", self.0) }
}

/// :: found on its own somewhere other than a general export
pub(super) struct LeadingNS;
impl ParseErrorKind for LeadingNS {
  const DESCRIPTION: &'static str = ":: can only follow a name token";
}

/// Parens don't pair up
pub(super) struct MisalignedParen(pub Lexeme);
impl ParseErrorKind for MisalignedParen {
  const DESCRIPTION: &'static str = "(), [] and {} must always pair up";
  fn message(&self) -> String { format!("This {} has no pair", self.0) }
}

/// Export line contains a complex name
pub(super) struct NamespacedExport;
impl ParseErrorKind for NamespacedExport {
  const DESCRIPTION: &'static str = "Only local names may be exported";
}

/// Export line contains *
pub(super) struct GlobExport;
impl ParseErrorKind for GlobExport {
  const DESCRIPTION: &'static str = "Globstars are not allowed in exports";
}

/// Comment never ends
pub(super) struct NoCommentEnd;
impl ParseErrorKind for NoCommentEnd {
  const DESCRIPTION: &'static str = "a comment was not closed with `]--`";
}

/// A placeholder's priority is a floating point number
pub(super) struct FloatPlacehPrio;
impl ParseErrorKind for FloatPlacehPrio {
  const DESCRIPTION: &'static str =
    "a placeholder priority has a decimal point or a negative exponent";
}

/// A number literal decodes to NaN
pub(super) struct NaNLiteral;
impl ParseErrorKind for NaNLiteral {
  const DESCRIPTION: &'static str = "float literal decoded to NaN";
}

/// A sequence of digits in a number literal overflows [usize].
pub(super) struct LiteralOverflow;
impl ParseErrorKind for LiteralOverflow {
  const DESCRIPTION: &'static str = "number literal described number greater than usize::MAX";
}

/// A digit was expected but something else was found
pub(super) struct ExpectedDigit;
impl ParseErrorKind for ExpectedDigit {
  const DESCRIPTION: &'static str = "expected a digit";
}

/// Expected a parenthesized block at the end of the line
pub(super) struct ExpectedBlock;
impl ParseErrorKind for ExpectedBlock {
  const DESCRIPTION: &'static str = "Expected a parenthesized block";
}
/// Remove two parentheses from the ends of the cursor
pub(super) fn expect_block<'a>(
  tail: Frag<'a>,
  typ: PType,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Frag<'a>> {
  let (lp, tail) = tail.trim().pop(ctx)?;
  expect(Lexeme::LP(typ), lp, ctx)?;
  let (rp, tail) = tail.pop_back(ctx)?;
  expect(Lexeme::RP(typ), rp, ctx)?;
  Ok(tail.trim())
}

/// A namespaced name was expected but a glob pattern or a branching multiname
/// was found.
pub(super) struct ExpectedSingleName;
impl ParseErrorKind for ExpectedSingleName {
  const DESCRIPTION: &'static str = "expected a single name, no wildcards, no branches";
}
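Plugins that need their own error conditions only implement `ParseErrorKind`; `pack` attaches the location and type-erases the result. A hedged sketch with a hypothetical error type (not part of this commit):

```rust
use orchidlang::parse::errors::ParseErrorKind;

/// Hypothetical error raised by a plugin that parses colour literals
struct BadColourLiteral(String);
impl ParseErrorKind for BadColourLiteral {
  const DESCRIPTION: &'static str = "Invalid colour literal";
  fn message(&self) -> String { format!("{} is not a valid colour", self.0) }
}

// At the reporting site, with `ctx`, `text`, `len` and `tail` in scope:
// ctx.reporter().report(BadColourLiteral(text).pack(ctx.source_range(len, tail)));
```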
42
orchidlang/src/parse/facade.rs
Normal file
@@ -0,0 +1,42 @@
//! Entrypoints to the parser that combine lexing and parsing

use never::Never;

use super::context::{FlatLocContext, ParseCtx, ReporterContext};
use super::frag::Frag;
use super::lexer::lex;
use super::sourcefile::parse_module_body;
use crate::error::Reporter;
use crate::location::SourceRange;
use crate::parse::parsed::SourceLine;
use crate::parse::sourcefile::{parse_line, split_lines};

/// Parse a file
pub fn parse_file(ctx: &impl ParseCtx) -> Vec<SourceLine> {
  let tokens = lex(vec![], ctx.source().as_str(), ctx, |_| Ok::<_, Never>(false))
    .unwrap_or_else(|e| match e {})
    .tokens;
  if tokens.is_empty() { Vec::new() } else { parse_module_body(Frag::from_slice(&tokens), ctx) }
}

/// Parse a statically defined line sequence
///
/// # Panics
///
/// On any parse error, which is why it only accepts a string literal
pub fn parse_entries(
  ctx: &dyn ParseCtx,
  text: &'static str,
  range: SourceRange,
) -> Vec<SourceLine> {
  let reporter = Reporter::new();
  let flctx = FlatLocContext::new(ctx, &range);
  let ctx = ReporterContext::new(&flctx, &reporter);
  let res = lex(vec![], text, &ctx, |_| Ok::<_, Never>(false)).unwrap_or_else(|e| match e {});
  let out = split_lines(Frag::from_slice(&res.tokens), &ctx)
    .flat_map(|tokens| parse_line(tokens, &ctx).expect("pre-specified source"))
    .map(|kind| kind.wrap(range.clone()))
    .collect();
  reporter.assert();
  out
}
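For callers, `parse_file` only needs a `ParseCtx`; the simplest one is `ParseCtxImpl`. A rough sketch, not self-contained: `code` (a `SourceCode`) and the two plugin `Sequence`s are assumed to have been built elsewhere.

```rust
use orchidlang::error::Reporter;
use orchidlang::parse::context::ParseCtxImpl;
use orchidlang::parse::facade::parse_file;

// `code: SourceCode`, `lexers` and `line_parsers` assumed in scope
let reporter = Reporter::new();
let ctx = ParseCtxImpl { code, reporter: &reporter, lexers, line_parsers };
let lines = parse_file(&ctx); // Vec<SourceLine>; failures go to `reporter`
reporter.assert();            // panic if anything was reported
```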
133
orchidlang/src/parse/frag.rs
Normal file
@@ -0,0 +1,133 @@
//! The [Frag] is the main input datastructure of parsers. Beyond the slice of
//! tokens, it contains a fallback value that can be used for error reporting if
//! the fragment is empty.

use std::ops::Range;

use super::context::ParseCtx;
use super::errors::{ExpectedEOL, NotFound, ParseErrorKind, UnexpectedEOL};
use super::lexer::{Entry, Lexeme};
use crate::error::ProjectResult;

/// Represents a slice which may or may not contain items, and a fallback entry
/// used for error reporting whenever the errant fragment is empty.
#[must_use = "fragment of code should not be discarded implicitly"]
#[derive(Clone, Copy)]
pub struct Frag<'a> {
  /// Entry to place in errors if the fragment contains no tokens
  pub fallback: &'a Entry,
  /// Tokens to parse
  pub data: &'a [Entry],
}
impl<'a> Frag<'a> {
  /// Create a new fragment
  pub fn new(fallback: &'a Entry, data: &'a [Entry]) -> Self { Self { fallback, data } }

  /// Remove comments and line breaks from both ends of the text
  pub fn trim(self) -> Self {
    let Self { data, fallback } = self;
    let front = data.iter().take_while(|e| e.is_filler()).count();
    let (_, right) = data.split_at(front);
    let back = right.iter().rev().take_while(|e| e.is_filler()).count();
    let (data, _) = right.split_at(right.len() - back);
    Self { fallback, data }
  }

  /// Discard the first entry
  pub fn step(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Self> {
    let Self { data, fallback: Entry { lexeme, range } } = self;
    match data.split_first() {
      Some((fallback, data)) => Ok(Frag { data, fallback }),
      None => Err(UnexpectedEOL(lexeme.clone()).pack(ctx.range_loc(range))),
    }
  }

  /// Get the first entry
  pub fn pop(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<(&'a Entry, Self)> {
    Ok((self.get(0, ctx)?, self.step(ctx)?))
  }

  /// Retrieve an index from a slice or raise an error if it isn't found.
  pub fn get(self, idx: usize, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<&'a Entry> {
    self.data.get(idx).ok_or_else(|| {
      let entry = self.data.last().unwrap_or(self.fallback).clone();
      UnexpectedEOL(entry.lexeme).pack(ctx.range_loc(&entry.range))
    })
  }

  /// Area covered by this fragment
  #[must_use]
  pub fn range(self) -> Range<usize> {
    self.data.first().map_or_else(
      || self.fallback.range.clone(),
      |f| f.range.start..self.data.last().unwrap().range.end,
    )
  }

  /// Find a given token, split the fragment there and read some value from the
  /// separator. See also [Frag::find]
  pub fn find_map<T>(
    self,
    msg: &'static str,
    ctx: &(impl ParseCtx + ?Sized),
    mut f: impl FnMut(&'a Lexeme) -> Option<T>,
  ) -> ProjectResult<(Self, T, Self)> {
    let Self { data, fallback } = self;
    let (dot_idx, output) = skip_parenthesized(data.iter())
      .find_map(|(i, e)| f(&e.lexeme).map(|t| (i, t)))
      .ok_or_else(|| NotFound(msg).pack(ctx.range_loc(&self.range())))?;
    let (left, not_left) = data.split_at(dot_idx);
    let (middle_ent, right) = not_left.split_first().unwrap();
    Ok((Self::new(fallback, left), output, Self::new(middle_ent, right)))
  }

  /// Split the fragment at a token and return just the two sides.
  /// See also [Frag::find_map].
  pub fn find(
    self,
    descr: &'static str,
    ctx: &(impl ParseCtx + ?Sized),
    mut f: impl FnMut(&Lexeme) -> bool,
  ) -> ProjectResult<(Self, Self)> {
    let (l, _, r) = self.find_map(descr, ctx, |l| Some(l).filter(|l| f(l)))?;
    Ok((l, r))
  }

  /// Remove the last item from the fragment
  pub fn pop_back(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<(&'a Entry, Self)> {
    let Self { data, fallback } = self;
    let (last, data) = (data.split_last())
      .ok_or_else(|| UnexpectedEOL(fallback.lexeme.clone()).pack(ctx.range_loc(&fallback.range)))?;
    Ok((last, Self { fallback, data }))
  }

  /// # Panics
  ///
  /// If the slice is empty
  pub fn from_slice(data: &'a [Entry]) -> Self {
    let fallback = (data.first()).expect("Empty slice cannot be converted into a parseable");
    Self { data, fallback }
  }

  /// Assert that the fragment is empty.
  pub fn expect_empty(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<()> {
    match self.data.first() {
      Some(x) => Err(ExpectedEOL.pack(ctx.range_loc(&x.range))),
      None => Ok(()),
    }
  }
}

fn skip_parenthesized<'a>(
  it: impl Iterator<Item = &'a Entry>,
) -> impl Iterator<Item = (usize, &'a Entry)> {
  let mut paren_lvl = 1;
  it.enumerate().filter(move |(_, e)| {
    match e.lexeme {
      Lexeme::LP(_) => paren_lvl += 1,
      Lexeme::RP(_) => paren_lvl -= 1,
      _ => (),
    }
    paren_lvl <= 1
  })
}
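The typical pattern for consumers of `Frag` is trim, pop, dispatch, with `expect_empty` closing out the line. A small hypothetical helper as a sketch:

```rust
use orchidlang::error::ProjectResult;
use orchidlang::parse::context::ParseCtx;
use orchidlang::parse::frag::Frag;

/// Hypothetical: accept a line made up of exactly one meaningful token.
fn parse_marker_line<'a>(
  frag: Frag<'a>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<()> {
  // skip comments and line breaks, then take the first real entry
  let (head, tail) = frag.trim().pop(ctx)?;
  let _ = head; // ... dispatch on head.lexeme here ...
  // any leftover token produces an ExpectedEOL at its own location
  tail.expect_empty(ctx)
}
```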
65
orchidlang/src/parse/lex_plugin.rs
Normal file
@@ -0,0 +1,65 @@
//! Abstractions for dynamic extensions to the lexer to parse custom literals

use dyn_clone::DynClone;
use never::Never;

use super::context::{FlatLocContext, ParseCtx};
use super::lexer::{lex, Entry, LexRes};
use crate::error::ProjectResult;
use crate::location::SourceRange;

/// Data passed to the recursive sub-lexer
pub struct LexPluginRecur<'a, 'b> {
  /// Text to tokenize
  pub tail: &'a str,
  /// Callback that will be called between lexemes on the leftover text.
  /// When it returns true, the lexer exits and leaves the remaining text for
  /// you.
  pub exit: &'b mut dyn for<'c> FnMut(&'c str) -> ProjectResult<bool>,
}

/// Data and actions available to a lexer plugin
pub trait LexPluginReq<'a> {
  /// Text to tokenize
  fn tail(&self) -> &'a str;
  /// [ParseCtx] instance for calculating locations and such
  fn ctx(&self) -> &dyn ParseCtx;
  /// Start a child lexer that calls back between lexemes and exits on your
  /// command. You can combine this with custom atoms to create holes for
  /// expressions in your literals like the template strings of most languages
  /// other than Rust.
  fn recurse(&self, req: LexPluginRecur<'a, '_>) -> ProjectResult<LexRes<'a>>;
  /// Lex an inserted piece of text, especially when translating custom syntax
  /// into multiple lexemes.
  ///
  /// # Panics
  ///
  /// If tokenization fails
  fn insert(&self, data: &str, range: SourceRange) -> Vec<Entry>;
}

/// External plugin that parses a literal into recognized Orchid lexemes, most
/// likely atoms.
pub trait LexerPlugin: Send + Sync + DynClone {
  /// Run the lexer
  fn lex<'a>(&self, req: &'_ dyn LexPluginReq<'a>) -> Option<ProjectResult<LexRes<'a>>>;
}

/// Implementation of [LexPluginReq]
pub struct LexPlugReqImpl<'a, 'b, TCtx: ParseCtx> {
  /// Text to be lexed
  pub tail: &'a str,
  /// Context data
  pub ctx: &'b TCtx,
}
impl<'a, 'b, TCtx: ParseCtx> LexPluginReq<'a> for LexPlugReqImpl<'a, 'b, TCtx> {
  fn tail(&self) -> &'a str { self.tail }
  fn ctx(&self) -> &dyn ParseCtx { self.ctx }
  fn recurse(&self, req: LexPluginRecur<'a, '_>) -> ProjectResult<LexRes<'a>> {
    lex(Vec::new(), req.tail, self.ctx, |s| (req.exit)(s))
  }
  fn insert(&self, data: &str, range: SourceRange) -> Vec<Entry> {
    let ctx = FlatLocContext::new(self.ctx as &dyn ParseCtx, &range);
    lex(Vec::new(), data, &ctx, |_| Ok::<_, Never>(false)).unwrap_or_else(|e| match e {}).tokens
  }
}
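A lexer plugin returns `None` to pass on input it doesn't recognize, so the main lexer can try the next plugin. A hedged sketch of a plugin for a hypothetical `unit` literal, substituting `()` via `insert` (a real plugin would more likely emit an atom):

```rust
use orchidlang::error::ProjectResult;
use orchidlang::parse::lex_plugin::{LexPluginReq, LexerPlugin};
use orchidlang::parse::lexer::LexRes;

#[derive(Clone)]
struct UnitLexer;
impl LexerPlugin for UnitLexer {
  fn lex<'a>(&self, req: &'_ dyn LexPluginReq<'a>) -> Option<ProjectResult<LexRes<'a>>> {
    // not our token -> None, so other plugins and the base lexer get a turn
    // (a real plugin would also check that the next char is not a name char)
    let tail = req.tail().strip_prefix("unit")?;
    // `insert` re-lexes replacement text, pinning every token to one range
    let range = req.ctx().source_range("unit".len(), tail);
    Some(Ok(LexRes { tail, tokens: req.insert("()", range) }))
  }
}
```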
318
orchidlang/src/parse/lexer.rs
Normal file
@@ -0,0 +1,318 @@
//! Convert source text into a sequence of tokens. Newlines and comments are
//! included, but spacing is converted into numerical ranges on the elements.
//!
//! Literals lose their syntax form here and are handled in an abstract
//! representation from here on.

use std::fmt;
use std::ops::Range;
use std::sync::Arc;

use intern_all::{i, Tok};
use itertools::Itertools;
use ordered_float::NotNan;

use super::context::ParseCtx;
use super::errors::{FloatPlacehPrio, NoCommentEnd};
use super::lex_plugin::LexerPlugin;
use super::numeric::{numstart, parse_num, print_nat16};
use crate::foreign::atom::AtomGenerator;
use crate::libs::std::number::Numeric;
use crate::parse::errors::ParseErrorKind;
use crate::parse::lex_plugin::LexPlugReqImpl;
use crate::parse::numeric::{numchar, NumericLexer};
use crate::parse::parsed::{PHClass, PType, Placeholder};

/// A lexeme and the location where it was found
#[derive(Clone, Debug)]
pub struct Entry {
  /// the lexeme
  pub lexeme: Lexeme,
  /// the range in bytes
  pub range: Range<usize>,
}
impl Entry {
  /// Checks if the lexeme is a comment or line break
  #[must_use]
  pub fn is_filler(&self) -> bool { matches!(self.lexeme, Lexeme::Comment(_) | Lexeme::BR) }

  /// Create a new entry
  #[must_use]
  pub fn new(range: Range<usize>, lexeme: Lexeme) -> Self { Self { lexeme, range } }
}

impl fmt::Display for Entry {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.lexeme.fmt(f) }
}

impl PartialEq<Lexeme> for Entry {
  fn eq(&self, other: &Lexeme) -> bool { self.lexeme == *other }
}

/// A unit of syntax
#[derive(Clone, Debug, PartialEq)]
pub enum Lexeme {
  /// Atoms parsed by plugins
  Atom(AtomGenerator),
  /// Keyword or name
  Name(Tok<String>),
  /// Macro operator `=`number`=>`
  Arrow(NotNan<f64>),
  /// `:=`
  Walrus,
  /// Line break
  BR,
  /// `::`
  NS,
  /// Left paren `([{`
  LP(PType),
  /// Right paren `)]}`
  RP(PType),
  /// `\`
  BS,
  /// `@`
  At,
  /// `:`
  Type,
  /// comment
  Comment(Arc<String>),
  /// placeholder in a macro.
  Placeh(Placeholder),
}

impl fmt::Display for Lexeme {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Self::Atom(a) => write!(f, "{a:?}"),
      Self::Name(token) => write!(f, "{}", **token),
      Self::Walrus => write!(f, ":="),
      Self::Arrow(prio) => write!(f, "={}=>", print_nat16(*prio)),
      Self::NS => write!(f, "::"),
      Self::LP(t) => write!(f, "{}", t.l()),
      Self::RP(t) => write!(f, "{}", t.r()),
      Self::BR => writeln!(f),
      Self::BS => write!(f, "\\"),
      Self::At => write!(f, "@"),
      Self::Type => write!(f, ":"),
      Self::Comment(text) => write!(f, "--[{}]--", text),
      Self::Placeh(ph) => write!(f, "{ph}"),
    }
  }
}

impl Lexeme {
  /// Compare lexemes for equality. It's `strict` because for atoms it uses the
  /// strict equality comparison
  pub fn strict_eq(&self, other: &Self) -> bool {
    match (self, other) {
      (Self::Arrow(f1), Self::Arrow(f2)) => f1 == f2,
      (Self::At, Self::At) | (Self::BR, Self::BR) => true,
      (Self::BS, Self::BS) => true,
      (Self::NS, Self::NS) | (Self::Type, Self::Type) => true,
      (Self::Walrus, Self::Walrus) => true,
      (Self::Atom(a1), Self::Atom(a2)) => a1.run().0.parser_eq(&*a2.run().0),
      (Self::Comment(c1), Self::Comment(c2)) => c1 == c2,
      (Self::LP(p1), Self::LP(p2)) | (Self::RP(p1), Self::RP(p2)) => p1 == p2,
      (Self::Name(n1), Self::Name(n2)) => n1 == n2,
      (Self::Placeh(ph1), Self::Placeh(ph2)) => ph1 == ph2,
      (..) => false,
    }
  }
}

/// Data returned from the lexer
pub struct LexRes<'a> {
  /// Leftover text. If the bail callback never returned true, this is empty
  pub tail: &'a str,
  /// Lexemes extracted from the text
  pub tokens: Vec<Entry>,
}

/// Neatly format source code
#[allow(unused)]
pub fn format(lexed: &[Entry]) -> String { lexed.iter().join(" ") }

/// Character filter that can appear in a keyword or name
pub fn namechar(c: char) -> bool { c.is_alphanumeric() | (c == '_') }
/// Character filter that can start a name
pub fn namestart(c: char) -> bool { c.is_alphabetic() | (c == '_') }
/// Character filter that can appear in operators.
pub fn opchar(c: char) -> bool {
  !namestart(c) && !numstart(c) && !c.is_whitespace() && !"()[]{},'\"\\".contains(c)
}

/// Split off all characters from the beginning that match a filter
pub fn split_filter(s: &str, mut pred: impl FnMut(char) -> bool) -> (&str, &str) {
  s.find(|c| !pred(c)).map_or((s, ""), |i| s.split_at(i))
}

fn lit_table() -> impl IntoIterator<Item = (&'static str, Lexeme)> {
  [
    ("\\", Lexeme::BS),
    ("@", Lexeme::At),
    ("(", Lexeme::LP(PType::Par)),
    ("[", Lexeme::LP(PType::Sqr)),
    ("{", Lexeme::LP(PType::Curl)),
    (")", Lexeme::RP(PType::Par)),
    ("]", Lexeme::RP(PType::Sqr)),
    ("}", Lexeme::RP(PType::Curl)),
    ("\n", Lexeme::BR),
    (":=", Lexeme::Walrus),
    ("::", Lexeme::NS),
    (":", Lexeme::Type),
  ]
}

static BUILTIN_ATOMS: &[&dyn LexerPlugin] = &[&NumericLexer];

/// Convert source code to a flat list of tokens. The bail callback will be
/// called between lexemes. When it returns true, the remaining text is
/// returned without processing.
pub fn lex<'a, E>(
  mut tokens: Vec<Entry>,
  mut data: &'a str,
  ctx: &'_ impl ParseCtx,
  mut bail: impl FnMut(&str) -> Result<bool, E>,
) -> Result<LexRes<'a>, E> {
  let mut prev_len = data.len() + 1;
  'tail: loop {
    if prev_len == data.len() {
      panic!("got stuck at {data:?}, parsed {:?}", tokens.last().unwrap());
    }
    prev_len = data.len();
    data = data.trim_start_matches(|c: char| c.is_whitespace() && c != '\n');
    if bail(data)? {
      return Ok(LexRes { tokens, tail: data });
    }
    let mut chars = data.chars();
    let head = match chars.next() {
      None => return Ok(LexRes { tokens, tail: data }),
      Some(h) => h,
    };
    for lexer in ctx.lexers().chain(BUILTIN_ATOMS.iter().copied()) {
      let req = LexPlugReqImpl { tail: data, ctx };
      if let Some(res) = lexer.lex(&req) {
        let LexRes { tail, tokens: mut new_tokens } =
          ctx.reporter().fallback(res, |_| LexRes { tail: "", tokens: vec![] });
        // fallback: no tokens left, no additional tokens parsed
        if tail.len() == data.len() {
          panic!("lexer plugin consumed 0 characters")
        }
        tokens.append(&mut new_tokens);
        data = tail;
        continue 'tail;
      }
    }
    for (prefix, lexeme) in lit_table() {
      if let Some(tail) = data.strip_prefix(prefix) {
        tokens.push(Entry::new(ctx.range(prefix.len(), tail), lexeme.clone()));
        data = tail;
        continue 'tail;
      }
    }

    if let Some(tail) = data.strip_prefix(',') {
      tokens.push(Entry::new(ctx.range(1, tail), Lexeme::Name(i!(str: ","))));
      data = tail;
      continue 'tail;
    }
    if let Some(tail) = data.strip_prefix("--[") {
      let (note, tail) = tail.split_once("]--").unwrap_or_else(|| {
        ctx.reporter().report(NoCommentEnd.pack(ctx.source_range(tail.len(), "")));
        (tail, "") // fallback: the rest of the file is in the comment
      });
      let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
      tokens.push(Entry::new(ctx.range(note.len() + 3, tail), lexeme));
      data = tail;
      continue 'tail;
    }
    if let Some(tail) = data.strip_prefix("--") {
      let (note, tail) = split_filter(tail, |c| c != '\n');
      let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
      tokens.push(Entry::new(ctx.range(note.len(), tail), lexeme));
      data = tail;
      continue 'tail;
    }
    // Parse a rule arrow
    if let Some(tail) = data.strip_prefix('=') {
      if tail.chars().next().map_or(false, numstart) {
        let (num, post_num) = split_filter(tail, numchar);
        if let Some(tail) = post_num.strip_prefix("=>") {
          let prio = parse_num(num).unwrap_or_else(|e| {
            ctx.reporter().report(e.into_proj(num.len(), post_num, ctx));
            Numeric::Uint(0)
          });
          let lexeme = Lexeme::Arrow(prio.as_float());
          tokens.push(Entry::new(ctx.range(num.len() + 3, tail), lexeme));
          data = tail;
          continue 'tail;
        }
      }
    }
    // Parse scalar placeholder $_name or $name
    if let Some(tail) = data.strip_prefix('$') {
      let (nameonly, tail) = tail.strip_prefix('_').map_or((false, tail), |t| (true, t));
      let (name, tail) = split_filter(tail, namechar);
      if !name.is_empty() {
        let class = if nameonly { PHClass::Name } else { PHClass::Scalar };
        let lexeme = Lexeme::Placeh(Placeholder { name: i(name), class });
        tokens.push(Entry::new(ctx.range(name.len() + 1, tail), lexeme));
        data = tail;
        continue 'tail;
      }
    }
    // Parse vectorial placeholder. `..` or `...`, then `$name`, then an optional
    // `:n` where n is a number.
    if let Some(tail) = data.strip_prefix("..") {
      let (nonzero, tail) = tail.strip_prefix('.').map_or((false, tail), |t| (true, t));
      if let Some(tail) = tail.strip_prefix('$') {
        let (name, tail) = split_filter(tail, namechar);
        if !name.is_empty() {
          let (prio, priolen, tail) = tail
            .strip_prefix(':')
            .map(|tail| split_filter(tail, numchar))
            .filter(|(num, _)| !num.is_empty())
            .map(|(num_str, tail)| {
              let p = ctx.reporter().fallback(
                parse_num(num_str).map_err(|e| e.into_proj(num_str.len(), tail, ctx)).and_then(
                  |num| match num {
                    Numeric::Uint(usize) => Ok(usize),
                    Numeric::Float(_) =>
                      Err(FloatPlacehPrio.pack(ctx.source_range(num_str.len(), tail))),
                  },
                ),
                |_| 0,
              );
              (p, num_str.len() + 1, tail)
            })
            .unwrap_or((0, 0, tail));
          let byte_len = if nonzero { 4 } else { 3 } + priolen + name.len();
          let class = PHClass::Vec { nonzero, prio };
          let lexeme = Lexeme::Placeh(Placeholder { name: i(name), class });
          tokens.push(Entry::new(ctx.range(byte_len, tail), lexeme));
          data = tail;
          continue 'tail;
        }
      }
    }
    if namestart(head) {
      let (name, tail) = split_filter(data, namechar);
      if !name.is_empty() {
        let lexeme = Lexeme::Name(i(name));
        tokens.push(Entry::new(ctx.range(name.len(), tail), lexeme));
        data = tail;
        continue 'tail;
      }
    }
    if opchar(head) {
      let (name, tail) = split_filter(data, opchar);
      if !name.is_empty() {
        let lexeme = Lexeme::Name(i(name));
        tokens.push(Entry::new(ctx.range(name.len(), tail), lexeme));
        data = tail;
        continue 'tail;
      }
    }
    unreachable!(r#"opchar is pretty much defined as "not namechar" "#)
  }
}
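A quick way to see what the lexer produces is to run it over a string with `MockContext`, which supplies position math without a real file. A sketch (module paths assumed from this commit's layout):

```rust
use never::Never;
use orchidlang::parse::context::MockContext;
use orchidlang::parse::lexer::{format, lex};

fn main() {
  let ctx = MockContext::new();
  let res = lex(Vec::new(), "foo := bar :: baz", &ctx, |_| Ok::<_, Never>(false))
    .unwrap_or_else(|e| match e {});
  // `:=` and `::` are their own lexemes; everything else here is a Name.
  println!("{}", format(&res.tokens)); // foo := bar :: baz
}
```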
12
orchidlang/src/parse/mod.rs
Normal file
@@ -0,0 +1,12 @@
//! Parser, and abstractions for interacting with it from language extensions
pub mod context;
pub mod errors;
pub mod facade;
pub mod frag;
pub mod lex_plugin;
pub mod lexer;
pub mod multiname;
pub mod numeric;
pub mod parse_plugin;
pub mod parsed;
mod sourcefile;
146
orchidlang/src/parse/multiname.rs
Normal file
@@ -0,0 +1,146 @@
//! Parse the tree-like name sets used to represent imports

use std::collections::VecDeque;
use std::ops::Range;

use intern_all::{i, Tok};

use super::context::ParseCtx;
use super::errors::{Expected, ParseErrorKind};
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use crate::error::ProjectResult;
use crate::location::SourceRange;
use crate::name::VPath;
use crate::parse::parsed::{Import, PType};
use crate::utils::boxed_iter::{box_chain, box_once, BoxedIter};

struct Subresult {
  glob: bool,
  deque: VecDeque<Tok<String>>,
  range: Range<usize>,
}
impl Subresult {
  #[must_use]
  fn new_glob(range: &Range<usize>) -> Self {
    Self { glob: true, deque: VecDeque::new(), range: range.clone() }
  }

  #[must_use]
  fn new_named(name: Tok<String>, range: &Range<usize>) -> Self {
    Self { glob: false, deque: VecDeque::from([name]), range: range.clone() }
  }

  #[must_use]
  fn push_front(mut self, name: Tok<String>) -> Self {
    self.deque.push_front(name);
    self
  }

  #[must_use]
  fn finalize(self, ctx: &(impl ParseCtx + ?Sized)) -> Import {
    let Self { mut deque, glob, range } = self;
    debug_assert!(glob || !deque.is_empty(), "The constructors forbid this");
    let name = if glob { None } else { deque.pop_back() };
    let range = ctx.range_loc(&range);
    Import { name, range, path: VPath(deque.into()) }
  }
}

fn parse_multiname_branch<'a>(
  cursor: Frag<'a>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(BoxedIter<'a, Subresult>, Frag<'a>)> {
  let comma = i!(str: ",");
  let (subnames, cursor) = parse_multiname_rec(cursor, ctx)?;
  let (Entry { lexeme, range }, cursor) = cursor.trim().pop(ctx)?;
  match &lexeme {
    Lexeme::RP(PType::Par) => Ok((subnames, cursor)),
    Lexeme::Name(n) if n == &comma => {
      let (tail, cont) = parse_multiname_branch(cursor, ctx)?;
      Ok((box_chain!(subnames, tail), cont))
    },
    _ => {
      let expected = vec![Lexeme::Name(comma), Lexeme::RP(PType::Par)];
      let err = Expected { expected, or_name: false, found: lexeme.clone() };
      Err(err.pack(SourceRange { range: range.clone(), code: ctx.code_info() }))
    },
  }
}

fn parse_multiname_rec<'a>(
  cursor: Frag<'a>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(BoxedIter<'a, Subresult>, Frag<'a>)> {
  let (head, mut cursor) = cursor.trim().pop(ctx)?;
  match &head.lexeme {
    Lexeme::LP(PType::Par) => parse_multiname_branch(cursor, ctx),
    Lexeme::LP(PType::Sqr) => {
      let mut names = Vec::new();
      loop {
        let (Entry { lexeme, range }, tail) = cursor.trim().pop(ctx)?;
        cursor = tail;
        match lexeme {
          Lexeme::Name(n) => names.push((n.clone(), range)),
          Lexeme::RP(PType::Sqr) => break,
          _ => {
            let err = Expected {
              expected: vec![Lexeme::RP(PType::Sqr)],
              or_name: true,
              found: head.lexeme.clone(),
            };
            return Err(err.pack(ctx.range_loc(range)));
          },
        }
      }
      Ok((
        Box::new(
          names.into_iter().map(|(name, location)| Subresult::new_named(name.clone(), location)),
        ),
        cursor,
      ))
    },
    Lexeme::Name(n) if *n == i!(str: "*") =>
      Ok((box_once(Subresult::new_glob(&head.range)), cursor)),
    Lexeme::Name(n) if ![i!(str: ","), i!(str: "*")].contains(n) => {
      let cursor = cursor.trim();
      if cursor.get(0, ctx).map_or(false, |e| e.lexeme.strict_eq(&Lexeme::NS)) {
        let cursor = cursor.step(ctx)?;
        let (out, cursor) = parse_multiname_rec(cursor, ctx)?;
        let out = Box::new(out.map(|sr| sr.push_front(n.clone())));
        Ok((out, cursor))
      } else {
        Ok((box_once(Subresult::new_named(n.clone(), &head.range)), cursor))
      }
    },
    _ => {
      let expected = vec![Lexeme::LP(PType::Par)];
      let err = Expected { expected, or_name: true, found: head.lexeme.clone() };
      Err(err.pack(ctx.range_loc(&head.range)))
    },
  }
}

/// Parse a tree that describes several names. The tree can be
///
/// - name (except `,` or `*`)
/// - name (except `,` or `*`) `::` tree
/// - `(` tree `,` tree ... `)`
/// - `*` (wildcard)
/// - `[` name name ... `]` (including `,` or `*`).
///
/// Examples of valid syntax:
///
/// ```txt
/// foo
/// foo::bar::baz
/// foo::bar::(baz, quz::quux, fimble::*)
/// foo::bar::[baz quz * +]
/// ```
pub fn parse_multiname<'a>(
  cursor: Frag<'a>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(Vec<Import>, Frag<'a>)> {
  let (output, cont) = parse_multiname_rec(cursor, ctx)?;
  Ok((output.map(|sr| sr.finalize(ctx)).collect(), cont))
}
179
orchidlang/src/parse/numeric.rs
Normal file
@@ -0,0 +1,179 @@
//! Parse a float or integer. These functions are also used for the macro
//! priority numbers

use std::num::IntErrorKind;
use std::ops::Range;

use ordered_float::NotNan;

use super::context::ParseCtx;
use super::errors::{ExpectedDigit, LiteralOverflow, NaNLiteral, ParseErrorKind};
use super::lex_plugin::LexPluginReq;
#[allow(unused)] // for doc
use super::lex_plugin::LexerPlugin;
use super::lexer::{split_filter, Entry, LexRes, Lexeme};
use crate::error::{ProjectErrorObj, ProjectResult};
use crate::foreign::atom::AtomGenerator;
use crate::foreign::inert::Inert;
use crate::libs::std::number::Numeric;

/// Reasons why [parse_num] might fail. See [NumError].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NumErrorKind {
  /// The literal describes [f64::NAN]
  NaN,
  /// Some integer appearing in the literal overflows [usize]
  Overflow,
  /// A character that isn't a digit in the given base was found
  InvalidDigit,
}
impl NumErrorKind {
  fn from_int(kind: &IntErrorKind) -> Self {
    match kind {
      IntErrorKind::InvalidDigit => Self::InvalidDigit,
      IntErrorKind::NegOverflow | IntErrorKind::PosOverflow => Self::Overflow,
      _ => panic!("Impossible error condition"),
    }
  }
}

/// Error produced by [parse_num]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NumError {
  /// Location
  pub range: Range<usize>,
  /// Reason
  pub kind: NumErrorKind,
}

impl NumError {
  /// Convert into [ProjectErrorObj]
  pub fn into_proj(
    self,
    len: usize,
    tail: &str,
    ctx: &(impl ParseCtx + ?Sized),
  ) -> ProjectErrorObj {
    let start = ctx.source().len() - tail.len() - len + self.range.start;
    let location = ctx.range_loc(&(start..start + self.range.len()));
    match self.kind {
      NumErrorKind::NaN => NaNLiteral.pack(location),
      NumErrorKind::InvalidDigit => ExpectedDigit.pack(location),
      NumErrorKind::Overflow => LiteralOverflow.pack(location),
    }
  }
}

/// Parse a number literal out of text
pub fn parse_num(string: &str) -> Result<Numeric, NumError> {
  let overflow_err = NumError { range: 0..string.len(), kind: NumErrorKind::Overflow };
  let (radix, noprefix, pos) = (string.strip_prefix("0x").map(|s| (16u8, s, 2)))
    .or_else(|| string.strip_prefix("0b").map(|s| (2u8, s, 2)))
    .or_else(|| string.strip_prefix("0o").map(|s| (8u8, s, 2)))
    .unwrap_or((10u8, string, 0));
  // identity
  let (base, exponent) = match noprefix.split_once('p') {
    Some((b, e)) => {
      let (s, d, len) = e.strip_prefix('-').map_or((1, e, 0), |ue| (-1, ue, 1));
      (b, s * int_parse(d, 10, pos + b.len() + 1 + len)? as i32)
    },
    None => (noprefix, 0),
  };
  match base.split_once('.') {
    None => {
      let base_usize = int_parse(base, radix, pos)?;
      if let Ok(pos_exp) = u32::try_from(exponent) {
        if let Some(radical) = usize::from(radix).checked_pow(pos_exp) {
          let number = base_usize.checked_mul(radical).ok_or(overflow_err)?;
          return Ok(Numeric::Uint(number));
        }
      }
      let f = (base_usize as f64) * (radix as f64).powi(exponent);
      let err = NumError { range: 0..string.len(), kind: NumErrorKind::NaN };
      Ok(Numeric::Float(NotNan::new(f).map_err(|_| err)?))
    },
    Some((whole, part)) => {
      let whole_n = int_parse(whole, radix, pos)? as f64;
      let part_n = int_parse(part, radix, pos + whole.len() + 1)? as f64;
      let real_val = whole_n + (part_n / (radix as f64).powi(part.len() as i32));
      let f = real_val * (radix as f64).powi(exponent);
      Ok(Numeric::Float(NotNan::new(f).expect("None of the inputs are NaN")))
    },
  }
}

fn int_parse(s: &str, radix: u8, start: usize) -> Result<usize, NumError> {
  let s = s.chars().filter(|c| *c != '_').collect::<String>();
  let range = start..(start + s.len());
  usize::from_str_radix(&s, radix as u32)
    .map_err(|e| NumError { range, kind: NumErrorKind::from_int(e.kind()) })
}

/// Filter for characters that can appear in numbers
pub fn numchar(c: char) -> bool { c.is_alphanumeric() | "._-".contains(c) }
/// Filter for characters that can start numbers
pub fn numstart(c: char) -> bool { c.is_ascii_digit() }

/// Print a number as a base-16 floating point literal
#[must_use]
pub fn print_nat16(num: NotNan<f64>) -> String {
  if *num == 0.0 {
    return "0x0".to_string();
  } else if num.is_infinite() {
    return match num.is_sign_positive() {
      true => "Infinity".to_string(),
      false => "-Infinity".to_string(),
    };
  } else if num.is_nan() {
    return "NaN".to_string();
  }
  let exp = num.log(16.0).floor();
  let man = *num / 16_f64.powf(exp);
  format!("0x{man}p{exp:.0}")
}

/// [LexerPlugin] for a number literal
#[derive(Clone)]
pub struct NumericLexer;
impl LexerPlugin for NumericLexer {
  fn lex<'b>(&self, req: &'_ dyn LexPluginReq<'b>) -> Option<ProjectResult<LexRes<'b>>> {
    req.tail().chars().next().filter(|c| numstart(*c)).map(|_| {
      let (num_str, tail) = split_filter(req.tail(), numchar);
      let ag = match parse_num(num_str) {
        Ok(Numeric::Float(f)) => AtomGenerator::cloner(Inert(f)),
        Ok(Numeric::Uint(i)) => AtomGenerator::cloner(Inert(i)),
        Err(e) => return Err(e.into_proj(num_str.len(), tail, req.ctx())),
      };
      let range = req.ctx().range(num_str.len(), tail);
      let entry = Entry { lexeme: Lexeme::Atom(ag), range };
      Ok(LexRes { tail, tokens: vec![entry] })
    })
  }
}

#[cfg(test)]
mod test {
  use crate::libs::std::number::Numeric;
  use crate::parse::numeric::parse_num;

  #[test]
  fn just_ints() {
    let test = |s, n| assert_eq!(parse_num(s), Ok(Numeric::Uint(n)));
    test("12345", 12345);
    test("0xcafebabe", 0xcafebabe);
    test("0o751", 0o751);
    test("0b111000111", 0b111000111);
  }

  #[test]
  fn decimals() {
    let test = |s, n| assert_eq!(parse_num(s).map(|n| n.as_f64()), Ok(n));
    test("3.1417", 3.1417);
    test("3.1417", 3_f64 + 1417_f64 / 10000_f64);
    test("0xf.cafe", 0xf as f64 + 0xcafe as f64 / 0x10000 as f64);
    test("34p3", 34000f64);
    test("0x2p3", (0x2 * 0x1000) as f64);
    test("1.5p3", 1500f64);
    test("0x2.5p3", (0x25 * 0x100) as f64);
  }
}
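Note that the `p` exponent scales by the literal's own radix, not by 2 as in C's hex-float syntax; the `decimals` test above relies on this. Two more worked cases:

```rust
use orchidlang::libs::std::number::Numeric;
use orchidlang::parse::numeric::parse_num;

fn main() {
  // 34p3 = 34 * 10^3; 0x2p3 = 0x2 * 16^3
  assert_eq!(parse_num("34p3"), Ok(Numeric::Uint(34_000)));
  assert_eq!(parse_num("0x2p3"), Ok(Numeric::Uint(0x2000)));
}
```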
142
orchidlang/src/parse/parse_plugin.rs
Normal file
@@ -0,0 +1,142 @@
|
||||
//! Abstractions for dynamic extensions to the parser that act across entries.
|
||||
//! Macros are the primary syntax extension mechanism, but they only operate
|
||||
//! within a constant and can't interfere with name reproject.
|
||||
|
||||
use std::ops::Range;
|
||||
|
||||
use dyn_clone::DynClone;
|
||||
use intern_all::Tok;
|
||||
|
||||
use super::context::ParseCtx;
|
||||
use super::errors::{expect, expect_block, expect_name};
|
||||
use super::facade::parse_entries;
|
||||
use super::frag::Frag;
|
||||
use super::lexer::{Entry, Lexeme};
|
||||
use super::parsed::{Constant, Expr, ModuleBlock, PType, Rule, SourceLine, SourceLineKind};
|
||||
use super::sourcefile::{
|
||||
exprv_to_single, parse_const, parse_exprv, parse_line, parse_module, parse_module_body,
|
||||
parse_nsname, parse_rule, split_lines,
|
||||
};
|
||||
use crate::error::{ProjectErrorObj, ProjectResult};
|
||||
use crate::location::SourceRange;
|
||||
use crate::name::VName;
|
||||
use crate::utils::boxed_iter::BoxedIter;
|
||||
|
||||
/// Information and actions exposed to [ParseLinePlugin]. A plugin should never
|
||||
/// import and call the parser directly because it might be executed in a
|
||||
/// different version of the parser.
|
||||
pub trait ParsePluginReq<'t> {
|
||||
// ################ Frag and ParseCtx ################
|
||||
|
||||
/// The token sequence this parser must parse
|
||||
fn frag(&self) -> Frag;
|
||||
/// Get the location of a fragment
|
||||
fn frag_loc(&self, f: Frag) -> SourceRange;
|
||||
/// Convert a numeric byte range into a location
|
||||
fn range_loc(&self, r: Range<usize>) -> SourceRange;
|
||||
/// Remove the first token of the fragment
|
||||
fn pop<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)>;
|
||||
/// Remove the last element of the fragment
  fn pop_back<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)>;

  // ################ Parser states ################

  /// Split up the lines in a fragment. The fragment must outlive the iterator
  /// and the request itself must outlive both
  fn split_lines<'a: 'b, 'b>(&'b self, f: Frag<'a>) -> BoxedIter<'b, Frag<'a>>
  where 't: 'b + 'a;
  /// Parse a sequence of source lines separated by line breaks
  fn parse_module_body(&self, frag: Frag) -> ProjectResult<Vec<SourceLine>>;
  /// Parse a single source line. This returns a vector because plugins can
  /// convert a single line into multiple entries
  fn parse_line(&self, frag: Frag) -> ProjectResult<Vec<SourceLineKind>>;
  /// Parse a macro rule `<exprv> =prio=> <exprv>`
  fn parse_rule(&self, frag: Frag) -> ProjectResult<Rule>;
  /// Parse a constant declaration `<name> := <exprv>`
  fn parse_const(&self, frag: Frag) -> ProjectResult<Constant>;
  /// Parse a namespaced name `name::name`
  fn parse_nsname<'a>(&self, f: Frag<'a>) -> ProjectResult<(VName, Frag<'a>)>;
  /// Parse a module declaration. `<name> ( <module_body> )`
  fn parse_module(&self, frag: Frag) -> ProjectResult<ModuleBlock>;
  /// Parse a sequence of expressions. In principle, it never makes sense to
  /// parse a single expression because it could always be a macro invocation.
  fn parse_exprv<'a>(&self, f: Frag<'a>, p: Option<PType>) -> ProjectResult<(Vec<Expr>, Frag<'a>)>;
  /// Parse a prepared string of code
  fn parse_entries(&self, t: &'static str, r: SourceRange) -> Vec<SourceLine>;
  /// Convert a sequence of expressions to a single one by parenthesization if
  /// necessary
  fn vec_to_single(&self, fallback: &Entry, v: Vec<Expr>) -> ProjectResult<Expr>;

  // ################ Assertions ################

  /// Unwrap a single name token or raise an error
  fn expect_name(&self, entry: &Entry) -> ProjectResult<Tok<String>>;
  /// Assert that the entry contains exactly the specified lexeme
  fn expect(&self, l: Lexeme, e: &Entry) -> ProjectResult<()>;
  /// Remove two parentheses from the ends of the cursor
  fn expect_block<'a>(&self, f: Frag<'a>, p: PType) -> ProjectResult<Frag<'a>>;
  /// Ensure that the fragment is empty
  fn expect_empty(&self, f: Frag) -> ProjectResult<()>;
  /// Report a fatal error while also producing output to be consumed by later
  /// stages for improved error reporting
  fn report_err(&self, e: ProjectErrorObj);
}

/// External plugin that parses an unrecognized source line into lines of
/// recognized types
pub trait ParseLinePlugin: Sync + Send + DynClone {
  /// Attempt to parse a line. Returns [None] if the line isn't recognized,
  /// [Some][Err] if it's recognized but incorrect.
  fn parse(&self, req: &dyn ParsePluginReq) -> Option<ProjectResult<Vec<SourceLineKind>>>;
}

/// Implementation of [ParsePluginReq] exposing sub-parsers and data to the
/// plugin via dynamic dispatch
pub struct ParsePlugReqImpl<'a, TCtx: ParseCtx + ?Sized> {
  /// Fragment of text to be parsed by the plugin
  pub frag: Frag<'a>,
  /// Context for recursive commands and to expose to the plugin
  pub ctx: &'a TCtx,
}
impl<'ty, TCtx: ParseCtx + ?Sized> ParsePluginReq<'ty> for ParsePlugReqImpl<'ty, TCtx> {
  fn frag(&self) -> Frag { self.frag }
  fn frag_loc(&self, f: Frag) -> SourceRange { self.range_loc(f.range()) }
  fn range_loc(&self, r: Range<usize>) -> SourceRange { self.ctx.range_loc(&r) }
  fn pop<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)> { f.pop(self.ctx) }
  fn pop_back<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)> {
    f.pop_back(self.ctx)
  }
  fn split_lines<'a: 'b, 'b>(&'b self, f: Frag<'a>) -> BoxedIter<'b, Frag<'a>>
  where
    'ty: 'b,
    'ty: 'a,
  {
    Box::new(split_lines(f, self.ctx))
  }
  fn parse_module_body(&self, f: Frag) -> ProjectResult<Vec<SourceLine>> {
    Ok(parse_module_body(f, self.ctx))
  }
  fn parse_line(&self, f: Frag) -> ProjectResult<Vec<SourceLineKind>> { parse_line(f, self.ctx) }
  fn parse_rule(&self, f: Frag) -> ProjectResult<Rule> { parse_rule(f, self.ctx) }
  fn parse_const(&self, f: Frag) -> ProjectResult<Constant> { parse_const(f, self.ctx) }
  fn parse_nsname<'a>(&self, f: Frag<'a>) -> ProjectResult<(VName, Frag<'a>)> {
    parse_nsname(f, self.ctx)
  }
  fn parse_module(&self, f: Frag) -> ProjectResult<ModuleBlock> { parse_module(f, self.ctx) }
  fn parse_exprv<'a>(&self, f: Frag<'a>, p: Option<PType>) -> ProjectResult<(Vec<Expr>, Frag<'a>)> {
    parse_exprv(f, p, self.ctx)
  }
  fn parse_entries(&self, s: &'static str, r: SourceRange) -> Vec<SourceLine> {
    parse_entries(&self.ctx, s, r)
  }
  fn vec_to_single(&self, fb: &Entry, v: Vec<Expr>) -> ProjectResult<Expr> {
    exprv_to_single(fb, v, self.ctx)
  }
  fn expect_name(&self, e: &Entry) -> ProjectResult<Tok<String>> { expect_name(e, self.ctx) }
  fn expect(&self, l: Lexeme, e: &Entry) -> ProjectResult<()> { expect(l, e, self.ctx) }
  fn expect_block<'a>(&self, f: Frag<'a>, t: PType) -> ProjectResult<Frag<'a>> {
    expect_block(f, t, self.ctx)
  }
  fn expect_empty(&self, f: Frag) -> ProjectResult<()> { f.expect_empty(self.ctx) }
  fn report_err(&self, e: ProjectErrorObj) { self.ctx.reporter().report(e) }
}
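
// Editor's sketch: the smallest useful [ParseLinePlugin]. It recognizes a
// hypothetical `ignore` keyword and turns the whole line into a comment,
// deferring to the built-in parsers for anything else. The name `IgnoreLine`
// and the keyword are illustrative assumptions, not part of the original
// source.
#[derive(Clone)]
pub struct IgnoreLine;
impl ParseLinePlugin for IgnoreLine {
  fn parse(&self, req: &dyn ParsePluginReq) -> Option<ProjectResult<Vec<SourceLineKind>>> {
    // Peek at the first token; this line is only ours if it starts with the keyword
    let (head, tail) = match req.pop(req.frag()) {
      Ok(pair) => pair,
      Err(e) => return Some(Err(e)),
    };
    match &head.lexeme {
      Lexeme::Name(n) if **n == "ignore" => match req.expect_empty(tail) {
        // Recognized and well-formed: report the line upward as a plain comment
        Ok(()) => Some(Ok(vec![SourceLineKind::Comment("ignored".to_string())])),
        // Recognized but malformed: surface the error rather than deferring
        Err(e) => Some(Err(e)),
      },
      _ => None, // not recognized, let other parsers try
    }
  }
}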
507
orchidlang/src/parse/parsed.rs
Normal file
@@ -0,0 +1,507 @@
//! Datastructures representing the units of macro execution
//!
//! These structures are produced by the pipeline, processed by the macro
//! executor, and then converted to other usable formats.

use std::fmt;
use std::hash::Hash;
use std::rc::Rc;

use hashbrown::HashSet;
use intern_all::Tok;
use itertools::Itertools;
use ordered_float::NotNan;

use crate::foreign::atom::AtomGenerator;
#[allow(unused)] // for doc
use crate::interpreter::nort;
use crate::location::SourceRange;
use crate::name::{Sym, VName, VPath};
use crate::parse::numeric::print_nat16;

/// A [Clause] with associated metadata
#[derive(Clone, Debug)]
pub struct Expr {
  /// The actual value
  pub value: Clause,
  /// Information about the code that produced this value
  pub range: SourceRange,
}

impl Expr {
  /// Process all names with the given mapper.
  /// Return a new object if anything was processed
  #[must_use]
  pub fn map_names(&self, pred: &mut impl FnMut(Sym) -> Option<Sym>) -> Option<Self> {
    (self.value.map_names(pred)).map(|value| Self { value, range: self.range.clone() })
  }

  /// Visit all expressions in the tree. The search can be exited early by
  /// returning [Some]
  ///
  /// See also [crate::interpreter::nort::Expr::search_all]
  pub fn search_all<T>(&self, f: &mut impl FnMut(&Self) -> Option<T>) -> Option<T> {
    f(self).or_else(|| self.value.search_all(f))
  }
}

/// Visit all expression sequences including this sequence itself.
pub fn search_all_slcs<T>(this: &[Expr], f: &mut impl FnMut(&[Expr]) -> Option<T>) -> Option<T> {
  f(this).or_else(|| this.iter().find_map(|expr| expr.value.search_all_slcs(f)))
}

impl Expr {
  /// Add the specified prefix to every Name
  #[must_use]
  pub fn prefix(&self, prefix: &[Tok<String>], except: &impl Fn(Tok<String>) -> bool) -> Self {
    Self { value: self.value.prefix(prefix, except), range: self.range.clone() }
  }
}

impl fmt::Display for Expr {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.value.fmt(f) }
}

/// Various types of placeholders
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PHClass {
  /// Matches multiple tokens, lambdas or parenthesized groups
  Vec {
    /// If true, must match at least one clause
    nonzero: bool,
    /// Greediness in the allocation of tokens
    prio: usize,
  },
  /// Matches exactly one token, lambda or parenthesized group
  Scalar,
  /// Matches exactly one name
  Name,
}

/// Properties of a placeholder that matches unknown tokens in macros
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Placeholder {
  /// Identifier to pair placeholders in the pattern and template
  pub name: Tok<String>,
  /// The nature of the token set matched by this placeholder
  pub class: PHClass,
}

impl fmt::Display for Placeholder {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    let name = &self.name;
    match self.class {
      PHClass::Scalar => write!(f, "${name}"),
      PHClass::Name => write!(f, "$_{name}"),
      PHClass::Vec { nonzero, prio } => {
        if nonzero { write!(f, "...") } else { write!(f, "..") }?;
        write!(f, "${name}:{prio}")
      },
    }
  }
}
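
// Editor's sketch: how the Display impl above renders each placeholder class.
// `i!(str: ...)` is the interning macro used elsewhere in this crate; the test
// module itself is illustrative and not part of the original source.
#[cfg(test)]
mod placeholder_display_example {
  use intern_all::i;

  use super::*;

  #[test]
  fn renders_each_class() {
    let name = i!(str: "body");
    // A scalar placeholder renders as `$name`
    let scalar = Placeholder { name: name.clone(), class: PHClass::Scalar };
    assert_eq!(scalar.to_string(), "$body");
    // A nonzero vectorial placeholder renders as `...$name:prio`
    let vec = Placeholder { name, class: PHClass::Vec { nonzero: true, prio: 1 } };
    assert_eq!(vec.to_string(), "...$body:1");
  }
}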

/// Different types of brackets supported by Orchid
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum PType {
  /// ()
  Par,
  /// []
  Sqr,
  /// {}
  Curl,
}
impl PType {
  /// Left paren character for this paren type
  pub fn l(self) -> char {
    match self {
      PType::Curl => '{',
      PType::Par => '(',
      PType::Sqr => '[',
    }
  }

  /// Right paren character for this paren type
  pub fn r(self) -> char {
    match self {
      PType::Curl => '}',
      PType::Par => ')',
      PType::Sqr => ']',
    }
  }
}
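
// Editor's note: a small illustrative check that `l` and `r` stay paired for
// every bracket type; not part of the original source.
#[cfg(test)]
mod ptype_example {
  use super::*;

  #[test]
  fn brackets_pair_up() {
    let pairs = [(PType::Par, '(', ')'), (PType::Sqr, '[', ']'), (PType::Curl, '{', '}')];
    for (p, l, r) in pairs {
      assert_eq!((p.l(), p.r()), (l, r));
    }
  }
}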

/// An S-expression as read from a source file
#[derive(Debug, Clone)]
pub enum Clause {
  /// An opaque non-callable value, eg. a file handle
  Atom(AtomGenerator),
  /// A c-style name or an operator, eg. `+`, `i`, `foo::bar`
  Name(Sym),
  /// A parenthesized expression
  /// eg. `(print out "hello")`, `[1, 2, 3]`, `{Some(t) => t}`
  S(PType, Rc<Vec<Expr>>),
  /// A function expression, eg. `\x. x + 1`
  Lambda(Rc<Vec<Expr>>, Rc<Vec<Expr>>),
  /// A placeholder for macros, eg. `$name`, `...$body`, `...$lhs:1`
  Placeh(Placeholder),
}

impl Clause {
  /// Extract the expressions from an auto, lambda or S
  #[must_use]
  pub fn body(&self) -> Option<Rc<Vec<Expr>>> {
    match self {
      Self::Lambda(_, body) | Self::S(_, body) => Some(body.clone()),
      _ => None,
    }
  }

  /// Convert with identical meaning
  #[must_use]
  pub fn into_expr(self, range: SourceRange) -> Expr {
    if let Self::S(PType::Par, body) = &self {
      if let [wrapped] = &body[..] {
        return wrapped.clone();
      }
    }
    Expr { value: self, range }
  }

  /// Convert with identical meaning
  #[must_use]
  pub fn from_exprs(exprs: &[Expr]) -> Option<Self> {
    match exprs {
      [] => None,
      [only] => Some(only.value.clone()),
      _ => Some(Self::S(PType::Par, Rc::new(exprs.to_vec()))),
    }
  }

  /// Convert with identical meaning
  #[must_use]
  pub fn from_exprv(exprv: &Rc<Vec<Expr>>) -> Option<Clause> {
    if exprv.len() < 2 { Self::from_exprs(exprv) } else { Some(Self::S(PType::Par, exprv.clone())) }
  }

  /// Collect all names that appear in this expression.
  /// NOTICE: this isn't the total set of unbound names; it's mostly useful to
  /// make weak statements for optimization.
  #[must_use]
  pub fn collect_names(&self) -> HashSet<Sym> {
    if let Self::Name(n) = self {
      return HashSet::from([n.clone()]);
    }
    let mut glossary = HashSet::new();
    let result = self.search_all(&mut |e| {
      if let Clause::Name(n) = &e.value {
        glossary.insert(n.clone());
      }
      None::<()>
    });
    assert!(result.is_none(), "Callback never returns Some");
    glossary
  }

  /// Process all names with the given mapper.
  /// Return a new object if anything was processed
  #[must_use]
  pub fn map_names(&self, pred: &mut impl FnMut(Sym) -> Option<Sym>) -> Option<Self> {
    match self {
      Clause::Atom(_) | Clause::Placeh(_) => None,
      Clause::Name(name) => pred(name.clone()).map(Clause::Name),
      Clause::S(c, body) => {
        let mut any_some = false;
        let new_body = body
          .iter()
          .map(|e| {
            let val = e.map_names(pred);
            any_some |= val.is_some();
            val.unwrap_or_else(|| e.clone())
          })
          .collect();
        if any_some { Some(Clause::S(*c, Rc::new(new_body))) } else { None }
      },
      Clause::Lambda(arg, body) => {
        let mut any_some = false;
        let new_arg = (arg.iter())
          .map(|e| {
            let val = e.map_names(pred);
            any_some |= val.is_some();
            val.unwrap_or_else(|| e.clone())
          })
          .collect();
        let new_body = (body.iter())
          .map(|e| {
            let val = e.map_names(pred);
            any_some |= val.is_some();
            val.unwrap_or_else(|| e.clone())
          })
          .collect();
        if any_some { Some(Clause::Lambda(Rc::new(new_arg), Rc::new(new_body))) } else { None }
      },
    }
  }

  /// Counterpart of [Expr::search_all]
  pub fn search_all<T>(&self, f: &mut impl FnMut(&Expr) -> Option<T>) -> Option<T> {
    match self {
      Clause::Lambda(arg, body) =>
        arg.iter().chain(body.iter()).find_map(|expr| expr.search_all(f)),
      Clause::Name(_) | Clause::Atom(_) | Clause::Placeh(_) => None,
      Clause::S(_, body) => body.iter().find_map(|expr| expr.search_all(f)),
    }
  }

  /// Visit all expression sequences. Most useful when looking for some pattern
  pub fn search_all_slcs<T>(&self, f: &mut impl FnMut(&[Expr]) -> Option<T>) -> Option<T> {
    match self {
      Clause::Lambda(arg, body) => search_all_slcs(arg, f).or_else(|| search_all_slcs(body, f)),
      Clause::Name(_) | Clause::Atom(_) | Clause::Placeh(_) => None,
      Clause::S(_, body) => search_all_slcs(body, f),
    }
  }

  /// Generate a parenthesized expression sequence
  pub fn s(delimiter: char, body: impl IntoIterator<Item = Self>, range: SourceRange) -> Self {
    let ptype = match delimiter {
      '(' => PType::Par,
      '[' => PType::Sqr,
      '{' => PType::Curl,
      _ => panic!("not an opening paren"),
    };
    let body = body.into_iter().map(|it| it.into_expr(range.clone())).collect();
    Self::S(ptype, Rc::new(body))
  }
}
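
// Editor's sketch of the pure conversion helpers above. The `sym!` macro is
// used the same way as in sourcefile.rs; the test module is illustrative and
// not part of the original source.
#[cfg(test)]
mod clause_example {
  use super::*;
  use crate::sym;

  #[test]
  fn empty_sequence_yields_no_clause() {
    // `from_exprs` refuses to build a clause from nothing
    assert!(Clause::from_exprs(&[]).is_none());
  }

  #[test]
  fn collect_names_sees_a_bare_name() {
    // A bare name clause reports exactly itself in the glossary
    let name = sym!(greet);
    assert!(Clause::Name(name.clone()).collect_names().contains(&name));
  }
}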

impl Clause {
  /// Add the specified prefix to every Name
  #[must_use]
  pub fn prefix(&self, prefix: &[Tok<String>], except: &impl Fn(Tok<String>) -> bool) -> Self {
    self
      .map_names(&mut |name| match except(name[0].clone()) {
        true => None,
        false => {
          let prefixed = prefix.iter().cloned().chain(name.iter()).collect::<Vec<_>>();
          Some(Sym::from_tok(name.tok().interner().i(&prefixed)).unwrap())
        },
      })
      .unwrap_or_else(|| self.clone())
  }
}

impl fmt::Display for Clause {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Self::Atom(a) => write!(f, "{a:?}"),
      Self::Name(name) => write!(f, "{}", name),
      Self::S(t, items) => {
        let body = items.iter().join(" ");
        write!(f, "{}{body}{}", t.l(), t.r())
      },
      Self::Lambda(arg, body) => {
        let args = arg.iter().join(" ");
        let bodys = body.iter().join(" ");
        write!(f, "\\{args}.{bodys}")
      },
      Self::Placeh(ph) => ph.fmt(f),
    }
  }
}

/// A substitution rule as loaded from source
#[derive(Debug, Clone)]
pub struct Rule {
  /// Expressions on the left side of the arrow
  pub pattern: Vec<Expr>,
  /// Priority number written inside the arrow
  pub prio: NotNan<f64>,
  /// Expressions on the right side of the arrow
  pub template: Vec<Expr>,
}

impl fmt::Display for Rule {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    write!(
      f,
      "rule {} ={}=> {}",
      self.pattern.iter().join(" "),
      print_nat16(self.prio),
      self.template.iter().join(" ")
    )
  }
}

/// A named constant
#[derive(Debug, Clone)]
pub struct Constant {
  /// Used to reference the constant
  pub name: Tok<String>,
  /// The constant value inserted where the name is found
  pub value: Expr,
}

impl fmt::Display for Constant {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    write!(f, "const {} := {}", *self.name, self.value)
  }
}

/// An import pointing at another module, either specifying the symbol to be
/// imported or importing all available symbols with a globstar (*)
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Import {
  /// Import path, a sequence of module names. Can either start with
  ///
  /// - `self` to reference the current module
  /// - any number of `super` to reference the parent module of the implied
  ///   `self`
  /// - a root name
  pub path: VPath,
  /// If name is None, this is a wildcard import
  pub name: Option<Tok<String>>,
  /// Location of the final name segment, which uniquely identifies this name
  pub range: SourceRange,
}
impl Import {
  /// Constructor
  pub fn new(
    path: impl IntoIterator<Item = Tok<String>>,
    name: Option<Tok<String>>,
    range: SourceRange,
  ) -> Self {
    let path = VPath(path.into_iter().collect());
    assert!(name.is_some() || !path.0.is_empty(), "import * not allowed");
    Self { range, name, path }
  }

  /// Get the preload target space for this import - the prefix below
  /// which all files should be included in the compilation
  ///
  /// Returns the path if this is a glob import, or the path plus the
  /// name if this is a specific import
  #[must_use]
  pub fn nonglob_path(&self) -> VName {
    VName::new(self.path.0.iter().chain(&self.name).cloned())
      .expect("Everything import (`import *`) not allowed")
  }
}

impl fmt::Display for Import {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match &self.name {
      None => write!(f, "{}::*", self.path),
      Some(n) => write!(f, "{}::{}", self.path, n),
    }
  }
}

/// A namespace block
#[derive(Debug, Clone)]
pub struct ModuleBlock {
  /// Name prefixed to all names in the block
  pub name: Tok<String>,
  /// Prefixed entries
  pub body: Vec<SourceLine>,
}

impl fmt::Display for ModuleBlock {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    let bodys = self.body.iter().map(|e| e.to_string()).join("\n");
    write!(f, "module {} {{\n{}\n}}", self.name, bodys)
  }
}

/// See [Member]
#[derive(Debug, Clone)]
pub enum MemberKind {
  /// A substitution rule. Rules apply even when they're not in scope, if the
  /// absolute names are present eg. because they're produced by other rules
  Rule(Rule),
  /// A constant (or function) associated with a name
  Constant(Constant),
  /// A prefixed set of other entries
  Module(ModuleBlock),
}
impl MemberKind {
  /// Convert to [SourceLine]
  pub fn into_line(self, exported: bool, range: SourceRange) -> SourceLine {
    SourceLineKind::Member(Member { exported, kind: self }).wrap(range)
  }
}

impl fmt::Display for MemberKind {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Self::Constant(c) => c.fmt(f),
      Self::Module(m) => m.fmt(f),
      Self::Rule(r) => r.fmt(f),
    }
  }
}

/// Things that may be prefixed with an export.
/// See [MemberKind]
#[derive(Debug, Clone)]
pub struct Member {
  /// The member itself
  pub kind: MemberKind,
  /// Whether this member is exported or not
  pub exported: bool,
}

impl fmt::Display for Member {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Self { exported: true, kind } => write!(f, "export {kind}"),
      Self { exported: false, kind } => write!(f, "{kind}"),
    }
  }
}

/// See [SourceLine]
#[derive(Debug, Clone)]
pub enum SourceLineKind {
  /// Imports one or all names in a module
  Import(Vec<Import>),
  /// Comments are kept here in case dev tooling wants to parse documentation
  Comment(String),
  /// An element with visibility information
  Member(Member),
  /// A list of tokens exported explicitly. This can also create new exported
  /// tokens that the local module doesn't actually define a role for
  Export(Vec<(Tok<String>, SourceRange)>),
}
impl SourceLineKind {
  /// Wrap in a [SourceLine] at the given location
  pub fn wrap(self, range: SourceRange) -> SourceLine { SourceLine { kind: self, range } }
}

impl fmt::Display for SourceLineKind {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    match self {
      Self::Comment(s) => write!(f, "--[{s}]--"),
      Self::Export(s) => {
        write!(f, "export ::({})", s.iter().map(|t| &**t.0).join(", "))
      },
      Self::Member(member) => write!(f, "{member}"),
      Self::Import(i) => {
        write!(f, "import ({})", i.iter().map(|i| i.to_string()).join(", "))
      },
    }
  }
}
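
// Editor's sketch: the comment syntax round-trips through the Display impl
// above; an illustrative test module, not part of the original source.
#[cfg(test)]
mod line_kind_display_example {
  use super::*;

  #[test]
  fn comment_renders_in_block_form() {
    let kind = SourceLineKind::Comment("docs live here".to_string());
    assert_eq!(kind.to_string(), "--[docs live here]--");
  }
}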

/// Anything the parser might encounter in a file. See [SourceLineKind]
#[derive(Debug, Clone)]
pub struct SourceLine {
  /// What we encountered
  pub kind: SourceLineKind,
  /// Where we encountered it.
  pub range: SourceRange,
}

impl fmt::Display for SourceLine {
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.kind.fmt(f) }
}
313
orchidlang/src/parse/sourcefile.rs
Normal file
@@ -0,0 +1,313 @@
//! Internal states of the parser.

use std::iter;
use std::rc::Rc;

use intern_all::i;
use itertools::Itertools;

use super::context::ParseCtx;
use super::errors::{
  expect, expect_block, expect_name, BadTokenInRegion, ExpectedSingleName, GlobExport, LeadingNS,
  MisalignedParen, NamespacedExport, ParseErrorKind, ReservedToken, UnexpectedEOL,
};
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use super::multiname::parse_multiname;
use super::parse_plugin::ParsePlugReqImpl;
use crate::error::ProjectResult;
use crate::name::VName;
use crate::parse::parsed::{
  Clause, Constant, Expr, Import, Member, MemberKind, ModuleBlock, PType, Rule, SourceLine,
  SourceLineKind,
};
use crate::sym;

/// Split the fragment at each line break outside parentheses
pub fn split_lines<'a>(
  module: Frag<'a>,
  ctx: &'a (impl ParseCtx + ?Sized),
) -> impl Iterator<Item = Frag<'a>> {
  let mut source = module.data.iter().enumerate();
  let mut fallback = module.fallback;
  let mut last_slice = 0;
  let mut finished = false;
  iter::from_fn(move || {
    let mut paren_count = 0;
    for (i, Entry { lexeme, .. }) in source.by_ref() {
      match lexeme {
        Lexeme::LP(_) => paren_count += 1,
        Lexeme::RP(_) => paren_count -= 1,
        Lexeme::BR if paren_count == 0 => {
          let begin = last_slice;
          last_slice = i + 1;
          let cur_prev = fallback;
          fallback = &module.data[i];
          return Some(Frag::new(cur_prev, &module.data[begin..i]));
        },
        _ => (),
      }
    }
    // Include last line even without trailing newline
    if !finished {
      finished = true;
      return Some(Frag::new(fallback, &module.data[last_slice..]));
    }
    None
  })
  .map(Frag::trim)
  .map(|s| {
    match s.pop(ctx).and_then(|(f, i)| i.pop_back(ctx).map(|(l, i)| (&f.lexeme, i, &l.lexeme))) {
      Ok((Lexeme::LP(PType::Par), inner, Lexeme::RP(PType::Par))) => inner.trim(),
      _ => s,
    }
  })
  .filter(|l| !l.data.is_empty())
}

/// Parse linebreak-separated entries
pub fn parse_module_body(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> Vec<SourceLine> {
  let mut lines = Vec::new();
  for l in split_lines(cursor, ctx) {
    let kinds = ctx.reporter().fallback(parse_line(l, ctx), |_| vec![]);
    let r = ctx.range_loc(&l.range());
    lines.extend(kinds.into_iter().map(|kind| SourceLine { range: r.clone(), kind }));
  }
  lines
}

/// Parse a single, possibly exported entry
pub fn parse_line(
  cursor: Frag<'_>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Vec<SourceLineKind>> {
  let req = ParsePlugReqImpl { ctx, frag: cursor };
  for line_parser in ctx.line_parsers() {
    if let Some(result) = line_parser.parse(&req) {
      return result;
    }
  }
  let head = cursor.get(0, ctx)?;
  match &head.lexeme {
    Lexeme::Comment(cmt) => cmt.strip_prefix('|').and_then(|c| c.strip_suffix('|')).map_or_else(
      || parse_line(cursor.step(ctx)?, ctx),
      |cmt| Ok(vec![SourceLineKind::Comment(cmt.to_string())]),
    ),
    Lexeme::BR => parse_line(cursor.step(ctx)?, ctx),
    Lexeme::Name(n) if **n == "export" =>
      parse_export_line(cursor.step(ctx)?, ctx).map(|k| vec![k]),
    Lexeme::Name(n) if ["const", "macro", "module"].contains(&n.as_str()) => {
      let member = Member { exported: false, kind: parse_member(cursor, ctx)? };
      Ok(vec![SourceLineKind::Member(member)])
    },
    Lexeme::Name(n) if **n == "import" => {
      let (imports, cont) = parse_multiname(cursor.step(ctx)?, ctx)?;
      cont.expect_empty(ctx)?;
      Ok(vec![SourceLineKind::Import(imports)])
    },
    lexeme => {
      let lexeme = lexeme.clone();
      Err(BadTokenInRegion { lexeme, region: "start of line" }.pack(ctx.range_loc(&head.range)))
    },
  }
}

fn parse_export_line(
  cursor: Frag<'_>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<SourceLineKind> {
  let cursor = cursor.trim();
  let head = cursor.get(0, ctx)?;
  match &head.lexeme {
    Lexeme::NS => {
      let (names, cont) = parse_multiname(cursor.step(ctx)?, ctx)?;
      cont.expect_empty(ctx)?;
      let names = (names.into_iter())
        .map(|Import { name, path, range }| match name {
          Some(n) if path.is_empty() => Ok((n, range)),
          Some(_) => Err(NamespacedExport.pack(range)),
          None => Err(GlobExport.pack(range)),
        })
        .collect::<Result<Vec<_>, _>>()?;
      Ok(SourceLineKind::Export(names))
    },
    Lexeme::Name(n) if ["const", "macro", "module"].contains(&n.as_str()) =>
      Ok(SourceLineKind::Member(Member { kind: parse_member(cursor, ctx)?, exported: true })),
    lexeme => {
      let lexeme = lexeme.clone();
      let err = BadTokenInRegion { lexeme, region: "exported line" };
      Err(err.pack(ctx.range_loc(&head.range)))
    },
  }
}

fn parse_member(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<MemberKind> {
  let (typemark, cursor) = cursor.trim().pop(ctx)?;
  match &typemark.lexeme {
    Lexeme::Name(n) if **n == "const" => {
      let constant = parse_const(cursor, ctx)?;
      Ok(MemberKind::Constant(constant))
    },
    Lexeme::Name(n) if **n == "macro" => {
      let rule = parse_rule(cursor, ctx)?;
      Ok(MemberKind::Rule(rule))
    },
    Lexeme::Name(n) if **n == "module" => {
      let module = parse_module(cursor, ctx)?;
      Ok(MemberKind::Module(module))
    },
    lexeme => {
      let lexeme = lexeme.clone();
      let err = BadTokenInRegion { lexeme, region: "member type" };
      Err(err.pack(ctx.range_loc(&typemark.range)))
    },
  }
}

/// Parse a macro rule
pub fn parse_rule(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Rule> {
  let (pattern, prio, template) = cursor.find_map("arrow", ctx, |a| match a {
    Lexeme::Arrow(p) => Some(*p),
    _ => None,
  })?;
  let (pattern, _) = parse_exprv(pattern, None, ctx)?;
  let (template, _) = parse_exprv(template, None, ctx)?;
  Ok(Rule { pattern, prio, template })
}

/// Parse a constant declaration
pub fn parse_const(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Constant> {
  let (name_ent, cursor) = cursor.trim().pop(ctx)?;
  let name = expect_name(name_ent, ctx)?;
  let (walrus_ent, cursor) = cursor.trim().pop(ctx)?;
  expect(Lexeme::Walrus, walrus_ent, ctx)?;
  let value = ctx.reporter().fallback(
    parse_exprv(cursor, None, ctx).and_then(|(body, _)| exprv_to_single(walrus_ent, body, ctx)),
    |_| Clause::Name(sym!(__syntax_error__)).into_expr(ctx.range_loc(&cursor.range())),
  );
  Ok(Constant { name, value })
}

/// Parse a namespaced name. TODO: use this for modules
pub fn parse_nsname<'a>(
  cursor: Frag<'a>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(VName, Frag<'a>)> {
  let (name, tail) = parse_multiname(cursor, ctx)?;
  match name.into_iter().exactly_one() {
    Ok(Import { name: Some(name), path, .. }) =>
      Ok((VName::new([name]).unwrap().prefix(path), tail)),
    Err(_) | Ok(Import { name: None, .. }) => {
      let range = cursor.data[0].range.start..tail.data[0].range.end;
      Err(ExpectedSingleName.pack(ctx.range_loc(&range)))
    },
  }
}

/// Parse a submodule declaration
pub fn parse_module(
  cursor: Frag<'_>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<ModuleBlock> {
  let (name_ent, cursor) = cursor.trim().pop(ctx)?;
  let name = expect_name(name_ent, ctx)?;
  let body = expect_block(cursor, PType::Par, ctx)?;
  Ok(ModuleBlock { name, body: parse_module_body(body, ctx) })
}

/// Parse a sequence of expressions
pub fn parse_exprv<'a>(
  mut cursor: Frag<'a>,
  paren: Option<PType>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(Vec<Expr>, Frag<'a>)> {
  let mut output = Vec::new();
  cursor = cursor.trim();
  while let Ok(current) = cursor.get(0, ctx) {
    match &current.lexeme {
      Lexeme::BR | Lexeme::Comment(_) => unreachable!("Fillers skipped"),
      Lexeme::At | Lexeme::Type => {
        let err = ReservedToken(current.lexeme.clone());
        return Err(err.pack(ctx.range_loc(&current.range)));
      },
      Lexeme::Atom(a) => {
        let value = Clause::Atom(a.clone());
        output.push(Expr { value, range: ctx.range_loc(&current.range) });
        cursor = cursor.step(ctx)?;
      },
      Lexeme::Placeh(ph) => {
        output
          .push(Expr { value: Clause::Placeh(ph.clone()), range: ctx.range_loc(&current.range) });
        cursor = cursor.step(ctx)?;
      },
      Lexeme::Name(n) => {
        let mut range = ctx.range_loc(&current.range);
        let mut fullname = VName::new([n.clone()]).unwrap();
        while cursor.get(1, ctx).is_ok_and(|e| e.lexeme.strict_eq(&Lexeme::NS)) {
          let next_seg = cursor.get(2, ctx)?;
          range.range.end = next_seg.range.end;
          fullname = fullname.suffix([expect_name(next_seg, ctx)?]);
          cursor = cursor.step(ctx)?.step(ctx)?;
        }
        let clause = Clause::Name(fullname.to_sym());
        output.push(Expr { value: clause, range });
        cursor = cursor.step(ctx)?;
      },
      Lexeme::NS => return Err(LeadingNS.pack(ctx.range_loc(&current.range))),
      Lexeme::RP(c) => match paren {
        Some(exp_c) if exp_c == *c => return Ok((output, cursor.step(ctx)?)),
        _ => {
          let err = MisalignedParen(current.lexeme.clone());
          return Err(err.pack(ctx.range_loc(&current.range)));
        },
      },
      Lexeme::LP(c) => {
        let (result, leftover) = parse_exprv(cursor.step(ctx)?, Some(*c), ctx)?;
        let range = current.range.start..leftover.fallback.range.end;
        let value = Clause::S(*c, Rc::new(result));
        output.push(Expr { value, range: ctx.range_loc(&range) });
        cursor = leftover;
      },
      Lexeme::BS => {
        let dot = i!(str: ".");
        let (arg, body) =
          (cursor.step(ctx))?.find("A '.'", ctx, |l| l.strict_eq(&Lexeme::Name(dot.clone())))?;
        let (arg, _) = parse_exprv(arg, None, ctx)?;
        let (body, leftover) = parse_exprv(body, paren, ctx)?;
        output.push(Expr {
          range: ctx.range_loc(&cursor.range()),
          value: Clause::Lambda(Rc::new(arg), Rc::new(body)),
        });
        return Ok((output, leftover));
      },
      lexeme => {
        let lexeme = lexeme.clone();
        let err = BadTokenInRegion { lexeme, region: "expression" };
        return Err(err.pack(ctx.range_loc(&current.range)));
      },
    }
    cursor = cursor.trim();
  }
  Ok((output, Frag::new(cursor.fallback, &[])))
}

/// Wrap an expression list in parentheses if necessary
pub fn exprv_to_single(
  fallback: &Entry,
  v: Vec<Expr>,
  ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Expr> {
  match v.len() {
    0 => {
      let err = UnexpectedEOL(fallback.lexeme.clone());
      Err(err.pack(ctx.range_loc(&fallback.range)))
    },
    1 => Ok(v.into_iter().exactly_one().unwrap()),
    _ => {
      let f_range = &v.first().unwrap().range;
      let l_range = &v.last().unwrap().range;
      let range = f_range.map_range(|r| r.start..l_range.end());
      Ok(Expr { range, value: Clause::S(PType::Par, Rc::new(v)) })
    },
  }
}