in the midst of a refactor

2024-04-29 21:46:42 +02:00
parent ed0d64d52e
commit aa3f7e99ab
221 changed files with 5431 additions and 685 deletions

View File

@@ -0,0 +1,163 @@
//! Definition and implementations of the parsing context, which is used
//! throughout the parser to access the source text, the registered lexer and
//! parser plugins, and the error reporter
use std::ops::Range;
use std::sync::Arc;
use super::lex_plugin::LexerPlugin;
use super::parse_plugin::ParseLinePlugin;
use crate::error::Reporter;
use crate::location::{SourceCode, SourceRange};
use crate::utils::boxed_iter::{box_empty, BoxedIter};
use crate::utils::sequence::Sequence;
/// Trait enclosing all context features
///
/// The main implementation is [ParseCtxImpl]
pub trait ParseCtx {
/// Get an object describing the file this source code comes from
#[must_use]
fn code_info(&self) -> SourceCode;
/// Get the list of all lexer plugins
#[must_use]
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin>;
/// Get the list of all parser plugins
#[must_use]
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin>;
/// Error reporter
#[must_use]
fn reporter(&self) -> &Reporter;
/// Find our position in the text given the text we've yet to parse
#[must_use]
fn pos(&self, tail: &str) -> usize {
let tail_len = tail.len();
let source_len = self.source().len();
(self.source().len().checked_sub(tail.len())).unwrap_or_else(|| {
panic!("tail.len()={tail_len} greater than self.source().len()={source_len}; tail={tail:?}")
})
}
/// Generate a location given the length of a token and the unparsed text
/// after it. See also [ParseCtx::range_loc] if the maths gets complex.
#[must_use]
fn range(&self, len: usize, tl: &str) -> Range<usize> {
match self.pos(tl).checked_sub(len) {
Some(start) => start..self.pos(tl),
None => {
panic!("len={len} greater than tail.len()={}; tail={tl:?}", tl.len())
},
}
}
/// Create a contextful location for error reporting
#[must_use]
fn source_range(&self, len: usize, tl: &str) -> SourceRange {
self.range_loc(&self.range(len, tl))
}
/// Create a contextful location from a range directly.
#[must_use]
fn range_loc(&self, range: &Range<usize>) -> SourceRange {
SourceRange { code: self.code_info(), range: range.clone() }
}
/// Get a reference to the full source text. This should not be used for
/// position math.
#[must_use]
fn source(&self) -> Arc<String> { self.code_info().text.clone() }
}
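// Worked example of the position math above, assuming a context whose source
// text is "foo bar" (7 bytes): after lexing `foo`, the unparsed tail is
// " bar", so
//   pos(" bar")             == 7 - 4 == 3
//   range(3, " bar")        == 0..3   (the bytes of `foo`)
//   source_range(3, " bar") == the same range plus the file metadata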
impl<'a, C: ParseCtx + 'a + ?Sized> ParseCtx for &'a C {
fn reporter(&self) -> &Reporter { (*self).reporter() }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { (*self).lexers() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { (*self).line_parsers() }
fn pos(&self, tail: &str) -> usize { (*self).pos(tail) }
fn code_info(&self) -> SourceCode { (*self).code_info() }
fn source(&self) -> Arc<String> { (*self).source() }
fn range(&self, l: usize, t: &str) -> Range<usize> { (*self).range(l, t) }
}
/// Struct implementing context
#[derive(Clone)]
pub struct ParseCtxImpl<'a, 'b> {
/// File to be parsed; where it belongs in the tree and its text
pub code: SourceCode,
/// Error aggregator
pub reporter: &'b Reporter,
/// Lexer plugins for parsing custom literals
pub lexers: Sequence<'a, &'a (dyn LexerPlugin + 'a)>,
/// Parser plugins for parsing custom line structures
pub line_parsers: Sequence<'a, &'a dyn ParseLinePlugin>,
}
impl<'a, 'b> ParseCtx for ParseCtxImpl<'a, 'b> {
fn reporter(&self) -> &Reporter { self.reporter }
// Rust doesn't realize that this lifetime is covariant
#[allow(clippy::map_identity)]
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { Box::new(self.lexers.iter().map(|r| r)) }
#[allow(clippy::map_identity)]
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> {
Box::new(self.line_parsers.iter().map(|r| r))
}
fn code_info(&self) -> SourceCode { self.code.clone() }
}
/// Context instance for testing. Implicitly provides a reporter and panics if
/// any errors are reported
pub struct MockContext(pub Reporter);
impl MockContext {
/// Create a new mock
pub fn new() -> Self { Self(Reporter::new()) }
}
impl Default for MockContext {
fn default() -> Self { Self::new() }
}
impl ParseCtx for MockContext {
fn reporter(&self) -> &Reporter { &self.0 }
fn pos(&self, tail: &str) -> usize { usize::MAX / 2 - tail.len() }
// the remaining methods only need placeholder values in tests
fn code_info(&self) -> SourceCode { SourceRange::mock().code() }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { box_empty() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { box_empty() }
}
impl Drop for MockContext {
fn drop(&mut self) { self.0.assert() }
}
/// Context that assigns the same location to every subset of the source code.
/// Its main use case is to process source code that was dynamically generated
/// in response to some user code. See also [ReporterContext]
pub struct FlatLocContext<'a, C: ParseCtx + ?Sized> {
sub: &'a C,
range: &'a SourceRange,
}
impl<'a, C: ParseCtx + ?Sized> FlatLocContext<'a, C> {
/// Create a new context that will use the same provided range for every
/// parsed token
pub fn new(sub: &'a C, range: &'a SourceRange) -> Self { Self { sub, range } }
}
impl<'a, C: ParseCtx + ?Sized> ParseCtx for FlatLocContext<'a, C> {
fn reporter(&self) -> &Reporter { self.sub.reporter() }
fn pos(&self, _: &str) -> usize { 0 }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { self.sub.lexers() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { self.sub.line_parsers() }
fn code_info(&self) -> SourceCode { self.range.code() }
fn range(&self, _: usize, _: &str) -> Range<usize> { self.range.range() }
}
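// Usage sketch (`call_site_range` and `generated_src` are hypothetical): when
// lexing text generated at runtime, wrap the real context so that every
// produced token is attributed to the generating call site rather than to
// offsets within the synthetic string.
//
// let flat = FlatLocContext::new(&ctx, &call_site_range);
// let res = lex(Vec::new(), &generated_src, &flat, |_| Ok::<_, Never>(false));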
/// Context that forwards everything to a wrapped context except for error
/// reporting. See also [FlatLocContext]
pub struct ReporterContext<'a, C: ParseCtx + ?Sized> {
sub: &'a C,
reporter: &'a Reporter,
}
impl<'a, C: ParseCtx + ?Sized> ReporterContext<'a, C> {
/// Create a new context that will collect errors separately and forward
/// everything else to an enclosed context
pub fn new(sub: &'a C, reporter: &'a Reporter) -> Self { Self { sub, reporter } }
}
impl<'a, C: ParseCtx + ?Sized> ParseCtx for ReporterContext<'a, C> {
fn reporter(&self) -> &Reporter { self.reporter }
fn pos(&self, tail: &str) -> usize { self.sub.pos(tail) }
fn lexers(&self) -> BoxedIter<'_, &dyn LexerPlugin> { self.sub.lexers() }
fn line_parsers(&self) -> BoxedIter<'_, &dyn ParseLinePlugin> { self.sub.line_parsers() }
fn code_info(&self) -> SourceCode { self.sub.code_info() }
fn range(&self, len: usize, tl: &str) -> Range<usize> { self.sub.range(len, tl) }
fn range_loc(&self, range: &Range<usize>) -> SourceRange { self.sub.range_loc(range) }
fn source(&self) -> Arc<String> { self.sub.source() }
fn source_range(&self, len: usize, tl: &str) -> SourceRange { self.sub.source_range(len, tl) }
}
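// A minimal sanity check of the mock's synthetic position scheme: tails are
// anchored near usize::MAX / 2, so the subtraction in the default `range`
// implementation cannot underflow.
#[cfg(test)]
mod test {
  use super::{MockContext, ParseCtx};

  #[test]
  fn mock_pos_math() {
    let ctx = MockContext::new();
    let range = ctx.range(2, "tail");
    assert_eq!(range.end - range.start, 2);
    // a longer tail means less text has been consumed, i.e. a smaller position
    assert!(ctx.pos("longer tail") < ctx.pos("tail"));
  }
}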

View File

@@ -0,0 +1,215 @@
//! Errors produced by the parser. Plugins are encouraged to reuse these where
//! applicable.
use intern_all::Tok;
use itertools::Itertools;
use super::context::ParseCtx;
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use crate::error::{ProjectError, ProjectErrorObj, ProjectResult};
use crate::location::{CodeOrigin, SourceRange};
use crate::parse::parsed::PType;
/// Parse error information without a location. Location data is added by the
/// parser.
pub trait ParseErrorKind: Sized + Send + Sync + 'static {
/// A general description of the error condition
const DESCRIPTION: &'static str;
/// A specific description of the error with concrete text sections
fn message(&self) -> String { Self::DESCRIPTION.to_string() }
/// Convert this error to a type-erased [ProjectError] to be handled together
/// with other Orchid errors.
fn pack(self, range: SourceRange) -> ProjectErrorObj { ParseError { kind: self, range }.pack() }
}
struct ParseError<T> {
pub range: SourceRange,
pub kind: T,
}
impl<T: ParseErrorKind> ProjectError for ParseError<T> {
const DESCRIPTION: &'static str = T::DESCRIPTION;
fn one_position(&self) -> CodeOrigin { self.range.origin() }
fn message(&self) -> String { self.kind.message() }
}
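/// Illustrative sketch of a plugin-defined error kind following the same
/// pattern. This particular kind is hypothetical and raised nowhere; only
/// [ParseErrorKind::DESCRIPTION] is mandatory, `message` defaults to it.
pub struct UnclosedString;
impl ParseErrorKind for UnclosedString {
  const DESCRIPTION: &'static str = "A string literal was never closed";
}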
/// A line does not begin with an identifying keyword. Raised on the first token
pub(super) struct LineNeedsPrefix(pub Lexeme);
impl ParseErrorKind for LineNeedsPrefix {
const DESCRIPTION: &'static str = "This linetype requires a prefix";
fn message(&self) -> String { format!("{} cannot appear at the beginning of a line", self.0) }
}
/// The line ends abruptly. Raised on the last token
pub(super) struct UnexpectedEOL(pub Lexeme);
impl ParseErrorKind for UnexpectedEOL {
const DESCRIPTION: &'static str = "The line ended abruptly";
fn message(&self) -> String {
"In Orchid, all line breaks outside parentheses start a new declaration".to_string()
}
}
/// The line should have ended. Raised on last valid or first excess token
pub(super) struct ExpectedEOL;
impl ParseErrorKind for ExpectedEOL {
const DESCRIPTION: &'static str = "Expected the end of the line";
}
/// A name was expected.
pub(super) struct ExpectedName(pub Lexeme);
impl ParseErrorKind for ExpectedName {
const DESCRIPTION: &'static str = "A name was expected";
fn message(&self) -> String { format!("Expected a name, found {}", self.0) }
}
/// Unwrap a name or operator.
pub(super) fn expect_name(
Entry { lexeme, range }: &Entry,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Tok<String>> {
match lexeme {
Lexeme::Name(n) => Ok(n.clone()),
lex => Err(ExpectedName(lex.clone()).pack(ctx.range_loc(range))),
}
}
/// A specific lexeme was expected
pub(super) struct Expected {
/// The lexemes that would have been acceptable
pub expected: Vec<Lexeme>,
/// Whether a name would also have been acceptable (multiname)
pub or_name: bool,
/// What was actually found
pub found: Lexeme,
}
impl ParseErrorKind for Expected {
const DESCRIPTION: &'static str = "A concrete token was expected";
fn message(&self) -> String {
let list = match &self.expected[..] {
&[] => return "Unsatisfiable expectation".to_string(),
[only] => only.to_string(),
[a, b] => format!("either {a} or {b}"),
[variants @ .., last] => {
format!("any of {} or {last}", variants.iter().join(", "))
},
};
let or_name = if self.or_name { " or a name" } else { "" };
format!("Expected {list}{or_name} but found {}", self.found)
}
}
/// Assert that the entry contains exactly the specified lexeme
pub(super) fn expect(l: Lexeme, e: &Entry, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<()> {
if e.lexeme.strict_eq(&l) {
return Ok(());
}
let found = e.lexeme.clone();
let kind = Expected { expected: vec![l], or_name: false, found };
Err(kind.pack(ctx.range_loc(&e.range)))
}
/// A token reserved for future use was found in the code
pub(super) struct ReservedToken(pub Lexeme);
impl ParseErrorKind for ReservedToken {
const DESCRIPTION: &'static str = "Syntax reserved for future use";
fn message(&self) -> String { format!("{} is a reserved token", self.0) }
}
/// A token was found where it doesn't belong
pub(super) struct BadTokenInRegion {
/// What was found
pub lexeme: Lexeme,
/// Human-readable name of the region where it should not appear
pub region: &'static str,
}
impl ParseErrorKind for BadTokenInRegion {
const DESCRIPTION: &'static str = "An unexpected token was found";
fn message(&self) -> String { format!("{} cannot appear in {}", self.lexeme, self.region) }
}
/// Some construct was searched but not found.
pub(super) struct NotFound(pub &'static str);
impl ParseErrorKind for NotFound {
const DESCRIPTION: &'static str = "A specific lexeme was expected";
fn message(&self) -> String { format!("{} was expected", self.0) }
}
/// :: found on its own somewhere other than a general export
pub(super) struct LeadingNS;
impl ParseErrorKind for LeadingNS {
const DESCRIPTION: &'static str = ":: can only follow a name token";
}
/// Parens don't pair up
pub(super) struct MisalignedParen(pub Lexeme);
impl ParseErrorKind for MisalignedParen {
const DESCRIPTION: &'static str = "(), [] and {} must always pair up";
fn message(&self) -> String { format!("This {} has no pair", self.0) }
}
/// Export line contains a complex name
pub(super) struct NamespacedExport;
impl ParseErrorKind for NamespacedExport {
const DESCRIPTION: &'static str = "Only local names may be exported";
}
/// Export line contains *
pub(super) struct GlobExport;
impl ParseErrorKind for GlobExport {
const DESCRIPTION: &'static str = "Globstars are not allowed in exports";
}
/// Comment never ends
pub(super) struct NoCommentEnd;
impl ParseErrorKind for NoCommentEnd {
const DESCRIPTION: &'static str = "a comment was not closed with `]--`";
}
/// A placeholder's priority is a floating point number
pub(super) struct FloatPlacehPrio;
impl ParseErrorKind for FloatPlacehPrio {
const DESCRIPTION: &'static str =
"a placeholder priority has a decimal point or a negative exponent";
}
/// A number literal decodes to NaN
pub(super) struct NaNLiteral;
impl ParseErrorKind for NaNLiteral {
const DESCRIPTION: &'static str = "float literal decoded to NaN";
}
/// A sequence of digits in a number literal overflows [usize].
pub(super) struct LiteralOverflow;
impl ParseErrorKind for LiteralOverflow {
const DESCRIPTION: &'static str = "number literal described number greater than usize::MAX";
}
/// A digit was expected but something else was found
pub(super) struct ExpectedDigit;
impl ParseErrorKind for ExpectedDigit {
const DESCRIPTION: &'static str = "expected a digit";
}
/// Expected a parenthesized block at the end of the line
pub(super) struct ExpectedBlock;
impl ParseErrorKind for ExpectedBlock {
const DESCRIPTION: &'static str = "Expected a parenthesized block";
}
/// Remove two parentheses from the ends of the cursor
pub(super) fn expect_block<'a>(
tail: Frag<'a>,
typ: PType,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Frag<'a>> {
let (lp, tail) = tail.trim().pop(ctx)?;
expect(Lexeme::LP(typ), lp, ctx)?;
let (rp, tail) = tail.pop_back(ctx)?;
expect(Lexeme::RP(typ), rp, ctx)?;
Ok(tail.trim())
}
/// A namespaced name was expected but a glob pattern or a branching multiname
/// was found.
pub(super) struct ExpectedSingleName;
impl ParseErrorKind for ExpectedSingleName {
const DESCRIPTION: &'static str = "expected a single name, no wildcards, no branches";
}

View File

@@ -0,0 +1,42 @@
//! Entrypoints to the parser that combine lexing and parsing
use never::Never;
use super::context::{FlatLocContext, ParseCtx, ReporterContext};
use super::frag::Frag;
use super::lexer::lex;
use super::sourcefile::parse_module_body;
use crate::error::Reporter;
use crate::location::SourceRange;
use crate::parse::parsed::SourceLine;
use crate::parse::sourcefile::{parse_line, split_lines};
/// Parse a file
pub fn parse_file(ctx: &impl ParseCtx) -> Vec<SourceLine> {
let tokens = lex(vec![], ctx.source().as_str(), ctx, |_| Ok::<_, Never>(false))
.unwrap_or_else(|e| match e {})
.tokens;
if tokens.is_empty() { Vec::new() } else { parse_module_body(Frag::from_slice(&tokens), ctx) }
}
/// Parse a statically defined line sequence
///
/// # Panics
///
/// On any parse error, which is why it only accepts a string literal
pub fn parse_entries(
ctx: &dyn ParseCtx,
text: &'static str,
range: SourceRange,
) -> Vec<SourceLine> {
let reporter = Reporter::new();
let flctx = FlatLocContext::new(ctx, &range);
let ctx = ReporterContext::new(&flctx, &reporter);
let res = lex(vec![], text, &ctx, |_| Ok::<_, Never>(false)).unwrap_or_else(|e| match e {});
let out = split_lines(Frag::from_slice(&res.tokens), &ctx)
.flat_map(|tokens| parse_line(tokens, &ctx).expect("pre-specified source"))
.map(|kind| kind.wrap(range.clone()))
.collect();
reporter.assert();
out
}
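// Usage sketch with a hypothetical context and source string; the `const`
// line form shown is the one rendered by [crate::parse::parsed::Constant]:
//
// let lines = parse_entries(&ctx, "const id := \\x.x", SourceRange::mock());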

View File

@@ -0,0 +1,133 @@
//! The [Frag] is the main input datastructure of parsers. Beyond the slice of
//! tokens, it contains a fallback value that can be used for error reporting if
//! the fragment is empty.
use std::ops::Range;
use super::context::ParseCtx;
use super::errors::{ExpectedEOL, NotFound, ParseErrorKind, UnexpectedEOL};
use super::lexer::{Entry, Lexeme};
use crate::error::ProjectResult;
/// Represents a slice which may or may not contain items, and a fallback entry
/// used for error reporting whenever the errant fragment is empty.
#[must_use = "fragment of code should not be discarded implicitly"]
#[derive(Clone, Copy)]
pub struct Frag<'a> {
/// Entry to place in errors if the fragment contains no tokens
pub fallback: &'a Entry,
/// Tokens to parse
pub data: &'a [Entry],
}
impl<'a> Frag<'a> {
/// Create a new fragment
pub fn new(fallback: &'a Entry, data: &'a [Entry]) -> Self { Self { fallback, data } }
/// Remove comments and line breaks from both ends of the text
pub fn trim(self) -> Self {
let Self { data, fallback } = self;
let front = data.iter().take_while(|e| e.is_filler()).count();
let (_, right) = data.split_at(front);
let back = right.iter().rev().take_while(|e| e.is_filler()).count();
let (data, _) = right.split_at(right.len() - back);
Self { fallback, data }
}
/// Discard the first entry
pub fn step(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Self> {
let Self { data, fallback: Entry { lexeme, range } } = self;
match data.split_first() {
Some((fallback, data)) => Ok(Frag { data, fallback }),
None => Err(UnexpectedEOL(lexeme.clone()).pack(ctx.range_loc(range))),
}
}
/// Get the first entry
pub fn pop(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<(&'a Entry, Self)> {
Ok((self.get(0, ctx)?, self.step(ctx)?))
}
/// Retrieve the entry at an index, or raise an error if the fragment is too
/// short.
pub fn get(self, idx: usize, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<&'a Entry> {
self.data.get(idx).ok_or_else(|| {
let entry = self.data.last().unwrap_or(self.fallback).clone();
UnexpectedEOL(entry.lexeme).pack(ctx.range_loc(&entry.range))
})
}
/// Area covered by this fragment
#[must_use]
pub fn range(self) -> Range<usize> {
self.data.first().map_or_else(
|| self.fallback.range.clone(),
|f| f.range.start..self.data.last().unwrap().range.end,
)
}
/// Find a given token, split the fragment there and read some value from the
/// separator. See also [Frag::find]
pub fn find_map<T>(
self,
msg: &'static str,
ctx: &(impl ParseCtx + ?Sized),
mut f: impl FnMut(&'a Lexeme) -> Option<T>,
) -> ProjectResult<(Self, T, Self)> {
let Self { data, fallback } = self;
let (dot_idx, output) = skip_parenthesized(data.iter())
.find_map(|(i, e)| f(&e.lexeme).map(|t| (i, t)))
.ok_or_else(|| NotFound(msg).pack(ctx.range_loc(&self.range())))?;
let (left, not_left) = data.split_at(dot_idx);
let (middle_ent, right) = not_left.split_first().unwrap();
Ok((Self::new(fallback, left), output, Self::new(middle_ent, right)))
}
/// Split the fragment at a token and return just the two sides.
/// See also [Frag::find_map].
pub fn find(
self,
descr: &'static str,
ctx: &(impl ParseCtx + ?Sized),
mut f: impl FnMut(&Lexeme) -> bool,
) -> ProjectResult<(Self, Self)> {
let (l, _, r) = self.find_map(descr, ctx, |l| Some(l).filter(|l| f(l)))?;
Ok((l, r))
}
/// Remove the last item from the fragment
pub fn pop_back(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<(&'a Entry, Self)> {
let Self { data, fallback } = self;
let (last, data) = (data.split_last())
.ok_or_else(|| UnexpectedEOL(fallback.lexeme.clone()).pack(ctx.range_loc(&fallback.range)))?;
Ok((last, Self { fallback, data }))
}
/// # Panics
///
/// If the slice is empty
pub fn from_slice(data: &'a [Entry]) -> Self {
let fallback = (data.first()).expect("Empty slice cannot be converted into a parseable");
Self { data, fallback }
}
/// Assert that the fragment is empty.
pub fn expect_empty(self, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<()> {
match self.data.first() {
Some(x) => Err(ExpectedEOL.pack(ctx.range_loc(&x.range))),
None => Ok(()),
}
}
}
fn skip_parenthesized<'a>(
it: impl Iterator<Item = &'a Entry>,
) -> impl Iterator<Item = (usize, &'a Entry)> {
let mut paren_lvl = 1;
it.enumerate().filter(move |(_, e)| {
match e.lexeme {
Lexeme::LP(_) => paren_lvl += 1,
Lexeme::RP(_) => paren_lvl -= 1,
_ => (),
}
paren_lvl <= 1
})
}
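// A minimal check of the skipping rule: the body of a paren pair is dropped,
// but the closing paren itself is still yielded, which lets [Frag::find]
// locate tokens at the top level only.
#[cfg(test)]
mod test {
  use super::skip_parenthesized;
  use crate::parse::lexer::{Entry, Lexeme};
  use crate::parse::parsed::PType;

  #[test]
  fn skips_parenthesized_body() {
    let e = |lexeme| Entry::new(0..0, lexeme);
    let entries = [
      e(Lexeme::BR),
      e(Lexeme::LP(PType::Par)),
      e(Lexeme::BR),
      e(Lexeme::RP(PType::Par)),
      e(Lexeme::BR),
    ];
    let indices: Vec<usize> = skip_parenthesized(entries.iter()).map(|(i, _)| i).collect();
    assert_eq!(indices, vec![0, 3, 4]);
  }
}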

View File

@@ -0,0 +1,65 @@
//! Abstractions for dynamic extensions to the lexer to parse custom literals
use dyn_clone::DynClone;
use never::Never;
use super::context::{FlatLocContext, ParseCtx};
use super::lexer::{lex, Entry, LexRes};
use crate::error::ProjectResult;
use crate::location::SourceRange;
/// Data passed to the recursive sub-lexer
pub struct LexPluginRecur<'a, 'b> {
/// Text to tokenize
pub tail: &'a str,
/// Callback that will be called between lexemes on the leftover text.
/// When it returns true, the lexer exits and leaves the remaining text for
/// you.
pub exit: &'b mut dyn for<'c> FnMut(&'c str) -> ProjectResult<bool>,
}
/// Data and actions available to a lexer plugin
pub trait LexPluginReq<'a> {
/// Text to tokenize
fn tail(&self) -> &'a str;
/// [ParseCtx] instance for calculating locations and such
fn ctx(&self) -> &dyn ParseCtx;
/// Start a child lexer that calls back between lexemes and exits on your
/// command. You can combine this with custom atoms to create holes for
/// expressions in your literals like the template strings of most languages
/// other than Rust.
fn recurse(&self, req: LexPluginRecur<'a, '_>) -> ProjectResult<LexRes<'a>>;
/// Lex an inserted piece of text, especially when translating custom syntax
/// into multiple lexemes.
///
/// # Panics
///
/// If tokenization fails
fn insert(&self, data: &str, range: SourceRange) -> Vec<Entry>;
}
/// External plugin that parses a literal into recognized Orchid lexemes, most
/// likely atoms.
pub trait LexerPlugin: Send + Sync + DynClone {
/// Run the lexer
fn lex<'a>(&self, req: &'_ dyn LexPluginReq<'a>) -> Option<ProjectResult<LexRes<'a>>>;
}
/// Implementation of [LexPluginReq]
pub struct LexPlugReqImpl<'a, 'b, TCtx: ParseCtx> {
/// Text to be lexed
pub tail: &'a str,
/// Context data
pub ctx: &'b TCtx,
}
impl<'a, 'b, TCtx: ParseCtx> LexPluginReq<'a> for LexPlugReqImpl<'a, 'b, TCtx> {
fn tail(&self) -> &'a str { self.tail }
fn ctx(&self) -> &dyn ParseCtx { self.ctx }
fn recurse(&self, req: LexPluginRecur<'a, '_>) -> ProjectResult<LexRes<'a>> {
lex(Vec::new(), req.tail, self.ctx, |s| (req.exit)(s))
}
fn insert(&self, data: &str, range: SourceRange) -> Vec<Entry> {
let ctx = FlatLocContext::new(self.ctx as &dyn ParseCtx, &range);
lex(Vec::new(), data, &ctx, |_| Ok::<_, Never>(false)).unwrap_or_else(|e| match e {}).tokens
}
}
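/// Illustrative sketch of a lexer plugin (hypothetical, registered nowhere):
/// strips a leading `#!` shebang line and produces no tokens for it. Note
/// that returning [Some] with an unchanged tail would trip the zero-progress
/// panic in the main lexer loop, so the plugin always consumes the `#!`.
#[derive(Clone)]
pub struct ShebangLexer;
impl LexerPlugin for ShebangLexer {
  fn lex<'a>(&self, req: &'_ dyn LexPluginReq<'a>) -> Option<ProjectResult<LexRes<'a>>> {
    req.tail().strip_prefix("#!").map(|rest| {
      // Consume up to and including the next line break; if there is none,
      // the whole remaining text was the shebang.
      let tail = rest.split_once('\n').map_or("", |(_, tail)| tail);
      Ok(LexRes { tail, tokens: Vec::new() })
    })
  }
}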

View File

@@ -0,0 +1,318 @@
//! Convert source text into a sequence of tokens. Newlines and comments are
//! included, but spacing is converted into numerical ranges on the elements.
//!
//! Literals lose their syntax form here and are handled in an abstract
//! representation from here on
use std::fmt;
use std::ops::Range;
use std::sync::Arc;
use intern_all::{i, Tok};
use itertools::Itertools;
use ordered_float::NotNan;
use super::context::ParseCtx;
use super::errors::{FloatPlacehPrio, NoCommentEnd};
use super::lex_plugin::LexerPlugin;
use super::numeric::{numstart, parse_num, print_nat16};
use crate::foreign::atom::AtomGenerator;
use crate::libs::std::number::Numeric;
use crate::parse::errors::ParseErrorKind;
use crate::parse::lex_plugin::LexPlugReqImpl;
use crate::parse::numeric::{numchar, NumericLexer};
use crate::parse::parsed::{PHClass, PType, Placeholder};
/// A lexeme and the location where it was found
#[derive(Clone, Debug)]
pub struct Entry {
/// the lexeme
pub lexeme: Lexeme,
/// the range in bytes
pub range: Range<usize>,
}
impl Entry {
/// Checks if the lexeme is a comment or line break
#[must_use]
pub fn is_filler(&self) -> bool { matches!(self.lexeme, Lexeme::Comment(_) | Lexeme::BR) }
/// Create a new entry
#[must_use]
pub fn new(range: Range<usize>, lexeme: Lexeme) -> Self { Self { lexeme, range } }
}
impl fmt::Display for Entry {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.lexeme.fmt(f) }
}
impl PartialEq<Lexeme> for Entry {
fn eq(&self, other: &Lexeme) -> bool { self.lexeme == *other }
}
/// A unit of syntax
#[derive(Clone, Debug, PartialEq)]
pub enum Lexeme {
/// Atoms parsed by plugins
Atom(AtomGenerator),
/// Keyword or name
Name(Tok<String>),
/// Macro operator `=N=>` where `N` is the rule priority
Arrow(NotNan<f64>),
/// `:=`
Walrus,
/// Line break
BR,
/// `::`
NS,
/// Left paren `([{`
LP(PType),
/// Right paren `)]}`
RP(PType),
/// `\`
BS,
/// `@`
At,
/// `:`
Type,
/// comment
Comment(Arc<String>),
/// placeholder in a macro.
Placeh(Placeholder),
}
impl fmt::Display for Lexeme {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Atom(a) => write!(f, "{a:?}"),
Self::Name(token) => write!(f, "{}", **token),
Self::Walrus => write!(f, ":="),
Self::Arrow(prio) => write!(f, "={}=>", print_nat16(*prio)),
Self::NS => write!(f, "::"),
Self::LP(t) => write!(f, "{}", t.l()),
Self::RP(t) => write!(f, "{}", t.r()),
Self::BR => writeln!(f),
Self::BS => write!(f, "\\"),
Self::At => write!(f, "@"),
Self::Type => write!(f, ":"),
Self::Comment(text) => write!(f, "--[{}]--", text),
Self::Placeh(ph) => write!(f, "{ph}"),
}
}
}
impl Lexeme {
/// Compare lexemes for equality. It's `strict` because for atoms it uses the
/// strict equality comparison
pub fn strict_eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Arrow(f1), Self::Arrow(f2)) => f1 == f2,
(Self::At, Self::At) | (Self::BR, Self::BR) => true,
(Self::BS, Self::BS) => true,
(Self::NS, Self::NS) | (Self::Type, Self::Type) => true,
(Self::Walrus, Self::Walrus) => true,
(Self::Atom(a1), Self::Atom(a2)) => a1.run().0.parser_eq(&*a2.run().0),
(Self::Comment(c1), Self::Comment(c2)) => c1 == c2,
(Self::LP(p1), Self::LP(p2)) | (Self::RP(p1), Self::RP(p2)) => p1 == p2,
(Self::Name(n1), Self::Name(n2)) => n1 == n2,
(Self::Placeh(ph1), Self::Placeh(ph2)) => ph1 == ph2,
(..) => false,
}
}
}
/// Data returned from the lexer
pub struct LexRes<'a> {
/// Leftover text. If the bail callback never returned true, this is empty
pub tail: &'a str,
/// Lexemes extracted from the text
pub tokens: Vec<Entry>,
}
/// Neatly format source code
#[allow(unused)]
pub fn format(lexed: &[Entry]) -> String { lexed.iter().join(" ") }
/// Character filter that can appear in a keyword or name
pub fn namechar(c: char) -> bool { c.is_alphanumeric() | (c == '_') }
/// Character filter that can start a name
pub fn namestart(c: char) -> bool { c.is_alphabetic() | (c == '_') }
/// Character filter that can appear in operators.
pub fn opchar(c: char) -> bool {
!namestart(c) && !numstart(c) && !c.is_whitespace() && !"()[]{},'\"\\".contains(c)
}
/// Split off all characters from the beginning that match a filter
pub fn split_filter(s: &str, mut pred: impl FnMut(char) -> bool) -> (&str, &str) {
s.find(|c| !pred(c)).map_or((s, ""), |i| s.split_at(i))
}
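// e.g. split_filter("123abc", |c| c.is_ascii_digit()) == ("123", "abc")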
fn lit_table() -> impl IntoIterator<Item = (&'static str, Lexeme)> {
[
("\\", Lexeme::BS),
("@", Lexeme::At),
("(", Lexeme::LP(PType::Par)),
("[", Lexeme::LP(PType::Sqr)),
("{", Lexeme::LP(PType::Curl)),
(")", Lexeme::RP(PType::Par)),
("]", Lexeme::RP(PType::Sqr)),
("}", Lexeme::RP(PType::Curl)),
("\n", Lexeme::BR),
(":=", Lexeme::Walrus),
("::", Lexeme::NS),
(":", Lexeme::Type),
]
}
static BUILTIN_ATOMS: &[&dyn LexerPlugin] = &[&NumericLexer];
/// Convert source code to a flat list of tokens. The bail callback will be
/// called between lexemes. When it returns true, the remaining text is
/// returned without processing.
pub fn lex<'a, E>(
mut tokens: Vec<Entry>,
mut data: &'a str,
ctx: &'_ impl ParseCtx,
mut bail: impl FnMut(&str) -> Result<bool, E>,
) -> Result<LexRes<'a>, E> {
let mut prev_len = data.len() + 1;
'tail: loop {
if prev_len == data.len() {
panic!("got stuck at {data:?}, parsed {:?}", tokens.last().unwrap());
}
prev_len = data.len();
data = data.trim_start_matches(|c: char| c.is_whitespace() && c != '\n');
if bail(data)? {
return Ok(LexRes { tokens, tail: data });
}
let mut chars = data.chars();
let head = match chars.next() {
None => return Ok(LexRes { tokens, tail: data }),
Some(h) => h,
};
for lexer in ctx.lexers().chain(BUILTIN_ATOMS.iter().copied()) {
let req = LexPlugReqImpl { tail: data, ctx };
if let Some(res) = lexer.lex(&req) {
let LexRes { tail, tokens: mut new_tokens } =
ctx.reporter().fallback(res, |_| LexRes { tail: "", tokens: vec![] });
// fallback: no tokens left, no additional tokens parsed
if tail.len() == data.len() {
panic!("lexer plugin consumed 0 characters")
}
tokens.append(&mut new_tokens);
data = tail;
continue 'tail;
}
}
for (prefix, lexeme) in lit_table() {
if let Some(tail) = data.strip_prefix(prefix) {
tokens.push(Entry::new(ctx.range(prefix.len(), tail), lexeme.clone()));
data = tail;
continue 'tail;
}
}
if let Some(tail) = data.strip_prefix(',') {
tokens.push(Entry::new(ctx.range(1, tail), Lexeme::Name(i!(str: ","))));
data = tail;
continue 'tail;
}
if let Some(tail) = data.strip_prefix("--[") {
let (note, tail) = tail.split_once("]--").unwrap_or_else(|| {
ctx.reporter().report(NoCommentEnd.pack(ctx.source_range(tail.len(), "")));
(tail, "") // fallback: the rest of the file is in the comment
});
let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
tokens.push(Entry::new(ctx.range(note.len() + 3, tail), lexeme));
data = tail;
continue 'tail;
}
if let Some(tail) = data.strip_prefix("--") {
let (note, tail) = split_filter(tail, |c| c != '\n');
let lexeme = Lexeme::Comment(Arc::new(note.to_string()));
tokens.push(Entry::new(ctx.range(note.len(), tail), lexeme));
data = tail;
continue 'tail;
}
// Parse a rule arrow
if let Some(tail) = data.strip_prefix('=') {
if tail.chars().next().map_or(false, numstart) {
let (num, post_num) = split_filter(tail, numchar);
if let Some(tail) = post_num.strip_prefix("=>") {
let prio = parse_num(num).unwrap_or_else(|e| {
ctx.reporter().report(e.into_proj(num.len(), post_num, ctx));
Numeric::Uint(0)
});
let lexeme = Lexeme::Arrow(prio.as_float());
tokens.push(Entry::new(ctx.range(num.len() + 3, tail), lexeme));
data = tail;
continue 'tail;
}
}
}
// Parse scalar placeholder $_name or $name
if let Some(tail) = data.strip_prefix('$') {
let (nameonly, tail) = tail.strip_prefix('_').map_or((false, tail), |t| (true, t));
let (name, tail) = split_filter(tail, namechar);
if !name.is_empty() {
let class = if nameonly { PHClass::Name } else { PHClass::Scalar };
let lexeme = Lexeme::Placeh(Placeholder { name: i(name), class });
tokens.push(Entry::new(ctx.range(name.len() + 1, tail), lexeme));
data = tail;
continue 'tail;
}
}
// Parse vectorial placeholder. `..` or `...`, then `$name`, then an optional
// `:n` where n is a number.
if let Some(tail) = data.strip_prefix("..") {
let (nonzero, tail) = tail.strip_prefix('.').map_or((false, tail), |t| (true, t));
if let Some(tail) = tail.strip_prefix('$') {
let (name, tail) = split_filter(tail, namechar);
if !name.is_empty() {
let (prio, priolen, tail) = tail
.strip_prefix(':')
.map(|tail| split_filter(tail, numchar))
.filter(|(num, _)| !num.is_empty())
.map(|(num_str, tail)| {
let p = ctx.reporter().fallback(
parse_num(num_str).map_err(|e| e.into_proj(num_str.len(), tail, ctx)).and_then(
|num| match num {
Numeric::Uint(usize) => Ok(usize),
Numeric::Float(_) =>
Err(FloatPlacehPrio.pack(ctx.source_range(num_str.len(), tail))),
},
),
|_| 0,
);
(p, num_str.len() + 1, tail)
})
.unwrap_or((0, 0, tail));
let byte_len = if nonzero { 4 } else { 3 } + priolen + name.len();
let class = PHClass::Vec { nonzero, prio };
let lexeme = Lexeme::Placeh(Placeholder { name: i(name), class });
tokens.push(Entry::new(ctx.range(byte_len, tail), lexeme));
data = tail;
continue 'tail;
}
}
}
if namestart(head) {
let (name, tail) = split_filter(data, namechar);
if !name.is_empty() {
let lexeme = Lexeme::Name(i(name));
tokens.push(Entry::new(ctx.range(name.len(), tail), lexeme));
data = tail;
continue 'tail;
}
}
if opchar(head) {
let (name, tail) = split_filter(data, opchar);
if !name.is_empty() {
let lexeme = Lexeme::Name(i(name));
tokens.push(Entry::new(ctx.range(name.len(), tail), lexeme));
data = tail;
continue 'tail;
}
}
unreachable!(r#"opchar is pretty much defined as "not namechar" "#)
}
}
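// For illustration, the input `foo := \x. x` lexes approximately to
//   Name("foo"), Walrus, BS, Name("x"), Name("."), Name("x")
// The dot becomes an operator name via `opchar`; whitespace survives only in
// the byte ranges attached to each entry.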

View File

@@ -0,0 +1,12 @@
//! Parser, and abstractions for interacting with it from language extensions
pub mod context;
pub mod errors;
pub mod facade;
pub mod frag;
pub mod lex_plugin;
pub mod lexer;
pub mod multiname;
pub mod numeric;
pub mod parse_plugin;
pub mod parsed;
mod sourcefile;

View File

@@ -0,0 +1,146 @@
//! Parse the tree-like name sets used to represent imports
use std::collections::VecDeque;
use std::ops::Range;
use intern_all::{i, Tok};
use super::context::ParseCtx;
use super::errors::{Expected, ParseErrorKind};
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use crate::error::ProjectResult;
use crate::location::SourceRange;
use crate::name::VPath;
use crate::parse::parsed::{Import, PType};
use crate::utils::boxed_iter::{box_chain, box_once, BoxedIter};
struct Subresult {
glob: bool,
deque: VecDeque<Tok<String>>,
range: Range<usize>,
}
impl Subresult {
#[must_use]
fn new_glob(range: &Range<usize>) -> Self {
Self { glob: true, deque: VecDeque::new(), range: range.clone() }
}
#[must_use]
fn new_named(name: Tok<String>, range: &Range<usize>) -> Self {
Self { glob: false, deque: VecDeque::from([name]), range: range.clone() }
}
#[must_use]
fn push_front(mut self, name: Tok<String>) -> Self {
self.deque.push_front(name);
self
}
#[must_use]
fn finalize(self, ctx: &(impl ParseCtx + ?Sized)) -> Import {
let Self { mut deque, glob, range } = self;
debug_assert!(glob || !deque.is_empty(), "The constructors forbid this");
let name = if glob { None } else { deque.pop_back() };
let range = ctx.range_loc(&range);
Import { name, range, path: VPath(deque.into()) }
}
}
fn parse_multiname_branch<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(BoxedIter<'a, Subresult>, Frag<'a>)> {
let comma = i!(str: ",");
let (subnames, cursor) = parse_multiname_rec(cursor, ctx)?;
let (Entry { lexeme, range }, cursor) = cursor.trim().pop(ctx)?;
match &lexeme {
Lexeme::RP(PType::Par) => Ok((subnames, cursor)),
Lexeme::Name(n) if n == &comma => {
let (tail, cont) = parse_multiname_branch(cursor, ctx)?;
Ok((box_chain!(subnames, tail), cont))
},
_ => {
let expected = vec![Lexeme::Name(comma), Lexeme::RP(PType::Par)];
let err = Expected { expected, or_name: false, found: lexeme.clone() };
Err(err.pack(SourceRange { range: range.clone(), code: ctx.code_info() }))
},
}
}
fn parse_multiname_rec<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(BoxedIter<'a, Subresult>, Frag<'a>)> {
let (head, mut cursor) = cursor.trim().pop(ctx)?;
match &head.lexeme {
Lexeme::LP(PType::Par) => parse_multiname_branch(cursor, ctx),
Lexeme::LP(PType::Sqr) => {
let mut names = Vec::new();
loop {
let (Entry { lexeme, range }, tail) = cursor.trim().pop(ctx)?;
cursor = tail;
match lexeme {
Lexeme::Name(n) => names.push((n.clone(), range)),
Lexeme::RP(PType::Sqr) => break,
_ => {
let err = Expected {
expected: vec![Lexeme::RP(PType::Sqr)],
or_name: true,
found: lexeme.clone(),
};
return Err(err.pack(ctx.range_loc(range)));
},
}
}
Ok((
Box::new(
names.into_iter().map(|(name, location)| Subresult::new_named(name.clone(), location)),
),
cursor,
))
},
Lexeme::Name(n) if *n == i!(str: "*") =>
Ok((box_once(Subresult::new_glob(&head.range)), cursor)),
Lexeme::Name(n) if ![i!(str: ","), i!(str: "*")].contains(n) => {
let cursor = cursor.trim();
if cursor.get(0, ctx).map_or(false, |e| e.lexeme.strict_eq(&Lexeme::NS)) {
let cursor = cursor.step(ctx)?;
let (out, cursor) = parse_multiname_rec(cursor, ctx)?;
let out = Box::new(out.map(|sr| sr.push_front(n.clone())));
Ok((out, cursor))
} else {
Ok((box_once(Subresult::new_named(n.clone(), &head.range)), cursor))
}
},
_ => {
let expected = vec![Lexeme::LP(PType::Par)];
let err = Expected { expected, or_name: true, found: head.lexeme.clone() };
Err(err.pack(ctx.range_loc(&head.range)))
},
}
}
/// Parse a tree that describes several names. The tree can be
///
/// - name (except `,` or `*`)
/// - name (except `,` or `*`) `::` tree
/// - `(` tree `,` tree ... `)`
/// - `*` (wildcard)
/// - `[` name name ... `]` (including `,` or `*`).
///
/// Examples of valid syntax:
///
/// ```txt
/// foo
/// foo::bar::baz
/// foo::bar::(baz, quz::quux, fimble::*)
/// foo::bar::[baz quz * +]
/// ```
pub fn parse_multiname<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(Vec<Import>, Frag<'a>)> {
let (output, cont) = parse_multiname_rec(cursor, ctx)?;
Ok((output.map(|sr| sr.finalize(ctx)).collect(), cont))
}
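// A smoke test of the tree syntax described above: `foo::*` is a single glob
// import under the path `foo`.
#[cfg(test)]
mod test {
  use intern_all::i;

  use super::parse_multiname;
  use crate::parse::context::MockContext;
  use crate::parse::frag::Frag;
  use crate::parse::lexer::{Entry, Lexeme};

  #[test]
  fn parses_glob_import() {
    let entries = [
      Entry::new(0..3, Lexeme::Name(i!(str: "foo"))),
      Entry::new(3..5, Lexeme::NS),
      Entry::new(5..6, Lexeme::Name(i!(str: "*"))),
    ];
    let ctx = MockContext::new();
    let (imports, tail) = parse_multiname(Frag::from_slice(&entries), &ctx).unwrap();
    assert_eq!(imports.len(), 1);
    assert!(imports[0].name.is_none(), "a glob import has no terminal name");
    tail.expect_empty(&ctx).unwrap();
  }
}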

View File

@@ -0,0 +1,179 @@
//! Parse a float or integer. These functions are also used for the macro
//! priority numbers
use std::num::IntErrorKind;
use std::ops::Range;
use ordered_float::NotNan;
use super::context::ParseCtx;
use super::errors::{ExpectedDigit, LiteralOverflow, NaNLiteral, ParseErrorKind};
use super::lex_plugin::LexPluginReq;
#[allow(unused)] // for doc
use super::lex_plugin::LexerPlugin;
use super::lexer::{split_filter, Entry, LexRes, Lexeme};
use crate::error::{ProjectErrorObj, ProjectResult};
use crate::foreign::atom::AtomGenerator;
use crate::foreign::inert::Inert;
use crate::libs::std::number::Numeric;
/// Reasons why [parse_num] might fail. See [NumError].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum NumErrorKind {
/// The literal describes [f64::NAN]
NaN,
/// Some integer appearing in the literal overflows [usize]
Overflow,
/// A character that isn't a digit in the given base was found
InvalidDigit,
}
impl NumErrorKind {
fn from_int(kind: &IntErrorKind) -> Self {
match kind {
IntErrorKind::InvalidDigit => Self::InvalidDigit,
IntErrorKind::NegOverflow | IntErrorKind::PosOverflow => Self::Overflow,
_ => panic!("Impossible error condition"),
}
}
}
/// Error produced by [parse_num]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NumError {
/// Location
pub range: Range<usize>,
/// Reason
pub kind: NumErrorKind,
}
impl NumError {
/// Convert into [ProjectErrorObj]
pub fn into_proj(
self,
len: usize,
tail: &str,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectErrorObj {
let start = ctx.source().len() - tail.len() - len + self.range.start;
let location = ctx.range_loc(&(start..start + self.range.len()));
match self.kind {
NumErrorKind::NaN => NaNLiteral.pack(location),
NumErrorKind::InvalidDigit => ExpectedDigit.pack(location),
NumErrorKind::Overflow => LiteralOverflow.pack(location),
}
}
}
/// Parse a number literal out of text
pub fn parse_num(string: &str) -> Result<Numeric, NumError> {
let overflow_err = NumError { range: 0..string.len(), kind: NumErrorKind::Overflow };
let (radix, noprefix, pos) = (string.strip_prefix("0x").map(|s| (16u8, s, 2)))
.or_else(|| string.strip_prefix("0b").map(|s| (2u8, s, 2)))
.or_else(|| string.strip_prefix("0o").map(|s| (8u8, s, 2)))
.unwrap_or((10u8, string, 0));
// fallback: no radix prefix, so base 10, the whole string, zero offset
let (base, exponent) = match noprefix.split_once('p') {
Some((b, e)) => {
let (s, d, len) = e.strip_prefix('-').map_or((1, e, 0), |ue| (-1, ue, 1));
(b, s * int_parse(d, 10, pos + b.len() + 1 + len)? as i32)
},
None => (noprefix, 0),
};
match base.split_once('.') {
None => {
let base_usize = int_parse(base, radix, pos)?;
if let Ok(pos_exp) = u32::try_from(exponent) {
if let Some(radical) = usize::from(radix).checked_pow(pos_exp) {
let number = base_usize.checked_mul(radical).ok_or(overflow_err)?;
return Ok(Numeric::Uint(number));
}
}
let f = (base_usize as f64) * (radix as f64).powi(exponent);
let err = NumError { range: 0..string.len(), kind: NumErrorKind::NaN };
Ok(Numeric::Float(NotNan::new(f).map_err(|_| err)?))
},
Some((whole, part)) => {
let whole_n = int_parse(whole, radix, pos)? as f64;
let part_n = int_parse(part, radix, pos + whole.len() + 1)? as f64;
let real_val = whole_n + (part_n / (radix as f64).powi(part.len() as i32));
let f = real_val * (radix as f64).powi(exponent);
Ok(Numeric::Float(NotNan::new(f).expect("None of the inputs are NaN")))
},
}
}
fn int_parse(s: &str, radix: u8, start: usize) -> Result<usize, NumError> {
let s = s.chars().filter(|c| *c != '_').collect::<String>();
let range = start..(start + s.len());
usize::from_str_radix(&s, radix as u32)
.map_err(|e| NumError { range, kind: NumErrorKind::from_int(e.kind()) })
}
/// Filter for characters that can appear in numbers
pub fn numchar(c: char) -> bool { c.is_alphanumeric() | "._-".contains(c) }
/// Filter for characters that can start numbers
pub fn numstart(c: char) -> bool { c.is_ascii_digit() }
/// Print a number as a base-16 floating point literal
#[must_use]
pub fn print_nat16(num: NotNan<f64>) -> String {
if *num == 0.0 {
return "0x0".to_string();
} else if num.is_infinite() {
return match num.is_sign_positive() {
true => "Infinity".to_string(),
false => "-Infinity".to_string(),
};
} else if num.is_nan() {
return "NaN".to_string();
}
let exp = num.log(16.0).floor();
let man = *num / 16_f64.powf(exp);
format!("0x{man}p{exp:.0}")
}
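// For illustration: 8.0 renders as "0x8p0" (mantissa 8, exponent 0) and 0.0
// as "0x0"; the pair is chosen so that mantissa * 16^exponent recovers the
// original value.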
/// [LexerPlugin] for a number literal
#[derive(Clone)]
pub struct NumericLexer;
impl LexerPlugin for NumericLexer {
fn lex<'b>(&self, req: &'_ dyn LexPluginReq<'b>) -> Option<ProjectResult<LexRes<'b>>> {
req.tail().chars().next().filter(|c| numstart(*c)).map(|_| {
let (num_str, tail) = split_filter(req.tail(), numchar);
let ag = match parse_num(num_str) {
Ok(Numeric::Float(f)) => AtomGenerator::cloner(Inert(f)),
Ok(Numeric::Uint(i)) => AtomGenerator::cloner(Inert(i)),
Err(e) => return Err(e.into_proj(num_str.len(), tail, req.ctx())),
};
let range = req.ctx().range(num_str.len(), tail);
let entry = Entry { lexeme: Lexeme::Atom(ag), range };
Ok(LexRes { tail, tokens: vec![entry] })
})
}
}
#[cfg(test)]
mod test {
use crate::libs::std::number::Numeric;
use crate::parse::numeric::parse_num;
#[test]
fn just_ints() {
let test = |s, n| assert_eq!(parse_num(s), Ok(Numeric::Uint(n)));
test("12345", 12345);
test("0xcafebabe", 0xcafebabe);
test("0o751", 0o751);
test("0b111000111", 0b111000111);
}
#[test]
fn decimals() {
let test = |s, n| assert_eq!(parse_num(s).map(|n| n.as_f64()), Ok(n));
test("3.1417", 3.1417);
test("3.1417", 3_f64 + 1417_f64 / 10000_f64);
test("0xf.cafe", 0xf as f64 + 0xcafe as f64 / 0x10000 as f64);
test("34p3", 34000f64);
test("0x2p3", (0x2 * 0x1000) as f64);
test("1.5p3", 1500f64);
test("0x2.5p3", (0x25 * 0x100) as f64);
}
}

View File

@@ -0,0 +1,142 @@
//! Abstractions for dynamic extensions to the parser that act across entries.
//! Macros are the primary syntax extension mechanism, but they only operate
//! within a constant and can't interfere with name resolution.
use std::ops::Range;
use dyn_clone::DynClone;
use intern_all::Tok;
use super::context::ParseCtx;
use super::errors::{expect, expect_block, expect_name};
use super::facade::parse_entries;
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use super::parsed::{Constant, Expr, ModuleBlock, PType, Rule, SourceLine, SourceLineKind};
use super::sourcefile::{
exprv_to_single, parse_const, parse_exprv, parse_line, parse_module, parse_module_body,
parse_nsname, parse_rule, split_lines,
};
use crate::error::{ProjectErrorObj, ProjectResult};
use crate::location::SourceRange;
use crate::name::VName;
use crate::utils::boxed_iter::BoxedIter;
/// Information and actions exposed to [ParseLinePlugin]. A plugin should never
/// import and call the parser directly because it might be executed in a
/// different version of the parser.
pub trait ParsePluginReq<'t> {
// ################ Frag and ParseCtx ################
/// The token sequence this parser must parse
fn frag(&self) -> Frag;
/// Get the location of a fragment
fn frag_loc(&self, f: Frag) -> SourceRange;
/// Convert a numeric byte range into a location
fn range_loc(&self, r: Range<usize>) -> SourceRange;
/// Remove the first token of the fragment
fn pop<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)>;
/// Remove the last element of the fragment
fn pop_back<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)>;
// ################ Parser states ################
/// Split up the lines in a fragment. The fragment must outlive the iterator
/// and the request itself must outlive both
fn split_lines<'a: 'b, 'b>(&'b self, f: Frag<'a>) -> BoxedIter<'b, Frag<'a>>
where 't: 'b + 'a;
/// Parse a sequence of source lines separated by line breaks
fn parse_module_body(&self, frag: Frag) -> ProjectResult<Vec<SourceLine>>;
/// Parse a single source line. This returns a vector because plugins can
/// convert a single line into multiple entries
fn parse_line(&self, frag: Frag) -> ProjectResult<Vec<SourceLineKind>>;
/// Parse a macro rule `<exprv> =prio=> <exprv>`
fn parse_rule(&self, frag: Frag) -> ProjectResult<Rule>;
/// Parse a constant declaration `<name> := <exprv>`
fn parse_const(&self, frag: Frag) -> ProjectResult<Constant>;
/// Parse a namespaced name `name::name`
fn parse_nsname<'a>(&self, f: Frag<'a>) -> ProjectResult<(VName, Frag<'a>)>;
/// Parse a module declaration. `<name> ( <module_body> )`
fn parse_module(&self, frag: Frag) -> ProjectResult<ModuleBlock>;
/// Parse a sequence of expressions. In principle, it never makes sense to
/// parse a single expression because it could always be a macro invocation.
fn parse_exprv<'a>(&self, f: Frag<'a>, p: Option<PType>) -> ProjectResult<(Vec<Expr>, Frag<'a>)>;
/// Parse a prepared string of code
fn parse_entries(&self, t: &'static str, r: SourceRange) -> Vec<SourceLine>;
/// Convert a sequence of expressions to a single one by parenthesization if
/// necessary
fn vec_to_single(&self, fallback: &Entry, v: Vec<Expr>) -> ProjectResult<Expr>;
// ################ Assertions ################
/// Unwrap a single name token or raise an error
fn expect_name(&self, entry: &Entry) -> ProjectResult<Tok<String>>;
/// Assert that the entry contains exactly the specified lexeme
fn expect(&self, l: Lexeme, e: &Entry) -> ProjectResult<()>;
/// Remove two parentheses from the ends of the cursor
fn expect_block<'a>(&self, f: Frag<'a>, p: PType) -> ProjectResult<Frag<'a>>;
/// Ensure that the fragment is empty
fn expect_empty(&self, f: Frag) -> ProjectResult<()>;
/// Report a fatal error while also producing output to be consumed by later
/// stages for improved error reporting
fn report_err(&self, e: ProjectErrorObj);
}
/// External plugin that parses an unrecognized source line into lines of
/// recognized types
pub trait ParseLinePlugin: Sync + Send + DynClone {
/// Attempt to parse a line. Returns [None] if the line isn't recognized,
/// [Some][Err] if it's recognized but incorrect.
fn parse(&self, req: &dyn ParsePluginReq) -> Option<ProjectResult<Vec<SourceLineKind>>>;
}
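/// Illustrative sketch of a line plugin (hypothetical, registered nowhere):
/// recognizes the bare line `noop` and expands it to no source lines at all.
#[derive(Clone)]
pub struct NoOpLinePlugin;
impl ParseLinePlugin for NoOpLinePlugin {
  fn parse(&self, req: &dyn ParsePluginReq) -> Option<ProjectResult<Vec<SourceLineKind>>> {
    // If the line doesn't start with the name `noop`, it isn't ours
    let (head, tail) = req.pop(req.frag()).ok()?;
    let name = req.expect_name(head).ok()?;
    if name.as_str() != "noop" {
      return None;
    }
    // It is ours; the rest of the line must be empty, and we emit nothing
    Some(req.expect_empty(tail).map(|()| Vec::new()))
  }
}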
/// Implementation of [ParsePluginReq] exposing sub-parsers and data to the
/// plugin via dynamic dispatch
pub struct ParsePlugReqImpl<'a, TCtx: ParseCtx + ?Sized> {
/// Fragment of text to be parsed by the plugin
pub frag: Frag<'a>,
/// Context for recursive commands and to expose to the plugin
pub ctx: &'a TCtx,
}
impl<'ty, TCtx: ParseCtx + ?Sized> ParsePluginReq<'ty> for ParsePlugReqImpl<'ty, TCtx> {
fn frag(&self) -> Frag { self.frag }
fn frag_loc(&self, f: Frag) -> SourceRange { self.range_loc(f.range()) }
fn range_loc(&self, r: Range<usize>) -> SourceRange { self.ctx.range_loc(&r) }
fn pop<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)> { f.pop(self.ctx) }
fn pop_back<'a>(&self, f: Frag<'a>) -> ProjectResult<(&'a Entry, Frag<'a>)> {
f.pop_back(self.ctx)
}
fn split_lines<'a: 'b, 'b>(&'b self, f: Frag<'a>) -> BoxedIter<'b, Frag<'a>>
where
'ty: 'b,
'ty: 'a,
{
Box::new(split_lines(f, self.ctx))
}
fn parse_module_body(&self, f: Frag) -> ProjectResult<Vec<SourceLine>> {
Ok(parse_module_body(f, self.ctx))
}
fn parse_line(&self, f: Frag) -> ProjectResult<Vec<SourceLineKind>> { parse_line(f, self.ctx) }
fn parse_rule(&self, f: Frag) -> ProjectResult<Rule> { parse_rule(f, self.ctx) }
fn parse_const(&self, f: Frag) -> ProjectResult<Constant> { parse_const(f, self.ctx) }
fn parse_nsname<'a>(&self, f: Frag<'a>) -> ProjectResult<(VName, Frag<'a>)> {
parse_nsname(f, self.ctx)
}
fn parse_module(&self, f: Frag) -> ProjectResult<ModuleBlock> { parse_module(f, self.ctx) }
fn parse_exprv<'a>(&self, f: Frag<'a>, p: Option<PType>) -> ProjectResult<(Vec<Expr>, Frag<'a>)> {
parse_exprv(f, p, self.ctx)
}
fn parse_entries(&self, s: &'static str, r: SourceRange) -> Vec<SourceLine> {
parse_entries(&self.ctx, s, r)
}
fn vec_to_single(&self, fb: &Entry, v: Vec<Expr>) -> ProjectResult<Expr> {
exprv_to_single(fb, v, self.ctx)
}
fn expect_name(&self, e: &Entry) -> ProjectResult<Tok<String>> { expect_name(e, self.ctx) }
fn expect(&self, l: Lexeme, e: &Entry) -> ProjectResult<()> { expect(l, e, self.ctx) }
fn expect_block<'a>(&self, f: Frag<'a>, t: PType) -> ProjectResult<Frag<'a>> {
expect_block(f, t, self.ctx)
}
fn expect_empty(&self, f: Frag) -> ProjectResult<()> { f.expect_empty(self.ctx) }
fn report_err(&self, e: ProjectErrorObj) { self.ctx.reporter().report(e) }
}

View File

@@ -0,0 +1,507 @@
//! Datastructures representing the units of macro execution
//!
//! These structures are produced by the pipeline, processed by the macro
//! executor, and then converted to other usable formats.
use std::fmt;
use std::hash::Hash;
use std::rc::Rc;
use hashbrown::HashSet;
use intern_all::Tok;
use itertools::Itertools;
use ordered_float::NotNan;
use crate::foreign::atom::AtomGenerator;
#[allow(unused)] // for doc
use crate::interpreter::nort;
use crate::location::SourceRange;
use crate::name::{Sym, VName, VPath};
use crate::parse::numeric::print_nat16;
/// A [Clause] with associated metadata
#[derive(Clone, Debug)]
pub struct Expr {
/// The actual value
pub value: Clause,
/// Information about the code that produced this value
pub range: SourceRange,
}
impl Expr {
/// Process all names with the given mapper.
/// Return a new object if anything was processed
#[must_use]
pub fn map_names(&self, pred: &mut impl FnMut(Sym) -> Option<Sym>) -> Option<Self> {
(self.value.map_names(pred)).map(|value| Self { value, range: self.range.clone() })
}
/// Visit all expressions in the tree. The search can be exited early by
/// returning [Some]
///
/// See also [crate::interpreter::nort::Expr::search_all]
pub fn search_all<T>(&self, f: &mut impl FnMut(&Self) -> Option<T>) -> Option<T> {
f(self).or_else(|| self.value.search_all(f))
}
}
/// Visit all expression sequences including this sequence itself.
pub fn search_all_slcs<T>(this: &[Expr], f: &mut impl FnMut(&[Expr]) -> Option<T>) -> Option<T> {
f(this).or_else(|| this.iter().find_map(|expr| expr.value.search_all_slcs(f)))
}
impl Expr {
/// Add the specified prefix to every Name
#[must_use]
pub fn prefix(&self, prefix: &[Tok<String>], except: &impl Fn(Tok<String>) -> bool) -> Self {
Self { value: self.value.prefix(prefix, except), range: self.range.clone() }
}
}
impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.value.fmt(f) }
}
/// Various types of placeholders
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum PHClass {
/// Matches multiple tokens, lambdas or parenthesized groups
Vec {
/// If true, must match at least one clause
nonzero: bool,
/// Greediness in the allocation of tokens
prio: usize,
},
/// Matches exactly one token, lambda or parenthesized group
Scalar,
/// Matches exactly one name
Name,
}
/// Properties of a placeholder that matches unknown tokens in macros
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Placeholder {
/// Identifier to pair placeholders in the pattern and template
pub name: Tok<String>,
/// The nature of the token set matched by this placeholder
pub class: PHClass,
}
impl fmt::Display for Placeholder {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let name = &self.name;
match self.class {
PHClass::Scalar => write!(f, "${name}"),
PHClass::Name => write!(f, "$_{name}"),
PHClass::Vec { nonzero, prio } => {
if nonzero { write!(f, "...") } else { write!(f, "..") }?;
write!(f, "${name}:{prio}")
},
}
}
}
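// Rendered forms, matching the placeholder syntax accepted by the lexer:
//   Scalar             -> $name
//   Name               -> $_name
//   Vec (may be empty) -> ..$name:prio
//   Vec (nonzero)      -> ...$name:prio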
/// Different types of brackets supported by Orchid
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum PType {
/// ()
Par,
/// []
Sqr,
/// {}
Curl,
}
impl PType {
/// Left paren character for this paren type
pub fn l(self) -> char {
match self {
PType::Curl => '{',
PType::Par => '(',
PType::Sqr => '[',
}
}
/// Right paren character for this paren type
pub fn r(self) -> char {
match self {
PType::Curl => '}',
PType::Par => ')',
PType::Sqr => ']',
}
}
}
/// An S-expression as read from a source file
#[derive(Debug, Clone)]
pub enum Clause {
/// An opaque non-callable value, eg. a file handle
Atom(AtomGenerator),
/// A c-style name or an operator, eg. `+`, `i`, `foo::bar`
Name(Sym),
/// A parenthesized expression
/// eg. `(print out "hello")`, `[1, 2, 3]`, `{Some(t) => t}`
S(PType, Rc<Vec<Expr>>),
/// A function expression, eg. `\x. x + 1`
Lambda(Rc<Vec<Expr>>, Rc<Vec<Expr>>),
/// A placeholder for macros, eg. `$name`, `...$body`, `...$lhs:1`
Placeh(Placeholder),
}
impl Clause {
/// Extract the expressions from an auto, lambda or S
#[must_use]
pub fn body(&self) -> Option<Rc<Vec<Expr>>> {
match self {
Self::Lambda(_, body) | Self::S(_, body) => Some(body.clone()),
_ => None,
}
}
/// Convert with identical meaning
#[must_use]
pub fn into_expr(self, range: SourceRange) -> Expr {
if let Self::S(PType::Par, body) = &self {
if let [wrapped] = &body[..] {
return wrapped.clone();
}
}
Expr { value: self, range }
}
/// Convert with identical meaning
#[must_use]
pub fn from_exprs(exprs: &[Expr]) -> Option<Self> {
match exprs {
[] => None,
[only] => Some(only.value.clone()),
_ => Some(Self::S(PType::Par, Rc::new(exprs.to_vec()))),
}
}
/// Convert with identical meaning
#[must_use]
pub fn from_exprv(exprv: &Rc<Vec<Expr>>) -> Option<Clause> {
if exprv.len() < 2 { Self::from_exprs(exprv) } else { Some(Self::S(PType::Par, exprv.clone())) }
}
/// Collect all names that appear in this expression.
/// NOTICE: this isn't the total set of unbound names, it's mostly useful to
/// make weak statements for optimization.
#[must_use]
pub fn collect_names(&self) -> HashSet<Sym> {
if let Self::Name(n) = self {
return HashSet::from([n.clone()]);
}
let mut glossary = HashSet::new();
let result = self.search_all(&mut |e| {
if let Clause::Name(n) = &e.value {
glossary.insert(n.clone());
}
None::<()>
});
assert!(result.is_none(), "Callback never returns Some");
glossary
}
/// Process all names with the given mapper.
/// Return a new object if anything was processed
#[must_use]
pub fn map_names(&self, pred: &mut impl FnMut(Sym) -> Option<Sym>) -> Option<Self> {
match self {
Clause::Atom(_) | Clause::Placeh(_) => None,
Clause::Name(name) => pred(name.clone()).map(Clause::Name),
Clause::S(c, body) => {
let mut any_some = false;
let new_body = body
.iter()
.map(|e| {
let val = e.map_names(pred);
any_some |= val.is_some();
val.unwrap_or_else(|| e.clone())
})
.collect();
if any_some { Some(Clause::S(*c, Rc::new(new_body))) } else { None }
},
Clause::Lambda(arg, body) => {
let mut any_some = false;
let new_arg = (arg.iter())
.map(|e| {
let val = e.map_names(pred);
any_some |= val.is_some();
val.unwrap_or_else(|| e.clone())
})
.collect();
let new_body = (body.iter())
.map(|e| {
let val = e.map_names(pred);
any_some |= val.is_some();
val.unwrap_or_else(|| e.clone())
})
.collect();
if any_some { Some(Clause::Lambda(Rc::new(new_arg), Rc::new(new_body))) } else { None }
},
}
}
/// Pair of [Expr::search_all]
pub fn search_all<T>(&self, f: &mut impl FnMut(&Expr) -> Option<T>) -> Option<T> {
match self {
Clause::Lambda(arg, body) =>
arg.iter().chain(body.iter()).find_map(|expr| expr.search_all(f)),
Clause::Name(_) | Clause::Atom(_) | Clause::Placeh(_) => None,
Clause::S(_, body) => body.iter().find_map(|expr| expr.search_all(f)),
}
}
/// Visit all expression sequences. Most useful when looking for some pattern
pub fn search_all_slcs<T>(&self, f: &mut impl FnMut(&[Expr]) -> Option<T>) -> Option<T> {
match self {
Clause::Lambda(arg, body) => search_all_slcs(arg, f).or_else(|| search_all_slcs(body, f)),
Clause::Name(_) | Clause::Atom(_) | Clause::Placeh(_) => None,
Clause::S(_, body) => search_all_slcs(body, f),
}
}
/// Generate a parenthesized expression sequence
pub fn s(delimiter: char, body: impl IntoIterator<Item = Self>, range: SourceRange) -> Self {
let ptype = match delimiter {
'(' => PType::Par,
'[' => PType::Sqr,
'{' => PType::Curl,
_ => panic!("not an opening paren"),
};
let body = body.into_iter().map(|it| it.into_expr(range.clone())).collect();
Self::S(ptype, Rc::new(body))
}
}
impl Clause {
/// Add the specified prefix to every Name
#[must_use]
pub fn prefix(&self, prefix: &[Tok<String>], except: &impl Fn(Tok<String>) -> bool) -> Self {
self
.map_names(&mut |name| match except(name[0].clone()) {
true => None,
false => {
let prefixed = prefix.iter().cloned().chain(name.iter()).collect::<Vec<_>>();
Some(Sym::from_tok(name.tok().interner().i(&prefixed)).unwrap())
},
})
.unwrap_or_else(|| self.clone())
}
}
impl fmt::Display for Clause {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Atom(a) => write!(f, "{a:?}"),
Self::Name(name) => write!(f, "{}", name),
Self::S(t, items) => {
let body = items.iter().join(" ");
write!(f, "{}{body}{}", t.l(), t.r())
},
Self::Lambda(arg, body) => {
let args = arg.iter().join(" ");
let bodys = body.iter().join(" ");
write!(f, "\\{args}.{bodys}")
},
Self::Placeh(ph) => ph.fmt(f),
}
}
}
/// A substitution rule as loaded from source
#[derive(Debug, Clone)]
pub struct Rule {
/// Expressions on the left side of the arrow
pub pattern: Vec<Expr>,
/// Priority number written inside the arrow
pub prio: NotNan<f64>,
/// Expressions on the right side of the arrow
pub template: Vec<Expr>,
}
impl fmt::Display for Rule {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"rule {} ={}=> {}",
self.pattern.iter().join(" "),
print_nat16(self.prio),
self.template.iter().join(" ")
)
}
}
/// A named constant
#[derive(Debug, Clone)]
pub struct Constant {
/// Used to reference the constant
pub name: Tok<String>,
/// The constant value inserted where the name is found
pub value: Expr,
}
impl fmt::Display for Constant {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "const {} := {}", *self.name, self.value)
}
}
/// An import pointing at another module, either specifying the symbol to be
/// imported or importing all available symbols with a globstar (*)
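///
/// A sketch of the two surface forms this can represent:
///
/// ```ignore
/// import self::collections::list
/// import super::prelude::*
/// ```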
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Import {
/// Import path, a sequence of module names. Can either start with
///
/// - `self` to reference the current module
/// - any number of `super` to reference the parent module of the implied
/// `self`
/// - a root name
pub path: VPath,
/// If name is None, this is a wildcard import
pub name: Option<Tok<String>>,
/// Location of the final name segment, which uniquely identifies this name
pub range: SourceRange,
}
impl Import {
/// Constructor
pub fn new(
path: impl IntoIterator<Item = Tok<String>>,
name: Option<Tok<String>>,
range: SourceRange,
) -> Self {
let path = VPath(path.into_iter().collect());
assert!(name.is_some() || !path.0.is_empty(), "import * not allowed");
Self { range, name, path }
}
/// Get the preload target space for this import - the prefix below
/// which all files should be included in the compilation
///
/// Returns the path if this is a glob import, or the path plus the
/// name if this is a specific import
#[must_use]
pub fn nonglob_path(&self) -> VName {
VName::new(self.path.0.iter().chain(&self.name).cloned())
.expect("Everything import (`import *`) not allowed")
}
}
impl fmt::Display for Import {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.name {
None => write!(f, "{}::*", self.path),
Some(n) => write!(f, "{}::{}", self.path, n),
}
}
}
/// A namespace block
#[derive(Debug, Clone)]
pub struct ModuleBlock {
/// Name prefixed to all names in the block
pub name: Tok<String>,
/// Prefixed entries
pub body: Vec<SourceLine>,
}
impl fmt::Display for ModuleBlock {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let bodys = self.body.iter().map(|e| e.to_string()).join("\n");
write!(f, "module {} {{\n{}\n}}", self.name, bodys)
}
}
/// See [Member]
#[derive(Debug, Clone)]
pub enum MemberKind {
/// A substitution rule. Rules apply even when they're not in scope, if the
/// absolute names are present, e.g. because they're produced by other rules
Rule(Rule),
/// A constant (or function) associated with a name
Constant(Constant),
/// A prefixed set of other entries
Module(ModuleBlock),
}
impl MemberKind {
/// Convert to [SourceLine]
pub fn into_line(self, exported: bool, range: SourceRange) -> SourceLine {
SourceLineKind::Member(Member { exported, kind: self }).wrap(range)
}
}
impl fmt::Display for MemberKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Constant(c) => c.fmt(f),
Self::Module(m) => m.fmt(f),
Self::Rule(r) => r.fmt(f),
}
}
}
/// Things that may be prefixed with an export;
/// see [MemberKind]
#[derive(Debug, Clone)]
pub struct Member {
/// What kind of member this is
pub kind: MemberKind,
/// Whether this member is exported or not
pub exported: bool,
}
impl fmt::Display for Member {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self { exported: true, kind } => write!(f, "export {kind}"),
Self { exported: false, kind } => write!(f, "{kind}"),
}
}
}
/// See [SourceLine]
#[derive(Debug, Clone)]
pub enum SourceLineKind {
/// Imports one or all names in a module
Import(Vec<Import>),
/// Comments are kept here in case dev tooling wants to parse documentation
Comment(String),
/// An element with visibility information
Member(Member),
/// A list of tokens exported explicitly. This can also create new exported
/// tokens that the local module doesn't actually define a role for
Export(Vec<(Tok<String>, SourceRange)>),
}
impl SourceLineKind {
/// Wrap in a [SourceLine] at the given location
pub fn wrap(self, range: SourceRange) -> SourceLine { SourceLine { kind: self, range } }
}
impl fmt::Display for SourceLineKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Comment(s) => write!(f, "--[{s}]--"),
Self::Export(s) => {
write!(f, "export ::({})", s.iter().map(|t| &**t.0).join(", "))
},
Self::Member(member) => write!(f, "{member}"),
Self::Import(i) => {
write!(f, "import ({})", i.iter().map(|i| i.to_string()).join(", "))
},
}
}
}
/// Anything the parser might encounter in a file. See [SourceLineKind]
#[derive(Debug, Clone)]
pub struct SourceLine {
/// What we encountered
pub kind: SourceLineKind,
/// Where we encountered it.
pub range: SourceRange,
}
impl fmt::Display for SourceLine {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { self.kind.fmt(f) }
}

View File

@@ -0,0 +1,313 @@
//! Internal states of the parser.
use std::iter;
use std::rc::Rc;
use intern_all::i;
use itertools::Itertools;
use super::context::ParseCtx;
use super::errors::{
expect, expect_block, expect_name, BadTokenInRegion, ExpectedSingleName, GlobExport, LeadingNS,
MisalignedParen, NamespacedExport, ParseErrorKind, ReservedToken, UnexpectedEOL,
};
use super::frag::Frag;
use super::lexer::{Entry, Lexeme};
use super::multiname::parse_multiname;
use super::parse_plugin::ParsePlugReqImpl;
use crate::error::ProjectResult;
use crate::name::VName;
use crate::parse::parsed::{
Clause, Constant, Expr, Import, Member, MemberKind, ModuleBlock, PType, Rule, SourceLine,
SourceLineKind,
};
use crate::sym;
/// Split the fragment at each line break outside parentheses
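///
/// A sketch: the token stream of
///
/// ```ignore
/// a b (c
///   d)
/// e
/// ```
///
/// yields the fragments `a b (c d)` and `e`. A line that is fully wrapped
/// in `( )` is also unwrapped by the final mapping step.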
pub fn split_lines<'a>(
module: Frag<'a>,
ctx: &'a (impl ParseCtx + ?Sized),
) -> impl Iterator<Item = Frag<'a>> {
let mut source = module.data.iter().enumerate();
let mut fallback = module.fallback;
let mut last_slice = 0;
let mut finished = false;
iter::from_fn(move || {
let mut paren_count = 0;
for (i, Entry { lexeme, .. }) in source.by_ref() {
match lexeme {
Lexeme::LP(_) => paren_count += 1,
Lexeme::RP(_) => paren_count -= 1,
Lexeme::BR if paren_count == 0 => {
let begin = last_slice;
last_slice = i + 1;
let cur_prev = fallback;
fallback = &module.data[i];
return Some(Frag::new(cur_prev, &module.data[begin..i]));
},
_ => (),
}
}
// Include last line even without trailing newline
if !finished {
finished = true;
return Some(Frag::new(fallback, &module.data[last_slice..]));
}
None
})
.map(Frag::trim)
.map(|s| {
match s.pop(ctx).and_then(|(f, i)| i.pop_back(ctx).map(|(l, i)| (&f.lexeme, i, &l.lexeme))) {
Ok((Lexeme::LP(PType::Par), inner, Lexeme::RP(PType::Par))) => inner.trim(),
_ => s,
}
})
.filter(|l| !l.data.is_empty())
}
/// Parse linebreak-separated entries
pub fn parse_module_body(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> Vec<SourceLine> {
let mut lines = Vec::new();
for l in split_lines(cursor, ctx) {
let kinds = ctx.reporter().fallback(parse_line(l, ctx), |_| vec![]);
let r = ctx.range_loc(&l.range());
lines.extend(kinds.into_iter().map(|kind| SourceLine { range: r.clone(), kind }));
}
lines
}
/// Parse a single, possibly exported entry
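///
/// A sketch of an accepted line; the keywords dispatched on below are
/// `export`, `const`, `macro`, `module` and `import`:
///
/// ```ignore
/// export const greet := print "hello"
/// ```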
pub fn parse_line(
cursor: Frag<'_>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Vec<SourceLineKind>> {
let req = ParsePlugReqImpl { ctx, frag: cursor };
for line_parser in ctx.line_parsers() {
if let Some(result) = line_parser.parse(&req) {
return result;
}
}
let head = cursor.get(0, ctx)?;
match &head.lexeme {
Lexeme::Comment(cmt) => cmt.strip_prefix('|').and_then(|c| c.strip_suffix('|')).map_or_else(
|| parse_line(cursor.step(ctx)?, ctx),
|cmt| Ok(vec![SourceLineKind::Comment(cmt.to_string())]),
),
Lexeme::BR => parse_line(cursor.step(ctx)?, ctx),
Lexeme::Name(n) if **n == "export" =>
parse_export_line(cursor.step(ctx)?, ctx).map(|k| vec![k]),
Lexeme::Name(n) if ["const", "macro", "module"].contains(&n.as_str()) => {
let member = Member { exported: false, kind: parse_member(cursor, ctx)? };
Ok(vec![SourceLineKind::Member(member)])
},
Lexeme::Name(n) if **n == "import" => {
let (imports, cont) = parse_multiname(cursor.step(ctx)?, ctx)?;
cont.expect_empty(ctx)?;
Ok(vec![SourceLineKind::Import(imports)])
},
lexeme => {
let lexeme = lexeme.clone();
Err(BadTokenInRegion { lexeme, region: "start of line" }.pack(ctx.range_loc(&head.range)))
},
}
}
fn parse_export_line(
cursor: Frag<'_>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<SourceLineKind> {
let cursor = cursor.trim();
let head = cursor.get(0, ctx)?;
match &head.lexeme {
Lexeme::NS => {
let (names, cont) = parse_multiname(cursor.step(ctx)?, ctx)?;
cont.expect_empty(ctx)?;
let names = (names.into_iter())
.map(|Import { name, path, range }| match name {
Some(n) if path.is_empty() => Ok((n, range)),
Some(_) => Err(NamespacedExport.pack(range)),
None => Err(GlobExport.pack(range)),
})
.collect::<Result<Vec<_>, _>>()?;
Ok(SourceLineKind::Export(names))
},
Lexeme::Name(n) if ["const", "macro", "module"].contains(&n.as_str()) =>
Ok(SourceLineKind::Member(Member { kind: parse_member(cursor, ctx)?, exported: true })),
lexeme => {
let lexeme = lexeme.clone();
let err = BadTokenInRegion { lexeme, region: "exported line" };
Err(err.pack(ctx.range_loc(&head.range)))
},
}
}
fn parse_member(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<MemberKind> {
let (typemark, cursor) = cursor.trim().pop(ctx)?;
match &typemark.lexeme {
Lexeme::Name(n) if **n == "const" => {
let constant = parse_const(cursor, ctx)?;
Ok(MemberKind::Constant(constant))
},
Lexeme::Name(n) if **n == "macro" => {
let rule = parse_rule(cursor, ctx)?;
Ok(MemberKind::Rule(rule))
},
Lexeme::Name(n) if **n == "module" => {
let module = parse_module(cursor, ctx)?;
Ok(MemberKind::Module(module))
},
lexeme => {
let lexeme = lexeme.clone();
let err = BadTokenInRegion { lexeme, region: "member type" };
Err(err.pack(ctx.range_loc(&typemark.range)))
},
}
}
/// Parse a macro rule
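///
/// A sketch of the fragment shape it expects, with the `macro` keyword
/// already consumed by `parse_member` (`$x` is a placeholder):
///
/// ```ignore
/// (square $x) =100=> (mul $x $x)
/// ```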
pub fn parse_rule(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Rule> {
let (pattern, prio, template) = cursor.find_map("arrow", ctx, |a| match a {
Lexeme::Arrow(p) => Some(*p),
_ => None,
})?;
let (pattern, _) = parse_exprv(pattern, None, ctx)?;
let (template, _) = parse_exprv(template, None, ctx)?;
Ok(Rule { pattern, prio, template })
}
/// Parse a constant declaration
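///
/// A sketch of the fragment shape it expects, with the `const` keyword
/// already consumed by `parse_member`:
///
/// ```ignore
/// main := print "Hello World!"
/// ```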
pub fn parse_const(cursor: Frag<'_>, ctx: &(impl ParseCtx + ?Sized)) -> ProjectResult<Constant> {
let (name_ent, cursor) = cursor.trim().pop(ctx)?;
let name = expect_name(name_ent, ctx)?;
let (walrus_ent, cursor) = cursor.trim().pop(ctx)?;
expect(Lexeme::Walrus, walrus_ent, ctx)?;
let value = ctx.reporter().fallback(
parse_exprv(cursor, None, ctx).and_then(|(body, _)| exprv_to_single(walrus_ent, body, ctx)),
|_| Clause::Name(sym!(__syntax_error__)).into_expr(ctx.range_loc(&cursor.range())),
);
Ok(Constant { name, value })
}
/// Parse a namespaced name. TODO: use this for modules
pub fn parse_nsname<'a>(
cursor: Frag<'a>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(VName, Frag<'a>)> {
let (name, tail) = parse_multiname(cursor, ctx)?;
match name.into_iter().exactly_one() {
Ok(Import { name: Some(name), path, .. }) =>
Ok((VName::new([name]).unwrap().prefix(path), tail)),
Err(_) | Ok(Import { name: None, .. }) => {
let range = cursor.data[0].range.start..tail.data[0].range.end;
Err(ExpectedSingleName.pack(ctx.range_loc(&range)))
},
}
}
/// Parse a submodule declaration
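///
/// A sketch of the fragment shape it expects, with the `module` keyword
/// already consumed by `parse_member`; note that the body block is
/// parenthesized:
///
/// ```ignore
/// greeting (
///   export const greet := print "hello"
/// )
/// ```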
pub fn parse_module(
cursor: Frag<'_>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<ModuleBlock> {
let (name_ent, cursor) = cursor.trim().pop(ctx)?;
let name = expect_name(name_ent, ctx)?;
let body = expect_block(cursor, PType::Par, ctx)?;
Ok(ModuleBlock { name, body: parse_module_body(body, ctx) })
}
/// Parse a sequence of expressions
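///
/// A sketch of the constructs handled below: `::`-separated names, atoms,
/// placeholders, parenthesized groups and `\arg. body` lambdas:
///
/// ```ignore
/// \x. std::number::add x 1
/// ```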
pub fn parse_exprv<'a>(
mut cursor: Frag<'a>,
paren: Option<PType>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<(Vec<Expr>, Frag<'a>)> {
let mut output = Vec::new();
cursor = cursor.trim();
while let Ok(current) = cursor.get(0, ctx) {
match &current.lexeme {
Lexeme::BR | Lexeme::Comment(_) => unreachable!("Fillers skipped"),
Lexeme::At | Lexeme::Type => {
let err = ReservedToken(current.lexeme.clone());
return Err(err.pack(ctx.range_loc(&current.range)));
},
Lexeme::Atom(a) => {
let value = Clause::Atom(a.clone());
output.push(Expr { value, range: ctx.range_loc(&current.range) });
cursor = cursor.step(ctx)?;
},
Lexeme::Placeh(ph) => {
output
.push(Expr { value: Clause::Placeh(ph.clone()), range: ctx.range_loc(&current.range) });
cursor = cursor.step(ctx)?;
},
Lexeme::Name(n) => {
let mut range = ctx.range_loc(&current.range);
let mut fullname = VName::new([n.clone()]).unwrap();
while cursor.get(1, ctx).is_ok_and(|e| e.lexeme.strict_eq(&Lexeme::NS)) {
let next_seg = cursor.get(2, ctx)?;
range.range.end = next_seg.range.end;
fullname = fullname.suffix([expect_name(next_seg, ctx)?]);
cursor = cursor.step(ctx)?.step(ctx)?;
}
let clause = Clause::Name(fullname.to_sym());
output.push(Expr { value: clause, range });
cursor = cursor.step(ctx)?;
},
Lexeme::NS => return Err(LeadingNS.pack(ctx.range_loc(&current.range))),
Lexeme::RP(c) => match paren {
Some(exp_c) if exp_c == *c => return Ok((output, cursor.step(ctx)?)),
_ => {
let err = MisalignedParen(current.lexeme.clone());
return Err(err.pack(ctx.range_loc(&current.range)));
},
},
Lexeme::LP(c) => {
let (result, leftover) = parse_exprv(cursor.step(ctx)?, Some(*c), ctx)?;
let range = current.range.start..leftover.fallback.range.end;
let value = Clause::S(*c, Rc::new(result));
output.push(Expr { value, range: ctx.range_loc(&range) });
cursor = leftover;
},
Lexeme::BS => {
let dot = i!(str: ".");
let (arg, body) =
(cursor.step(ctx))?.find("A '.'", ctx, |l| l.strict_eq(&Lexeme::Name(dot.clone())))?;
let (arg, _) = parse_exprv(arg, None, ctx)?;
let (body, leftover) = parse_exprv(body, paren, ctx)?;
output.push(Expr {
range: ctx.range_loc(&cursor.range()),
value: Clause::Lambda(Rc::new(arg), Rc::new(body)),
});
return Ok((output, leftover));
},
lexeme => {
let lexeme = lexeme.clone();
let err = BadTokenInRegion { lexeme, region: "expression" };
return Err(err.pack(ctx.range_loc(&current.range)));
},
}
cursor = cursor.trim();
}
Ok((output, Frag::new(cursor.fallback, &[])))
}
/// Wrap an expression list in parentheses if necessary
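///
/// A sketch of the three cases below: a single expression is returned
/// unchanged, several expressions are wrapped as `(...)`, and an empty
/// list is an [UnexpectedEOL] error.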
pub fn exprv_to_single(
fallback: &Entry,
v: Vec<Expr>,
ctx: &(impl ParseCtx + ?Sized),
) -> ProjectResult<Expr> {
match v.len() {
0 => {
let err = UnexpectedEOL(fallback.lexeme.clone());
Err(err.pack(ctx.range_loc(&fallback.range)))
},
1 => Ok(v.into_iter().exactly_one().unwrap()),
_ => {
let f_range = &v.first().unwrap().range;
let l_range = &v.last().unwrap().range;
let range = f_range.map_range(|r| r.start..l_range.end());
Ok(Expr { range, value: Clause::S(PType::Par, Rc::new(v)) })
},
}
}