Working example
@@ -93,14 +93,14 @@ pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
   let clause =
     enum_parser!(Lexeme::Comment).repeated()
     .ignore_then(choice((
-      enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Primitive::Literal).map(Clause::P),
+      enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str).map(Primitive::Literal).map(Clause::P),
       placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
       just(Lexeme::name("...")).to(true)
       .or(just(Lexeme::name("..")).to(false))
       .then(placeholder_parser())
       .then(
         just(Lexeme::Type)
-        .ignore_then(enum_parser!(Lexeme::Int))
+        .ignore_then(enum_parser!(Lexeme::Uint))
         .or_not().map(Option::unwrap_or_default)
       )
       .map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
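For orientation (not part of the diff): the vectorial branch above reads `...` or `..`, then a placeholder, then an optional `:` followed by an unsigned priority, which after this commit is lexed as Lexeme::Uint. Assuming placeholders use a `$name` form (an assumption; placeholder_parser is not shown here), that corresponds to source fragments like `..$rest`, `...$args` and `...$args:2`. The contents of the `Some((...))` tuple fall outside the hunk.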
@@ -24,7 +24,7 @@ impl From<Entry> for (Lexeme, Range<usize>) {
 #[derive(Clone, PartialEq, Eq, Hash)]
 pub enum Lexeme {
   Num(NotNan<f64>),
-  Int(u64),
+  Uint(u64),
   Char(char),
   Str(String),
   Name(String),
@@ -35,14 +35,16 @@ pub enum Lexeme {
   BS, // Backslash
   At,
   Type, // type operator
-  Comment(String)
+  Comment(String),
+  Export,
+  Import,
 }
 
 impl Debug for Lexeme {
   fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
     match self {
       Self::Num(n) => write!(f, "{}", n),
-      Self::Int(i) => write!(f, "{}", i),
+      Self::Uint(i) => write!(f, "{}", i),
       Self::Char(c) => write!(f, "{:?}", c),
       Self::Str(s) => write!(f, "{:?}", s),
       Self::Name(name) => write!(f, "{}", name),
@@ -59,6 +61,8 @@ impl Debug for Lexeme {
       Self::At => write!(f, "@"),
       Self::Type => write!(f, ":"),
       Self::Comment(text) => write!(f, "--[{}]--", text),
+      Self::Export => write!(f, "export"),
+      Self::Import => write!(f, "import"),
     }
   }
 }
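A quick sanity check of the new Debug arms (hypothetical test, not part of the commit):

#[test]
fn debug_renders_keywords() {
  // the keyword lexemes print as the bare source keywords,
  // and Uint prints like any integer literal
  assert_eq!(format!("{:?}", Lexeme::Export), "export");
  assert_eq!(format!("{:?}", Lexeme::Import), "import");
  assert_eq!(format!("{:?}", Lexeme::Uint(42)), "42");
  assert_eq!(format!("{:?}", Lexeme::Comment("x".to_string())), "--[x]--");
}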
@@ -118,11 +122,14 @@ fn paren_parser<'a>(
   })
 }
 
-pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, LexedText, Error=Simple<char>> + 'a
+pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
 where T: AsRef<str> + Clone {
   let all_ops = ops.iter().map(|o| o.as_ref().to_string())
-    .chain(iter::once(".".to_string())).collect::<Vec<_>>();
-  recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
+    .chain([",", ".", "..", "..."].into_iter().map(str::to_string))
+    .collect::<Vec<_>>();
+  just("export").padded().to(Lexeme::Export)
+    .or(just("import").padded().to(Lexeme::Import))
+    .or_not().then(recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
     choice((
       paren_parser(recurse.clone(), '(', ')'),
       paren_parser(recurse.clone(), '[', ']'),
@@ -135,7 +142,7 @@ where T: AsRef<str> + Clone {
       just('\\').padded().to(Lexeme::BS),
       just('@').padded().to(Lexeme::At),
       just(':').to(Lexeme::Type),
-      number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence
+      number::int_parser().map(Lexeme::Uint), // all ints are valid floats so it takes precedence
       number::float_parser().map(Lexeme::Num),
       string::char_parser().map(Lexeme::Char),
       string::str_parser().map(Lexeme::Str),
@@ -143,7 +150,9 @@ where T: AsRef<str> + Clone {
     )).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres)
+    ))
   }).separated_by(one_of("\t ").repeated())
-    .flatten().collect()
-    .separated_by(just('\n').then(text::whitespace()).ignored())
-    .map(LexedText)
+    .flatten().collect())
+  .map(|(prefix, rest): (Option<Lexeme>, Vec<Entry>)| {
+    prefix.into_iter().map(|l| Entry(l, 0..6)).chain(rest.into_iter()).collect()
+  })
   .then_ignore(text::whitespace()).then_ignore(end())
 }
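The lexer thus yields a flat Vec<Entry> per line instead of one LexedText for the whole file, and an export/import keyword arrives as a leading Entry with the fixed span 0..6 (both keywords happen to be six characters long). A minimal sketch of driving it line by line, mirroring the parse() change further down; the operator set is made up, while lexer, Entry and split_lines are the items touched by this commit:

use chumsky::{Parser, error::Simple};

fn lex_source(source: &str) -> Result<Vec<Vec<Entry>>, Vec<Simple<char>>> {
  let ops = ["+", "-", "*"]; // hypothetical operator set
  let lexie = lexer(&ops);
  split_lines(source)
    .map(|line| lexie.parse(line)) // each line lexes to a Vec<Entry>
    .collect()
}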
@@ -15,4 +15,6 @@ pub use sourcefile::imports;
 pub use sourcefile::exported_names;
 pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
 pub use name::is_op;
 pub use parse::{parse, reparse, ParseError};
+pub use import::Import;
+pub use number::{float_parser, int_parser};
@@ -4,7 +4,7 @@ use chumsky::{prelude::{Simple, end}, Stream, Parser};
 use itertools::Itertools;
 use thiserror::Error;
 
-use crate::{ast::Rule, parse::lexer::LexedText};
+use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}};
 
 use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};
 
@@ -17,14 +17,13 @@ pub enum ParseError {
   Ast(Vec<Simple<Lexeme>>)
 }
 
-pub fn parse<'a, Iter, S, Op>(ops: &[Op], stream: S) -> Result<Vec<FileEntry>, ParseError>
-where
-  Op: 'a + AsRef<str> + Clone,
-  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
-  S: Into<Stream<'a, char, Range<usize>, Iter>> {
-  let lexed = lexer(ops).parse(stream).map_err(ParseError::Lex)?;
-  println!("Lexed:\n{:?}", lexed);
-  let LexedText(token_batchv) = lexed;
+pub fn parse<'a, Op>(ops: &[Op], data: &str) -> Result<Vec<FileEntry>, ParseError>
+where Op: 'a + AsRef<str> + Clone {
+  let lexie = lexer(ops);
+  let token_batchv = split_lines(data).map(|line| {
+    lexie.parse(line).map_err(ParseError::Lex)
+  }).collect::<Result<Vec<_>, _>>()?;
+  println!("Lexed:\n{:?}", LexedText(token_batchv.clone()));
   let parsr = line_parser().then_ignore(end());
   let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
     !v.is_empty()
@@ -34,7 +33,7 @@ where
     // Stream expects tuples, lexer outputs structs
     let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
     parsr.parse(Stream::from_iter(end..end+1, tuples))
-    // ^^^^^^^^^^
+    // ^^^^^^^^^^
     // I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
     // end of input should make little difference
   }).map(|res| match res {
@@ -48,13 +47,10 @@ where
   else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
 }
 
-pub fn reparse<'a, Iter, S, Op>(ops: &[Op], stream: S, pre: &[FileEntry])
+pub fn reparse<'a, Op>(ops: &[Op], data: &str, pre: &[FileEntry])
 -> Result<Vec<FileEntry>, ParseError>
-where
-  Op: 'a + AsRef<str> + Clone,
-  Iter: Iterator<Item = (char, Range<usize>)> + 'a,
-  S: Into<Stream<'a, char, Range<usize>, Iter>> {
-  let result = parse(ops, stream)?;
+where Op: 'a + AsRef<str> + Clone {
+  let result = parse(ops, data)?;
   Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
     if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
       if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
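With the Stream-generic plumbing gone, a call site needs only an operator list and the raw source text. A hypothetical example (names and operators made up); as a side note, the eoi range passed to Stream::from_iter above is, as far as I can tell, the span chumsky attaches to end-of-input errors, which would explain why it is required even though parsing is lazy:

fn load(src: &str) -> Result<Vec<FileEntry>, ParseError> {
  let ops = ["+", "*", "->"]; // hypothetical operator set
  let entries = parse(&ops, src)?; // lex line by line, then parse each line
  // reparse would re-run the pipeline and graft the old rule sources back in:
  // let updated = reparse(&ops, src, &entries)?;
  Ok(entries)
}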
@@ -1,23 +1,27 @@
 use std::collections::HashSet;
+use std::iter;
 
 use crate::{enum_parser, box_chain};
 use crate::ast::{Expr, Clause, Rule};
-use crate::utils::to_mrc_slice;
+use crate::utils::{to_mrc_slice, one_mrc_slice};
 use crate::utils::Stackframe;
 use crate::utils::iter::box_empty;
 
 use super::expression::xpr_parser;
-use super::import;
+use super::import::{self, Import};
 use super::import::import_parser;
 use super::lexer::Lexeme;
 use chumsky::{Parser, prelude::*};
 use ordered_float::NotNan;
 use lazy_static::lazy_static;
 
 /// Anything we might encounter in a file
 #[derive(Debug, Clone)]
 pub enum FileEntry {
   Import(Vec<import::Import>),
   Comment(String),
+  /// The bool indicates whether the rule is exported - whether tokens uniquely defined inside it
+  /// should be exported
   Rule(Rule, bool),
   Export(Vec<Vec<String>>)
 }
@@ -103,10 +107,10 @@ pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
   choice((
     // In case the usercode wants to parse doc
     enum_parser!(Lexeme >> FileEntry; Comment),
-    just(Lexeme::name("import"))
+    just(Lexeme::Import)
       .ignore_then(import_parser().map(FileEntry::Import))
-      .then_ignore(enum_parser!(Lexeme::Comment)),
-    just(Lexeme::name("export")).map_err_with_span(|e, s| {
+      .then_ignore(enum_parser!(Lexeme::Comment).or_not()),
+    just(Lexeme::Export).map_err_with_span(|e, s| {
       println!("{:?} could not yield an export", s); e
     }).ignore_then(
       just(Lexeme::NS).ignore_then(
@@ -114,13 +118,14 @@ pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
         .separated_by(just(Lexeme::name(",")))
         .delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
       ).map(FileEntry::Export)
-    ).or(rule_parser().map(|(source, prio, target)| {
-      FileEntry::Rule(Rule {
-        source: to_mrc_slice(source),
-        prio,
-        target: to_mrc_slice(target)
-      }, true)
-    })),
+      .or(rule_parser().map(|(source, prio, target)| {
+        FileEntry::Rule(Rule {
+          source: to_mrc_slice(source),
+          prio,
+          target: to_mrc_slice(target)
+        }, true)
+      }))
+    ),
     // This could match almost anything so it has to go last
     rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{
       source: to_mrc_slice(source),
@@ -153,3 +158,24 @@ where I: Iterator<Item = &'b FileEntry> + 'a {
     _ => None
   }).flatten()
 }
+
+pub fn split_lines(data: &str) -> impl Iterator<Item = &str> {
+  let mut source = data.char_indices();
+  let mut last_slice = 0;
+  iter::from_fn(move || {
+    let mut paren_count = 0;
+    while let Some((i, c)) = source.next() {
+      match c {
+        '(' | '{' | '[' => paren_count += 1,
+        ')' | '}' | ']' => paren_count -= 1,
+        '\n' if paren_count == 0 => {
+          let begin = last_slice;
+          last_slice = i;
+          return Some(&data[begin..i]);
+        },
+        _ => (),
+      }
+    }
+    None
+  })
+}
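split_lines breaks the source on newlines only at bracket depth zero, so bracketed expressions may span physical lines. A sketch of the behavior as written (my reading of the code, not a test from the commit):

let src = "foo = (\n  bar\n)\nbaz = 1\n";
let lines: Vec<&str> = split_lines(src).collect();
assert_eq!(lines[0], "foo = (\n  bar\n)");
// every slice after the first keeps its leading '\n', since last_slice is set
// to the index of the newline itself rather than the character after it
assert_eq!(lines[1], "\nbaz = 1");
// anything after the final newline is never yielded, so input is presumably
// expected to end with '\n'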