Working example

commit 180ebb56fa
parent 35a081162f
2023-03-10 13:58:16 +00:00
62 changed files with 1487 additions and 372 deletions

View File

@@ -93,14 +93,14 @@ pub fn xpr_parser() -> impl Parser<Lexeme, Expr, Error = Simple<Lexeme>> {
let clause =
enum_parser!(Lexeme::Comment).repeated()
.ignore_then(choice((
-enum_parser!(Lexeme >> Literal; Int, Num, Char, Str).map(Primitive::Literal).map(Clause::P),
+enum_parser!(Lexeme >> Literal; Uint, Num, Char, Str).map(Primitive::Literal).map(Clause::P),
placeholder_parser().map(|key| Clause::Placeh{key, vec: None}),
just(Lexeme::name("...")).to(true)
.or(just(Lexeme::name("..")).to(false))
.then(placeholder_parser())
.then(
just(Lexeme::Type)
-.ignore_then(enum_parser!(Lexeme::Int))
+.ignore_then(enum_parser!(Lexeme::Uint))
.or_not().map(Option::unwrap_or_default)
)
.map(|((nonzero, key), prio)| Clause::Placeh{key, vec: Some((
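
For orientation, a hedged reading of the placeholder forms this clause parser accepts; the payload past `vec: Some((` is cut off by the hunk and the `$` sigil is assumed (placeholder_parser is not shown), so only the visible branches are described:

// $x       scalar placeholder: Clause::Placeh { key, vec: None }
// ..$x     vectorial placeholder that may be empty (".." maps to false)
// ...$x:2  non-empty vectorial placeholder ("..." maps to true) with
//          priority 2, read as ':' (Lexeme::Type) followed by a Uint;
//          Option::unwrap_or_default supplies priority 0 when omitted.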

View File

@@ -24,7 +24,7 @@ impl From<Entry> for (Lexeme, Range<usize>) {
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lexeme {
Num(NotNan<f64>),
-Int(u64),
+Uint(u64),
Char(char),
Str(String),
Name(String),
@@ -35,14 +35,16 @@ pub enum Lexeme {
BS, // Backslash
At,
Type, // type operator
-Comment(String)
+Comment(String),
+Export,
+Import,
}
impl Debug for Lexeme {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Num(n) => write!(f, "{}", n),
-Self::Int(i) => write!(f, "{}", i),
+Self::Uint(i) => write!(f, "{}", i),
Self::Char(c) => write!(f, "{:?}", c),
Self::Str(s) => write!(f, "{:?}", s),
Self::Name(name) => write!(f, "{}", name),
@@ -59,6 +61,8 @@ impl Debug for Lexeme {
Self::At => write!(f, "@"),
Self::Type => write!(f, ":"),
Self::Comment(text) => write!(f, "--[{}]--", text),
+Self::Export => write!(f, "export"),
+Self::Import => write!(f, "import"),
}
}
}
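
A minimal sketch of the renamed and added variants under this Debug impl:

let tokens = [Lexeme::Uint(42), Lexeme::Export, Lexeme::Import];
// prints: 42 export import
println!("{:?} {:?} {:?}", tokens[0], tokens[1], tokens[2]);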
@@ -118,11 +122,14 @@ fn paren_parser<'a>(
})
}
-pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, LexedText, Error=Simple<char>> + 'a
+pub fn lexer<'a, T: 'a>(ops: &[T]) -> impl Parser<char, Vec<Entry>, Error=Simple<char>> + 'a
where T: AsRef<str> + Clone {
let all_ops = ops.iter().map(|o| o.as_ref().to_string())
-.chain(iter::once(".".to_string())).collect::<Vec<_>>();
-recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
+.chain([",", ".", "..", "..."].into_iter().map(str::to_string))
+.collect::<Vec<_>>();
+just("export").padded().to(Lexeme::Export)
+.or(just("import").padded().to(Lexeme::Import))
+.or_not().then(recursive(move |recurse: Recursive<char, LexSubres, Simple<char>>| {
choice((
paren_parser(recurse.clone(), '(', ')'),
paren_parser(recurse.clone(), '[', ']'),
@@ -135,7 +142,7 @@ where T: AsRef<str> + Clone {
just('\\').padded().to(Lexeme::BS),
just('@').padded().to(Lexeme::At),
just(':').to(Lexeme::Type),
-number::int_parser().map(Lexeme::Int), // all ints are valid floats so it takes precedence
+number::int_parser().map(Lexeme::Uint), // all ints are valid floats so it takes precedence
number::float_parser().map(Lexeme::Num),
string::char_parser().map(Lexeme::Char),
string::str_parser().map(Lexeme::Str),
@@ -143,7 +150,9 @@ where T: AsRef<str> + Clone {
)).map_with_span(|lx, span| box_once(Entry(lx, span)) as LexSubres)
))
}).separated_by(one_of("\t ").repeated())
-.flatten().collect()
-.separated_by(just('\n').then(text::whitespace()).ignored())
-.map(LexedText)
+.flatten().collect())
+.map(|(prefix, rest): (Option<Lexeme>, Vec<Entry>)| {
+prefix.into_iter().map(|l| Entry(l, 0..6)).chain(rest.into_iter()).collect()
+})
+.then_ignore(text::whitespace()).then_ignore(end())
}
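
A hedged usage sketch of the new signature (operator list illustrative):

use chumsky::Parser;
let ops = ["+", "=="];
// One line of source in, a flat Vec<Entry> out; a leading export/import
// keyword becomes a Lexeme::Export / Lexeme::Import entry spanning 0..6.
let entries: Vec<Entry> = lexer(&ops).parse("export foo").unwrap();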

View File

@@ -15,4 +15,6 @@ pub use sourcefile::imports;
pub use sourcefile::exported_names;
pub use lexer::{lexer, Lexeme, Entry as LexerEntry};
pub use name::is_op;
-pub use parse::{parse, reparse, ParseError};
+pub use parse::{parse, reparse, ParseError};
+pub use import::Import;
+pub use number::{float_parser, int_parser};
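
Downstream code can then pull the whole surface from one path (the `crate::parse` prefix is inferred from the use statements in the next file):

use crate::parse::{parse, reparse, Import, int_parser, float_parser};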

View File

@@ -4,7 +4,7 @@ use chumsky::{prelude::{Simple, end}, Stream, Parser};
use itertools::Itertools;
use thiserror::Error;
-use crate::{ast::Rule, parse::lexer::LexedText};
+use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}};
use super::{Lexeme, FileEntry, lexer, line_parser, LexerEntry};
@@ -17,14 +17,13 @@ pub enum ParseError {
Ast(Vec<Simple<Lexeme>>)
}
-pub fn parse<'a, Iter, S, Op>(ops: &[Op], stream: S) -> Result<Vec<FileEntry>, ParseError>
-where
-Op: 'a + AsRef<str> + Clone,
-Iter: Iterator<Item = (char, Range<usize>)> + 'a,
-S: Into<Stream<'a, char, Range<usize>, Iter>> {
-let lexed = lexer(ops).parse(stream).map_err(ParseError::Lex)?;
-println!("Lexed:\n{:?}", lexed);
-let LexedText(token_batchv) = lexed;
+pub fn parse<'a, Op>(ops: &[Op], data: &str) -> Result<Vec<FileEntry>, ParseError>
+where Op: 'a + AsRef<str> + Clone {
+let lexie = lexer(ops);
+let token_batchv = split_lines(data).map(|line| {
+lexie.parse(line).map_err(ParseError::Lex)
+}).collect::<Result<Vec<_>, _>>()?;
+println!("Lexed:\n{:?}", LexedText(token_batchv.clone()));
let parsr = line_parser().then_ignore(end());
let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
!v.is_empty()
@@ -34,7 +33,7 @@ where
// Stream expects tuples, lexer outputs structs
let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
parsr.parse(Stream::from_iter(end..end+1, tuples))
// ^^^^^^^^^^
// I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
// end of input should make little difference
}).map(|res| match res {
@@ -48,13 +47,10 @@ where
else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}
-pub fn reparse<'a, Iter, S, Op>(ops: &[Op], stream: S, pre: &[FileEntry])
+pub fn reparse<'a, Op>(ops: &[Op], data: &str, pre: &[FileEntry])
-> Result<Vec<FileEntry>, ParseError>
-where
-Op: 'a + AsRef<str> + Clone,
-Iter: Iterator<Item = (char, Range<usize>)> + 'a,
-S: Into<Stream<'a, char, Range<usize>, Iter>> {
-let result = parse(ops, stream)?;
+where Op: 'a + AsRef<str> + Clone {
+let result = parse(ops, data)?;
Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
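
A hedged call-site sketch for the new text-based API (rule and import syntax illustrative):

let ops = ["+"];
let source = "import foo::bar\nmain = 1 + 2\n";
let tree = parse(&ops, source)?;
// reparse runs the same pipeline, then appears to carry each Rule's
// source field over from the matching entry of the previous tree `pre`.
let updated = reparse(&ops, source, &tree)?;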

View File

@@ -1,23 +1,27 @@
use std::collections::HashSet;
use std::iter;
use crate::{enum_parser, box_chain};
use crate::ast::{Expr, Clause, Rule};
-use crate::utils::to_mrc_slice;
+use crate::utils::{to_mrc_slice, one_mrc_slice};
use crate::utils::Stackframe;
use crate::utils::iter::box_empty;
use super::expression::xpr_parser;
-use super::import;
+use super::import::{self, Import};
use super::import::import_parser;
use super::lexer::Lexeme;
use chumsky::{Parser, prelude::*};
use ordered_float::NotNan;
use lazy_static::lazy_static;
/// Anything we might encounter in a file
#[derive(Debug, Clone)]
pub enum FileEntry {
Import(Vec<import::Import>),
Comment(String),
/// The bool indicates whether the rule is exported - whether tokens uniquely defined inside it
/// should be exported
Rule(Rule, bool),
Export(Vec<Vec<String>>)
}
@@ -103,10 +107,10 @@ pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
choice((
// In case the usercode wants to parse doc
enum_parser!(Lexeme >> FileEntry; Comment),
just(Lexeme::name("import"))
just(Lexeme::Import)
.ignore_then(import_parser().map(FileEntry::Import))
-.then_ignore(enum_parser!(Lexeme::Comment)),
-just(Lexeme::name("export")).map_err_with_span(|e, s| {
+.then_ignore(enum_parser!(Lexeme::Comment).or_not()),
+just(Lexeme::Export).map_err_with_span(|e, s| {
println!("{:?} could not yield an export", s); e
}).ignore_then(
just(Lexeme::NS).ignore_then(
@@ -114,13 +118,14 @@ pub fn line_parser() -> impl Parser<Lexeme, FileEntry, Error = Simple<Lexeme>> {
.separated_by(just(Lexeme::name(",")))
.delimited_by(just(Lexeme::LP('(')), just(Lexeme::RP('(')))
).map(FileEntry::Export)
-).or(rule_parser().map(|(source, prio, target)| {
-FileEntry::Rule(Rule {
-source: to_mrc_slice(source),
-prio,
-target: to_mrc_slice(target)
-}, true)
-})),
+.or(rule_parser().map(|(source, prio, target)| {
+FileEntry::Rule(Rule {
+source: to_mrc_slice(source),
+prio,
+target: to_mrc_slice(target)
+}, true)
+}))
+),
// This could match almost anything so it has to go last
rule_parser().map(|(source, prio, target)| FileEntry::Rule(Rule{
source: to_mrc_slice(source),
@@ -153,3 +158,24 @@ where I: Iterator<Item = &'b FileEntry> + 'a {
_ => None
}).flatten()
}
+pub fn split_lines(data: &str) -> impl Iterator<Item = &str> {
+let mut source = data.char_indices();
+let mut last_slice = 0;
+iter::from_fn(move || {
+let mut paren_count = 0;
+while let Some((i, c)) = source.next() {
+match c {
+'(' | '{' | '[' => paren_count += 1,
+')' | '}' | ']' => paren_count -= 1,
+'\n' if paren_count == 0 => {
+let begin = last_slice;
+last_slice = i;
+return Some(&data[begin..i]);
+},
+_ => (),
+}
+}
+None
+})
+}
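
split_lines keeps newlines inside (), [] and {} from splitting a logical line. Two quirks are visible as written: every slice after the first begins with the '\n' that ended the previous one (last_slice is set to i, not past it), and anything after the final newline is never yielded. A hedged corrected sketch:

use std::iter;

pub fn split_lines(data: &str) -> impl Iterator<Item = &str> {
  let mut source = data.char_indices();
  let mut last_slice = 0;
  let mut done = false;
  iter::from_fn(move || {
    let mut paren_count = 0;
    while let Some((i, c)) = source.next() {
      match c {
        '(' | '{' | '[' => paren_count += 1,
        ')' | '}' | ']' => paren_count -= 1,
        '\n' if paren_count == 0 => {
          let begin = last_slice;
          last_slice = i + 1; // resume past the newline, not on it
          return Some(&data[begin..i]);
        },
        _ => (),
      }
    }
    // flush whatever follows the final newline, exactly once
    if !done {
      done = true;
      if last_slice < data.len() { return Some(&data[last_slice..]); }
    }
    None
  })
}

// With this variant, "a(\nb)\nc" yields "a(\nb)" and then "c":
// the newline inside the parentheses never splits the logical line.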