Files
orchid/src/parse/parse.rs
2023-03-21 19:36:40 +00:00

76 lines
2.4 KiB
Rust

use std::{ops::Range, fmt::Debug};
use chumsky::{prelude::{Simple, end}, Stream, Parser};
use itertools::Itertools;
use lasso::Spur;
use thiserror::Error;
use crate::{ast::Rule, parse::{lexer::LexedText, sourcefile::split_lines}, representations::sourcefile::FileEntry};
use super::{Lexeme, lexer, line_parser, LexerEntry};
#[derive(Error, Debug, Clone)]
pub enum ParseError {
#[error("Could not tokenize {0:?}")]
Lex(Vec<Simple<char>>),
#[error("Could not parse {0:#?}")]
Ast(Vec<Simple<Lexeme>>)
}
pub fn parse<'a, Op, F>(
ops: &[Op], data: &str, intern: &F
) -> Result<Vec<FileEntry>, ParseError>
where
Op: 'a + AsRef<str> + Clone,
F: Fn(&str) -> Spur
{
let lexie = lexer(ops);
let token_batchv = split_lines(data).map(|line| {
lexie.parse(line).map_err(ParseError::Lex)
}).collect::<Result<Vec<_>, _>>()?;
println!("Lexed:\n{:?}", LexedText(token_batchv.clone()));
let parsr = line_parser(intern).then_ignore(end());
let (parsed_lines, errors_per_line) = token_batchv.into_iter().filter(|v| {
!v.is_empty()
}).map(|v| {
// Find the first invalid position for Stream::for_iter
let LexerEntry(_, Range{ end, .. }) = v.last().unwrap().clone();
// Stream expects tuples, lexer outputs structs
let tuples = v.into_iter().map_into::<(Lexeme, Range<usize>)>();
parsr.parse(Stream::from_iter(end..end+1, tuples))
// ^^^^^^^^^^
// I haven't the foggiest idea why this is needed, parsers are supposed to be lazy so the
// end of input should make little difference
}).map(|res| match res {
Ok(r) => (Some(r), vec![]),
Err(e) => (None, e)
}).unzip::<_, _, Vec<_>, Vec<_>>();
let total_err = errors_per_line.into_iter()
.flat_map(Vec::into_iter)
.collect::<Vec<_>>();
if !total_err.is_empty() { Err(ParseError::Ast(total_err)) }
else { Ok(parsed_lines.into_iter().map(Option::unwrap).collect()) }
}
pub fn reparse<'a, Op, F>(
ops: &[Op], data: &str, pre: &[FileEntry], intern: &F
)
-> Result<Vec<FileEntry>, ParseError>
where
Op: 'a + AsRef<str> + Clone,
F: Fn(&str) -> Spur
{
let result = parse(ops, data, intern)?;
Ok(result.into_iter().zip(pre.iter()).map(|(mut output, donor)| {
if let FileEntry::Rule(Rule{source, ..}, _) = &mut output {
if let FileEntry::Rule(Rule{source: s2, ..}, _) = donor {
*source = s2.clone()
} else {
panic!("Preparse and reparse received different row types!")
}
}
output
}).collect())
}