1
0
This commit is contained in:
2024-10-18 12:59:34 +03:00
parent 4827bdb4c0
commit d13dfe82ae
8 changed files with 491 additions and 67 deletions

76
src/ast/mod.rs Normal file
View File

@@ -0,0 +1,76 @@
use core::fmt;
/// Byte-offset range of a syntax element in the original source text.
///
/// `start` is the offset of the first character; `end` is presumed to be the
/// exclusive (one-past-the-last) offset — NOTE(review): confirm against the
/// lexer's position bookkeeping.
///
/// `PartialEq`/`Eq`/`Hash` are derived eagerly so spans can be compared in
/// tests and used as map keys; this is purely additive for existing callers.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Span {
    /// Offset of the first byte covered by this span.
    pub start: usize,
    /// Offset just past the last byte covered by this span.
    pub end: usize,
}

impl Span {
    /// Creates a span covering `start..end`.
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }
}
/// An expression node of the abstract syntax tree.
///
/// Every variant carries the [`Span`] of the source text it was parsed from,
/// retrievable uniformly through [`Expr::span`].
#[derive(Debug)]
pub enum Expr {
    /// Integer literal, e.g. `42`.
    Int {
        span: Span,
        value: i64,
    },
    /// Floating-point literal, e.g. `3.14`.
    Float {
        span: Span,
        value: f64,
    },
    /// Variable reference; `id` is presumably the interned symbol id from
    /// the symbols table — see the parser mapping `Token::Name(id)` here.
    Var {
        span: Span,
        id: usize,
    },
    /// Binary operation; operands are boxed to keep `Expr` a fixed size.
    BinOp {
        span: Span,
        lhs: Box<Expr>,
        op: BinOp,
        rhs: Box<Expr>,
    },
}
impl Expr {
    /// Returns the source span attached to this node, whatever the variant.
    pub fn span(&self) -> Span {
        match self {
            Expr::Int { span, .. }
            | Expr::Float { span, .. }
            | Expr::Var { span, .. }
            | Expr::BinOp { span, .. } => *span,
        }
    }
}
/// The binary operators supported by the language.
#[derive(Debug, Clone, Copy)]
pub enum BinOp {
    Add,
    Sub,
    Mul,
    Div,
}

impl BinOp {
    /// Binding strength used by the parser's precedence climbing:
    /// multiplicative operators bind tighter than additive ones.
    pub fn precedence(&self) -> u8 {
        match self {
            BinOp::Add | BinOp::Sub => 1,
            BinOp::Mul | BinOp::Div => 2,
        }
    }
}

impl fmt::Display for BinOp {
    /// Renders the operator as its single-character source symbol.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let symbol = match self {
            BinOp::Add => "+",
            BinOp::Sub => "-",
            BinOp::Mul => "*",
            BinOp::Div => "/",
        };
        f.write_str(symbol)
    }
}

View File

@@ -1,6 +1,6 @@
use std::{ops::Deref, path::PathBuf}; use std::{ops::Deref, path::PathBuf};
use clap::{CommandFactory, Parser}; use clap::{CommandFactory, Parser, Subcommand};
pub struct Args { pub struct Args {
inner: ArgsInner, inner: ArgsInner,
@@ -8,9 +8,21 @@ pub struct Args {
#[derive(Parser)] #[derive(Parser)]
pub struct ArgsInner { pub struct ArgsInner {
pub input: PathBuf, #[clap(subcommand)]
pub output_tokens: PathBuf, pub command: Command,
pub output_symbols: PathBuf, }
#[derive(Subcommand)]
pub enum Command {
Lex {
input: PathBuf,
output_tokens: PathBuf,
output_symbols: PathBuf,
},
Syn {
input: PathBuf,
output_tree: PathBuf,
},
} }
impl Args { impl Args {
@@ -36,26 +48,61 @@ impl Deref for Args {
} }
fn validate_inner(args: ArgsInner) -> Result<ArgsInner, clap::Error> { fn validate_inner(args: ArgsInner) -> Result<ArgsInner, clap::Error> {
if !args.input.is_file() { match &args.command {
return Err(clap::Error::raw( Command::Lex {
clap::error::ErrorKind::InvalidValue, input,
format!("Input file '{}' does not exist", args.input.display()), output_tokens,
)); output_symbols,
} } => validate_lex(input, output_tokens, output_symbols)?,
Command::Syn { input, output_tree } => validate_syn(input, output_tree)?,
if args.input == args.output_tokens { };
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
}
if args.input == args.output_symbols {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
}
Ok(args) Ok(args)
} }
fn validate_lex(
input: &PathBuf,
output_tokens: &PathBuf,
output_symbols: &PathBuf,
) -> Result<(), clap::Error> {
if !input.is_file() {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Input file '{}' does not exist", input.display()),
));
}
if input == output_tokens {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
}
if input == output_symbols {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
};
Ok(())
}
fn validate_syn(input: &PathBuf, output_tree: &PathBuf) -> Result<(), clap::Error> {
if !input.is_file() {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Input file '{}' does not exist", input.display()),
));
}
if output_tree == input {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
};
Ok(())
}

View File

@@ -1,34 +1,89 @@
use std::io; use std::io::{self, Write};
use std::io::Write;
use ast::Expr;
use parse::parser::Parser;
use symbols::SymbolsTable; use symbols::SymbolsTable;
mod ast;
mod cli; mod cli;
mod parse; mod parse;
mod symbols; mod symbols;
/// Recursively writes `expr` to `writer` as an ASCII tree: `+--` marks a
/// node that is the last child at its level, `|--` an earlier sibling, and
/// `prefix` carries the indentation inherited from ancestor levels.
fn write_expr(expr: &Expr, writer: &mut impl Write, prefix: &str, is_last: bool) -> io::Result<()> {
    let branch = if is_last { "+--" } else { "|--" };
    match expr {
        // Leaves: literals print their value, variables their symbol id.
        Expr::Int { value, .. } => writeln!(writer, "{}{}<{}>", prefix, branch, value),
        Expr::Float { value, .. } => writeln!(writer, "{}{}<{}>", prefix, branch, value),
        Expr::Var { id, .. } => writeln!(writer, "{}{}<id,{}>", prefix, branch, id),
        Expr::BinOp { lhs, op, rhs, .. } => {
            writeln!(writer, "{}{}<{}>", prefix, branch, op)?;
            // Children of a last node hang under blank space; otherwise a
            // vertical bar continues the branch down to later siblings.
            let new_prefix = if is_last {
                format!("{} ", prefix)
            } else {
                format!("{}| ", prefix)
            };
            write_expr(lhs, writer, &new_prefix, false)?;
            write_expr(rhs, writer, &new_prefix, true)
        }
    }
}
/// Writes the whole expression tree to `writer`, treating the root as a
/// last (and only) child so it gets the `+--` elbow with no leading bar.
fn print_expr(expr: &Expr, writer: &mut impl Write) -> io::Result<()> {
    write_expr(expr, writer, "", true)
}
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
let args = cli::Args::parse(); let args = cli::Args::parse();
let input = std::fs::read_to_string(&args.input).unwrap(); match &args.command {
let mut symbols = SymbolsTable::default(); cli::Command::Lex {
let tokens = parse::lexer::make_tokenizer(&input, &mut symbols).collect::<Result<Vec<_>, _>>(); input,
match tokens { output_tokens,
Ok(tokens) => { output_symbols,
let mut writer_tokens = io::BufWriter::new(std::fs::File::create(&args.output_tokens)?); } => {
let input = std::fs::read_to_string(input)?;
let mut symbols = SymbolsTable::default();
let tokens =
parse::lexer::make_tokenizer(&input, &mut symbols).collect::<Result<Vec<_>, _>>();
for (_, token, _) in tokens { match tokens {
writeln!(writer_tokens, "{token:>6} - {}", token.as_str())?; Ok(tokens) => {
} let mut writer_tokens =
io::BufWriter::new(std::fs::File::create(output_tokens)?);
let mut writer_symbols = for (_, token, _) in tokens {
io::BufWriter::new(std::fs::File::create(&args.output_symbols)?); writeln!(writer_tokens, "{token:>6} - {}", token.as_str())?;
for (name, id) in &symbols { }
writeln!(writer_symbols, "{name} -> {id}")?;
let mut writer_symbols =
io::BufWriter::new(std::fs::File::create(output_symbols)?);
for (name, id) in &symbols {
writeln!(writer_symbols, "{name} -> {id}")?;
}
}
Err(e) => {
eprintln!("error: {}", e);
}
} }
} }
Err(err) => { cli::Command::Syn { input, output_tree } => {
eprintln!("error: {err}"); let input = std::fs::read_to_string(input)?;
let mut symbols = SymbolsTable::default();
let tokens = parse::lexer::make_tokenizer(&input, &mut symbols);
let mut parser = Parser::new(tokens);
let res = parser.parse();
match res {
Ok(expr) => {
let mut writer_tree = io::BufWriter::new(std::fs::File::create(output_tree)?);
print_expr(&expr, &mut writer_tree)?;
}
Err(e) => eprintln!("error: {}", e),
}
} }
} }

View File

@@ -1,20 +1,91 @@
#![allow(dead_code)]
use core::fmt; use core::fmt;
use crate::ast::Span;
#[derive(Debug)] #[derive(Debug)]
pub enum LexicalError { pub struct LexicalError {
UnrecognizedToken(usize, char), span: Span,
BadNumber(usize, String), kind: LexicalErrorKind,
}
/// The concrete reasons tokenization can fail.
#[derive(Debug)]
pub enum LexicalErrorKind {
    /// A character that starts no known token.
    UnrecognizedToken,
    /// A numeric literal that parses as neither `i64` nor `f64`;
    /// the offending text is preserved for the message.
    BadNumber(String),
}

impl LexicalError {
    /// Creates an error of `kind` located at `span`.
    pub fn new(span: Span, kind: LexicalErrorKind) -> Self {
        Self { span, kind }
    }

    /// Source location of the error.
    pub fn span(&self) -> Span {
        self.span
    }

    /// The error's category and payload.
    pub fn kind(&self) -> &LexicalErrorKind {
        &self.kind
    }
}
impl fmt::Display for LexicalErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LexicalErrorKind::UnrecognizedToken => write!(f, "unrecognized token"),
LexicalErrorKind::BadNumber(s) => write!(f, "bad number '{}'", s),
}
}
} }
impl fmt::Display for LexicalError { impl fmt::Display for LexicalError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} at position {}", self.kind, self.span.start)
}
}
/// A parse-phase error: a `kind` located at a source `span`.
#[derive(Debug)]
pub struct ParseError {
    span: Span,
    kind: ParseErrorKind,
}

/// The concrete reasons parsing can fail.
#[derive(Debug)]
pub enum ParseErrorKind {
    /// A primary expression (literal, variable, or `(`) was required.
    ExpectedExpr,
    /// A token appeared where the grammar does not allow it.
    UnexpectedToken,
    /// The token stream ended in the middle of an expression.
    UnexpectedEOF,
    /// The lexer itself failed; the original error is preserved.
    Lexical(LexicalError),
}

impl ParseError {
    /// Creates an error of `kind` located at `span`.
    pub fn new(span: Span, kind: ParseErrorKind) -> Self {
        Self { span, kind }
    }

    /// Source location of the error.
    pub fn span(&self) -> Span {
        self.span
    }

    /// The error's category and payload.
    pub fn kind(&self) -> &ParseErrorKind {
        &self.kind
    }
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Only the start offset is shown; the full span stays available via `span()`.
        write!(f, "{} at position {}", self.kind, self.span.start)
    }
}
impl fmt::Display for ParseErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
LexicalError::UnrecognizedToken(pos, c) => { ParseErrorKind::ExpectedExpr => write!(f, "expected expression"),
write!(f, "unrecognized token '{c}' at position {pos}") ParseErrorKind::UnexpectedToken => write!(f, "unexpected token"),
} ParseErrorKind::UnexpectedEOF => write!(f, "unexpected end of file"),
LexicalError::BadNumber(pos, number) => { ParseErrorKind::Lexical(e) => write!(f, "lexical error: {}", e.kind()),
write!(f, "bad number '{number}' at position {pos}")
}
} }
} }
} }

View File

@@ -1,8 +1,11 @@
use itertools::PeekNth; use itertools::PeekNth;
use crate::symbols::SymbolsTable; use crate::{ast::Span, symbols::SymbolsTable};
use super::{error::LexicalError, token::Token}; use super::{
error::{LexicalError, LexicalErrorKind},
token::Token,
};
pub type SpannedToken = (usize, Token, usize); pub type SpannedToken = (usize, Token, usize);
pub type LexerResult = Result<SpannedToken, LexicalError>; pub type LexerResult = Result<SpannedToken, LexicalError>;
@@ -85,9 +88,11 @@ impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> {
} }
fn lex_number(&mut self) -> LexerResult { fn lex_number(&mut self) -> LexerResult {
let mut number = String::new();
let start = self.get_pos(); let start = self.get_pos();
let mut number = String::new();
number.push(self.next_char().expect("lex_number: no more characters"));
let mut passed_dot = false; let mut passed_dot = false;
while let Some(c) = self.peek_char_nth(0) { while let Some(c) = self.peek_char_nth(0) {
if self.is_digit(c) { if self.is_digit(c) {
@@ -108,7 +113,12 @@ impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> {
Ok(n) => Token::Int(n), Ok(n) => Token::Int(n),
Err(_) => match number.parse::<f64>() { Err(_) => match number.parse::<f64>() {
Ok(n) => Token::Float(n), Ok(n) => Token::Float(n),
Err(_) => return Err(LexicalError::BadNumber(start, number)), Err(_) => {
return Err(LexicalError::new(
Span::new(start, end),
LexicalErrorKind::BadNumber(number),
))
}
}, },
}, },
end, end,
@@ -124,18 +134,21 @@ impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> {
'(' => self.eat_next_char(Token::LParen), '(' => self.eat_next_char(Token::LParen),
')' => self.eat_next_char(Token::RParen), ')' => self.eat_next_char(Token::RParen),
c if c.is_whitespace() => { c if c.is_whitespace() => {
let start = self.get_pos(); let _start = self.get_pos();
let _ = self.next_char(); let _c = self.next_char();
let end = self.get_pos(); let _end = self.get_pos();
if c == '\n' { // if c == '\n' {
self.emit((start, Token::NewLine, end)); // self.emit((start, Token::NewLine, end));
} // }
} }
_ => { _ => {
let pos = self.get_pos(); let pos = self.get_pos();
let _ = self.next_char(); let _ = self.next_char();
return Err(LexicalError::UnrecognizedToken(pos, c)); return Err(LexicalError::new(
Span::new(pos, self.get_pos()),
LexicalErrorKind::UnrecognizedToken,
));
} }
} }
@@ -150,8 +163,8 @@ impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> {
self.emit((start, token, end)); self.emit((start, token, end));
} }
fn is_number_start(&self, c: char, _c1: Option<char>) -> bool { fn is_number_start(&self, c: char, c1: Option<char>) -> bool {
c.is_ascii_digit() c.is_ascii_digit() || (c == '-' && c1.map_or(false, |c| c.is_ascii_digit()))
} }
fn is_digit(&self, c: char) -> bool { fn is_digit(&self, c: char) -> bool {

View File

@@ -1,3 +1,4 @@
pub mod lexer; pub mod lexer;
pub mod token; pub mod token;
pub mod error; pub mod error;
pub mod parser;

149
src/parse/parser.rs Normal file
View File

@@ -0,0 +1,149 @@
use itertools::PeekNth;
use crate::{
ast::{Expr, Span},
parse::error::ParseErrorKind,
};
use super::{
error::ParseError,
lexer::{LexerResult, SpannedToken},
token::Token,
};
/// Recursive-descent expression parser over a stream of lexer results,
/// using precedence climbing for binary operators (see `parse_expr_inner`).
#[derive(Debug)]
pub struct Parser<T: Iterator<Item = LexerResult>> {
    // Lookahead-capable token stream; each item is a token or a lexer error.
    tokens: PeekNth<T>,
    // Span of the most recently consumed token, used to position
    // `UnexpectedEOF` errors when the stream runs dry.
    last_span: Span,
}
impl<T> Parser<T>
where
    T: Iterator<Item = LexerResult>,
{
    /// Wraps a lexer stream in a parser. `last_span` starts at `0..0` so an
    /// immediately empty stream reports EOF at the beginning of the input.
    pub fn new(tokens: T) -> Self {
        Self {
            tokens: itertools::peek_nth(tokens),
            last_span: Span::new(0, 0),
        }
    }

    /// Parses one complete expression and requires the token stream to be
    /// exhausted afterwards; any trailing token becomes `UnexpectedToken`.
    pub fn parse(&mut self) -> Result<Expr, ParseError> {
        let expr = self.parse_expr()?;
        if self.has_next() {
            let (start, _, end) = self.next_token()?;
            return Err(ParseError::new(
                Span::new(start, end),
                ParseErrorKind::UnexpectedToken,
            ));
        }
        Ok(expr)
    }

    /// Parses a full binary expression: one primary operand followed by the
    /// precedence-climbing loop starting at minimum precedence 0.
    fn parse_expr(&mut self) -> Result<Expr, ParseError> {
        let lhs = self.parse_primary_expr()?;
        self.parse_expr_inner(lhs, 0)
    }

    /// Precedence-climbing (operator-precedence) loop.
    ///
    /// Consumes operators whose precedence is at least `min_precedence`.
    /// After each right-hand operand, any tighter-binding following operator
    /// triggers recursion with a raised threshold, so `a + b * c` groups as
    /// `a + (b * c)`, while chains of equal precedence stay left-associative.
    fn parse_expr_inner(&mut self, lhs: Expr, min_precedence: u8) -> Result<Expr, ParseError> {
        let mut lhs = lhs;
        let mut op = self.peek_token_nth(0).and_then(|token| token.as_bin_op());
        while let Some(op1) = op {
            if op1.precedence() < min_precedence {
                break;
            }
            // Consume the operator token itself.
            let _ = self.next_token()?;
            let mut rhs = self.parse_primary_expr()?;
            op = self.peek_token_nth(0).and_then(|token| token.as_bin_op());
            while let Some(op2) = op {
                if op2.precedence() <= op1.precedence() {
                    break;
                }
                // The next operator binds tighter: fold it into `rhs` first.
                rhs = self.parse_expr_inner(rhs, op1.precedence() + 1)?;
                op = self.peek_token_nth(0).and_then(|token| token.as_bin_op());
            }
            // The combined node spans from the start of lhs to the end of rhs.
            lhs = Expr::BinOp {
                span: Span::new(lhs.span().start, rhs.span().end),
                lhs: Box::new(lhs),
                op: op1,
                rhs: Box::new(rhs),
            }
        }
        Ok(lhs)
    }

    /// Parses an atom: a literal, a variable, or a parenthesized expression.
    /// Any other token yields `ExpectedExpr` at that token's span.
    fn parse_primary_expr(&mut self) -> Result<Expr, ParseError> {
        let token = self.next_token()?;
        match token {
            (start, Token::Float(value), end) => Ok(Expr::Float {
                span: Span::new(start, end),
                value,
            }),
            (start, Token::Int(value), end) => Ok(Expr::Int {
                span: Span::new(start, end),
                value,
            }),
            (start, Token::Name(id), end) => Ok(Expr::Var {
                span: Span::new(start, end),
                id,
            }),
            (_, Token::LParen, _) => {
                // The grouped expression keeps its own span; the parentheses
                // themselves are not recorded in the tree.
                let expr = self.parse_expr()?;
                let _ = self.expect_token(Token::RParen)?;
                Ok(expr)
            }
            (start, _, end) => Err(ParseError::new(
                Span::new(start, end),
                ParseErrorKind::ExpectedExpr,
            )),
        }
    }

    /// Pulls the next token, recording its span for later EOF reporting.
    /// Lexer errors become `ParseErrorKind::Lexical`; a drained stream
    /// becomes `UnexpectedEOF` at the last consumed token's span.
    fn next_token(&mut self) -> Result<SpannedToken, ParseError> {
        let token = self.tokens.next();
        match token {
            Some(Ok((start, token, end))) => {
                self.last_span = Span::new(start, end);
                Ok((start, token, end))
            }
            Some(Err(e)) => Err(ParseError::new(e.span(), ParseErrorKind::Lexical(e))),
            None => Err(ParseError::new(
                self.last_span,
                ParseErrorKind::UnexpectedEOF,
            )),
        }
    }

    /// Peeks `n` items ahead without consuming. NOTE(review): a lexical
    /// error at that position is reported as `None` here; it only surfaces
    /// once `next_token` eventually consumes it.
    fn peek_token_nth(&mut self, n: usize) -> Option<&Token> {
        self.tokens.peek_nth(n).and_then(|res| match res {
            Ok((_, token, _)) => Some(token),
            Err(_) => None,
        })
    }

    /// True while any item (token or lexer error) remains in the stream.
    fn has_next(&mut self) -> bool {
        self.tokens.peek().is_some()
    }

    /// Consumes the next token and requires it to equal `token`; returns its
    /// span on success, `UnexpectedToken` otherwise.
    fn expect_token(&mut self, token: Token) -> Result<(usize, usize), ParseError> {
        let t = self.next_token()?;
        match t {
            (start, t, end) if t == token => Ok((start, end)),
            (start, _, end) => Err(ParseError::new(
                Span::new(start, end),
                ParseErrorKind::UnexpectedToken,
            )),
        }
    }
}

View File

@@ -1,5 +1,7 @@
use std::fmt; use std::fmt;
use crate::ast::BinOp;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Token { pub enum Token {
Name(usize), Name(usize),
@@ -14,7 +16,7 @@ pub enum Token {
LParen, // ( LParen, // (
RParen, // ) RParen, // )
// extra // extra
NewLine, // new line // NewLine, // new line
EndOfFile, // end of file EndOfFile, // end of file
} }
@@ -30,10 +32,20 @@ impl Token {
Token::Slash => "div", Token::Slash => "div",
Token::LParen => "lparen", Token::LParen => "lparen",
Token::RParen => "rparen", Token::RParen => "rparen",
Token::NewLine => "new line", // Token::NewLine => "new line",
Token::EndOfFile => "end of file", Token::EndOfFile => "end of file",
} }
} }
pub fn as_bin_op(&self) -> Option<BinOp> {
match self {
Token::Plus => Some(BinOp::Add),
Token::Minus => Some(BinOp::Sub),
Token::Star => Some(BinOp::Mul),
Token::Slash => Some(BinOp::Div),
_ => None,
}
}
} }
impl fmt::Display for Token { impl fmt::Display for Token {
@@ -48,7 +60,7 @@ impl fmt::Display for Token {
Token::Slash => write!(f, "</>"), Token::Slash => write!(f, "</>"),
Token::LParen => write!(f, "<(>"), Token::LParen => write!(f, "<(>"),
Token::RParen => write!(f, "<)>"), Token::RParen => write!(f, "<)>"),
Token::NewLine => write!(f, "<new line>"), // Token::NewLine => write!(f, "<new line>"),
Token::EndOfFile => write!(f, "<end of file>"), Token::EndOfFile => write!(f, "<end of file>"),
} }
} }