1
0
This commit is contained in:
2024-11-01 05:16:51 +03:00
parent 552dec9401
commit f83c37287a
15 changed files with 684 additions and 207 deletions

25
Cargo.lock generated
View File

@@ -110,6 +110,7 @@ dependencies = [
"anyhow", "anyhow",
"clap", "clap",
"itertools", "itertools",
"thiserror",
] ]
[[package]] [[package]]
@@ -165,15 +166,35 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.79" version = "2.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"unicode-ident", "unicode-ident",
] ]
[[package]]
name = "thiserror"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d171f59dbaa811dbbb1aee1e73db92ec2b122911a48e1390dfe327a821ddede"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.66"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b08be0f17bd307950653ce45db00cd31200d82b624b36e181337d9c7d92765b5"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.13" version = "1.0.13"

View File

@@ -1,9 +1,10 @@
[package] [package]
name = "developing-compilers" edition = "2021"
version = "0.1.0" name = "developing-compilers"
edition = "2021" version = "0.1.0"
[dependencies] [dependencies]
anyhow = "1.0.89" anyhow = "1.0.89"
clap = { version = "4.5.19", features = ["derive"] } clap = { version = "4.5.19", features = [ "derive" ] }
itertools = "0.13.0" itertools = "0.13.0"
thiserror = "1.0.66"

View File

@@ -1,4 +1,16 @@
pub mod typed;
pub mod untyped;
use core::fmt; use core::fmt;
use std::str::FromStr;
use typed::{Type, TypedExpr};
pub use untyped::UntypedExpr;
use crate::{
error::{self, SemanticError, SemanticErrorKind},
symbols::SymbolsTable,
};
#[derive(Debug, Copy, Clone)] #[derive(Debug, Copy, Clone)]
pub struct Span { pub struct Span {
@@ -12,39 +24,6 @@ impl Span {
} }
} }
#[derive(Debug)]
pub enum Expr {
Int {
span: Span,
value: i64,
},
Float {
span: Span,
value: f64,
},
Var {
span: Span,
id: usize,
},
BinOp {
span: Span,
lhs: Box<Expr>,
op: BinOp,
rhs: Box<Expr>,
},
}
impl Expr {
pub fn span(&self) -> Span {
match self {
Expr::Float { span, .. } => *span,
Expr::Int { span, .. } => *span,
Expr::Var { span, .. } => *span,
Expr::BinOp { span, .. } => *span,
}
}
}
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub enum BinOp { pub enum BinOp {
Add, Add,
@@ -74,3 +53,78 @@ impl fmt::Display for BinOp {
} }
} }
} }
pub fn to_typed_expr(expr: UntypedExpr, symbols: &SymbolsTable) -> error::Result<TypedExpr> {
let expr = match expr {
UntypedExpr::Int { span, value } => TypedExpr::Int { span, value },
UntypedExpr::Float { span, value } => TypedExpr::Float { span, value },
UntypedExpr::Var {
span,
name,
typename,
} => {
let ty = typename
.and_then(|t| symbols.resolve(t))
.map(Type::from_str)
.transpose()
.map_err(|e| SemanticError::new(span, e))?
.unwrap_or(Type::Int);
TypedExpr::Var { span, name, ty }
}
UntypedExpr::BinOp { span, lhs, op, rhs } => {
let rhs = *rhs;
let lhs = *lhs;
match op {
BinOp::Div
if matches!(rhs, UntypedExpr::Int { .. } | UntypedExpr::Float { .. }) =>
{
match &rhs {
UntypedExpr::Int { span, value } if *value == 0 => {
return Err(SemanticError::new(
*span,
SemanticErrorKind::DivisionByZero,
)
.into())
}
UntypedExpr::Float { span, value } if *value == 0.0 => {
return Err(SemanticError::new(
*span,
SemanticErrorKind::DivisionByZero,
)
.into())
}
_ => {}
}
}
_ => {}
}
let lhs = to_typed_expr(lhs, symbols)?;
let rhs = to_typed_expr(rhs, symbols)?;
let (lhs, rhs) = match (lhs.ty(), rhs.ty()) {
(Type::Int, Type::Int) => (lhs, rhs),
(Type::Float, Type::Float) => (lhs, rhs),
(Type::Int, Type::Float) => {
let lhs = TypedExpr::cast_to_float(lhs);
(lhs, rhs)
}
(Type::Float, Type::Int) => {
let rhs = TypedExpr::cast_to_float(rhs);
(lhs, rhs)
}
};
TypedExpr::BinOp {
span,
lhs: Box::new(lhs),
op,
rhs: Box::new(rhs),
}
}
};
Ok(expr)
}

86
src/ast/typed.rs Normal file
View File

@@ -0,0 +1,86 @@
use std::{fmt, str::FromStr};
use crate::{error::SemanticErrorKind, symbols::Symbol};
use super::{BinOp, Span};
/// The value types a typed expression can have.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Type {
    /// Integer type.
    Int,
    /// Floating-point type.
    Float,
}
/// Parses a type name: `"I"` is [`Type::Int`] and `"F"` is [`Type::Float`].
impl FromStr for Type {
    type Err = SemanticErrorKind;

    /// Returns `SemanticErrorKind::UnknownType` carrying the offending text
    /// for any input other than `"I"` or `"F"`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let ty = match s {
            "I" => Type::Int,
            "F" => Type::Float,
            other => return Err(SemanticErrorKind::UnknownType(other.to_string())),
        };

        Ok(ty)
    }
}
/// Renders the type as its lowercase name (`int` / `float`).
impl fmt::Display for Type {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Type::Int => "int",
            Type::Float => "float",
        };

        f.write_str(label)
    }
}
/// Expression tree in which every node has a known [`Type`].
#[derive(Debug)]
pub enum TypedExpr {
    /// Integer literal.
    Int { span: Span, value: i64 },
    /// Floating-point literal.
    Float { span: Span, value: f64 },
    /// Variable reference together with its type.
    Var { span: Span, name: Symbol, ty: Type },
    /// Binary operation over two typed operands.
    BinOp {
        span: Span,
        lhs: Box<TypedExpr>,
        op: BinOp,
        rhs: Box<TypedExpr>,
    },
    /// Conversion node wrapping an inner expression; it carries no span of its
    /// own and always reports `Type::Float`.
    IntToFloat { value: Box<TypedExpr> },
}
impl TypedExpr {
    /// Wraps `self` in an `IntToFloat` node.
    ///
    /// No check is made that `self` is actually of type `Int`.
    pub fn cast_to_float(self) -> TypedExpr {
        let value = Box::new(self);
        TypedExpr::IntToFloat { value }
    }

    /// Source span of this node; a cast reports the span of the wrapped
    /// expression.
    pub fn span(&self) -> Span {
        match self {
            TypedExpr::IntToFloat { value } => value.span(),
            TypedExpr::Int { span, .. }
            | TypedExpr::Float { span, .. }
            | TypedExpr::Var { span, .. }
            | TypedExpr::BinOp { span, .. } => *span,
        }
    }

    /// Type of this node.
    ///
    /// A binary operation reports the type of its right operand.
    pub fn ty(&self) -> Type {
        match self {
            TypedExpr::Int { .. } => Type::Int,
            TypedExpr::Float { .. } | TypedExpr::IntToFloat { .. } => Type::Float,
            TypedExpr::Var { ty, .. } => *ty,
            TypedExpr::BinOp { rhs, .. } => rhs.ty(),
        }
    }
}

37
src/ast/untyped.rs Normal file
View File

@@ -0,0 +1,37 @@
use crate::symbols::Symbol;
use super::{BinOp, Span};
/// Expression tree before types have been assigned.
#[derive(Debug)]
pub enum UntypedExpr {
    /// Integer literal.
    Int { span: Span, value: i64 },
    /// Floating-point literal.
    Float { span: Span, value: f64 },
    /// Variable reference with an optional type-name symbol.
    Var {
        span: Span,
        name: Symbol,
        typename: Option<Symbol>,
    },
    /// Binary operation over two untyped operands.
    BinOp {
        span: Span,
        lhs: Box<UntypedExpr>,
        op: BinOp,
        rhs: Box<UntypedExpr>,
    },
}
impl UntypedExpr {
    /// Source span stored on this node.
    pub fn span(&self) -> Span {
        match self {
            UntypedExpr::Int { span, .. }
            | UntypedExpr::Float { span, .. }
            | UntypedExpr::Var { span, .. }
            | UntypedExpr::BinOp { span, .. } => *span,
        }
    }
}

View File

@@ -23,6 +23,10 @@ pub enum Command {
input: PathBuf, input: PathBuf,
output_tree: PathBuf, output_tree: PathBuf,
}, },
Sem {
input: PathBuf,
output_tree: PathBuf,
},
} }
impl Args { impl Args {
@@ -55,6 +59,7 @@ fn validate_inner(args: ArgsInner) -> Result<ArgsInner, clap::Error> {
output_symbols, output_symbols,
} => validate_lex(input, output_tokens, output_symbols)?, } => validate_lex(input, output_tokens, output_symbols)?,
Command::Syn { input, output_tree } => validate_syn(input, output_tree)?, Command::Syn { input, output_tree } => validate_syn(input, output_tree)?,
Command::Sem { input, output_tree } => validate_sem(input, output_tree)?,
}; };
Ok(args) Ok(args)
@@ -106,3 +111,21 @@ fn validate_syn(input: &PathBuf, output_tree: &PathBuf) -> Result<(), clap::Erro
Ok(()) Ok(())
} }
fn validate_sem(input: &PathBuf, output_tree: &PathBuf) -> Result<(), clap::Error> {
if !input.is_file() {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Input file '{}' does not exist", input.display()),
));
}
if output_tree == input {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
};
Ok(())
}

202
src/error.rs Normal file
View File

@@ -0,0 +1,202 @@
#![allow(dead_code)]
use std::fmt;
use crate::ast::Span;
pub type Result<T> = std::result::Result<T, Error>;
/// Crate-wide error: a source span plus the kind of failure found there.
///
/// `Display` is implemented by hand for this type.
#[derive(Debug, thiserror::Error)]
pub struct Error {
    /// Location the error refers to.
    span: Span,
    /// Which stage failed, and how.
    kind: ErrorKind,
}
/// The stage-specific cause carried by an [`Error`].
#[derive(Debug)]
pub enum ErrorKind {
    /// Failure reported as a lexical error.
    Lexical(LexicalErrorKind),
    /// Failure reported as a parse error.
    Parse(ParseErrorKind),
    /// Failure reported as a semantic error.
    Semantic(SemanticErrorKind),
}
/// Lexical error: a span plus a [`LexicalErrorKind`].
///
/// `Display` is implemented by hand for this type.
#[derive(Debug, thiserror::Error)]
pub struct LexicalError {
    /// Location the error refers to.
    span: Span,
    /// What went wrong.
    kind: LexicalErrorKind,
}
/// Causes of a lexical error.
#[derive(Debug)]
pub enum LexicalErrorKind {
    /// Displayed as "unrecognized token".
    UnrecognizedToken,
    /// Displayed as "bad number '<text>'", where the payload is the text.
    BadNumber(String),
}
/// Parse error: a span plus a [`ParseErrorKind`].
///
/// `Display` is implemented by hand for this type.
#[derive(Debug, thiserror::Error)]
pub struct ParseError {
    /// Location the error refers to.
    span: Span,
    /// What went wrong.
    kind: ParseErrorKind,
}
/// Causes of a parse error.
#[derive(Debug)]
pub enum ParseErrorKind {
    /// Displayed as "expected expression".
    ExpectedExpr,
    /// Displayed as "unexpected token".
    UnexpectedToken,
    /// Displayed as "unexpected end of file".
    UnexpectedEOF,
}
/// Semantic error: a span plus a [`SemanticErrorKind`].
///
/// `Display` is implemented by hand for this type.
#[derive(Debug, thiserror::Error)]
pub struct SemanticError {
    /// Location the error refers to.
    span: Span,
    /// What went wrong.
    kind: SemanticErrorKind,
}
/// Causes of a semantic error.
#[derive(Debug)]
pub enum SemanticErrorKind {
    /// Displayed as "unknown type '<name>'", where the payload is the name.
    UnknownType(String),
    /// Displayed as "division by zero".
    DivisionByZero,
}
impl Error {
pub fn new(span: Span, kind: ErrorKind) -> Self {
Self { span, kind }
}
pub fn span(&self) -> Span {
self.span
}
pub fn kind(&self) -> &ErrorKind {
&self.kind
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} at position {}", self.kind, self.span.start)
}
}
impl fmt::Display for ErrorKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ErrorKind::Lexical(e) => write!(f, "lexical error: {}", e),
ErrorKind::Parse(e) => write!(f, "parse error: {}", e),
ErrorKind::Semantic(e) => write!(f, "semantic error: {}", e),
}
}
}
impl From<LexicalError> for Error {
fn from(e: LexicalError) -> Self {
Self {
span: e.span(),
kind: ErrorKind::Lexical(e.kind),
}
}
}
impl From<ParseError> for Error {
fn from(e: ParseError) -> Self {
Self {
span: e.span(),
kind: ErrorKind::Parse(e.kind),
}
}
}
impl From<SemanticError> for Error {
fn from(e: SemanticError) -> Self {
Self {
span: e.span(),
kind: ErrorKind::Semantic(e.kind),
}
}
}
impl LexicalError {
pub fn new(span: Span, kind: LexicalErrorKind) -> Self {
Self { span, kind }
}
pub fn span(&self) -> Span {
self.span
}
pub fn kind(&self) -> &LexicalErrorKind {
&self.kind
}
}
impl fmt::Display for LexicalError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} at position {}", self.kind, self.span.start)
}
}
/// Human-readable message for each lexical error kind.
impl fmt::Display for LexicalErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LexicalErrorKind::BadNumber(text) => write!(f, "bad number '{}'", text),
            LexicalErrorKind::UnrecognizedToken => f.write_str("unrecognized token"),
        }
    }
}
impl ParseError {
pub fn new(span: Span, kind: ParseErrorKind) -> Self {
Self { span, kind }
}
pub fn span(&self) -> Span {
self.span
}
pub fn kind(&self) -> &ParseErrorKind {
&self.kind
}
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} at position {}", self.kind, self.span.start)
}
}
/// Human-readable message for each parse error kind.
impl fmt::Display for ParseErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            ParseErrorKind::ExpectedExpr => "expected expression",
            ParseErrorKind::UnexpectedToken => "unexpected token",
            ParseErrorKind::UnexpectedEOF => "unexpected end of file",
        };

        f.write_str(message)
    }
}
impl SemanticError {
pub fn new(span: Span, kind: SemanticErrorKind) -> Self {
Self { span, kind }
}
pub fn span(&self) -> Span {
self.span
}
pub fn kind(&self) -> &SemanticErrorKind {
&self.kind
}
}
impl fmt::Display for SemanticError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} at position {}", self.kind, self.span.start)
}
}
/// Human-readable message for each semantic error kind.
impl fmt::Display for SemanticErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SemanticErrorKind::DivisionByZero => f.write_str("division by zero"),
            SemanticErrorKind::UnknownType(name) => write!(f, "unknown type '{}'", name),
        }
    }
}

View File

@@ -1,23 +1,33 @@
use std::io::{self, Write};
use ast::Expr;
use parse::parser::Parser;
use symbols::SymbolsTable;
mod ast; mod ast;
mod cli; mod cli;
mod error;
mod parse; mod parse;
mod symbols; mod symbols;
fn write_expr(expr: &Expr, writer: &mut impl Write, prefix: &str, is_last: bool) -> io::Result<()> { use std::io::{self, Write};
use ast::{typed::TypedExpr, UntypedExpr};
use parse::parser::Parser;
use symbols::SymbolsTable;
fn write_typed_expr(
expr: &TypedExpr,
writer: &mut impl Write,
prefix: &str,
is_last: bool,
) -> io::Result<()> {
let branch = if is_last { "└──" } else { "├──" }; let branch = if is_last { "└──" } else { "├──" };
write!(writer, "{}{}", prefix, branch)?;
match expr { match expr {
Expr::Int { value, .. } => writeln!(writer, "{}{}<{}>", prefix, branch, value), TypedExpr::Int { value, .. } => writeln!(writer, "<{}>", value),
Expr::Float { value, .. } => writeln!(writer, "{}{}<{}>", prefix, branch, value), TypedExpr::Float { value, .. } => writeln!(writer, "<{}>", value),
Expr::Var { id, .. } => writeln!(writer, "{}{}<id,{}>", prefix, branch, id), TypedExpr::Var { name: id, ty, .. } => {
Expr::BinOp { lhs, op, rhs, .. } => { writeln!(writer, "<id,{},{}>", id, ty)
writeln!(writer, "{}{}<{}>", prefix, branch, op)?; }
TypedExpr::BinOp { lhs, op, rhs, .. } => {
writeln!(writer, "<{}>", op)?;
let new_prefix = if is_last { let new_prefix = if is_last {
format!("{} ", prefix) format!("{} ", prefix)
@@ -25,14 +35,66 @@ fn write_expr(expr: &Expr, writer: &mut impl Write, prefix: &str, is_last: bool)
format!("{}", prefix) format!("{}", prefix)
}; };
write_expr(lhs, writer, &new_prefix, false)?; write_typed_expr(lhs, writer, &new_prefix, false)?;
write_expr(rhs, writer, &new_prefix, true) write_typed_expr(rhs, writer, &new_prefix, true)
}
TypedExpr::IntToFloat { value, .. } => {
writeln!(writer, "i2f")?;
let new_prefix = if is_last {
format!("{} ", prefix)
} else {
format!("{}", prefix)
};
write_typed_expr(value, writer, &new_prefix, true)
} }
} }
} }
fn print_expr(expr: &Expr, writer: &mut impl Write) -> io::Result<()> { fn print_typed_expr(expr: &TypedExpr, writer: &mut impl Write) -> io::Result<()> {
write_expr(expr, writer, "", true) write_typed_expr(expr, writer, "", true)
}
/// Writes `expr` to `writer` as one line per node, recursing into the
/// operands of binary operations.
///
/// Each line starts with `prefix` followed by a branch marker: `└──` when
/// `is_last` is true, `├──` otherwise. Literals print as `<value>`, variables
/// as `<id,NAME>` or `<id,NAME,TYPENAME>` when a type name is present, and
/// binary operations as `<op>` followed by their left and right operands.
///
/// Returns the first I/O error reported by `writer`, if any.
fn write_untyped_expr(
expr: &UntypedExpr,
writer: &mut impl Write,
prefix: &str,
is_last: bool,
) -> io::Result<()> {
let branch = if is_last { "└──" } else { "├──" };
// The prefix and branch marker are emitted once, before the node itself.
write!(writer, "{}{}", prefix, branch)?;
match expr {
UntypedExpr::Int { value, .. } => writeln!(writer, "<{}>", value),
UntypedExpr::Float { value, .. } => writeln!(writer, "<{}>", value),
UntypedExpr::Var {
name: id, typename, ..
} => {
write!(writer, "<id,{}", id)?;
// The type name is appended only when the variable carries one.
if let Some(typename) = typename {
write!(writer, ",{}", typename)?;
}
writeln!(writer, ">")
}
UntypedExpr::BinOp { lhs, op, rhs, .. } => {
writeln!(writer, "<{}>", op)?;
// Children are written with an extended prefix.
// NOTE(review): the two prefix literals below differ by a single space
// here; they may have lost padding / a vertical-bar glyph in this view —
// confirm against the real file.
let new_prefix = if is_last {
format!("{} ", prefix)
} else {
format!("{}", prefix)
};
// The left operand is never the last child; the right operand always is.
write_untyped_expr(lhs, writer, &new_prefix, false)?;
write_untyped_expr(rhs, writer, &new_prefix, true)
}
}
}
fn print_untyped_expr(expr: &UntypedExpr, writer: &mut impl Write) -> io::Result<()> {
write_untyped_expr(expr, writer, "", true)
} }
fn main() -> anyhow::Result<()> { fn main() -> anyhow::Result<()> {
@@ -80,7 +142,25 @@ fn main() -> anyhow::Result<()> {
match res { match res {
Ok(expr) => { Ok(expr) => {
let mut writer_tree = io::BufWriter::new(std::fs::File::create(output_tree)?); let mut writer_tree = io::BufWriter::new(std::fs::File::create(output_tree)?);
print_expr(&expr, &mut writer_tree)?; print_untyped_expr(&expr, &mut writer_tree)?;
}
Err(e) => eprintln!("error: {}", e),
}
}
cli::Command::Sem { input, output_tree } => {
let input = std::fs::read_to_string(input)?;
let mut symbols = SymbolsTable::default();
let res = {
let tokens = parse::lexer::make_tokenizer(&input, &mut symbols);
let mut parser = Parser::new(tokens);
parser.parse()
}
.and_then(|expr| ast::to_typed_expr(expr, &symbols));
match res {
Ok(expr) => {
let mut writer_tree = io::BufWriter::new(std::fs::File::create(output_tree)?);
print_typed_expr(&expr, &mut writer_tree)?;
} }
Err(e) => eprintln!("error: {}", e), Err(e) => eprintln!("error: {}", e),
} }

View File

@@ -1,91 +0,0 @@
#![allow(dead_code)]
use core::fmt;
use crate::ast::Span;
/// Lexical error: a span plus a [`LexicalErrorKind`].
#[derive(Debug)]
pub struct LexicalError {
    /// Location the error refers to.
    span: Span,
    /// What went wrong.
    kind: LexicalErrorKind,
}
/// Causes of a lexical error.
#[derive(Debug)]
pub enum LexicalErrorKind {
    /// Displayed as "unrecognized token".
    UnrecognizedToken,
    /// Displayed as "bad number '<text>'", where the payload is the text.
    BadNumber(String),
}
impl LexicalError {
pub fn new(span: Span, kind: LexicalErrorKind) -> Self {
Self { span, kind }
}
pub fn span(&self) -> Span {
self.span
}
pub fn kind(&self) -> &LexicalErrorKind {
&self.kind
}
}
/// Human-readable message for each lexical error kind.
impl fmt::Display for LexicalErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LexicalErrorKind::BadNumber(text) => write!(f, "bad number '{}'", text),
            LexicalErrorKind::UnrecognizedToken => f.write_str("unrecognized token"),
        }
    }
}
impl fmt::Display for LexicalError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} at position {}", self.kind, self.span.start)
}
}
/// Parse error: a span plus a [`ParseErrorKind`].
#[derive(Debug)]
pub struct ParseError {
    /// Location the error refers to.
    span: Span,
    /// What went wrong.
    kind: ParseErrorKind,
}
/// Causes of a parse error, including a wrapped lexical error.
#[derive(Debug)]
pub enum ParseErrorKind {
    /// Displayed as "expected expression".
    ExpectedExpr,
    /// Displayed as "unexpected token".
    UnexpectedToken,
    /// Displayed as "unexpected end of file".
    UnexpectedEOF,
    /// A lexical error carried inside a parse error.
    Lexical(LexicalError),
}
impl ParseError {
pub fn new(span: Span, kind: ParseErrorKind) -> Self {
Self { span, kind }
}
pub fn span(&self) -> Span {
self.span
}
pub fn kind(&self) -> &ParseErrorKind {
&self.kind
}
}
impl fmt::Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{} at position {}", self.kind, self.span.start)
}
}
impl fmt::Display for ParseErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ParseErrorKind::ExpectedExpr => write!(f, "expected expression"),
ParseErrorKind::UnexpectedToken => write!(f, "unexpected token"),
ParseErrorKind::UnexpectedEOF => write!(f, "unexpected end of file"),
ParseErrorKind::Lexical(e) => write!(f, "lexical error: {}", e.kind()),
}
}
}

View File

@@ -1,12 +1,13 @@
use itertools::PeekNth; use itertools::PeekNth;
use crate::{ast::Span, symbols::SymbolsTable}; use crate::{
ast::Span,
use super::{
error::{LexicalError, LexicalErrorKind}, error::{LexicalError, LexicalErrorKind},
token::Token, symbols::SymbolsTable,
}; };
use super::token::Token;
pub type SpannedToken = (usize, Token, usize); pub type SpannedToken = (usize, Token, usize);
pub type LexerResult = Result<SpannedToken, LexicalError>; pub type LexerResult = Result<SpannedToken, LexicalError>;
@@ -15,19 +16,19 @@ pub struct Lexer<'s, T: Iterator<Item = (usize, char)>> {
chars: PeekNth<T>, chars: PeekNth<T>,
pos: usize, pos: usize,
pending: Vec<SpannedToken>, pending: Vec<SpannedToken>,
symbols: &'s mut SymbolsTable<String>, symbols: &'s mut SymbolsTable,
} }
pub fn make_tokenizer<'s>( pub fn make_tokenizer<'s>(
input: &'s str, input: &'s str,
symbols: &'s mut SymbolsTable<String>, symbols: &'s mut SymbolsTable,
) -> impl Iterator<Item = LexerResult> + 's { ) -> impl Iterator<Item = LexerResult> + 's {
let chars = input.char_indices(); let chars = input.char_indices();
Lexer::new(chars, symbols) Lexer::new(chars, symbols)
} }
impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> { impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> {
pub fn new(chars: T, symbols: &'s mut SymbolsTable<String>) -> Self { pub fn new(chars: T, symbols: &'s mut SymbolsTable) -> Self {
Self { Self {
chars: itertools::peek_nth(chars), chars: itertools::peek_nth(chars),
pos: 0, pos: 0,
@@ -133,6 +134,8 @@ impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> {
'/' => self.eat_next_char(Token::Slash), '/' => self.eat_next_char(Token::Slash),
'(' => self.eat_next_char(Token::LParen), '(' => self.eat_next_char(Token::LParen),
')' => self.eat_next_char(Token::RParen), ')' => self.eat_next_char(Token::RParen),
'[' => self.eat_next_char(Token::LBracket),
']' => self.eat_next_char(Token::RBracket),
c if c.is_whitespace() => { c if c.is_whitespace() => {
let _start = self.get_pos(); let _start = self.get_pos();
let _c = self.next_char(); let _c = self.next_char();

View File

@@ -1,4 +1,3 @@
pub mod lexer; pub mod lexer;
pub mod parser;
pub mod token; pub mod token;
pub mod error;
pub mod parser;

View File

@@ -1,12 +1,11 @@
use itertools::PeekNth; use itertools::PeekNth;
use crate::{ use crate::{
ast::{Expr, Span}, ast::{Span, UntypedExpr},
parse::error::ParseErrorKind, error::{self, ParseError, ParseErrorKind},
}; };
use super::{ use super::{
error::ParseError,
lexer::{LexerResult, SpannedToken}, lexer::{LexerResult, SpannedToken},
token::Token, token::Token,
}; };
@@ -28,26 +27,30 @@ where
} }
} }
pub fn parse(&mut self) -> Result<Expr, ParseError> { pub fn parse(&mut self) -> error::Result<UntypedExpr> {
let expr = self.parse_expr()?; let expr = self.parse_expr()?;
if self.has_next() { if self.has_next() {
let (start, _, end) = self.next_token()?; let (start, _, end) = self.next_token()?;
return Err(ParseError::new(
Span::new(start, end), return Err(
ParseErrorKind::UnexpectedToken, ParseError::new(Span::new(start, end), ParseErrorKind::UnexpectedToken).into(),
)); );
} }
Ok(expr) Ok(expr)
} }
fn parse_expr(&mut self) -> Result<Expr, ParseError> { fn parse_expr(&mut self) -> error::Result<UntypedExpr> {
let lhs = self.parse_primary_expr()?; let lhs = self.parse_primary_expr()?;
self.parse_expr_inner(lhs, 0) self.parse_expr_inner(lhs, 0)
} }
fn parse_expr_inner(&mut self, lhs: Expr, min_precedence: u8) -> Result<Expr, ParseError> { fn parse_expr_inner(
&mut self,
lhs: UntypedExpr,
min_precedence: u8,
) -> error::Result<UntypedExpr> {
let mut lhs = lhs; let mut lhs = lhs;
let mut op = self.peek_token_nth(0).and_then(|token| token.as_bin_op()); let mut op = self.peek_token_nth(0).and_then(|token| token.as_bin_op());
@@ -70,7 +73,7 @@ where
op = self.peek_token_nth(0).and_then(|token| token.as_bin_op()); op = self.peek_token_nth(0).and_then(|token| token.as_bin_op());
} }
lhs = Expr::BinOp { lhs = UntypedExpr::BinOp {
span: Span::new(lhs.span().start, rhs.span().end), span: Span::new(lhs.span().start, rhs.span().end),
lhs: Box::new(lhs), lhs: Box::new(lhs),
op: op1, op: op1,
@@ -81,22 +84,42 @@ where
Ok(lhs) Ok(lhs)
} }
fn parse_primary_expr(&mut self) -> Result<Expr, ParseError> { fn parse_primary_expr(&mut self) -> error::Result<UntypedExpr> {
let token = self.next_token()?; let token = self.next_token()?;
match token { match token {
(start, Token::Float(value), end) => Ok(Expr::Float { (start, Token::Float(value), end) => Ok(UntypedExpr::Float {
span: Span::new(start, end), span: Span::new(start, end),
value, value,
}), }),
(start, Token::Int(value), end) => Ok(Expr::Int { (start, Token::Int(value), end) => Ok(UntypedExpr::Int {
span: Span::new(start, end), span: Span::new(start, end),
value, value,
}), }),
(start, Token::Name(id), end) => Ok(Expr::Var { (start, Token::Name(name), end) => {
span: Span::new(start, end), let typename = match self.peek_token_nth(0) {
id, Some(Token::LBracket) => {
}), let _ = self.next_token()?;
let typename =
self.expect_token_predicate(|t| matches!(t, Token::Name(_)))?;
let _ = self.expect_token(Token::RBracket)?;
let typename = match typename.1 {
Token::Name(id) => id,
_ => unreachable!(),
};
Some(typename)
}
_ => None,
};
Ok(UntypedExpr::Var {
span: Span::new(start, end),
name,
typename,
})
}
(_, Token::LParen, _) => { (_, Token::LParen, _) => {
let expr = self.parse_expr()?; let expr = self.parse_expr()?;
let _ = self.expect_token(Token::RParen)?; let _ = self.expect_token(Token::RParen)?;
@@ -106,22 +129,19 @@ where
(start, _, end) => Err(ParseError::new( (start, _, end) => Err(ParseError::new(
Span::new(start, end), Span::new(start, end),
ParseErrorKind::ExpectedExpr, ParseErrorKind::ExpectedExpr,
)), ))?,
} }
} }
fn next_token(&mut self) -> Result<SpannedToken, ParseError> { fn next_token(&mut self) -> error::Result<SpannedToken> {
let token = self.tokens.next(); let token = self.tokens.next();
match token { match token {
Some(Ok((start, token, end))) => { Some(Ok((start, token, end))) => {
self.last_span = Span::new(start, end); self.last_span = Span::new(start, end);
Ok((start, token, end)) Ok((start, token, end))
} }
Some(Err(e)) => Err(ParseError::new(e.span(), ParseErrorKind::Lexical(e))), Some(Err(e)) => Err(e.into()),
None => Err(ParseError::new( None => Err(ParseError::new(self.last_span, ParseErrorKind::UnexpectedEOF).into()),
self.last_span,
ParseErrorKind::UnexpectedEOF,
)),
} }
} }
@@ -136,14 +156,26 @@ where
self.tokens.peek().is_some() self.tokens.peek().is_some()
} }
fn expect_token(&mut self, token: Token) -> Result<(usize, usize), ParseError> { fn expect_token(&mut self, token: Token) -> error::Result<(usize, usize)> {
let t = self.next_token()?; let t = self.next_token()?;
match t { match t {
(start, t, end) if t == token => Ok((start, end)), (start, t, end) if t == token => Ok((start, end)),
(start, _, end) => Err(ParseError::new( (start, _, end) => {
Span::new(start, end), Err(ParseError::new(Span::new(start, end), ParseErrorKind::UnexpectedToken).into())
ParseErrorKind::UnexpectedToken, }
)), }
}
fn expect_token_predicate(
&mut self,
pred: impl Fn(&Token) -> bool,
) -> error::Result<SpannedToken> {
let t = self.next_token()?;
match t {
(start, t, end) if pred(&t) => Ok((start, t, end)),
(start, _, end) => {
Err(ParseError::new(Span::new(start, end), ParseErrorKind::UnexpectedToken).into())
}
} }
} }
} }

View File

@@ -1,10 +1,10 @@
use std::fmt; use std::fmt;
use crate::ast::BinOp; use crate::{ast::BinOp, symbols::Symbol};
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Token { pub enum Token {
Name(usize), Name(Symbol),
Float(f64), Float(f64),
Int(i64), Int(i64),
// operators // operators
@@ -13,8 +13,10 @@ pub enum Token {
Star, // * Star, // *
Slash, // / Slash, // /
// punctuation // punctuation
LParen, // ( LParen, // (
RParen, // ) RParen, // )
LBracket, // [
RBracket, // ]
// extra // extra
// NewLine, // new line // NewLine, // new line
EndOfFile, // end of file EndOfFile, // end of file
@@ -32,6 +34,8 @@ impl Token {
Token::Slash => "div", Token::Slash => "div",
Token::LParen => "lparen", Token::LParen => "lparen",
Token::RParen => "rparen", Token::RParen => "rparen",
Token::LBracket => "lbracket",
Token::RBracket => "rbracket",
// Token::NewLine => "new line", // Token::NewLine => "new line",
Token::EndOfFile => "end of file", Token::EndOfFile => "end of file",
} }
@@ -60,6 +64,8 @@ impl fmt::Display for Token {
Token::Slash => write!(f, "</>"), Token::Slash => write!(f, "</>"),
Token::LParen => write!(f, "<(>"), Token::LParen => write!(f, "<(>"),
Token::RParen => write!(f, "<)>"), Token::RParen => write!(f, "<)>"),
Token::LBracket => write!(f, "<[>"),
Token::RBracket => write!(f, "<]>"),
// Token::NewLine => write!(f, "<new line>"), // Token::NewLine => write!(f, "<new line>"),
Token::EndOfFile => write!(f, "<end of file>"), Token::EndOfFile => write!(f, "<end of file>"),
} }

0
src/symbols/builtin.rs Normal file
View File

View File

@@ -1,12 +1,26 @@
use std::collections::{hash_map, HashMap}; pub mod builtin;
use std::{
collections::{hash_map, HashMap},
fmt::Display,
};
/// Identifier for an interned name: a newtype over the numeric id that
/// `SymbolsTable` assigns.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
pub struct Symbol(usize);
impl Display for Symbol {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct SymbolsTable<S: std::hash::Hash + Eq> { pub struct SymbolsTable {
symbols: HashMap<S, usize>, symbols: HashMap<String, Symbol>,
next_id: usize, next_id: usize,
} }
impl<S: std::hash::Hash + Eq> SymbolsTable<S> { impl SymbolsTable {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
symbols: HashMap::new(), symbols: HashMap::new(),
@@ -14,36 +28,46 @@ impl<S: std::hash::Hash + Eq> SymbolsTable<S> {
} }
} }
pub fn add(&mut self, symbol: S) { pub fn add(&mut self, symbol: impl Into<String>) {
if let hash_map::Entry::Vacant(e) = self.symbols.entry(symbol) { if let hash_map::Entry::Vacant(e) = self.symbols.entry(symbol.into()) {
e.insert(self.next_id); e.insert(Symbol(self.next_id));
self.next_id += 1; self.next_id += 1;
} }
} }
pub fn get(&self, symbol: &S) -> Option<usize> { pub fn get(&self, symbol: &str) -> Option<Symbol> {
self.symbols.get(symbol).copied() self.symbols.get(symbol).copied()
} }
/// Looks up the name that was registered under `symbol`.
///
/// Scans the table's entries for one whose id equals `symbol` and returns
/// its name, or `None` when no entry matches.
pub fn resolve(&self, symbol: Symbol) -> Option<&str> {
    self.symbols
        .iter()
        .find(|(_, id)| **id == symbol)
        .map(|(name, _)| name.as_str())
}
} }
impl<S: std::hash::Hash + Eq> Default for SymbolsTable<S> { impl Default for SymbolsTable {
fn default() -> Self { fn default() -> Self {
Self::new() Self::new()
} }
} }
impl<S: std::hash::Hash + Eq> IntoIterator for SymbolsTable<S> { impl IntoIterator for SymbolsTable {
type Item = (S, usize); type Item = (String, Symbol);
type IntoIter = hash_map::IntoIter<S, usize>; type IntoIter = hash_map::IntoIter<String, Symbol>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
self.symbols.into_iter() self.symbols.into_iter()
} }
} }
impl<'a, S: std::hash::Hash + Eq> IntoIterator for &'a SymbolsTable<S> { impl<'a> IntoIterator for &'a SymbolsTable {
type Item = (&'a S, &'a usize); type Item = (&'a String, &'a Symbol);
type IntoIter = hash_map::Iter<'a, S, usize>; type IntoIter = hash_map::Iter<'a, String, Symbol>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
self.symbols.iter() self.symbols.iter()