1
0
This commit is contained in:
2024-10-04 16:17:45 +03:00
parent 532a0560f2
commit 4827bdb4c0
4 changed files with 54 additions and 45 deletions

View File

@@ -1,7 +1,6 @@
use std::io; use std::io;
use std::io::Write; use std::io::Write;
use parse::token::Token;
use symbols::SymbolsTable; use symbols::SymbolsTable;
mod cli; mod cli;
@@ -12,20 +11,14 @@ fn main() -> anyhow::Result<()> {
let args = cli::Args::parse(); let args = cli::Args::parse();
let input = std::fs::read_to_string(&args.input).unwrap(); let input = std::fs::read_to_string(&args.input).unwrap();
let tokens = parse::lexer::make_tokenizer(&input).collect::<Result<Vec<_>, _>>(); let mut symbols = SymbolsTable::default();
let tokens = parse::lexer::make_tokenizer(&input, &mut symbols).collect::<Result<Vec<_>, _>>();
match tokens { match tokens {
Ok(tokens) => { Ok(tokens) => {
let symbols = SymbolsTable::from(tokens.iter().map(|(_, token, _)| token));
let mut writer_tokens = io::BufWriter::new(std::fs::File::create(&args.output_tokens)?); let mut writer_tokens = io::BufWriter::new(std::fs::File::create(&args.output_tokens)?);
for (_, token, _) in tokens { for (_, token, _) in tokens {
match token { writeln!(writer_tokens, "{token:>6} - {}", token.as_str())?;
Token::Name(ref name) => match symbols.get(name) {
Some(id) => writeln!(writer_tokens, "{token} ({id})")?,
None => writeln!(writer_tokens, "{token} (undefined)")?,
},
_ => writeln!(writer_tokens, "{token}")?,
}
} }
let mut writer_symbols = let mut writer_symbols =

View File

@@ -1,28 +1,35 @@
use itertools::PeekNth; use itertools::PeekNth;
use crate::symbols::SymbolsTable;
use super::{error::LexicalError, token::Token}; use super::{error::LexicalError, token::Token};
pub type SpannedToken = (usize, Token, usize); pub type SpannedToken = (usize, Token, usize);
pub type LexerResult = Result<SpannedToken, LexicalError>; pub type LexerResult = Result<SpannedToken, LexicalError>;
#[derive(Debug)] #[derive(Debug)]
pub struct Lexer<T: Iterator<Item = (usize, char)>> { pub struct Lexer<'s, T: Iterator<Item = (usize, char)>> {
chars: PeekNth<T>, chars: PeekNth<T>,
pos: usize, pos: usize,
pending: Vec<SpannedToken>, pending: Vec<SpannedToken>,
symbols: &'s mut SymbolsTable<String>,
} }
pub fn make_tokenizer(input: &str) -> impl Iterator<Item = LexerResult> + '_ { pub fn make_tokenizer<'s>(
input: &'s str,
symbols: &'s mut SymbolsTable<String>,
) -> impl Iterator<Item = LexerResult> + 's {
let chars = input.char_indices(); let chars = input.char_indices();
Lexer::new(chars) Lexer::new(chars, symbols)
} }
impl<T: Iterator<Item = (usize, char)>> Lexer<T> { impl<'s, T: Iterator<Item = (usize, char)>> Lexer<'s, T> {
pub fn new(chars: T) -> Self { pub fn new(chars: T, symbols: &'s mut SymbolsTable<String>) -> Self {
Self { Self {
chars: itertools::peek_nth(chars), chars: itertools::peek_nth(chars),
pos: 0, pos: 0,
pending: vec![], pending: vec![],
symbols,
} }
} }
@@ -68,7 +75,13 @@ impl<T: Iterator<Item = (usize, char)>> Lexer<T> {
let end = self.get_pos(); let end = self.get_pos();
Ok((start, Token::Name(name), end)) if let Some(id) = self.symbols.get(&name) {
Ok((start, Token::Name(id), end))
} else {
self.symbols.add(name.clone());
let id = self.symbols.get(&name).unwrap();
Ok((start, Token::Name(id), end))
}
} }
fn lex_number(&mut self) -> LexerResult { fn lex_number(&mut self) -> LexerResult {
@@ -137,8 +150,8 @@ impl<T: Iterator<Item = (usize, char)>> Lexer<T> {
self.emit((start, token, end)); self.emit((start, token, end));
} }
fn is_number_start(&self, c: char, c1: Option<char>) -> bool { fn is_number_start(&self, c: char, _c1: Option<char>) -> bool {
c.is_ascii_digit() || (c == '.' && c1.map_or(false, |c| c.is_ascii_digit())) c.is_ascii_digit()
} }
fn is_digit(&self, c: char) -> bool { fn is_digit(&self, c: char) -> bool {
@@ -173,7 +186,7 @@ impl<T: Iterator<Item = (usize, char)>> Lexer<T> {
} }
} }
impl<T: Iterator<Item = (usize, char)>> Iterator for Lexer<T> { impl<'s, T: Iterator<Item = (usize, char)>> Iterator for Lexer<'s, T> {
type Item = LexerResult; type Item = LexerResult;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {

View File

@@ -2,7 +2,7 @@ use std::fmt;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum Token { pub enum Token {
Name(String), Name(usize),
Float(f64), Float(f64),
Int(i64), Int(i64),
// operators // operators
@@ -18,18 +18,36 @@ pub enum Token {
EndOfFile, // end of file EndOfFile, // end of file
} }
impl Token {
    /// Returns the short, human-readable label for this token's kind.
    ///
    /// Only the variant decides the label: the payloads carried by `Name`,
    /// `Float` and `Int` are ignored. `Star` and `Slash` are labelled by the
    /// operation they denote (`"mul"` and `"div"`).
    ///
    /// Every label is a string literal, so the returned slice is `'static`
    /// and can be kept after the token itself has been moved or dropped.
    pub fn as_str(&self) -> &'static str {
        match self {
            Token::Name(_) => "name",
            Token::Float(_) => "float",
            Token::Int(_) => "int",
            Token::Plus => "plus",
            Token::Minus => "minus",
            Token::Star => "mul",
            Token::Slash => "div",
            Token::LParen => "lparen",
            Token::RParen => "rparen",
            Token::NewLine => "new line",
            Token::EndOfFile => "end of file",
        }
    }
}
impl fmt::Display for Token { impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Token::Name(name) => write!(f, "{name} - name"), Token::Name(id) => write!(f, "<id,{id}>"),
Token::Float(number) => write!(f, "{number} - float"), Token::Float(number) => write!(f, "<{number}>"),
Token::Int(number) => write!(f, "{number} - int"), Token::Int(number) => write!(f, "<{number}>"),
Token::Plus => write!(f, "+"), Token::Plus => write!(f, "<+>"),
Token::Minus => write!(f, "-"), Token::Minus => write!(f, "<->"),
Token::Star => write!(f, "*"), Token::Star => write!(f, "<*>"),
Token::Slash => write!(f, "/"), Token::Slash => write!(f, "</>"),
Token::LParen => write!(f, "("), Token::LParen => write!(f, "<(>"),
Token::RParen => write!(f, ")"), Token::RParen => write!(f, "<)>"),
Token::NewLine => write!(f, "<new line>"), Token::NewLine => write!(f, "<new line>"),
Token::EndOfFile => write!(f, "<end of file>"), Token::EndOfFile => write!(f, "<end of file>"),
} }

View File

@@ -1,7 +1,6 @@
use std::collections::{hash_map, HashMap}; use std::collections::{hash_map, HashMap};
use crate::parse::token::Token; #[derive(Debug)]
pub struct SymbolsTable<S: std::hash::Hash + Eq> { pub struct SymbolsTable<S: std::hash::Hash + Eq> {
symbols: HashMap<S, usize>, symbols: HashMap<S, usize>,
next_id: usize, next_id: usize,
@@ -50,17 +49,3 @@ impl<'a, S: std::hash::Hash + Eq> IntoIterator for &'a SymbolsTable<S> {
self.symbols.iter() self.symbols.iter()
} }
} }
impl<'a, I: Iterator<Item = &'a Token>> From<I> for SymbolsTable<String> {
    /// Builds a symbols table from a stream of borrowed tokens.
    ///
    /// Starts from `Self::new()`, then walks the tokens in iteration order.
    /// For each `Token::Name` the name is cloned and handed to `add`; tokens
    /// of any other kind are skipped.
    fn from(value: I) -> Self {
        let mut table = Self::new();
        // Keep only the identifier tokens, borrowing the name each one carries.
        let names = value.filter_map(|token| match token {
            Token::Name(name) => Some(name),
            _ => None,
        });
        for name in names {
            table.add(name.clone());
        }
        table
    }
}