1
0
This commit is contained in:
2024-10-04 02:14:52 +03:00
parent 1f0729ca81
commit 532a0560f2
10 changed files with 679 additions and 2 deletions

2
.gitignore vendored
View File

@@ -1 +1,3 @@
/target
/test
/.vscode

260
Cargo.lock generated Normal file
View File

@@ -0,0 +1,260 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anstream"
version = "0.6.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
[[package]]
name = "anstyle-parse"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "anyhow"
version = "1.0.89"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
[[package]]
name = "clap"
version = "4.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7be5744db7978a28d9df86a214130d106a89ce49644cbc4e3f0c22c3fba30615"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a5fbc17d3ef8278f55b282b2a2e75ae6f6c7d4bb70ed3d0382375104bfafdb4b"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "colorchoice"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
[[package]]
name = "developing-compilers"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"itertools",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
dependencies = [
"proc-macro2",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.79"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

View File

@@ -4,3 +4,6 @@ version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0.89"
clap = { version = "4.5.19", features = ["derive"] }
itertools = "0.13.0"

61
src/cli.rs Normal file
View File

@@ -0,0 +1,61 @@
use std::{ops::Deref, path::PathBuf};
use clap::{CommandFactory, Parser};
/// Command-line arguments that have passed validation.
///
/// Wraps an [`ArgsInner`]; `Args::parse` only constructs this value once
/// `validate_inner` has accepted the parsed arguments.
pub struct Args {
// The parsed arguments; reachable from outside through the `Deref` impl.
inner: ArgsInner,
}
// Raw arguments as parsed by clap, before any validation.
// NOTE: plain `//` comments are used on purpose here: with `#[derive(Parser)]`,
// `///` doc comments would be picked up by clap as help text.
#[derive(Parser)]
pub struct ArgsInner {
// Path of the source file to read; `validate_inner` requires it to be an existing file.
pub input: PathBuf,
// Path the token listing is written to.
pub output_tokens: PathBuf,
// Path the symbol table is written to.
pub output_symbols: PathBuf,
}
impl Args {
    /// Parses the process arguments and validates them.
    ///
    /// Parsing itself is delegated to `ArgsInner::parse`. When
    /// `validate_inner` rejects the result, the error is formatted against
    /// the generated clap command and the process exits.
    pub fn parse() -> Self {
        let inner = validate_inner(ArgsInner::parse()).unwrap_or_else(|rejection| {
            // Formatting needs the command definition to render the message.
            let mut cmd = ArgsInner::command();
            rejection.format(&mut cmd).exit()
        });

        Self { inner }
    }
}
/// Gives read-only access to the validated argument fields directly on `Args`.
impl Deref for Args {
    type Target = ArgsInner;

    fn deref(&self) -> &Self::Target {
        let Self { inner } = self;
        inner
    }
}
fn validate_inner(args: ArgsInner) -> Result<ArgsInner, clap::Error> {
if !args.input.is_file() {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
format!("Input file '{}' does not exist", args.input.display()),
));
}
if args.input == args.output_tokens {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
}
if args.input == args.output_symbols {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Input and output files cannot be the same",
));
}
Ok(args)
}

View File

@@ -1,3 +1,43 @@
fn main() {
println!("Hello, world!");
use std::io;
use std::io::Write;
use parse::token::Token;
use symbols::SymbolsTable;
mod cli;
mod parse;
mod symbols;
fn main() -> anyhow::Result<()> {
let args = cli::Args::parse();
let input = std::fs::read_to_string(&args.input).unwrap();
let tokens = parse::lexer::make_tokenizer(&input).collect::<Result<Vec<_>, _>>();
match tokens {
Ok(tokens) => {
let symbols = SymbolsTable::from(tokens.iter().map(|(_, token, _)| token));
let mut writer_tokens = io::BufWriter::new(std::fs::File::create(&args.output_tokens)?);
for (_, token, _) in tokens {
match token {
Token::Name(ref name) => match symbols.get(name) {
Some(id) => writeln!(writer_tokens, "{token} ({id})")?,
None => writeln!(writer_tokens, "{token} (undefined)")?,
},
_ => writeln!(writer_tokens, "{token}")?,
}
}
let mut writer_symbols =
io::BufWriter::new(std::fs::File::create(&args.output_symbols)?);
for (name, id) in &symbols {
writeln!(writer_symbols, "{name} -> {id}")?;
}
}
Err(err) => {
eprintln!("error: {err}");
}
}
Ok(())
}

20
src/parse/error.rs Normal file
View File

@@ -0,0 +1,20 @@
use core::fmt;
/// An error produced while splitting the input into tokens.
///
/// The `usize` in each variant is the position reported by the lexer for the
/// offending text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexicalError {
    /// A character that does not start any known token.
    UnrecognizedToken(usize, char),
    /// Text that looked like a number but could not be parsed as one.
    BadNumber(usize, String),
}

impl fmt::Display for LexicalError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LexicalError::UnrecognizedToken(pos, c) => {
                write!(f, "unrecognized token '{c}' at position {pos}")
            }
            LexicalError::BadNumber(pos, number) => {
                write!(f, "bad number '{number}' at position {pos}")
            }
        }
    }
}

// Implementing the standard error trait lets this type be propagated with `?`
// into `anyhow::Result` or `Box<dyn std::error::Error>`.
impl std::error::Error for LexicalError {}

185
src/parse/lexer.rs Normal file
View File

@@ -0,0 +1,185 @@
use itertools::PeekNth;
use super::{error::LexicalError, token::Token};
/// A token together with its span, laid out as `(start, token, end)` using
/// the positions tracked by the lexer.
pub type SpannedToken = (usize, Token, usize);
/// Outcome of lexing one token: the spanned token, or the lexical error hit instead.
pub type LexerResult = Result<SpannedToken, LexicalError>;
/// Turns a stream of `(position, char)` pairs into tokens.
///
/// Tokens are produced through the `Iterator` impl.
#[derive(Debug)]
pub struct Lexer<T: Iterator<Item = (usize, char)>> {
// Input characters, wrapped so that upcoming characters can be peeked without consuming them.
chars: PeekNth<T>,
// Position recorded after the most recently consumed character (0 before anything is consumed).
pos: usize,
// Tokens that have been emitted but not yet handed out by `next_token`.
pending: Vec<SpannedToken>,
}
/// Builds a lazy token stream over `input`.
///
/// The lexer is fed the string's `char_indices`, and each item of the
/// returned iterator is either a spanned token or the lexical error met at
/// that point.
pub fn make_tokenizer(input: &str) -> impl Iterator<Item = LexerResult> + '_ {
    Lexer::new(input.char_indices())
}
impl<T: Iterator<Item = (usize, char)>> Lexer<T> {
    /// Creates a lexer over `chars`.
    ///
    /// The `usize` of each pair is treated as the byte offset of the
    /// character, as produced by `str::char_indices`.
    pub fn new(chars: T) -> Self {
        Self {
            chars: itertools::peek_nth(chars),
            pos: 0,
            pending: vec![],
        }
    }

    /// Returns the next token, lexing more input until one is available.
    ///
    /// At the end of input this yields `Token::EndOfFile` (on every call).
    fn next_token(&mut self) -> LexerResult {
        while self.pending.is_empty() {
            self.consume_token()?;
        }
        // `consume_token` queues at most one token per call, so the queue
        // holds a single element here and `remove(0)` is cheap.
        Ok(self.pending.remove(0))
    }

    /// Lexes one step of input.
    ///
    /// Depending on the next character this queues a name, a number, or the
    /// result of `consume_char` (which may queue nothing, e.g. for skipped
    /// whitespace). When the input is exhausted an `EndOfFile` token is queued.
    fn consume_token(&mut self) -> Result<(), LexicalError> {
        if let Some(c) = self.peek_char_nth(0) {
            // One extra character of lookahead is needed to tell ".5" from ".".
            let c1 = self.peek_char_nth(1);
            if self.is_name_start(c) {
                let name = self.lex_name()?;
                self.emit(name);
            } else if self.is_number_start(c, c1) {
                let number = self.lex_number()?;
                self.emit(number);
            } else {
                self.consume_char(c)?;
            }
        } else {
            let pos = self.get_pos();
            self.emit((pos, Token::EndOfFile, pos));
        }
        Ok(())
    }

    /// Consumes the longest run of name characters and returns it as `Token::Name`.
    fn lex_name(&mut self) -> LexerResult {
        let mut name = String::new();
        let start = self.get_pos();
        while let Some(c) = self.peek_char_nth(0) {
            if self.is_name_continue(c) {
                name.push(self.next_char().expect("lex_name: no more characters"));
            } else {
                break;
            }
        }
        let end = self.get_pos();
        Ok((start, Token::Name(name), end))
    }

    /// Consumes digits containing at most one `.` and returns the number token.
    ///
    /// The text is parsed as `i64` first and as `f64` second.
    ///
    /// # Errors
    ///
    /// Returns `LexicalError::BadNumber` when neither parse succeeds.
    fn lex_number(&mut self) -> LexerResult {
        let mut number = String::new();
        let start = self.get_pos();
        let mut passed_dot = false;
        while let Some(c) = self.peek_char_nth(0) {
            if self.is_digit(c) {
                number.push(self.next_char().expect("lex_number: no more characters"));
            } else if !passed_dot && c == '.' {
                passed_dot = true;
                number.push(self.next_char().expect("lex_number: no more characters"));
            } else {
                break;
            }
        }
        let end = self.get_pos();
        Ok((
            start,
            match number.parse::<i64>() {
                Ok(n) => Token::Int(n),
                Err(_) => match number.parse::<f64>() {
                    Ok(n) => Token::Float(n),
                    Err(_) => return Err(LexicalError::BadNumber(start, number)),
                },
            },
            end,
        ))
    }

    /// Handles a character that starts neither a name nor a number.
    ///
    /// Operators and parentheses are queued as tokens. Whitespace is consumed
    /// silently, except that `'\n'` queues `Token::NewLine`.
    ///
    /// # Errors
    ///
    /// Any other character is consumed and reported as
    /// `LexicalError::UnrecognizedToken`.
    fn consume_char(&mut self, c: char) -> Result<(), LexicalError> {
        match c {
            '+' => self.eat_next_char(Token::Plus),
            '-' => self.eat_next_char(Token::Minus),
            '*' => self.eat_next_char(Token::Star),
            '/' => self.eat_next_char(Token::Slash),
            '(' => self.eat_next_char(Token::LParen),
            ')' => self.eat_next_char(Token::RParen),
            c if c.is_whitespace() => {
                let start = self.get_pos();
                let _ = self.next_char();
                let end = self.get_pos();
                if c == '\n' {
                    self.emit((start, Token::NewLine, end));
                }
            }
            _ => {
                let pos = self.get_pos();
                // Consume the offending character so lexing can continue after the error.
                let _ = self.next_char();
                return Err(LexicalError::UnrecognizedToken(pos, c));
            }
        }
        Ok(())
    }

    /// Consumes one character and queues `token` spanning it.
    fn eat_next_char(&mut self, token: Token) {
        let start = self.get_pos();
        let _ = self.next_char().expect("eat_next_char: no more characters");
        let end = self.get_pos();
        self.emit((start, token, end));
    }

    /// Returns whether `c` starts a number: an ASCII digit, or a `.` directly
    /// followed by an ASCII digit (`c1`).
    fn is_number_start(&self, c: char, c1: Option<char>) -> bool {
        c.is_ascii_digit() || (c == '.' && c1.is_some_and(|next| next.is_ascii_digit()))
    }

    /// Returns whether `c` is an ASCII digit.
    fn is_digit(&self, c: char) -> bool {
        c.is_ascii_digit()
    }

    /// Returns whether `c` may begin a name (alphabetic or `_`).
    fn is_name_start(&self, c: char) -> bool {
        c.is_alphabetic() || c == '_'
    }

    /// Returns whether `c` may appear after the first character of a name
    /// (alphanumeric or `_`).
    fn is_name_continue(&self, c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    /// Consumes the next character and moves the position just past it.
    fn next_char(&mut self) -> Option<char> {
        let (pos, ch) = self.chars.next()?;
        // Offsets are byte offsets, so step by the character's encoded width;
        // adding 1 would land inside a multi-byte character and skew every
        // span that follows it.
        self.pos = pos + ch.len_utf8();
        Some(ch)
    }

    /// Queues a token to be returned by `next_token`.
    fn emit(&mut self, token: SpannedToken) {
        self.pending.push(token);
    }

    /// Returns the position just past the last consumed character.
    fn get_pos(&self) -> usize {
        self.pos
    }

    /// Returns the character `n` places ahead without consuming anything.
    fn peek_char_nth(&mut self, n: usize) -> Option<char> {
        self.chars.peek_nth(n).map(|(_, c)| *c)
    }
}
/// Yields lexing results until the end-of-file token is reached.
impl<T: Iterator<Item = (usize, char)>> Iterator for Lexer<T> {
    type Item = LexerResult;

    /// Returns the next token or error; the `EndOfFile` token itself is not
    /// yielded but ends the iteration.
    fn next(&mut self) -> Option<Self::Item> {
        let outcome = self.next_token();
        if matches!(outcome, Ok((_, Token::EndOfFile, _))) {
            None
        } else {
            Some(outcome)
        }
    }
}

3
src/parse/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
pub mod lexer;
pub mod token;
pub mod error;

37
src/parse/token.rs Normal file
View File

@@ -0,0 +1,37 @@
use std::fmt;
/// A lexical token.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// A name, carrying its text.
    Name(String),
    /// A floating-point number.
    Float(f64),
    /// An integer number.
    Int(i64),

    // Operators.
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,

    // Punctuation.
    /// `(`
    LParen,
    /// `)`
    RParen,

    // Extra.
    /// A line break.
    NewLine,
    /// The end of the input.
    EndOfFile,
}

impl fmt::Display for Token {
    /// Writes the token as it appears in the token listing: value-carrying
    /// tokens as `<value> - <kind>`, all others as a fixed piece of text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let fixed = match self {
            Token::Name(name) => return write!(f, "{name} - name"),
            Token::Float(number) => return write!(f, "{number} - float"),
            Token::Int(number) => return write!(f, "{number} - int"),
            Token::Plus => "+",
            Token::Minus => "-",
            Token::Star => "*",
            Token::Slash => "/",
            Token::LParen => "(",
            Token::RParen => ")",
            Token::NewLine => "<new line>",
            Token::EndOfFile => "<end of file>",
        };
        f.write_str(fixed)
    }
}

66
src/symbols/mod.rs Normal file
View File

@@ -0,0 +1,66 @@
use std::collections::{hash_map, HashMap};
use crate::parse::token::Token;
/// Maps each distinct symbol to a numeric id.
///
/// Ids start at 0 and are handed out in the order symbols are first added.
pub struct SymbolsTable<S: std::hash::Hash + Eq> {
    // Symbol -> id assigned when it was first added.
    symbols: HashMap<S, usize>,
    // Id the next previously unseen symbol will receive.
    next_id: usize,
}

impl<S: std::hash::Hash + Eq> SymbolsTable<S> {
    /// Creates an empty table whose first id will be 0.
    pub fn new() -> Self {
        Self {
            symbols: HashMap::new(),
            next_id: 0,
        }
    }

    /// Registers `symbol`, giving it the next free id.
    ///
    /// Adding a symbol that is already present changes nothing.
    pub fn add(&mut self, symbol: S) {
        match self.symbols.entry(symbol) {
            hash_map::Entry::Occupied(_) => {}
            hash_map::Entry::Vacant(slot) => {
                slot.insert(self.next_id);
                self.next_id += 1;
            }
        }
    }

    /// Returns the id of `symbol`, or `None` if it was never added.
    ///
    /// Like `HashMap::get`, the key may be any borrowed form of `S`, so a
    /// `SymbolsTable<String>` can be queried with a `&str` as well as with a
    /// `&String`.
    pub fn get<Q>(&self, symbol: &Q) -> Option<usize>
    where
        S: std::borrow::Borrow<Q>,
        Q: std::hash::Hash + Eq + ?Sized,
    {
        self.symbols.get(symbol).copied()
    }
}
impl<S: std::hash::Hash + Eq> Default for SymbolsTable<S> {
fn default() -> Self {
Self::new()
}
}
/// Consumes the table, yielding owned `(symbol, id)` pairs in the underlying
/// hash map's iteration order.
impl<S: std::hash::Hash + Eq> IntoIterator for SymbolsTable<S> {
    type Item = (S, usize);
    type IntoIter = hash_map::IntoIter<S, usize>;

    fn into_iter(self) -> Self::IntoIter {
        // Only the map is needed; the id counter is dropped.
        let Self { symbols, .. } = self;
        symbols.into_iter()
    }
}
impl<'a, S: std::hash::Hash + Eq> IntoIterator for &'a SymbolsTable<S> {
type Item = (&'a S, &'a usize);
type IntoIter = hash_map::Iter<'a, S, usize>;
fn into_iter(self) -> Self::IntoIter {
self.symbols.iter()
}
}
/// Builds a symbol table from a stream of tokens.
///
/// Every `Token::Name` contributes its text, in stream order; all other
/// tokens are ignored.
impl<'a, I: Iterator<Item = &'a Token>> From<I> for SymbolsTable<String> {
    fn from(value: I) -> Self {
        value
            .filter_map(|token| match token {
                Token::Name(name) => Some(name),
                _ => None,
            })
            .fold(Self::new(), |mut table, name| {
                table.add(name.clone());
                table
            })
    }
}