lab2
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1 +1,3 @@
|
|||||||
/target
|
/target
|
||||||
|
/test
|
||||||
|
/.vscode
|
||||||
260
Cargo.lock
generated
Normal file
260
Cargo.lock
generated
Normal file
@@ -0,0 +1,260 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstream"
|
||||||
|
version = "0.6.15"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"anstyle-parse",
|
||||||
|
"anstyle-query",
|
||||||
|
"anstyle-wincon",
|
||||||
|
"colorchoice",
|
||||||
|
"is_terminal_polyfill",
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle"
|
||||||
|
version = "1.0.8"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-parse"
|
||||||
|
version = "0.2.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb"
|
||||||
|
dependencies = [
|
||||||
|
"utf8parse",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-query"
|
||||||
|
version = "1.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a"
|
||||||
|
dependencies = [
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anstyle-wincon"
|
||||||
|
version = "3.0.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8"
|
||||||
|
dependencies = [
|
||||||
|
"anstyle",
|
||||||
|
"windows-sys",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "anyhow"
|
||||||
|
version = "1.0.89"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap"
|
||||||
|
version = "4.5.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7be5744db7978a28d9df86a214130d106a89ce49644cbc4e3f0c22c3fba30615"
|
||||||
|
dependencies = [
|
||||||
|
"clap_builder",
|
||||||
|
"clap_derive",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_builder"
|
||||||
|
version = "4.5.19"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "a5fbc17d3ef8278f55b282b2a2e75ae6f6c7d4bb70ed3d0382375104bfafdb4b"
|
||||||
|
dependencies = [
|
||||||
|
"anstream",
|
||||||
|
"anstyle",
|
||||||
|
"clap_lex",
|
||||||
|
"strsim",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_derive"
|
||||||
|
version = "4.5.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "4ac6a0c7b1a9e9a5186361f67dfa1b88213572f427fb9ab038efb2bd8c582dab"
|
||||||
|
dependencies = [
|
||||||
|
"heck",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "clap_lex"
|
||||||
|
version = "0.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "colorchoice"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "developing-compilers"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"clap",
|
||||||
|
"itertools",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "either"
|
||||||
|
version = "1.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "heck"
|
||||||
|
version = "0.5.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "is_terminal_polyfill"
|
||||||
|
version = "1.70.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "itertools"
|
||||||
|
version = "0.13.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
|
||||||
|
dependencies = [
|
||||||
|
"either",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "proc-macro2"
|
||||||
|
version = "1.0.86"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
|
||||||
|
dependencies = [
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "quote"
|
||||||
|
version = "1.0.37"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "strsim"
|
||||||
|
version = "0.11.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "syn"
|
||||||
|
version = "2.0.79"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"unicode-ident",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "unicode-ident"
|
||||||
|
version = "1.0.13"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf8parse"
|
||||||
|
version = "0.2.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-sys"
|
||||||
|
version = "0.52.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
|
||||||
|
dependencies = [
|
||||||
|
"windows-targets",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows-targets"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
|
||||||
|
dependencies = [
|
||||||
|
"windows_aarch64_gnullvm",
|
||||||
|
"windows_aarch64_msvc",
|
||||||
|
"windows_i686_gnu",
|
||||||
|
"windows_i686_gnullvm",
|
||||||
|
"windows_i686_msvc",
|
||||||
|
"windows_x86_64_gnu",
|
||||||
|
"windows_x86_64_gnullvm",
|
||||||
|
"windows_x86_64_msvc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_aarch64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_i686_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnu"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_gnullvm"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "windows_x86_64_msvc"
|
||||||
|
version = "0.52.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||||
@@ -4,3 +4,6 @@ version = "0.1.0"
|
|||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
anyhow = "1.0.89"
|
||||||
|
clap = { version = "4.5.19", features = ["derive"] }
|
||||||
|
itertools = "0.13.0"
|
||||||
|
|||||||
61
src/cli.rs
Normal file
61
src/cli.rs
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
use std::{ops::Deref, path::PathBuf};
|
||||||
|
|
||||||
|
use clap::{CommandFactory, Parser};
|
||||||
|
|
||||||
|
pub struct Args {
|
||||||
|
inner: ArgsInner,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Parser)]
|
||||||
|
pub struct ArgsInner {
|
||||||
|
pub input: PathBuf,
|
||||||
|
pub output_tokens: PathBuf,
|
||||||
|
pub output_symbols: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Args {
|
||||||
|
pub fn parse() -> Self {
|
||||||
|
let inner = match validate_inner(ArgsInner::parse()) {
|
||||||
|
Ok(args) => args,
|
||||||
|
Err(err) => {
|
||||||
|
let mut command = ArgsInner::command();
|
||||||
|
err.format(&mut command).exit();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Self { inner }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Deref for Args {
|
||||||
|
type Target = ArgsInner;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.inner
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn validate_inner(args: ArgsInner) -> Result<ArgsInner, clap::Error> {
|
||||||
|
if !args.input.is_file() {
|
||||||
|
return Err(clap::Error::raw(
|
||||||
|
clap::error::ErrorKind::InvalidValue,
|
||||||
|
format!("Input file '{}' does not exist", args.input.display()),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.input == args.output_tokens {
|
||||||
|
return Err(clap::Error::raw(
|
||||||
|
clap::error::ErrorKind::InvalidValue,
|
||||||
|
"Input and output files cannot be the same",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if args.input == args.output_symbols {
|
||||||
|
return Err(clap::Error::raw(
|
||||||
|
clap::error::ErrorKind::InvalidValue,
|
||||||
|
"Input and output files cannot be the same",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(args)
|
||||||
|
}
|
||||||
44
src/main.rs
44
src/main.rs
@@ -1,3 +1,43 @@
|
|||||||
fn main() {
|
use std::io;
|
||||||
println!("Hello, world!");
|
use std::io::Write;
|
||||||
|
|
||||||
|
use parse::token::Token;
|
||||||
|
use symbols::SymbolsTable;
|
||||||
|
|
||||||
|
mod cli;
|
||||||
|
mod parse;
|
||||||
|
mod symbols;
|
||||||
|
|
||||||
|
fn main() -> anyhow::Result<()> {
|
||||||
|
let args = cli::Args::parse();
|
||||||
|
|
||||||
|
let input = std::fs::read_to_string(&args.input).unwrap();
|
||||||
|
let tokens = parse::lexer::make_tokenizer(&input).collect::<Result<Vec<_>, _>>();
|
||||||
|
match tokens {
|
||||||
|
Ok(tokens) => {
|
||||||
|
let symbols = SymbolsTable::from(tokens.iter().map(|(_, token, _)| token));
|
||||||
|
let mut writer_tokens = io::BufWriter::new(std::fs::File::create(&args.output_tokens)?);
|
||||||
|
|
||||||
|
for (_, token, _) in tokens {
|
||||||
|
match token {
|
||||||
|
Token::Name(ref name) => match symbols.get(name) {
|
||||||
|
Some(id) => writeln!(writer_tokens, "{token} ({id})")?,
|
||||||
|
None => writeln!(writer_tokens, "{token} (undefined)")?,
|
||||||
|
},
|
||||||
|
_ => writeln!(writer_tokens, "{token}")?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut writer_symbols =
|
||||||
|
io::BufWriter::new(std::fs::File::create(&args.output_symbols)?);
|
||||||
|
for (name, id) in &symbols {
|
||||||
|
writeln!(writer_symbols, "{name} -> {id}")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
eprintln!("error: {err}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|||||||
20
src/parse/error.rs
Normal file
20
src/parse/error.rs
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
use core::fmt;
|
||||||
|
|
||||||
|
/// Errors the lexer can report.
///
/// The `usize` in each variant is the position at which the offending text
/// starts, as tracked by the lexer.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexicalError {
    /// A character that does not start any known token.
    UnrecognizedToken(usize, char),
    /// Text that looked like a number but could not be parsed as one.
    BadNumber(usize, String),
}

impl fmt::Display for LexicalError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LexicalError::UnrecognizedToken(pos, c) => {
                write!(f, "unrecognized token '{c}' at position {pos}")
            }
            LexicalError::BadNumber(pos, number) => {
                write!(f, "bad number '{number}' at position {pos}")
            }
        }
    }
}

// Implementing `Error` lets callers use this type with `?`, `anyhow`, and
// `Box<dyn Error>`; `Debug` and `Display` above satisfy its requirements.
impl std::error::Error for LexicalError {}
|
||||||
185
src/parse/lexer.rs
Normal file
185
src/parse/lexer.rs
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
use itertools::PeekNth;
|
||||||
|
|
||||||
|
use super::{error::LexicalError, token::Token};
|
||||||
|
|
||||||
|
/// A token together with its span, laid out as `(start, token, end)`.
pub type SpannedToken = (usize, Token, usize);
|
||||||
|
/// Result of lexing one token: a spanned token or a lexical error.
pub type LexerResult = Result<SpannedToken, LexicalError>;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Lexer<T: Iterator<Item = (usize, char)>> {
|
||||||
|
chars: PeekNth<T>,
|
||||||
|
pos: usize,
|
||||||
|
pending: Vec<SpannedToken>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn make_tokenizer(input: &str) -> impl Iterator<Item = LexerResult> + '_ {
|
||||||
|
let chars = input.char_indices();
|
||||||
|
Lexer::new(chars)
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Iterator<Item = (usize, char)>> Lexer<T> {
|
||||||
|
pub fn new(chars: T) -> Self {
|
||||||
|
Self {
|
||||||
|
chars: itertools::peek_nth(chars),
|
||||||
|
pos: 0,
|
||||||
|
pending: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_token(&mut self) -> LexerResult {
|
||||||
|
while self.pending.is_empty() {
|
||||||
|
self.consume_token()?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.pending.remove(0))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_token(&mut self) -> Result<(), LexicalError> {
|
||||||
|
if let Some(c) = self.peek_char_nth(0) {
|
||||||
|
let c1 = self.peek_char_nth(1);
|
||||||
|
if self.is_name_start(c) {
|
||||||
|
let name = self.lex_name()?;
|
||||||
|
self.emit(name);
|
||||||
|
} else if self.is_number_start(c, c1) {
|
||||||
|
let number = self.lex_number()?;
|
||||||
|
self.emit(number);
|
||||||
|
} else {
|
||||||
|
self.consume_char(c)?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let pos = self.get_pos();
|
||||||
|
self.emit((pos, Token::EndOfFile, pos));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lex_name(&mut self) -> LexerResult {
|
||||||
|
let mut name = String::new();
|
||||||
|
let start = self.get_pos();
|
||||||
|
|
||||||
|
while let Some(c) = self.peek_char_nth(0) {
|
||||||
|
if self.is_name_continue(c) {
|
||||||
|
name.push(self.next_char().expect("lex_name: no more characters"));
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let end = self.get_pos();
|
||||||
|
|
||||||
|
Ok((start, Token::Name(name), end))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lex_number(&mut self) -> LexerResult {
|
||||||
|
let mut number = String::new();
|
||||||
|
let start = self.get_pos();
|
||||||
|
|
||||||
|
let mut passed_dot = false;
|
||||||
|
while let Some(c) = self.peek_char_nth(0) {
|
||||||
|
if self.is_digit(c) {
|
||||||
|
number.push(self.next_char().expect("lex_number: no more characters"));
|
||||||
|
} else if !passed_dot && c == '.' {
|
||||||
|
passed_dot = true;
|
||||||
|
number.push(self.next_char().expect("lex_number: no more characters"));
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let end = self.get_pos();
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
start,
|
||||||
|
match number.parse::<i64>() {
|
||||||
|
Ok(n) => Token::Int(n),
|
||||||
|
Err(_) => match number.parse::<f64>() {
|
||||||
|
Ok(n) => Token::Float(n),
|
||||||
|
Err(_) => return Err(LexicalError::BadNumber(start, number)),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
end,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_char(&mut self, c: char) -> Result<(), LexicalError> {
|
||||||
|
match c {
|
||||||
|
'+' => self.eat_next_char(Token::Plus),
|
||||||
|
'-' => self.eat_next_char(Token::Minus),
|
||||||
|
'*' => self.eat_next_char(Token::Star),
|
||||||
|
'/' => self.eat_next_char(Token::Slash),
|
||||||
|
'(' => self.eat_next_char(Token::LParen),
|
||||||
|
')' => self.eat_next_char(Token::RParen),
|
||||||
|
c if c.is_whitespace() => {
|
||||||
|
let start = self.get_pos();
|
||||||
|
let _ = self.next_char();
|
||||||
|
let end = self.get_pos();
|
||||||
|
|
||||||
|
if c == '\n' {
|
||||||
|
self.emit((start, Token::NewLine, end));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let pos = self.get_pos();
|
||||||
|
let _ = self.next_char();
|
||||||
|
return Err(LexicalError::UnrecognizedToken(pos, c));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn eat_next_char(&mut self, token: Token) {
|
||||||
|
let start = self.get_pos();
|
||||||
|
let _ = self.next_char().expect("eat_next_char: no more characters");
|
||||||
|
let end = self.get_pos();
|
||||||
|
|
||||||
|
self.emit((start, token, end));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_number_start(&self, c: char, c1: Option<char>) -> bool {
|
||||||
|
c.is_ascii_digit() || (c == '.' && c1.map_or(false, |c| c.is_ascii_digit()))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_digit(&self, c: char) -> bool {
|
||||||
|
c.is_ascii_digit()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_name_start(&self, c: char) -> bool {
|
||||||
|
c.is_alphabetic() || c == '_'
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_name_continue(&self, c: char) -> bool {
|
||||||
|
c.is_alphanumeric() || c == '_'
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_char(&mut self) -> Option<char> {
|
||||||
|
let (pos, char) = self.chars.next()?;
|
||||||
|
self.pos = pos + 1;
|
||||||
|
|
||||||
|
Some(char)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit(&mut self, token: SpannedToken) {
|
||||||
|
self.pending.push(token);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_pos(&self) -> usize {
|
||||||
|
self.pos
|
||||||
|
}
|
||||||
|
|
||||||
|
fn peek_char_nth(&mut self, n: usize) -> Option<char> {
|
||||||
|
self.chars.peek_nth(n).map(|(_, c)| *c)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: Iterator<Item = (usize, char)>> Iterator for Lexer<T> {
|
||||||
|
type Item = LexerResult;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
match self.next_token() {
|
||||||
|
Ok((_, Token::EndOfFile, _)) => None,
|
||||||
|
r => Some(r),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
3
src/parse/mod.rs
Normal file
3
src/parse/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
pub mod lexer;
|
||||||
|
pub mod token;
|
||||||
|
pub mod error;
|
||||||
37
src/parse/token.rs
Normal file
37
src/parse/token.rs
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
use std::fmt;
|
||||||
|
|
||||||
|
/// Lexical tokens produced by the lexer.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Identifier text.
    Name(String),
    /// Floating-point literal.
    Float(f64),
    /// Integer literal.
    Int(i64),

    // Operators.
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,

    // Punctuation.
    /// `(`
    LParen,
    /// `)`
    RParen,

    // Extra.
    /// New line.
    NewLine,
    /// End of file.
    EndOfFile,
}

/// Human-readable rendering used in the token report.
///
/// Names and numbers carry a ` - <kind>` suffix; all other tokens render as
/// their fixed text.
impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Tokens with a payload are formatted directly; the rest map to a
        // constant string written at the end.
        let fixed = match self {
            Token::Name(name) => return write!(f, "{name} - name"),
            Token::Float(number) => return write!(f, "{number} - float"),
            Token::Int(number) => return write!(f, "{number} - int"),
            Token::Plus => "+",
            Token::Minus => "-",
            Token::Star => "*",
            Token::Slash => "/",
            Token::LParen => "(",
            Token::RParen => ")",
            Token::NewLine => "<new line>",
            Token::EndOfFile => "<end of file>",
        };

        f.write_str(fixed)
    }
}
|
||||||
66
src/symbols/mod.rs
Normal file
66
src/symbols/mod.rs
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
use std::collections::{hash_map, HashMap};
|
||||||
|
|
||||||
|
use crate::parse::token::Token;
|
||||||
|
|
||||||
|
/// Table assigning a numeric id to every distinct symbol, in order of
/// first insertion.
pub struct SymbolsTable<S: std::hash::Hash + Eq> {
    /// Symbol to id mapping.
    symbols: HashMap<S, usize>,
    /// Id that the next previously unseen symbol will receive.
    next_id: usize,
}

impl<S: std::hash::Hash + Eq> SymbolsTable<S> {
    /// Creates an empty table whose first id is 0.
    pub fn new() -> Self {
        Self {
            symbols: HashMap::default(),
            next_id: 0,
        }
    }

    /// Registers `symbol`, giving it the next free id.
    ///
    /// A symbol that is already present keeps its id and no id is used up.
    pub fn add(&mut self, symbol: S) {
        match self.symbols.entry(symbol) {
            hash_map::Entry::Occupied(_) => {}
            hash_map::Entry::Vacant(slot) => {
                slot.insert(self.next_id);
                self.next_id += 1;
            }
        }
    }

    /// Returns the id of `symbol`, or `None` if it was never added.
    pub fn get(&self, symbol: &S) -> Option<usize> {
        self.symbols.get(symbol).map(|id| *id)
    }
}
|
||||||
|
|
||||||
|
impl<S: std::hash::Hash + Eq> Default for SymbolsTable<S> {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<S: std::hash::Hash + Eq> IntoIterator for SymbolsTable<S> {
|
||||||
|
type Item = (S, usize);
|
||||||
|
type IntoIter = hash_map::IntoIter<S, usize>;
|
||||||
|
|
||||||
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
|
self.symbols.into_iter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, S: std::hash::Hash + Eq> IntoIterator for &'a SymbolsTable<S> {
|
||||||
|
type Item = (&'a S, &'a usize);
|
||||||
|
type IntoIter = hash_map::Iter<'a, S, usize>;
|
||||||
|
|
||||||
|
fn into_iter(self) -> Self::IntoIter {
|
||||||
|
self.symbols.iter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, I: Iterator<Item = &'a Token>> From<I> for SymbolsTable<String> {
|
||||||
|
fn from(value: I) -> Self {
|
||||||
|
let mut symbols = Self::new();
|
||||||
|
|
||||||
|
for token in value {
|
||||||
|
if let Token::Name(name) = token {
|
||||||
|
symbols.add(name.clone())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
symbols
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user