From 0f31e7a52e4bddaf936ea8a5b60b0bdb07016faa Mon Sep 17 00:00:00 2001 From: lionarius Date: Mon, 11 Nov 2024 05:24:56 +0300 Subject: [PATCH] lab 7 --- Cargo.lock | 369 +++++++++++++++++++++++++++-- Cargo.toml | 16 +- rustfmt.toml | 7 + src/ast/mod.rs | 85 ++++--- src/ast/typed.rs | 31 ++- src/ast/untyped.rs | 3 +- src/cli.rs | 45 +++- src/lib.rs | 6 + src/main.rs | 301 +++++++++++++++++++---- src/parse/lexer.rs | 52 ++-- src/parse/parser.rs | 39 ++- src/parse/token.rs | 11 +- src/representation/intermediate.rs | 361 +++++++++++++++------------- src/representation/mod.rs | 1 + src/representation/util.rs | 71 ++++++ src/symbols/builtin.rs | 0 src/symbols/mod.rs | 114 +++++++-- src/util.rs | 69 +++--- 18 files changed, 1202 insertions(+), 379 deletions(-) create mode 100644 rustfmt.toml create mode 100644 src/lib.rs create mode 100644 src/representation/util.rs delete mode 100644 src/symbols/builtin.rs diff --git a/Cargo.lock b/Cargo.lock index 3a918b2..29eeba9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,7 +38,7 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" dependencies = [ - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -48,7 +48,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" dependencies = [ "anstyle", - "windows-sys", + "windows-sys 0.52.0", ] [[package]] @@ -57,6 +57,36 @@ version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" +[[package]] +name = "autocfg" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "clap" version = "4.5.19" @@ -103,28 +133,96 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "crossterm" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64e6c0fbe2c17357405f7c758c1ef960fce08bdfb2c03d88d2a18d7e09c4b67" +dependencies = [ + "bitflags 1.3.2", + "crossterm_winapi", + "libc", + "mio", + "parking_lot", + "signal-hook", + "signal-hook-mio", + "winapi", +] + +[[package]] +name = "crossterm_winapi" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b" +dependencies = [ + "winapi", +] + [[package]] name = "developing-compilers" version = "0.1.0" dependencies = [ "anyhow", + "byteorder", "clap", + "inquire", "itertools", "thiserror", ] +[[package]] +name = "dyn-clone" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "fuzzy-matcher" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54614a3312934d066701a80f20f15fa3b56d67ac7722b39eea5b4c9dd1d66c94" +dependencies = [ + "thread_local", +] + +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "inquire" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fddf93031af70e75410a2511ec04d49e758ed2f26dad3404a934e0fb45cc12a" +dependencies = [ + "bitflags 2.6.0", + "crossterm", + "dyn-clone", + "fuzzy-matcher", + "fxhash", + "newline-converter", + "once_cell", + "unicode-segmentation", + "unicode-width", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -140,6 +238,78 @@ dependencies = [ "either", ] +[[package]] +name = "libc" +version = "0.2.162" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d287de67fe55fd7e1581fe933d965a5a9477b38e949cfa9f8574ef01506398" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] +name = "newline-converter" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b6b097ecb1cbfed438542d16e84fd7ad9b0c76c8a65b7f9039212a3d14dc7f" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "once_cell" +version = "1.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.52.6", +] + [[package]] name = "proc-macro2" version = "1.0.86" @@ -158,6 +328,57 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "signal-hook" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + +[[package]] +name = "signal-hook-mio" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34db1a06d485c9142248b7a054f034b349b212551f3dfd19c94d45a754a217cd" +dependencies = [ + "libc", + "mio", + "signal-hook", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" +dependencies = [ + "libc", +] + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + [[package]] name = "strsim" version = "0.11.1" @@ -166,9 +387,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.86" +version = "2.0.87" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89275301d38033efb81a6e60e3497e734dfcc62571f2854bf4b16690398824c" +checksum = "25aa4ce346d03a6dcd68dd8b4010bcb74e54e62c90c573f394c46eae99aba32d" dependencies = [ "proc-macro2", "quote", @@ -177,43 +398,117 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.66" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d171f59dbaa811dbbb1aee1e73db92ec2b122911a48e1390dfe327a821ddede" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.66" +version = "2.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b08be0f17bd307950653ce45db00cd31200d82b624b36e181337d9c7d92765b5" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "unicode-ident" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-sys" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -222,28 +517,46 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.6" @@ -256,24 +569,48 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.6" diff --git a/Cargo.toml b/Cargo.toml index a86570f..1a4410c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,10 +1,12 @@ [package] - edition = "2021" - name = "developing-compilers" - version = "0.1.0" +edition = "2021" +name = "developing-compilers" +version = "0.1.0" [dependencies] - anyhow = "1.0.89" - clap = { version = "4.5.19", features = [ "derive" ] } - itertools = "0.13.0" - thiserror = "1.0.66" +anyhow = "1.0.89" +byteorder = "1.5.0" +clap = { version = "4.5.19", features = ["derive"] } +inquire = "0.7.5" +itertools = "0.13.0" +thiserror = "2.0.3" diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..ae9894f --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,7 @@ +match_block_trailing_comma = true +newline_style = "Unix" +style_edition = "2024" + +group_imports = "StdExternalCrate" +imports_granularity = "Module" +unstable_features = true diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 45d341d..2620dc2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -7,10 +7,8 @@ use std::str::FromStr; use typed::{Type, TypedExpr}; pub use untyped::UntypedExpr; -use crate::{ - error::{self, SemanticError, SemanticErrorKind}, - symbols::SymbolsTable, -}; +use crate::error::{self, SemanticError, SemanticErrorKind}; +use crate::symbols::SymbolsTable; #[derive(Debug, Copy, Clone)] pub struct Span { @@ -43,6 +41,31 @@ impl BinOp { } } +impl From<&BinOp> for u8 { + fn from(value: &BinOp) -> Self { + match value { + BinOp::Add => 1, + BinOp::Sub => 2, + BinOp::Mul => 3, + BinOp::Div => 4, + } + } +} + +impl TryFrom for BinOp { + type Error = u8; + + fn try_from(value: u8) -> Result { + match value { + 1 => Ok(BinOp::Add), + 2 => Ok(BinOp::Sub), + 3 => Ok(BinOp::Mul), + 4 => Ok(BinOp::Div), + b => Err(b), + } + } +} + impl fmt::Display for BinOp { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { if f.alternate() { @@ -92,7 +115,7 @@ fn optimize_expr(expr: TypedExpr) -> TypedExpr { BinOp::Div => lhs / rhs, }, } - } + }, (TypedExpr::Float { value: lhs, .. }, TypedExpr::Float { value: rhs, .. }) => { TypedExpr::Float { span, @@ -103,42 +126,42 @@ fn optimize_expr(expr: TypedExpr) -> TypedExpr { BinOp::Div => lhs / rhs, }, } - } + }, (lhs, TypedExpr::Int { value: 0, .. }) | (TypedExpr::Float { value: 0.0, .. }, lhs) if matches!(op, BinOp::Add | BinOp::Sub) => { lhs - } + }, (lhs, TypedExpr::Int { value: 1, .. }) | (lhs, TypedExpr::Float { value: 1.0, .. }) if matches!(op, BinOp::Mul | BinOp::Div) => { lhs - } + }, (TypedExpr::Int { value: 0, .. }, rhs) | (TypedExpr::Float { value: 0.0, .. }, rhs) if matches!(op, BinOp::Add) => { rhs - } + }, (TypedExpr::Int { value: 1, .. }, rhs) | (TypedExpr::Float { value: 1.0, .. }, rhs) if matches!(op, BinOp::Mul) => { rhs - } + }, (TypedExpr::Int { value: 0, .. }, _) | (_, TypedExpr::Int { value: 0, .. }) if matches!(op, BinOp::Mul) => { TypedExpr::Int { span, value: 0 } - } + }, (TypedExpr::Float { value: 0.0, .. }, _) | (_, TypedExpr::Float { value: 0.0, .. }) if matches!(op, BinOp::Mul) => { TypedExpr::Float { span, value: 0.0 } - } + }, (lhs, rhs) => TypedExpr::BinOp { span, lhs: Box::new(lhs), @@ -146,7 +169,7 @@ fn optimize_expr(expr: TypedExpr) -> TypedExpr { rhs: Box::new(rhs), }, } - } + }, TypedExpr::IntToFloat { value } => { let value = optimize_expr(*value); if let TypedExpr::Int { value, span } = value { @@ -159,7 +182,7 @@ fn optimize_expr(expr: TypedExpr) -> TypedExpr { value: Box::new(value), } } - } + }, expr => expr, }; @@ -192,17 +215,17 @@ fn convert_to_typed_expr( span, SemanticErrorKind::DuplicateSymbol(symbol.name.clone()), ) - .into()) - } + .into()); + }, (None, Some(ty)) => { symbol.ty = Some(ty); - } - _ => {} + }, + _ => {}, } } TypedExpr::Var { span, name } - } + }, UntypedExpr::BinOp { span, lhs, op, rhs } => { let rhs = *rhs; let lhs = *lhs; @@ -217,19 +240,19 @@ fn convert_to_typed_expr( *span, SemanticErrorKind::DivisionByZero, ) - .into()) - } + .into()); + }, UntypedExpr::Float { span, value } if *value == 0.0 => { return Err(SemanticError::new( *span, SemanticErrorKind::DivisionByZero, ) - .into()) - } - _ => {} + .into()); + }, + _ => {}, } - } - _ => {} + }, + _ => {}, } let lhs = convert_to_typed_expr(lhs, symbols)?; @@ -241,7 +264,7 @@ fn convert_to_typed_expr( op, rhs: Box::new(rhs), } - } + }, }; Ok(expr) @@ -259,7 +282,7 @@ fn coerce_types(expr: TypedExpr, symbols: &mut SymbolsTable) -> error::Result { let lhs = coerce_types(*lhs, symbols)?; let rhs = coerce_types(*rhs, symbols)?; @@ -270,11 +293,11 @@ fn coerce_types(expr: TypedExpr, symbols: &mut SymbolsTable) -> error::Result { let lhs = TypedExpr::cast_to_float(lhs); (lhs, rhs) - } + }, (Type::Float, Type::Int) => { let rhs = TypedExpr::cast_to_float(rhs); (lhs, rhs) - } + }, }; TypedExpr::BinOp { @@ -283,7 +306,7 @@ fn coerce_types(expr: TypedExpr, symbols: &mut SymbolsTable) -> error::Result expr, }; diff --git a/src/ast/typed.rs b/src/ast/typed.rs index 2f98ea7..97bb604 100644 --- a/src/ast/typed.rs +++ b/src/ast/typed.rs @@ -1,11 +1,9 @@ -use std::{fmt, str::FromStr}; - -use crate::{ - error::SemanticErrorKind, - symbols::{Symbol, SymbolsTable}, -}; +use std::fmt; +use std::str::FromStr; use super::{BinOp, Span}; +use crate::error::SemanticErrorKind; +use crate::symbols::{Symbol, SymbolsTable}; #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Type { @@ -13,6 +11,27 @@ pub enum Type { Float, } +impl From for u8 { + fn from(value: Type) -> Self { + match value { + Type::Int => 1, + Type::Float => 2, + } + } +} + +impl TryFrom for Type { + type Error = u8; + + fn try_from(value: u8) -> Result { + match value { + 1 => Ok(Type::Int), + 2 => Ok(Type::Float), + b => Err(b), + } + } +} + impl FromStr for Type { type Err = SemanticErrorKind; diff --git a/src/ast/untyped.rs b/src/ast/untyped.rs index c3761c9..40c353f 100644 --- a/src/ast/untyped.rs +++ b/src/ast/untyped.rs @@ -1,6 +1,5 @@ -use crate::symbols::Symbol; - use super::{BinOp, Span}; +use crate::symbols::Symbol; #[derive(Debug)] pub enum UntypedExpr { diff --git a/src/cli.rs b/src/cli.rs index 226c8f7..606fdde 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -1,4 +1,5 @@ -use std::{ops::Deref, path::PathBuf}; +use std::ops::Deref; +use std::path::PathBuf; use clap::{CommandFactory, Parser, Subcommand}; @@ -35,6 +36,15 @@ pub enum Command { output: PathBuf, output_symbols: PathBuf, }, + Com { + #[clap(short, long, default_value_t = false)] + optimize: bool, + input: PathBuf, + output: PathBuf, + }, + Int { + input: PathBuf, + }, } #[derive(Copy, Clone, PartialEq, Eq, clap::ValueEnum)] @@ -52,7 +62,7 @@ impl Args { Err(err) => { let mut command = ArgsInner::command(); err.format(&mut command).exit(); - } + }, }; Self { inner } @@ -82,6 +92,8 @@ fn validate_inner(args: ArgsInner) -> Result { output_symbols, .. } => validate_gen(input, output, output_symbols)?, + Command::Com { input, output, .. } => validate_com(input, output)?, + Command::Int { input } => validate_int(input)?, }; Ok(args) @@ -180,3 +192,32 @@ fn validate_gen( Ok(()) } + +fn validate_com(input: &PathBuf, output: &PathBuf) -> Result<(), clap::Error> { + if !input.is_file() { + return Err(clap::Error::raw( + clap::error::ErrorKind::InvalidValue, + format!("Input file '{}' does not exist", input.display()), + )); + } + + if input == output { + return Err(clap::Error::raw( + clap::error::ErrorKind::InvalidValue, + "Input and output files cannot be the same", + )); + }; + + Ok(()) +} + +fn validate_int(input: &PathBuf) -> Result<(), clap::Error> { + if !input.is_file() { + return Err(clap::Error::raw( + clap::error::ErrorKind::InvalidValue, + format!("Input file '{}' does not exist", input.display()), + )); + } + + Ok(()) +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..2bcf013 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,6 @@ +pub mod ast; +pub mod error; +pub mod parse; +pub mod representation; +pub mod symbols; +pub mod util; diff --git a/src/main.rs b/src/main.rs index e2fa4fa..3257923 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,47 +1,254 @@ -mod ast; mod cli; -mod error; -mod parse; -mod representation; -mod symbols; -mod util; +use std::collections::HashMap; +use std::io::{self, Write}; +use std::path::Path; +use std::{fmt, fs}; + +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use cli::GenMode; +use developing_compilers::ast::BinOp; +use developing_compilers::ast::typed::Type; +use developing_compilers::representation::intermediate::{IntermediateExpr, IntermediateValue}; +use developing_compilers::symbols::Symbol; +use developing_compilers::*; +use inquire::CustomType; use parse::parser::Parser; -use std::{ - io::{self, Write}, - path::Path, -}; use symbols::SymbolsTable; fn main() -> anyhow::Result<()> { let args = cli::Args::parse(); - match &args.command { + let result = match &args.command { cli::Command::Lex { input, output_tokens, output_symbols, - } => { - lex_command(input, output_tokens, output_symbols)?; - } - cli::Command::Syn { input, output_tree } => { - syn_command(input, output_tree)?; - } - cli::Command::Sem { input, output_tree } => { - sem_command(input, output_tree)?; - } + } => lex_command(input, output_tokens, output_symbols), + cli::Command::Syn { input, output_tree } => syn_command(input, output_tree), + cli::Command::Sem { input, output_tree } => sem_command(input, output_tree), cli::Command::Gen { mode, optimize, input, output, output_symbols, - } => { - gen_command(mode, *optimize, input, output, output_symbols)?; + } => gen_command(mode, *optimize, input, output, output_symbols), + cli::Command::Com { + optimize, + input, + output, + } => com_command(*optimize, input, output), + cli::Command::Int { input } => int_command(input), + }; + + if let Err(e) = result { + eprintln!("error: {}", e); + return Ok(()); + } + + Ok(()) +} + +fn int_command(input: &Path) -> Result<(), anyhow::Error> { + enum VarValue { + Int(i64), + Float(f64), + } + + impl fmt::Display for VarValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + VarValue::Int(v) => write!(f, "{}", v), + VarValue::Float(v) => { + if v == &v.trunc() { + write!(f, "{}.0", v) + } else { + write!(f, "{}", v) + } + }, + } } } + fn value_as_i64(value: &IntermediateValue, vars: &HashMap) -> i64 { + match value { + IntermediateValue::Int { value } => *value, + IntermediateValue::Var { name } => match vars.get(name) { + Some(VarValue::Int(v)) => *v, + _ => unreachable!(), + }, + _ => unreachable!(), + } + } + + fn value_as_f64(value: &IntermediateValue, vars: &HashMap) -> f64 { + match value { + IntermediateValue::Float { value } => *value, + IntermediateValue::Var { name } => match vars.get(name) { + Some(VarValue::Float(v)) => *v, + _ => unreachable!(), + }, + _ => unreachable!(), + } + } + + let (symbols, ir) = { + let mut reader = io::BufReader::new(fs::File::open(input)?); + let symbols = SymbolsTable::from_reader(&mut reader)?; + let n = reader.read_u64::()?; + let mut ir = Vec::with_capacity(n as usize); + for _ in 0..n { + ir.push(IntermediateExpr::from_reader(&mut reader)?); + } + (symbols, ir) + }; + + let mut vars = HashMap::new(); + for (_, data) in &symbols { + let value = match data.ty { + Some(Type::Int) if !data.temporary => VarValue::Int( + CustomType::new(&format!("Input int {}", data.name)) + .with_default(0) + .prompt()?, + ), + Some(Type::Float) if !data.temporary => VarValue::Float( + CustomType::new(&format!("Input float {}", data.name)) + .with_default(0.0) + .prompt()?, + ), + Some(Type::Int) => VarValue::Int(0), + Some(Type::Float) => VarValue::Float(0.0), + None => continue, + }; + vars.insert(data.id, value); + } + + let mut last_result = None; + for expr in ir { + match expr { + IntermediateExpr::IntToFloat { result, value } => { + let value = value_as_i64(&value, &vars) as f64; + + match vars.get_mut(&result) { + Some(VarValue::Float(v)) => *v = value, + _ => unreachable!(), + }; + + last_result = Some(result); + }, + IntermediateExpr::BinOp { + result, + lhs, + op, + rhs, + } if symbols.type_of(&result) == Some(Type::Int) => { + let lhs = value_as_i64(&lhs, &vars); + let rhs = value_as_i64(&rhs, &vars); + + let value = match op { + BinOp::Add => lhs + rhs, + BinOp::Sub => lhs - rhs, + BinOp::Mul => lhs * rhs, + BinOp::Div => lhs / rhs, + }; + + match vars.get_mut(&result) { + Some(VarValue::Int(v)) => *v = value, + _ => unreachable!(), + }; + + last_result = Some(result); + }, + IntermediateExpr::BinOp { + result, + lhs, + op, + rhs, + } if symbols.type_of(&result) == Some(Type::Float) => { + let lhs = value_as_f64(&lhs, &vars); + let rhs = value_as_f64(&rhs, &vars); + + let value = match op { + BinOp::Add => lhs + rhs, + BinOp::Sub => lhs - rhs, + BinOp::Mul => lhs * rhs, + BinOp::Div => lhs / rhs, + }; + + match vars.get_mut(&result) { + Some(VarValue::Float(v)) => *v = value, + _ => unreachable!(), + }; + + last_result = Some(result); + }, + IntermediateExpr::Identity { result, value } + if symbols.type_of(&result) == Some(Type::Float) => + { + let value = value_as_f64(&value, &vars); + + match vars.get_mut(&result) { + Some(VarValue::Float(v)) => *v = value, + _ => unreachable!(), + }; + + last_result = Some(result); + }, + IntermediateExpr::Identity { result, value } + if symbols.type_of(&result) == Some(Type::Int) => + { + let value = value_as_i64(&value, &vars); + + match vars.get_mut(&result) { + Some(VarValue::Int(v)) => *v = value, + _ => unreachable!(), + }; + + last_result = Some(result); + }, + _ => unreachable!(), + } + } + + if let Some(result) = last_result { + println!("Result: {}", vars.get(&result).unwrap()); + } else { + println!("No result"); + } + + Ok(()) +} + +fn com_command(optimize: bool, input: &Path, output: &Path) -> Result<(), anyhow::Error> { + let input = fs::read_to_string(input)?; + let mut symbols = SymbolsTable::default(); + let typed_expr = match { + let tokens = parse::lexer::make_tokenizer(&input, &mut symbols); + let mut parser = Parser::new(tokens); + parser.parse() + } + .and_then(|expr| ast::to_typed_expr(expr, optimize, &mut symbols)) + { + Ok(expr) => expr, + Err(e) => { + return Err(e.into()); + }, + }; + + let ir = IntermediateExpr::from_typed_expr(typed_expr, optimize, &mut symbols); + { + let used_symbols = util::collect_used_symbols(&ir); + symbols.retain(&used_symbols); + } + + let mut writer = io::BufWriter::new(fs::File::create(output)?); + symbols.to_writer(&mut writer)?; + writer.write_u64::(ir.len() as u64)?; + for expr in ir { + expr.to_writer(&mut writer)?; + } + Ok(()) } @@ -52,7 +259,7 @@ fn gen_command( output: &Path, output_symbols: &Path, ) -> Result<(), anyhow::Error> { - let input = std::fs::read_to_string(input)?; + let input = fs::read_to_string(input)?; let mut symbols = SymbolsTable::default(); let typed_expr = match { let tokens = parse::lexer::make_tokenizer(&input, &mut symbols); @@ -63,32 +270,28 @@ fn gen_command( { Ok(expr) => expr, Err(e) => { - eprintln!("error: {}", e); - return Ok(()); - } + return Err(e.into()); + }, }; - let mut writer = io::BufWriter::new(std::fs::File::create(output)?); + let mut writer = io::BufWriter::new(fs::File::create(output)?); match mode { GenMode::Intermediate => { - let intermediate_exprs = representation::intermediate::to_intermediate_expr( - typed_expr, - optimize, - &mut symbols, - ); + let intermediate_exprs = + IntermediateExpr::from_typed_expr(typed_expr, optimize, &mut symbols); let used_symbols = util::collect_used_symbols(&intermediate_exprs); symbols.retain(&used_symbols); util::print_intermediate_exprs(&intermediate_exprs, &mut writer)?; - } + }, GenMode::Postfix => { util::print_postfix_expr(&typed_expr, &mut writer)?; - } + }, } - let mut writer_symbols = io::BufWriter::new(std::fs::File::create(output_symbols)?); + let mut writer_symbols = io::BufWriter::new(fs::File::create(output_symbols)?); for (name, data) in &symbols { writeln!(writer_symbols, "{name} -> {}", data)?; } @@ -97,7 +300,7 @@ fn gen_command( } fn sem_command(input: &Path, output_tree: &Path) -> Result<(), anyhow::Error> { - let input = std::fs::read_to_string(input)?; + let input = fs::read_to_string(input)?; let mut symbols = SymbolsTable::default(); let res = { @@ -109,17 +312,17 @@ fn sem_command(input: &Path, output_tree: &Path) -> Result<(), anyhow::Error> { match res { Ok(expr) => { - let mut writer_tree = io::BufWriter::new(std::fs::File::create(output_tree)?); + let mut writer_tree = io::BufWriter::new(fs::File::create(output_tree)?); util::print_typed_expr(&expr, &symbols, &mut writer_tree)?; - } - Err(e) => eprintln!("error: {}", e), + }, + Err(e) => return Err(e.into()), }; Ok(()) } fn syn_command(input: &Path, output_tree: &Path) -> Result<(), anyhow::Error> { - let input = std::fs::read_to_string(input)?; + let input = fs::read_to_string(input)?; let mut symbols = SymbolsTable::default(); let tokens = parse::lexer::make_tokenizer(&input, &mut symbols); @@ -128,10 +331,10 @@ fn syn_command(input: &Path, output_tree: &Path) -> Result<(), anyhow::Error> { match res { Ok(expr) => { - let mut writer_tree = io::BufWriter::new(std::fs::File::create(output_tree)?); + let mut writer_tree = io::BufWriter::new(fs::File::create(output_tree)?); util::print_untyped_expr(&expr, &mut writer_tree)?; - } - Err(e) => eprintln!("error: {}", e), + }, + Err(e) => return Err(e.into()), }; Ok(()) @@ -142,26 +345,26 @@ fn lex_command( output_tokens: &Path, output_symbols: &Path, ) -> Result<(), anyhow::Error> { - let input = std::fs::read_to_string(input)?; + let input = fs::read_to_string(input)?; let mut symbols = SymbolsTable::default(); let tokens = parse::lexer::make_tokenizer(&input, &mut symbols).collect::, _>>(); match tokens { Ok(tokens) => { - let mut writer_tokens = io::BufWriter::new(std::fs::File::create(output_tokens)?); + let mut writer_tokens = io::BufWriter::new(fs::File::create(output_tokens)?); for (_, token, _) in tokens { writeln!(writer_tokens, "{token:>6} - {}", token.as_str())?; } - let mut writer_symbols = io::BufWriter::new(std::fs::File::create(output_symbols)?); + let mut writer_symbols = io::BufWriter::new(fs::File::create(output_symbols)?); for (name, data) in &symbols { writeln!(writer_symbols, "{name} -> {}", data)?; } - } + }, Err(e) => { - eprintln!("error: {}", e); - } + return Err(e.into()); + }, }; Ok(()) diff --git a/src/parse/lexer.rs b/src/parse/lexer.rs index cdc2a57..6cca85a 100644 --- a/src/parse/lexer.rs +++ b/src/parse/lexer.rs @@ -1,21 +1,20 @@ +use std::collections::VecDeque; + use itertools::PeekNth; -use crate::{ - ast::Span, - error::{LexicalError, LexicalErrorKind}, - symbols::SymbolsTable, -}; - use super::token::Token; +use crate::ast::Span; +use crate::error::{LexicalError, LexicalErrorKind}; +use crate::symbols::SymbolsTable; pub type SpannedToken = (usize, Token, usize); pub type LexerResult = Result; #[derive(Debug)] -pub struct Lexer<'s, T: Iterator> { +pub struct Lexer<'s, T: Iterator> { chars: PeekNth, pos: usize, - pending: Vec, + pending: VecDeque, symbols: &'s mut SymbolsTable, } @@ -23,16 +22,16 @@ pub fn make_tokenizer<'s>( input: &'s str, symbols: &'s mut SymbolsTable, ) -> impl Iterator + 's { - let chars = input.char_indices(); + let chars = input.chars(); Lexer::new(chars, symbols) } -impl<'s, T: Iterator> Lexer<'s, T> { +impl<'s, T: Iterator> Lexer<'s, T> { pub fn new(chars: T, symbols: &'s mut SymbolsTable) -> Self { Self { chars: itertools::peek_nth(chars), pos: 0, - pending: vec![], + pending: VecDeque::new(), symbols, } } @@ -42,16 +41,15 @@ impl<'s, T: Iterator> Lexer<'s, T> { self.consume_token()?; } - Ok(self.pending.remove(0)) + Ok(self.pending.pop_front().unwrap()) } fn consume_token(&mut self) -> Result<(), LexicalError> { if let Some(c) = self.peek_char_nth(0) { - let c1 = self.peek_char_nth(1); if self.is_name_start(c) { let name = self.lex_name()?; self.emit(name); - } else if self.is_number_start(c, c1) { + } else if self.is_number_start(c) { let number = self.lex_number()?; self.emit(number); } else { @@ -98,7 +96,10 @@ impl<'s, T: Iterator> Lexer<'s, T> { while let Some(c) = self.peek_char_nth(0) { if self.is_digit(c) { number.push(self.next_char().expect("lex_number: no more characters")); - } else if !passed_dot && c == '.' { + } else if !passed_dot + && c == '.' + && self.peek_char_nth(1).map_or(false, |c| self.is_digit(c)) + { passed_dot = true; number.push(self.next_char().expect("lex_number: no more characters")); } else { @@ -118,8 +119,8 @@ impl<'s, T: Iterator> Lexer<'s, T> { return Err(LexicalError::new( Span::new(start, end), LexicalErrorKind::BadNumber(number), - )) - } + )); + }, }, }, end, @@ -144,7 +145,7 @@ impl<'s, T: Iterator> Lexer<'s, T> { // if c == '\n' { // self.emit((start, Token::NewLine, end)); // } - } + }, _ => { let pos = self.get_pos(); let _ = self.next_char(); @@ -152,7 +153,7 @@ impl<'s, T: Iterator> Lexer<'s, T> { Span::new(pos, self.get_pos()), LexicalErrorKind::UnrecognizedToken, )); - } + }, } Ok(()) @@ -166,9 +167,8 @@ impl<'s, T: Iterator> Lexer<'s, T> { self.emit((start, token, end)); } - fn is_number_start(&self, c: char, c1: Option) -> bool { + fn is_number_start(&self, c: char) -> bool { c.is_ascii_digit() - // || (c == '-' && c1.map_or(false, |c| c.is_ascii_digit())) } fn is_digit(&self, c: char) -> bool { @@ -184,14 +184,14 @@ impl<'s, T: Iterator> Lexer<'s, T> { } fn next_char(&mut self) -> Option { - let (pos, char) = self.chars.next()?; - self.pos = pos + 1; + let char = self.chars.next()?; + self.pos += char.len_utf8(); Some(char) } fn emit(&mut self, token: SpannedToken) { - self.pending.push(token); + self.pending.push_back(token); } fn get_pos(&self) -> usize { @@ -199,11 +199,11 @@ impl<'s, T: Iterator> Lexer<'s, T> { } fn peek_char_nth(&mut self, n: usize) -> Option { - self.chars.peek_nth(n).map(|(_, c)| *c) + self.chars.peek_nth(n).cloned() } } -impl<'s, T: Iterator> Iterator for Lexer<'s, T> { +impl<'s, T: Iterator> Iterator for Lexer<'s, T> { type Item = LexerResult; fn next(&mut self) -> Option { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index d6268cc..bc8a89c 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -1,14 +1,9 @@ use itertools::PeekNth; -use crate::{ - ast::{Span, UntypedExpr}, - error::{self, ParseError, ParseErrorKind}, -}; - -use super::{ - lexer::{LexerResult, SpannedToken}, - token::Token, -}; +use super::lexer::{LexerResult, SpannedToken}; +use super::token::Token; +use crate::ast::{Span, UntypedExpr}; +use crate::error::{self, ParseError, ParseErrorKind}; #[derive(Debug)] pub struct Parser> { @@ -68,7 +63,7 @@ where break; } - rhs = self.parse_expr_inner(rhs, op1.precedence() + 1)?; + rhs = self.parse_expr_inner(rhs, op2.precedence())?; op = self.peek_token_nth(0).and_then(|token| token.as_bin_op()); } @@ -97,21 +92,21 @@ where value, }), (start, Token::Name(name), end) => { - let typename = match self.peek_token_nth(0) { + let (typename, end) = match self.peek_token_nth(0) { Some(Token::LBracket) => { let _ = self.next_token()?; - let typename = + let (_, typename, _) = self.expect_token_predicate(|t| matches!(t, Token::Name(_)))?; - let _ = self.expect_token(Token::RBracket)?; + let (_, end) = self.expect_token(Token::RBracket)?; - let typename = match typename.1 { + let typename = match typename { Token::Name(id) => id, _ => unreachable!(), }; - Some(typename) - } - _ => None, + (Some(typename), end) + }, + _ => (None, end), }; Ok(UntypedExpr::Var { @@ -119,13 +114,13 @@ where name, typename, }) - } + }, (_, Token::LParen, _) => { let expr = self.parse_expr()?; let _ = self.expect_token(Token::RParen)?; Ok(expr) - } + }, (start, _, end) => Err(ParseError::new( Span::new(start, end), ParseErrorKind::ExpectedExpr, @@ -139,7 +134,7 @@ where Some(Ok((start, token, end))) => { self.last_span = Span::new(start, end); Ok((start, token, end)) - } + }, Some(Err(e)) => Err(e.into()), None => Err(ParseError::new(self.last_span, ParseErrorKind::UnexpectedEOF).into()), } @@ -162,7 +157,7 @@ where (start, t, end) if t == token => Ok((start, end)), (start, _, end) => { Err(ParseError::new(Span::new(start, end), ParseErrorKind::UnexpectedToken).into()) - } + }, } } @@ -175,7 +170,7 @@ where (start, t, end) if pred(&t) => Ok((start, t, end)), (start, _, end) => { Err(ParseError::new(Span::new(start, end), ParseErrorKind::UnexpectedToken).into()) - } + }, } } } diff --git a/src/parse/token.rs b/src/parse/token.rs index f12ae18..3c0a08c 100644 --- a/src/parse/token.rs +++ b/src/parse/token.rs @@ -1,6 +1,7 @@ use std::fmt; -use crate::{ast::BinOp, symbols::Symbol}; +use crate::ast::BinOp; +use crate::symbols::Symbol; #[derive(Debug, Clone, PartialEq)] pub enum Token { @@ -56,7 +57,13 @@ impl fmt::Display for Token { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Token::Name(id) => write!(f, ""), - Token::Float(number) => write!(f, "<{number}>"), + Token::Float(number) => { + if number == &number.trunc() { + write!(f, "<{number}.0>") + } else { + write!(f, "<{number}>") + } + }, Token::Int(number) => write!(f, "<{number}>"), Token::Plus => write!(f, "<+>"), Token::Minus => write!(f, "<->"), diff --git a/src/representation/intermediate.rs b/src/representation/intermediate.rs index 58d8c50..49c3b25 100644 --- a/src/representation/intermediate.rs +++ b/src/representation/intermediate.rs @@ -1,10 +1,11 @@ -use std::fmt; +use std::{fmt, io}; -use crate::ast::typed::Type; -use crate::{ - ast::{typed::TypedExpr, BinOp}, - symbols::{Symbol, SymbolsTable}, -}; +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; + +use crate::ast::BinOp; +use crate::ast::typed::TypedExpr; +use crate::representation::util::TempVarContext; +use crate::symbols::{Symbol, SymbolsTable}; #[derive(Debug)] pub enum IntermediateValue { @@ -13,11 +14,58 @@ pub enum IntermediateValue { Var { name: Symbol }, } +impl IntermediateValue { + pub fn as_u8(&self) -> u8 { + match self { + IntermediateValue::Int { .. } => b'i', + IntermediateValue::Float { .. } => b'f', + IntermediateValue::Var { .. } => b'v', + } + } + + pub fn to_writer(&self, writer: &mut impl io::Write) -> io::Result<()> { + writer.write_u8(self.as_u8())?; + + match self { + IntermediateValue::Int { value } => writer.write_i64::(*value)?, + IntermediateValue::Float { value } => writer.write_f64::(*value)?, + IntermediateValue::Var { name } => writer.write_u64::(name.into())?, + } + + Ok(()) + } + + pub fn from_reader(reader: &mut impl io::Read) -> io::Result { + let b = reader.read_u8()?; + match b { + b'i' => Ok(IntermediateValue::Int { + value: reader.read_i64::()?, + }), + b'f' => Ok(IntermediateValue::Float { + value: reader.read_f64::()?, + }), + b'v' => Ok(IntermediateValue::Var { + name: reader.read_u64::()?.into(), + }), + b => Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid byte: {b}"), + )), + } + } +} + impl fmt::Display for IntermediateValue { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { IntermediateValue::Int { value } => write!(f, "{}", value), - IntermediateValue::Float { value } => write!(f, "{:.2}", value), + IntermediateValue::Float { value } => { + if *value == value.trunc() { + write!(f, "{value}.0") + } else { + write!(f, "{value}") + } + }, IntermediateValue::Var { name } => write!(f, "", name), } } @@ -35,169 +83,164 @@ pub enum IntermediateExpr { op: BinOp, rhs: IntermediateValue, }, + Identity { + result: Symbol, + value: IntermediateValue, + }, } -fn add_temp_var(temp_var_counter: &mut usize, ty: Type, symbols: &mut SymbolsTable) -> Symbol { - let result = symbols.add(format!("#T{}", temp_var_counter)); - { - let data = symbols.resolve_mut(&result).unwrap(); - data.ty = Some(ty); - data.temporary = true; - } - *temp_var_counter += 1; - - result -} - -fn to_intermediate_expr_inner( - expr: TypedExpr, - reuse_symbols: bool, - symbols: &mut SymbolsTable, - temp_var_counter: &mut usize, - exprs: &mut Vec, - temp_float_vars: &mut Vec, - temp_int_vars: &mut Vec, -) -> IntermediateValue { - let ty = expr.ty(symbols); - - match expr { - TypedExpr::Int { value, .. } => IntermediateValue::Int { value }, - TypedExpr::Float { value, .. } => IntermediateValue::Float { value }, - TypedExpr::Var { name, .. } => IntermediateValue::Var { name }, - TypedExpr::BinOp { lhs, op, rhs, .. } => { - let lhs = to_intermediate_expr_inner( - *lhs, - reuse_symbols, - symbols, - temp_var_counter, - exprs, - temp_float_vars, - temp_int_vars, - ); - let rhs = to_intermediate_expr_inner( - *rhs, - reuse_symbols, - symbols, - temp_var_counter, - exprs, - temp_float_vars, - temp_int_vars, - ); - - if let IntermediateValue::Var { name } = lhs { - free_temp_var(name, ty, temp_int_vars, temp_float_vars, symbols) - } - - if let IntermediateValue::Var { name } = rhs { - free_temp_var(name, ty, temp_int_vars, temp_float_vars, symbols) - } - - let result = allocate_temp_var( - ty, - reuse_symbols, - temp_var_counter, - temp_float_vars, - temp_int_vars, - symbols, - ); - - exprs.push(IntermediateExpr::BinOp { +impl fmt::Display for IntermediateExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + IntermediateExpr::IntToFloat { result, value } => { + write!(f, "i2f , {}", result, value) + }, + IntermediateExpr::BinOp { result, lhs, op, rhs, - }); - - IntermediateValue::Var { name: result } - } - TypedExpr::IntToFloat { value, .. } => { - let value = to_intermediate_expr_inner( - *value, - reuse_symbols, - symbols, - temp_var_counter, - exprs, - temp_float_vars, - temp_int_vars, - ); - - if let IntermediateValue::Var { name: value } = value { - free_temp_var(value, Type::Int, temp_int_vars, temp_float_vars, symbols) - } - - let result = allocate_temp_var( - ty, - reuse_symbols, - temp_var_counter, - temp_float_vars, - temp_int_vars, - symbols, - ); - - exprs.push(IntermediateExpr::IntToFloat { result, value }); - - IntermediateValue::Var { name: result } + } => { + write!(f, "{:#} , {}, {}", op, result, lhs, rhs) + }, + IntermediateExpr::Identity { result, value } => { + write!(f, "idt , {}", result, value) + }, } } } -fn allocate_temp_var( - ty: Type, - reuse_symbols: bool, - temp_var_counter: &mut usize, - temp_float_vars: &mut Vec, - temp_int_vars: &mut Vec, - symbols: &mut SymbolsTable, -) -> Symbol { - if reuse_symbols { - match ty { - Type::Int if !temp_int_vars.is_empty() => temp_int_vars.pop().unwrap(), - Type::Float if !temp_float_vars.is_empty() => temp_float_vars.pop().unwrap(), - _ => add_temp_var(temp_var_counter, ty, symbols), +impl IntermediateExpr { + pub fn as_u8(&self) -> u8 { + match self { + IntermediateExpr::IntToFloat { .. } => 0x01, + IntermediateExpr::BinOp { .. } => 0x02, + IntermediateExpr::Identity { .. } => 0x03, } - } else { - add_temp_var(temp_var_counter, ty, symbols) - } -} - -fn free_temp_var( - name: Symbol, - ty: Type, - temp_int_vars: &mut Vec, - temp_float_vars: &mut Vec, - symbols: &mut SymbolsTable, -) { - let temporary = symbols.temporary(&name).unwrap(); - if !temporary { - return; } - if ty == Type::Int { - temp_int_vars.push(name); - } else { - temp_float_vars.push(name); + pub fn to_writer(&self, writer: &mut impl io::Write) -> io::Result<()> { + writer.write_u8(self.as_u8())?; + + match self { + IntermediateExpr::IntToFloat { result, value } => { + writer.write_u64::(result.into())?; + value.to_writer(writer)?; + }, + IntermediateExpr::BinOp { + result, + lhs, + op, + rhs, + } => { + writer.write_u64::(result.into())?; + lhs.to_writer(writer)?; + writer.write_u8(op.into())?; + rhs.to_writer(writer)?; + }, + IntermediateExpr::Identity { result, value } => { + writer.write_u64::(result.into())?; + value.to_writer(writer)?; + }, + } + + Ok(()) + } + + pub fn from_reader(reader: &mut impl io::Read) -> io::Result { + let b = reader.read_u8()?; + match b { + 0x01 => Ok(IntermediateExpr::IntToFloat { + result: reader.read_u64::()?.into(), + value: IntermediateValue::from_reader(reader)?, + }), + 0x02 => Ok(IntermediateExpr::BinOp { + result: reader.read_u64::()?.into(), + lhs: IntermediateValue::from_reader(reader)?, + op: BinOp::try_from(reader.read_u8()?) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid bin op"))?, + rhs: IntermediateValue::from_reader(reader)?, + }), + 0x03 => Ok(IntermediateExpr::Identity { + result: reader.read_u64::()?.into(), + value: IntermediateValue::from_reader(reader)?, + }), + b => Err(io::Error::new( + io::ErrorKind::InvalidData, + format!("invalid byte: {b}"), + )), + } + } + + pub fn from_typed_expr( + expr: TypedExpr, + reuse_symbols: bool, + symbols: &mut SymbolsTable, + ) -> Vec { + let ty = expr.ty(symbols); + + let mut exprs = Vec::new(); + let mut temp_var_context = TempVarContext::new(reuse_symbols, symbols); + + let value = Self::from_typed_expr_inner(expr, &mut exprs, &mut temp_var_context); + + if exprs.is_empty() { + return vec![IntermediateExpr::Identity { + result: temp_var_context.get_temp_var(ty), + value, + }]; + } + + exprs + } + + fn from_typed_expr_inner( + expr: TypedExpr, + exprs: &mut Vec, + temp_var_context: &mut TempVarContext, + ) -> IntermediateValue { + let ty = expr.ty(temp_var_context.symbols()); + + match expr { + TypedExpr::Int { value, .. } => IntermediateValue::Int { value }, + TypedExpr::Float { value, .. } => IntermediateValue::Float { value }, + TypedExpr::Var { name, .. } => IntermediateValue::Var { name }, + TypedExpr::BinOp { lhs, op, rhs, .. } => { + let lhs = Self::from_typed_expr_inner(*lhs, exprs, temp_var_context); + let rhs = Self::from_typed_expr_inner(*rhs, exprs, temp_var_context); + + if let IntermediateValue::Var { name } = lhs { + temp_var_context.free_temp_var(name) + } + + if let IntermediateValue::Var { name } = rhs { + temp_var_context.free_temp_var(name) + } + + let result = temp_var_context.get_temp_var(ty); + + exprs.push(IntermediateExpr::BinOp { + result, + lhs, + op, + rhs, + }); + + IntermediateValue::Var { name: result } + }, + TypedExpr::IntToFloat { value, .. } => { + let value = Self::from_typed_expr_inner(*value, exprs, temp_var_context); + + if let IntermediateValue::Var { name: value } = value { + temp_var_context.free_temp_var(value) + } + + let result = temp_var_context.get_temp_var(ty); + + exprs.push(IntermediateExpr::IntToFloat { result, value }); + + IntermediateValue::Var { name: result } + }, + } } } - -pub fn to_intermediate_expr( - expr: TypedExpr, - reuse_symbols: bool, - symbols: &mut SymbolsTable, -) -> Vec { - let mut exprs = Vec::new(); - let mut intermediate_var_counter = 0; - let mut temp_float_vars = Vec::new(); - let mut temp_int_vars = Vec::new(); - - to_intermediate_expr_inner( - expr, - reuse_symbols, - symbols, - &mut intermediate_var_counter, - &mut exprs, - &mut temp_float_vars, - &mut temp_int_vars, - ); - - exprs -} diff --git a/src/representation/mod.rs b/src/representation/mod.rs index 51da266..6ec0fa3 100644 --- a/src/representation/mod.rs +++ b/src/representation/mod.rs @@ -1 +1,2 @@ pub mod intermediate; +mod util; diff --git a/src/representation/util.rs b/src/representation/util.rs new file mode 100644 index 0000000..0be3374 --- /dev/null +++ b/src/representation/util.rs @@ -0,0 +1,71 @@ +use crate::ast::typed::Type; +use crate::symbols::{Symbol, SymbolsTable}; + +#[derive(Debug)] +pub struct TempVarContext<'t> { + counter: usize, + floats: Vec, + ints: Vec, + reuse_symbols: bool, + symbols: &'t mut SymbolsTable, +} + +impl<'t> TempVarContext<'t> { + pub fn new(reuse_symbols: bool, symbols: &'t mut SymbolsTable) -> Self { + Self { + counter: 0, + floats: Vec::new(), + ints: Vec::new(), + reuse_symbols, + symbols, + } + } + + pub fn get_temp_var(&mut self, ty: Type) -> Symbol { + if self.reuse_symbols { + match ty { + Type::Int if !self.ints.is_empty() => self.ints.pop().unwrap(), + Type::Float if !self.floats.is_empty() => self.floats.pop().unwrap(), + _ => self.allocate_temp_var(ty), + } + } else { + self.allocate_temp_var(ty) + } + } + + pub fn symbols(&self) -> &SymbolsTable { + self.symbols + } + + fn allocate_temp_var(&mut self, ty: Type) -> Symbol { + let result = self.symbols.add(format!("#T{}", self.counter)); + { + let data = self.symbols.resolve_mut(&result).unwrap(); + data.ty = Some(ty); + data.temporary = true; + } + self.counter += 1; + + result + } + + pub fn free_temp_var(&mut self, name: Symbol) { + let Some(data) = self.symbols.resolve(&name) else { + return; + }; + + if !data.temporary { + return; + } + + let Some(ty) = data.ty else { + return; + }; + + if ty == Type::Int { + self.ints.push(name); + } else { + self.floats.push(name); + } + } +} diff --git a/src/symbols/builtin.rs b/src/symbols/builtin.rs deleted file mode 100644 index e69de29..0000000 diff --git a/src/symbols/mod.rs b/src/symbols/mod.rs index 0baa837..f155019 100644 --- a/src/symbols/mod.rs +++ b/src/symbols/mod.rs @@ -1,14 +1,25 @@ -pub mod builtin; +use std::collections::{HashMap, hash_map}; +use std::fmt::Display; +use std::io; -use std::{ - collections::{hash_map, HashMap}, - fmt::Display, -}; +use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use crate::ast::typed::Type; #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] -pub struct Symbol(usize); +pub struct Symbol(u64); + +impl From<&Symbol> for u64 { + fn from(value: &Symbol) -> Self { + value.0 + } +} + +impl From for Symbol { + fn from(value: u64) -> Self { + Self(value) + } +} #[derive(Debug)] pub struct SymbolData { @@ -18,6 +29,43 @@ pub struct SymbolData { pub temporary: bool, } +impl SymbolData { + fn from_reader(reader: &mut impl io::Read) -> io::Result { + let id = reader.read_u64::()?; + let name = { + let n = reader.read_u64::()?; + let mut buf = vec![0; n as usize]; + reader.read_exact(&mut buf)?; + String::from_utf8(buf) + .map_err(|_| io::Error::new(io::ErrorKind::InvalidData, "invalid utf-8"))? + }; + + let ty = { + let b = reader.read_u8()?; + Type::try_from(b).ok() + }; + + let temporary = reader.read_u8()? != 0; + + Ok(Self { + id: Symbol(id), + name, + ty, + temporary, + }) + } + + fn to_writer(&self, writer: &mut impl io::Write) -> io::Result<()> { + writer.write_u64::(self.id.0)?; + writer.write_u64::(self.name.len() as u64)?; + writer.write_all(self.name.as_bytes())?; + writer.write_u8(self.ty.map_or(0, |ty| ty.into()))?; + writer.write_u8(self.temporary as u8)?; + + Ok(()) + } +} + impl Display for SymbolData { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut builder = f.debug_struct("symbol"); @@ -44,7 +92,7 @@ impl Display for Symbol { #[derive(Debug)] pub struct SymbolsTable { symbols: HashMap, - next_id: usize, + next_id: u64, } impl SymbolsTable { @@ -55,6 +103,36 @@ impl SymbolsTable { } } + pub fn from_reader(reader: &mut impl io::Read) -> io::Result { + let n = reader.read_u64::()?; + + let mut max_id = 0; + let mut symbols = Vec::with_capacity(n as usize); + for _ in 0..n { + let data = SymbolData::from_reader(reader)?; + max_id = max_id.max(data.id.0); + symbols.push(data); + } + + Ok(Self { + symbols: symbols + .into_iter() + .map(|data| (data.name.clone(), data)) + .collect(), + next_id: max_id + 1, + }) + } + + pub fn to_writer(&self, writer: &mut impl io::Write) -> io::Result<()> { + writer.write_u64::(self.symbols.len() as u64)?; + + for data in self.symbols.values() { + data.to_writer(writer)?; + } + + Ok(()) + } + pub fn retain(&mut self, symbols: &[Symbol]) { self.symbols.retain(|_, data| symbols.contains(&data.id)); } @@ -81,34 +159,18 @@ impl SymbolsTable { } pub fn resolve(&self, symbol: &Symbol) -> Option<&SymbolData> { - self.symbols.iter().find_map( - |(_name, data)| { - if &data.id == symbol { - Some(data) - } else { - None - } - }, - ) + self.symbols.values().find(|data| &data.id == symbol) } pub fn resolve_mut(&mut self, symbol: &Symbol) -> Option<&mut SymbolData> { - self.symbols.iter_mut().find_map( - |(_name, data)| { - if &data.id == symbol { - Some(data) - } else { - None - } - }, - ) + self.symbols.values_mut().find(|data| &data.id == symbol) } pub fn type_of(&self, symbol: &Symbol) -> Option { self.resolve(symbol).and_then(|data| data.ty) } - pub fn temporary(&self, symbol: &Symbol) -> Option { + pub fn is_temporary(&self, symbol: &Symbol) -> Option { self.resolve(symbol).map(|data| data.temporary) } } diff --git a/src/util.rs b/src/util.rs index b9659dd..6048132 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,13 +1,10 @@ use std::collections::HashSet; use std::io; -use crate::representation::intermediate::IntermediateValue; -use crate::symbols::Symbol; -use crate::{ - ast::{typed::TypedExpr, UntypedExpr}, - representation::intermediate::IntermediateExpr, - symbols::SymbolsTable, -}; +use crate::ast::UntypedExpr; +use crate::ast::typed::TypedExpr; +use crate::representation::intermediate::{IntermediateExpr, IntermediateValue}; +use crate::symbols::{Symbol, SymbolsTable}; pub fn collect_used_symbols(expr: &[IntermediateExpr]) -> Vec { let mut used_symbols = HashSet::new(); @@ -16,21 +13,31 @@ pub fn collect_used_symbols(expr: &[IntermediateExpr]) -> Vec { match expr { IntermediateExpr::IntToFloat { result, value } => { used_symbols.insert(*result); + if let IntermediateValue::Var { name } = value { used_symbols.insert(*name); } - } + }, IntermediateExpr::BinOp { result, lhs, rhs, .. } => { used_symbols.insert(*result); + if let IntermediateValue::Var { name } = lhs { used_symbols.insert(*name); } + if let IntermediateValue::Var { name } = rhs { used_symbols.insert(*name); } - } + }, + IntermediateExpr::Identity { result, value } => { + used_symbols.insert(*result); + + if let IntermediateValue::Var { name } = value { + used_symbols.insert(*name); + } + }, } } @@ -42,19 +49,7 @@ pub fn print_intermediate_exprs( writer: &mut impl io::Write, ) -> io::Result<()> { for expr in exprs { - match expr { - IntermediateExpr::IntToFloat { result, value } => { - writeln!(writer, "i2f , {}", result, value)?; - } - IntermediateExpr::BinOp { - result, - lhs, - op, - rhs, - } => { - writeln!(writer, "{:#} , {}, {}", op, result, lhs, rhs)?; - } - } + writeln!(writer, "{}", expr)?; } Ok(()) @@ -69,11 +64,11 @@ pub fn print_postfix_expr(expr: &TypedExpr, writer: &mut impl io::Write) -> io:: print_postfix_expr(lhs, writer)?; print_postfix_expr(rhs, writer)?; write!(writer, "{} ", op)?; - } + }, TypedExpr::IntToFloat { value, .. } => { print_postfix_expr(value, writer)?; write!(writer, "i2f ")?; - } + }, } Ok(()) @@ -92,11 +87,17 @@ fn write_typed_expr( match expr { TypedExpr::Int { value, .. } => writeln!(writer, "<{}>", value), - TypedExpr::Float { value, .. } => writeln!(writer, "<{}>", value), + TypedExpr::Float { value, .. } => { + if value == &value.trunc() { + writeln!(writer, "<{}.0>", value) + } else { + writeln!(writer, "<{}>", value) + } + }, TypedExpr::Var { name, .. } => { let ty = symbols.resolve(name).unwrap().ty.unwrap(); writeln!(writer, "", name, ty) - } + }, TypedExpr::BinOp { lhs, op, rhs, .. } => { writeln!(writer, "<{}>", op)?; @@ -108,7 +109,7 @@ fn write_typed_expr( write_typed_expr(lhs, symbols, writer, &new_prefix, false)?; write_typed_expr(rhs, symbols, writer, &new_prefix, true) - } + }, TypedExpr::IntToFloat { value, .. } => { writeln!(writer, "i2f")?; @@ -119,7 +120,7 @@ fn write_typed_expr( }; write_typed_expr(value, symbols, writer, &new_prefix, true) - } + }, } } @@ -143,7 +144,13 @@ fn write_untyped_expr( match expr { UntypedExpr::Int { value, .. } => writeln!(writer, "<{}>", value), - UntypedExpr::Float { value, .. } => writeln!(writer, "<{}>", value), + UntypedExpr::Float { value, .. } => { + if value == &value.trunc() { + writeln!(writer, "<{}.0>", value) + } else { + writeln!(writer, "<{}>", value) + } + }, UntypedExpr::Var { name: id, typename, .. } => { @@ -152,7 +159,7 @@ fn write_untyped_expr( write!(writer, ",{}", typename)?; } writeln!(writer, ">") - } + }, UntypedExpr::BinOp { lhs, op, rhs, .. } => { writeln!(writer, "<{}>", op)?; @@ -164,7 +171,7 @@ fn write_untyped_expr( write_untyped_expr(lhs, writer, &new_prefix, false)?; write_untyped_expr(rhs, writer, &new_prefix, true) - } + }, } }