commit 9c9a4cab1c2cb3e5bf84de2570f8110f7cd74a25 Author: vanten Date: Wed Sep 13 09:34:05 2023 +0200 Initial Commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..26350ad --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,92 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "plonkus" +version = "0.1.0" +dependencies = [ + "socket2", +] + +[[package]] +name = "socket2" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7cbb650 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "plonkus" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +socket2 = "0.5.3" diff --git a/src/assembly.rs b/src/assembly.rs new file mode 100644 index 0000000..be9cfee --- /dev/null +++ b/src/assembly.rs @@ -0,0 +1,11 @@ +use crate::tokeniser::Token; + +pub fn assemblize(tokens: Vec) -> String { + let mut output = String::new(); + + for token in tokens { + output += "hii" + } + + output +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..b01cbec --- /dev/null +++ b/src/main.rs @@ -0,0 +1,12 @@ +mod assembly; +mod tokeniser; + +fn 
//! Tokeniser for the plonkus language (src/tokeniser.rs, reconstructed:
//! the patch text had generic arguments stripped — `Vec<Token>`,
//! `HashMap<String, Keyword>`, `Option<Token>` — and `&current_characters`
//! corrupted into `¤t_characters` by HTML-entity decoding).

use std::collections::HashMap;

use self::TokenFinishedUnfinished::*;

/// A lexical token.
#[derive(Debug, Clone)]
pub enum Token {
    SEMI,
    NUMBER(String),
    IDENTIFIER(String),
    KEYWORD(Keyword),
}

/// Reserved words of the language.
#[derive(Debug, Clone)]
pub enum Keyword {
    RETURN,
}

/// Result of feeding one character to the tokeniser state machine:
/// either a token still being accumulated (or no token at all), or a
/// completed token whose terminating character must be re-scanned.
enum TokenFinishedUnfinished {
    UNFINISHED(Option<Token>),
    FINISHED(Token),
}

/// Builds the reserved-word lookup table.
fn generate_keyword_map() -> HashMap<String, Keyword> {
    let mut keywords = HashMap::new();
    keywords.insert("return".to_string(), Keyword::RETURN);
    keywords
}

/// Promotes a finished identifier: reserved words become `KEYWORD`
/// tokens, everything else stays an `IDENTIFIER`.
fn keyword_or_identifier(characters: String) -> Token {
    match generate_keyword_map().get(&characters) {
        Some(keyword) => Token::KEYWORD(keyword.clone()),
        None => Token::IDENTIFIER(characters),
    }
}

/// Advances the state machine by one character.
///
/// Takes ownership of the in-progress token (the original cloned it on
/// every character) and either extends it or finishes it.
fn tokenise_current_character(
    current_character: char,
    current_token_type: Option<Token>,
) -> TokenFinishedUnfinished {
    match current_token_type {
        Some(Token::NUMBER(mut digits)) => {
            if current_character.is_ascii_digit() {
                digits.push(current_character);
                UNFINISHED(Some(Token::NUMBER(digits)))
            } else {
                FINISHED(Token::NUMBER(digits))
            }
        }
        Some(Token::IDENTIFIER(mut name)) => {
            if current_character.is_alphanumeric() {
                name.push(current_character);
                UNFINISHED(Some(Token::IDENTIFIER(name)))
            } else {
                FINISHED(keyword_or_identifier(name))
            }
        }
        // SEMI is a single character but is only emitted when the NEXT
        // character arrives, because the caller always re-scans the
        // character that produced a FINISHED token.
        Some(Token::SEMI) => FINISHED(Token::SEMI),
        Some(other) => FINISHED(other),
        None => {
            if current_character.is_ascii_digit() {
                UNFINISHED(Some(Token::NUMBER(String::from(current_character))))
            } else if current_character.is_alphabetic() {
                UNFINISHED(Some(Token::IDENTIFIER(String::from(current_character))))
            } else if current_character == ';' {
                UNFINISHED(Some(Token::SEMI))
            } else {
                // Whitespace / unrecognised characters are skipped.
                UNFINISHED(None)
            }
        }
    }
}

/// Splits `input` into a token stream.
///
/// Fixes over the original: iterates over `char`s instead of indexing
/// `as_bytes()` (which mis-read multi-byte UTF-8), and flushes a token
/// still pending at end of input (the original silently dropped it, so
/// `return 42` with no trailing terminator lost the `42`).
pub fn tokenise(input: String) -> Vec<Token> {
    let mut tokens: Vec<Token> = vec![];
    let mut current_token_type: Option<Token> = None;

    // Manual index instead of a `for` loop because a FINISHED result
    // means the current character terminated the previous token and
    // must be examined again without advancing.
    let characters: Vec<char> = input.chars().collect();
    let mut i = 0;
    while i < characters.len() {
        match tokenise_current_character(characters[i], current_token_type.take()) {
            UNFINISHED(state) => {
                current_token_type = state;
                i += 1;
            }
            FINISHED(token) => {
                tokens.push(token);
                // state is already None (taken above); re-scan characters[i].
            }
        }
    }

    // Flush whatever was still being accumulated when input ended.
    match current_token_type {
        Some(Token::IDENTIFIER(name)) => tokens.push(keyword_or_identifier(name)),
        Some(token) => tokens.push(token),
        None => {}
    }

    tokens
}