diff --git a/Cargo.toml b/Cargo.toml
index 7cbb650..bd20c9e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,4 +6,4 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
-socket2 = "0.5.3"
+rand = "0.8.5"
diff --git a/src/assembly.rs b/src/assembly.rs
deleted file mode 100644
index be9cfee..0000000
--- a/src/assembly.rs
+++ /dev/null
@@ -1,11 +0,0 @@
-use crate::tokeniser::Token;
-
-pub fn assemblize(tokens: Vec<Token>) -> String {
-    let mut output = String::new();
-
-    for token in tokens {
-        output += "hii"
-    }
-
-    output
-}
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..8c56714
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,375 @@
+extern crate rand;
+
+use rand::{distributions::Alphanumeric, Rng};
+use std::collections::HashMap;
+
+/// A named routine: either user code built from statements or a builtin with fixed assembly.
+#[derive(Debug, Clone)]
+pub struct Function {
+    /// Label emitted for the function and the key it is registered under.
+    pub name: String,
+    /// Body statements, lowered in order when no fixed assembly is present.
+    pub statements: Vec<Statement>,
+    /// Hand-written assembly emitted verbatim instead of lowering `statements`.
+    assembly: Option<String>,
+}
+
+/// A single statement inside a function body.
+#[derive(Debug, Clone)]
+pub enum Statement {
+    /// Call of the named function with the given argument expressions.
+    FunctionCall(String, Vec<ValueNode>),
+    /// Variable binding; code generation for it is not implemented yet.
+    VariableDeclaration(String, ValueNode),
+}
+
+/// An expression whose generated code leaves its value on top of the stack.
+#[derive(Debug, Clone)]
+pub enum ValueNode {
+    Literal(Literal),
+    Operator(Operator),
+}
+
+/// A literal value.
+#[derive(Debug, Clone)]
+pub enum Literal {
+    Int(i64),
+}
+
+/// A binary arithmetic operation on two sub-expressions.
+#[derive(Debug, Clone)]
+pub enum Operator {
+    Add(Box<ValueNode>, Box<ValueNode>),
+    Sub(Box<ValueNode>, Box<ValueNode>),
+}
+
+/// Context threaded through code generation; holds every known function by name.
+#[derive(Debug, Clone)]
+pub struct AssemblerState {
+    pub functions: HashMap<String, Function>,
+}
+
+/// Anything that can be lowered to assembly text.
+pub trait Node {
+    fn to_assembly(&self, state: &mut AssemblerState) -> String;
+}
+
+impl Function {
+    /// Creates a user-defined function whose assembly is generated from `statements`.
+    pub fn new(name: String, statements: Vec<Statement>) -> Function {
+        Function {
+            name,
+            statements,
+            assembly: None,
+        }
+    }
+
+    /// Registers a copy of this function in `state` under its name and returns another copy.
+    pub fn add(&self, state: &mut AssemblerState) -> Function {
+        state.functions.insert(self.name.clone(), self.to_owned());
+        self.to_owned()
+    }
+}
+
+impl AssemblerState {
+    /// Creates a state pre-populated with the builtin functions.
+    pub fn new() -> AssemblerState {
+        AssemblerState {
+            functions: get_default_functions(),
+        }
+    }
+}
+
+/// Builds the table of builtin functions (`out` and `exit`) with hand-written assembly.
+fn get_default_functions() -> HashMap<String, Function> {
+    let mut functions = HashMap::new();
+    functions.insert(
+        "out".to_string(),
+        Function {
+            name: "out".to_string(), // must match the map key and the `out:` label
+            statements: vec![],
+            assembly: Some(
+                "
+out:
+;
+; Pop the argument of the stack
+;
+
+SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
+RAM-OUT OUTPUT-IN 0xFF ; Output top address onto output register
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Subtract one
+ALU-OUT SE-IN 0xFF ; Store in SE
+ROM-OUT FLAGS-IN 0x00 ; Add mode
+
+;
+; Pop the return address of the stack
+;
+
+SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
+RAM-OUT ROM-ADDR-U-IN 0xFF ; Store top address in ROM jump address
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Subtract one
+ALU-OUT SE-IN 0xFF ; Store in SE
+ROM-OUT FLAGS-IN 0x00 ; Add mode
+
+F-OUT JMP 0xFF"
+                    .to_string(),
+            ),
+        },
+    );
+    functions.insert(
+        "exit".to_string(),
+        Function {
+            name: "exit".to_string(),
+            statements: vec![],
+            assembly: Some(
+                "
+exit:
+F-OUT JMP 0xFF"
+                    .to_string(),
+            ),
+        },
+    );
+    functions
+}
+
+impl Node for Function {
+    fn to_assembly(&self, state: &mut AssemblerState) -> String {
+        match &self.assembly {
+            Some(v) => v.to_string(),
+            None => format!(
+                "{}:\n{}
+;
+; Pop the return address of the stack
+;
+
+SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
+RAM-OUT ROM-ADDR-U-IN 0xFF ; Store top address in ROM jump address
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Subtract one
+ALU-OUT SE-IN 0xFF ; Store in SE
+ROM-OUT FLAGS-IN 0x00 ; Add mode
+
+F-OUT JMP 0xFF
+",
+                self.name,
+                {
+                    self.statements
+                        .iter()
+                        .map(|x| x.to_assembly(state))
+                        .collect::<Vec<String>>()
+                        .join("\n")
+                }
+            ),
+        }
+    }
+}
+
+impl Node for Statement {
+    fn to_assembly(&self, state: &mut AssemblerState) -> String {
+        match self {
+            Statement::FunctionCall(name, arguments) => {
+                if !state.functions.contains_key(name) {
+                    todo!()
+                }
+
+                let argument_assembly: String = arguments
+                    .iter()
+                    .map(|x| x.to_assembly(state))
+                    .collect::<Vec<String>>()
+                    .join("\n");
+
+                // Random suffix so every call site gets its own return label.
+                let call_id: String = rand::thread_rng()
+                    .sample_iter(&Alphanumeric)
+                    .take(30)
+                    .map(char::from)
+                    .collect();
+
+                format!(
+                    "
+;
+; Stack
+;
+
+; Increment Stack Pointer
+
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Add 1
+ALU-OUT SE-IN 0xFF ; Store
+
+; Store in top stack pointer
+
+ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
+ROM-OUT RAM-IN ${name}-{call_id} ; Store address after jump in ram
+
+;
+; Arguments
+;
+
+{argument_assembly}
+
+;
+; Jump
+;
+ROM-OUT ROM-ADDR-U-IN ${name} ; Jump to function label
+F-OUT JMP 0xFF ; Jump to function
+
+{name}-{call_id}:
+"
+                )
+            }
+            Statement::VariableDeclaration(_name, _value) => {
+                todo!()
+            }
+        }
+    }
+}
+
+impl Node for ValueNode {
+    fn to_assembly(&self, state: &mut AssemblerState) -> String {
+        match self {
+            ValueNode::Literal(v) => v.to_assembly(state),
+            ValueNode::Operator(o) => o.to_assembly(state),
+        }
+    }
+}
+
+impl Node for Literal {
+    fn to_assembly(&self, _state: &mut AssemblerState) -> String {
+        match self {
+            Literal::Int(v) => {
+                format!(
+                    "
+;
+; Stack
+;
+
+; Increment Stack Pointer
+
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Add 1
+ALU-OUT SE-IN 0xFF ; Store
+
+; Store in top stack pointer
+
+ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
+ROM-OUT RAM-IN {v}; Store literal value in RAM"
+                )
+            }
+        }
+    }
+}
+
+impl Node for Operator {
+    fn to_assembly(&self, state: &mut AssemblerState) -> String {
+        match self {
+            Operator::Add(a, b) => {
+                format!(
+                    "
+{}
+{}
+
+SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
+RAM-OUT C-IN 0xFF ; Output top address onto output register
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Subtract one
+ALU-OUT SE-IN 0xFF ; Store in SE
+
+SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
+RAM-OUT D-IN 0xFF ; Output top address onto output register
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Subtract one
+ALU-OUT SE-IN 0xFF ; Store in SE
+ROM-OUT FLAGS-IN 0x00 ; Add mode
+
+C-OUT A-IN 0xFF ; Move temporary values into a register for alu computation
+D-OUT B-IN 0xFF ; Move temporary values into a register for alu computation
+ALU-OUT C-IN 0xFF ; Move ALU output into temporary register
+
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Add 1
+ALU-OUT SE-IN 0xFF ; Store
+
+ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
+C-OUT RAM-IN 0xFF ; Store address after jump in ram
+",
+                    a.to_assembly(state),
+                    b.to_assembly(state)
+                )
+            }
+            Operator::Sub(a, b) => {
+                format!(
+                    "
+{}
+{}
+
+SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
+RAM-OUT C-IN 0xFF ; Output top address onto output register
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Subtract one
+ALU-OUT SE-IN 0xFF ; Store in SE
+
+SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
+RAM-OUT D-IN 0xFF ; Output top address onto output register
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Subtract one
+ALU-OUT SE-IN 0xFF ; Store in SE
+ROM-OUT FLAGS-IN 0x00 ; Add mode
+
+ROM-OUT FLAGS-IN 0x80 ; Subtract mode
+C-OUT B-IN 0xFF ; Move temporary values into a register for alu computation
+D-OUT A-IN 0xFF ; Move temporary values into a register for alu computation
+ALU-OUT C-IN 0xFF ; Move ALU output into temporary register
+ROM-OUT FLAGS-IN 0x00 ; Add mode
+
+SE-OUT A-IN 0xFF ; Fetch SE
+ROM-OUT B-IN 0x01 ; Add 1
+ALU-OUT SE-IN 0xFF ; Store
+
+ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
+C-OUT RAM-IN 0xFF ; Store address after jump in ram
+",
+                    a.to_assembly(state),
+                    b.to_assembly(state)
+                )
+            }
+        }
+    }
+}
+
+/// Lowers `functions` to a single assembly listing: user functions first, then the builtins.
+pub fn to_assembly(functions: Vec<Function>) -> String {
+    let mut final_product: String = "".to_owned();
+    let mut std_function_assembly = "".to_owned();
+    let mut state = AssemblerState::new();
+
+    for std_function in state.clone().functions {
+        std_function_assembly += &std_function.1.to_assembly(&mut state);
+    }
+
+    for standard_function in &functions {
+        standard_function.add(&mut state);
+    }
+
+    for standard_function in &functions {
+        final_product += &standard_function.to_assembly(&mut state);
+    }
+
+    final_product + &std_function_assembly
+}
diff --git a/src/tokeniser.rs b/src/tokeniser.rs
index ea7d96e..9eb505e 100644
--- a/src/tokeniser.rs
+++ b/src/tokeniser.rs
@@ -1,19 +1,29 @@
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Token {
-    SEMI,
-    NUMBER(String),
-    IDENTIFIER(String),
-    KEYWORD(Keyword),
+    Semi,
+    Equals,
+    Comma,
+    Plus,
+    Minus,
+    Number(String),
+    Identifier(String),
+    Keyword(Keyword),
+    OpenParanthesis,
+    CloseParanthesis,
+    OpenSquiglyBracket,
+    CloseSquiglyBracket,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum Keyword {
-    RETURN,
+    Return,
+    Function,
+    Let,
 }
 
 enum TokenFinishedUnfinished {
-    UNFINISHED(Option<Token>),
-    FINISHED(Token),
+    Unfinished(Option<Token>),
+    Finished(Token),
 }
 
 use std::collections::HashMap;
@@ -24,7 +34,9 @@ fn generate_keyword_map() -> HashMap<String, Keyword> {
     let mut keywords: HashMap<String, Keyword> = HashMap::new();
     use Keyword as K;
 
-    keywords.insert("return".to_string(), K::RETURN);
+    keywords.insert("return".to_string(), K::Return);
+    keywords.insert("fn".to_string(), K::Function);
+    keywords.insert("let".to_string(), K::Let);
 
     keywords
 }
@@ -36,46 +48,75 @@ fn tokenise_current_character(
     let keywords = generate_keyword_map();
 
     match current_token_type {
-        Some(Token::NUMBER(ref v)) => {
+        Some(Token::Number(ref v)) => {
             let mut current_characters = v.clone();
 
             if current_character.is_digit(10) {
                 current_characters.push(current_character);
-                return UNFINISHED(Some(Token::NUMBER(current_characters)));
+                return Unfinished(Some(Token::Number(current_characters)));
             }
 
-            FINISHED(Token::NUMBER(current_characters))
+            Finished(Token::Number(current_characters))
         }
-        Some(Token::IDENTIFIER(ref v)) => {
+        Some(Token::Identifier(ref v)) => {
             let mut current_characters = v.clone();
 
             if current_character.is_alphanumeric() {
                 current_characters.push(current_character);
-                return UNFINISHED(Some(Token::IDENTIFIER(current_characters)));
+                return Unfinished(Some(Token::Identifier(current_characters)));
             }
 
             match keywords.get(&current_characters) {
-                Some(keyword) => FINISHED(Token::KEYWORD(keyword.clone())),
-                None => FINISHED(Token::IDENTIFIER(current_characters)),
+                Some(keyword) => Finished(Token::Keyword(keyword.clone())),
+                None => Finished(Token::Identifier(current_characters)),
             }
         }
-        Some(Token::SEMI) => FINISHED(Token::SEMI), // Needed because we're always going back a step after returning finished token
         None => {
             if current_character.is_digit(10) {
-                return UNFINISHED(Some(Token::NUMBER(String::from(current_character))));
+                return Unfinished(Some(Token::Number(String::from(current_character))));
             }
 
             if current_character.is_alphabetic() {
-                return UNFINISHED(Some(Token::IDENTIFIER(String::from(current_character))));
+                return Unfinished(Some(Token::Identifier(String::from(current_character))));
             }
 
             if current_character == ';' {
-                return UNFINISHED(Some(Token::SEMI));
+                return Unfinished(Some(Token::Semi));
             }
 
-            UNFINISHED(None)
+            if current_character == '(' {
+                return Unfinished(Some(Token::OpenParanthesis));
+            }
+            if current_character == ')' {
+                return Unfinished(Some(Token::CloseParanthesis));
+            }
+
+            if current_character == '{' {
+                return Unfinished(Some(Token::OpenSquiglyBracket));
+            }
+            if current_character == '}' {
+                return Unfinished(Some(Token::CloseSquiglyBracket));
+            }
+
+            if current_character == '=' {
+                return Unfinished(Some(Token::Equals));
+            }
+
+            if current_character == ',' {
+                return Unfinished(Some(Token::Comma));
+            }
+
+            if current_character == '+' {
+                return Unfinished(Some(Token::Plus));
+            }
+
+            if current_character == '-' {
+                return Unfinished(Some(Token::Minus));
+            }
+
+            Unfinished(None)
         }
-        Some(v) => FINISHED(v),
+        Some(v) => Finished(v),
     }
 }
 
@@ -90,10 +131,10 @@ pub fn tokenise(input: String) -> Vec<Token> {
         current_character = input.as_bytes()[i] as char;
 
         match tokenise_current_character(current_character, current_token_type.clone()) {
-            UNFINISHED(v) => {
+            Unfinished(v) => {
                 current_token_type = v.clone();
             }
-            FINISHED(v) => {
+            Finished(v) => {
                 tokens.push(v);
                 current_token_type = None;
                 i -= 1;