Added an ast

This commit is contained in:
vanten-s 2023-09-16 22:21:40 +02:00
parent 2dca0b55f4
commit 6ea0c5e9ac
Signed by: vanten-s
GPG key ID: DE3060396884D3F2
4 changed files with 424 additions and 37 deletions

View file

@ -6,4 +6,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies] [dependencies]
socket2 = "0.5.3" rand = "0.8.5"

View file

@ -1,11 +0,0 @@
use crate::tokeniser::Token;
/// Placeholder code generator: produces the text `hii` once for every token.
///
/// The tokens themselves are not inspected; only their count affects the output.
pub fn assemblize(tokens: Vec<Token>) -> String {
    tokens.iter().map(|_| "hii").collect()
}

357
src/ast.rs Normal file
View file

@ -0,0 +1,357 @@
extern crate rand;
use rand::{distributions::Alphanumeric, Rng};
use std::collections::HashMap;
/// A function known to the assembler, identified by `name`.
///
/// `Node::to_assembly` returns `assembly` verbatim when it is `Some`; otherwise
/// it emits a `name:` label followed by the assembly of every statement and a
/// return sequence.
#[derive(Debug, Clone)]
pub struct Function {
/// Function name; used as the label when assembly is generated from `statements`.
pub name: String,
/// Statements making up the body (left empty by the built-in functions).
pub statements: Vec<Statement>,
/// Pre-written assembly. `Function::new` sets this to `None`; the built-ins set it to `Some`.
assembly: Option<String>,
}
/// A single statement inside a function body.
#[derive(Debug, Clone)]
pub enum Statement {
/// Call of the function with the given name, passing the given argument expressions.
FunctionCall(String, Vec<ValueNode>),
/// Declaration of a variable (name, initial value). Assembly generation for this
/// variant is still `todo!()`.
VariableDeclaration(String, ValueNode),
}
/// An expression that yields a value: either a literal or an operator applied to
/// further value nodes.
#[derive(Debug, Clone)]
pub enum ValueNode {
/// A literal constant.
Literal(Literal),
/// An operator expression.
Operator(Operator),
}
/// A literal constant appearing in the source program.
#[derive(Debug, Clone)]
pub enum Literal {
/// An integer literal; its value is written as-is into the generated assembly.
Int(i64),
}
/// A binary operator applied to two operand expressions.
///
/// The operands are boxed because `ValueNode` and `Operator` refer to each other.
#[derive(Debug, Clone)]
pub enum Operator {
/// Addition of the two operands.
Add(Box<ValueNode>, Box<ValueNode>),
/// Subtraction; the generated assembly runs the ALU in subtract mode on the two operands.
Sub(Box<ValueNode>, Box<ValueNode>),
}
/// Mutable state threaded through assembly generation.
#[derive(Debug, Clone)]
pub struct AssemblerState {
/// Functions that may be called, keyed by name. `Statement::FunctionCall`
/// consults this map before emitting a call.
pub functions: HashMap<String, Function>,
}
/// Implemented by every AST element that can be lowered to assembly text.
pub trait Node {
/// Returns the assembly text for this node. `state` is passed mutably so that
/// implementations can consult (or update) the assembler state while generating.
fn to_assembly(&self, state: &mut AssemblerState) -> String;
}
impl Function {
pub fn new(name: String, statements: Vec<Statement>) -> Function {
Function {
name,
statements,
assembly: None,
}
}
pub fn add(&self, state: &mut AssemblerState) -> Function {
state.functions.insert(self.name.clone(), self.to_owned());
self.to_owned()
}
}
impl AssemblerState {
    /// Creates a state whose function table is pre-populated with the
    /// built-in functions returned by `get_default_functions`.
    pub fn new() -> AssemblerState {
        AssemblerState {
            functions: get_default_functions(),
        }
    }
}

/// `Default` mirrors [`AssemblerState::new`], so the type works with
/// `..Default::default()`, `unwrap_or_default()` and similar APIs.
impl Default for AssemblerState {
    fn default() -> Self {
        Self::new()
    }
}
/// Builds the table of built-in functions that every program can call.
///
/// Each built-in carries hand-written assembly and no statements:
/// * `out`  – pops its argument into the output register, then pops the
///   return address and jumps back to the caller.
/// * `exit` – a bare jump instruction under the `exit:` label.
///
/// Every entry's `name` matches the key it is stored under.
fn get_default_functions() -> HashMap<String, Function> {
    // Built-ins have no statements; their assembly is returned verbatim by
    // `Node::to_assembly`.
    let builtin = |name: &str, assembly: &str| Function {
        name: name.to_string(),
        statements: vec![],
        assembly: Some(assembly.to_string()),
    };

    let mut functions = HashMap::new();
    functions.insert(
        "out".to_string(),
        builtin(
            "out",
            "
out:
;
; Pop the argument of the stack
;
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT OUTPUT-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
;
; Pop the return address of the stack
;
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT ROM-ADDR-U-IN 0xFF ; Store top address in ROM jump address
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
F-OUT JMP 0xFF",
        ),
    );
    functions.insert(
        "exit".to_string(),
        builtin(
            "exit",
            "
exit:
F-OUT JMP 0xFF",
        ),
    );
    functions
}
impl Node for Function {
    /// Returns the pre-written assembly when the function has one. Otherwise
    /// emits the function label, the assembly of each statement (joined by
    /// newlines) and a return sequence that pops the return address and jumps
    /// to it.
    fn to_assembly(&self, state: &mut AssemblerState) -> String {
        // Built-ins short-circuit: their assembly is returned untouched.
        if let Some(prewritten) = &self.assembly {
            return prewritten.clone();
        }

        let mut rendered: Vec<String> = Vec::with_capacity(self.statements.len());
        for statement in &self.statements {
            rendered.push(statement.to_assembly(state));
        }
        let body = rendered.join("\n");

        format!(
            "{}:\n{}
;
; Pop the return address of the stack
;
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT ROM-ADDR-U-IN 0xFF ; Store top address in ROM jump address
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
F-OUT JMP 0xFF
",
            self.name, body
        )
    }
}
impl Node for Statement {
    /// Lowers a statement to assembly.
    ///
    /// A function call pushes a return label, emits the assembly of each
    /// argument, jumps to the callee's label and finally defines the return
    /// label. The return label is `<name>-<call_id>`, where `call_id` is 30
    /// random alphanumeric characters.
    ///
    /// # Panics
    /// Hits `todo!()` for variable declarations and for calls to a function
    /// that is not present in `state.functions`.
    fn to_assembly(&self, state: &mut AssemblerState) -> String {
        let (name, arguments) = match self {
            Statement::FunctionCall(name, arguments) => (name, arguments),
            Statement::VariableDeclaration(_name, _value) => todo!(),
        };

        // Calling an unknown function is not handled yet.
        if !state.functions.contains_key(name) {
            todo!()
        }

        let mut argument_parts: Vec<String> = Vec::with_capacity(arguments.len());
        for argument in arguments {
            argument_parts.push(argument.to_assembly(state));
        }
        let argument_assembly: String = argument_parts.join("\n");

        // Random suffix that distinguishes this call site's return label.
        let call_id: String = rand::thread_rng()
            .sample_iter(&Alphanumeric)
            .take(30)
            .map(char::from)
            .collect();

        format!(
            "
;
; Stack
;
; Increment Stack Pointer
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
; Store in top stack pointer
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
ROM-OUT RAM-IN ${name}-{call_id} ; Store address after jump in ram
;
; Arguments
;
{argument_assembly}
;
; Jump
;
ROM-OUT ROM-ADDR-U-IN ${name} ; Jump to function label
F-OUT JMP 0xFF ; Jump to function
{name}-{call_id}:
"
        )
    }
}
impl Node for ValueNode {
    /// Forwards to the wrapped literal or operator and returns its assembly.
    fn to_assembly(&self, state: &mut AssemblerState) -> String {
        let inner: &dyn Node = match self {
            ValueNode::Literal(literal) => literal as &dyn Node,
            ValueNode::Operator(operator) => operator as &dyn Node,
        };
        inner.to_assembly(state)
    }
}
impl Node for Literal {
    /// Emits assembly that increments the stack pointer and stores the literal
    /// value at the new top of the stack. The assembler state is not used.
    fn to_assembly(&self, _state: &mut AssemblerState) -> String {
        // `Int` is the only variant, so the pattern is irrefutable.
        let Literal::Int(v) = self;
        format!(
            "
;
; Stack
;
; Increment Stack Pointer
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
; Store in top stack pointer
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
ROM-OUT RAM-IN {v}; Store literal value in RAM"
        )
    }
}
impl Node for Operator {
    /// Emits the assembly of both operands (first, then second), followed by
    /// code that pops the two resulting values into the temporary registers,
    /// runs them through the ALU (add mode for `Add`, subtract mode for `Sub`)
    /// and pushes the ALU output back onto the stack.
    fn to_assembly(&self, state: &mut AssemblerState) -> String {
        // Operand code is generated identically for both operators, in
        // left-to-right order.
        let (first, second) = match self {
            Operator::Add(a, b) | Operator::Sub(a, b) => {
                (a.to_assembly(state), b.to_assembly(state))
            }
        };

        match self {
            Operator::Add(..) => format!(
                "
{}
{}
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT C-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT D-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
C-OUT A-IN 0xFF ; Move temporary values into a register for alu computation
D-OUT B-IN 0xFF ; Move temporary values into a register for alu computation
ALU-OUT C-IN 0xFF ; Move ALU output into temporary register
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
C-OUT RAM-IN 0xFF ; Store address after jump in ram
",
                first, second
            ),
            Operator::Sub(..) => format!(
                "
{}
{}
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT C-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT D-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
C-OUT B-IN 0xFF ; Move temporary values into a register for alu computation
D-OUT A-IN 0xFF ; Move temporary values into a register for alu computation
ALU-OUT C-IN 0xFF ; Move ALU output into temporary register
ROM-OUT FLAGS-IN 0x00 ; Add mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
C-OUT RAM-IN 0xFF ; Store address after jump in ram
",
                first, second
            ),
        }
    }
}
/// Assembles a whole program.
///
/// The output is the assembly of every function in `functions` (in the order
/// given), followed by the assembly of the built-in functions. Built-ins are
/// emitted in ascending key order so that the same input always produces the
/// same output; iterating the `HashMap` directly would yield an arbitrary
/// order that can differ between runs.
///
/// # Panics
/// Propagates the `todo!()` panics of statement lowering (unknown callee,
/// variable declarations).
pub fn to_assembly(functions: Vec<Function>) -> String {
    let mut state = AssemblerState::new();

    // Snapshot the built-ins before any user function is registered, so only
    // the defaults end up in the trailing section.
    let mut builtins: Vec<(String, Function)> = state.functions.clone().into_iter().collect();
    builtins.sort_by(|left, right| left.0.cmp(&right.0));

    let mut std_function_assembly = String::new();
    for (_, builtin) in &builtins {
        std_function_assembly.push_str(&builtin.to_assembly(&mut state));
    }

    // Register every user function first so calls can refer to functions
    // defined later in the list.
    for function in &functions {
        function.add(&mut state);
    }

    let mut final_product = String::new();
    for function in &functions {
        final_product.push_str(&function.to_assembly(&mut state));
    }

    final_product + &std_function_assembly
}

View file

@ -1,19 +1,29 @@
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token { pub enum Token {
SEMI, Semi,
NUMBER(String), Equals,
IDENTIFIER(String), Comma,
KEYWORD(Keyword), Plus,
Minus,
Number(String),
Identifier(String),
Keyword(Keyword),
OpenParanthesis,
CloseParanthesis,
OpenSquiglyBracket,
CloseSquiglyBracket,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Keyword { pub enum Keyword {
RETURN, Return,
Function,
Let,
} }
enum TokenFinishedUnfinished { enum TokenFinishedUnfinished {
UNFINISHED(Option<Token>), Unfinished(Option<Token>),
FINISHED(Token), Finished(Token),
} }
use std::collections::HashMap; use std::collections::HashMap;
@ -24,7 +34,9 @@ fn generate_keyword_map() -> HashMap<String, Keyword> {
let mut keywords: HashMap<String, Keyword> = HashMap::new(); let mut keywords: HashMap<String, Keyword> = HashMap::new();
use Keyword as K; use Keyword as K;
keywords.insert("return".to_string(), K::RETURN); keywords.insert("return".to_string(), K::Return);
keywords.insert("fn".to_string(), K::Function);
keywords.insert("let".to_string(), K::Let);
keywords keywords
} }
@ -36,46 +48,75 @@ fn tokenise_current_character(
let keywords = generate_keyword_map(); let keywords = generate_keyword_map();
match current_token_type { match current_token_type {
Some(Token::NUMBER(ref v)) => { Some(Token::Number(ref v)) => {
let mut current_characters = v.clone(); let mut current_characters = v.clone();
if current_character.is_digit(10) { if current_character.is_digit(10) {
current_characters.push(current_character); current_characters.push(current_character);
return UNFINISHED(Some(Token::NUMBER(current_characters))); return Unfinished(Some(Token::Number(current_characters)));
} }
FINISHED(Token::NUMBER(current_characters)) Finished(Token::Number(current_characters))
} }
Some(Token::IDENTIFIER(ref v)) => { Some(Token::Identifier(ref v)) => {
let mut current_characters = v.clone(); let mut current_characters = v.clone();
if current_character.is_alphanumeric() { if current_character.is_alphanumeric() {
current_characters.push(current_character); current_characters.push(current_character);
return UNFINISHED(Some(Token::IDENTIFIER(current_characters))); return Unfinished(Some(Token::Identifier(current_characters)));
} }
match keywords.get(&current_characters) { match keywords.get(&current_characters) {
Some(keyword) => FINISHED(Token::KEYWORD(keyword.clone())), Some(keyword) => Finished(Token::Keyword(keyword.clone())),
None => FINISHED(Token::IDENTIFIER(current_characters)), None => Finished(Token::Identifier(current_characters)),
} }
} }
Some(Token::SEMI) => FINISHED(Token::SEMI), // Needed because we're always going back a step after returning finished token
None => { None => {
if current_character.is_digit(10) { if current_character.is_digit(10) {
return UNFINISHED(Some(Token::NUMBER(String::from(current_character)))); return Unfinished(Some(Token::Number(String::from(current_character))));
} }
if current_character.is_alphabetic() { if current_character.is_alphabetic() {
return UNFINISHED(Some(Token::IDENTIFIER(String::from(current_character)))); return Unfinished(Some(Token::Identifier(String::from(current_character))));
} }
if current_character == ';' { if current_character == ';' {
return UNFINISHED(Some(Token::SEMI)); return Unfinished(Some(Token::Semi));
} }
UNFINISHED(None) if current_character == '(' {
return Unfinished(Some(Token::OpenParanthesis));
}
if current_character == ')' {
return Unfinished(Some(Token::CloseParanthesis));
}
if current_character == '{' {
return Unfinished(Some(Token::OpenSquiglyBracket));
}
if current_character == '}' {
return Unfinished(Some(Token::CloseSquiglyBracket));
}
if current_character == '=' {
return Unfinished(Some(Token::Equals));
}
if current_character == ',' {
return Unfinished(Some(Token::Comma));
}
if current_character == '+' {
return Unfinished(Some(Token::Plus));
}
if current_character == '-' {
return Unfinished(Some(Token::Minus));
}
Unfinished(None)
} }
Some(v) => FINISHED(v), Some(v) => Finished(v),
} }
} }
@ -90,10 +131,10 @@ pub fn tokenise(input: String) -> Vec<Token> {
current_character = input.as_bytes()[i] as char; current_character = input.as_bytes()[i] as char;
match tokenise_current_character(current_character, current_token_type.clone()) { match tokenise_current_character(current_character, current_token_type.clone()) {
UNFINISHED(v) => { Unfinished(v) => {
current_token_type = v.clone(); current_token_type = v.clone();
} }
FINISHED(v) => { Finished(v) => {
tokens.push(v); tokens.push(v);
current_token_type = None; current_token_type = None;
i -= 1; i -= 1;