Compare commits

...

4 commits

Author SHA1 Message Date
vanten-s 33162fda6e
Changed example and added parser 2023-09-16 22:21:53 +02:00
vanten-s 6ea0c5e9ac
Added an ast 2023-09-16 22:21:40 +02:00
vanten-s 2dca0b55f4
Deleted Cargo.lock 2023-09-16 22:21:01 +02:00
vanten-s 743878a1db
Addde cargo.lock to gititgnore 2023-09-16 22:19:55 +02:00
9 changed files with 639 additions and 136 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
/target
Cargo.lock

92
Cargo.lock generated
View file

@ -1,92 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "libc"
version = "0.2.147"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
[[package]]
name = "plonkus"
version = "0.1.0"
dependencies = [
"socket2",
]
[[package]]
name = "socket2"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877"
dependencies = [
"libc",
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
[[package]]
name = "windows_aarch64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
[[package]]
name = "windows_i686_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
[[package]]
name = "windows_i686_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
[[package]]
name = "windows_x86_64_gnu"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
[[package]]
name = "windows_x86_64_msvc"
version = "0.48.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"

View file

@ -6,4 +6,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
socket2 = "0.5.3"
rand = "0.8.5"

View file

@ -1,11 +0,0 @@
use crate::tokeniser::Token;
/// Placeholder assembler: produces the marker text `hii` once for every
/// token in `tokens`. The tokens themselves are never inspected.
pub fn assemblize(tokens: Vec<Token>) -> String {
    // Only the number of tokens influences the output.
    "hii".repeat(tokens.len())
}

357
src/ast.rs Normal file
View file

@ -0,0 +1,357 @@
extern crate rand;
use rand::{distributions::Alphanumeric, Rng};
use std::collections::HashMap;
/// A function known to the assembler: either parsed from source (a list of
/// statements) or a built-in that carries ready-made assembly text.
#[derive(Debug, Clone)]
pub struct Function {
/// Name of the function; used as the assembly label when code is generated
/// from `statements`.
pub name: String,
/// Statements making up the body, emitted in order.
pub statements: Vec<Statement>,
/// Pre-written assembly. When `Some`, `to_assembly` returns this text
/// verbatim and `statements` is not used.
assembly: Option<String>,
}
/// A single statement inside a function body.
#[derive(Debug, Clone)]
pub enum Statement {
/// Call of the function with the given name, passing the given argument
/// expressions.
FunctionCall(String, Vec<ValueNode>),
/// Declaration of a variable with the given name and initial value.
/// Code generation for this variant is not implemented yet (`todo!()`).
VariableDeclaration(String, ValueNode),
}
/// An expression node: either a literal or an operator applied to
/// sub-expressions.
#[derive(Debug, Clone)]
pub enum ValueNode {
/// A literal value.
Literal(Literal),
/// A binary operation over two nested expressions.
Operator(Operator),
}
/// Literal values that can appear in an expression.
#[derive(Debug, Clone)]
pub enum Literal {
/// An integer literal; its value is written directly into the generated
/// assembly text.
Int(i64),
}
/// Binary operators. Both operands are boxed because the expression tree is
/// recursive.
#[derive(Debug, Clone)]
pub enum Operator {
/// Addition of the two operands.
Add(Box<ValueNode>, Box<ValueNode>),
/// Subtraction: first operand minus second operand.
Sub(Box<ValueNode>, Box<ValueNode>),
}
/// State shared across code generation.
#[derive(Debug, Clone)]
pub struct AssemblerState {
/// Every function that may be called, keyed by name. Starts out holding the
/// built-ins and is extended through `Function::add`.
pub functions: HashMap<String, Function>,
}
/// Implemented by every AST element that can be turned into assembly text.
pub trait Node {
/// Returns the assembly for this node. `state` gives access to the table of
/// known functions.
fn to_assembly(&self, state: &mut AssemblerState) -> String;
}
impl Function {
pub fn new(name: String, statements: Vec<Statement>) -> Function {
Function {
name,
statements,
assembly: None,
}
}
pub fn add(&self, state: &mut AssemblerState) -> Function {
state.functions.insert(self.name.clone(), self.to_owned());
self.to_owned()
}
}
impl AssemblerState {
    /// Creates a state whose function table already contains the built-in
    /// functions.
    pub fn new() -> AssemblerState {
        let functions = get_default_functions();
        Self { functions }
    }
}
/// Builds the table of built-in functions, each backed by hand-written
/// assembly instead of parsed statements.
///
/// Currently provides `out` and `exit`. Every entry's `name` field is derived
/// from the same string as its map key, so the two can never disagree
/// (previously the `out` entry was stored with the name `exit`).
fn get_default_functions() -> HashMap<String, Function> {
    // Creates one `(key, Function)` pair from a name and its assembly text.
    let builtin = |name: &str, assembly: &str| {
        (
            name.to_string(),
            Function {
                name: name.to_string(),
                statements: vec![],
                assembly: Some(assembly.to_string()),
            },
        )
    };

    HashMap::from([
        builtin(
            "out",
            "
out:
;
; Pop the argument of the stack
;
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT OUTPUT-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
;
; Pop the return address of the stack
;
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT ROM-ADDR-U-IN 0xFF ; Store top address in ROM jump address
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
F-OUT JMP 0xFF",
        ),
        builtin(
            "exit",
            "
exit:
F-OUT JMP 0xFF",
        ),
    ])
}
impl Node for Function {
    /// Returns the function's assembly.
    ///
    /// A function carrying pre-written assembly yields that text unchanged.
    /// Otherwise the result is the function's label, the assembly of each
    /// statement joined by newlines, and a fixed epilogue.
    fn to_assembly(&self, state: &mut AssemblerState) -> String {
        if let Some(prebuilt) = &self.assembly {
            return prebuilt.to_string();
        }

        let mut statement_assembly: Vec<String> = Vec::with_capacity(self.statements.len());
        for statement in &self.statements {
            statement_assembly.push(statement.to_assembly(state));
        }
        let body = statement_assembly.join("\n");

        format!(
            "{}:\n{}
;
; Pop the return address of the stack
;
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT ROM-ADDR-U-IN 0xFF ; Store top address in ROM jump address
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
F-OUT JMP 0xFF
",
            self.name, body
        )
    }
}
impl Node for Statement {
    /// Emits the assembly for one statement.
    ///
    /// For a function call the text stores a return label, then contains the
    /// assembly of every argument, then jumps to the callee; the return label
    /// is defined right after the jump. The label is the callee name followed
    /// by a random 30-character alphanumeric suffix.
    ///
    /// Calls to a name missing from `state.functions`, and variable
    /// declarations, are not implemented and hit `todo!()`.
    fn to_assembly(&self, state: &mut AssemblerState) -> String {
        let (name, arguments) = match self {
            Statement::FunctionCall(name, arguments) => (name, arguments),
            Statement::VariableDeclaration(_name, _value) => todo!(),
        };

        if !state.functions.contains_key(name) {
            todo!()
        }

        let mut per_argument: Vec<String> = Vec::with_capacity(arguments.len());
        for argument in arguments {
            per_argument.push(argument.to_assembly(state));
        }
        let argument_assembly: String = per_argument.join("\n");

        // Random suffix so that each call site gets its own return label.
        let mut call_id = String::new();
        for byte in rand::thread_rng().sample_iter(&Alphanumeric).take(30) {
            call_id.push(char::from(byte));
        }

        format!(
            "
;
; Stack
;
; Increment Stack Pointer
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
; Store in top stack pointer
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
ROM-OUT RAM-IN ${name}-{call_id} ; Store address after jump in ram
;
; Arguments
;
{argument_assembly}
;
; Jump
;
ROM-OUT ROM-ADDR-U-IN ${name} ; Jump to function label
F-OUT JMP 0xFF ; Jump to function
{name}-{call_id}:
"
        )
    }
}
impl Node for ValueNode {
fn to_assembly(&self, state: &mut AssemblerState) -> String {
match self {
ValueNode::Literal(v) => v.to_assembly(state),
ValueNode::Operator(o) => o.to_assembly(state),
}
}
}
impl Node for Literal {
    /// Emits assembly that increments the stack pointer and stores the
    /// literal value at the new top of the stack. `_state` is unused.
    fn to_assembly(&self, _state: &mut AssemblerState) -> String {
        // `Int` is the only variant, so the pattern is irrefutable.
        let Literal::Int(v) = self;
        format!(
            "
;
; Stack
;
; Increment Stack Pointer
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
; Store in top stack pointer
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
ROM-OUT RAM-IN {v}; Store literal value in RAM"
        )
    }
}
impl Node for Operator {
    /// Emits the assembly of both operands (first operand first), followed by
    /// a fixed sequence that pops the two values into the C and D registers,
    /// runs them through the ALU in add or subtract mode, and pushes the
    /// result.
    fn to_assembly(&self, state: &mut AssemblerState) -> String {
        match self {
            Operator::Add(a, b) => {
                // Operand code is generated in source order: left, then right.
                let left = a.to_assembly(state);
                let right = b.to_assembly(state);
                format!(
                    "
{}
{}
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT C-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT D-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
C-OUT A-IN 0xFF ; Move temporary values into a register for alu computation
D-OUT B-IN 0xFF ; Move temporary values into a register for alu computation
ALU-OUT C-IN 0xFF ; Move ALU output into temporary register
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
C-OUT RAM-IN 0xFF ; Store address after jump in ram
",
                    left, right
                )
            }
            Operator::Sub(a, b) => {
                // Operand code is generated in source order: left, then right.
                let left = a.to_assembly(state);
                let right = b.to_assembly(state);
                format!(
                    "
{}
{}
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT C-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
SE-OUT RAM-ADDR-U-IN 0xFF ; Set ram address to stack end
RAM-OUT D-IN 0xFF ; Output top address onto output register
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Subtract one
ALU-OUT SE-IN 0xFF ; Store in SE
ROM-OUT FLAGS-IN 0x00 ; Add mode
ROM-OUT FLAGS-IN 0x80 ; Subtract mode
C-OUT B-IN 0xFF ; Move temporary values into a register for alu computation
D-OUT A-IN 0xFF ; Move temporary values into a register for alu computation
ALU-OUT C-IN 0xFF ; Move ALU output into temporary register
ROM-OUT FLAGS-IN 0x00 ; Add mode
SE-OUT A-IN 0xFF ; Fetch SE
ROM-OUT B-IN 0x01 ; Add 1
ALU-OUT SE-IN 0xFF ; Store
ALU-OUT RAM-ADDR-U-IN 0xFF ; Store in RAM address
C-OUT RAM-IN 0xFF ; Store address after jump in ram
",
                    left, right
                )
            }
        }
    }
}
/// Generates the complete program text: the assembly of every function in
/// `functions` (in the given order) followed by the assembly of the built-in
/// functions.
///
/// All of `functions` are registered in the assembler state before any of
/// them is assembled, so a function may call one that appears later.
pub fn to_assembly(functions: Vec<Function>) -> String {
    let mut state = AssemblerState::new();

    // Assemble the built-ins from a snapshot of the table, because `state`
    // itself is borrowed mutably by each `to_assembly` call.
    let mut builtin_assembly = String::new();
    for (_, builtin) in state.clone().functions {
        builtin_assembly.push_str(&builtin.to_assembly(&mut state));
    }

    for user_function in &functions {
        user_function.add(&mut state);
    }

    let mut program = String::new();
    for user_function in &functions {
        program.push_str(&user_function.to_assembly(&mut state));
    }

    program + &builtin_assembly
}

View file

@ -1,12 +1,17 @@
mod assembly;
mod ast;
mod parse;
mod tokeniser;
fn main() {
let filename = std::env::args().nth(1).expect("No filename argument");
let contents =
std::fs::read_to_string(&filename).expect(&format!("No file named {}", &filename));
let filename = "test.plonkus";
let tokens = dbg!(tokeniser::tokenise(contents));
let contents = std::fs::read_to_string(filename).expect(&format!("No file named {}", filename));
let assembled = dbg!(assembly::assemblize(tokens));
let tokens = tokeniser::tokenise(contents);
let functions = parse::ast(tokens);
let assembly = ast::to_assembly(functions);
println!("{}", assembly);
}

199
src/parse.rs Normal file
View file

@ -0,0 +1,199 @@
use std::collections::{HashMap, HashSet};
use crate::ast::*;
use crate::tokeniser::Keyword;
use crate::tokeniser::Token;
/// Where the parser currently is in the token stream.
#[derive(Clone, Debug)]
enum ParseState {
/// The `fn` keyword has been seen; the header (name, then `{`) of the
/// carried function is being read.
Function(Function),
/// Inside the body of the carried function, reading statements.
Statement(Function),
/// Between functions; carries the functions completed so far.
None(Vec<Function>),
}
impl Function {
    /// Appends `statement` to the function body and returns a copy of the
    /// updated function.
    fn push_statement(&mut self, statement: Statement) -> Function {
        self.statements.push(statement);
        self.clone()
    }
}
/// Turns the tokens of one arithmetic expression into a `ValueNode` tree.
///
/// A single token must be a number and becomes an integer literal. Longer
/// input is split at the last `+` or `-` it contains, and both sides are
/// parsed recursively, so chains such as `a - b - c` group as `(a - b) - c`.
///
/// Panics when the input contains no operator to split at (including empty
/// input), when a number does not fit in `i64`, and via `todo!()` for a lone
/// token that is not a number.
fn parse_math(tokens: &Vec<Token>) -> ValueNode {
    if let [only] = tokens.as_slice() {
        return match only {
            Token::Number(n) => ValueNode::Literal(Literal::Int(n.parse::<i64>().unwrap())),
            _ => todo!(),
        };
    }

    // Operators grouped by precedence level; there is only one level so far.
    let precedence_levels: HashMap<i64, HashSet<Token>> =
        HashMap::from([(0, HashSet::from([Token::Plus, Token::Minus]))]);

    // Every operator occurrence together with its position, in token order
    // within each level.
    let mut candidates: Vec<(usize, &Token)> = Vec::new();
    for operators in precedence_levels.values() {
        candidates.extend(
            tokens
                .iter()
                .enumerate()
                .filter(|(_, token)| operators.contains(*token)),
        );
    }

    // The split happens at the last operator that was recorded.
    let (split_at, operator) = candidates.pop().unwrap();
    let left_tokens: Vec<Token> = tokens[..split_at].to_vec();
    let right_tokens: Vec<Token> = tokens[split_at + 1..].to_vec();

    match operator {
        Token::Plus => ValueNode::Operator(Operator::Add(
            Box::new(parse_math(&left_tokens)),
            Box::new(parse_math(&right_tokens)),
        )),
        Token::Minus => ValueNode::Operator(Operator::Sub(
            Box::new(parse_math(&left_tokens)),
            Box::new(parse_math(&right_tokens)),
        )),
        _ => todo!(),
    }
}
/// Parses one statement of the function currently being read, or closes the
/// function when the current token is `}`.
///
/// * `parse_state` must be `ParseState::Statement`; anything else is
///   `todo!()`.
/// * `i` is the index of the statement's first token. On return it points at
///   the last token consumed (the terminating `;`, or the `}`); the caller
///   advances past it.
/// * `tokens` is the complete token stream.
/// * `functions` holds the functions completed so far; on `}` the current
///   function is appended to a copy of it and returned in `ParseState::None`.
///
/// Supported statements:
/// * `let <identifier> = <expression>;`
/// * `<identifier>(<expression>, ...)` — a trailing comma before `)` is
///   accepted but not required, and empty arguments are skipped.
///
/// Indexes `tokens` directly, so a statement that is cut off before its
/// terminator panics with an out-of-bounds index. Any other statement form is
/// `todo!()`.
fn parse_statement(
    parse_state: &mut ParseState,
    i: &mut usize,
    tokens: &Vec<Token>,
    functions: &Vec<Function>,
) -> ParseState {
    let mut current_function = match parse_state {
        ParseState::Statement(f) => f.to_owned(),
        _ => todo!(),
    };

    if tokens[*i] == Token::CloseSquiglyBracket {
        let mut functions = functions.to_owned();
        functions.push(current_function);
        return ParseState::None(functions);
    }

    let statement = match &tokens[*i] {
        Token::Keyword(Keyword::Let) => {
            *i += 1;
            let name = match &tokens[*i] {
                Token::Identifier(identifier) => identifier.clone(),
                _ => todo!(),
            };
            *i += 1;
            if tokens[*i] != Token::Equals {
                todo!()
            }
            // Step past `=` so that the expression starts at its first token.
            *i += 1;
            // Collect everything up to, but NOT including, the `;`. Handing
            // the `;` to `parse_math` makes every declaration fail to parse.
            let mut declaration_tokens: Vec<Token> = Vec::new();
            while tokens[*i] != Token::Semi {
                declaration_tokens.push(tokens[*i].clone());
                *i += 1;
            }
            Statement::VariableDeclaration(name, parse_math(&declaration_tokens))
        }
        Token::Identifier(identifier) => {
            // Skip the identifier and the token after it, which is expected
            // to be `(` (not verified here).
            *i += 2;
            let mut arguments: Vec<ValueNode> = Vec::new();
            while tokens[*i] != Token::CloseParanthesis {
                // One argument runs up to the next `,` or the closing `)`.
                let mut argument_tokens: Vec<Token> = Vec::new();
                while tokens[*i] != Token::Comma && tokens[*i] != Token::CloseParanthesis {
                    argument_tokens.push(tokens[*i].clone());
                    *i += 1;
                }
                // `f(,)` and trailing commas produce empty arguments; skip them.
                if !argument_tokens.is_empty() {
                    arguments.push(parse_math(&argument_tokens));
                }
                // Consume the separator; a `)` is left for the outer loop.
                if tokens[*i] == Token::Comma {
                    *i += 1;
                }
            }
            // Step past `)`, leaving `i` on the statement terminator.
            *i += 1;
            Statement::FunctionCall(identifier.to_owned(), arguments)
        }
        _other => {
            todo!()
        }
    };

    // Push directly instead of cloning the whole function just to discard it.
    current_function.statements.push(statement);
    ParseState::Statement(current_function)
}
/// Parses a token stream into the list of functions it defines.
///
/// Works as a small state machine over `ParseState`: between functions only
/// the `fn` keyword is accepted; in a function header an identifier sets the
/// name and `{` starts the body; inside a body each step is delegated to
/// `parse_statement`, which may consume several tokens.
///
/// Any unexpected token, or input that ends while a function is still open,
/// hits `todo!()`.
pub fn ast(tokens: Vec<Token>) -> Vec<Function> {
    let mut functions: Vec<Function> = Vec::new();
    let mut parse_state = ParseState::None(Vec::new());
    let mut i = 0;

    while i < tokens.len() {
        parse_state = match &parse_state {
            ParseState::None(finished) => {
                // Remember what has been completed so far so that it can be
                // handed to `parse_statement` later on.
                functions = finished.clone();
                match &tokens[i] {
                    Token::Keyword(Keyword::Function) => {
                        ParseState::Function(Function::new(String::new(), Vec::new()))
                    }
                    _ => todo!(),
                }
            }
            ParseState::Function(pending) => match &tokens[i] {
                Token::Identifier(identifier) => {
                    ParseState::Function(Function::new(identifier.clone(), Vec::new()))
                }
                Token::OpenSquiglyBracket => ParseState::Statement(pending.clone()),
                _ => todo!(),
            },
            ParseState::Statement(_) => {
                parse_statement(&mut parse_state, &mut i, &tokens, &functions)
            }
        };
        i += 1;
    }

    if let ParseState::None(finished) = parse_state {
        finished
    } else {
        todo!()
    }
}

View file

@ -1,19 +1,29 @@
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Token {
SEMI,
NUMBER(String),
IDENTIFIER(String),
KEYWORD(Keyword),
Semi,
Equals,
Comma,
Plus,
Minus,
Number(String),
Identifier(String),
Keyword(Keyword),
OpenParanthesis,
CloseParanthesis,
OpenSquiglyBracket,
CloseSquiglyBracket,
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Keyword {
RETURN,
Return,
Function,
Let,
}
enum TokenFinishedUnfinished {
UNFINISHED(Option<Token>),
FINISHED(Token),
Unfinished(Option<Token>),
Finished(Token),
}
use std::collections::HashMap;
@ -24,7 +34,9 @@ fn generate_keyword_map() -> HashMap<String, Keyword> {
let mut keywords: HashMap<String, Keyword> = HashMap::new();
use Keyword as K;
keywords.insert("return".to_string(), K::RETURN);
keywords.insert("return".to_string(), K::Return);
keywords.insert("fn".to_string(), K::Function);
keywords.insert("let".to_string(), K::Let);
keywords
}
@ -36,46 +48,75 @@ fn tokenise_current_character(
let keywords = generate_keyword_map();
match current_token_type {
Some(Token::NUMBER(ref v)) => {
Some(Token::Number(ref v)) => {
let mut current_characters = v.clone();
if current_character.is_digit(10) {
current_characters.push(current_character);
return UNFINISHED(Some(Token::NUMBER(current_characters)));
return Unfinished(Some(Token::Number(current_characters)));
}
FINISHED(Token::NUMBER(current_characters))
Finished(Token::Number(current_characters))
}
Some(Token::IDENTIFIER(ref v)) => {
Some(Token::Identifier(ref v)) => {
let mut current_characters = v.clone();
if current_character.is_alphanumeric() {
current_characters.push(current_character);
return UNFINISHED(Some(Token::IDENTIFIER(current_characters)));
return Unfinished(Some(Token::Identifier(current_characters)));
}
match keywords.get(&current_characters) {
Some(keyword) => FINISHED(Token::KEYWORD(keyword.clone())),
None => FINISHED(Token::IDENTIFIER(current_characters)),
Some(keyword) => Finished(Token::Keyword(keyword.clone())),
None => Finished(Token::Identifier(current_characters)),
}
}
Some(Token::SEMI) => FINISHED(Token::SEMI), // Needed because we're always going back a step after returning finished token
None => {
if current_character.is_digit(10) {
return UNFINISHED(Some(Token::NUMBER(String::from(current_character))));
return Unfinished(Some(Token::Number(String::from(current_character))));
}
if current_character.is_alphabetic() {
return UNFINISHED(Some(Token::IDENTIFIER(String::from(current_character))));
return Unfinished(Some(Token::Identifier(String::from(current_character))));
}
if current_character == ';' {
return UNFINISHED(Some(Token::SEMI));
return Unfinished(Some(Token::Semi));
}
UNFINISHED(None)
if current_character == '(' {
return Unfinished(Some(Token::OpenParanthesis));
}
if current_character == ')' {
return Unfinished(Some(Token::CloseParanthesis));
}
if current_character == '{' {
return Unfinished(Some(Token::OpenSquiglyBracket));
}
if current_character == '}' {
return Unfinished(Some(Token::CloseSquiglyBracket));
}
if current_character == '=' {
return Unfinished(Some(Token::Equals));
}
if current_character == ',' {
return Unfinished(Some(Token::Comma));
}
if current_character == '+' {
return Unfinished(Some(Token::Plus));
}
if current_character == '-' {
return Unfinished(Some(Token::Minus));
}
Unfinished(None)
}
Some(v) => FINISHED(v),
Some(v) => Finished(v),
}
}
@ -90,10 +131,10 @@ pub fn tokenise(input: String) -> Vec<Token> {
current_character = input.as_bytes()[i] as char;
match tokenise_current_character(current_character, current_token_type.clone()) {
UNFINISHED(v) => {
Unfinished(v) => {
current_token_type = v.clone();
}
FINISHED(v) => {
Finished(v) => {
tokens.push(v);
current_token_type = None;
i -= 1;

View file

@ -1 +1,4 @@
return 42;
fn main {
out(42 - 3 - 1,);
exit(,);
}