Added beginning of a lexer and format

This commit is contained in:
vanten-s 2024-09-09 01:08:04 +02:00
parent e309d2208a
commit 4020536ae6
Signed by: vanten-s
GPG key ID: DE3060396884D3F2
4 changed files with 185 additions and 21 deletions

View file

@ -4,3 +4,5 @@ version = "0.1.0"
edition = "2021"
[dependencies]
color-eyre = "0.6.3"
thiserror = "1.0.63"

View file

@ -31,7 +31,7 @@ impl Assemble for Value {
Ok(())
}
_ => todo!()
_ => todo!(),
}
}
}
@ -46,7 +46,7 @@ impl Assemble for Instruction {
let function = match assembler.functions.get(&self.name) {
Some(v) => v,
None => return Err(crate::Error::UndefinedIdentifier(self.name))
None => return Err(crate::Error::UndefinedIdentifier(self.name)),
};
function(assembler, n_arguments);
@ -88,10 +88,10 @@ impl Assembler {
fn default_functions() -> HashMap<String, Function> {
let mut map: HashMap<String, Function> = HashMap::new();
map.insert(
"+".to_string(),
|assembler, n_arguments| {
if n_arguments != 2 { todo!() }
map.insert("+".to_string(), |assembler, n_arguments| {
if n_arguments != 2 {
todo!()
}
assembler.add_instruction(&[0x58]); // Pop EAX
assembler.pop();
@ -103,8 +103,7 @@ impl Assembler {
assembler.add_instruction(&[0x50]); // Push EAX
assembler.push("".to_string());
},
);
});
map
}

150
src/lexer.rs Normal file
View file

@ -0,0 +1,150 @@
use std::str::FromStr;
use thiserror::Error;
/// The kind of a lexed token, together with its payload where it has one.
///
/// `Clone`, `PartialEq` and `Eq` are derived so tokens can be duplicated and
/// compared directly (for example in tests and by later parsing stages).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenType {
    /// Produced by the lexer for a `(` byte.
    OpenParantheses,
    /// Produced by the lexer for a `)` byte.
    ClosedParantheses,
    /// An integer literal; the payload is whatever `lex_integer` returns.
    Integer(usize),
    /// A string literal.
    // NOTE(review): the lexer never constructs this variant yet — confirm the
    // intended literal syntax before relying on it.
    String(String),
    /// An identifier; the payload is whatever `lex_identifier` returns.
    Identifier(String),
}
/// A single lexed token together with the lexer position recorded for it.
#[derive(Debug)]
pub struct Token {
/// Value of the lexer's `row` counter when the token was built.
row: usize,
/// Value of the lexer's `col` counter when the token was built.
col: usize,
/// What kind of token this is, including its payload if it has one.
token_type: TokenType,
}
/// The finished result of lexing: every token in the order it was pushed.
///
/// Built from a `&str` through the `FromStr` impl (e.g. `code.parse()`), or
/// from a finished `UnfinishedTokenString` through `From`.
#[derive(Debug)]
pub struct TokenString {
/// The lexed tokens, in the order the lexer produced them.
tokens: Vec<Token>,
}
/// In-progress lexer state: the raw input, a cursor into it, and the tokens
/// collected so far.
#[derive(Debug)]
struct UnfinishedTokenString {
/// Index into `input` of the byte currently being examined.
position: usize,
/// The source text as raw bytes.
input: Vec<u8>,
/// Tokens produced so far, in the order they were pushed.
tokens: Vec<Token>,
/// Set once `position` reaches `input.len()`; the lexing loop stops on it.
eof: bool,
/// Row counter; `from_str` starts it at 0 and `next_char` bumps it when the
/// cursor moves onto a `\n` byte.
row: usize,
/// Column counter; `from_str` starts it at 1, `next_char` resets it to 0 on a
/// `\n` byte and increments it for every other byte.
col: usize,
}
/// Error returned when the lexer meets input it cannot turn into a token.
///
/// The `Display` text comes from the `#[error(...)]` format string below:
/// `Syntax error at: {row}:{col}. {message}`.
#[derive(Error, Debug)]
#[error("Syntax error at: {row}:{col}. {message}")]
pub struct LexerError {
/// Value of the lexer's `row` counter where the error was raised.
row: usize,
/// Value of the lexer's `col` counter where the error was raised.
col: usize,
/// Human-readable description of what went wrong.
message: String,
}
impl FromStr for TokenString {
    type Err = LexerError;

    /// Lexes `s` into a [`TokenString`], enabling `source.parse::<TokenString>()`.
    ///
    /// An empty `s` yields an empty token list instead of panicking.
    ///
    /// # Errors
    /// Returns the [`LexerError`] reported by `lex_file` when the input
    /// contains something that cannot start a token.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let uts = UnfinishedTokenString {
            position: 0,
            // Copy the bytes straight from the borrowed string; no intermediate
            // `String` is needed.
            input: s.as_bytes().to_vec(),
            tokens: Vec::new(),
            // An empty source has no byte at position 0, so it starts out
            // exhausted; otherwise the lexer would index past the end.
            eof: s.is_empty(),
            // NOTE(review): rows start at 0 while columns start at 1 — confirm
            // that this mixed convention is intended.
            row: 0,
            col: 1,
        };
        uts.lex_file()
    }
}
impl From<UnfinishedTokenString> for TokenString {
fn from(value: UnfinishedTokenString) -> Self {
Self {
tokens: value.tokens,
}
}
}
/// Something that can be consumed to lex a whole source file.
trait Lexer {
    /// The finished token collection produced on success.
    type TokenString;
    /// The error produced when lexing fails.
    type Err;

    /// Consumes the lexer, returning either the complete token collection or
    /// the error that stopped lexing.
    fn lex_file(self) -> Result<Self::TokenString, Self::Err>;
}
impl Lexer for UnfinishedTokenString {
    type TokenString = TokenString;
    type Err = LexerError;

    /// Lexes the whole input, one token per loop iteration, skipping ASCII
    /// whitespace between tokens.
    ///
    /// Every token and every error carries the row/column at which it
    /// *starts*: the position is captured before a sub-lexer is allowed to
    /// advance the cursor.
    ///
    /// # Errors
    /// Returns a [`LexerError`] when the current byte is not `(`, `)`, an
    /// ASCII letter, an ASCII digit or `-`.
    ///
    /// # Panics
    /// Identifiers and integers are delegated to `lex_identifier` and
    /// `lex_integer`, which are currently `todo!()` stubs.
    fn lex_file(mut self) -> Result<Self::TokenString, Self::Err> {
        use TokenType as TT;

        self.skip_whitespace();
        while !self.eof {
            // Remember where the token begins; `lex_identifier`/`lex_integer`
            // take `&mut self` and move the cursor while consuming the token.
            let (row, col) = (self.row, self.col);

            let token_type = match self.input[self.position] as char {
                '(' => TT::OpenParantheses,
                ')' => TT::ClosedParantheses,
                'a'..='z' | 'A'..='Z' => TT::Identifier(self.lex_identifier()),
                '0'..='9' | '-' => TT::Integer(self.lex_integer()),
                v => {
                    return Err(LexerError {
                        row,
                        col,
                        message: format!("Expected new token, found: {v}"),
                    });
                }
            };

            self.tokens.push(Token {
                row,
                col,
                token_type,
            });
            // Step past the token's last byte, then past any separators.
            self.next_char();
            self.skip_whitespace();
        }
        Ok(self.into())
    }
}
impl UnfinishedTokenString {
    /// Advances the cursor past any run of ASCII whitespace.
    ///
    /// Stops at the first non-whitespace byte. If the cursor is already past
    /// the last byte (for example on empty input) it marks the input as
    /// exhausted instead of indexing out of bounds.
    fn skip_whitespace(&mut self) {
        while !self.eof {
            match self.input.get(self.position).copied() {
                Some(byte) if byte.is_ascii_whitespace() => self.next_char(),
                Some(_) => return,
                None => self.eof = true,
            }
        }
    }

    /// Moves the cursor one byte forward and keeps `row`/`col` in sync.
    ///
    /// The counters are updated from the byte being *left*: leaving a `\n`
    /// starts a new row at column 1, leaving anything else moves one column
    /// right. This way a newline at the very start of the input is counted
    /// too. Reaching the end sets `eof` and leaves `row`/`col` untouched;
    /// calling this again once exhausted is a no-op.
    fn next_char(&mut self) {
        let leaving = match self.input.get(self.position).copied() {
            Some(byte) => byte,
            None => {
                // Nothing left to step over.
                self.eof = true;
                return;
            }
        };

        self.position += 1;
        if self.position >= self.input.len() {
            self.eof = true;
            return;
        }

        if leaving == b'\n' {
            self.row += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
    }

    /// Lexes an identifier starting at the current byte. Not implemented yet.
    // NOTE(review): `lex_file` calls `next_char` after this returns, so it
    // presumably expects the cursor to be left on the identifier's last byte —
    // confirm when implementing.
    fn lex_identifier(&mut self) -> String {
        todo!()
    }

    /// Lexes an integer starting at the current byte. Not implemented yet.
    // NOTE(review): `lex_file` dispatches here for a leading `-` as well, but
    // the return type is unsigned — confirm how negative literals should be
    // represented.
    fn lex_integer(&mut self) -> usize {
        todo!()
    }
}

View file

@ -1,13 +1,26 @@
mod assembler;
mod parser;
// mod assembler;
mod lexer;
// mod parser;
mod error;
pub use error::*;
use color_eyre::eyre::Result;
// pub use error::*;
use lexer::TokenString;
fn main() {
let code = "(+ 1 (+ 5 5))".to_string();
fn main() -> Result<()> {
// let code = "(+ 1 (+ 5 5))".to_string();
color_eyre::install()?;
let code = "()".to_string();
let lexed: TokenString = code.parse()?;
dbg!(lexed);
Ok(())
/*
let parsed = dbg!(parser::Parser::parse(code).unwrap());
let assembled = assembler::Assembler::assemble(parsed).unwrap();
@ -15,5 +28,5 @@ fn main() {
print!("{byte:#X}, ");
}
println!("");
println!(""); */
}