From 4020536ae632b308ccd2f86c4f64109c7bb401e0 Mon Sep 17 00:00:00 2001
From: vanten-s
Date: Mon, 9 Sep 2024 01:08:04 +0200
Subject: [PATCH] Added beginning of a lexer and format

---
Review notes (this section is not part of the commit message):

* Line structure was rebuilt from a whitespace-collapsed copy. Indentation
  and blank context lines in the src/assembler.rs and src/main.rs hunks are
  a best-effort reading; check them against the tree before applying.
* The copy had lost its angle-bracketed spans. The generics in src/lexer.rs
  are restored from usage; the `HashMap` parameters in the src/assembler.rs
  context lines and the address on the `From:` line could not be recovered.
* src/lexer.rs differs from commit 4020536a (its `index` blob id is stale):
  - empty input no longer indexes out of bounds (`eof` starts as
    `s.is_empty()`);
  - rows and columns are both 1-based, and `next_char` updates them from
    the byte being left, so a leading newline is counted;
  - a token's position is captured before it is lexed;
  - doc comments added; `return Ok(..)` and the extra `to_string()` removed.

 Cargo.toml       |   2 +
 src/assembler.rs |  29 ++++----
 src/lexer.rs     | 178 +++++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs      |  25 +++++--
 4 files changed, 213 insertions(+), 21 deletions(-)
 create mode 100644 src/lexer.rs

diff --git a/Cargo.toml b/Cargo.toml
index e981f3c..440382b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -4,3 +4,5 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+color-eyre = "0.6.3"
+thiserror = "1.0.63"
diff --git a/src/assembler.rs b/src/assembler.rs
index 07580ff..9a7b759 100644
--- a/src/assembler.rs
+++ b/src/assembler.rs
@@ -31,7 +31,7 @@ impl Assemble for Value {
 
                 Ok(())
             }
-            _ => todo!()
+            _ => todo!(),
         }
     }
 }
@@ -46,7 +46,7 @@ impl Assemble for Instruction {
 
         let function = match assembler.functions.get(&self.name) {
             Some(v) => v,
-            None => return Err(crate::Error::UndefinedIdentifier(self.name))
+            None => return Err(crate::Error::UndefinedIdentifier(self.name)),
         };
 
         function(assembler, n_arguments);
@@ -88,23 +88,22 @@ impl Assembler {
     fn default_functions() -> HashMap {
         let mut map: HashMap = HashMap::new();
 
-        map.insert(
-            "+".to_string(),
-            |assembler, n_arguments| {
-                if n_arguments != 2 { todo!() }
+        map.insert("+".to_string(), |assembler, n_arguments| {
+            if n_arguments != 2 {
+                todo!()
+            }
 
-                assembler.add_instruction(&[0x58]); // Pop EAX
-                assembler.pop();
+            assembler.add_instruction(&[0x58]); // Pop EAX
+            assembler.pop();
 
-                assembler.add_instruction(&[0x03, 0x04, 0x24]); // add (%esp), %eax
+            assembler.add_instruction(&[0x03, 0x04, 0x24]); // add (%esp), %eax
 
-                assembler.add_instruction(&[0x5B]); // Pop EBX
-                assembler.pop();
+            assembler.add_instruction(&[0x5B]); // Pop EBX
+            assembler.pop();
 
-                assembler.add_instruction(&[0x50]); // Push EAX
-                assembler.push("".to_string());
-            },
-        );
+            assembler.add_instruction(&[0x50]); // Push EAX
+            assembler.push("".to_string());
+        });
 
         map
     }
diff --git a/src/lexer.rs b/src/lexer.rs
new file mode 100644
index 0000000..e684847
--- /dev/null
+++ b/src/lexer.rs
@@ -0,0 +1,178 @@
+//! Lexer: turns source text into a flat list of [`Token`]s.
+
+use std::str::FromStr;
+
+use thiserror::Error;
+
+/// The kind of a lexed token, together with its payload where it has one.
+#[derive(Debug)]
+pub enum TokenType {
+    OpenParantheses,
+    ClosedParantheses,
+    Integer(usize),
+    String(String),
+    Identifier(String),
+}
+
+/// A token together with the row and column at which it starts.
+#[derive(Debug)]
+pub struct Token {
+    row: usize,
+    col: usize,
+
+    token_type: TokenType,
+}
+
+/// The finished result of lexing an input string.
+#[derive(Debug)]
+pub struct TokenString {
+    tokens: Vec<Token>,
+}
+
+/// Lexer state while the input is still being consumed.
+#[derive(Debug)]
+struct UnfinishedTokenString {
+    /// Index into `input` of the byte currently being looked at.
+    position: usize,
+    input: Vec<u8>,
+    tokens: Vec<Token>,
+    /// Set once `position` has moved past the last byte of `input`.
+    eof: bool,
+
+    /// 1-based line of the byte at `position`.
+    row: usize,
+    /// 1-based column of the byte at `position`, counted in bytes.
+    col: usize,
+}
+
+/// Error returned when the input cannot be split into tokens.
+#[derive(Error, Debug)]
+#[error("Syntax error at: {row}:{col}. {message}")]
+pub struct LexerError {
+    row: usize,
+    col: usize,
+    message: String,
+}
+
+impl FromStr for TokenString {
+    /// Lexes `s` into a [`TokenString`].
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`LexerError`] pointing at the first character that cannot
+    /// start a token.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let uts = UnfinishedTokenString {
+            position: 0,
+            input: s.as_bytes().to_vec(),
+            tokens: Vec::new(),
+            // Empty input is already exhausted; starting with `eof: false`
+            // would make the first `input[position]` read go out of bounds.
+            eof: s.is_empty(),
+
+            row: 1,
+            col: 1,
+        };
+
+        uts.lex_file()
+    }
+
+    type Err = LexerError;
+}
+
+impl From<UnfinishedTokenString> for TokenString {
+    /// Keeps the collected tokens and drops the lexer's bookkeeping.
+    fn from(value: UnfinishedTokenString) -> Self {
+        Self {
+            tokens: value.tokens,
+        }
+    }
+}
+
+/// A lexer state that can be consumed to produce a token string.
+trait Lexer {
+    /// Lexes the whole input, consuming `self`.
+    fn lex_file(self) -> Result<Self::TokenString, Self::Err>;
+
+    type TokenString;
+    type Err;
+}
+
+impl Lexer for UnfinishedTokenString {
+    fn lex_file(mut self) -> Result<Self::TokenString, Self::Err> {
+        self.skip_whitespace();
+
+        while !self.eof {
+            use TokenType as TT;
+
+            // Record where the token starts before any sub-lexer advances.
+            let (row, col) = (self.row, self.col);
+
+            let token_type = match self.input[self.position] as char {
+                '(' => TT::OpenParantheses,
+                ')' => TT::ClosedParantheses,
+                'a'..='z' | 'A'..='Z' => TT::Identifier(self.lex_identifier()),
+                '0'..='9' | '-' => TT::Integer(self.lex_integer()),
+                v => {
+                    return Err(LexerError {
+                        row,
+                        col,
+                        message: format!("Expected new token, found: {v}"),
+                    })
+                }
+            };
+
+            self.tokens.push(Token {
+                row,
+                col,
+                token_type,
+            });
+
+            self.next_char();
+            self.skip_whitespace();
+        }
+
+        Ok(self.into())
+    }
+
+    type TokenString = TokenString;
+    type Err = LexerError;
+}
+
+impl UnfinishedTokenString {
+    /// Advances past any ASCII whitespace at the current position.
+    fn skip_whitespace(&mut self) {
+        while !self.eof && self.input[self.position].is_ascii_whitespace() {
+            self.next_char();
+        }
+    }
+
+    /// Steps over the current byte, updating `row`, `col` and `eof`.
+    fn next_char(&mut self) {
+        if self.eof {
+            return;
+        }
+        // Row/column tracking is driven by the byte being left, so a
+        // newline at the very start of the input is counted as well.
+        if self.input[self.position] == b'\n' {
+            self.row += 1;
+            self.col = 1;
+        } else {
+            self.col += 1;
+        }
+        self.position += 1;
+        if self.position >= self.input.len() {
+            self.eof = true;
+        }
+    }
+
+    /// Lexes an identifier. Not implemented yet.
+    fn lex_identifier(&mut self) -> String {
+        todo!()
+    }
+
+    /// Lexes an integer literal. Not implemented yet.
+    fn lex_integer(&mut self) -> usize {
+        todo!()
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 1ce4ac5..a2df24b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,13 +1,26 @@
-mod assembler;
-mod parser;
+// mod assembler;
+mod lexer;
+// mod parser;
 
 mod error;
 
-pub use error::*;
+use color_eyre::eyre::Result;
+// pub use error::*;
+use lexer::TokenString;
 
-fn main() {
-    let code = "(+ 1 (+ 5 5))".to_string();
+fn main() -> Result<()> {
+    // let code = "(+ 1 (+ 5 5))".to_string();
+    color_eyre::install()?;
+    let code = "()".to_string();
+
+    let lexed: TokenString = code.parse()?;
+
+    dbg!(lexed);
+
+    Ok(())
+
 
+    /*
     let parsed = dbg!(parser::Parser::parse(code).unwrap());
     let assembled = assembler::Assembler::assemble(parsed).unwrap();
 
@@ -15,5 +28,5 @@ fn main() {
         print!("{byte:#X}, ");
     }
 
-    println!("");
+    println!(""); */
 }