fix: cleanup structs into correct files

2025-05-08 10:05:09 +09:00
parent 849895a437
commit 1c7d3f3adc
5 changed files with 229 additions and 229 deletions
--- a/src/aout.rs
+++ b/src/aout.rs
@@ -5,6 +5,7 @@ pub type c_long = i32; // we use a a.out with 32 byte
 #[derive(Debug)]
 #[allow(dead_code)]
 /// Internal representation of the a.out binary format.
 pub struct Aout {
    pub header: Header,
    pub text: Vec<u8>,
--- a/src/decode.rs
+++ b/src/decode.rs
@@ -1 +0,0 @@
--- a/src/disasm.rs
+++ b/src/disasm.rs
@@ -2,13 +2,14 @@ use core::fmt;
 use std::{fs::File, io::Read, process::exit};
 use crate::aout::Aout;
-use crate::instructions::MemoryIndex;
+use crate::instructions::{Displacement, MemoryIndex};
 use crate::{
    Args,
-    instructions::{ImmediateByte, ImmediateWord, Instruction, MetaInstruction, Register},
+    instructions::{ImmediateByte, ImmediateWord, Instruction, Opcode, Register},
 };
 #[derive(Debug)]
 /// Generic errors, which are encountered during parsing.
 pub enum DisasmError {
    NoFile(Option<String>),
    IoError(std::io::Error),
@@ -29,13 +30,12 @@ impl fmt::Display for DisasmError {
    }
 }
-/// Disassemble the binary in `path` into a vector of instructions
+/// Disassemble the binary in `path` into a vector of instructions.
-/// This function just calls all other
+/// Main entry point to the disassembly.
-pub fn disasm(args: &Args) -> Result<Vec<MetaInstruction>, DisasmError> {
+pub fn disasm(args: &Args) -> Result<Vec<Instruction>, DisasmError> {
    let contents = path_to_buf(args)?;
    let aout = Aout::new(contents);
    // XXX: 00 is just 0, maybe this could be a problem?
    log::debug!("{:?}", aout);
    let instructions = decode_instructions(&aout)?;
@@ -43,7 +43,7 @@ pub fn disasm(args: &Args) -> Result<Vec<MetaInstruction>, DisasmError> {
    Ok(instructions)
 }
-/// Read a filepath into a buffer
+/// Read a filepath into a u8 buffer.
 fn path_to_buf(args: &Args) -> Result<Vec<u8>, DisasmError> {
    let path = args
        .path
@@ -57,7 +57,7 @@ fn path_to_buf(args: &Args) -> Result<Vec<u8>, DisasmError> {
 }
 /// Decode instructions from the text section of the provided binary
-fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError> {
+fn decode_instructions(aout: &Aout) -> Result<Vec<Instruction>, DisasmError> {
    // naive approach:
    // 1. read byte
    // 2. pattern match to see which instruction it is
@@ -65,38 +65,38 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
    // repeat until no bytes left
    let mut instructions = Vec::new();
-    let mut offset = 0;
+    let mut disassembler = Disassembler {
        offset: 0,
        text: aout.text.clone(),
    };
-    let text = &aout.text;
+    while disassembler.offset < disassembler.text.len() {
-    while offset < aout.text.len() {
+        let mut instr = Instruction::new();
-        let mut instr = MetaInstruction::new();
+        instr.start = disassembler.offset;
        instr.start = offset;
-        let opcode = text[offset];
+        let opcode = disassembler.text[disassembler.offset];
        instr.raw.push(opcode);
        match opcode {
            // ADD
            0x00 => {
-                let (mem_index, mut raw) = parse_modrm_byte(&mut offset, text);
+                let (mem_index, mut raw) = disassembler.parse_modrm_byte();
-                let reg = parse_byte(&mut offset, text);
+                let reg = disassembler.parse_byte();
                instr.size = 2 + raw.len();
                instr.raw = Vec::from([opcode]);
                instr.raw.append(&mut raw);
                instr.raw.push(reg);
-                instr.instruction = Instruction::ADD_EbGb(mem_index, Register::by_id(reg));
+                instr.opcode = Opcode::ADD_EbGb(mem_index, Register::by_id(reg));
            }
            // INT
            0xCD => {
-                let byte = parse_byte(&mut offset, text);
+                let byte = disassembler.parse_byte();
-                instr.size = 2;
+                instr.raw.push(byte);
-                instr.raw = Vec::from([opcode, byte]);
+                instr.opcode = Opcode::INT(ImmediateByte(byte));
                instr.instruction = Instruction::INT(ImmediateByte(byte));
            }
            // MOV
            0xBB => {
-                let (word, raw) = parse_word(&mut offset, text);
+                let (word, raw) = disassembler.parse_word();
-                instr.size = 3;
+                instr.raw.push(raw.0);
-                instr.raw = Vec::from([opcode, raw.0, raw.1]);
+                instr.raw.push(raw.1);
-                instr.instruction = Instruction::MOV_BXIv(Register::BX, ImmediateWord(word));
+                instr.opcode = Opcode::MOV_BXIv(Register::BX, ImmediateWord(word));
            }
            _ => {
                eprintln!("Encountered unknown instruction '0x{:x}'", opcode);
@@ -113,117 +113,111 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
    Ok(instructions)
 }
 /// Read the byte that follows the current position and step past it.
 ///
 /// The cursor is first moved one byte forward (skipping the byte it rested
 /// on), the byte found there is returned, and the cursor is left on the byte
 /// after it, so `offset` grows by 2 in total.
 ///
 /// # Panics
 /// Panics if the byte to read lies outside of `text`.
 pub fn parse_byte(offset: &mut usize, text: &Vec<u8>) -> u8 {
     *offset += 1;
     let cursor = *offset;
     let value = text[cursor];
     *offset = cursor + 1;
     value
 }
 /// Read the little-endian word that follows the current position.
 ///
 /// The cursor is moved one byte forward, the two bytes found there are
 /// combined (first byte is the low half), and the cursor is left behind
 /// them, so `offset` grows by 3 in total. Returns the word together with the
 /// two raw bytes in the order they were read.
 ///
 /// # Panics
 /// Panics if either byte lies outside of `text`.
 pub fn parse_word(offset: &mut usize, text: &Vec<u8>) -> (u16, (u8, u8)) {
     *offset += 1;
     let start = *offset;
     let raw = (text[start], text[start + 1]);
     *offset = start + 2;
     (u16::from_le_bytes([raw.0, raw.1]), raw)
 }
 /// Decode ModRM-style addressing from the byte at `offset`.
 ///
 /// The two highest bits of that byte select the displacement form and the
 /// three lowest bits select the base/index register combination. Returns the
 /// resulting `MemoryIndex` together with the raw displacement bytes that were
 /// read (empty when there is no displacement). `offset` is only moved when a
 /// displacement is read.
 ///
 /// NOTE(review): the byte decoded here is the one `offset` currently points
 /// at, and the displacement branches bump `offset` before calling helpers
 /// that skip one more byte themselves - confirm both against the callers.
 ///
 /// # Panics
 /// Panics for the encodings that are not handled yet (mode 0 with rm 6, and
 /// mode 3) and when a byte to read lies outside of `text`.
 pub fn parse_modrm_byte(offset: &mut usize, text: &Vec<u8>) -> (MemoryIndex, Vec<u8>) {
     let modrm = text[*offset];
     let mode = modrm >> 6;
     let rm = modrm & 7;
     let mut displacement_raw = Vec::new();
     let displacement = match mode {
         // XXX: special case that still has to be handled
         0 if rm == 6 => panic!("Handle modulo == 0, rm == 6"),
         0 => None,
         1 => {
             *offset += 2;
             let byte = parse_byte(offset, text);
             displacement_raw.push(byte);
             log::debug!("Additional byte during ModRM parsing was read.");
             Some(Displacement::Byte(byte))
         }
         2 => {
             *offset += 3;
             let (word, (first, second)) = parse_word(offset, text);
             displacement_raw.extend_from_slice(&[first, second]);
             log::debug!("Additional two bytes during ModRM parsing was read.");
             Some(Displacement::Word(word))
         }
         3 => panic!("TODO: handle modulo == 3"),
         _ => panic!("Invalid ModRM byte encountered"),
     };
     // Register pair selected by the low three bits, as (base, index).
     let (base, index) = match rm {
         0 => (Some(Register::BX), Some(Register::SI)),
         1 => (Some(Register::BX), Some(Register::DI)),
         2 => (Some(Register::BP), Some(Register::SI)),
         3 => (Some(Register::BP), Some(Register::DI)),
         4 => (None, Some(Register::SI)),
         5 => (None, Some(Register::DI)),
         6 => (Some(Register::BP), None),
         7 => (Some(Register::BX), None),
         _ => panic!("Invalid ModRM byte encountered"),
     };
     (
         MemoryIndex {
             base,
             index,
             displacement,
         },
         displacement_raw,
     )
 }
 #[derive(Debug)]
-#[allow(dead_code)]
+struct Disassembler {
-/// Displacement for ModRM
+    pub offset: usize,
-pub enum Displacement {
+    pub text: Vec<u8>,
    Byte(u8),
    Word(u16),
 }
-impl fmt::Display for Displacement {
+impl Disassembler {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+    /// Parse a single byte of binary, return it and advance the offset.
-        match self {
+    /// Returns the read byte.
-            Self::Byte(byte) => write!(f, "{}", byte),
+    pub fn parse_byte(&mut self) -> u8 {
-            Self::Word(word) => write!(f, "{}", word),
+        self.offset += 1;
-        }
+        let byte = self.text[self.offset];
        self.offset += 1;
        byte
    }
    /// Parse a single word of binary, return it and advance the offset.
    /// Returns the read word and a tuple of the read raw bytes
    pub fn parse_word(&mut self) -> (u16, (u8, u8)) {
        self.offset += 1;
        let byte1 = self.text[self.offset];
        let byte2 = self.text[self.offset + 1];
        self.offset += 2;
        (u16::from_le_bytes([byte1, byte2]), (byte1, byte2))
    }
    /// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
    /// Returns the parsed modrm memory access, as well as all read raw bytes
    pub fn parse_modrm_byte(&mut self) -> (MemoryIndex, Vec<u8>) {
        // Calculate ModRM byte with bitmask
        let opcode = self.text[self.offset];
        let modulo = opcode >> 6;
        let reg = (opcode >> 3) & 7;
        let rm = opcode & 7;
        let mut displacement_raw = Vec::new();
        let displacement = match modulo {
            0 => {
                if rm == 6 {
                    // XXX: handle special case
                    panic!("Handle modulo == 0, rm == 6");
                }
                None
            }
            1 => {
                self.offset += 2; // one additional byte was read
                let byte = self.parse_byte();
                displacement_raw.push(byte);
                log::debug!("Additional byte during ModRM parsing was read.");
                Some(Displacement::Byte(byte))
            }
            2 => {
                self.offset += 3; // two additional bytes (word) was read
                let (word, raw) = self.parse_word();
                displacement_raw.push(raw.0);
                displacement_raw.push(raw.1);
                log::debug!("Additional two bytes during ModRM parsing was read.");
                Some(Displacement::Word(word))
            }
            3 => panic!("TODO: handle modulo == 3"),
            _ => panic!("Invalid ModRM byte encountered"),
        };
        let index = match rm {
            0 => MemoryIndex {
                base: Some(Register::BX),
                index: Some(Register::SI),
                displacement,
            },
            1 => MemoryIndex {
                base: Some(Register::BX),
                index: Some(Register::DI),
                displacement,
            },
            2 => MemoryIndex {
                base: Some(Register::BP),
                index: Some(Register::SI),
                displacement,
            },
            3 => MemoryIndex {
                base: Some(Register::BP),
                index: Some(Register::DI),
                displacement,
            },
            4 => MemoryIndex {
                base: None,
                index: Some(Register::SI),
                displacement,
            },
            5 => MemoryIndex {
                base: None,
                index: Some(Register::DI),
                displacement,
            },
            6 => MemoryIndex {
                base: Some(Register::BP),
                index: None,
                displacement,
            },
            7 => MemoryIndex {
                base: Some(Register::BX),
                index: None,
                displacement,
            },
            _ => panic!("Invalid ModRM byte encountered"),
        };
        return (index, displacement_raw);
    }
 }
--- a/src/instructions.rs
+++ b/src/instructions.rs
@@ -1,76 +1,45 @@
 use core::fmt;
-use crate::disasm::Displacement;
+// b: 8, w: 16, v: 16 -> i just treat v and w the same, if nothing blows up
-
+#[allow(non_camel_case_types)]
-pub type MemAddress = u8;
+pub type b = u8;
 #[allow(non_camel_case_types)]
 pub type w = u16;
 #[derive(Debug)]
 #[allow(dead_code)]
-/// A single 'line' of executable ASM is called a MetaInstruction, which
+/// A single 'line' of executable ASM is called an Instruction, which
-/// contains the `Instruction`, which will be executed, alongside some Meta
+/// contains the `Opcode` that will be executed, alongside its starting offset
-/// Informations.
+/// and the raw parsed bytes
-pub struct MetaInstruction {
+pub struct Instruction {
-    pub start: usize,             // location of the instruction start
+    pub start: usize,   // location of the instruction start
-    pub size: usize,              // size of the instruction in bytes
+    pub raw: Vec<u8>,   // raw value of instruction
-    pub raw: Vec<u8>,             // raw value of instruction
+    pub opcode: Opcode, // actual instruction
    pub instruction: Instruction, // actual instruction
 }
-impl MetaInstruction {
+impl Instruction {
    pub fn new() -> Self {
-        MetaInstruction {
+        Instruction {
            start: 0,
            size: 0,
            raw: Vec::new(),
-            instruction: Instruction::NOP(),
+            opcode: Opcode::NOP(),
        }
    }
 }
-impl fmt::Display for MetaInstruction {
+impl fmt::Display for Instruction {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:04x}: ", self.start).unwrap();
        for b in self.raw.iter() {
            write!(f, "{:02x}", b).unwrap();
        }
-        write!(f, "\t{}", self.instruction)
+        write!(f, "\t{}", self.opcode)
    }
 }
 /// Memory operand built from an optional base register, an optional index
 /// register and an optional displacement, e.g. `[bx+si]`.
 #[derive(Debug)]
 pub struct MemoryIndex {
     /// Base register, if any.
     pub base: Option<Register>,
     /// Index register, if any.
     pub index: Option<Register>,
     /// Displacement added to the registers, if any.
     pub displacement: Option<Displacement>,
 }
 impl fmt::Display for MemoryIndex {
     /// Writes the parts that are present, joined by `+` and wrapped in square
     /// brackets.
     ///
     /// # Panics
     /// Panics when neither a base nor an index register is set.
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match (&self.base, &self.index, &self.displacement) {
             (Some(base), Some(index), Some(displacement)) => {
                 write!(f, "[{}+{}+{}]", base, index, displacement)
             }
             (Some(base), Some(index), None) => write!(f, "[{}+{}]", base, index),
             (Some(base), None, Some(displacement)) => write!(f, "[{}+{}]", base, displacement),
             (Some(base), None, None) => write!(f, "[{}]", base),
             // This form used to be printed without the surrounding brackets,
             // unlike every other form of the operand.
             (None, Some(index), Some(displacement)) => write!(f, "[{}+{}]", index, displacement),
             (None, Some(index), None) => write!(f, "[{}]", index),
             (None, None, _) => panic!("Invalid MemoryIndex encountered"),
         }
     }
 }
 #[derive(Debug)]
 #[allow(dead_code, non_camel_case_types)]
-pub enum Instruction {
+pub enum Opcode {
    NOP(),
    // ADD
    ADD_EbGb(MemoryIndex, Register),
@@ -80,7 +49,7 @@ pub enum Instruction {
    INT(ImmediateByte),
 }
-impl fmt::Display for Instruction {
+impl fmt::Display for Opcode {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::INT(byte) => write!(f, "INT, {:x}", byte),
@@ -91,46 +60,6 @@ impl fmt::Display for Instruction {
    }
 }
 // Wrapper types used to encode instruction operands.
 /// A memory operand holding a `MemAddress`.
 #[derive(Debug)]
 pub struct Memory(pub MemAddress);
 // Width suffixes: b = 8 bit, w = 16 bit, v = 16 bit; v and w are treated alike here.
 /// An immediate 8-bit operand.
 #[derive(Debug)]
 pub struct ImmediateByte(pub u8);
 /// An immediate 16-bit operand.
 #[derive(Debug)]
 pub struct ImmediateWord(pub u16);
 // Formatting impls for the wrappers above are generated by the macros below.
 /// Implements `Display` for a tuple struct by printing its first field with
 /// default formatting.
 macro_rules! impl_display {
     ($wrapper:ident) => {
         impl std::fmt::Display for $wrapper {
             fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                 f.write_fmt(format_args!("{}", self.0))
             }
         }
     };
 }
 /// Implements `Display` exactly like `impl_display!` and additionally
 /// `LowerHex`, which is forwarded to the first field.
 macro_rules! impl_display_and_lowerhex {
     ($wrapper:ident) => {
         impl_display!($wrapper);
         impl std::fmt::LowerHex for $wrapper {
             fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                 std::fmt::LowerHex::fmt(&self.0, f)
             }
         }
     };
 }
 impl_display!(Memory);
 impl_display_and_lowerhex!(ImmediateByte);
 impl_display_and_lowerhex!(ImmediateWord);
 /// Registers of a 8086 processor
 #[derive(Debug)]
 #[allow(dead_code)]
@@ -236,3 +165,80 @@ impl fmt::Display for SegmentRegister {
        }
    }
 }
 /// Immediate operand that is one byte (`b`) wide.
 #[derive(Debug)]
 pub struct ImmediateByte(pub b);
 /// Immediate operand that is one word (`w`) wide.
 #[derive(Debug)]
 pub struct ImmediateWord(pub w);
 /// Implements `Display` (first field, default formatting) and `LowerHex`
 /// (forwarded to the first field) for a tuple struct.
 macro_rules! impl_display_and_lowerhex {
     ($wrapper:ident) => {
         impl std::fmt::Display for $wrapper {
             fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                 f.write_fmt(format_args!("{}", self.0))
             }
         }
         impl std::fmt::LowerHex for $wrapper {
             fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                 let inner = &self.0;
                 std::fmt::LowerHex::fmt(inner, f)
             }
         }
     };
 }
 impl_display_and_lowerhex!(ImmediateByte);
 impl_display_and_lowerhex!(ImmediateWord);
 /// A memory index operand is usually created by ModRM bytes or words.
 /// e.g. [bx+si]
 #[derive(Debug)]
 pub struct MemoryIndex {
     /// Base register, if any.
     pub base: Option<Register>,
     /// Index register, if any.
     pub index: Option<Register>,
     /// Displacement added to the registers, if any.
     pub displacement: Option<Displacement>,
 }
 impl fmt::Display for MemoryIndex {
     /// Writes the parts that are present, joined by `+` and wrapped in square
     /// brackets.
     ///
     /// # Panics
     /// Panics when neither a base nor an index register is set.
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match (&self.base, &self.index, &self.displacement) {
             (Some(base), Some(index), Some(displacement)) => {
                 write!(f, "[{}+{}+{}]", base, index, displacement)
             }
             (Some(base), Some(index), None) => write!(f, "[{}+{}]", base, index),
             (Some(base), None, Some(displacement)) => write!(f, "[{}+{}]", base, displacement),
             (Some(base), None, None) => write!(f, "[{}]", base),
             // This form used to be printed without the surrounding brackets,
             // unlike every other form of the operand.
             (None, Some(index), Some(displacement)) => write!(f, "[{}+{}]", index, displacement),
             (None, Some(index), None) => write!(f, "[{}]", index),
             (None, None, _) => panic!("Invalid MemoryIndex encountered"),
         }
     }
 }
 #[derive(Debug)]
 #[allow(dead_code)]
 /// Displacement attached to a ModRM operand, either one byte or one word wide.
 pub enum Displacement {
     /// 8-bit displacement.
     Byte(u8),
     /// 16-bit displacement.
     Word(u16),
 }
 impl fmt::Display for Displacement {
     /// Writes the displacement as an unsigned decimal number.
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         // Both widths print the same way, so widen first and format once.
         let value = match *self {
             Displacement::Byte(narrow) => u16::from(narrow),
             Displacement::Word(wide) => wide,
         };
         write!(f, "{}", value)
     }
 }
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,6 @@
 use clap::{Parser, Subcommand};
 mod aout;
 mod decode;
 mod disasm;
 mod instructions;
@@ -34,7 +33,8 @@ fn main() {
    match args.command {
        Command::Disasm => {
-            let _instructions = disasm::disasm(&args).unwrap();
+            let instructions = disasm::disasm(&args).unwrap();
            log::debug!("{:?}", &instructions);
        }
        _ => panic!("Command not yet implemented"),
    }