From 0893969f4e0a4a3723a8d34b26a64bfe592f5c7e Mon Sep 17 00:00:00 2001 From: Marco Thomas Date: Wed, 28 May 2025 09:41:40 +0900 Subject: [PATCH] chore: whole swoop of enhanced documentation --- README.md | 32 +++--- src/aout.rs | 45 +++++++- src/disasm.rs | 264 ++++++++++++++++++++++--------------------- src/disasm_macros.rs | 6 +- src/instructions.rs | 31 +++-- src/operands.rs | 1 - src/register.rs | 4 - 7 files changed, 210 insertions(+), 173 deletions(-) diff --git a/README.md b/README.md index ed52ffc..8f2623b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ minix-8086-rs is a Rust-based toolchain for analyzing and interpreting 16-bit 80 It includes: - 📦 a.out Parser: Parses legacy MINIX executables. -- 🛠 8086 Disassembler: Parses 16-bit instructions into an IR and prints them in a `objdump(1)`-style fasion. +- 🛠 8086 Disassembler: Parses 16-bit instructions into an IR and prints them in a `objdump(1)`-style fashion. - 💻 8086 Interpreter: Interprets the 8086 instructions, i.e., the MINIX binary. ## Usage @@ -19,6 +19,11 @@ Or run it directly: cargo run -- --help ``` +Run with debug output: +``` +RUST_LOG=debug cargo run -- interpret -p ./a.out +``` + CLI Options: ``` $ cargo run -- --help @@ -37,32 +42,21 @@ Options: -V, --version Print version ``` -## Examples - -Disassemble a binary: - -``` -cargo run -- disasm -p ./a.out -``` - -Interpret a binary: -``` -cargo run -- interpret -p ./a.out -``` - -Run with debug output: -``` -RUST_LOG=debug cargo run -- interpret -p ./a.out -``` - ## Status This project is under active development and primarily used by me to explore some Intel disassembly and learn some more Rust. Expect bugs and some missing features. +I mainly test with 'official' binaries from the MINIX source tree. ## Documentation +The documentation of the project itself can be accessed by using `cargo doc`. 
+``` +$ cargo doc +$ firefox target/doc/minix_8086_rs/index.html +``` + For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix. diff --git a/src/aout.rs b/src/aout.rs index 80417ed..2cac670 100644 --- a/src/aout.rs +++ b/src/aout.rs @@ -1,12 +1,12 @@ //! Internal a.out File abstraction. +use core::fmt; use std::ffi::{c_uchar, c_ushort}; #[allow(non_camel_case_types)] pub type c_long = i32; // we use a a.out with 32 byte #[derive(Debug)] -#[allow(dead_code)] /// Internal representation of the a.out binary format. pub struct Aout { pub header: Header, @@ -14,6 +14,14 @@ pub struct Aout { pub data: Vec, } +impl fmt::Display for Aout { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "Header: {:#?}\n", self.header).unwrap(); + write!(f, "Text: {:#?}\n", self.text).unwrap(); + write!(f, "Data: {:#?}\n", self.data) + } +} + impl Aout { pub fn new(buf: Vec) -> Self { let hdr = Header { @@ -48,7 +56,6 @@ impl Aout { } #[derive(Debug)] -#[allow(dead_code)] pub struct Header { pub magic: [c_uchar; 2], // magic number pub flags: c_uchar, // flags, see below @@ -63,3 +70,37 @@ pub struct Header { pub total: c_long, // total memory allocated pub syms: c_long, // size of symbol table } + +impl fmt::Display for Header { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Header: + Magic: {:02X?} + Flags: {:#04X} + CPU: {} + Header Length: {} + Unused: {} + Version: {:#06X} + Text Size: {} + Data Size: {} + BSS Size: {} + Entry Point: {:#X} + Total Allocated: {} + Symbol Table Size: {} +", + self.magic, + self.flags, + self.cpu, + self.hdrlen, + self.unused, + self.version, + self.text, + self.data, + self.bss, + self.entry, + self.total, + self.syms + ) + } +} diff --git a/src/disasm.rs b/src/disasm.rs index 3a5f676..3c94964 
100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -9,7 +9,7 @@ use crate::{ Args, instructions::{Instruction, Mnemonic}, }; -use crate::{modrm_instruction_sregister, modrm_instruction_wordwidth, modrm_target_bytewidth}; +use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister}; use core::fmt; use std::{fs::File, io::Read, process::exit}; @@ -103,8 +103,37 @@ impl Disassembler { } } - /// Parse a single byte of binary, return it and advance the offset. - /// Returns the read byte. + /// Start the disassembly and allow for some error handling wrapped around + /// the actual decoding function. + pub fn disassemble(&mut self) -> Result, DisasmError> { + let is_ok = self.decode_instructions(); + + // a.out pads the text section to byte align, so the falsely interpreted + // instructions have to be removed. + self.remove_trailing_padding(); + + // read instructions from disassembler object instead of decode function + // to allow some errors to act as warnings (see below) + let instructions = self.instructions.clone(); + + // allow for warning-type errors to pass through, as they are not fatal + match is_ok { + Ok(_) => Ok(instructions), + Err(e) => match e { + DisasmError::EndOfTextSection => { + log::debug!("Solo padded 0-byte at end of file was found. Ignoring."); + Ok(instructions) + } + _ => { + println!("Encountered error during disassembly: {e}"); + Err(e) + } + }, + } + } + + /// Parse a single byte of the binary and advance the offset. + /// Returns the read byte (Intel b operand). fn parse_byte(&mut self) -> Result { log::debug!("Attempting to parse byte at {:#04x} ...", self.offset); // check if the byte would be out of bounds @@ -130,9 +159,9 @@ impl Disassembler { Ok(*byte) } - /// Parse a single word of binary. + /// Parse a single word of the binary and advance the offset. /// Just a wrapper for parsing a byte twice. - /// Returns the read word. + /// Returns the read word (Intel w/v operand).
fn parse_word(&mut self) -> Result { log::debug!("Attempting to parse word at {:#04x} ...", self.offset); let byte1 = self.parse_byte()?; @@ -140,9 +169,10 @@ impl Disassembler { Ok(u16::from_le_bytes([byte1, byte2])) } - /// Parse a single byte of binary and interpret as as signed. - /// The isize contains a relative offset to be added to the address - /// of the subsequent instruction. + /// Parse a byte of the binary, interpret it as signed and advance the + /// offset. + /// Returns the read byte added to the address of the subsequent instruction + /// to act as a relative offset (Intel Jb operand). fn parse_j_byte(&mut self) -> Result { log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset); // first interpret as 2-complement, then cast for addition @@ -155,9 +185,10 @@ impl Disassembler { Ok(byte + next_addr) } - /// Parse a single byte of binary and interpret as signed. - /// The isize contains a relative offset to be added to the address - /// of the subsequent instruction. + /// Parse a word of the binary, interpret it as signed and advance the + /// offset. + /// Returns the read word added to the address of the subsequent instruction + /// to act as a relative offset (Intel Jw/Jv operand). pub fn parse_j_word(&mut self) -> Result { log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset); // first interpret as 2-complement, then cast for addition @@ -170,7 +201,10 @@ impl Disassembler { Ok(word + next_addr) } - /// Parse a pointer type. + /// Parse a single pointer of the binary and advance the offset. + /// Just a wrapper for parsing a byte 4 times and constructing a pointer + /// type. + /// Returns the read pointer (Intel p operand). fn parse_ptr(&mut self) -> Result { log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset); let byte0 = self.parse_byte()?; @@ -185,24 +219,30 @@ impl Disassembler { }) } - /// Takes in a modrm byte and returns mod, reg and r/m.
- fn deconstruct_modrm_byte(modrm: u8) -> (u8, u8, u8) { - let mode = (modrm >> 6) & 0b11; - let reg = (modrm >> 3) & 0b111; - let rm = modrm & 0b111; - - (mode, reg, rm) + /// Parse an Mp Operand (Memory Pointer). + /// An Mp is a ModRM byte with the `reg` bits ignored and an additional + /// 2 [`Word`]s parsed for a [`Pointer`] type. + fn parse_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> { + let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?; + let ptr = self.parse_ptr()?; + Ok((target, ptr)) } - /// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset. - /// Returns the parsed modrm target and the source register + /// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or + /// Register) from that byte and advance the offset. + /// It is always just a single byte, even for word-width instructions. + /// Returns the [`ModRmTarget`] (either memory or a register) as well as the + /// `reg` bitfield, which will later be used to determine another register + /// or even mnemonic in the group-type instructions. fn parse_modrm_byte( &mut self, register_width: Operand, ) -> Result<(ModRmTarget, RegisterId), DisasmError> { let modrm = self.parse_byte()?; - let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm); + let mode = (modrm >> 6) & 0b11; + let reg = (modrm >> 3) & 0b111; + let rm = modrm & 0b111; log::debug!( "{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}", @@ -304,17 +344,17 @@ impl Disassembler { Ok((ModRmTarget::Memory(index), reg)) } - /// Match the modrm reg bits to the GPR1 mnemonics. - /// Group 1 always have an ModRM target (all modrm bits, without reg) as - /// first and an imm value as second operand (which has to be parsed before - /// call to this function), but is available in both Byte and Word length. + /// Match the ModRM `reg` bitfield to Intel Group 1-type instructions. 
Group + /// 1 always has a [`ModRmTarget`] as first and an immediate value as second + /// operand. The ModRM `reg` field selects the mnemonic; the immediate is + /// byte- or word-sized, matching the instruction currently being parsed. fn modrm_reg_to_grp1( - modrm_reg_byte: u8, + reg: u8, target: ModRmTarget, - register_id: Operand, + instruction_width: Operand, ) -> Result { - match register_id { - Operand::Byte(b) => match modrm_reg_byte { + match instruction_width { + Operand::Byte(b) => match reg { 0b000 => Ok(Mnemonic::ADD_Ib(target, b)), 0b001 => Ok(Mnemonic::OR_Ib(target, b)), 0b010 => Ok(Mnemonic::ADC_Ib(target, b)), @@ -323,9 +363,9 @@ impl Disassembler { 0b101 => Ok(Mnemonic::SUB_Ib(target, b)), 0b110 => Ok(Mnemonic::XOR_Ib(target, b)), 0b111 => Ok(Mnemonic::CMP_Ib(target, b)), - _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)), + _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), }, - Operand::Word(w) => match modrm_reg_byte { + Operand::Word(w) => match reg { 0b000 => Ok(Mnemonic::ADD_Iv(target, w)), 0b001 => Ok(Mnemonic::OR_Iv(target, w)), 0b010 => Ok(Mnemonic::ADC_Iv(target, w)), @@ -334,15 +374,16 @@ impl Disassembler { 0b101 => Ok(Mnemonic::SUB_Iv(target, w)), 0b110 => Ok(Mnemonic::XOR_Iv(target, w)), 0b111 => Ok(Mnemonic::CMP_Iv(target, w)), - _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)), + _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), }, } } - /// Match the modrm reg bits to the GPR2 mnemonics. - /// Group 2 only has a single operand, the other one is either a constant - /// 1 (not present in the binary) or the CL register. - /// This function assumes the operand to be 1 + /// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2 + /// always only has a single operand, the other is either `1` or the `CL` + /// register. + /// This function assumes the operand to be `1`. + /// See [`Self::modrm_reg_to_grp2_cl`] for the counterpart.
fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result { match reg { 0b000 => Ok(Mnemonic::ROL_b(target, 1)), @@ -357,10 +398,11 @@ impl Disassembler { } } - /// Match the modrm reg bits to the GPR2 mnemonics. - /// Group 2 only has a single operand, the other one is either a constant - /// 1 (not present in the binary) or the CL register. - /// This function assumes the operand to be CL register. + /// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2 + /// always only has a single operand, the other is either `1` or the `CL` + /// register. + /// This function assumes the operand to be [`Register::CL`]. + /// See [`Self::modrm_reg_to_grp2_1`] for the counterpart. fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result { match reg { 0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)), @@ -375,9 +417,9 @@ impl Disassembler { } } - /// Match the modrm reg bits to the GPR3a/b mnemonics. - /// Group 3 only has a single operand, which is the ModRmTarget selected - /// by modrm bits. + /// Match the ModRM `reg` bits to Intel Group 3a/b-type instructions. + /// Group 3 selects a unary mnemonic with the `reg` bit field. The operand + /// is the [`ModRmTarget`]. fn modrm_reg_to_grp3( &mut self, reg: u8, @@ -400,21 +442,12 @@ impl Disassembler { } } - /// Parse an Mp Operand (Memory Pointer). - /// An Mp is a ModRM byte with the `reg` bits ignored and an additional - /// 2 words parsed for a `Pointer` type. - fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> { - let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?; - let ptr = self.parse_ptr()?; - Ok((target, ptr)) - } - /// a.out pads the text section with 0x00 bytes. During parsing, these get /// interpreted as `0x00 0x00`, which have to get removed for an authentic /// disassembly. /// This is done in favor of removing all 0x00 bytes in the beginning, - /// as this could remove an actual 0x00 byte as operand of the final - /// instruction.
Of course, this could remove an actual `0x00 0x00` + /// as this could remove an actual `0x00` byte as operand of the final + /// real instruction. Of course, this could remove an actual `0x00 0x00` /// instruction from the end, but they would not have any effect on /// execution anyway. fn remove_trailing_padding(&mut self) { @@ -441,33 +474,10 @@ impl Disassembler { self.instructions.truncate(until); } - /// Start the disassmble and allow for some error handling wrapped around - /// the actual decoding function. - pub fn disassemble(&mut self) -> Result, DisasmError> { - let parsing = self.decode_instructions(); - - // a.out pads the text section to byte align, so the fasely interpreted - // instructions have to be removed. - self.remove_trailing_padding(); - let instructions = self.instructions.clone(); - - // allow for warning-type errors to pass through, as they are not fatal - match parsing { - Ok(_) => Ok(instructions), - Err(e) => match e { - DisasmError::EndOfTextSection => { - log::debug!("Solo padded 0-byte at end of file was found. Ignoring."); - Ok(instructions) - } - _ => { - println!("Encountered error during disassembly: {e}"); - Err(e) - } - }, - } - } - - /// Decode instructions by matching their byte signature to their mnemonics. + /// Decode instructions by matching byte signature to their mnemonics and + /// depending on the instruction, parsing some operands afterwards. + /// All parsing is done in encapsulated functions, here everything just + /// gets consolidated.
fn decode_instructions(&mut self) -> Result<(), DisasmError> { log::debug!("Starting to decode text of length {}", self.text.len()); while self.offset < self.text.len() { @@ -482,20 +492,20 @@ impl Disassembler { self.instruction.raw.push(opcode); self.instruction.opcode = match opcode { - 0x00 => modrm_target_bytewidth!(self, ADD_FromReg), - 0x01 => modrm_instruction_wordwidth!(self, ADD_FromReg), - 0x02 => modrm_target_bytewidth!(self, ADD_ToReg), - 0x03 => modrm_instruction_wordwidth!(self, ADD_ToReg), + 0x00 => modrm_8b_register!(self, ADD_FromReg), + 0x01 => modrm_16b_register!(self, ADD_FromReg), + 0x02 => modrm_8b_register!(self, ADD_ToReg), + 0x03 => modrm_16b_register!(self, ADD_ToReg), 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?), 0x05 => Mnemonic::ADD_AXIv(self.parse_word()?), 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), 0x07 => Mnemonic::POP_S(SegmentRegister::ES), - 0x08 => modrm_target_bytewidth!(self, OR_FromReg), - 0x09 => modrm_instruction_wordwidth!(self, OR_FromReg), - 0x0A => modrm_target_bytewidth!(self, OR_ToReg), - 0x0B => modrm_instruction_wordwidth!(self, OR_ToReg), + 0x08 => modrm_8b_register!(self, OR_FromReg), + 0x09 => modrm_16b_register!(self, OR_FromReg), + 0x0A => modrm_8b_register!(self, OR_ToReg), + 0x0B => modrm_16b_register!(self, OR_ToReg), 0x0C => Mnemonic::OR_ALIb(self.parse_byte()?), 0x0D => Mnemonic::OR_AXIv(self.parse_word()?), @@ -503,60 +513,60 @@ impl Disassembler { 0x0F => return Err(DisasmError::OpcodeUndefined(opcode)), - 0x10 => modrm_target_bytewidth!(self, ADC_FromReg), - 0x11 => modrm_instruction_wordwidth!(self, ADC_FromReg), - 0x12 => modrm_target_bytewidth!(self, ADC_ToReg), - 0x13 => modrm_instruction_wordwidth!(self, ADC_ToReg), + 0x10 => modrm_8b_register!(self, ADC_FromReg), + 0x11 => modrm_16b_register!(self, ADC_FromReg), + 0x12 => modrm_8b_register!(self, ADC_ToReg), + 0x13 => modrm_16b_register!(self, ADC_ToReg), 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?), 0x15 => 
Mnemonic::ADC_AXIv(self.parse_word()?), 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), 0x17 => Mnemonic::POP_S(SegmentRegister::SS), - 0x18 => modrm_target_bytewidth!(self, SBB_FromReg), - 0x19 => modrm_instruction_wordwidth!(self, SBB_FromReg), - 0x1A => modrm_target_bytewidth!(self, SBB_ToReg), - 0x1B => modrm_instruction_wordwidth!(self, SBB_ToReg), + 0x18 => modrm_8b_register!(self, SBB_FromReg), + 0x19 => modrm_16b_register!(self, SBB_FromReg), + 0x1A => modrm_8b_register!(self, SBB_ToReg), + 0x1B => modrm_16b_register!(self, SBB_ToReg), 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?), 0x1D => Mnemonic::SBB_AXIv(self.parse_word()?), 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), 0x1F => Mnemonic::POP_S(SegmentRegister::DS), - 0x20 => modrm_target_bytewidth!(self, AND_FromReg), - 0x21 => modrm_instruction_wordwidth!(self, AND_FromReg), - 0x22 => modrm_target_bytewidth!(self, AND_ToReg), - 0x23 => modrm_instruction_wordwidth!(self, AND_ToReg), + 0x20 => modrm_8b_register!(self, AND_FromReg), + 0x21 => modrm_16b_register!(self, AND_FromReg), + 0x22 => modrm_8b_register!(self, AND_ToReg), + 0x23 => modrm_16b_register!(self, AND_ToReg), 0x24 => Mnemonic::AND_ALIb(self.parse_byte()?), 0x25 => Mnemonic::AND_AXIv(self.parse_word()?), 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), 0x27 => Mnemonic::DAA, - 0x28 => modrm_target_bytewidth!(self, SUB_FromReg), - 0x29 => modrm_instruction_wordwidth!(self, SUB_FromReg), - 0x2A => modrm_target_bytewidth!(self, SUB_ToReg), - 0x2B => modrm_instruction_wordwidth!(self, SUB_ToReg), + 0x28 => modrm_8b_register!(self, SUB_FromReg), + 0x29 => modrm_16b_register!(self, SUB_FromReg), + 0x2A => modrm_8b_register!(self, SUB_ToReg), + 0x2B => modrm_16b_register!(self, SUB_ToReg), 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?), 0x2D => Mnemonic::SUB_AXIv(self.parse_word()?), 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), 0x2F => Mnemonic::DAS, - 0x30 => modrm_target_bytewidth!(self, XOR_FromReg), - 0x31 => 
modrm_instruction_wordwidth!(self, XOR_FromReg), - 0x32 => modrm_target_bytewidth!(self, XOR_ToReg), - 0x33 => modrm_instruction_wordwidth!(self, XOR_ToReg), + 0x30 => modrm_8b_register!(self, XOR_FromReg), + 0x31 => modrm_16b_register!(self, XOR_FromReg), + 0x32 => modrm_8b_register!(self, XOR_ToReg), + 0x33 => modrm_16b_register!(self, XOR_ToReg), 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?), 0x35 => Mnemonic::XOR_AXIv(self.parse_word()?), 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), 0x37 => Mnemonic::AAA, - 0x38 => modrm_target_bytewidth!(self, CMP_FromReg), - 0x39 => modrm_instruction_wordwidth!(self, CMP_FromReg), - 0x3A => modrm_target_bytewidth!(self, CMP_ToReg), - 0x3B => modrm_instruction_wordwidth!(self, CMP_ToReg), + 0x38 => modrm_8b_register!(self, CMP_FromReg), + 0x39 => modrm_16b_register!(self, CMP_FromReg), + 0x3A => modrm_8b_register!(self, CMP_ToReg), + 0x3B => modrm_16b_register!(self, CMP_ToReg), 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?), 0x3D => Mnemonic::CMP_AXIv(self.parse_word()?), @@ -642,20 +652,20 @@ impl Disassembler { Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? 
} - 0x84 => modrm_target_bytewidth!(self, TEST), - 0x85 => modrm_instruction_wordwidth!(self, TEST), + 0x84 => modrm_8b_register!(self, TEST), + 0x85 => modrm_16b_register!(self, TEST), - 0x86 => modrm_target_bytewidth!(self, XCHG), - 0x87 => modrm_instruction_wordwidth!(self, XCHG), + 0x86 => modrm_8b_register!(self, XCHG), + 0x87 => modrm_16b_register!(self, XCHG), - 0x88 => modrm_target_bytewidth!(self, MOV_FromReg), - 0x89 => modrm_instruction_wordwidth!(self, MOV_FromReg), - 0x8A => modrm_target_bytewidth!(self, MOV_ToReg), - 0x8B => modrm_instruction_wordwidth!(self, MOV_ToReg), - 0x8C => modrm_instruction_sregister!(self, MOV_FromSReg), - 0x8E => modrm_instruction_sregister!(self, MOV_ToSReg), + 0x88 => modrm_8b_register!(self, MOV_FromReg), + 0x89 => modrm_16b_register!(self, MOV_FromReg), + 0x8A => modrm_8b_register!(self, MOV_ToReg), + 0x8B => modrm_16b_register!(self, MOV_ToReg), + 0x8C => modrm_sregister!(self, MOV_FromSReg), + 0x8E => modrm_sregister!(self, MOV_ToSReg), - 0x8D => modrm_instruction_wordwidth!(self, LEA), + 0x8D => modrm_16b_register!(self, LEA), 0x8F => { let (target, _) = self.parse_modrm_byte(Operand::Word(0))?; @@ -734,11 +744,11 @@ impl Disassembler { 0xC3 => Mnemonic::RET, 0xC4 => { - let (target, ptr) = self.modrm_mp()?; + let (target, ptr) = self.parse_mp()?; Mnemonic::LES(target, ptr) } 0xC5 => { - let (target, ptr) = self.modrm_mp()?; + let (target, ptr) = self.parse_mp()?; Mnemonic::LDS(target, ptr) } diff --git a/src/disasm_macros.rs b/src/disasm_macros.rs index c126806..bdfcdee 100644 --- a/src/disasm_macros.rs +++ b/src/disasm_macros.rs @@ -2,7 +2,7 @@ #[macro_export] /// Generate a Mnemonic for an 8-bit Register from a ModRM byte. -macro_rules! modrm_target_bytewidth { +macro_rules! modrm_8b_register { ($self:ident, $variant:ident) => {{ let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?; Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?) @@ -11,7 +11,7 @@ macro_rules! 
modrm_target_bytewidth { #[macro_export] /// Generate a Mnemonic for a 16-bit Register from a ModRM byte. -macro_rules! modrm_instruction_wordwidth { +macro_rules! modrm_16b_register { ($self:ident, $variant:ident) => {{ let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?; Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?) @@ -20,7 +20,7 @@ macro_rules! modrm_instruction_wordwidth { #[macro_export] /// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte. -macro_rules! modrm_instruction_sregister { +macro_rules! modrm_sregister { ($self:ident, $variant:ident) => {{ let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?; Mnemonic::$variant(target, SegmentRegister::by_id(reg)?) diff --git a/src/instructions.rs b/src/instructions.rs index fb858d5..8badb48 100644 --- a/src/instructions.rs +++ b/src/instructions.rs @@ -7,7 +7,6 @@ use crate::{ use core::fmt; #[derive(Debug, Clone, Eq, PartialEq)] -#[allow(dead_code)] /// A single 'line' of executable ASM is called an Instruction, which /// contains the `Mnemonic` that will be executed, alongside its starting offset /// and the raw parsed bytes @@ -47,7 +46,7 @@ impl fmt::Display for Instruction { } #[derive(Debug, Clone, PartialEq, Eq)] -#[allow(dead_code, non_camel_case_types)] +#[allow(non_camel_case_types)] /// All possible mnemonic variantions. /// These are sorted by type and are not in hex-encoding order. 
// XXX: convert this copy and paste horror in a proc macro like @@ -296,8 +295,6 @@ pub enum Mnemonic { AAD(Byte), // MISC XLAT, - // Not part of 8086: - EOT, // End of Text Section } impl fmt::Display for Mnemonic { @@ -414,8 +411,8 @@ impl fmt::Display for Mnemonic { Self::MOV_ToReg(target, reg) => write!(f, "mov {reg}, {target}"), Self::MOV_FromSReg(target, reg) => write!(f, "mov {target}, {reg}"), Self::MOV_ToSReg(target, reg) => write!(f, "mov {reg}, {target}"), - Self::MOV_Ib(target, byte) => write!(f, "mov byte {target}, {byte:#04x}"), - Self::MOV_Iv(target, word) => write!(f, "mov word {target}, {word:#04x}"), + Self::MOV_Ib(target, byte) => write!(f, "mov byte ptr {target}, {byte:#04x}"), + Self::MOV_Iv(target, word) => write!(f, "mov word ptr {target}, {word:#04x}"), Self::MOV_AL0b(byte) => write!(f, "mov {}, {byte:#04x}", Register::AL), Self::MOV_AX0v(word) => write!(f, "mov {}, {word:#04x}", Register::AX), @@ -492,13 +489,13 @@ impl fmt::Display for Mnemonic { Self::HLT => write!(f, "hlt"), - Self::ROL_b(target, byte) => write!(f, "rol byte {target}, {byte:#04x}"), - Self::ROR_b(target, byte) => write!(f, "ror byte {target}, {byte:#04x}"), - Self::RCL_b(target, byte) => write!(f, "rcl byte {target}, {byte:#04x}"), - Self::RCR_b(target, byte) => write!(f, "rcr byte {target}, {byte:#04x}"), - Self::SHL_b(target, byte) => write!(f, "shl byte {target}, {byte:#04x}"), - Self::SHR_b(target, byte) => write!(f, "shr byte {target}, {byte:#04x}"), - Self::SAR_b(target, byte) => write!(f, "sar byte {target}, {byte:#04x}"), + Self::ROL_b(target, byte) => write!(f, "rol byte ptr {target}, {byte:#04x}"), + Self::ROR_b(target, byte) => write!(f, "ror byte ptr {target}, {byte:#04x}"), + Self::RCL_b(target, byte) => write!(f, "rcl byte ptr {target}, {byte:#04x}"), + Self::RCR_b(target, byte) => write!(f, "rcr byte ptr {target}, {byte:#04x}"), + Self::SHL_b(target, byte) => write!(f, "shl byte ptr {target}, {byte:#04x}"), + Self::SHR_b(target, byte) => write!(f, "shr 
byte ptr {target}, {byte:#04x}"), + Self::SAR_b(target, byte) => write!(f, "sar byte ptr {target}, {byte:#04x}"), Self::ROL_fromReg(target, reg) => write!(f, "rol {target}, {reg}"), Self::ROR_fromReg(target, reg) => write!(f, "ror {target}, {reg}"), Self::RCL_fromReg(target, reg) => write!(f, "rcl {target}, {reg}"), @@ -507,13 +504,13 @@ impl fmt::Display for Mnemonic { Self::SHR_fromReg(target, reg) => write!(f, "shr {target}, {reg}"), Self::SAR_fromReg(target, reg) => write!(f, "sar {target}, {reg}"), - Self::IN_AL(byte) => write!(f, "in byte {}, {byte:#04x}", Register::AL), - Self::IN_AX(byte) => write!(f, "in byte {}, {byte:#04x}", Register::AX), + Self::IN_AL(byte) => write!(f, "in byte ptr {}, {byte:#04x}", Register::AL), + Self::IN_AX(byte) => write!(f, "in byte ptr {}, {byte:#04x}", Register::AX), Self::IN_ALDX => write!(f, "in {}, {}", Register::AL, Register::DX), Self::IN_AXDX => write!(f, "in {}, {}", Register::AX, Register::DX), - Self::OUT_AL(byte) => write!(f, "out byte {}, {byte:#04x}", Register::AL), - Self::OUT_AX(byte) => write!(f, "out byte {}, {byte:#04x}", Register::AX), + Self::OUT_AL(byte) => write!(f, "out byte ptr {}, {byte:#04x}", Register::AL), + Self::OUT_AX(byte) => write!(f, "out byte ptr {}, {byte:#04x}", Register::AX), Self::OUT_ALDX => write!(f, "out {}, {}", Register::AL, Register::DX), Self::OUT_AXDX => write!(f, "out {}, {}", Register::AX, Register::DX), diff --git a/src/operands.rs b/src/operands.rs index c28c26d..529ba07 100644 --- a/src/operands.rs +++ b/src/operands.rs @@ -11,7 +11,6 @@ pub type IWord = i16; // used for displacement of memory access pub type DWord = u32; #[derive(Debug, Clone)] -#[allow(dead_code)] /// Encodes either Byte- or Word-sized operands. 
/// Also sometimes used to decide if an instruction is Byte- or Word-sized, /// which is usually indicated by using a value of 0 and the disregarding diff --git a/src/register.rs b/src/register.rs index 844cd67..8af5586 100644 --- a/src/register.rs +++ b/src/register.rs @@ -4,7 +4,6 @@ use crate::{disasm::DisasmError, operands::Operand}; use core::fmt; #[derive(Debug, Clone, PartialEq, Eq)] -#[allow(dead_code)] /// Registers of a 8086 processor pub enum Register { // 8 bit @@ -33,7 +32,6 @@ pub enum Register { /// Selector for Register or Segment Register pub type RegisterId = u8; -#[allow(dead_code)] impl Register { /// Find the register corresponding to the 8086 bytecode ID pub fn by_id(id: Operand) -> Result { @@ -89,7 +87,6 @@ impl fmt::Display for Register { /// Segment Registers of a 8086 processor #[derive(Debug, Clone, PartialEq, Eq)] -#[allow(dead_code)] pub enum SegmentRegister { DS, ES, @@ -97,7 +94,6 @@ pub enum SegmentRegister { CS, } -#[allow(dead_code)] impl SegmentRegister { /// Find the SRegister corresponding to the 8086 bytecode ID pub fn by_id(id: u8) -> Result {