From 0893969f4e0a4a3723a8d34b26a64bfe592f5c7e Mon Sep 17 00:00:00 2001
From: Marco Thomas <github@marcothms.de>
Date: Wed, 28 May 2025 09:41:40 +0900
Subject: [PATCH] chore: whole swoop of enhanced documentation

---
 README.md            |  32 +++---
 src/aout.rs          |  45 +++++++-
 src/disasm.rs        | 264 ++++++++++++++++++++++---------------------
 src/disasm_macros.rs |   6 +-
 src/instructions.rs  |  31 +++--
 src/operands.rs      |   1 -
 src/register.rs      |   4 -
 7 files changed, 210 insertions(+), 173 deletions(-)
diff --git a/README.md b/README.md
index ed52ffc..8f2623b 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ minix-8086-rs is a Rust-based toolchain for analyzing and interpreting 16-bit 80
 
 It includes:
 - 📦 a.out Parser: Parses legacy MINIX executables.
-- 🛠 8086 Disassembler: Parses 16-bit instructions into an IR and prints them in a `objdump(1)`-style fasion.
+- 🛠 8086 Disassembler: Parses 16-bit instructions into an IR and prints them in a `objdump(1)`-style fashion.
 - 💻 8086 Interpreter: Interprets the 8086 instructions, i.e., the MINIX binary.
 
 ## Usage
@@ -19,6 +19,11 @@ Or run it directly:
 cargo run -- --help
 ```
 
+Run with debug output:
+```
+RUST_LOG=debug cargo run -- interpret -p ./a.out
+```
+
 CLI Options:
 ```
 $ cargo run -- --help
@@ -37,32 +42,21 @@ Options:
   -V, --version      Print version                                                     
 ```
 
-## Examples
-
-Disassemble a binary:
-
-```
-cargo run -- disasm -p ./a.out
-```
-
-Interpret a binary:
-```
-cargo run -- interpret -p ./a.out
-```
-
-Run with debug output:
-```
-RUST_LOG=debug cargo run -- interpret -p ./a.out
-```
-
 ## Status
 
 This project is under active development and primarily used by me to explore some Intel disassembly and learn some more Rust.
 Expect bugs and some missing features.
+I mainly test with 'official' binaries from the MINIX source tree.
 
 
 ## Documentation
 
+The documentation of the project itself can be accessed by using `cargo doc`.
+```
+$ cargo doc
+$ firefox target/doc/minix_8086_rs/index.html 
+```
+
 For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
 
 
diff --git a/src/aout.rs b/src/aout.rs
index 80417ed..2cac670 100644
--- a/src/aout.rs
+++ b/src/aout.rs
@@ -1,12 +1,12 @@
 //! Internal a.out File abstraction.
 
+use core::fmt;
 use std::ffi::{c_uchar, c_ushort};
 
 #[allow(non_camel_case_types)]
 pub type c_long = i32; // we use a a.out with 32 byte
 
 #[derive(Debug)]
-#[allow(dead_code)]
 /// Internal representation of the a.out binary format.
 pub struct Aout {
     pub header: Header,
@@ -14,6 +14,14 @@ pub struct Aout {
     pub data: Vec<u8>,
 }
 
+impl fmt::Display for Aout {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        writeln!(f, "Header: {:#?}", self.header)?; // propagate fmt errors, never panic
+        writeln!(f, "Text: {:#?}", self.text)?;
+        writeln!(f, "Data: {:#?}", self.data)
+    }
+}
+
 impl Aout {
     pub fn new(buf: Vec<u8>) -> Self {
         let hdr = Header {
@@ -48,7 +56,6 @@ impl Aout {
 }
 
 #[derive(Debug)]
-#[allow(dead_code)]
 pub struct Header {
     pub magic: [c_uchar; 2], // magic number
     pub flags: c_uchar,      // flags, see below
@@ -63,3 +70,37 @@ pub struct Header {
     pub total: c_long,       // total memory allocated
     pub syms: c_long,        // size of symbol table
 }
+
+impl fmt::Display for Header {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "Header:
+    Magic: {:02X?}
+    Flags: {:#04X}
+    CPU: {}
+    Header Length: {}
+    Unused: {}
+    Version: {:#06X}
+    Text Size: {}
+    Data Size: {}
+    BSS Size: {}
+    Entry Point: {:#X}
+    Total Allocated: {}
+    Symbol Table Size: {}
+",
+            self.magic,
+            self.flags,
+            self.cpu,
+            self.hdrlen,
+            self.unused,
+            self.version,
+            self.text,
+            self.data,
+            self.bss,
+            self.entry,
+            self.total,
+            self.syms
+        )
+    }
+}
diff --git a/src/disasm.rs b/src/disasm.rs
index 3a5f676..3c94964 100644
--- a/src/disasm.rs
+++ b/src/disasm.rs
@@ -9,7 +9,7 @@ use crate::{
     Args,
     instructions::{Instruction, Mnemonic},
 };
-use crate::{modrm_instruction_sregister, modrm_instruction_wordwidth, modrm_target_bytewidth};
+use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister};
 use core::fmt;
 use std::{fs::File, io::Read, process::exit};
 
@@ -103,8 +103,37 @@ impl Disassembler {
         }
     }
 
-    /// Parse a single byte of binary, return it and advance the offset.
-    /// Returns the read byte.
+    /// Start the disassembly and wrap some error handling around
+    /// the actual decoding function.
+    pub fn disassemble(&mut self) -> Result<Vec<Instruction>, DisasmError> {
+        let is_ok = self.decode_instructions();
+
+        // a.out pads the text section to byte align, so the falsely interpreted
+        // instructions have to be removed.
+        self.remove_trailing_padding();
+
+        // read instructions from disassembler object instead of decode function
+        // to allow some errors to act as warnings (see below)
+        let instructions = self.instructions.clone();
+
+        // allow for warning-type errors to pass through, as they are not fatal
+        match is_ok {
+            Ok(_) => Ok(instructions),
+            Err(e) => match e {
+                DisasmError::EndOfTextSection => {
+                    log::debug!("Solo padded 0-byte at end of file was found. Ignoring.");
+                    Ok(instructions)
+                }
+                _ => {
+                    println!("Encountered error during disassembly: {e}");
+                    Err(e)
+                }
+            },
+        }
+    }
+
+    /// Parse a single byte of the binary and advance the offset.
+    /// Returns the read byte (Intel b operand).
     fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
         log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
         // check if the byte would be out of bounds
@@ -130,9 +159,9 @@ impl Disassembler {
         Ok(*byte)
     }
 
-    /// Parse a single word of binary.
+    /// Parse a single word of the binary and advance the offset.
     /// Just a wrapper for parsing a byte twice.
-    /// Returns the read word.
+    /// Returns the read word (Intel w/v operand).
     fn parse_word(&mut self) -> Result<Word, DisasmError> {
         log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
         let byte1 = self.parse_byte()?;
@@ -140,9 +169,10 @@ impl Disassembler {
         Ok(u16::from_le_bytes([byte1, byte2]))
     }
 
-    /// Parse a single byte of binary and interpret as as signed.
-    /// The isize contains a relative offset to be added to the address
-    /// of the subsequent instruction.
+    /// Parse a single byte of the binary, interpret it as signed and advance the
+    /// offset.
+    /// Returns the read byte added to the address of the subsequent instruction
+    /// to act as a relative offset (Intel Jb operand).
     fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
         log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
         // first interpret as 2-complement, then cast for addition
@@ -155,9 +185,10 @@ impl Disassembler {
         Ok(byte + next_addr)
     }
 
-    /// Parse a single byte of binary and interpret as signed.
-    /// The isize contains a relative offset to be added to the address
-    /// of the subsequent instruction.
+    /// Parse a word of the binary, interpret it as signed and advance the
+    /// offset.
+    /// Returns the read word added to the address of the subsequent instruction
+    /// to act as a relative offset (Intel Jw/Jv operand).
     pub fn parse_j_word(&mut self) -> Result<isize, DisasmError> {
         log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset);
         // first interpret as 2-complement, then cast for addition
@@ -170,7 +201,10 @@ impl Disassembler {
         Ok(word + next_addr)
     }
 
-    /// Parse a pointer type.
+    /// Parse a single pointer of the binary and advance the offset.
+    /// Just a wrapper for parsing a byte 4 times and constructing a pointer
+    /// type.
+    /// Returns the read pointer (Intel p operand).
     fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
         log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
         let byte0 = self.parse_byte()?;
@@ -185,24 +219,30 @@ impl Disassembler {
         })
     }
 
-    /// Takes in a modrm byte and returns mod, reg and r/m.
-    fn deconstruct_modrm_byte(modrm: u8) -> (u8, u8, u8) {
-        let mode = (modrm >> 6) & 0b11;
-        let reg = (modrm >> 3) & 0b111;
-        let rm = modrm & 0b111;
-
-        (mode, reg, rm)
+    /// Parse an Mp Operand (Memory Pointer).
+    /// An Mp is a ModRM byte with the `reg` bits ignored and an additional
+    /// 2 [`Word`]s parsed for a [`Pointer`] type.
+    fn parse_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
+        let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
+        let ptr = self.parse_ptr()?;
+        Ok((target, ptr))
     }
 
-    /// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
-    /// Returns the parsed modrm target and the source register
+    /// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or
+    /// Register) from that byte and advance the offset.
+    /// It is always just a single byte, even for word-width instructions.
+    /// Returns the [`ModRmTarget`] (either memory or a register) as well as the
+    /// `reg` bitfield, which will later be used to determine another register
+    /// or even mnemonic in the group-type instructions.
     fn parse_modrm_byte(
         &mut self,
         register_width: Operand,
     ) -> Result<(ModRmTarget, RegisterId), DisasmError> {
         let modrm = self.parse_byte()?;
 
-        let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm);
+        let mode = (modrm >> 6) & 0b11;
+        let reg = (modrm >> 3) & 0b111;
+        let rm = modrm & 0b111;
 
         log::debug!(
             "{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}",
@@ -304,17 +344,17 @@ impl Disassembler {
         Ok((ModRmTarget::Memory(index), reg))
     }
 
-    /// Match the modrm reg bits to the GPR1 mnemonics.
-    /// Group 1 always have an ModRM target (all modrm bits, without reg) as
-    /// first and an imm value as second operand (which has to be parsed before
-    /// call to this function), but is available in both Byte and Word length.
+    /// Match the ModRM `reg` bitfield to Intel Group 1-type instructions. Group
+    /// 1 always has a [`ModRmTarget`] as first and an immediate as second
+    /// operand; the mnemonic is selected by the ModRM `reg` field, and the
+    /// immediate (and thus the bit-width) is passed in `instruction_width`.
     fn modrm_reg_to_grp1(
-        modrm_reg_byte: u8,
+        reg: u8,
         target: ModRmTarget,
-        register_id: Operand,
+        instruction_width: Operand,
     ) -> Result<Mnemonic, DisasmError> {
-        match register_id {
-            Operand::Byte(b) => match modrm_reg_byte {
+        match instruction_width {
+            Operand::Byte(b) => match reg {
                 0b000 => Ok(Mnemonic::ADD_Ib(target, b)),
                 0b001 => Ok(Mnemonic::OR_Ib(target, b)),
                 0b010 => Ok(Mnemonic::ADC_Ib(target, b)),
@@ -323,9 +363,9 @@ impl Disassembler {
                 0b101 => Ok(Mnemonic::SUB_Ib(target, b)),
                 0b110 => Ok(Mnemonic::XOR_Ib(target, b)),
                 0b111 => Ok(Mnemonic::CMP_Ib(target, b)),
-                _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)),
+                _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
             },
-            Operand::Word(w) => match modrm_reg_byte {
+            Operand::Word(w) => match reg {
                 0b000 => Ok(Mnemonic::ADD_Iv(target, w)),
                 0b001 => Ok(Mnemonic::OR_Iv(target, w)),
                 0b010 => Ok(Mnemonic::ADC_Iv(target, w)),
@@ -334,15 +374,16 @@ impl Disassembler {
                 0b101 => Ok(Mnemonic::SUB_Iv(target, w)),
                 0b110 => Ok(Mnemonic::XOR_Iv(target, w)),
                 0b111 => Ok(Mnemonic::CMP_Iv(target, w)),
-                _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)),
+                _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
             },
         }
     }
 
-    /// Match the modrm reg bits to the GPR2 mnemonics.
-    /// Group 2 only has a single operand, the other one is either a constant
-    /// 1 (not present in the binary) or the CL register.
-    /// This function assumes the operand to be 1
+    /// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2
+    /// always only has a single operand, the other is either `1` or the `CL`
+    /// register.
+    /// This function assumes the operand to be `1`.
+    /// See [`Self::modrm_reg_to_grp2_cl`] for the counterpart.
     fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
         match reg {
             0b000 => Ok(Mnemonic::ROL_b(target, 1)),
@@ -357,10 +398,11 @@ impl Disassembler {
         }
     }
 
-    /// Match the modrm reg bits to the GPR2 mnemonics.
-    /// Group 2 only has a single operand, the other one is either a constant
-    /// 1 (not present in the binary) or the CL register.
-    /// This function assumes the operand to be CL register.
+    /// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2
+    /// always only has a single operand, the other is either `1` or the `CL`
+    /// register.
+    /// This function assumes the operand to be [`Register::CL`].
+    /// See [`Self::modrm_reg_to_grp2_1`] for the counterpart.
     fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
         match reg {
             0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)),
@@ -375,9 +417,9 @@ impl Disassembler {
         }
     }
 
-    /// Match the modrm reg bits to the GPR3a/b mnemonics.
-    /// Group 3 only has a single operand, which is the ModRmTarget selected
-    /// by modrm bits.
+    /// Match the ModRM `reg` bits to Intel Group 3a/b-type instructions.
+    /// Group 3 selects a unary mnemonic with the `reg` bit field. The operand
+    /// is the [`ModRmTarget`].
     fn modrm_reg_to_grp3(
         &mut self,
         reg: u8,
@@ -400,21 +442,12 @@ impl Disassembler {
         }
     }
 
-    /// Parse an Mp Operand (Memory Pointer).
-    /// An Mp is a ModRM byte with the `reg` bits ignored and an additional
-    /// 2 words parsed for a `Pointer` type.
-    fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
-        let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
-        let ptr = self.parse_ptr()?;
-        Ok((target, ptr))
-    }
-
     /// a.out pads the text section with 0x00 bytes. During parsing, these get
     /// interpreted as `0x00 0x00`, which have to get removed for an authentic
     /// disassembly.
     /// This is done in favor of removing all 0x00 bytes in the beginning,
-    /// as this could remove an actual 0x00 byte as operand of the final
-    /// instruction. Of course, this could remove an actual `0x00 0x00`
+    /// as this could remove an actual `0x00` byte as operand of the final
+    /// real instruction. Of course, this could remove an actual `0x00 0x00`
     /// instruction from the end, but they would not have any effect on
     /// execution anyway.
     fn remove_trailing_padding(&mut self) {
@@ -441,33 +474,10 @@ impl Disassembler {
         self.instructions.truncate(until);
     }
 
-    /// Start the disassmble and allow for some error handling wrapped around
-    /// the actual decoding function.
-    pub fn disassemble(&mut self) -> Result<Vec<Instruction>, DisasmError> {
-        let parsing = self.decode_instructions();
-
-        // a.out pads the text section to byte align, so the fasely interpreted
-        // instructions have to be removed.
-        self.remove_trailing_padding();
-        let instructions = self.instructions.clone();
-
-        // allow for warning-type errors to pass through, as they are not fatal
-        match parsing {
-            Ok(_) => Ok(instructions),
-            Err(e) => match e {
-                DisasmError::EndOfTextSection => {
-                    log::debug!("Solo padded 0-byte at end of file was found. Ignoring.");
-                    Ok(instructions)
-                }
-                _ => {
-                    println!("Encountered error during disassembly: {e}");
-                    Err(e)
-                }
-            },
-        }
-    }
-
-    /// Decode instructions by matching their byte signature to their mnemonics.
+    /// Decode instructions by matching byte signature to their mnemonics and
+    /// depending on the instruction, parsing some operands afterwards.
+    /// All parsing is done in encapsulated functions, here everything just
+    /// gets consolidated.
     fn decode_instructions(&mut self) -> Result<(), DisasmError> {
         log::debug!("Starting to decode text of length {}", self.text.len());
         while self.offset < self.text.len() {
@@ -482,20 +492,20 @@ impl Disassembler {
             self.instruction.raw.push(opcode);
 
             self.instruction.opcode = match opcode {
-                0x00 => modrm_target_bytewidth!(self, ADD_FromReg),
-                0x01 => modrm_instruction_wordwidth!(self, ADD_FromReg),
-                0x02 => modrm_target_bytewidth!(self, ADD_ToReg),
-                0x03 => modrm_instruction_wordwidth!(self, ADD_ToReg),
+                0x00 => modrm_8b_register!(self, ADD_FromReg),
+                0x01 => modrm_16b_register!(self, ADD_FromReg),
+                0x02 => modrm_8b_register!(self, ADD_ToReg),
+                0x03 => modrm_16b_register!(self, ADD_ToReg),
                 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
                 0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
 
                 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
                 0x07 => Mnemonic::POP_S(SegmentRegister::ES),
 
-                0x08 => modrm_target_bytewidth!(self, OR_FromReg),
-                0x09 => modrm_instruction_wordwidth!(self, OR_FromReg),
-                0x0A => modrm_target_bytewidth!(self, OR_ToReg),
-                0x0B => modrm_instruction_wordwidth!(self, OR_ToReg),
+                0x08 => modrm_8b_register!(self, OR_FromReg),
+                0x09 => modrm_16b_register!(self, OR_FromReg),
+                0x0A => modrm_8b_register!(self, OR_ToReg),
+                0x0B => modrm_16b_register!(self, OR_ToReg),
                 0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
                 0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
 
@@ -503,60 +513,60 @@ impl Disassembler {
 
                 0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
 
-                0x10 => modrm_target_bytewidth!(self, ADC_FromReg),
-                0x11 => modrm_instruction_wordwidth!(self, ADC_FromReg),
-                0x12 => modrm_target_bytewidth!(self, ADC_ToReg),
-                0x13 => modrm_instruction_wordwidth!(self, ADC_ToReg),
+                0x10 => modrm_8b_register!(self, ADC_FromReg),
+                0x11 => modrm_16b_register!(self, ADC_FromReg),
+                0x12 => modrm_8b_register!(self, ADC_ToReg),
+                0x13 => modrm_16b_register!(self, ADC_ToReg),
                 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
                 0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
 
                 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
                 0x17 => Mnemonic::POP_S(SegmentRegister::SS),
 
-                0x18 => modrm_target_bytewidth!(self, SBB_FromReg),
-                0x19 => modrm_instruction_wordwidth!(self, SBB_FromReg),
-                0x1A => modrm_target_bytewidth!(self, SBB_ToReg),
-                0x1B => modrm_instruction_wordwidth!(self, SBB_ToReg),
+                0x18 => modrm_8b_register!(self, SBB_FromReg),
+                0x19 => modrm_16b_register!(self, SBB_FromReg),
+                0x1A => modrm_8b_register!(self, SBB_ToReg),
+                0x1B => modrm_16b_register!(self, SBB_ToReg),
                 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
                 0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
 
                 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
                 0x1F => Mnemonic::POP_S(SegmentRegister::DS),
 
-                0x20 => modrm_target_bytewidth!(self, AND_FromReg),
-                0x21 => modrm_instruction_wordwidth!(self, AND_FromReg),
-                0x22 => modrm_target_bytewidth!(self, AND_ToReg),
-                0x23 => modrm_instruction_wordwidth!(self, AND_ToReg),
+                0x20 => modrm_8b_register!(self, AND_FromReg),
+                0x21 => modrm_16b_register!(self, AND_FromReg),
+                0x22 => modrm_8b_register!(self, AND_ToReg),
+                0x23 => modrm_16b_register!(self, AND_ToReg),
                 0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
                 0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
 
                 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
                 0x27 => Mnemonic::DAA,
 
-                0x28 => modrm_target_bytewidth!(self, SUB_FromReg),
-                0x29 => modrm_instruction_wordwidth!(self, SUB_FromReg),
-                0x2A => modrm_target_bytewidth!(self, SUB_ToReg),
-                0x2B => modrm_instruction_wordwidth!(self, SUB_ToReg),
+                0x28 => modrm_8b_register!(self, SUB_FromReg),
+                0x29 => modrm_16b_register!(self, SUB_FromReg),
+                0x2A => modrm_8b_register!(self, SUB_ToReg),
+                0x2B => modrm_16b_register!(self, SUB_ToReg),
                 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
                 0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
 
                 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
                 0x2F => Mnemonic::DAS,
 
-                0x30 => modrm_target_bytewidth!(self, XOR_FromReg),
-                0x31 => modrm_instruction_wordwidth!(self, XOR_FromReg),
-                0x32 => modrm_target_bytewidth!(self, XOR_ToReg),
-                0x33 => modrm_instruction_wordwidth!(self, XOR_ToReg),
+                0x30 => modrm_8b_register!(self, XOR_FromReg),
+                0x31 => modrm_16b_register!(self, XOR_FromReg),
+                0x32 => modrm_8b_register!(self, XOR_ToReg),
+                0x33 => modrm_16b_register!(self, XOR_ToReg),
                 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
                 0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
 
                 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
                 0x37 => Mnemonic::AAA,
 
-                0x38 => modrm_target_bytewidth!(self, CMP_FromReg),
-                0x39 => modrm_instruction_wordwidth!(self, CMP_FromReg),
-                0x3A => modrm_target_bytewidth!(self, CMP_ToReg),
-                0x3B => modrm_instruction_wordwidth!(self, CMP_ToReg),
+                0x38 => modrm_8b_register!(self, CMP_FromReg),
+                0x39 => modrm_16b_register!(self, CMP_FromReg),
+                0x3A => modrm_8b_register!(self, CMP_ToReg),
+                0x3B => modrm_16b_register!(self, CMP_ToReg),
                 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
                 0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
 
@@ -642,20 +652,20 @@ impl Disassembler {
                     Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
                 }
 
-                0x84 => modrm_target_bytewidth!(self, TEST),
-                0x85 => modrm_instruction_wordwidth!(self, TEST),
+                0x84 => modrm_8b_register!(self, TEST),
+                0x85 => modrm_16b_register!(self, TEST),
 
-                0x86 => modrm_target_bytewidth!(self, XCHG),
-                0x87 => modrm_instruction_wordwidth!(self, XCHG),
+                0x86 => modrm_8b_register!(self, XCHG),
+                0x87 => modrm_16b_register!(self, XCHG),
 
-                0x88 => modrm_target_bytewidth!(self, MOV_FromReg),
-                0x89 => modrm_instruction_wordwidth!(self, MOV_FromReg),
-                0x8A => modrm_target_bytewidth!(self, MOV_ToReg),
-                0x8B => modrm_instruction_wordwidth!(self, MOV_ToReg),
-                0x8C => modrm_instruction_sregister!(self, MOV_FromSReg),
-                0x8E => modrm_instruction_sregister!(self, MOV_ToSReg),
+                0x88 => modrm_8b_register!(self, MOV_FromReg),
+                0x89 => modrm_16b_register!(self, MOV_FromReg),
+                0x8A => modrm_8b_register!(self, MOV_ToReg),
+                0x8B => modrm_16b_register!(self, MOV_ToReg),
+                0x8C => modrm_sregister!(self, MOV_FromSReg),
+                0x8E => modrm_sregister!(self, MOV_ToSReg),
 
-                0x8D => modrm_instruction_wordwidth!(self, LEA),
+                0x8D => modrm_16b_register!(self, LEA),
 
                 0x8F => {
                     let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
@@ -734,11 +744,11 @@ impl Disassembler {
                 0xC3 => Mnemonic::RET,
 
                 0xC4 => {
-                    let (target, ptr) = self.modrm_mp()?;
+                    let (target, ptr) = self.parse_mp()?;
                     Mnemonic::LES(target, ptr)
                 }
                 0xC5 => {
-                    let (target, ptr) = self.modrm_mp()?;
+                    let (target, ptr) = self.parse_mp()?;
                     Mnemonic::LDS(target, ptr)
                 }
 
diff --git a/src/disasm_macros.rs b/src/disasm_macros.rs
index c126806..bdfcdee 100644
--- a/src/disasm_macros.rs
+++ b/src/disasm_macros.rs
@@ -2,7 +2,7 @@
 
 #[macro_export]
 /// Generate a Mnemonic for an 8-bit Register from a ModRM byte.
-macro_rules! modrm_target_bytewidth {
+macro_rules! modrm_8b_register {
     ($self:ident, $variant:ident) => {{
         let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?;
         Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?)
@@ -11,7 +11,7 @@ macro_rules! modrm_target_bytewidth {
 
 #[macro_export]
 /// Generate a Mnemonic for a 16-bit Register from a ModRM byte.
-macro_rules! modrm_instruction_wordwidth {
+macro_rules! modrm_16b_register {
     ($self:ident, $variant:ident) => {{
         let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
         Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?)
@@ -20,7 +20,7 @@ macro_rules! modrm_instruction_wordwidth {
 
 #[macro_export]
 /// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte.
-macro_rules! modrm_instruction_sregister {
+macro_rules! modrm_sregister {
     ($self:ident, $variant:ident) => {{
         let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
         Mnemonic::$variant(target, SegmentRegister::by_id(reg)?)
diff --git a/src/instructions.rs b/src/instructions.rs
index fb858d5..8badb48 100644
--- a/src/instructions.rs
+++ b/src/instructions.rs
@@ -7,7 +7,6 @@ use crate::{
 use core::fmt;
 
 #[derive(Debug, Clone, Eq, PartialEq)]
-#[allow(dead_code)]
 /// A single 'line' of executable ASM is called an Instruction, which
 /// contains the `Mnemonic` that will be executed, alongside its starting offset
 /// and the raw parsed bytes
@@ -47,7 +46,7 @@ impl fmt::Display for Instruction {
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
-#[allow(dead_code, non_camel_case_types)]
+#[allow(non_camel_case_types)]
 /// All possible mnemonic variantions.
 /// These are sorted by type and are not in hex-encoding order.
 // XXX: convert this copy and paste horror in a proc macro like
@@ -296,8 +295,6 @@ pub enum Mnemonic {
     AAD(Byte),
     // MISC
     XLAT,
-    // Not part of 8086:
-    EOT, // End of Text Section
 }
 
 impl fmt::Display for Mnemonic {
@@ -414,8 +411,8 @@ impl fmt::Display for Mnemonic {
             Self::MOV_ToReg(target, reg) => write!(f, "mov {reg}, {target}"),
             Self::MOV_FromSReg(target, reg) => write!(f, "mov {target}, {reg}"),
             Self::MOV_ToSReg(target, reg) => write!(f, "mov {reg}, {target}"),
-            Self::MOV_Ib(target, byte) => write!(f, "mov byte {target}, {byte:#04x}"),
-            Self::MOV_Iv(target, word) => write!(f, "mov word {target}, {word:#04x}"),
+            Self::MOV_Ib(target, byte) => write!(f, "mov byte ptr {target}, {byte:#04x}"),
+            Self::MOV_Iv(target, word) => write!(f, "mov word ptr {target}, {word:#04x}"),
 
             Self::MOV_AL0b(byte) => write!(f, "mov {}, {byte:#04x}", Register::AL),
             Self::MOV_AX0v(word) => write!(f, "mov {}, {word:#04x}", Register::AX),
@@ -492,13 +489,13 @@ impl fmt::Display for Mnemonic {
 
             Self::HLT => write!(f, "hlt"),
 
-            Self::ROL_b(target, byte) => write!(f, "rol byte {target}, {byte:#04x}"),
-            Self::ROR_b(target, byte) => write!(f, "ror byte {target}, {byte:#04x}"),
-            Self::RCL_b(target, byte) => write!(f, "rcl byte {target}, {byte:#04x}"),
-            Self::RCR_b(target, byte) => write!(f, "rcr byte {target}, {byte:#04x}"),
-            Self::SHL_b(target, byte) => write!(f, "shl byte {target}, {byte:#04x}"),
-            Self::SHR_b(target, byte) => write!(f, "shr byte {target}, {byte:#04x}"),
-            Self::SAR_b(target, byte) => write!(f, "sar byte {target}, {byte:#04x}"),
+            Self::ROL_b(target, byte) => write!(f, "rol byte ptr {target}, {byte:#04x}"),
+            Self::ROR_b(target, byte) => write!(f, "ror byte ptr {target}, {byte:#04x}"),
+            Self::RCL_b(target, byte) => write!(f, "rcl byte ptr {target}, {byte:#04x}"),
+            Self::RCR_b(target, byte) => write!(f, "rcr byte ptr {target}, {byte:#04x}"),
+            Self::SHL_b(target, byte) => write!(f, "shl byte ptr {target}, {byte:#04x}"),
+            Self::SHR_b(target, byte) => write!(f, "shr byte ptr {target}, {byte:#04x}"),
+            Self::SAR_b(target, byte) => write!(f, "sar byte ptr {target}, {byte:#04x}"),
             Self::ROL_fromReg(target, reg) => write!(f, "rol {target}, {reg}"),
             Self::ROR_fromReg(target, reg) => write!(f, "ror {target}, {reg}"),
             Self::RCL_fromReg(target, reg) => write!(f, "rcl {target}, {reg}"),
@@ -507,13 +504,13 @@ impl fmt::Display for Mnemonic {
             Self::SHR_fromReg(target, reg) => write!(f, "shr {target}, {reg}"),
             Self::SAR_fromReg(target, reg) => write!(f, "sar {target}, {reg}"),
 
-            Self::IN_AL(byte) => write!(f, "in byte {}, {byte:#04x}", Register::AL),
-            Self::IN_AX(byte) => write!(f, "in byte {}, {byte:#04x}", Register::AX),
+            Self::IN_AL(byte) => write!(f, "in byte ptr {}, {byte:#04x}", Register::AL),
+            Self::IN_AX(byte) => write!(f, "in byte ptr {}, {byte:#04x}", Register::AX),
             Self::IN_ALDX => write!(f, "in {}, {}", Register::AL, Register::DX),
             Self::IN_AXDX => write!(f, "in {}, {}", Register::AX, Register::DX),
 
-            Self::OUT_AL(byte) => write!(f, "out byte {}, {byte:#04x}", Register::AL),
-            Self::OUT_AX(byte) => write!(f, "out byte {}, {byte:#04x}", Register::AX),
+            Self::OUT_AL(byte) => write!(f, "out byte ptr {}, {byte:#04x}", Register::AL),
+            Self::OUT_AX(byte) => write!(f, "out byte ptr {}, {byte:#04x}", Register::AX),
             Self::OUT_ALDX => write!(f, "out {}, {}", Register::AL, Register::DX),
             Self::OUT_AXDX => write!(f, "out {}, {}", Register::AX, Register::DX),
 
diff --git a/src/operands.rs b/src/operands.rs
index c28c26d..529ba07 100644
--- a/src/operands.rs
+++ b/src/operands.rs
@@ -11,7 +11,6 @@ pub type IWord = i16; // used for displacement of memory access
 pub type DWord = u32;
 
 #[derive(Debug, Clone)]
-#[allow(dead_code)]
 /// Encodes either Byte- or Word-sized operands.
 /// Also sometimes used to decide if an instruction is Byte- or Word-sized,
 /// which is usually indicated by using a value of 0 and the disregarding
diff --git a/src/register.rs b/src/register.rs
index 844cd67..8af5586 100644
--- a/src/register.rs
+++ b/src/register.rs
@@ -4,7 +4,6 @@ use crate::{disasm::DisasmError, operands::Operand};
 use core::fmt;
 
 #[derive(Debug, Clone, PartialEq, Eq)]
-#[allow(dead_code)]
 /// Registers of a 8086 processor
 pub enum Register {
     // 8 bit
@@ -33,7 +32,6 @@ pub enum Register {
 /// Selector for Register or Segment Register
 pub type RegisterId = u8;
 
-#[allow(dead_code)]
 impl Register {
     /// Find the register corresponding to the 8086 bytecode ID
     pub fn by_id(id: Operand) -> Result<Self, DisasmError> {
@@ -89,7 +87,6 @@ impl fmt::Display for Register {
 
 /// Segment Registers of a 8086 processor
 #[derive(Debug, Clone, PartialEq, Eq)]
-#[allow(dead_code)]
 pub enum SegmentRegister {
     DS,
     ES,
@@ -97,7 +94,6 @@ pub enum SegmentRegister {
     CS,
 }
 
-#[allow(dead_code)]
 impl SegmentRegister {
     /// Find the SRegister corresponding to the 8086 bytecode ID
     pub fn by_id(id: u8) -> Result<Self, DisasmError> {