chore: whole swoop of enhanced documentation

This commit is contained in:
2025-05-28 09:41:40 +09:00
parent 322a276617
commit 0893969f4e
7 changed files with 210 additions and 173 deletions

View File

@@ -4,7 +4,7 @@ minix-8086-rs is a Rust-based toolchain for analyzing and interpreting 16-bit 80
It includes: It includes:
- 📦 a.out Parser: Parses legacy MINIX executables. - 📦 a.out Parser: Parses legacy MINIX executables.
- 🛠 8086 Disassembler: Parses 16-bit instructions into an IR and prints them in a `objdump(1)`-style fasion. - 🛠 8086 Disassembler: Parses 16-bit instructions into an IR and prints them in a `objdump(1)`-style fashion.
- 💻 8086 Interpreter: Interprets the 8086 instructions, i.e., the MINIX binary. - 💻 8086 Interpreter: Interprets the 8086 instructions, i.e., the MINIX binary.
## Usage ## Usage
@@ -19,6 +19,11 @@ Or run it directly:
cargo run -- --help cargo run -- --help
``` ```
Run with debug output:
```
RUST_LOG=debug cargo run -- interpret -p ./a.out
```
CLI Options: CLI Options:
``` ```
$ cargo run -- --help $ cargo run -- --help
@@ -37,32 +42,21 @@ Options:
-V, --version Print version -V, --version Print version
``` ```
## Examples
Disassemble a binary:
```
cargo run -- disasm -p ./a.out
```
Interpret a binary:
```
cargo run -- interpret -p ./a.out
```
Run with debug output:
```
RUST_LOG=debug cargo run -- interpret -p ./a.out
```
## Status ## Status
This project is under active development and primarily used by me to explore some Intel disassembly and learn some more Rust. This project is under active development and primarily used by me to explore some Intel disassembly and learn some more Rust.
Expect bugs and some missing features. Expect bugs and some missing features.
I mainly test with 'official' binaries from the MINIX source tree.
## Documentation ## Documentation
The documentation of the project itself can be accessed by using `cargo doc`.
```
$ cargo doc
$ firefox target/doc/minix_8086_rs/index.html
```
For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix. For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.

View File

@@ -1,12 +1,12 @@
//! Internal a.out File abstraction. //! Internal a.out File abstraction.
use core::fmt;
use std::ffi::{c_uchar, c_ushort}; use std::ffi::{c_uchar, c_ushort};
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
pub type c_long = i32; // we use a a.out with 32 byte pub type c_long = i32; // we use a a.out with 32 byte
#[derive(Debug)] #[derive(Debug)]
#[allow(dead_code)]
/// Internal representation of the a.out binary format. /// Internal representation of the a.out binary format.
pub struct Aout { pub struct Aout {
pub header: Header, pub header: Header,
@@ -14,6 +14,14 @@ pub struct Aout {
pub data: Vec<u8>, pub data: Vec<u8>,
} }
/// Human-readable dump of a parsed a.out image: the header followed by the
/// text and data sections, each pretty-printed through its `Debug` form.
impl fmt::Display for Aout {
    /// Writes the `Header:`, `Text:` and `Data:` sections in that order, each
    /// terminated by a newline.
    ///
    /// # Errors
    /// Returns the formatter's error as soon as one of the writes fails.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Propagate formatter errors with `?`: unwrapping them would turn a
        // failing output sink into a panic inside a `Display` implementation.
        writeln!(f, "Header: {:#?}", self.header)?;
        writeln!(f, "Text: {:#?}", self.text)?;
        writeln!(f, "Data: {:#?}", self.data)
    }
}
impl Aout { impl Aout {
pub fn new(buf: Vec<u8>) -> Self { pub fn new(buf: Vec<u8>) -> Self {
let hdr = Header { let hdr = Header {
@@ -48,7 +56,6 @@ impl Aout {
} }
#[derive(Debug)] #[derive(Debug)]
#[allow(dead_code)]
pub struct Header { pub struct Header {
pub magic: [c_uchar; 2], // magic number pub magic: [c_uchar; 2], // magic number
pub flags: c_uchar, // flags, see below pub flags: c_uchar, // flags, see below
@@ -63,3 +70,37 @@ pub struct Header {
pub total: c_long, // total memory allocated pub total: c_long, // total memory allocated
pub syms: c_long, // size of symbol table pub syms: c_long, // size of symbol table
} }
/// Renders the a.out header as a labelled, multi-line listing.
impl fmt::Display for Header {
/// Emits a `Header:` title followed by one `Label: value` entry per header
/// field, all through a single `write!` call.
///
/// `magic` is printed with `{:02X?}` (upper-case hex `Debug` form), while
/// `flags`, `version` and `entry` use the alternate upper-case hex forms
/// `{:#04X}`, `{:#06X}` and `{:#X}`. All remaining fields use plain `{}`.
///
/// Returns whatever the underlying `write!` returns.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// The template is one string literal spanning several lines, so its line
// breaks are part of the output.
write!(
f,
"Header:
Magic: {:02X?}
Flags: {:#04X}
CPU: {}
Header Length: {}
Unused: {}
Version: {:#06X}
Text Size: {}
Data Size: {}
BSS Size: {}
Entry Point: {:#X}
Total Allocated: {}
Symbol Table Size: {}
",
// Arguments follow the order of the placeholders in the template above.
self.magic,
self.flags,
self.cpu,
self.hdrlen,
self.unused,
self.version,
self.text,
self.data,
self.bss,
self.entry,
self.total,
self.syms
)
}
}

View File

@@ -9,7 +9,7 @@ use crate::{
Args, Args,
instructions::{Instruction, Mnemonic}, instructions::{Instruction, Mnemonic},
}; };
use crate::{modrm_instruction_sregister, modrm_instruction_wordwidth, modrm_target_bytewidth}; use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister};
use core::fmt; use core::fmt;
use std::{fs::File, io::Read, process::exit}; use std::{fs::File, io::Read, process::exit};
@@ -103,8 +103,37 @@ impl Disassembler {
} }
} }
/// Parse a single byte of binary, return it and advance the offset. /// Start the disassembly and allow for some error handling wrapped around
/// Returns the read byte. /// the actual decoding function.
pub fn disassemble(&mut self) -> Result<Vec<Instruction>, DisasmError> {
    // Run the decoder first and hold on to its verdict; the instruction list
    // is read back from `self` below, independent of that verdict.
    let outcome = self.decode_instructions();

    // a.out pads the text section, so the instructions that were falsely
    // decoded from that padding have to be dropped again.
    self.remove_trailing_padding();

    // Snapshot the instructions from the disassembler itself rather than from
    // the decoder's return value, so that warning-type errors can still
    // produce a result.
    let decoded = self.instructions.clone();

    match outcome {
        Ok(_) => Ok(decoded),
        // Not fatal: treated as a warning and passed through as success.
        Err(DisasmError::EndOfTextSection) => {
            log::debug!("Solo padded 0-byte at end of file was found. Ignoring.");
            Ok(decoded)
        }
        // Every other error is reported and handed back to the caller.
        Err(e) => {
            println!("Encountered error during disassembly: {e}");
            Err(e)
        }
    }
}
/// Parse a single byte of the binary and advance the offset.
/// Returns the read byte (Intel b operand).
fn parse_byte(&mut self) -> Result<Byte, DisasmError> { fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
log::debug!("Attempting to parse byte at {:#04x} ...", self.offset); log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
// check if the byte would be out of bounds // check if the byte would be out of bounds
@@ -130,9 +159,9 @@ impl Disassembler {
Ok(*byte) Ok(*byte)
} }
/// Parse a single word of binary. /// Parse a single word of the binary and advance the offset.
/// Just a wrapper for parsing a byte twice. /// Just a wrapper for parsing a byte twice.
/// Returns the read word. /// Returns the read word (Intel w/v operand).
fn parse_word(&mut self) -> Result<Word, DisasmError> { fn parse_word(&mut self) -> Result<Word, DisasmError> {
log::debug!("Attempting to parse word at {:#04x} ...", self.offset); log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
let byte1 = self.parse_byte()?; let byte1 = self.parse_byte()?;
@@ -140,9 +169,10 @@ impl Disassembler {
Ok(u16::from_le_bytes([byte1, byte2])) Ok(u16::from_le_bytes([byte1, byte2]))
} }
/// Parse a single byte of binary and interpret as as signed. /// Parse a single byte of the binary, interpret it as signed and advance the
/// The isize contains a relative offset to be added to the address /// offset.
/// of the subsequent instruction. /// Returns the read byte added to the address of the subsequent instruction
/// to act as a relative offset (Intel Jb operand).
fn parse_j_byte(&mut self) -> Result<isize, DisasmError> { fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset); log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
// first interpret as 2-complement, then cast for addition // first interpret as 2-complement, then cast for addition
@@ -155,9 +185,10 @@ impl Disassembler {
Ok(byte + next_addr) Ok(byte + next_addr)
} }
/// Parse a single byte of binary and interpret as signed. /// Parse a word of the binary, interpret it as signed and advance the
/// The isize contains a relative offset to be added to the address /// offset.
/// of the subsequent instruction. /// Returns the read word added to the address of the subsequent instruction
/// to act as a relative offset (Intel Jw/Jv operand).
pub fn parse_j_word(&mut self) -> Result<isize, DisasmError> { pub fn parse_j_word(&mut self) -> Result<isize, DisasmError> {
log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset); log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset);
// first interpret as 2-complement, then cast for addition // first interpret as 2-complement, then cast for addition
@@ -170,7 +201,10 @@ impl Disassembler {
Ok(word + next_addr) Ok(word + next_addr)
} }
/// Parse a pointer type. /// Parse a single pointer of the binary and advance the offset.
/// Just a wrapper for parsing a byte 4 times and constructing a pointer
/// type.
/// Returns the read pointer (Intel p operand).
fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> { fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset); log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
let byte0 = self.parse_byte()?; let byte0 = self.parse_byte()?;
@@ -185,24 +219,30 @@ impl Disassembler {
}) })
} }
/// Takes in a modrm byte and returns mod, reg and r/m. /// Parse an Mp Operand (Memory Pointer).
fn deconstruct_modrm_byte(modrm: u8) -> (u8, u8, u8) { /// An Mp is a ModRM byte with the `reg` bits ignored and an additional
let mode = (modrm >> 6) & 0b11; /// 2 [`Word`]s parsed for a [`Pointer`] type.
let reg = (modrm >> 3) & 0b111; fn parse_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
let rm = modrm & 0b111; let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
let ptr = self.parse_ptr()?;
(mode, reg, rm) Ok((target, ptr))
} }
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset. /// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or
/// Returns the parsed modrm target and the source register /// Register) from that byte and advance the offset.
/// It is always just a single byte, even for word-width instructions.
/// Returns the [`ModRmTarget`] (either memory or a register) as well as the
/// `reg` bitfield, which will later be used to determine another register
/// or even mnemonic in the group-type instructions.
fn parse_modrm_byte( fn parse_modrm_byte(
&mut self, &mut self,
register_width: Operand, register_width: Operand,
) -> Result<(ModRmTarget, RegisterId), DisasmError> { ) -> Result<(ModRmTarget, RegisterId), DisasmError> {
let modrm = self.parse_byte()?; let modrm = self.parse_byte()?;
let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm); let mode = (modrm >> 6) & 0b11;
let reg = (modrm >> 3) & 0b111;
let rm = modrm & 0b111;
log::debug!( log::debug!(
"{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}", "{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}",
@@ -304,17 +344,17 @@ impl Disassembler {
Ok((ModRmTarget::Memory(index), reg)) Ok((ModRmTarget::Memory(index), reg))
} }
/// Match the modrm reg bits to the GPR1 mnemonics. /// Match the ModRM `reg` bitfield to Intel Group 1-type instructions. Group
/// Group 1 always have an ModRM target (all modrm bits, without reg) as /// 1 always has a [`ModRmTarget`] as first and an immediate as second
/// first and an imm value as second operand (which has to be parsed before /// operand; the immediate is parsed by the caller and passed in, and its
/// call to this function), but is available in both Byte and Word length. /// width (byte or word) selects the `_Ib` or `_Iv` mnemonic variant.
fn modrm_reg_to_grp1( fn modrm_reg_to_grp1(
modrm_reg_byte: u8, reg: u8,
target: ModRmTarget, target: ModRmTarget,
register_id: Operand, instruction_width: Operand,
) -> Result<Mnemonic, DisasmError> { ) -> Result<Mnemonic, DisasmError> {
match register_id { match instruction_width {
Operand::Byte(b) => match modrm_reg_byte { Operand::Byte(b) => match reg {
0b000 => Ok(Mnemonic::ADD_Ib(target, b)), 0b000 => Ok(Mnemonic::ADD_Ib(target, b)),
0b001 => Ok(Mnemonic::OR_Ib(target, b)), 0b001 => Ok(Mnemonic::OR_Ib(target, b)),
0b010 => Ok(Mnemonic::ADC_Ib(target, b)), 0b010 => Ok(Mnemonic::ADC_Ib(target, b)),
@@ -323,9 +363,9 @@ impl Disassembler {
0b101 => Ok(Mnemonic::SUB_Ib(target, b)), 0b101 => Ok(Mnemonic::SUB_Ib(target, b)),
0b110 => Ok(Mnemonic::XOR_Ib(target, b)), 0b110 => Ok(Mnemonic::XOR_Ib(target, b)),
0b111 => Ok(Mnemonic::CMP_Ib(target, b)), 0b111 => Ok(Mnemonic::CMP_Ib(target, b)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)), _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
}, },
Operand::Word(w) => match modrm_reg_byte { Operand::Word(w) => match reg {
0b000 => Ok(Mnemonic::ADD_Iv(target, w)), 0b000 => Ok(Mnemonic::ADD_Iv(target, w)),
0b001 => Ok(Mnemonic::OR_Iv(target, w)), 0b001 => Ok(Mnemonic::OR_Iv(target, w)),
0b010 => Ok(Mnemonic::ADC_Iv(target, w)), 0b010 => Ok(Mnemonic::ADC_Iv(target, w)),
@@ -334,15 +374,16 @@ impl Disassembler {
0b101 => Ok(Mnemonic::SUB_Iv(target, w)), 0b101 => Ok(Mnemonic::SUB_Iv(target, w)),
0b110 => Ok(Mnemonic::XOR_Iv(target, w)), 0b110 => Ok(Mnemonic::XOR_Iv(target, w)),
0b111 => Ok(Mnemonic::CMP_Iv(target, w)), 0b111 => Ok(Mnemonic::CMP_Iv(target, w)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)), _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
}, },
} }
} }
/// Match the modrm reg bits to the GPR2 mnemonics. /// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2
/// Group 2 only has a single operand, the other one is either a constant /// always only has a single operand, the other is either `1` or the `CL`
/// 1 (not present in the binary) or the CL register. /// register.
/// This function assumes the operand to be 1 /// This function assumes the operand to be `1`.
/// See [`Self::modrm_reg_to_grp2_cl`] for the counterpart.
fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> { fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
match reg { match reg {
0b000 => Ok(Mnemonic::ROL_b(target, 1)), 0b000 => Ok(Mnemonic::ROL_b(target, 1)),
@@ -357,10 +398,11 @@ impl Disassembler {
} }
} }
/// Match the modrm reg bits to the GPR2 mnemonics. /// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2
/// Group 2 only has a single operand, the other one is either a constant /// always only has a single operand, the other is either `1` or the `CL`
/// 1 (not present in the binary) or the CL register. /// register.
/// This function assumes the operand to be CL register. /// This function assumes the operand to be [`Register::CL`].
/// See [`Self::modrm_reg_to_grp2_1`] for the counterpart.
fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> { fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
match reg { match reg {
0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)), 0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)),
@@ -375,9 +417,9 @@ impl Disassembler {
} }
} }
/// Match the modrm reg bits to the GPR3a/b mnemonics. /// Match the ModRM `reg` bits to Intel Group 3a/b-type instructions.
/// Group 3 only has a single operand, which is the ModRmTarget selected /// Group 3 selects a unary mnemonic with the `reg` bit field. The operand
/// by modrm bits. /// is the [`ModRmTarget`].
fn modrm_reg_to_grp3( fn modrm_reg_to_grp3(
&mut self, &mut self,
reg: u8, reg: u8,
@@ -400,21 +442,12 @@ impl Disassembler {
} }
} }
/// Parse an Mp Operand (Memory Pointer).
/// An Mp is a ModRM byte with the `reg` bits ignored and an additional
/// 2 words parsed for a `Pointer` type.
fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
let ptr = self.parse_ptr()?;
Ok((target, ptr))
}
/// a.out pads the text section with 0x00 bytes. During parsing, these get /// a.out pads the text section with 0x00 bytes. During parsing, these get
/// interpreted as `0x00 0x00`, which have to get removed for an authentic /// interpreted as `0x00 0x00`, which have to get removed for an authentic
/// disassembly. /// disassembly.
/// This is done in favor of removing all 0x00 bytes in the beginning, /// This is done in favor of removing all 0x00 bytes in the beginning,
/// as this could remove an actual 0x00 byte as operand of the final /// as this could remove an actual `0x00` byte as operand of the final
/// instruction. Of course, this could remove an actual `0x00 0x00` /// real instruction. Of course, this could remove an actual `0x00 0x00`
/// instruction from the end, but they would not have any effect on /// instruction from the end, but they would not have any effect on
/// execution anyway. /// execution anyway.
fn remove_trailing_padding(&mut self) { fn remove_trailing_padding(&mut self) {
@@ -441,33 +474,10 @@ impl Disassembler {
self.instructions.truncate(until); self.instructions.truncate(until);
} }
/// Start the disassmble and allow for some error handling wrapped around /// Decode instructions by matching byte signature to their mnemonics and
/// the actual decoding function. /// depending on the instruction, parsing some operands afterwards.
pub fn disassemble(&mut self) -> Result<Vec<Instruction>, DisasmError> { /// All parsing is done in encapsulated functions, here everything just
let parsing = self.decode_instructions(); /// gets consolidated.
// a.out pads the text section to byte align, so the fasely interpreted
// instructions have to be removed.
self.remove_trailing_padding();
let instructions = self.instructions.clone();
// allow for warning-type errors to pass through, as they are not fatal
match parsing {
Ok(_) => Ok(instructions),
Err(e) => match e {
DisasmError::EndOfTextSection => {
log::debug!("Solo padded 0-byte at end of file was found. Ignoring.");
Ok(instructions)
}
_ => {
println!("Encountered error during disassembly: {e}");
Err(e)
}
},
}
}
/// Decode instructions by matching their byte signature to their mnemonics.
fn decode_instructions(&mut self) -> Result<(), DisasmError> { fn decode_instructions(&mut self) -> Result<(), DisasmError> {
log::debug!("Starting to decode text of length {}", self.text.len()); log::debug!("Starting to decode text of length {}", self.text.len());
while self.offset < self.text.len() { while self.offset < self.text.len() {
@@ -482,20 +492,20 @@ impl Disassembler {
self.instruction.raw.push(opcode); self.instruction.raw.push(opcode);
self.instruction.opcode = match opcode { self.instruction.opcode = match opcode {
0x00 => modrm_target_bytewidth!(self, ADD_FromReg), 0x00 => modrm_8b_register!(self, ADD_FromReg),
0x01 => modrm_instruction_wordwidth!(self, ADD_FromReg), 0x01 => modrm_16b_register!(self, ADD_FromReg),
0x02 => modrm_target_bytewidth!(self, ADD_ToReg), 0x02 => modrm_8b_register!(self, ADD_ToReg),
0x03 => modrm_instruction_wordwidth!(self, ADD_ToReg), 0x03 => modrm_16b_register!(self, ADD_ToReg),
0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?), 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
0x05 => Mnemonic::ADD_AXIv(self.parse_word()?), 0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
0x07 => Mnemonic::POP_S(SegmentRegister::ES), 0x07 => Mnemonic::POP_S(SegmentRegister::ES),
0x08 => modrm_target_bytewidth!(self, OR_FromReg), 0x08 => modrm_8b_register!(self, OR_FromReg),
0x09 => modrm_instruction_wordwidth!(self, OR_FromReg), 0x09 => modrm_16b_register!(self, OR_FromReg),
0x0A => modrm_target_bytewidth!(self, OR_ToReg), 0x0A => modrm_8b_register!(self, OR_ToReg),
0x0B => modrm_instruction_wordwidth!(self, OR_ToReg), 0x0B => modrm_16b_register!(self, OR_ToReg),
0x0C => Mnemonic::OR_ALIb(self.parse_byte()?), 0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
0x0D => Mnemonic::OR_AXIv(self.parse_word()?), 0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
@@ -503,60 +513,60 @@ impl Disassembler {
0x0F => return Err(DisasmError::OpcodeUndefined(opcode)), 0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
0x10 => modrm_target_bytewidth!(self, ADC_FromReg), 0x10 => modrm_8b_register!(self, ADC_FromReg),
0x11 => modrm_instruction_wordwidth!(self, ADC_FromReg), 0x11 => modrm_16b_register!(self, ADC_FromReg),
0x12 => modrm_target_bytewidth!(self, ADC_ToReg), 0x12 => modrm_8b_register!(self, ADC_ToReg),
0x13 => modrm_instruction_wordwidth!(self, ADC_ToReg), 0x13 => modrm_16b_register!(self, ADC_ToReg),
0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?), 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
0x15 => Mnemonic::ADC_AXIv(self.parse_word()?), 0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
0x17 => Mnemonic::POP_S(SegmentRegister::SS), 0x17 => Mnemonic::POP_S(SegmentRegister::SS),
0x18 => modrm_target_bytewidth!(self, SBB_FromReg), 0x18 => modrm_8b_register!(self, SBB_FromReg),
0x19 => modrm_instruction_wordwidth!(self, SBB_FromReg), 0x19 => modrm_16b_register!(self, SBB_FromReg),
0x1A => modrm_target_bytewidth!(self, SBB_ToReg), 0x1A => modrm_8b_register!(self, SBB_ToReg),
0x1B => modrm_instruction_wordwidth!(self, SBB_ToReg), 0x1B => modrm_16b_register!(self, SBB_ToReg),
0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?), 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
0x1D => Mnemonic::SBB_AXIv(self.parse_word()?), 0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
0x1F => Mnemonic::POP_S(SegmentRegister::DS), 0x1F => Mnemonic::POP_S(SegmentRegister::DS),
0x20 => modrm_target_bytewidth!(self, AND_FromReg), 0x20 => modrm_8b_register!(self, AND_FromReg),
0x21 => modrm_instruction_wordwidth!(self, AND_FromReg), 0x21 => modrm_16b_register!(self, AND_FromReg),
0x22 => modrm_target_bytewidth!(self, AND_ToReg), 0x22 => modrm_8b_register!(self, AND_ToReg),
0x23 => modrm_instruction_wordwidth!(self, AND_ToReg), 0x23 => modrm_16b_register!(self, AND_ToReg),
0x24 => Mnemonic::AND_ALIb(self.parse_byte()?), 0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
0x25 => Mnemonic::AND_AXIv(self.parse_word()?), 0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
0x27 => Mnemonic::DAA, 0x27 => Mnemonic::DAA,
0x28 => modrm_target_bytewidth!(self, SUB_FromReg), 0x28 => modrm_8b_register!(self, SUB_FromReg),
0x29 => modrm_instruction_wordwidth!(self, SUB_FromReg), 0x29 => modrm_16b_register!(self, SUB_FromReg),
0x2A => modrm_target_bytewidth!(self, SUB_ToReg), 0x2A => modrm_8b_register!(self, SUB_ToReg),
0x2B => modrm_instruction_wordwidth!(self, SUB_ToReg), 0x2B => modrm_16b_register!(self, SUB_ToReg),
0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?), 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
0x2D => Mnemonic::SUB_AXIv(self.parse_word()?), 0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
0x2F => Mnemonic::DAS, 0x2F => Mnemonic::DAS,
0x30 => modrm_target_bytewidth!(self, XOR_FromReg), 0x30 => modrm_8b_register!(self, XOR_FromReg),
0x31 => modrm_instruction_wordwidth!(self, XOR_FromReg), 0x31 => modrm_16b_register!(self, XOR_FromReg),
0x32 => modrm_target_bytewidth!(self, XOR_ToReg), 0x32 => modrm_8b_register!(self, XOR_ToReg),
0x33 => modrm_instruction_wordwidth!(self, XOR_ToReg), 0x33 => modrm_16b_register!(self, XOR_ToReg),
0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?), 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
0x35 => Mnemonic::XOR_AXIv(self.parse_word()?), 0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
0x37 => Mnemonic::AAA, 0x37 => Mnemonic::AAA,
0x38 => modrm_target_bytewidth!(self, CMP_FromReg), 0x38 => modrm_8b_register!(self, CMP_FromReg),
0x39 => modrm_instruction_wordwidth!(self, CMP_FromReg), 0x39 => modrm_16b_register!(self, CMP_FromReg),
0x3A => modrm_target_bytewidth!(self, CMP_ToReg), 0x3A => modrm_8b_register!(self, CMP_ToReg),
0x3B => modrm_instruction_wordwidth!(self, CMP_ToReg), 0x3B => modrm_16b_register!(self, CMP_ToReg),
0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?), 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
0x3D => Mnemonic::CMP_AXIv(self.parse_word()?), 0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
@@ -642,20 +652,20 @@ impl Disassembler {
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
} }
0x84 => modrm_target_bytewidth!(self, TEST), 0x84 => modrm_8b_register!(self, TEST),
0x85 => modrm_instruction_wordwidth!(self, TEST), 0x85 => modrm_16b_register!(self, TEST),
0x86 => modrm_target_bytewidth!(self, XCHG), 0x86 => modrm_8b_register!(self, XCHG),
0x87 => modrm_instruction_wordwidth!(self, XCHG), 0x87 => modrm_16b_register!(self, XCHG),
0x88 => modrm_target_bytewidth!(self, MOV_FromReg), 0x88 => modrm_8b_register!(self, MOV_FromReg),
0x89 => modrm_instruction_wordwidth!(self, MOV_FromReg), 0x89 => modrm_16b_register!(self, MOV_FromReg),
0x8A => modrm_target_bytewidth!(self, MOV_ToReg), 0x8A => modrm_8b_register!(self, MOV_ToReg),
0x8B => modrm_instruction_wordwidth!(self, MOV_ToReg), 0x8B => modrm_16b_register!(self, MOV_ToReg),
0x8C => modrm_instruction_sregister!(self, MOV_FromSReg), 0x8C => modrm_sregister!(self, MOV_FromSReg),
0x8E => modrm_instruction_sregister!(self, MOV_ToSReg), 0x8E => modrm_sregister!(self, MOV_ToSReg),
0x8D => modrm_instruction_wordwidth!(self, LEA), 0x8D => modrm_16b_register!(self, LEA),
0x8F => { 0x8F => {
let (target, _) = self.parse_modrm_byte(Operand::Word(0))?; let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
@@ -734,11 +744,11 @@ impl Disassembler {
0xC3 => Mnemonic::RET, 0xC3 => Mnemonic::RET,
0xC4 => { 0xC4 => {
let (target, ptr) = self.modrm_mp()?; let (target, ptr) = self.parse_mp()?;
Mnemonic::LES(target, ptr) Mnemonic::LES(target, ptr)
} }
0xC5 => { 0xC5 => {
let (target, ptr) = self.modrm_mp()?; let (target, ptr) = self.parse_mp()?;
Mnemonic::LDS(target, ptr) Mnemonic::LDS(target, ptr)
} }

View File

@@ -2,7 +2,7 @@
#[macro_export] #[macro_export]
/// Generate a Mnemonic for an 8-bit Register from a ModRM byte. /// Generate a Mnemonic for an 8-bit Register from a ModRM byte.
macro_rules! modrm_target_bytewidth { macro_rules! modrm_8b_register {
($self:ident, $variant:ident) => {{ ($self:ident, $variant:ident) => {{
let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?; let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?;
Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?) Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?)
@@ -11,7 +11,7 @@ macro_rules! modrm_target_bytewidth {
#[macro_export] #[macro_export]
/// Generate a Mnemonic for a 16-bit Register from a ModRM byte. /// Generate a Mnemonic for a 16-bit Register from a ModRM byte.
macro_rules! modrm_instruction_wordwidth { macro_rules! modrm_16b_register {
($self:ident, $variant:ident) => {{ ($self:ident, $variant:ident) => {{
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?; let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?) Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?)
@@ -20,7 +20,7 @@ macro_rules! modrm_instruction_wordwidth {
#[macro_export] #[macro_export]
/// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte. /// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte.
macro_rules! modrm_instruction_sregister { macro_rules! modrm_sregister {
($self:ident, $variant:ident) => {{ ($self:ident, $variant:ident) => {{
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?; let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
Mnemonic::$variant(target, SegmentRegister::by_id(reg)?) Mnemonic::$variant(target, SegmentRegister::by_id(reg)?)

View File

@@ -7,7 +7,6 @@ use crate::{
use core::fmt; use core::fmt;
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
#[allow(dead_code)]
/// A single 'line' of executable ASM is called an Instruction, which /// A single 'line' of executable ASM is called an Instruction, which
/// contains the `Mnemonic` that will be executed, alongside its starting offset /// contains the `Mnemonic` that will be executed, alongside its starting offset
/// and the raw parsed bytes /// and the raw parsed bytes
@@ -47,7 +46,7 @@ impl fmt::Display for Instruction {
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code, non_camel_case_types)] #[allow(non_camel_case_types)]
/// All possible mnemonic variations. /// All possible mnemonic variations.
/// These are sorted by type and are not in hex-encoding order. /// These are sorted by type and are not in hex-encoding order.
// XXX: convert this copy and paste horror in a proc macro like // XXX: convert this copy and paste horror in a proc macro like
@@ -296,8 +295,6 @@ pub enum Mnemonic {
AAD(Byte), AAD(Byte),
// MISC // MISC
XLAT, XLAT,
// Not part of 8086:
EOT, // End of Text Section
} }
impl fmt::Display for Mnemonic { impl fmt::Display for Mnemonic {
@@ -414,8 +411,8 @@ impl fmt::Display for Mnemonic {
Self::MOV_ToReg(target, reg) => write!(f, "mov {reg}, {target}"), Self::MOV_ToReg(target, reg) => write!(f, "mov {reg}, {target}"),
Self::MOV_FromSReg(target, reg) => write!(f, "mov {target}, {reg}"), Self::MOV_FromSReg(target, reg) => write!(f, "mov {target}, {reg}"),
Self::MOV_ToSReg(target, reg) => write!(f, "mov {reg}, {target}"), Self::MOV_ToSReg(target, reg) => write!(f, "mov {reg}, {target}"),
Self::MOV_Ib(target, byte) => write!(f, "mov byte {target}, {byte:#04x}"), Self::MOV_Ib(target, byte) => write!(f, "mov byte ptr {target}, {byte:#04x}"),
Self::MOV_Iv(target, word) => write!(f, "mov word {target}, {word:#04x}"), Self::MOV_Iv(target, word) => write!(f, "mov word ptr {target}, {word:#04x}"),
Self::MOV_AL0b(byte) => write!(f, "mov {}, {byte:#04x}", Register::AL), Self::MOV_AL0b(byte) => write!(f, "mov {}, {byte:#04x}", Register::AL),
Self::MOV_AX0v(word) => write!(f, "mov {}, {word:#04x}", Register::AX), Self::MOV_AX0v(word) => write!(f, "mov {}, {word:#04x}", Register::AX),
@@ -492,13 +489,13 @@ impl fmt::Display for Mnemonic {
Self::HLT => write!(f, "hlt"), Self::HLT => write!(f, "hlt"),
Self::ROL_b(target, byte) => write!(f, "rol byte {target}, {byte:#04x}"), Self::ROL_b(target, byte) => write!(f, "rol byte ptr {target}, {byte:#04x}"),
Self::ROR_b(target, byte) => write!(f, "ror byte {target}, {byte:#04x}"), Self::ROR_b(target, byte) => write!(f, "ror byte ptr {target}, {byte:#04x}"),
Self::RCL_b(target, byte) => write!(f, "rcl byte {target}, {byte:#04x}"), Self::RCL_b(target, byte) => write!(f, "rcl byte ptr {target}, {byte:#04x}"),
Self::RCR_b(target, byte) => write!(f, "rcr byte {target}, {byte:#04x}"), Self::RCR_b(target, byte) => write!(f, "rcr byte ptr {target}, {byte:#04x}"),
Self::SHL_b(target, byte) => write!(f, "shl byte {target}, {byte:#04x}"), Self::SHL_b(target, byte) => write!(f, "shl byte ptr {target}, {byte:#04x}"),
Self::SHR_b(target, byte) => write!(f, "shr byte {target}, {byte:#04x}"), Self::SHR_b(target, byte) => write!(f, "shr byte ptr {target}, {byte:#04x}"),
Self::SAR_b(target, byte) => write!(f, "sar byte {target}, {byte:#04x}"), Self::SAR_b(target, byte) => write!(f, "sar byte ptr {target}, {byte:#04x}"),
Self::ROL_fromReg(target, reg) => write!(f, "rol {target}, {reg}"), Self::ROL_fromReg(target, reg) => write!(f, "rol {target}, {reg}"),
Self::ROR_fromReg(target, reg) => write!(f, "ror {target}, {reg}"), Self::ROR_fromReg(target, reg) => write!(f, "ror {target}, {reg}"),
Self::RCL_fromReg(target, reg) => write!(f, "rcl {target}, {reg}"), Self::RCL_fromReg(target, reg) => write!(f, "rcl {target}, {reg}"),
@@ -507,13 +504,13 @@ impl fmt::Display for Mnemonic {
Self::SHR_fromReg(target, reg) => write!(f, "shr {target}, {reg}"), Self::SHR_fromReg(target, reg) => write!(f, "shr {target}, {reg}"),
Self::SAR_fromReg(target, reg) => write!(f, "sar {target}, {reg}"), Self::SAR_fromReg(target, reg) => write!(f, "sar {target}, {reg}"),
Self::IN_AL(byte) => write!(f, "in byte {}, {byte:#04x}", Register::AL), Self::IN_AL(byte) => write!(f, "in byte ptr {}, {byte:#04x}", Register::AL),
Self::IN_AX(byte) => write!(f, "in byte {}, {byte:#04x}", Register::AX), Self::IN_AX(byte) => write!(f, "in byte ptr {}, {byte:#04x}", Register::AX),
Self::IN_ALDX => write!(f, "in {}, {}", Register::AL, Register::DX), Self::IN_ALDX => write!(f, "in {}, {}", Register::AL, Register::DX),
Self::IN_AXDX => write!(f, "in {}, {}", Register::AX, Register::DX), Self::IN_AXDX => write!(f, "in {}, {}", Register::AX, Register::DX),
Self::OUT_AL(byte) => write!(f, "out byte {}, {byte:#04x}", Register::AL), Self::OUT_AL(byte) => write!(f, "out byte ptr {}, {byte:#04x}", Register::AL),
Self::OUT_AX(byte) => write!(f, "out byte {}, {byte:#04x}", Register::AX), Self::OUT_AX(byte) => write!(f, "out byte ptr {}, {byte:#04x}", Register::AX),
Self::OUT_ALDX => write!(f, "out {}, {}", Register::AL, Register::DX), Self::OUT_ALDX => write!(f, "out {}, {}", Register::AL, Register::DX),
Self::OUT_AXDX => write!(f, "out {}, {}", Register::AX, Register::DX), Self::OUT_AXDX => write!(f, "out {}, {}", Register::AX, Register::DX),

View File

@@ -11,7 +11,6 @@ pub type IWord = i16; // used for displacement of memory access
pub type DWord = u32; pub type DWord = u32;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[allow(dead_code)]
/// Encodes either Byte- or Word-sized operands. /// Encodes either Byte- or Word-sized operands.
/// Also sometimes used to decide if an instruction is Byte- or Word-sized, /// Also sometimes used to decide if an instruction is Byte- or Word-sized,
/// which is usually indicated by using a value of 0 and the disregarding /// which is usually indicated by using a value of 0 and the disregarding

View File

@@ -4,7 +4,6 @@ use crate::{disasm::DisasmError, operands::Operand};
use core::fmt; use core::fmt;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
/// Registers of a 8086 processor /// Registers of a 8086 processor
pub enum Register { pub enum Register {
// 8 bit // 8 bit
@@ -33,7 +32,6 @@ pub enum Register {
/// Selector for Register or Segment Register /// Selector for Register or Segment Register
pub type RegisterId = u8; pub type RegisterId = u8;
#[allow(dead_code)]
impl Register { impl Register {
/// Find the register corresponding to the 8086 bytecode ID /// Find the register corresponding to the 8086 bytecode ID
pub fn by_id(id: Operand) -> Result<Self, DisasmError> { pub fn by_id(id: Operand) -> Result<Self, DisasmError> {
@@ -89,7 +87,6 @@ impl fmt::Display for Register {
/// Segment Registers of a 8086 processor /// Segment Registers of a 8086 processor
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
#[allow(dead_code)]
pub enum SegmentRegister { pub enum SegmentRegister {
DS, DS,
ES, ES,
@@ -97,7 +94,6 @@ pub enum SegmentRegister {
CS, CS,
} }
#[allow(dead_code)]
impl SegmentRegister { impl SegmentRegister {
/// Find the SRegister corresponding to the 8086 bytecode ID /// Find the SRegister corresponding to the 8086 bytecode ID
pub fn by_id(id: u8) -> Result<Self, DisasmError> { pub fn by_id(id: u8) -> Result<Self, DisasmError> {