chore: add nice debug output

This commit is contained in:
2025-05-25 21:05:54 +09:00
parent f9ae0dc6ee
commit 35207d23f0
3 changed files with 58 additions and 25 deletions

View File

@@ -50,6 +50,11 @@ Interpret a binary:
cargo run -- interpret -p ./a.out cargo run -- interpret -p ./a.out
``` ```
Run with debug output:
```
RUST_LOG=debug cargo run -- interpret -p ./a.out
```
## Status ## Status
This project is under active development and primarily used by me to explore some Intel disassembly and learn some more Rust. This project is under active development and primarily used by me to explore some Intel disassembly and learn some more Rust.

View File

@@ -22,6 +22,7 @@ pub enum DisasmError {
IllegalGroupMnemonic(u8, u8), IllegalGroupMnemonic(u8, u8),
IllegalModRMByteMode(u8), IllegalModRMByteMode(u8),
IllegalModRMByteIndex(u8), IllegalModRMByteIndex(u8),
IllegalOperand(String),
ReadBeyondTextSection(Disassembler), ReadBeyondTextSection(Disassembler),
UnknownRegister(usize), UnknownRegister(usize),
} }
@@ -57,6 +58,7 @@ impl fmt::Display for DisasmError {
"Error (Illegal modrm byte). While deconstructing a ModRM byte, the following index is unknown: {}", "Error (Illegal modrm byte). While deconstructing a ModRM byte, the following index is unknown: {}",
modrm modrm
), ),
DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). {}", msg),
DisasmError::ReadBeyondTextSection(disasm) => write!( DisasmError::ReadBeyondTextSection(disasm) => write!(
f, f,
"Error (Out of bounds access). Disassembler state: {:?}", "Error (Out of bounds access). Disassembler state: {:?}",
@@ -114,13 +116,14 @@ impl Disassembler {
/// Parse a single byte of binary, return it and advance the offset. /// Parse a single byte of binary, return it and advance the offset.
/// Returns the read byte. /// Returns the read byte.
pub fn parse_byte(&mut self) -> Result<Byte, DisasmError> { pub fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
log::debug!("Attempting to parse byte at {} ...", self.offset); log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
// advance to operand // advance to operand
self.offset += 1; self.offset += 1;
let byte = self let byte = self
.text .text
.get(self.offset) .get(self.offset)
.ok_or(DisasmError::ReadBeyondTextSection(self.clone()))?; .ok_or(DisasmError::ReadBeyondTextSection(self.clone()))?;
log::debug!("Parsed byte {byte:#04x}");
self.instruction.raw.push(*byte); self.instruction.raw.push(*byte);
Ok(*byte) Ok(*byte)
} }
@@ -129,7 +132,7 @@ impl Disassembler {
/// Just a wrapper for parsing a byte twice. /// Just a wrapper for parsing a byte twice.
/// Returns the read word. /// Returns the read word.
pub fn parse_word(&mut self) -> Result<Word, DisasmError> { pub fn parse_word(&mut self) -> Result<Word, DisasmError> {
log::debug!("Attempting to parse word at {} ...", self.offset); log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
let byte1 = self.parse_byte()?; let byte1 = self.parse_byte()?;
let byte2 = self.parse_byte()?; let byte2 = self.parse_byte()?;
self.instruction.raw.push(byte1); self.instruction.raw.push(byte1);
@@ -141,9 +144,14 @@ impl Disassembler {
/// The isize contains a relative offset to be added to the address /// The isize contains a relative offset to be added to the address
/// of the subsequent instruction. /// of the subsequent instruction.
pub fn parse_j_byte(&mut self) -> Result<isize, DisasmError> { pub fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
// first interpret as 2-complement, then cast for addition // first interpret as 2-complement, then cast for addition
let byte = self.parse_byte()? as IByte as isize; let byte = self.parse_byte()? as IByte as isize;
let next_addr = (self.offset + 1) as isize; let next_addr = (self.offset + 1) as isize;
log::debug!(
"Parsed Jb consists of {byte:#04x} + {next_addr:#04x} = {:#04x}",
byte + next_addr
);
Ok(byte + next_addr) Ok(byte + next_addr)
} }
@@ -151,15 +159,20 @@ impl Disassembler {
/// The isize contains a relative offset to be added to the address /// The isize contains a relative offset to be added to the address
/// of the subsequent instruction. /// of the subsequent instruction.
pub fn parse_j_word(&mut self) -> Result<isize, DisasmError> { pub fn parse_j_word(&mut self) -> Result<isize, DisasmError> {
log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset);
// first interpret as 2-complement, then cast for addition // first interpret as 2-complement, then cast for addition
let word = self.parse_word()? as IWord as isize; let word = self.parse_word()? as IWord as isize;
let next_addr = (self.offset + 1) as isize; let next_addr = (self.offset + 1) as isize;
log::debug!(
"Parsed Jv consists of {word:#04x} + {next_addr:#04x} = {:#04x}",
word + next_addr
);
Ok(word + next_addr) Ok(word + next_addr)
} }
/// Parse a pointer type. /// Parse a pointer type.
pub fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> { pub fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
log::debug!("Attempting to parse pointer at {} ...", self.offset); log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
let byte0 = self.parse_byte()?; let byte0 = self.parse_byte()?;
let byte1 = self.parse_byte()?; let byte1 = self.parse_byte()?;
let byte2 = self.parse_byte()?; let byte2 = self.parse_byte()?;
@@ -206,8 +219,8 @@ impl Disassembler {
match mode { match mode {
0b00 => { 0b00 => {
if rm == 0b110 { if rm == 0b110 {
log::debug!("Additional word during ModRM parsing was read with mod 0.");
displacement = Some(Displacement::IWord(self.parse_word()? as IWord)); displacement = Some(Displacement::IWord(self.parse_word()? as IWord));
log::debug!("ModRM direct memory read at {displacement:?}");
return Ok(( return Ok((
ModRmTarget::Memory(MemoryIndex { ModRmTarget::Memory(MemoryIndex {
base: None, base: None,
@@ -217,20 +230,24 @@ impl Disassembler {
reg, reg,
)); ));
} else { } else {
log::debug!("ModRM does not have a displacement");
displacement = None; displacement = None;
} }
} }
0b01 => { 0b01 => {
log::debug!("Additional byte during ModRM parsing was read."); let byte = Displacement::IByte(self.parse_byte()? as IByte);
displacement = Some(Displacement::IByte(self.parse_byte()? as IByte)) log::debug!("ModRM has a single byte of displacement: {byte}.");
displacement = Some(byte);
} }
0b10 => { 0b10 => {
log::debug!("Additional word during ModRM parsing was read."); let word = Displacement::IWord(self.parse_word()? as IWord);
displacement = Some(Displacement::IWord(self.parse_word()? as IWord)); log::debug!("ModRM has a single word of displacement: {word}");
displacement = Some(word);
} }
0b11 => { 0b11 => {
log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg); log::debug!(
// XXX: find a nicer way instead of using Byte(0) and Word(0) "ModRM selected Register to Register: ({rm:#b}) to/from RegID ({reg:#b})"
);
let target = match register_width { let target = match register_width {
Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?), Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?),
Operand::Word(_) => { Operand::Word(_) => {
@@ -294,12 +311,12 @@ impl Disassembler {
/// first and an imm value as second operand (which has to be parsed before /// first and an imm value as second operand (which has to be parsed before
/// call to this function), but is available in both Byte and Word length. /// call to this function), but is available in both Byte and Word length.
pub fn modrm_reg_to_grp1( pub fn modrm_reg_to_grp1(
reg: u8, modrm_reg_byte: u8,
target: ModRmTarget, target: ModRmTarget,
imm: Operand, register_id: Operand,
) -> Result<Mnemonic, DisasmError> { ) -> Result<Mnemonic, DisasmError> {
match imm { match register_id {
Operand::Byte(b) => match reg { Operand::Byte(b) => match modrm_reg_byte {
0b000 => Ok(Mnemonic::ADD_Ib(target, b)), 0b000 => Ok(Mnemonic::ADD_Ib(target, b)),
0b001 => Ok(Mnemonic::OR_Ib(target, b)), 0b001 => Ok(Mnemonic::OR_Ib(target, b)),
0b010 => Ok(Mnemonic::ADC_Ib(target, b)), 0b010 => Ok(Mnemonic::ADC_Ib(target, b)),
@@ -308,9 +325,9 @@ impl Disassembler {
0b101 => Ok(Mnemonic::SUB_Ib(target, b)), 0b101 => Ok(Mnemonic::SUB_Ib(target, b)),
0b110 => Ok(Mnemonic::XOR_Ib(target, b)), 0b110 => Ok(Mnemonic::XOR_Ib(target, b)),
0b111 => Ok(Mnemonic::CMP_Ib(target, b)), 0b111 => Ok(Mnemonic::CMP_Ib(target, b)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)),
}, },
Operand::Word(w) => match reg { Operand::Word(w) => match modrm_reg_byte {
0b000 => Ok(Mnemonic::ADD_Iv(target, w)), 0b000 => Ok(Mnemonic::ADD_Iv(target, w)),
0b001 => Ok(Mnemonic::OR_Iv(target, w)), 0b001 => Ok(Mnemonic::OR_Iv(target, w)),
0b010 => Ok(Mnemonic::ADC_Iv(target, w)), 0b010 => Ok(Mnemonic::ADC_Iv(target, w)),
@@ -319,7 +336,7 @@ impl Disassembler {
0b101 => Ok(Mnemonic::SUB_Iv(target, w)), 0b101 => Ok(Mnemonic::SUB_Iv(target, w)),
0b110 => Ok(Mnemonic::XOR_Iv(target, w)), 0b110 => Ok(Mnemonic::XOR_Iv(target, w)),
0b111 => Ok(Mnemonic::CMP_Iv(target, w)), 0b111 => Ok(Mnemonic::CMP_Iv(target, w)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)),
}, },
} }
} }
@@ -589,10 +606,14 @@ impl Disassembler {
0x8D => modrmv!(self, LEA), 0x8D => modrmv!(self, LEA),
0x8F => { 0x8F => {
let target = self.parse_modrm_byte(Operand::Word(0))?.0; let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
let mem = match target { let mem = match target {
ModRmTarget::Memory(idx) => idx, ModRmTarget::Memory(idx) => idx,
_ => panic!("POP_M instruction given a register to pop into"), _ => {
return Err(DisasmError::IllegalOperand(
"POP (memory) instruction given a register to pop into".into(),
));
}
}; };
Mnemonic::POP_M(mem) Mnemonic::POP_M(mem)
} }
@@ -747,11 +768,12 @@ impl Disassembler {
0xF5 => Mnemonic::CMC, 0xF5 => Mnemonic::CMC,
// Group 3 // Group 3a
0xF6 => { 0xF6 => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
self.modrm_reg_to_grp3(reg, target, Operand::Byte(0))? self.modrm_reg_to_grp3(reg, target, Operand::Byte(0))?
} }
// Group 3b
0xF7 => { 0xF7 => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
self.modrm_reg_to_grp3(reg, target, Operand::Word(0))? self.modrm_reg_to_grp3(reg, target, Operand::Word(0))?
@@ -764,6 +786,7 @@ impl Disassembler {
0xFC => Mnemonic::CLD, 0xFC => Mnemonic::CLD,
0xFD => Mnemonic::STD, 0xFD => Mnemonic::STD,
// Group 4
0xFE => { 0xFE => {
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
match reg { match reg {
@@ -772,6 +795,8 @@ impl Disassembler {
_ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)), _ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)),
} }
} }
// Group 5
0xFF => { 0xFF => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
match reg { match reg {

View File

@@ -13,6 +13,9 @@ pub type DWord = u32;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[allow(dead_code)] #[allow(dead_code)]
/// Encodes either Byte- or Word-sized operands. /// Encodes either Byte- or Word-sized operands.
/// Also sometimes used to decide if an instruction is Byte- or Word-sized,
/// which is usually indicated by using a value of 0 and then disregarding
/// the value when read.
pub enum Operand { pub enum Operand {
Byte(Byte), Byte(Byte),
Word(Word), Word(Word),
@@ -37,7 +40,7 @@ impl fmt::LowerHex for Operand {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
/// ModRM byte can either target a memory location or some register /// ModRM byte can either target a memory location or some register.
pub enum ModRmTarget { pub enum ModRmTarget {
Memory(MemoryIndex), Memory(MemoryIndex),
Register(Register), Register(Register),
@@ -53,11 +56,11 @@ impl std::fmt::Display for ModRmTarget {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
/// Memory displacements are signed versions of u8 and u16. /// Memory displacements are signed versions of Byte and Word operands.
/// Encodes either Byte- or Word-sized operands. /// Encodes either Byte- or Word-sized operands.
pub enum Displacement { pub enum Displacement {
IByte(i8), IByte(IByte),
IWord(i16), IWord(IWord),
} }
impl fmt::LowerHex for Displacement { impl fmt::LowerHex for Displacement {