chore: add nice debug output

This commit is contained in:
2025-05-25 21:05:54 +09:00
parent f9ae0dc6ee
commit 35207d23f0
3 changed files with 58 additions and 25 deletions

View File

@@ -50,6 +50,11 @@ Interpret a binary:
cargo run -- interpret -p ./a.out
```
Run with debug output:
```
RUST_LOG=debug cargo run -- interpret -p ./a.out
```
## Status
This project is under active development and is primarily used by me to explore some Intel disassembly and learn some more Rust.

View File

@@ -22,6 +22,7 @@ pub enum DisasmError {
IllegalGroupMnemonic(u8, u8),
IllegalModRMByteMode(u8),
IllegalModRMByteIndex(u8),
IllegalOperand(String),
ReadBeyondTextSection(Disassembler),
UnknownRegister(usize),
}
@@ -57,6 +58,7 @@ impl fmt::Display for DisasmError {
"Error (Illegal modrm byte). While deconstructing a ModRM byte, the following index is unknown: {}",
modrm
),
DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). {}", msg),
DisasmError::ReadBeyondTextSection(disasm) => write!(
f,
"Error (Out of bounds access). Disassembler state: {:?}",
@@ -114,13 +116,14 @@ impl Disassembler {
/// Parse a single byte of the binary and record it in the raw bytes of the
/// current instruction.
///
/// The offset is advanced *before* the read, so the returned byte is the one
/// located at the new `self.offset`.
///
/// # Errors
///
/// Returns [`DisasmError::ReadBeyondTextSection`], carrying a snapshot of the
/// disassembler state, if the advanced offset lies outside of `self.text`.
/// The offset stays advanced in that case.
pub fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
    log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
    // advance to operand
    self.offset += 1;
    // Build the error lazily: `ok_or` would clone the whole disassembler on
    // every call, even when the read succeeds and the clone is thrown away.
    let byte = *self
        .text
        .get(self.offset)
        .ok_or_else(|| DisasmError::ReadBeyondTextSection(self.clone()))?;
    log::debug!("Parsed byte {byte:#04x}");
    self.instruction.raw.push(byte);
    Ok(byte)
}
@@ -129,7 +132,7 @@ impl Disassembler {
/// Just a wrapper for parsing a byte twice.
/// Returns the read word.
pub fn parse_word(&mut self) -> Result<Word, DisasmError> {
log::debug!("Attempting to parse word at {} ...", self.offset);
log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
let byte1 = self.parse_byte()?;
let byte2 = self.parse_byte()?;
self.instruction.raw.push(byte1);
@@ -141,9 +144,14 @@ impl Disassembler {
/// The isize contains a relative offset to be added to the address
/// of the subsequent instruction.
pub fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
    log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
    // Reinterpret the raw byte as a signed (two's complement) value first,
    // so the widening cast to isize sign-extends it.
    let byte = self.parse_byte().map(|raw| raw as IByte as isize)?;
    // The displacement is added to the offset just past the byte that was read.
    let next_addr = (self.offset + 1) as isize;
    let target = byte + next_addr;
    log::debug!(
        "Parsed Jb consists of {byte:#04x} + {next_addr:#04x} = {:#04x}",
        target
    );
    Ok(target)
}
@@ -151,15 +159,20 @@ impl Disassembler {
/// The isize contains a relative offset to be added to the address
/// of the subsequent instruction.
pub fn parse_j_word(&mut self) -> Result<isize, DisasmError> {
    log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset);
    // Reinterpret the raw word as a signed (two's complement) value first,
    // so the widening cast to isize sign-extends it.
    let word = self.parse_word().map(|raw| raw as IWord as isize)?;
    // The displacement is added to the offset just past the word that was read.
    let next_addr = (self.offset + 1) as isize;
    let target = word + next_addr;
    log::debug!(
        "Parsed Jv consists of {word:#04x} + {next_addr:#04x} = {:#04x}",
        target
    );
    Ok(target)
}
/// Parse a pointer type.
pub fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
log::debug!("Attempting to parse pointer at {} ...", self.offset);
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
let byte0 = self.parse_byte()?;
let byte1 = self.parse_byte()?;
let byte2 = self.parse_byte()?;
@@ -206,8 +219,8 @@ impl Disassembler {
match mode {
0b00 => {
if rm == 0b110 {
log::debug!("Additional word during ModRM parsing was read with mod 0.");
displacement = Some(Displacement::IWord(self.parse_word()? as IWord));
log::debug!("ModRM direct memory read at {displacement:?}");
return Ok((
ModRmTarget::Memory(MemoryIndex {
base: None,
@@ -217,20 +230,24 @@ impl Disassembler {
reg,
));
} else {
log::debug!("ModRM does not have a displacement");
displacement = None;
}
}
0b01 => {
log::debug!("Additional byte during ModRM parsing was read.");
displacement = Some(Displacement::IByte(self.parse_byte()? as IByte))
let byte = Displacement::IByte(self.parse_byte()? as IByte);
log::debug!("ModRM has a single byte of displacement: {byte}.");
displacement = Some(byte);
}
0b10 => {
log::debug!("Additional word during ModRM parsing was read.");
displacement = Some(Displacement::IWord(self.parse_word()? as IWord));
let word = Displacement::IWord(self.parse_word()? as IWord);
log::debug!("ModRM has a single word of displacement: {word}");
displacement = Some(word);
}
0b11 => {
log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg);
// XXX: find a nicer way instead of using Byte(0) and Word(0)
log::debug!(
"ModRM selected Register to Register: ({rm:#b}) to/from RegID ({reg:#b})"
);
let target = match register_width {
Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?),
Operand::Word(_) => {
@@ -294,12 +311,12 @@ impl Disassembler {
/// first and an imm value as second operand (which has to be parsed before
/// call to this function), but is available in both Byte and Word length.
pub fn modrm_reg_to_grp1(
reg: u8,
modrm_reg_byte: u8,
target: ModRmTarget,
imm: Operand,
register_id: Operand,
) -> Result<Mnemonic, DisasmError> {
match imm {
Operand::Byte(b) => match reg {
match register_id {
Operand::Byte(b) => match modrm_reg_byte {
0b000 => Ok(Mnemonic::ADD_Ib(target, b)),
0b001 => Ok(Mnemonic::OR_Ib(target, b)),
0b010 => Ok(Mnemonic::ADC_Ib(target, b)),
@@ -308,9 +325,9 @@ impl Disassembler {
0b101 => Ok(Mnemonic::SUB_Ib(target, b)),
0b110 => Ok(Mnemonic::XOR_Ib(target, b)),
0b111 => Ok(Mnemonic::CMP_Ib(target, b)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)),
},
Operand::Word(w) => match reg {
Operand::Word(w) => match modrm_reg_byte {
0b000 => Ok(Mnemonic::ADD_Iv(target, w)),
0b001 => Ok(Mnemonic::OR_Iv(target, w)),
0b010 => Ok(Mnemonic::ADC_Iv(target, w)),
@@ -319,7 +336,7 @@ impl Disassembler {
0b101 => Ok(Mnemonic::SUB_Iv(target, w)),
0b110 => Ok(Mnemonic::XOR_Iv(target, w)),
0b111 => Ok(Mnemonic::CMP_Iv(target, w)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
_ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)),
},
}
}
@@ -589,10 +606,14 @@ impl Disassembler {
0x8D => modrmv!(self, LEA),
0x8F => {
let target = self.parse_modrm_byte(Operand::Word(0))?.0;
let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
let mem = match target {
ModRmTarget::Memory(idx) => idx,
_ => panic!("POP_M instruction given a register to pop into"),
_ => {
return Err(DisasmError::IllegalOperand(
"POP (memory) instruction given a register to pop into".into(),
));
}
};
Mnemonic::POP_M(mem)
}
@@ -747,11 +768,12 @@ impl Disassembler {
0xF5 => Mnemonic::CMC,
// Group 3
// Group 3a
0xF6 => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
self.modrm_reg_to_grp3(reg, target, Operand::Byte(0))?
}
// Group 3b
0xF7 => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
self.modrm_reg_to_grp3(reg, target, Operand::Word(0))?
@@ -764,6 +786,7 @@ impl Disassembler {
0xFC => Mnemonic::CLD,
0xFD => Mnemonic::STD,
// Group 4
0xFE => {
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
match reg {
@@ -772,6 +795,8 @@ impl Disassembler {
_ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)),
}
}
// Group 5
0xFF => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
match reg {

View File

@@ -13,6 +13,9 @@ pub type DWord = u32;
#[derive(Debug, Clone)]
#[allow(dead_code)]
/// Encodes either Byte- or Word-sized operands.
/// Also sometimes used to decide if an instruction is Byte- or Word-sized,
/// which is usually indicated by using a value of 0 and then disregarding
/// the value when read.
pub enum Operand {
Byte(Byte),
Word(Word),
@@ -37,7 +40,7 @@ impl fmt::LowerHex for Operand {
}
#[derive(Debug, Clone)]
/// ModRM byte can either target a memory location or some register
/// ModRM byte can either target a memory location or some register.
pub enum ModRmTarget {
Memory(MemoryIndex),
Register(Register),
@@ -53,11 +56,11 @@ impl std::fmt::Display for ModRmTarget {
}
#[derive(Debug, Clone)]
/// Memory displacements are signed versions of u8 and u16.
/// Memory displacements are signed versions of Byte and Word operands.
/// Encodes either Byte- or Word-sized operands.
pub enum Displacement {
IByte(i8),
IWord(i16),
IByte(IByte),
IWord(IWord),
}
impl fmt::LowerHex for Displacement {