diff --git a/README.md b/README.md index 3cda3dd..ed52ffc 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,11 @@ Interpret a binary: cargo run -- interpret -p ./a.out ``` +Run with debug output: +``` +RUST_LOG=debug cargo run -- interpret -p ./a.out +``` + ## Status This project is under active development and primarily used by me to explore some Intel disassembly and learn some more Rust. diff --git a/src/disasm.rs b/src/disasm.rs index aa2e08d..cedb076 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -22,6 +22,7 @@ pub enum DisasmError { IllegalGroupMnemonic(u8, u8), IllegalModRMByteMode(u8), IllegalModRMByteIndex(u8), + IllegalOperand(String), ReadBeyondTextSection(Disassembler), UnknownRegister(usize), } @@ -57,6 +58,7 @@ impl fmt::Display for DisasmError { "Error (Illegal modrm byte). While deconstructing a ModRM byte, the following index is unknown: {}", modrm ), + DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). {}", msg), DisasmError::ReadBeyondTextSection(disasm) => write!( f, "Error (Out of bounds access). Disassembler state: {:?}", @@ -114,13 +116,14 @@ impl Disassembler { /// Parse a single byte of binary, return it and advance the offset. /// Returns the read byte. pub fn parse_byte(&mut self) -> Result { - log::debug!("Attempting to parse byte at {} ...", self.offset); + log::debug!("Attempting to parse byte at {:#04x} ...", self.offset); // advance to operand self.offset += 1; let byte = self .text .get(self.offset) .ok_or(DisasmError::ReadBeyondTextSection(self.clone()))?; + log::debug!("Parsed byte {byte:#04x}"); self.instruction.raw.push(*byte); Ok(*byte) } @@ -129,7 +132,7 @@ impl Disassembler { /// Just a wrapper for parsing a byte twice. /// Returns the read word. 
pub fn parse_word(&mut self) -> Result { - log::debug!("Attempting to parse word at {} ...", self.offset); + log::debug!("Attempting to parse word at {:#04x} ...", self.offset); let byte1 = self.parse_byte()?; let byte2 = self.parse_byte()?; self.instruction.raw.push(byte1); @@ -141,9 +144,14 @@ impl Disassembler { /// The isize contains a relative offset to be added to the address /// of the subsequent instruction. pub fn parse_j_byte(&mut self) -> Result { + log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset); // first interpret as 2-complement, then cast for addition let byte = self.parse_byte()? as IByte as isize; let next_addr = (self.offset + 1) as isize; + log::debug!( + "Parsed Jb consists of {byte:#04x} + {next_addr:#04x} = {:#04x}", + byte + next_addr + ); Ok(byte + next_addr) } @@ -151,15 +159,20 @@ impl Disassembler { /// The isize contains a relative offset to be added to the address /// of the subsequent instruction. pub fn parse_j_word(&mut self) -> Result { + log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset); // first interpret as 2-complement, then cast for addition let word = self.parse_word()? as IWord as isize; let next_addr = (self.offset + 1) as isize; + log::debug!( + "Parsed Jv consists of {word:#04x} + {next_addr:#04x} = {:#04x}", + word + next_addr + ); Ok(word + next_addr) } /// Parse a pointer type. pub fn parse_ptr(&mut self) -> Result { - log::debug!("Attempting to parse pointer at {} ...", self.offset); + log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset); let byte0 = self.parse_byte()?; let byte1 = self.parse_byte()?; let byte2 = self.parse_byte()?; @@ -206,8 +219,8 @@ impl Disassembler { match mode { 0b00 => { if rm == 0b110 { - log::debug!("Additional word during ModRM parsing was read with mod 0."); displacement = Some(Displacement::IWord(self.parse_word()? 
as IWord)); + log::debug!("ModRM direct memory read at {displacement:?}"); return Ok(( ModRmTarget::Memory(MemoryIndex { base: None, @@ -217,20 +230,24 @@ impl Disassembler { reg, )); } else { + log::debug!("ModRM does not have a displacement"); displacement = None; } } 0b01 => { - log::debug!("Additional byte during ModRM parsing was read."); - displacement = Some(Displacement::IByte(self.parse_byte()? as IByte)) + let byte = Displacement::IByte(self.parse_byte()? as IByte); + log::debug!("ModRM has a single byte of displacement: {byte}."); + displacement = Some(byte); } 0b10 => { - log::debug!("Additional word during ModRM parsing was read."); - displacement = Some(Displacement::IWord(self.parse_word()? as IWord)); + let word = Displacement::IWord(self.parse_word()? as IWord); + log::debug!("ModRM has a single word of displacement: {word}"); + displacement = Some(word); } 0b11 => { - log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg); - // XXX: find a nicer way instead of using Byte(0) and Word(0) + log::debug!( + "ModRM selected Register to Register: ({rm:#b}) to/from RegID ({reg:#b})" + ); let target = match register_width { Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?), Operand::Word(_) => { @@ -294,12 +311,12 @@ impl Disassembler { /// first and an imm value as second operand (which has to be parsed before /// call to this function), but is available in both Byte and Word length. 
pub fn modrm_reg_to_grp1( - reg: u8, + modrm_reg_byte: u8, target: ModRmTarget, - imm: Operand, + register_id: Operand, ) -> Result { - match imm { - Operand::Byte(b) => match reg { + match register_id { + Operand::Byte(b) => match modrm_reg_byte { 0b000 => Ok(Mnemonic::ADD_Ib(target, b)), 0b001 => Ok(Mnemonic::OR_Ib(target, b)), 0b010 => Ok(Mnemonic::ADC_Ib(target, b)), @@ -308,9 +325,9 @@ impl Disassembler { 0b101 => Ok(Mnemonic::SUB_Ib(target, b)), 0b110 => Ok(Mnemonic::XOR_Ib(target, b)), 0b111 => Ok(Mnemonic::CMP_Ib(target, b)), - _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), + _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)), }, - Operand::Word(w) => match reg { + Operand::Word(w) => match modrm_reg_byte { 0b000 => Ok(Mnemonic::ADD_Iv(target, w)), 0b001 => Ok(Mnemonic::OR_Iv(target, w)), 0b010 => Ok(Mnemonic::ADC_Iv(target, w)), @@ -319,7 +336,7 @@ impl Disassembler { 0b101 => Ok(Mnemonic::SUB_Iv(target, w)), 0b110 => Ok(Mnemonic::XOR_Iv(target, w)), 0b111 => Ok(Mnemonic::CMP_Iv(target, w)), - _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), + _ => return Err(DisasmError::IllegalGroupMnemonic(1, modrm_reg_byte)), }, } } @@ -589,10 +606,14 @@ impl Disassembler { 0x8D => modrmv!(self, LEA), 0x8F => { - let target = self.parse_modrm_byte(Operand::Word(0))?.0; + let (target, _) = self.parse_modrm_byte(Operand::Word(0))?; let mem = match target { ModRmTarget::Memory(idx) => idx, - _ => panic!("POP_M instruction given a register to pop into"), + _ => { + return Err(DisasmError::IllegalOperand( + "POP (memory) instruction given a register to pop into".into(), + )); + } }; Mnemonic::POP_M(mem) } @@ -747,11 +768,12 @@ impl Disassembler { 0xF5 => Mnemonic::CMC, - // Group 3 + // Group 3a 0xF6 => { - let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; + let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; self.modrm_reg_to_grp3(reg, target, Operand::Byte(0))? 
} + // Group 3b 0xF7 => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; self.modrm_reg_to_grp3(reg, target, Operand::Word(0))? @@ -764,6 +786,7 @@ impl Disassembler { 0xFC => Mnemonic::CLD, 0xFD => Mnemonic::STD, + // Group 4 0xFE => { let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; match reg { @@ -772,6 +795,8 @@ impl Disassembler { _ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)), } } + + // Group 5 0xFF => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; match reg { diff --git a/src/operands.rs b/src/operands.rs index 7e85868..e2e7811 100644 --- a/src/operands.rs +++ b/src/operands.rs @@ -13,6 +13,9 @@ pub type DWord = u32; #[derive(Debug, Clone)] #[allow(dead_code)] /// Encodes either Byte- or Word-sized operands. +/// Also sometimes used to decide if an instruction is Byte- or Word-sized, +/// which is usually indicated by using a value of 0 and then disregarding +/// the value when read. pub enum Operand { Byte(Byte), Word(Word), @@ -37,7 +40,7 @@ impl fmt::LowerHex for Operand { } #[derive(Debug, Clone)] -/// ModRM byte can either target a memory location or some register +/// ModRM byte can either target a memory location or some register. pub enum ModRmTarget { Memory(MemoryIndex), Register(Register), @@ -53,11 +56,11 @@ impl std::fmt::Display for ModRmTarget { } #[derive(Debug, Clone)] -/// Memory displacements are signed versions of u8 and u16. +/// Memory displacements are signed versions of Byte and Word operands. /// Encodes either Byte- or Word-sized operands. pub enum Displacement { - IByte(i8), - IWord(i16), + IByte(IByte), + IWord(IWord), } impl fmt::LowerHex for Displacement {