From 74e936ab767b606dc56b9d6d221390f1a0c291d8 Mon Sep 17 00:00:00 2001 From: Marco Thomas Date: Sun, 25 May 2025 15:45:09 +0900 Subject: [PATCH] chore: replace all panic's with proper error propagation --- src/disasm.rs | 441 +++++++++++++++++++++++-------------------- src/disasm_macros.rs | 12 +- src/operands.rs | 16 +- src/register.rs | 52 ++--- 4 files changed, 278 insertions(+), 243 deletions(-) diff --git a/src/disasm.rs b/src/disasm.rs index 367d142..e78ed2b 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -2,7 +2,7 @@ use crate::aout::Aout; use crate::operands::{ - DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, + Byte, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word, }; use crate::register::{Register, RegisterId, SegmentRegister}; use crate::{ @@ -19,7 +19,11 @@ pub enum DisasmError { NoFile(Option), IoError(std::io::Error), OpcodeUndefined(u8), - IndexOutOfBounds(usize), + IllegalGroupMnemonic(u8, u8), + IllegalModRMByteMode(u8), + IllegalModRMByteIndex(u8), + ReadBeyondTextSection(Disassembler), + UnknownRegister(usize), } impl From for DisasmError { @@ -35,10 +39,33 @@ impl fmt::Display for DisasmError { DisasmError::IoError(msg) => write!(f, "{}", msg), DisasmError::OpcodeUndefined(opcode) => write!( f, - "Instruction '{:#x} is considered undefined by the Spec", + "Error (Undefined Opcode). '{:#x} is considered undefined by the Spec", opcode ), - DisasmError::IndexOutOfBounds(msg) => write!(f, "Out of bounds read at {}", msg), + DisasmError::IllegalGroupMnemonic(group, mnemonic) => write!( + f, + "Error (Illegal group mnemonic). While parsing the ModRM reg field for groups, the following bit-combination for GRP{group} is unknown: {}", + mnemonic + ), + DisasmError::IllegalModRMByteMode(modrm) => write!( + f, + "Error (Illegal modrm byte). 
While deconstructing a ModRM byte, the following mode is unknown: {}", + modrm + ), + DisasmError::IllegalModRMByteIndex(modrm) => write!( + f, + "Error (Illegal modrm byte). While deconstructing a ModRM byte, the following index is unknown: {}", + modrm + ), + DisasmError::ReadBeyondTextSection(disasm) => write!( + f, + "Error (Out of bounds access). Disassembler state: {:?}", + disasm + ), + DisasmError::UnknownRegister(id) => write!( + f, + "Error (Unknown register). The register with ID {id} is unknown", + ), } } } @@ -68,7 +95,7 @@ fn path_to_buf(args: &Args) -> Result, DisasmError> { Ok(buf) } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct Disassembler { pub offset: usize, // the current offset in the disasm process pub text: Vec, // the aout binary @@ -86,47 +113,48 @@ impl Disassembler { /// Parse a single byte of binary, return it and advance the offset. /// Returns the read byte. - pub fn parse_byte(&mut self) -> u8 { + pub fn parse_byte(&mut self) -> Result { + log::debug!("Attempting to parse byte at {} ...", self.offset); // advance to operand self.offset += 1; - let byte = self.text[self.offset]; - self.instruction.raw.push(byte); - byte + let byte = self + .text + .get(self.offset) + .ok_or(DisasmError::ReadBeyondTextSection(self.clone()))?; + self.instruction.raw.push(*byte); + Ok(*byte) } - /// Parse a single word of binary, return it and advance the offset. + /// Parse a single word of binary. + /// Just a wrapper for parsing a byte twice. /// Returns the read word. 
- pub fn parse_word(&mut self) -> u16 { - // advance to operand - self.offset += 1; - // XXX: wrap in Result<> - let byte1 = self.text[self.offset]; - let byte2 = self.text[self.offset + 1]; - // jump onto last operand - self.offset += 1; + pub fn parse_word(&mut self) -> Result { + log::debug!("Attempting to parse word at {} ...", self.offset); + let byte1 = self.parse_byte()?; + let byte2 = self.parse_byte()?; self.instruction.raw.push(byte1); self.instruction.raw.push(byte2); - u16::from_le_bytes([byte1, byte2]) + Ok(u16::from_le_bytes([byte1, byte2])) } - /// Parse a single byte of binary and advance the offset. - /// The returned IByte contains a relative offset to be added to the address + /// Parse a single byte of binary and interpret it as signed. + /// The returned isize is the parsed relative offset added to the address /// of the subsequent instruction. - pub fn parse_j_byte(&mut self) -> isize { + pub fn parse_j_byte(&mut self) -> Result { // first interpret as 2-complement, then cast for addition - let byte = self.parse_byte() as IByte as isize; + let byte = self.parse_byte()? as IByte as isize; let next_addr = (self.offset + 1) as isize; - byte + next_addr + Ok(byte + next_addr) } - /// Parse a single byte of binary and advance the offset. - /// The returned IByte contains a relative offset to be added to the address + /// Parse a single word of binary and interpret it as signed. + /// The returned isize is the parsed relative offset added to the address /// of the subsequent instruction. - pub fn parse_j_word(&mut self) -> isize { + pub fn parse_j_word(&mut self) -> Result { // first interpret as 2-complement, then cast for addition - let word = self.parse_word() as IWord as isize; + let word = self.parse_word()? as IWord as isize; let next_addr = (self.offset + 1) as isize; - word + next_addr + Ok(word + next_addr) } /// Takes in a modrm byte and returns mod, reg and r/m. 
@@ -140,7 +168,10 @@ impl Disassembler { /// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset. /// Returns the parsed modrm target and the source register - pub fn parse_modrm_byte(&mut self, width: Operand) -> (ModRmTarget, RegisterId) { + pub fn parse_modrm_byte( + &mut self, + register_width: Operand, + ) -> Result<(ModRmTarget, RegisterId), DisasmError> { // advance to operand self.offset += 1; let modrm = self.text[self.offset]; @@ -161,39 +192,39 @@ impl Disassembler { 0b00 => { if rm == 0b110 { log::debug!("Additional word during ModRM parsing was read with mod 0."); - displacement = Some(Displacement::IWord(self.parse_word() as IWord)); - return ( + displacement = Some(Displacement::IWord(self.parse_word()? as IWord)); + return Ok(( ModRmTarget::Memory(MemoryIndex { base: None, index: None, displacement, }), reg, - ); + )); } else { displacement = None; } } 0b01 => { log::debug!("Additional byte during ModRM parsing was read."); - displacement = Some(Displacement::IByte(self.parse_byte() as IByte)) + displacement = Some(Displacement::IByte(self.parse_byte()? as IByte)) } 0b10 => { log::debug!("Additional word during ModRM parsing was read."); - displacement = Some(Displacement::IWord(self.parse_word() as IWord)); + displacement = Some(Displacement::IWord(self.parse_word()? as IWord)); } 0b11 => { log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg); // XXX: find a nicer way instead of using Byte(0) and Word(0) - let target = match width { - Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))), + let target = match register_width { + Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?), Operand::Word(_) => { - ModRmTarget::Register(Register::by_id(Operand::Word(rm.into()))) + ModRmTarget::Register(Register::by_id(Operand::Word(rm.into()))?) 
} }; - return (target, reg); + return Ok((target, reg)); } - _ => panic!("Invalid ModRM byte encountered"), + _ => return Err(DisasmError::IllegalModRMByteMode(mode)), }; let index = match rm { @@ -237,39 +268,43 @@ impl Disassembler { index: None, displacement, }, - _ => panic!("Invalid ModRM byte encountered"), + _ => return Err(DisasmError::IllegalModRMByteIndex(rm)), }; - (ModRmTarget::Memory(index), reg) + Ok((ModRmTarget::Memory(index), reg)) } /// Match the modrm reg bits to the GPR1 mnemonics. /// Group 1 always have an ModRM target (all modrm bits, without reg) as /// first and an imm value as second operand (which has to be parsed before /// call to this function), but is available in both Byte and Word length. - pub fn modrm_reg_to_grp1(reg: u8, target: ModRmTarget, imm: Operand) -> Mnemonic { + pub fn modrm_reg_to_grp1( + reg: u8, + target: ModRmTarget, + imm: Operand, + ) -> Result { match imm { Operand::Byte(b) => match reg { - 0b000 => Mnemonic::ADD_Ib(target, b), - 0b001 => Mnemonic::OR_Ib(target, b), - 0b010 => Mnemonic::ADC_Ib(target, b), - 0b011 => Mnemonic::SBB_Ib(target, b), - 0b100 => Mnemonic::AND_Ib(target, b), - 0b101 => Mnemonic::SUB_Ib(target, b), - 0b110 => Mnemonic::XOR_Ib(target, b), - 0b111 => Mnemonic::CMP_Ib(target, b), - _ => panic!("Illegal Group 1 mnemonic"), + 0b000 => Ok(Mnemonic::ADD_Ib(target, b)), + 0b001 => Ok(Mnemonic::OR_Ib(target, b)), + 0b010 => Ok(Mnemonic::ADC_Ib(target, b)), + 0b011 => Ok(Mnemonic::SBB_Ib(target, b)), + 0b100 => Ok(Mnemonic::AND_Ib(target, b)), + 0b101 => Ok(Mnemonic::SUB_Ib(target, b)), + 0b110 => Ok(Mnemonic::XOR_Ib(target, b)), + 0b111 => Ok(Mnemonic::CMP_Ib(target, b)), + _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), }, Operand::Word(w) => match reg { - 0b000 => Mnemonic::ADD_Iv(target, w), - 0b001 => Mnemonic::OR_Iv(target, w), - 0b010 => Mnemonic::ADC_Iv(target, w), - 0b011 => Mnemonic::SBB_Iv(target, w), - 0b100 => Mnemonic::AND_Iv(target, w), - 0b101 => Mnemonic::SUB_Iv(target, 
w), - 0b110 => Mnemonic::XOR_Iv(target, w), - 0b111 => Mnemonic::CMP_Iv(target, w), - _ => panic!("Illegal Group 1 mnemonic"), + 0b000 => Ok(Mnemonic::ADD_Iv(target, w)), + 0b001 => Ok(Mnemonic::OR_Iv(target, w)), + 0b010 => Ok(Mnemonic::ADC_Iv(target, w)), + 0b011 => Ok(Mnemonic::SBB_Iv(target, w)), + 0b100 => Ok(Mnemonic::AND_Iv(target, w)), + 0b101 => Ok(Mnemonic::SUB_Iv(target, w)), + 0b110 => Ok(Mnemonic::XOR_Iv(target, w)), + 0b111 => Ok(Mnemonic::CMP_Iv(target, w)), + _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), }, } } @@ -278,17 +313,17 @@ impl Disassembler { /// Group 2 only has a single operand, the other one is either a constant /// 1 (not present in the binary) or the CL register. /// This function assumes the operand to be 1 - pub fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Mnemonic { + pub fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result { match reg { - 0b000 => Mnemonic::ROL_b(target, 1), - 0b001 => Mnemonic::ROR_b(target, 1), - 0b010 => Mnemonic::RCL_b(target, 1), - 0b011 => Mnemonic::RCR_b(target, 1), - 0b100 => Mnemonic::SHL_b(target, 1), - 0b101 => Mnemonic::SHR_b(target, 1), - 0b110 => Mnemonic::SAR_b(target, 1), - 0b111 => Mnemonic::SAR_b(target, 1), - _ => panic!("Illegal Group 2 mnemonic"), + 0b000 => Ok(Mnemonic::ROL_b(target, 1)), + 0b001 => Ok(Mnemonic::ROR_b(target, 1)), + 0b010 => Ok(Mnemonic::RCL_b(target, 1)), + 0b011 => Ok(Mnemonic::RCR_b(target, 1)), + 0b100 => Ok(Mnemonic::SHL_b(target, 1)), + 0b101 => Ok(Mnemonic::SHR_b(target, 1)), + 0b110 => Ok(Mnemonic::SAR_b(target, 1)), + 0b111 => Ok(Mnemonic::SAR_b(target, 1)), + _ => return Err(DisasmError::IllegalGroupMnemonic(2, reg)), } } @@ -296,37 +331,42 @@ impl Disassembler { /// Group 2 only has a single operand, the other one is either a constant /// 1 (not present in the binary) or the CL register. /// This function assumes the operand to be CL register. 
- pub fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Mnemonic { + pub fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result { match reg { - 0b000 => Mnemonic::ROL_fromReg(target, Register::CL), - 0b001 => Mnemonic::ROR_fromReg(target, Register::CL), - 0b010 => Mnemonic::RCL_fromReg(target, Register::CL), - 0b011 => Mnemonic::RCR_fromReg(target, Register::CL), - 0b100 => Mnemonic::SHL_fromReg(target, Register::CL), - 0b101 => Mnemonic::SHR_fromReg(target, Register::CL), - 0b110 => Mnemonic::SAR_fromReg(target, Register::CL), - 0b111 => Mnemonic::SAR_fromReg(target, Register::CL), - _ => panic!("Illegal Group 2 mnemonic"), + 0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)), + 0b001 => Ok(Mnemonic::ROR_fromReg(target, Register::CL)), + 0b010 => Ok(Mnemonic::RCL_fromReg(target, Register::CL)), + 0b011 => Ok(Mnemonic::RCR_fromReg(target, Register::CL)), + 0b100 => Ok(Mnemonic::SHL_fromReg(target, Register::CL)), + 0b101 => Ok(Mnemonic::SHR_fromReg(target, Register::CL)), + 0b110 => Ok(Mnemonic::SAR_fromReg(target, Register::CL)), + 0b111 => Ok(Mnemonic::SAR_fromReg(target, Register::CL)), + _ => return Err(DisasmError::IllegalGroupMnemonic(2, reg)), } } /// Match the modrm reg bits to the GPR3a/b mnemonics. /// Group 3 only has a single operand, which is the ModRmTarget selected /// by modrm bits. 
- pub fn modrm_reg_to_grp3(&mut self, reg: u8, target: ModRmTarget, width: Operand) -> Mnemonic { + pub fn modrm_reg_to_grp3( + &mut self, + reg: u8, + target: ModRmTarget, + width: Operand, + ) -> Result { match reg { 0b000 => match width { - Operand::Byte(_) => Mnemonic::TEST_Ib(target, self.parse_byte()), - Operand::Word(_) => Mnemonic::TEST_Iv(target, self.parse_word()), + Operand::Byte(_) => Ok(Mnemonic::TEST_Ib(target, self.parse_byte()?)), + Operand::Word(_) => Ok(Mnemonic::TEST_Iv(target, self.parse_word()?)), }, // 0b001 => // unused - 0b010 => Mnemonic::NOT(target), - 0b011 => Mnemonic::NEG(target), - 0b100 => Mnemonic::MUL(target), - 0b101 => Mnemonic::IMUL(target), - 0b110 => Mnemonic::DIV(target), - 0b111 => Mnemonic::IDIV(target), - _ => panic!("Illegal Group 3 mnemonic"), + 0b010 => Ok(Mnemonic::NOT(target)), + 0b011 => Ok(Mnemonic::NEG(target)), + 0b100 => Ok(Mnemonic::MUL(target)), + 0b101 => Ok(Mnemonic::IMUL(target)), + 0b110 => Ok(Mnemonic::DIV(target)), + 0b111 => Ok(Mnemonic::IDIV(target)), + _ => Err(DisasmError::IllegalGroupMnemonic(3, reg)), } } @@ -334,7 +374,7 @@ impl Disassembler { /// An Mp is a ModRM byte with the `reg` bits ignored and an additional /// 2 words parsed for a `Pointer` type. 
pub fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> { - let (target, _) = self.parse_modrm_byte(Operand::Byte(0)); + let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?; let ptr = Pointer::new(self)?; Ok((target, ptr)) } @@ -362,8 +402,8 @@ impl Disassembler { 0x01 => modrmv!(self, ADD_FromReg), 0x02 => modrmb!(self, ADD_ToReg), 0x03 => modrmv!(self, ADD_ToReg), - 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()), - 0x05 => Mnemonic::ADD_AXIv(self.parse_word()), + 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?), + 0x05 => Mnemonic::ADD_AXIv(self.parse_word()?), 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), 0x07 => Mnemonic::POP_S(SegmentRegister::ES), @@ -372,8 +412,8 @@ impl Disassembler { 0x09 => modrmv!(self, OR_FromReg), 0x0A => modrmb!(self, OR_ToReg), 0x0B => modrmv!(self, OR_ToReg), - 0x0C => Mnemonic::OR_ALIb(self.parse_byte()), - 0x0D => Mnemonic::OR_AXIv(self.parse_word()), + 0x0C => Mnemonic::OR_ALIb(self.parse_byte()?), + 0x0D => Mnemonic::OR_AXIv(self.parse_word()?), 0x0E => Mnemonic::PUSH_S(SegmentRegister::CS), @@ -383,8 +423,8 @@ impl Disassembler { 0x11 => modrmv!(self, ADC_FromReg), 0x12 => modrmb!(self, ADC_ToReg), 0x13 => modrmv!(self, ADC_ToReg), - 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()), - 0x15 => Mnemonic::ADC_AXIv(self.parse_word()), + 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?), + 0x15 => Mnemonic::ADC_AXIv(self.parse_word()?), 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), 0x17 => Mnemonic::POP_S(SegmentRegister::SS), @@ -393,8 +433,8 @@ impl Disassembler { 0x19 => modrmv!(self, SBB_FromReg), 0x1A => modrmb!(self, SBB_ToReg), 0x1B => modrmv!(self, SBB_ToReg), - 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()), - 0x1D => Mnemonic::SBB_AXIv(self.parse_word()), + 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?), + 0x1D => Mnemonic::SBB_AXIv(self.parse_word()?), 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), 0x1F => Mnemonic::POP_S(SegmentRegister::DS), @@ -403,8 +443,8 @@ impl Disassembler { 0x21 => modrmv!(self, 
AND_FromReg), 0x22 => modrmb!(self, AND_ToReg), 0x23 => modrmv!(self, AND_ToReg), - 0x24 => Mnemonic::AND_ALIb(self.parse_byte()), - 0x25 => Mnemonic::AND_AXIv(self.parse_word()), + 0x24 => Mnemonic::AND_ALIb(self.parse_byte()?), + 0x25 => Mnemonic::AND_AXIv(self.parse_word()?), 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), 0x27 => Mnemonic::DAA, @@ -413,8 +453,8 @@ impl Disassembler { 0x29 => modrmv!(self, SUB_FromReg), 0x2A => modrmb!(self, SUB_ToReg), 0x2B => modrmv!(self, SUB_ToReg), - 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()), - 0x2D => Mnemonic::SUB_AXIv(self.parse_word()), + 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?), + 0x2D => Mnemonic::SUB_AXIv(self.parse_word()?), 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), 0x2F => Mnemonic::DAS, @@ -423,8 +463,8 @@ impl Disassembler { 0x31 => modrmv!(self, XOR_FromReg), 0x32 => modrmb!(self, XOR_ToReg), 0x33 => modrmv!(self, XOR_ToReg), - 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()), - 0x35 => Mnemonic::XOR_AXIv(self.parse_word()), + 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?), + 0x35 => Mnemonic::XOR_AXIv(self.parse_word()?), 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), 0x37 => Mnemonic::AAA, @@ -433,8 +473,8 @@ impl Disassembler { 0x39 => modrmv!(self, CMP_FromReg), 0x3A => modrmb!(self, CMP_ToReg), 0x3B => modrmv!(self, CMP_ToReg), - 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()), - 0x3D => Mnemonic::CMP_AXIv(self.parse_word()), + 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?), + 0x3D => Mnemonic::CMP_AXIv(self.parse_word()?), 0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS), 0x3F => Mnemonic::AAS, @@ -477,45 +517,45 @@ impl Disassembler { 0x60..=0x6F => return Err(DisasmError::OpcodeUndefined(opcode)), - 0x70 => Mnemonic::JO(self.parse_j_byte()), - 0x71 => Mnemonic::JNO(self.parse_j_byte()), - 0x72 => Mnemonic::JB(self.parse_j_byte()), - 0x73 => Mnemonic::JNB(self.parse_j_byte()), - 0x74 => Mnemonic::JZ(self.parse_j_byte()), - 0x75 => Mnemonic::JNZ(self.parse_j_byte()), - 0x76 => 
Mnemonic::JBE(self.parse_j_byte()), - 0x77 => Mnemonic::JA(self.parse_j_byte()), - 0x78 => Mnemonic::JS(self.parse_j_byte()), - 0x79 => Mnemonic::JNS(self.parse_j_byte()), - 0x7A => Mnemonic::JPE(self.parse_j_byte()), - 0x7B => Mnemonic::JPO(self.parse_j_byte()), - 0x7C => Mnemonic::JL(self.parse_j_byte()), - 0x7D => Mnemonic::JGE(self.parse_j_byte()), - 0x7E => Mnemonic::JLE(self.parse_j_byte()), - 0x7F => Mnemonic::JG(self.parse_j_byte()), + 0x70 => Mnemonic::JO(self.parse_j_byte()?), + 0x71 => Mnemonic::JNO(self.parse_j_byte()?), + 0x72 => Mnemonic::JB(self.parse_j_byte()?), + 0x73 => Mnemonic::JNB(self.parse_j_byte()?), + 0x74 => Mnemonic::JZ(self.parse_j_byte()?), + 0x75 => Mnemonic::JNZ(self.parse_j_byte()?), + 0x76 => Mnemonic::JBE(self.parse_j_byte()?), + 0x77 => Mnemonic::JA(self.parse_j_byte()?), + 0x78 => Mnemonic::JS(self.parse_j_byte()?), + 0x79 => Mnemonic::JNS(self.parse_j_byte()?), + 0x7A => Mnemonic::JPE(self.parse_j_byte()?), + 0x7B => Mnemonic::JPO(self.parse_j_byte()?), + 0x7C => Mnemonic::JL(self.parse_j_byte()?), + 0x7D => Mnemonic::JGE(self.parse_j_byte()?), + 0x7E => Mnemonic::JLE(self.parse_j_byte()?), + 0x7F => Mnemonic::JG(self.parse_j_byte()?), // Group 1 0x80 => { - let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); - let imm = self.parse_byte(); - Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) + let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; + let imm = self.parse_byte()?; + Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? } 0x81 => { - let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - let imm = self.parse_word(); - Self::modrm_reg_to_grp1(reg, target, Operand::Word(imm)) + let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; + let imm = self.parse_word()?; + Self::modrm_reg_to_grp1(reg, target, Operand::Word(imm))? 
} 0x82 => { // same as 0x80 - let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); - let imm = self.parse_byte(); - Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) + let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; + let imm = self.parse_byte()?; + Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? } 0x83 => { // byte extended version - let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - let imm = self.parse_byte(); - Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) + let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; + let imm = self.parse_byte()?; + Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? } 0x84 => modrmb!(self, TEST), @@ -534,7 +574,7 @@ impl Disassembler { 0x8D => modrmv!(self, LEA), 0x8F => { - let target = self.parse_modrm_byte(Operand::Word(0)).0; + let target = self.parse_modrm_byte(Operand::Word(0))?.0; let mem = match target { ModRmTarget::Memory(idx) => idx, _ => panic!("POP_M instruction given a register to pop into"), @@ -554,16 +594,7 @@ impl Disassembler { 0x98 => Mnemonic::CBW, 0x99 => Mnemonic::CWD, - 0x9A => Mnemonic::CALL_p(Pointer { - raw: DWord::from_le_bytes([ - self.text[self.offset], - self.text[self.offset + 1], - self.text[self.offset + 2], - self.text[self.offset + 3], - ]), - segment: self.parse_word(), - offset: self.parse_word(), - }), + 0x9A => Mnemonic::CALL_p(Pointer::new(self)?), 0x9B => Mnemonic::WAIT, @@ -572,18 +603,18 @@ impl Disassembler { 0x9E => Mnemonic::SAHF, 0x9F => Mnemonic::LAHF, - 0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()), - 0xA1 => Mnemonic::MOV_AX0v(self.parse_word()), - 0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()), - 0xA3 => Mnemonic::MOV_0vAX(self.parse_word()), + 0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()?), + 0xA1 => Mnemonic::MOV_AX0v(self.parse_word()?), + 0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()?), + 0xA3 => Mnemonic::MOV_0vAX(self.parse_word()?), 0xA4 => Mnemonic::MOVSB, 0xA5 => Mnemonic::MOVSW, 0xA6 => 
Mnemonic::CMPSB, 0xA7 => Mnemonic::CMPSW, - 0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()), - 0xA9 => Mnemonic::TEST_AXIv(self.parse_word()), + 0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()?), + 0xA9 => Mnemonic::TEST_AXIv(self.parse_word()?), 0xAA => Mnemonic::STOSB, 0xAB => Mnemonic::STOSW, @@ -592,26 +623,26 @@ impl Disassembler { 0xAE => Mnemonic::SCASB, 0xAF => Mnemonic::SCASW, - 0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()), - 0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()), - 0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()), - 0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()), - 0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()), - 0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()), - 0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()), - 0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()), - 0xB8 => Mnemonic::MOV_AXIv(self.parse_word()), - 0xB9 => Mnemonic::MOV_CXIv(self.parse_word()), - 0xBA => Mnemonic::MOV_DXIv(self.parse_word()), - 0xBB => Mnemonic::MOV_BXIv(self.parse_word()), - 0xBC => Mnemonic::MOV_SPIv(self.parse_word()), - 0xBD => Mnemonic::MOV_BPIv(self.parse_word()), - 0xBE => Mnemonic::MOV_SIIv(self.parse_word()), - 0xBF => Mnemonic::MOV_DIIv(self.parse_word()), + 0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()?), + 0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()?), + 0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()?), + 0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()?), + 0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()?), + 0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()?), + 0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()?), + 0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()?), + 0xB8 => Mnemonic::MOV_AXIv(self.parse_word()?), + 0xB9 => Mnemonic::MOV_CXIv(self.parse_word()?), + 0xBA => Mnemonic::MOV_DXIv(self.parse_word()?), + 0xBB => Mnemonic::MOV_BXIv(self.parse_word()?), + 0xBC => Mnemonic::MOV_SPIv(self.parse_word()?), + 0xBD => Mnemonic::MOV_BPIv(self.parse_word()?), + 0xBE => Mnemonic::MOV_SIIv(self.parse_word()?), + 0xBF => Mnemonic::MOV_DIIv(self.parse_word()?), 0xC0..=0xC1 => return 
Err(DisasmError::OpcodeUndefined(opcode)), - 0xC2 => Mnemonic::RET_Iw(self.parse_word()), + 0xC2 => Mnemonic::RET_Iw(self.parse_word()?), 0xC3 => Mnemonic::RET, 0xC4 => { @@ -624,45 +655,45 @@ impl Disassembler { } 0xC6 => { - let (target, _) = self.parse_modrm_byte(Operand::Byte(0)); - Mnemonic::MOV_Ib(target, self.parse_byte()) + let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?; + Mnemonic::MOV_Ib(target, self.parse_byte()?) } 0xC7 => { - let (target, _) = self.parse_modrm_byte(Operand::Word(0)); - Mnemonic::MOV_Iv(target, self.parse_word()) + let (target, _) = self.parse_modrm_byte(Operand::Word(0))?; + Mnemonic::MOV_Iv(target, self.parse_word()?) } 0xC8..=0xC9 => return Err(DisasmError::OpcodeUndefined(opcode)), - 0xCA => Mnemonic::RETF_Iw(self.parse_word()), + 0xCA => Mnemonic::RETF_Iw(self.parse_word()?), 0xCB => Mnemonic::RETF, 0xCC => Mnemonic::INT(3), - 0xCD => Mnemonic::INT(self.parse_byte()), + 0xCD => Mnemonic::INT(self.parse_byte()?), 0xCE => Mnemonic::INTO, 0xCF => Mnemonic::IRET, // Group 2 0xD0 => { - let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); - Self::modrm_reg_to_grp2_1(reg, target) + let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; + Self::modrm_reg_to_grp2_1(reg, target)? } 0xD1 => { - let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - Self::modrm_reg_to_grp2_1(reg, target) + let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; + Self::modrm_reg_to_grp2_1(reg, target)? } 0xD2 => { - let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); - Self::modrm_reg_to_grp2_cl(reg, target) + let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; + Self::modrm_reg_to_grp2_cl(reg, target)? } 0xD3 => { - let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - Self::modrm_reg_to_grp2_cl(reg, target) + let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; + Self::modrm_reg_to_grp2_cl(reg, target)? 
} - 0xD4 => Mnemonic::AAM(self.parse_byte()), - 0xD5 => Mnemonic::AAD(self.parse_byte()), + 0xD4 => Mnemonic::AAM(self.parse_byte()?), + 0xD5 => Mnemonic::AAD(self.parse_byte()?), 0xD6 => return Err(DisasmError::OpcodeUndefined(opcode)), @@ -670,21 +701,21 @@ impl Disassembler { 0xD8..=0xDF => return Err(DisasmError::OpcodeUndefined(opcode)), - 0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()), - 0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()), - 0xE2 => Mnemonic::LOOP(self.parse_j_byte()), - 0xE3 => Mnemonic::JCXZ(self.parse_j_byte()), + 0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()?), + 0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()?), + 0xE2 => Mnemonic::LOOP(self.parse_j_byte()?), + 0xE3 => Mnemonic::JCXZ(self.parse_j_byte()?), - 0xE4 => Mnemonic::IN_AL(self.parse_byte()), - 0xE5 => Mnemonic::IN_AX(self.parse_byte()), - 0xE6 => Mnemonic::OUT_AL(self.parse_byte()), - 0xE7 => Mnemonic::OUT_AX(self.parse_byte()), + 0xE4 => Mnemonic::IN_AL(self.parse_byte()?), + 0xE5 => Mnemonic::IN_AX(self.parse_byte()?), + 0xE6 => Mnemonic::OUT_AL(self.parse_byte()?), + 0xE7 => Mnemonic::OUT_AX(self.parse_byte()?), - 0xE8 => Mnemonic::CALL_v(self.parse_j_word()), + 0xE8 => Mnemonic::CALL_v(self.parse_j_word()?), - 0xE9 => Mnemonic::JMP_v(self.parse_j_word()), + 0xE9 => Mnemonic::JMP_v(self.parse_j_word()?), 0xEA => Mnemonic::JMP_p(Pointer::new(self)?), - 0xEB => Mnemonic::JMP_b(self.parse_j_byte()), + 0xEB => Mnemonic::JMP_b(self.parse_j_byte()?), 0xEC => Mnemonic::IN_ALDX, 0xED => Mnemonic::IN_AXDX, @@ -703,12 +734,12 @@ impl Disassembler { // Group 3 0xF6 => { - let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - self.modrm_reg_to_grp3(reg, target, Operand::Byte(0)) + let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; + self.modrm_reg_to_grp3(reg, target, Operand::Byte(0))? 
} 0xF7 => { - let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - self.modrm_reg_to_grp3(reg, target, Operand::Word(0)) + let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; + self.modrm_reg_to_grp3(reg, target, Operand::Word(0))? } 0xF8 => Mnemonic::CLC, @@ -719,15 +750,15 @@ impl Disassembler { 0xFD => Mnemonic::STD, 0xFE => { - let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); + let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; match reg { 0b0 => Mnemonic::INC_Mod(target), 0b1 => Mnemonic::DEC_Mod(target), - _ => panic!("Illegal Group 4 mnemonic"), + _ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)), } } 0xFF => { - let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); + let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; match reg { 0b000 => Mnemonic::INC_Mod(target), 0b001 => Mnemonic::DEC_Mod(target), @@ -737,7 +768,7 @@ impl Disassembler { 0b101 => Mnemonic::JMP_Mp(target, Pointer::new(self)?), 0b110 => Mnemonic::PUSH_Mod(target), // 0b111 => unused - _ => panic!("Illegal Group 5 mnemonic"), + _ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)), } } diff --git a/src/disasm_macros.rs b/src/disasm_macros.rs index f717d79..872ac00 100644 --- a/src/disasm_macros.rs +++ b/src/disasm_macros.rs @@ -4,8 +4,8 @@ /// Generate a Mnemonic for an 8-bit Register from a ModRM byte. macro_rules! modrmb { ($self:ident, $variant:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0)); - Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))) + let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?; + Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?) }}; } @@ -13,8 +13,8 @@ macro_rules! modrmb { /// Generate a Mnemonic for a 16-bit Register from a ModRM byte. macro_rules! 
modrmv { ($self:ident, $variant:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(Operand::Word(0)); - Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))) + let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?; + Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?) }}; } @@ -22,7 +22,7 @@ macro_rules! modrmv { /// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte. macro_rules! modrms { ($self:ident, $variant:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(Operand::Word(0)); - Mnemonic::$variant(target, SegmentRegister::by_id(reg)) + let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?; + Mnemonic::$variant(target, SegmentRegister::by_id(reg)?) }}; } diff --git a/src/operands.rs b/src/operands.rs index 174628b..cca854f 100644 --- a/src/operands.rs +++ b/src/operands.rs @@ -141,27 +141,31 @@ pub struct Pointer { impl Pointer { pub fn new(disasm: &mut Disassembler) -> Result { + log::debug!( + "Seeking 4 bytes ahead of current text offset... 
({} + 4)", + disasm.offset + ); let byte0 = disasm .text .get(disasm.offset) - .ok_or(DisasmError::IndexOutOfBounds(disasm.offset))?; + .ok_or(DisasmError::ReadBeyondTextSection(disasm.clone()))?; let byte1 = disasm .text .get(disasm.offset + 1) - .ok_or(DisasmError::IndexOutOfBounds(disasm.offset + 1))?; + .ok_or(DisasmError::ReadBeyondTextSection(disasm.clone()))?; let byte2 = disasm .text .get(disasm.offset + 2) - .ok_or(DisasmError::IndexOutOfBounds(disasm.offset + 2))?; + .ok_or(DisasmError::ReadBeyondTextSection(disasm.clone()))?; let byte3 = disasm .text .get(disasm.offset + 3) - .ok_or(DisasmError::IndexOutOfBounds(disasm.offset + 3))?; + .ok_or(DisasmError::ReadBeyondTextSection(disasm.clone()))?; Ok(Pointer { raw: DWord::from_le_bytes([*byte0, *byte1, *byte2, *byte3]), - segment: disasm.parse_word(), - offset: disasm.parse_word(), + segment: disasm.parse_word()?, + offset: disasm.parse_word()?, }) } } diff --git a/src/register.rs b/src/register.rs index c0c864d..b94ec38 100644 --- a/src/register.rs +++ b/src/register.rs @@ -1,6 +1,6 @@ //! Internal abstraction of all 8086 registers for disassembly. 
-use crate::operands::Operand; +use crate::{disasm::DisasmError, operands::Operand}; use core::fmt; #[derive(Debug, Clone)] @@ -36,29 +36,29 @@ pub type RegisterId = u8; #[allow(dead_code)] impl Register { /// Find the register corresponding to the 8086 bytecode ID - pub fn by_id(id: Operand) -> Self { + pub fn by_id(id: Operand) -> Result { match id { Operand::Byte(b) => match b { - 0b000 => Self::AL, - 0b001 => Self::CL, - 0b010 => Self::DL, - 0b011 => Self::BL, - 0b100 => Self::AH, - 0b101 => Self::CH, - 0b110 => Self::DH, - 0b111 => Self::BH, - _ => panic!("Invalid 8bit register ID encountered"), + 0b000 => Ok(Self::AL), + 0b001 => Ok(Self::CL), + 0b010 => Ok(Self::DL), + 0b011 => Ok(Self::BL), + 0b100 => Ok(Self::AH), + 0b101 => Ok(Self::CH), + 0b110 => Ok(Self::DH), + 0b111 => Ok(Self::BH), + _ => Err(DisasmError::UnknownRegister(b as usize)), }, Operand::Word(w) => match w { - 0b000 => Self::AX, - 0b001 => Self::CX, - 0b010 => Self::DX, - 0b011 => Self::BX, - 0b100 => Self::SP, - 0b101 => Self::BP, - 0b110 => Self::SI, - 0b111 => Self::DI, - _ => panic!("Invalid 16bit register ID encountered"), + 0b000 => Ok(Self::AX), + 0b001 => Ok(Self::CX), + 0b010 => Ok(Self::DX), + 0b011 => Ok(Self::BX), + 0b100 => Ok(Self::SP), + 0b101 => Ok(Self::BP), + 0b110 => Ok(Self::SI), + 0b111 => Ok(Self::DI), + _ => Err(DisasmError::UnknownRegister(w as usize)), }, } } @@ -100,13 +100,13 @@ pub enum SegmentRegister { #[allow(dead_code)] impl SegmentRegister { /// Find the SRegister corresponding to the 8086 bytecode ID - pub fn by_id(id: u8) -> Self { + pub fn by_id(id: u8) -> Result { match id { - 0x00 => Self::ES, - 0x01 => Self::CS, - 0x10 => Self::SS, - 0x11 => Self::DS, - _ => panic!("Invalid segment register ID encountered"), + 0x00 => Ok(Self::ES), + 0x01 => Ok(Self::CS), + 0x10 => Ok(Self::SS), + 0x11 => Ok(Self::DS), + _ => Err(DisasmError::UnknownRegister(id as usize)), } } }