//! The main dissembling logic. use crate::aout::Aout; use crate::operands::{ Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer16, Pointer32, Word, }; use crate::register::{Register, RegisterId, SegmentRegister}; use crate::{ Args, instructions::{Instruction, Mnemonic}, }; use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister}; use core::fmt; use std::{fs::File, io::Read, process::exit}; #[derive(Debug)] /// Generic errors, which are encountered during parsing. pub enum DisasmError { NoFile(Option), IoError(std::io::Error), OpcodeUndefined(u8), IllegalGroupMnemonic(u8, u8), IllegalModRMByteMode(u8), IllegalModRMByteIndex(u8), IllegalOperand(String), ReadBeyondTextSection, // not an error per se, it indicates a single 0x00 byte padding EndOfTextSection, UnknownRegister(usize), } impl From for DisasmError { fn from(error: std::io::Error) -> Self { DisasmError::IoError(error) } } impl fmt::Display for DisasmError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DisasmError::NoFile(msg) => write!(f, "No file error: {:?}", msg), DisasmError::IoError(msg) => write!(f, "{}", msg), DisasmError::OpcodeUndefined(opcode) => write!( f, "Error (Undefined Opcode). '{:#x} is considered undefined by the Spec", opcode ), DisasmError::IllegalGroupMnemonic(group, mnemonic) => write!( f, "Error (Illegal group mnemonic). While parsing the ModRM reg field for groups, the following bit-combination for GRP{group} is unknown: {}", mnemonic ), DisasmError::IllegalModRMByteMode(modrm) => write!( f, "Error (Illegal modrm byte). While deconstructing a ModRM byte, the following mode is unknown: {}", modrm ), DisasmError::IllegalModRMByteIndex(modrm) => write!( f, "Error (Illegal modrm byte). While deconstructing a ModRM byte, the following index is unknown: {}", modrm ), DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). 
{}", msg), DisasmError::ReadBeyondTextSection => write!( f, "Error (Out of bounds access). Wanted to paese an additional byte, but there is no more text section.", ), DisasmError::UnknownRegister(id) => write!( f, "Error (Unknown register). The register with ID {id} is unknown", ), DisasmError::EndOfTextSection => write!(f, "Warning. End of text section reached."), } } } #[derive(Debug, Clone)] pub struct Disassembler { pub offset: usize, // the current offset in the disasm process pub text: Vec, // the aout binary pub instruction: Instruction, // the instruction, which is currently being parsed pub instructions: Vec, // all parsed instructions } impl Disassembler { pub fn new(args: &Args) -> Self { let path = args .path .clone() .ok_or(DisasmError::NoFile(args.path.clone())) .unwrap(); let mut file = File::open(path).unwrap(); let mut buf = Vec::new(); file.read_to_end(&mut buf).unwrap(); let aout = Aout::new(buf); log::debug!("{:?}", aout); Disassembler { offset: 0, text: aout.text, instruction: Instruction::new(), instructions: Vec::new(), } } /// Start the disassmble and allow for some error handling wrapped around /// the actual decoding function. pub fn disassemble(&mut self) -> Result, DisasmError> { let is_ok = self.decode_instructions(); // a.out pads the text section to byte align, so the fasely interpreted // instructions have to be removed. self.remove_trailing_padding(); // read instructions from disassembler object instead of decode function // to allow some error's to act as warnings (see below) let instructions = self.instructions.clone(); // allow for warning-type errors to pass through, as they are not fatal match is_ok { Ok(_) => Ok(instructions), Err(e) => match e { DisasmError::EndOfTextSection => { log::debug!("Solo padded 0-byte at end of file was found. Ignoring."); Ok(instructions) } _ => { println!("Encountered error during disassembly: {e}"); Err(e) } }, } } /// Parse a single byte of the binary and advance the offset. 
/// Returns the read byte (Intel b operand).
///
/// # Errors
/// When the current offset is already the last byte of the text section,
/// returns [`DisasmError::EndOfTextSection`] if that last byte is `0x00` and
/// [`DisasmError::ReadBeyondTextSection`] otherwise.
fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
    log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);

    // The byte to read would lie behind the end of the text section.
    if self.offset + 1 == self.text.len() {
        // NOTE(review): this inspects the byte at the current offset, which
        // may be an operand byte rather than a lone padding opcode — confirm
        // that treating it as padding is intended in that case.
        return Err(if self.text[self.offset] == 0 {
            // text section ends with a single 0x00 padding byte
            DisasmError::EndOfTextSection
        } else {
            // just an out of bounds read
            DisasmError::ReadBeyondTextSection
        });
    }
    self.offset += 1;

    let byte = *self
        .text
        .get(self.offset)
        .ok_or(DisasmError::ReadBeyondTextSection)?;
    log::debug!("Parsed byte {byte:#04x}");
    self.instruction.raw.push(byte);
    Ok(byte)
}

/// Parse a single word of the binary and advance the offset.
/// Just a wrapper for parsing a byte twice; the bytes are combined in
/// little-endian order.
/// Returns the read word (Intel w/v operand).
fn parse_word(&mut self) -> Result<Word, DisasmError> {
    log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
    let low = self.parse_byte()?;
    let high = self.parse_byte()?;
    Ok(Word::from_le_bytes([low, high]))
}

/// Parse a single byte of the binary, interpret it as signed and advance the
/// offset.
/// Returns the read byte added to the address of the subsequent instruction
/// to act as a relative offset (Intel Jb operand).
fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
    log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);

    // first interpret as 2-complement, then cast for addition
    let byte = self.parse_byte()? as IByte as isize;
    // the offset now rests on the last consumed byte
    let next_addr = (self.offset + 1) as isize;
    log::debug!(
        "Parsed Jb consists of {byte:#04x} + {next_addr:#04x} = {:#04x}",
        byte + next_addr
    );

    Ok(byte + next_addr)
}

/// Parse a word of the binary, interpret it as signed and advance the
/// offset.
/// Returns the read word added to the address of the subsequent instruction
/// to act as a relative offset (Intel Jw/Jv operand).
pub fn parse_j_word(&mut self) -> Result { log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset); // first interpret as 2-complement, then cast for addition let word = self.parse_word()? as IWord as isize; let next_addr = (self.offset + 1) as isize; log::debug!( "Parsed Jv consists of {word:#04x} + {next_addr:#04x} = {:#04x}", word + next_addr ); Ok(word + next_addr) } /// Parse a single pointer of the binary and advance the offset. /// Just a wrapper for parsing a byte 4 types and constructing a pointer /// type. /// Returns the read pointer (Intel p operand). fn parse_ptr(&mut self) -> Result { log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset); let byte0 = self.parse_byte()?; let byte1 = self.parse_byte()?; let byte2 = self.parse_byte()?; let byte3 = self.parse_byte()?; Ok(Pointer32 { raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]), segment: Word::from_le_bytes([byte2, byte3]), offset: Word::from_le_bytes([byte0, byte1]), }) } /// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or /// Register) from that byte and advance the offset. /// It is always just a single byte, even for word-width instructions. /// Returns the [`ModRmTarget`] (either memory or a register) as well as the /// `reg` bitfield, which will later be used to determine another register /// or even mnemonic in the group-type instructions. fn parse_modrm_byte( &mut self, register_width: Operand, ) -> Result<(ModRmTarget, RegisterId), DisasmError> { let modrm = self.parse_byte()?; let mode = (modrm >> 6) & 0b11; let reg = (modrm >> 3) & 0b111; let rm = modrm & 0b111; log::debug!( "{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}", modrm, mode, reg, rm ); let mut displacement = None; match mode { 0b00 => { if rm == 0b110 { let word = Displacement::IWord(self.parse_word()? 
as IWord); log::debug!("ModRM direct memory read at {word:?}"); displacement = Some(word); return Ok(( ModRmTarget::Memory(MemoryIndex { base: None, index: None, displacement, }), reg, )); } else { log::debug!("ModRM does not have a displacement"); displacement = None; } } 0b01 => { let byte = Displacement::IByte(self.parse_byte()? as IByte); log::debug!("ModRM has a single byte of displacement: {byte}."); displacement = Some(byte); } 0b10 => { let word = Displacement::IWord(self.parse_word()? as IWord); log::debug!("ModRM has a single word of displacement: {word}"); displacement = Some(word); } 0b11 => { log::debug!( "ModRM selected Register to Register: ({rm:#b}) to/from RegID ({reg:#b})" ); let target = match register_width { Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?), Operand::Word(_) => { ModRmTarget::Register(Register::by_id(Operand::Word(rm as Word))?) } }; return Ok((target, reg)); } _ => return Err(DisasmError::IllegalModRMByteMode(mode)), }; let index = match rm { 0b0000 => MemoryIndex { base: Some(Register::BX), index: Some(Register::SI), displacement, }, 0b0001 => MemoryIndex { base: Some(Register::BX), index: Some(Register::DI), displacement, }, 0b0010 => MemoryIndex { base: Some(Register::BP), index: Some(Register::SI), displacement, }, 0b0011 => MemoryIndex { base: Some(Register::BP), index: Some(Register::DI), displacement, }, 0b0100 => MemoryIndex { base: None, index: Some(Register::SI), displacement, }, 0b0101 => MemoryIndex { base: None, index: Some(Register::DI), displacement, }, 0b0110 => MemoryIndex { base: Some(Register::BP), index: None, displacement, }, 0b0111 => MemoryIndex { base: Some(Register::BX), index: None, displacement, }, _ => return Err(DisasmError::IllegalModRMByteIndex(rm)), }; Ok((ModRmTarget::Memory(index), reg)) } /// Match the ModRM `reg` bitfield to Intel Group 1-type instructions. 
Group /// 1 always has an [`ModRmTarget`] as first and a [`Register`] as second /// operand, which is determined by the ModRM `reg` field, aswell as the /// bit-width of the instruction currently being parsed. fn modrm_reg_to_grp1( reg: u8, target: ModRmTarget, instruction_width: Operand, ) -> Result { match instruction_width { Operand::Byte(b) => match reg { 0b000 => Ok(Mnemonic::ADD_Ib(target, b)), 0b001 => Ok(Mnemonic::OR_Ib(target, b)), 0b010 => Ok(Mnemonic::ADC_Ib(target, b)), 0b011 => Ok(Mnemonic::SBB_Ib(target, b)), 0b100 => Ok(Mnemonic::AND_Ib(target, b)), 0b101 => Ok(Mnemonic::SUB_Ib(target, b)), 0b110 => Ok(Mnemonic::XOR_Ib(target, b)), 0b111 => Ok(Mnemonic::CMP_Ib(target, b)), _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), }, Operand::Word(w) => match reg { 0b000 => Ok(Mnemonic::ADD_Iv(target, w)), 0b001 => Ok(Mnemonic::OR_Iv(target, w)), 0b010 => Ok(Mnemonic::ADC_Iv(target, w)), 0b011 => Ok(Mnemonic::SBB_Iv(target, w)), 0b100 => Ok(Mnemonic::AND_Iv(target, w)), 0b101 => Ok(Mnemonic::SUB_Iv(target, w)), 0b110 => Ok(Mnemonic::XOR_Iv(target, w)), 0b111 => Ok(Mnemonic::CMP_Iv(target, w)), _ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)), }, } } /// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2 /// always only has a single operand, the other is either `1` or the `CL` /// register. /// This function assumes the operand to be `1`. /// See [`Self::modrm_reg_to_grp2_cl`] for the counter part. 
fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
    match reg {
        0b000 => Ok(Mnemonic::ROL_b(target, 1)),
        0b001 => Ok(Mnemonic::ROR_b(target, 1)),
        0b010 => Ok(Mnemonic::RCL_b(target, 1)),
        0b011 => Ok(Mnemonic::RCR_b(target, 1)),
        0b100 => Ok(Mnemonic::SHL_b(target, 1)),
        0b101 => Ok(Mnemonic::SHR_b(target, 1)),
        // 0b110 => unused in the 8086 opcode map; only 0b111 encodes SAR
        0b111 => Ok(Mnemonic::SAR_b(target, 1)),
        _ => Err(DisasmError::IllegalGroupMnemonic(2, reg)),
    }
}

/// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2
/// always only has a single operand, the other is either `1` or the `CL`
/// register.
/// This function assumes the operand to be [`Register::CL`].
/// See [`Self::modrm_reg_to_grp2_1`] for the counter part.
fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
    match reg {
        0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)),
        0b001 => Ok(Mnemonic::ROR_fromReg(target, Register::CL)),
        0b010 => Ok(Mnemonic::RCL_fromReg(target, Register::CL)),
        0b011 => Ok(Mnemonic::RCR_fromReg(target, Register::CL)),
        0b100 => Ok(Mnemonic::SHL_fromReg(target, Register::CL)),
        0b101 => Ok(Mnemonic::SHR_fromReg(target, Register::CL)),
        // 0b110 => unused in the 8086 opcode map; only 0b111 encodes SAR
        0b111 => Ok(Mnemonic::SAR_fromReg(target, Register::CL)),
        _ => Err(DisasmError::IllegalGroupMnemonic(2, reg)),
    }
}

/// Match the ModRM `reg` bits to Intel Group 3a/b-type instructions.
/// Group 3 selects an unary mnemonic with the `reg` bit field. The operand
/// is the [`ModRmTarget`].
fn modrm_reg_to_grp3( &mut self, reg: u8, target: ModRmTarget, width: Operand, ) -> Result { match reg { 0b000 => match width { Operand::Byte(_) => Ok(Mnemonic::TEST_Ib(target, self.parse_byte()?)), Operand::Word(_) => Ok(Mnemonic::TEST_Iv(target, self.parse_word()?)), }, // 0b001 => // unused 0b010 => Ok(Mnemonic::NOT(target)), 0b011 => Ok(Mnemonic::NEG(target)), 0b100 => Ok(Mnemonic::MUL(target)), 0b101 => Ok(Mnemonic::IMUL(target)), 0b110 => Ok(Mnemonic::DIV(target)), 0b111 => Ok(Mnemonic::IDIV(target)), _ => Err(DisasmError::IllegalGroupMnemonic(3, reg)), } } /// a.out pads the text section with 0x00 bytes. During parsing, these get /// interpreted as `0x00 0x00`, which have to get removed for an authentic /// disassembly. /// This is done in favor of removing all 0x00 bytes in the beginning, /// as this could remove an actual `0x00` byte as operand of the final /// real instruction. Of course, this could remove an actual `0x00 0x00` /// instruction from the end, but they would not have any effect on /// execution anyway. fn remove_trailing_padding(&mut self) { let mut until = self.instructions.len(); for i in self.instructions.iter().rev() { match i.opcode { // 0x00 0x00 in binary Mnemonic::ADD_FromReg( ModRmTarget::Memory(MemoryIndex { base: Some(Register::BX), index: Some(Register::SI), displacement: None, }), Register::AL, ) => until -= 1, // stop when another instruction is hit _ => break, } } log::debug!( "Truncated file by {} bytes by removing trailing padding bytes.", self.text.len() - until ); self.instructions.truncate(until); } /// Decode instructions by matching byte signature to their mnemonics and /// depending on the instruction, parsing some operands afterwards. /// All parsing is done in capsulated functions, here everything just /// gets consolodated. 
fn decode_instructions(&mut self) -> Result<(), DisasmError> { log::debug!("Starting to decode text of length {}", self.text.len()); while self.offset < self.text.len() { // reset mutable current instruction self.instruction = Instruction::new(); self.instruction.start = self.offset; // fetch next opcode let opcode = self.text[self.offset]; // additional raw bytes will be pushed by parse functions self.instruction.raw.push(opcode); log::debug!("Parsing next opcode with opcode: {opcode:#04}"); self.instruction.opcode = match opcode { 0x00 => modrm_8b_register!(self, ADD_FromReg), 0x01 => modrm_16b_register!(self, ADD_FromReg), 0x02 => modrm_8b_register!(self, ADD_ToReg), 0x03 => modrm_16b_register!(self, ADD_ToReg), 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?), 0x05 => Mnemonic::ADD_AXIv(self.parse_word()?), 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), 0x07 => Mnemonic::POP_S(SegmentRegister::ES), 0x08 => modrm_8b_register!(self, OR_FromReg), 0x09 => modrm_16b_register!(self, OR_FromReg), 0x0A => modrm_8b_register!(self, OR_ToReg), 0x0B => modrm_16b_register!(self, OR_ToReg), 0x0C => Mnemonic::OR_ALIb(self.parse_byte()?), 0x0D => Mnemonic::OR_AXIv(self.parse_word()?), 0x0E => Mnemonic::PUSH_S(SegmentRegister::CS), 0x0F => return Err(DisasmError::OpcodeUndefined(opcode)), 0x10 => modrm_8b_register!(self, ADC_FromReg), 0x11 => modrm_16b_register!(self, ADC_FromReg), 0x12 => modrm_8b_register!(self, ADC_ToReg), 0x13 => modrm_16b_register!(self, ADC_ToReg), 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?), 0x15 => Mnemonic::ADC_AXIv(self.parse_word()?), 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), 0x17 => Mnemonic::POP_S(SegmentRegister::SS), 0x18 => modrm_8b_register!(self, SBB_FromReg), 0x19 => modrm_16b_register!(self, SBB_FromReg), 0x1A => modrm_8b_register!(self, SBB_ToReg), 0x1B => modrm_16b_register!(self, SBB_ToReg), 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?), 0x1D => Mnemonic::SBB_AXIv(self.parse_word()?), 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), 
0x1F => Mnemonic::POP_S(SegmentRegister::DS), 0x20 => modrm_8b_register!(self, AND_FromReg), 0x21 => modrm_16b_register!(self, AND_FromReg), 0x22 => modrm_8b_register!(self, AND_ToReg), 0x23 => modrm_16b_register!(self, AND_ToReg), 0x24 => Mnemonic::AND_ALIb(self.parse_byte()?), 0x25 => Mnemonic::AND_AXIv(self.parse_word()?), 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), 0x27 => Mnemonic::DAA, 0x28 => modrm_8b_register!(self, SUB_FromReg), 0x29 => modrm_16b_register!(self, SUB_FromReg), 0x2A => modrm_8b_register!(self, SUB_ToReg), 0x2B => modrm_16b_register!(self, SUB_ToReg), 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?), 0x2D => Mnemonic::SUB_AXIv(self.parse_word()?), 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), 0x2F => Mnemonic::DAS, 0x30 => modrm_8b_register!(self, XOR_FromReg), 0x31 => modrm_16b_register!(self, XOR_FromReg), 0x32 => modrm_8b_register!(self, XOR_ToReg), 0x33 => modrm_16b_register!(self, XOR_ToReg), 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?), 0x35 => Mnemonic::XOR_AXIv(self.parse_word()?), 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), 0x37 => Mnemonic::AAA, 0x38 => modrm_8b_register!(self, CMP_FromReg), 0x39 => modrm_16b_register!(self, CMP_FromReg), 0x3A => modrm_8b_register!(self, CMP_ToReg), 0x3B => modrm_16b_register!(self, CMP_ToReg), 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?), 0x3D => Mnemonic::CMP_AXIv(self.parse_word()?), 0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS), 0x3F => Mnemonic::AAS, 0x40 => Mnemonic::INC_Reg(Register::AX), 0x41 => Mnemonic::INC_Reg(Register::CX), 0x42 => Mnemonic::INC_Reg(Register::DX), 0x43 => Mnemonic::INC_Reg(Register::BX), 0x44 => Mnemonic::INC_Reg(Register::SP), 0x45 => Mnemonic::INC_Reg(Register::BP), 0x46 => Mnemonic::INC_Reg(Register::SI), 0x47 => Mnemonic::INC_Reg(Register::DI), 0x48 => Mnemonic::DEC_Reg(Register::AX), 0x49 => Mnemonic::DEC_Reg(Register::CX), 0x4A => Mnemonic::DEC_Reg(Register::DX), 0x4B => Mnemonic::DEC_Reg(Register::BX), 0x4C => Mnemonic::DEC_Reg(Register::SP), 0x4D 
=> Mnemonic::DEC_Reg(Register::BP), 0x4E => Mnemonic::DEC_Reg(Register::SI), 0x4F => Mnemonic::DEC_Reg(Register::DI), 0x50 => Mnemonic::PUSH_R(Register::AX), 0x51 => Mnemonic::PUSH_R(Register::CX), 0x52 => Mnemonic::PUSH_R(Register::DX), 0x53 => Mnemonic::PUSH_R(Register::BX), 0x54 => Mnemonic::PUSH_R(Register::SP), 0x55 => Mnemonic::PUSH_R(Register::BP), 0x56 => Mnemonic::PUSH_R(Register::SI), 0x57 => Mnemonic::PUSH_R(Register::DI), 0x58 => Mnemonic::POP_R(Register::AX), 0x59 => Mnemonic::POP_R(Register::CX), 0x5A => Mnemonic::POP_R(Register::DX), 0x5B => Mnemonic::POP_R(Register::BX), 0x5C => Mnemonic::POP_R(Register::SP), 0x5D => Mnemonic::POP_R(Register::BP), 0x5E => Mnemonic::POP_R(Register::SI), 0x5F => Mnemonic::POP_R(Register::DI), 0x60..=0x6F => return Err(DisasmError::OpcodeUndefined(opcode)), 0x70 => Mnemonic::JO(self.parse_j_byte()?), 0x71 => Mnemonic::JNO(self.parse_j_byte()?), 0x72 => Mnemonic::JB(self.parse_j_byte()?), 0x73 => Mnemonic::JNB(self.parse_j_byte()?), 0x74 => Mnemonic::JZ(self.parse_j_byte()?), 0x75 => Mnemonic::JNZ(self.parse_j_byte()?), 0x76 => Mnemonic::JBE(self.parse_j_byte()?), 0x77 => Mnemonic::JA(self.parse_j_byte()?), 0x78 => Mnemonic::JS(self.parse_j_byte()?), 0x79 => Mnemonic::JNS(self.parse_j_byte()?), 0x7A => Mnemonic::JPE(self.parse_j_byte()?), 0x7B => Mnemonic::JPO(self.parse_j_byte()?), 0x7C => Mnemonic::JL(self.parse_j_byte()?), 0x7D => Mnemonic::JGE(self.parse_j_byte()?), 0x7E => Mnemonic::JLE(self.parse_j_byte()?), 0x7F => Mnemonic::JG(self.parse_j_byte()?), // Group 1 0x80 => { let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; let imm = self.parse_byte()?; Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? } 0x81 => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; let imm = self.parse_word()?; Self::modrm_reg_to_grp1(reg, target, Operand::Word(imm))? 
} 0x82 => { // same as 0x80 let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; let imm = self.parse_byte()?; Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? } 0x83 => { // byte extended version let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; let imm = self.parse_byte()?; Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))? } 0x84 => modrm_8b_register!(self, TEST), 0x85 => modrm_16b_register!(self, TEST), 0x86 => modrm_8b_register!(self, XCHG), 0x87 => modrm_16b_register!(self, XCHG), 0x88 => modrm_8b_register!(self, MOV_FromReg), 0x89 => modrm_16b_register!(self, MOV_FromReg), 0x8A => modrm_8b_register!(self, MOV_ToReg), 0x8B => modrm_16b_register!(self, MOV_ToReg), 0x8C => modrm_sregister!(self, MOV_FromSReg), 0x8E => modrm_sregister!(self, MOV_ToSReg), 0x8D => modrm_16b_register!(self, LEA), 0x8F => { let (target, _) = self.parse_modrm_byte(Operand::Word(0))?; let mem = match target { ModRmTarget::Memory(idx) => idx, _ => { return Err(DisasmError::IllegalOperand( "POP (memory) instruction given a register to pop into".into(), )); } }; Mnemonic::POP_M(mem) } 0x90 => Mnemonic::NOP(), 0x91 => Mnemonic::XCHG_AX(Register::CX), 0x92 => Mnemonic::XCHG_AX(Register::DX), 0x93 => Mnemonic::XCHG_AX(Register::BX), 0x94 => Mnemonic::XCHG_AX(Register::SP), 0x95 => Mnemonic::XCHG_AX(Register::BP), 0x96 => Mnemonic::XCHG_AX(Register::SI), 0x97 => Mnemonic::XCHG_AX(Register::DI), 0x98 => Mnemonic::CBW, 0x99 => Mnemonic::CWD, 0x9A => Mnemonic::CALL_p(self.parse_ptr()?), 0x9B => Mnemonic::WAIT, 0x9C => Mnemonic::PUSHF, 0x9D => Mnemonic::POPF, 0x9E => Mnemonic::SAHF, 0x9F => Mnemonic::LAHF, 0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()?), 0xA1 => Mnemonic::MOV_AX0v(self.parse_word()?), 0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()?), 0xA3 => Mnemonic::MOV_0vAX(self.parse_word()?), 0xA4 => Mnemonic::MOVSB, 0xA5 => Mnemonic::MOVSW, 0xA6 => Mnemonic::CMPSB, 0xA7 => Mnemonic::CMPSW, 0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()?), 0xA9 => 
Mnemonic::TEST_AXIv(self.parse_word()?), 0xAA => Mnemonic::STOSB, 0xAB => Mnemonic::STOSW, 0xAC => Mnemonic::LODSB, 0xAD => Mnemonic::LODSW, 0xAE => Mnemonic::SCASB, 0xAF => Mnemonic::SCASW, 0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()?), 0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()?), 0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()?), 0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()?), 0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()?), 0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()?), 0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()?), 0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()?), 0xB8 => Mnemonic::MOV_AXIv(self.parse_word()?), 0xB9 => Mnemonic::MOV_CXIv(self.parse_word()?), 0xBA => Mnemonic::MOV_DXIv(self.parse_word()?), 0xBB => Mnemonic::MOV_BXIv(self.parse_word()?), 0xBC => Mnemonic::MOV_SPIv(self.parse_word()?), 0xBD => Mnemonic::MOV_BPIv(self.parse_word()?), 0xBE => Mnemonic::MOV_SIIv(self.parse_word()?), 0xBF => Mnemonic::MOV_DIIv(self.parse_word()?), 0xC0..=0xC1 => return Err(DisasmError::OpcodeUndefined(opcode)), 0xC2 => Mnemonic::RET_Iw(self.parse_word()?), 0xC3 => Mnemonic::RET, 0xC4 => { let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?; let reg = Register::by_id(Operand::Word(reg_id as Word))?; let ptr = Pointer16::try_from(target)?; Mnemonic::LES(reg, ptr) } 0xC5 => { let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?; let reg = Register::by_id(Operand::Word(reg_id as Word))?; let ptr = Pointer16::try_from(target)?; Mnemonic::LDS(reg, ptr) } 0xC6 => { let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?; Mnemonic::MOV_Ib(target, self.parse_byte()?) } 0xC7 => { let (target, _) = self.parse_modrm_byte(Operand::Word(0))?; Mnemonic::MOV_Iv(target, self.parse_word()?) 
} 0xC8..=0xC9 => return Err(DisasmError::OpcodeUndefined(opcode)), 0xCA => Mnemonic::RETF_Iw(self.parse_word()?), 0xCB => Mnemonic::RETF, 0xCC => Mnemonic::INT(3), 0xCD => Mnemonic::INT(self.parse_byte()?), 0xCE => Mnemonic::INTO, 0xCF => Mnemonic::IRET, // Group 2 0xD0 => { let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; Self::modrm_reg_to_grp2_1(reg, target)? } 0xD1 => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; Self::modrm_reg_to_grp2_1(reg, target)? } 0xD2 => { let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; Self::modrm_reg_to_grp2_cl(reg, target)? } 0xD3 => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; Self::modrm_reg_to_grp2_cl(reg, target)? } 0xD4 => Mnemonic::AAM(self.parse_byte()?), 0xD5 => Mnemonic::AAD(self.parse_byte()?), 0xD6 => return Err(DisasmError::OpcodeUndefined(opcode)), 0xD7 => Mnemonic::XLAT, 0xD8..=0xDF => return Err(DisasmError::OpcodeUndefined(opcode)), 0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()?), 0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()?), 0xE2 => Mnemonic::LOOP(self.parse_j_byte()?), 0xE3 => Mnemonic::JCXZ(self.parse_j_byte()?), 0xE4 => Mnemonic::IN_AL(self.parse_byte()?), 0xE5 => Mnemonic::IN_AX(self.parse_byte()?), 0xE6 => Mnemonic::OUT_AL(self.parse_byte()?), 0xE7 => Mnemonic::OUT_AX(self.parse_byte()?), 0xE8 => Mnemonic::CALL_v(self.parse_j_word()?), 0xE9 => Mnemonic::JMP_v(self.parse_j_word()?), 0xEA => Mnemonic::JMP_p(self.parse_ptr()?), 0xEB => Mnemonic::JMP_b(self.parse_j_byte()?), 0xEC => Mnemonic::IN_ALDX, 0xED => Mnemonic::IN_AXDX, 0xEE => Mnemonic::OUT_ALDX, 0xEF => Mnemonic::OUT_AXDX, 0xF1 => return Err(DisasmError::OpcodeUndefined(opcode)), 0xF2 => Mnemonic::REPNZ, 0xF3 => Mnemonic::REPZ, 0xF4 => Mnemonic::HLT, 0xF5 => Mnemonic::CMC, // Group 3a 0xF6 => { let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; self.modrm_reg_to_grp3(reg, target, Operand::Byte(0))? 
} // Group 3b 0xF7 => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; self.modrm_reg_to_grp3(reg, target, Operand::Word(0))? } 0xF8 => Mnemonic::CLC, 0xF9 => Mnemonic::STC, 0xFA => Mnemonic::CLI, 0xFB => Mnemonic::STI, 0xFC => Mnemonic::CLD, 0xFD => Mnemonic::STD, // Group 4 0xFE => { let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?; match reg { 0b0 => Mnemonic::INC_Mod(target), 0b1 => Mnemonic::DEC_Mod(target), _ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)), } } // Group 5 0xFF => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?; match reg { 0b000 => Mnemonic::INC_Mod(target), 0b001 => Mnemonic::DEC_Mod(target), 0b010 => Mnemonic::CALL_Mod(target), 0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?), 0b100 => Mnemonic::JMP_Mod(target), 0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?), 0b110 => Mnemonic::PUSH_Mod(target), // 0b111 => unused _ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)), } } _ => { eprintln!("Encountered unknown instruction '0x{:x}'", opcode); eprintln!("Offset might be misaligned and data is being interpreted."); eprintln!("Existing to avoid further misinterpretation..."); exit(1); } }; // Save parsed instruction log::debug!("{}", self.instruction); self.instructions.push(self.instruction.clone()); // Advance offset to hover the next potential opcode self.offset += 1; } Ok(()) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_basic() { let text = Vec::from([0x0, 0x0]); let mut disassembler = Disassembler { offset: 0, text, instruction: Instruction::new(), instructions: Vec::new(), }; disassembler.decode_instructions().unwrap(); let instructions = disassembler.instructions; assert_eq!( instructions[0], Instruction { start: 0, raw: Vec::from([0, 0]), opcode: Mnemonic::ADD_FromReg( ModRmTarget::Memory(MemoryIndex { base: Some(Register::BX), index: Some(Register::SI), displacement: None }), Register::AL ) } ) } }