From c396d33f76e73961e9a575161f4544d7c3e87b03 Mon Sep 17 00:00:00 2001 From: Marco Thomas Date: Wed, 28 May 2025 13:28:35 +0900 Subject: [PATCH] fix: align pointer parsing with spec Previously pointer parsing was completely wrong. Now split into Pointer32 for immediates with segment:offset and Pointer16 for short jumps, which use DS or ES as segment and the Pointer16 value as offset. --- src/disasm.rs | 39 +++++++++++++++++-------------------- src/instructions.rs | 22 ++++++++++----------- src/operands.rs | 47 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 71 insertions(+), 37 deletions(-) diff --git a/src/disasm.rs b/src/disasm.rs index 3c94964..cf90aad 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -2,7 +2,8 @@ use crate::aout::Aout; use crate::operands::{ - Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word, + Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer16, + Pointer32, Word, }; use crate::register::{Register, RegisterId, SegmentRegister}; use crate::{ @@ -205,29 +206,20 @@ impl Disassembler { /// Just a wrapper for parsing a byte 4 types and constructing a pointer /// type. /// Returns the read pointer (Intel p operand). - fn parse_ptr(&mut self) -> Result { + fn parse_ptr(&mut self) -> Result { log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset); let byte0 = self.parse_byte()?; let byte1 = self.parse_byte()?; let byte2 = self.parse_byte()?; let byte3 = self.parse_byte()?; - Ok(Pointer { + Ok(Pointer32 { raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]), - segment: Word::from_le_bytes([byte0, byte1]), - offset: Word::from_le_bytes([byte2, byte3]), + segment: Word::from_le_bytes([byte2, byte3]), + offset: Word::from_le_bytes([byte0, byte1]), }) } - /// Parse an Mp Operand (Memory Pointer). - /// An Mp is a ModRM byte with the `reg` bits ignored and an additional - /// 2 [`Word`]s parsed for a [`Pointer`] type. 
- fn parse_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> { - let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?; - let ptr = self.parse_ptr()?; - Ok((target, ptr)) - } - /// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or /// Register) from that byte and advance the offset. /// It is always just a single byte, even for word-width instructions. @@ -289,7 +281,7 @@ impl Disassembler { let target = match register_width { Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?), Operand::Word(_) => { - ModRmTarget::Register(Register::by_id(Operand::Word(rm.into()))?) + ModRmTarget::Register(Register::by_id(Operand::Word(rm as Word))?) } }; return Ok((target, reg)); @@ -491,6 +483,7 @@ impl Disassembler { // additional raw bytes will be pushed by parse functions self.instruction.raw.push(opcode); + log::debug!("Parsing next opcode with opcode: {opcode:#04}"); self.instruction.opcode = match opcode { 0x00 => modrm_8b_register!(self, ADD_FromReg), 0x01 => modrm_16b_register!(self, ADD_FromReg), @@ -744,12 +737,16 @@ impl Disassembler { 0xC3 => Mnemonic::RET, 0xC4 => { - let (target, ptr) = self.parse_mp()?; - Mnemonic::LES(target, ptr) + let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?; + let reg = Register::by_id(Operand::Word(reg_id as Word))?; + let ptr = Pointer16::try_from(target)?; + Mnemonic::LES(reg, ptr) } 0xC5 => { - let (target, ptr) = self.parse_mp()?; - Mnemonic::LDS(target, ptr) + let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?; + let reg = Register::by_id(Operand::Word(reg_id as Word))?; + let ptr = Pointer16::try_from(target)?; + Mnemonic::LDS(reg, ptr) } 0xC6 => { @@ -865,9 +862,9 @@ impl Disassembler { 0b000 => Mnemonic::INC_Mod(target), 0b001 => Mnemonic::DEC_Mod(target), 0b010 => Mnemonic::CALL_Mod(target), - 0b011 => Mnemonic::CALL_Mp(target, self.parse_ptr()?), + 0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?), 0b100 => Mnemonic::JMP_Mod(target), 
- 0b101 => Mnemonic::JMP_Mp(target, self.parse_ptr()?), + 0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?), 0b110 => Mnemonic::PUSH_Mod(target), // 0b111 => unused _ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)), diff --git a/src/instructions.rs b/src/instructions.rs index 8badb48..32bcdfb 100644 --- a/src/instructions.rs +++ b/src/instructions.rs @@ -1,7 +1,7 @@ //! Internal abstraction of the 8086 instructions. use crate::{ - operands::{Byte, MemoryIndex, ModRmTarget, Pointer, Word}, + operands::{Byte, MemoryIndex, ModRmTarget, Pointer16, Pointer32, Word}, register::{Register, SegmentRegister}, }; use core::fmt; @@ -202,16 +202,16 @@ pub enum Mnemonic { CBW, CWD, // CALL - CALL_p(Pointer), + CALL_p(Pointer32), CALL_v(isize), CALL_Mod(ModRmTarget), - CALL_Mp(ModRmTarget, Pointer), + CALL_Mp(Pointer16), // JUMP - JMP_p(Pointer), + JMP_p(Pointer32), JMP_b(isize), // parses IByte, but stores as isize JMP_v(isize), // parwses IWord, but stores as isize JMP_Mod(ModRmTarget), - JMP_Mp(ModRmTarget, Pointer), + JMP_Mp(Pointer16), // WAIT WAIT, // Push/Pop Flags @@ -237,8 +237,8 @@ pub enum Mnemonic { RETF, IRET, // Load ES/DS Register - LES(ModRmTarget, Pointer), - LDS(ModRmTarget, Pointer), + LES(Register, Pointer16), + LDS(Register, Pointer16), // NOT NOT(ModRmTarget), // NEG @@ -444,13 +444,13 @@ impl fmt::Display for Mnemonic { Self::CALL_p(ptr) => write!(f, "call {ptr}"), Self::CALL_v(word) => write!(f, "call {word:#04x}"), Self::CALL_Mod(target) => write!(f, "call {target}"), - Self::CALL_Mp(target, ptr) => write!(f, "call {target}, {ptr}"), + Self::CALL_Mp(ptr) => write!(f, "call {ptr}"), Self::JMP_p(ptr) => write!(f, "jmp {ptr}"), Self::JMP_b(byte) => write!(f, "jmp {byte:#04x}"), Self::JMP_v(word) => write!(f, "jmp {word:#04x}"), Self::JMP_Mod(target) => write!(f, "jmp {target}"), - Self::JMP_Mp(target, ptr) => write!(f, "jmp {target}, {ptr}"), + Self::JMP_Mp(ptr) => write!(f, "jmp {ptr}"), Self::WAIT => write!(f, "wait"), @@ -476,8 +476,8 @@ 
impl fmt::Display for Mnemonic { Self::RETF => write!(f, "retf"), Self::IRET => write!(f, "iret"), - Self::LES(target, ptr) => write!(f, "les {target}, {ptr}"), - Self::LDS(target, ptr) => write!(f, "lds {target}, {ptr}"), + Self::LES(reg, mem) => write!(f, "les {reg}, ptr {mem}"), + Self::LDS(reg, mem) => write!(f, "lds {reg}, ptr {mem}"), Self::NOT(target) => write!(f, "not {target}"), Self::NEG(target) => write!(f, "neg {target}"), diff --git a/src/operands.rs b/src/operands.rs index 529ba07..d5f2718 100644 --- a/src/operands.rs +++ b/src/operands.rs @@ -1,7 +1,7 @@ //! All types which a Mnemonic can have as some kind of operand. //! This includes things such as immediates, ModRM byte targets, etc. etc. -use crate::register::Register; +use crate::{disasm::DisasmError, register::Register}; use core::fmt; pub type Byte = u8; // b @@ -131,15 +131,52 @@ impl fmt::Display for MemoryIndex { } #[derive(Debug, Clone, PartialEq, Eq)] -/// 32-bit segment:offset pointer (e.g. for CALL instruction) -pub struct Pointer { +/// 16-bit pointer for access, usually with a [`SegmentRegister`] as segment +/// and [`Pointer16`] as offset. +pub struct Pointer16 { + pub word: Word, +} + +impl std::fmt::Display for Pointer16 { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "ptr [{:#04x}]", self.word) + } +} + +impl TryFrom for Pointer16 { + type Error = DisasmError; + + fn try_from(target: ModRmTarget) -> Result { + match target { + ModRmTarget::Memory(mem) => match mem.displacement { + Some(disp) => match disp { + Displacement::IWord(word) => Ok(Pointer16 { word: word as Word }), + _ => { + return Err(DisasmError::IllegalOperand("Word expected".into())); + } + }, + _ => { + return Err(DisasmError::IllegalOperand("Displacement expected".into())); + } + }, + _ => { + return Err(DisasmError::IllegalOperand("MemoryIndex expected".into())); + } + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +/// 32-bit segment:offset pointer for long jumps. 
+/// Both [`Word`]s are immediately encoded after the instruction +pub struct Pointer32 { pub raw: DWord, pub segment: Word, pub offset: Word, } -impl std::fmt::Display for Pointer { +impl std::fmt::Display for Pointer32 { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "[{:#04x}] ({}:{})", self.raw, self.segment, self.offset) + write!(f, "{:#04x}:{:#04x}", self.segment, self.offset) } }