fix: align pointer parsing with spec

Previously pointer parsing was completely wrong.
Now split into Pointer32 for immediates with
segment:offset and Pointer16 for short jumps, which
use DS or ES as segment and the Pointer16 value
as offset.
This commit is contained in:
2025-05-28 13:28:35 +09:00
parent 0893969f4e
commit c396d33f76
3 changed files with 71 additions and 37 deletions

View File

@@ -2,7 +2,8 @@
use crate::aout::Aout; use crate::aout::Aout;
use crate::operands::{ use crate::operands::{
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word, Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer16,
Pointer32, Word,
}; };
use crate::register::{Register, RegisterId, SegmentRegister}; use crate::register::{Register, RegisterId, SegmentRegister};
use crate::{ use crate::{
@@ -205,29 +206,20 @@ impl Disassembler {
/// Just a wrapper for parsing a byte 4 times and constructing a pointer /// Just a wrapper for parsing a byte 4 times and constructing a pointer
/// type. /// type.
/// Returns the read pointer (Intel p operand). /// Returns the read pointer (Intel p operand).
fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> { fn parse_ptr(&mut self) -> Result<Pointer32, DisasmError> {
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset); log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
let byte0 = self.parse_byte()?; let byte0 = self.parse_byte()?;
let byte1 = self.parse_byte()?; let byte1 = self.parse_byte()?;
let byte2 = self.parse_byte()?; let byte2 = self.parse_byte()?;
let byte3 = self.parse_byte()?; let byte3 = self.parse_byte()?;
Ok(Pointer { Ok(Pointer32 {
raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]), raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]),
segment: Word::from_le_bytes([byte0, byte1]), segment: Word::from_le_bytes([byte2, byte3]),
offset: Word::from_le_bytes([byte2, byte3]), offset: Word::from_le_bytes([byte0, byte1]),
}) })
} }
/// Parse an Mp Operand (Memory Pointer).
/// An Mp is a ModRM byte with the `reg` bits ignored and an additional
/// 2 [`Word`]s parsed for a [`Pointer`] type.
fn parse_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
let ptr = self.parse_ptr()?;
Ok((target, ptr))
}
/// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or /// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or
/// Register) from that byte and advance the offset. /// Register) from that byte and advance the offset.
/// It is always just a single byte, even for word-width instructions. /// It is always just a single byte, even for word-width instructions.
@@ -289,7 +281,7 @@ impl Disassembler {
let target = match register_width { let target = match register_width {
Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?), Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?),
Operand::Word(_) => { Operand::Word(_) => {
ModRmTarget::Register(Register::by_id(Operand::Word(rm.into()))?) ModRmTarget::Register(Register::by_id(Operand::Word(rm as Word))?)
} }
}; };
return Ok((target, reg)); return Ok((target, reg));
@@ -491,6 +483,7 @@ impl Disassembler {
// additional raw bytes will be pushed by parse functions // additional raw bytes will be pushed by parse functions
self.instruction.raw.push(opcode); self.instruction.raw.push(opcode);
log::debug!("Parsing next opcode with opcode: {opcode:#04}");
self.instruction.opcode = match opcode { self.instruction.opcode = match opcode {
0x00 => modrm_8b_register!(self, ADD_FromReg), 0x00 => modrm_8b_register!(self, ADD_FromReg),
0x01 => modrm_16b_register!(self, ADD_FromReg), 0x01 => modrm_16b_register!(self, ADD_FromReg),
@@ -744,12 +737,16 @@ impl Disassembler {
0xC3 => Mnemonic::RET, 0xC3 => Mnemonic::RET,
0xC4 => { 0xC4 => {
let (target, ptr) = self.parse_mp()?; let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
Mnemonic::LES(target, ptr) let reg = Register::by_id(Operand::Word(reg_id as Word))?;
let ptr = Pointer16::try_from(target)?;
Mnemonic::LES(reg, ptr)
} }
0xC5 => { 0xC5 => {
let (target, ptr) = self.parse_mp()?; let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
Mnemonic::LDS(target, ptr) let reg = Register::by_id(Operand::Word(reg_id as Word))?;
let ptr = Pointer16::try_from(target)?;
Mnemonic::LDS(reg, ptr)
} }
0xC6 => { 0xC6 => {
@@ -865,9 +862,9 @@ impl Disassembler {
0b000 => Mnemonic::INC_Mod(target), 0b000 => Mnemonic::INC_Mod(target),
0b001 => Mnemonic::DEC_Mod(target), 0b001 => Mnemonic::DEC_Mod(target),
0b010 => Mnemonic::CALL_Mod(target), 0b010 => Mnemonic::CALL_Mod(target),
0b011 => Mnemonic::CALL_Mp(target, self.parse_ptr()?), 0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?),
0b100 => Mnemonic::JMP_Mod(target), 0b100 => Mnemonic::JMP_Mod(target),
0b101 => Mnemonic::JMP_Mp(target, self.parse_ptr()?), 0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?),
0b110 => Mnemonic::PUSH_Mod(target), 0b110 => Mnemonic::PUSH_Mod(target),
// 0b111 => unused // 0b111 => unused
_ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)), _ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)),

View File

@@ -1,7 +1,7 @@
//! Internal abstraction of the 8086 instructions. //! Internal abstraction of the 8086 instructions.
use crate::{ use crate::{
operands::{Byte, MemoryIndex, ModRmTarget, Pointer, Word}, operands::{Byte, MemoryIndex, ModRmTarget, Pointer16, Pointer32, Word},
register::{Register, SegmentRegister}, register::{Register, SegmentRegister},
}; };
use core::fmt; use core::fmt;
@@ -202,16 +202,16 @@ pub enum Mnemonic {
CBW, CBW,
CWD, CWD,
// CALL // CALL
CALL_p(Pointer), CALL_p(Pointer32),
CALL_v(isize), CALL_v(isize),
CALL_Mod(ModRmTarget), CALL_Mod(ModRmTarget),
CALL_Mp(ModRmTarget, Pointer), CALL_Mp(Pointer16),
// JUMP // JUMP
JMP_p(Pointer), JMP_p(Pointer32),
JMP_b(isize), // parses IByte, but stores as isize JMP_b(isize), // parses IByte, but stores as isize
JMP_v(isize), // parses IWord, but stores as isize JMP_v(isize), // parses IWord, but stores as isize
JMP_Mod(ModRmTarget), JMP_Mod(ModRmTarget),
JMP_Mp(ModRmTarget, Pointer), JMP_Mp(Pointer16),
// WAIT // WAIT
WAIT, WAIT,
// Push/Pop Flags // Push/Pop Flags
@@ -237,8 +237,8 @@ pub enum Mnemonic {
RETF, RETF,
IRET, IRET,
// Load ES/DS Register // Load ES/DS Register
LES(ModRmTarget, Pointer), LES(Register, Pointer16),
LDS(ModRmTarget, Pointer), LDS(Register, Pointer16),
// NOT // NOT
NOT(ModRmTarget), NOT(ModRmTarget),
// NEG // NEG
@@ -444,13 +444,13 @@ impl fmt::Display for Mnemonic {
Self::CALL_p(ptr) => write!(f, "call {ptr}"), Self::CALL_p(ptr) => write!(f, "call {ptr}"),
Self::CALL_v(word) => write!(f, "call {word:#04x}"), Self::CALL_v(word) => write!(f, "call {word:#04x}"),
Self::CALL_Mod(target) => write!(f, "call {target}"), Self::CALL_Mod(target) => write!(f, "call {target}"),
Self::CALL_Mp(target, ptr) => write!(f, "call {target}, {ptr}"), Self::CALL_Mp(ptr) => write!(f, "call {ptr}"),
Self::JMP_p(ptr) => write!(f, "jmp {ptr}"), Self::JMP_p(ptr) => write!(f, "jmp {ptr}"),
Self::JMP_b(byte) => write!(f, "jmp {byte:#04x}"), Self::JMP_b(byte) => write!(f, "jmp {byte:#04x}"),
Self::JMP_v(word) => write!(f, "jmp {word:#04x}"), Self::JMP_v(word) => write!(f, "jmp {word:#04x}"),
Self::JMP_Mod(target) => write!(f, "jmp {target}"), Self::JMP_Mod(target) => write!(f, "jmp {target}"),
Self::JMP_Mp(target, ptr) => write!(f, "jmp {target}, {ptr}"), Self::JMP_Mp(ptr) => write!(f, "jmp {ptr}"),
Self::WAIT => write!(f, "wait"), Self::WAIT => write!(f, "wait"),
@@ -476,8 +476,8 @@ impl fmt::Display for Mnemonic {
Self::RETF => write!(f, "retf"), Self::RETF => write!(f, "retf"),
Self::IRET => write!(f, "iret"), Self::IRET => write!(f, "iret"),
Self::LES(target, ptr) => write!(f, "les {target}, {ptr}"), Self::LES(reg, mem) => write!(f, "les {reg}, ptr {mem}"),
Self::LDS(target, ptr) => write!(f, "lds {target}, {ptr}"), Self::LDS(reg, mem) => write!(f, "lds {reg}, ptr {mem}"),
Self::NOT(target) => write!(f, "not {target}"), Self::NOT(target) => write!(f, "not {target}"),
Self::NEG(target) => write!(f, "neg {target}"), Self::NEG(target) => write!(f, "neg {target}"),

View File

@@ -1,7 +1,7 @@
//! All types which a Mnemonic can have as some kind of operand. //! All types which a Mnemonic can have as some kind of operand.
//! This includes things such as immediates, ModRM byte targets, etc. etc. //! This includes things such as immediates, ModRM byte targets, etc. etc.
use crate::register::Register; use crate::{disasm::DisasmError, register::Register};
use core::fmt; use core::fmt;
pub type Byte = u8; // b pub type Byte = u8; // b
@@ -131,15 +131,52 @@ impl fmt::Display for MemoryIndex {
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
/// 32-bit segment:offset pointer (e.g. for CALL instruction) /// 16-bit pointer for access, usually with a [`SegmentRegister`] as segment
pub struct Pointer { /// and [`Pointer16`] as offset.
pub struct Pointer16 {
pub word: Word,
}
impl std::fmt::Display for Pointer16 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "ptr [{:#04x}]", self.word)
}
}
impl TryFrom<ModRmTarget> for Pointer16 {
type Error = DisasmError;
fn try_from(target: ModRmTarget) -> Result<Self, Self::Error> {
match target {
ModRmTarget::Memory(mem) => match mem.displacement {
Some(disp) => match disp {
Displacement::IWord(word) => Ok(Pointer16 { word: word as Word }),
_ => {
return Err(DisasmError::IllegalOperand("Word expected".into()));
}
},
_ => {
return Err(DisasmError::IllegalOperand("Displacement expected".into()));
}
},
_ => {
return Err(DisasmError::IllegalOperand("MemoryIndex expected".into()));
}
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
/// 32-bit segment:offset pointer for long jumps.
/// Both [`Word`]s are immediately encoded after the instruction
pub struct Pointer32 {
pub raw: DWord, pub raw: DWord,
pub segment: Word, pub segment: Word,
pub offset: Word, pub offset: Word,
} }
impl std::fmt::Display for Pointer { impl std::fmt::Display for Pointer32 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "[{:#04x}] ({}:{})", self.raw, self.segment, self.offset) write!(f, "{:#04x}:{:#04x}", self.segment, self.offset)
} }
} }