fix: align pointer parsing with spec

Previously, pointer parsing was completely wrong.
It is now split into Pointer32 for immediates in
segment:offset form and Pointer16 for short jumps, which
use DS or ES as the segment and the Pointer16 value
as the offset.
This commit is contained in:
2025-05-28 13:28:35 +09:00
parent 0893969f4e
commit c396d33f76
3 changed files with 71 additions and 37 deletions

View File

@@ -2,7 +2,8 @@
use crate::aout::Aout;
use crate::operands::{
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word,
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer16,
Pointer32, Word,
};
use crate::register::{Register, RegisterId, SegmentRegister};
use crate::{
@@ -205,29 +206,20 @@ impl Disassembler {
/// Just a wrapper for parsing a byte 4 times and constructing a pointer
/// type.
/// Returns the read pointer (Intel p operand).
fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
fn parse_ptr(&mut self) -> Result<Pointer32, DisasmError> {
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
let byte0 = self.parse_byte()?;
let byte1 = self.parse_byte()?;
let byte2 = self.parse_byte()?;
let byte3 = self.parse_byte()?;
Ok(Pointer {
Ok(Pointer32 {
raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]),
segment: Word::from_le_bytes([byte0, byte1]),
offset: Word::from_le_bytes([byte2, byte3]),
segment: Word::from_le_bytes([byte2, byte3]),
offset: Word::from_le_bytes([byte0, byte1]),
})
}
/// Parse an Mp Operand (Memory Pointer).
/// An Mp is a ModRM byte with the `reg` bits ignored and an additional
/// 2 [`Word`]s parsed for a [`Pointer`] type.
fn parse_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
let ptr = self.parse_ptr()?;
Ok((target, ptr))
}
/// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or
/// Register) from that byte and advance the offset.
/// It is always just a single byte, even for word-width instructions.
@@ -289,7 +281,7 @@ impl Disassembler {
let target = match register_width {
Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?),
Operand::Word(_) => {
ModRmTarget::Register(Register::by_id(Operand::Word(rm.into()))?)
ModRmTarget::Register(Register::by_id(Operand::Word(rm as Word))?)
}
};
return Ok((target, reg));
@@ -491,6 +483,7 @@ impl Disassembler {
// additional raw bytes will be pushed by parse functions
self.instruction.raw.push(opcode);
log::debug!("Parsing next opcode with opcode: {opcode:#04}");
self.instruction.opcode = match opcode {
0x00 => modrm_8b_register!(self, ADD_FromReg),
0x01 => modrm_16b_register!(self, ADD_FromReg),
@@ -744,12 +737,16 @@ impl Disassembler {
0xC3 => Mnemonic::RET,
0xC4 => {
let (target, ptr) = self.parse_mp()?;
Mnemonic::LES(target, ptr)
let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
let reg = Register::by_id(Operand::Word(reg_id as Word))?;
let ptr = Pointer16::try_from(target)?;
Mnemonic::LES(reg, ptr)
}
0xC5 => {
let (target, ptr) = self.parse_mp()?;
Mnemonic::LDS(target, ptr)
let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
let reg = Register::by_id(Operand::Word(reg_id as Word))?;
let ptr = Pointer16::try_from(target)?;
Mnemonic::LDS(reg, ptr)
}
0xC6 => {
@@ -865,9 +862,9 @@ impl Disassembler {
0b000 => Mnemonic::INC_Mod(target),
0b001 => Mnemonic::DEC_Mod(target),
0b010 => Mnemonic::CALL_Mod(target),
0b011 => Mnemonic::CALL_Mp(target, self.parse_ptr()?),
0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?),
0b100 => Mnemonic::JMP_Mod(target),
0b101 => Mnemonic::JMP_Mp(target, self.parse_ptr()?),
0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?),
0b110 => Mnemonic::PUSH_Mod(target),
// 0b111 => unused
_ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)),