fix: align pointer parsing with spec
Previously, pointer parsing was completely wrong. It is now split into Pointer32, for immediates encoded as segment:offset, and Pointer16, for short jumps, which use DS or ES as the segment and the Pointer16 value as the offset.
This commit is contained in:
@@ -2,7 +2,8 @@
|
|||||||
|
|
||||||
use crate::aout::Aout;
|
use crate::aout::Aout;
|
||||||
use crate::operands::{
|
use crate::operands::{
|
||||||
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word,
|
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer16,
|
||||||
|
Pointer32, Word,
|
||||||
};
|
};
|
||||||
use crate::register::{Register, RegisterId, SegmentRegister};
|
use crate::register::{Register, RegisterId, SegmentRegister};
|
||||||
use crate::{
|
use crate::{
|
||||||
@@ -205,29 +206,20 @@ impl Disassembler {
|
|||||||
/// Just a wrapper for parsing a byte 4 times and constructing a pointer
|
/// Just a wrapper for parsing a byte 4 times and constructing a pointer
|
||||||
/// type.
|
/// type.
|
||||||
/// Returns the read pointer (Intel p operand).
|
/// Returns the read pointer (Intel p operand).
|
||||||
fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
|
fn parse_ptr(&mut self) -> Result<Pointer32, DisasmError> {
|
||||||
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
|
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
|
||||||
let byte0 = self.parse_byte()?;
|
let byte0 = self.parse_byte()?;
|
||||||
let byte1 = self.parse_byte()?;
|
let byte1 = self.parse_byte()?;
|
||||||
let byte2 = self.parse_byte()?;
|
let byte2 = self.parse_byte()?;
|
||||||
let byte3 = self.parse_byte()?;
|
let byte3 = self.parse_byte()?;
|
||||||
|
|
||||||
Ok(Pointer {
|
Ok(Pointer32 {
|
||||||
raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]),
|
raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]),
|
||||||
segment: Word::from_le_bytes([byte0, byte1]),
|
segment: Word::from_le_bytes([byte2, byte3]),
|
||||||
offset: Word::from_le_bytes([byte2, byte3]),
|
offset: Word::from_le_bytes([byte0, byte1]),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse an Mp Operand (Memory Pointer).
|
|
||||||
/// An Mp is a ModRM byte with the `reg` bits ignored and an additional
|
|
||||||
/// 2 [`Word`]s parsed for a [`Pointer`] type.
|
|
||||||
fn parse_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
|
|
||||||
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
||||||
let ptr = self.parse_ptr()?;
|
|
||||||
Ok((target, ptr))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or
|
/// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or
|
||||||
/// Register) from that byte and advance the offset.
|
/// Register) from that byte and advance the offset.
|
||||||
/// It is always just a single byte, even for word-width instructions.
|
/// It is always just a single byte, even for word-width instructions.
|
||||||
@@ -289,7 +281,7 @@ impl Disassembler {
|
|||||||
let target = match register_width {
|
let target = match register_width {
|
||||||
Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?),
|
Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?),
|
||||||
Operand::Word(_) => {
|
Operand::Word(_) => {
|
||||||
ModRmTarget::Register(Register::by_id(Operand::Word(rm.into()))?)
|
ModRmTarget::Register(Register::by_id(Operand::Word(rm as Word))?)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
return Ok((target, reg));
|
return Ok((target, reg));
|
||||||
@@ -491,6 +483,7 @@ impl Disassembler {
|
|||||||
// additional raw bytes will be pushed by parse functions
|
// additional raw bytes will be pushed by parse functions
|
||||||
self.instruction.raw.push(opcode);
|
self.instruction.raw.push(opcode);
|
||||||
|
|
||||||
|
log::debug!("Parsing next opcode with opcode: {opcode:#04}");
|
||||||
self.instruction.opcode = match opcode {
|
self.instruction.opcode = match opcode {
|
||||||
0x00 => modrm_8b_register!(self, ADD_FromReg),
|
0x00 => modrm_8b_register!(self, ADD_FromReg),
|
||||||
0x01 => modrm_16b_register!(self, ADD_FromReg),
|
0x01 => modrm_16b_register!(self, ADD_FromReg),
|
||||||
@@ -744,12 +737,16 @@ impl Disassembler {
|
|||||||
0xC3 => Mnemonic::RET,
|
0xC3 => Mnemonic::RET,
|
||||||
|
|
||||||
0xC4 => {
|
0xC4 => {
|
||||||
let (target, ptr) = self.parse_mp()?;
|
let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
|
||||||
Mnemonic::LES(target, ptr)
|
let reg = Register::by_id(Operand::Word(reg_id as Word))?;
|
||||||
|
let ptr = Pointer16::try_from(target)?;
|
||||||
|
Mnemonic::LES(reg, ptr)
|
||||||
}
|
}
|
||||||
0xC5 => {
|
0xC5 => {
|
||||||
let (target, ptr) = self.parse_mp()?;
|
let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
|
||||||
Mnemonic::LDS(target, ptr)
|
let reg = Register::by_id(Operand::Word(reg_id as Word))?;
|
||||||
|
let ptr = Pointer16::try_from(target)?;
|
||||||
|
Mnemonic::LDS(reg, ptr)
|
||||||
}
|
}
|
||||||
|
|
||||||
0xC6 => {
|
0xC6 => {
|
||||||
@@ -865,9 +862,9 @@ impl Disassembler {
|
|||||||
0b000 => Mnemonic::INC_Mod(target),
|
0b000 => Mnemonic::INC_Mod(target),
|
||||||
0b001 => Mnemonic::DEC_Mod(target),
|
0b001 => Mnemonic::DEC_Mod(target),
|
||||||
0b010 => Mnemonic::CALL_Mod(target),
|
0b010 => Mnemonic::CALL_Mod(target),
|
||||||
0b011 => Mnemonic::CALL_Mp(target, self.parse_ptr()?),
|
0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?),
|
||||||
0b100 => Mnemonic::JMP_Mod(target),
|
0b100 => Mnemonic::JMP_Mod(target),
|
||||||
0b101 => Mnemonic::JMP_Mp(target, self.parse_ptr()?),
|
0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?),
|
||||||
0b110 => Mnemonic::PUSH_Mod(target),
|
0b110 => Mnemonic::PUSH_Mod(target),
|
||||||
// 0b111 => unused
|
// 0b111 => unused
|
||||||
_ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)),
|
_ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)),
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! Internal abstraction of the 8086 instructions.
|
//! Internal abstraction of the 8086 instructions.
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
operands::{Byte, MemoryIndex, ModRmTarget, Pointer, Word},
|
operands::{Byte, MemoryIndex, ModRmTarget, Pointer16, Pointer32, Word},
|
||||||
register::{Register, SegmentRegister},
|
register::{Register, SegmentRegister},
|
||||||
};
|
};
|
||||||
use core::fmt;
|
use core::fmt;
|
||||||
@@ -202,16 +202,16 @@ pub enum Mnemonic {
|
|||||||
CBW,
|
CBW,
|
||||||
CWD,
|
CWD,
|
||||||
// CALL
|
// CALL
|
||||||
CALL_p(Pointer),
|
CALL_p(Pointer32),
|
||||||
CALL_v(isize),
|
CALL_v(isize),
|
||||||
CALL_Mod(ModRmTarget),
|
CALL_Mod(ModRmTarget),
|
||||||
CALL_Mp(ModRmTarget, Pointer),
|
CALL_Mp(Pointer16),
|
||||||
// JUMP
|
// JUMP
|
||||||
JMP_p(Pointer),
|
JMP_p(Pointer32),
|
||||||
JMP_b(isize), // parses IByte, but stores as isize
|
JMP_b(isize), // parses IByte, but stores as isize
|
||||||
JMP_v(isize), // parses IWord, but stores as isize
|
JMP_v(isize), // parses IWord, but stores as isize
|
||||||
JMP_Mod(ModRmTarget),
|
JMP_Mod(ModRmTarget),
|
||||||
JMP_Mp(ModRmTarget, Pointer),
|
JMP_Mp(Pointer16),
|
||||||
// WAIT
|
// WAIT
|
||||||
WAIT,
|
WAIT,
|
||||||
// Push/Pop Flags
|
// Push/Pop Flags
|
||||||
@@ -237,8 +237,8 @@ pub enum Mnemonic {
|
|||||||
RETF,
|
RETF,
|
||||||
IRET,
|
IRET,
|
||||||
// Load ES/DS Register
|
// Load ES/DS Register
|
||||||
LES(ModRmTarget, Pointer),
|
LES(Register, Pointer16),
|
||||||
LDS(ModRmTarget, Pointer),
|
LDS(Register, Pointer16),
|
||||||
// NOT
|
// NOT
|
||||||
NOT(ModRmTarget),
|
NOT(ModRmTarget),
|
||||||
// NEG
|
// NEG
|
||||||
@@ -444,13 +444,13 @@ impl fmt::Display for Mnemonic {
|
|||||||
Self::CALL_p(ptr) => write!(f, "call {ptr}"),
|
Self::CALL_p(ptr) => write!(f, "call {ptr}"),
|
||||||
Self::CALL_v(word) => write!(f, "call {word:#04x}"),
|
Self::CALL_v(word) => write!(f, "call {word:#04x}"),
|
||||||
Self::CALL_Mod(target) => write!(f, "call {target}"),
|
Self::CALL_Mod(target) => write!(f, "call {target}"),
|
||||||
Self::CALL_Mp(target, ptr) => write!(f, "call {target}, {ptr}"),
|
Self::CALL_Mp(ptr) => write!(f, "call {ptr}"),
|
||||||
|
|
||||||
Self::JMP_p(ptr) => write!(f, "jmp {ptr}"),
|
Self::JMP_p(ptr) => write!(f, "jmp {ptr}"),
|
||||||
Self::JMP_b(byte) => write!(f, "jmp {byte:#04x}"),
|
Self::JMP_b(byte) => write!(f, "jmp {byte:#04x}"),
|
||||||
Self::JMP_v(word) => write!(f, "jmp {word:#04x}"),
|
Self::JMP_v(word) => write!(f, "jmp {word:#04x}"),
|
||||||
Self::JMP_Mod(target) => write!(f, "jmp {target}"),
|
Self::JMP_Mod(target) => write!(f, "jmp {target}"),
|
||||||
Self::JMP_Mp(target, ptr) => write!(f, "jmp {target}, {ptr}"),
|
Self::JMP_Mp(ptr) => write!(f, "jmp {ptr}"),
|
||||||
|
|
||||||
Self::WAIT => write!(f, "wait"),
|
Self::WAIT => write!(f, "wait"),
|
||||||
|
|
||||||
@@ -476,8 +476,8 @@ impl fmt::Display for Mnemonic {
|
|||||||
Self::RETF => write!(f, "retf"),
|
Self::RETF => write!(f, "retf"),
|
||||||
Self::IRET => write!(f, "iret"),
|
Self::IRET => write!(f, "iret"),
|
||||||
|
|
||||||
Self::LES(target, ptr) => write!(f, "les {target}, {ptr}"),
|
Self::LES(reg, mem) => write!(f, "les {reg}, ptr {mem}"),
|
||||||
Self::LDS(target, ptr) => write!(f, "lds {target}, {ptr}"),
|
Self::LDS(reg, mem) => write!(f, "lds {reg}, ptr {mem}"),
|
||||||
|
|
||||||
Self::NOT(target) => write!(f, "not {target}"),
|
Self::NOT(target) => write!(f, "not {target}"),
|
||||||
Self::NEG(target) => write!(f, "neg {target}"),
|
Self::NEG(target) => write!(f, "neg {target}"),
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
//! All types which a Mnemonic can have as some kind of operand.
|
//! All types which a Mnemonic can have as some kind of operand.
|
||||||
//! This includes things such as immediates, ModRM byte targets, etc. etc.
|
//! This includes things such as immediates, ModRM byte targets, etc. etc.
|
||||||
|
|
||||||
use crate::register::Register;
|
use crate::{disasm::DisasmError, register::Register};
|
||||||
use core::fmt;
|
use core::fmt;
|
||||||
|
|
||||||
pub type Byte = u8; // b
|
pub type Byte = u8; // b
|
||||||
@@ -131,15 +131,52 @@ impl fmt::Display for MemoryIndex {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
/// 32-bit segment:offset pointer (e.g. for CALL instruction)
|
/// 16-bit pointer for access, usually with a [`SegmentRegister`] as segment
|
||||||
pub struct Pointer {
|
/// and [`Pointer16`] as offset.
|
||||||
|
pub struct Pointer16 {
|
||||||
|
pub word: Word,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Pointer16 {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
write!(f, "ptr [{:#04x}]", self.word)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<ModRmTarget> for Pointer16 {
|
||||||
|
type Error = DisasmError;
|
||||||
|
|
||||||
|
fn try_from(target: ModRmTarget) -> Result<Self, Self::Error> {
|
||||||
|
match target {
|
||||||
|
ModRmTarget::Memory(mem) => match mem.displacement {
|
||||||
|
Some(disp) => match disp {
|
||||||
|
Displacement::IWord(word) => Ok(Pointer16 { word: word as Word }),
|
||||||
|
_ => {
|
||||||
|
return Err(DisasmError::IllegalOperand("Word expected".into()));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
return Err(DisasmError::IllegalOperand("Displacement expected".into()));
|
||||||
|
}
|
||||||
|
},
|
||||||
|
_ => {
|
||||||
|
return Err(DisasmError::IllegalOperand("MemoryIndex expected".into()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
/// 32-bit segment:offset pointer for long jumps.
|
||||||
|
/// Both [`Word`]s are immediately encoded after the instruction
|
||||||
|
pub struct Pointer32 {
|
||||||
pub raw: DWord,
|
pub raw: DWord,
|
||||||
pub segment: Word,
|
pub segment: Word,
|
||||||
pub offset: Word,
|
pub offset: Word,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl std::fmt::Display for Pointer {
|
impl std::fmt::Display for Pointer32 {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
write!(f, "[{:#04x}] ({}:{})", self.raw, self.segment, self.offset)
|
write!(f, "{:#04x}:{:#04x}", self.segment, self.offset)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user