fix: fix displacement parsing

1. Correctly return when ModRM memory addressing uses only a displacement
Previously the disassembler wouldn't stop if only a displacement value
should be used, without any base or index register.

2. Displacement can be negative, so use the signed version where applicable
This commit is contained in:
2025-05-14 14:34:47 +09:00
parent c0bb448d79
commit 227f1bd133
3 changed files with 64 additions and 34 deletions

View File

@@ -2,7 +2,7 @@ use core::fmt;
use std::{fs::File, io::Read, process::exit};
use crate::aout::Aout;
use crate::instructions::{MemoryIndex, ModRmTarget, Operand, Pointer};
use crate::instructions::{Displacement, MemoryIndex, ModRmTarget, Operand, Pointer};
use crate::register::{Register, RegisterId, SegmentRegister};
use crate::{
Args,
@@ -123,7 +123,7 @@ impl Disassembler {
let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm);
log::debug!(
"0x{:04x} deconstructed into: 0b{:b}, 0b{:b}, 0b{:b}",
"{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}",
modrm,
mode,
reg,
@@ -135,18 +135,26 @@ impl Disassembler {
0b00 => {
if rm == 0b110 {
log::debug!("Additional word during ModRM parsing was read with mod 0.");
displacement = Some(Operand::Word(self.parse_word()));
displacement = Some(Displacement::IWord(self.parse_word() as i16));
return (
ModRmTarget::Memory(MemoryIndex {
base: None,
index: None,
displacement,
}),
reg,
);
} else {
displacement = None;
}
}
0b01 => {
log::debug!("Additional byte during ModRM parsing was read.");
displacement = Some(Operand::Byte(self.parse_byte()))
displacement = Some(Displacement::IByte(self.parse_byte() as i8))
}
0b10 => {
log::debug!("Additional word during ModRM parsing was read.");
displacement = Some(Operand::Word(self.parse_word()));
displacement = Some(Displacement::IWord(self.parse_word() as i16));
}
0b11 => {
log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg);
@@ -213,9 +221,9 @@ impl Disassembler {
/// Group 1 always have an ModRM target (all modrm bits, without reg) as
/// first and an imm value as second operand (which has to be parsed before
/// call to this function), but is available in both Byte and Word length.
pub fn modrm_reg_to_grp1(reg: u8, target: ModRmTarget, imm: Operand) -> Mnemonic {
pub fn modrm_reg_to_grp1(reg: u8, target: ModRmTarget, imm: Displacement) -> Mnemonic {
match imm {
Operand::Byte(b) => match reg {
Displacement::IByte(b) => match reg {
0b000 => Mnemonic::ADD_Ib(target, b),
0b001 => Mnemonic::OR_Ib(target, b),
0b010 => Mnemonic::ADC_Ib(target, b),
@@ -226,7 +234,7 @@ impl Disassembler {
0b111 => Mnemonic::CMP_Ib(target, b),
_ => panic!("Illegal Group 1 mnemonic"),
},
Operand::Word(w) => match reg {
Displacement::IWord(w) => match reg {
0b000 => Mnemonic::ADD_Iv(target, w),
0b001 => Mnemonic::OR_Iv(target, w),
0b010 => Mnemonic::ADC_Iv(target, w),
@@ -454,25 +462,25 @@ impl Disassembler {
// Group 1
0x80 => {
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0));
let imm = self.parse_byte();
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))
let imm = self.parse_byte() as i8;
Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm))
}
0x81 => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0));
let imm = self.parse_word();
Self::modrm_reg_to_grp1(reg, target, Operand::Word(imm))
let imm = self.parse_word() as i16;
Self::modrm_reg_to_grp1(reg, target, Displacement::IWord(imm))
}
0x82 => {
// same as 0x80
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0));
let imm = self.parse_byte();
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))
let imm = self.parse_byte() as i8;
Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm))
}
0x83 => {
// byte extended version
let (target, reg) = self.parse_modrm_byte(Operand::Word(0));
let imm = self.parse_byte();
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))
let imm = self.parse_byte() as i8;
Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm))
}
0x84 => modrmb!(self, TEST),

View File

@@ -3,7 +3,9 @@ use core::fmt;
use crate::register::{Register, SegmentRegister};
pub type Byte = u8; // b
pub type IByte = i8; // used for displacements of memory access
pub type Word = u16; // w or v
pub type IWord = i16; // used for displacement of memory access
#[derive(Debug, Clone)]
#[allow(dead_code)]
@@ -79,8 +81,8 @@ pub enum Mnemonic {
// ADD
ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register
ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Reigster
ADD_Ib(ModRmTarget, Byte), // From Immediate into either Memory or Register
ADD_Iv(ModRmTarget, Word), // From Immediate into either Memory or Register
ADD_Ib(ModRmTarget, IByte), // From Immediate into either Memory or Register
ADD_Iv(ModRmTarget, IWord), // From Immediate into either Memory or Register
ADD_ALIb(Byte),
ADD_AXIv(Word),
// PUSH
@@ -94,29 +96,29 @@ pub enum Mnemonic {
// OR
OR_FromReg(ModRmTarget, Register),
OR_ToReg(ModRmTarget, Register),
OR_Ib(ModRmTarget, Byte),
OR_Iv(ModRmTarget, Word),
OR_Ib(ModRmTarget, IByte),
OR_Iv(ModRmTarget, IWord),
OR_ALIb(Byte),
OR_AXIv(Word),
// ADC
ADC_FromReg(ModRmTarget, Register),
ADC_ToReg(ModRmTarget, Register),
ADC_Ib(ModRmTarget, Byte),
ADC_Iv(ModRmTarget, Word),
ADC_Ib(ModRmTarget, IByte),
ADC_Iv(ModRmTarget, IWord),
ADC_ALIb(Byte),
ADC_AXIv(Word),
// SBB
SBB_FromReg(ModRmTarget, Register),
SBB_ToReg(ModRmTarget, Register),
SBB_Ib(ModRmTarget, Byte),
SBB_Iv(ModRmTarget, Word),
SBB_Ib(ModRmTarget, IByte),
SBB_Iv(ModRmTarget, IWord),
SBB_ALIb(Byte),
SBB_AXIv(Word),
// AND
AND_FromReg(ModRmTarget, Register),
AND_ToReg(ModRmTarget, Register),
AND_Ib(ModRmTarget, Byte),
AND_Iv(ModRmTarget, Word),
AND_Ib(ModRmTarget, IByte),
AND_Iv(ModRmTarget, IWord),
AND_ALIb(Byte),
AND_AXIv(Word),
// Override
@@ -129,22 +131,22 @@ pub enum Mnemonic {
// SUB
SUB_FromReg(ModRmTarget, Register),
SUB_ToReg(ModRmTarget, Register),
SUB_Ib(ModRmTarget, Byte),
SUB_Iv(ModRmTarget, Word),
SUB_Ib(ModRmTarget, IByte),
SUB_Iv(ModRmTarget, IWord),
SUB_ALIb(Byte),
SUB_AXIv(Word),
// XOR
XOR_FromReg(ModRmTarget, Register),
XOR_ToReg(ModRmTarget, Register),
XOR_Ib(ModRmTarget, Byte),
XOR_Iv(ModRmTarget, Word),
XOR_Ib(ModRmTarget, IByte),
XOR_Iv(ModRmTarget, IWord),
XOR_ALIb(Byte),
XOR_AXIv(Word),
// CMP
CMP_FromReg(ModRmTarget, Register),
CMP_ToReg(ModRmTarget, Register),
CMP_Ib(ModRmTarget, Byte),
CMP_Iv(ModRmTarget, Word),
CMP_Ib(ModRmTarget, IByte),
CMP_Iv(ModRmTarget, IWord),
CMP_ALIb(Byte),
CMP_AXIv(Word),
// INC
@@ -307,13 +309,29 @@ impl std::fmt::Display for ModRmTarget {
}
}
#[derive(Debug, Clone)]
/// Displacements are signed versions of u8 and u16.
///
/// A displacement is the constant part of a memory operand; it is stored
/// signed so that negative offsets keep their meaning.
pub enum Displacement {
    /// 8-bit signed displacement.
    IByte(i8),
    /// 16-bit signed displacement.
    IWord(i16),
}

impl fmt::LowerHex for Displacement {
    /// Formats the displacement as lowercase two's-complement hexadecimal.
    ///
    /// A byte displacement is sign-extended to 16 bits before formatting, so
    /// that a negative `IByte` prints the same digits as the equal `IWord`
    /// (`IByte(-1)` renders as `ffff`, not `ff`). Without this, padding a
    /// negative byte with `{:04x}` would yield `00ff`, which denotes a
    /// different (positive) offset.
    ///
    /// All formatter flags (width, fill, `#`) are forwarded unchanged.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // Widen first: i8 -> i16 preserves the sign, so the hex digits
            // represent the value actually added to the address.
            Self::IByte(b) => fmt::LowerHex::fmt(&i16::from(*b), f),
            Self::IWord(v) => fmt::LowerHex::fmt(v, f),
        }
    }
}
/// A memory index operand is usually created by ModRM bytes or words.
/// e.g. [bx+si]
///
/// Each part is optional; which combinations are valid is decided by the
/// code that builds and displays this struct.
#[derive(Debug, Clone)]
pub struct MemoryIndex {
/// Base register of the address expression, if present.
pub base: Option<Register>,
/// Index register of the address expression, if present.
pub index: Option<Register>,
/// Constant displacement of the address expression, if present.
pub displacement: Option<Operand>,
pub displacement: Option<Displacement>,
}
impl fmt::Display for MemoryIndex {
@@ -336,7 +354,10 @@ impl fmt::Display for MemoryIndex {
Some(displacement) => write!(f, "[{}+{:04x}]", index, displacement),
None => write!(f, "[{}]", index),
},
None => panic!("Invalid MemoryIndex encountered"),
None => match &self.displacement {
Some(displacement) => write!(f, "[{:04x}]", displacement),
None => panic!("Memory Index without base, index and displacement"),
},
},
}
}