fix: fix displacement parsing

1. correctly return when only a displacement is used in ModRM memory addressing
Previously the disassembler did not return early when only a displacement
value should be used, without any base or index register.

2. Displacement can be negative, so use the signed version where applicable
This commit is contained in:
2025-05-14 14:34:47 +09:00
parent c0bb448d79
commit 227f1bd133
3 changed files with 64 additions and 34 deletions

1
README.md Normal file
View File

@@ -0,0 +1 @@
dont ask me why i didnt use nom, i remembered that it existed halfway through

View File

@@ -2,7 +2,7 @@ use core::fmt;
use std::{fs::File, io::Read, process::exit}; use std::{fs::File, io::Read, process::exit};
use crate::aout::Aout; use crate::aout::Aout;
use crate::instructions::{MemoryIndex, ModRmTarget, Operand, Pointer}; use crate::instructions::{Displacement, MemoryIndex, ModRmTarget, Operand, Pointer};
use crate::register::{Register, RegisterId, SegmentRegister}; use crate::register::{Register, RegisterId, SegmentRegister};
use crate::{ use crate::{
Args, Args,
@@ -123,7 +123,7 @@ impl Disassembler {
let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm); let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm);
log::debug!( log::debug!(
"0x{:04x} deconstructed into: 0b{:b}, 0b{:b}, 0b{:b}", "{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}",
modrm, modrm,
mode, mode,
reg, reg,
@@ -135,18 +135,26 @@ impl Disassembler {
0b00 => { 0b00 => {
if rm == 0b110 { if rm == 0b110 {
log::debug!("Additional word during ModRM parsing was read with mod 0."); log::debug!("Additional word during ModRM parsing was read with mod 0.");
displacement = Some(Operand::Word(self.parse_word())); displacement = Some(Displacement::IWord(self.parse_word() as i16));
return (
ModRmTarget::Memory(MemoryIndex {
base: None,
index: None,
displacement,
}),
reg,
);
} else { } else {
displacement = None; displacement = None;
} }
} }
0b01 => { 0b01 => {
log::debug!("Additional byte during ModRM parsing was read."); log::debug!("Additional byte during ModRM parsing was read.");
displacement = Some(Operand::Byte(self.parse_byte())) displacement = Some(Displacement::IByte(self.parse_byte() as i8))
} }
0b10 => { 0b10 => {
log::debug!("Additional word during ModRM parsing was read."); log::debug!("Additional word during ModRM parsing was read.");
displacement = Some(Operand::Word(self.parse_word())); displacement = Some(Displacement::IWord(self.parse_word() as i16));
} }
0b11 => { 0b11 => {
log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg); log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg);
@@ -213,9 +221,9 @@ impl Disassembler {
/// Group 1 always have an ModRM target (all modrm bits, without reg) as /// Group 1 always have an ModRM target (all modrm bits, without reg) as
/// first and an imm value as second operand (which has to be parsed before /// first and an imm value as second operand (which has to be parsed before
/// call to this function), but is available in both Byte and Word length. /// call to this function), but is available in both Byte and Word length.
pub fn modrm_reg_to_grp1(reg: u8, target: ModRmTarget, imm: Operand) -> Mnemonic { pub fn modrm_reg_to_grp1(reg: u8, target: ModRmTarget, imm: Displacement) -> Mnemonic {
match imm { match imm {
Operand::Byte(b) => match reg { Displacement::IByte(b) => match reg {
0b000 => Mnemonic::ADD_Ib(target, b), 0b000 => Mnemonic::ADD_Ib(target, b),
0b001 => Mnemonic::OR_Ib(target, b), 0b001 => Mnemonic::OR_Ib(target, b),
0b010 => Mnemonic::ADC_Ib(target, b), 0b010 => Mnemonic::ADC_Ib(target, b),
@@ -226,7 +234,7 @@ impl Disassembler {
0b111 => Mnemonic::CMP_Ib(target, b), 0b111 => Mnemonic::CMP_Ib(target, b),
_ => panic!("Illegal Group 1 mnemonic"), _ => panic!("Illegal Group 1 mnemonic"),
}, },
Operand::Word(w) => match reg { Displacement::IWord(w) => match reg {
0b000 => Mnemonic::ADD_Iv(target, w), 0b000 => Mnemonic::ADD_Iv(target, w),
0b001 => Mnemonic::OR_Iv(target, w), 0b001 => Mnemonic::OR_Iv(target, w),
0b010 => Mnemonic::ADC_Iv(target, w), 0b010 => Mnemonic::ADC_Iv(target, w),
@@ -454,25 +462,25 @@ impl Disassembler {
// Group 1 // Group 1
0x80 => { 0x80 => {
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); let (target, reg) = self.parse_modrm_byte(Operand::Byte(0));
let imm = self.parse_byte(); let imm = self.parse_byte() as i8;
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm))
} }
0x81 => { 0x81 => {
let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); let (target, reg) = self.parse_modrm_byte(Operand::Word(0));
let imm = self.parse_word(); let imm = self.parse_word() as i16;
Self::modrm_reg_to_grp1(reg, target, Operand::Word(imm)) Self::modrm_reg_to_grp1(reg, target, Displacement::IWord(imm))
} }
0x82 => { 0x82 => {
// same as 0x80 // same as 0x80
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); let (target, reg) = self.parse_modrm_byte(Operand::Byte(0));
let imm = self.parse_byte(); let imm = self.parse_byte() as i8;
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm))
} }
0x83 => { 0x83 => {
// byte extended version // byte extended version
let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); let (target, reg) = self.parse_modrm_byte(Operand::Word(0));
let imm = self.parse_byte(); let imm = self.parse_byte() as i8;
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm))
} }
0x84 => modrmb!(self, TEST), 0x84 => modrmb!(self, TEST),

View File

@@ -3,7 +3,9 @@ use core::fmt;
use crate::register::{Register, SegmentRegister}; use crate::register::{Register, SegmentRegister};
pub type Byte = u8; // b pub type Byte = u8; // b
pub type IByte = i8; // used for displacements of memory access
pub type Word = u16; // w or v pub type Word = u16; // w or v
pub type IWord = i16; // used for displacement of memory access
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
#[allow(dead_code)] #[allow(dead_code)]
@@ -79,8 +81,8 @@ pub enum Mnemonic {
// ADD // ADD
ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register
ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Reigster ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Reigster
ADD_Ib(ModRmTarget, Byte), // From Immediate into either Memory or Register ADD_Ib(ModRmTarget, IByte), // From Immediate into either Memory or Register
ADD_Iv(ModRmTarget, Word), // From Immediate into either Memory or Register ADD_Iv(ModRmTarget, IWord), // From Immediate into either Memory or Register
ADD_ALIb(Byte), ADD_ALIb(Byte),
ADD_AXIv(Word), ADD_AXIv(Word),
// PUSH // PUSH
@@ -94,29 +96,29 @@ pub enum Mnemonic {
// OR // OR
OR_FromReg(ModRmTarget, Register), OR_FromReg(ModRmTarget, Register),
OR_ToReg(ModRmTarget, Register), OR_ToReg(ModRmTarget, Register),
OR_Ib(ModRmTarget, Byte), OR_Ib(ModRmTarget, IByte),
OR_Iv(ModRmTarget, Word), OR_Iv(ModRmTarget, IWord),
OR_ALIb(Byte), OR_ALIb(Byte),
OR_AXIv(Word), OR_AXIv(Word),
// ADC // ADC
ADC_FromReg(ModRmTarget, Register), ADC_FromReg(ModRmTarget, Register),
ADC_ToReg(ModRmTarget, Register), ADC_ToReg(ModRmTarget, Register),
ADC_Ib(ModRmTarget, Byte), ADC_Ib(ModRmTarget, IByte),
ADC_Iv(ModRmTarget, Word), ADC_Iv(ModRmTarget, IWord),
ADC_ALIb(Byte), ADC_ALIb(Byte),
ADC_AXIv(Word), ADC_AXIv(Word),
// SBB // SBB
SBB_FromReg(ModRmTarget, Register), SBB_FromReg(ModRmTarget, Register),
SBB_ToReg(ModRmTarget, Register), SBB_ToReg(ModRmTarget, Register),
SBB_Ib(ModRmTarget, Byte), SBB_Ib(ModRmTarget, IByte),
SBB_Iv(ModRmTarget, Word), SBB_Iv(ModRmTarget, IWord),
SBB_ALIb(Byte), SBB_ALIb(Byte),
SBB_AXIv(Word), SBB_AXIv(Word),
// AND // AND
AND_FromReg(ModRmTarget, Register), AND_FromReg(ModRmTarget, Register),
AND_ToReg(ModRmTarget, Register), AND_ToReg(ModRmTarget, Register),
AND_Ib(ModRmTarget, Byte), AND_Ib(ModRmTarget, IByte),
AND_Iv(ModRmTarget, Word), AND_Iv(ModRmTarget, IWord),
AND_ALIb(Byte), AND_ALIb(Byte),
AND_AXIv(Word), AND_AXIv(Word),
// Override // Override
@@ -129,22 +131,22 @@ pub enum Mnemonic {
// SUB // SUB
SUB_FromReg(ModRmTarget, Register), SUB_FromReg(ModRmTarget, Register),
SUB_ToReg(ModRmTarget, Register), SUB_ToReg(ModRmTarget, Register),
SUB_Ib(ModRmTarget, Byte), SUB_Ib(ModRmTarget, IByte),
SUB_Iv(ModRmTarget, Word), SUB_Iv(ModRmTarget, IWord),
SUB_ALIb(Byte), SUB_ALIb(Byte),
SUB_AXIv(Word), SUB_AXIv(Word),
// XOR // XOR
XOR_FromReg(ModRmTarget, Register), XOR_FromReg(ModRmTarget, Register),
XOR_ToReg(ModRmTarget, Register), XOR_ToReg(ModRmTarget, Register),
XOR_Ib(ModRmTarget, Byte), XOR_Ib(ModRmTarget, IByte),
XOR_Iv(ModRmTarget, Word), XOR_Iv(ModRmTarget, IWord),
XOR_ALIb(Byte), XOR_ALIb(Byte),
XOR_AXIv(Word), XOR_AXIv(Word),
// CMP // CMP
CMP_FromReg(ModRmTarget, Register), CMP_FromReg(ModRmTarget, Register),
CMP_ToReg(ModRmTarget, Register), CMP_ToReg(ModRmTarget, Register),
CMP_Ib(ModRmTarget, Byte), CMP_Ib(ModRmTarget, IByte),
CMP_Iv(ModRmTarget, Word), CMP_Iv(ModRmTarget, IWord),
CMP_ALIb(Byte), CMP_ALIb(Byte),
CMP_AXIv(Word), CMP_AXIv(Word),
// INC // INC
@@ -307,13 +309,29 @@ impl std::fmt::Display for ModRmTarget {
} }
} }
#[derive(Debug, Clone)]
/// Displacements are signed versions of u8 and u16.
pub enum Displacement {
IByte(i8),
IWord(i16),
}
impl fmt::LowerHex for Displacement {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::IByte(b) => fmt::LowerHex::fmt(b, f),
Self::IWord(v) => fmt::LowerHex::fmt(v, f),
}
}
}
/// A memory index operand is usually created by ModRM bytes or words. /// A memory index operand is usually created by ModRM bytes or words.
/// e.g. [bx+si] /// e.g. [bx+si]
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct MemoryIndex { pub struct MemoryIndex {
pub base: Option<Register>, pub base: Option<Register>,
pub index: Option<Register>, pub index: Option<Register>,
pub displacement: Option<Operand>, pub displacement: Option<Displacement>,
} }
impl fmt::Display for MemoryIndex { impl fmt::Display for MemoryIndex {
@@ -336,7 +354,10 @@ impl fmt::Display for MemoryIndex {
Some(displacement) => write!(f, "[{}+{:04x}]", index, displacement), Some(displacement) => write!(f, "[{}+{:04x}]", index, displacement),
None => write!(f, "[{}]", index), None => write!(f, "[{}]", index),
}, },
None => panic!("Invalid MemoryIndex encountered"), None => match &self.displacement {
Some(displacement) => write!(f, "[{:04x}]", displacement),
None => panic!("Memory Index without base, index and displacement"),
},
}, },
} }
} }