From 227f1bd133c5c76006666861bdd4e6283274f6d8 Mon Sep 17 00:00:00 2001 From: Marco Thomas Date: Wed, 14 May 2025 14:34:47 +0900 Subject: [PATCH] fix: fix displacement parsing 1. correctly return when only displacement in modrm mem addressing Previously the disassembler wouldn't stop if only a displacement value should be used without any base or offset index. 2. Displacement can be negative, so use the signed version where applicable --- README.md | 1 + src/disasm.rs | 40 ++++++++++++++++++------------- src/instructions.rs | 57 +++++++++++++++++++++++++++++++-------------- 3 files changed, 64 insertions(+), 34 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..b4132a9 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +dont ask me why i didnt use nom, i remembered that it existed halfway through diff --git a/src/disasm.rs b/src/disasm.rs index 4c2a37b..f31dde9 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -2,7 +2,7 @@ use core::fmt; use std::{fs::File, io::Read, process::exit}; use crate::aout::Aout; -use crate::instructions::{MemoryIndex, ModRmTarget, Operand, Pointer}; +use crate::instructions::{Displacement, MemoryIndex, ModRmTarget, Operand, Pointer}; use crate::register::{Register, RegisterId, SegmentRegister}; use crate::{ Args, @@ -123,7 +123,7 @@ impl Disassembler { let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm); log::debug!( - "0x{:04x} deconstructed into: 0b{:b}, 0b{:b}, 0b{:b}", + "{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}", modrm, mode, reg, @@ -135,18 +135,26 @@ impl Disassembler { 0b00 => { if rm == 0b110 { log::debug!("Additional word during ModRM parsing was read with mod 0."); - displacement = Some(Operand::Word(self.parse_word())); + displacement = Some(Displacement::IWord(self.parse_word() as i16)); + return ( + ModRmTarget::Memory(MemoryIndex { + base: None, + index: None, + displacement, + }), + reg, + ); } else { displacement = None; } } 0b01 => { log::debug!("Additional 
byte during ModRM parsing was read."); - displacement = Some(Operand::Byte(self.parse_byte())) + displacement = Some(Displacement::IByte(self.parse_byte() as i8)) } 0b10 => { log::debug!("Additional word during ModRM parsing was read."); - displacement = Some(Operand::Word(self.parse_word())); + displacement = Some(Displacement::IWord(self.parse_word() as i16)); } 0b11 => { log::debug!("ModRM ({:#b}) to/from Register ({:#b})", rm, reg); @@ -213,9 +221,9 @@ impl Disassembler { /// Group 1 always have an ModRM target (all modrm bits, without reg) as /// first and an imm value as second operand (which has to be parsed before /// call to this function), but is available in both Byte and Word length. - pub fn modrm_reg_to_grp1(reg: u8, target: ModRmTarget, imm: Operand) -> Mnemonic { + pub fn modrm_reg_to_grp1(reg: u8, target: ModRmTarget, imm: Displacement) -> Mnemonic { match imm { - Operand::Byte(b) => match reg { + Displacement::IByte(b) => match reg { 0b000 => Mnemonic::ADD_Ib(target, b), 0b001 => Mnemonic::OR_Ib(target, b), 0b010 => Mnemonic::ADC_Ib(target, b), @@ -226,7 +234,7 @@ impl Disassembler { 0b111 => Mnemonic::CMP_Ib(target, b), _ => panic!("Illegal Group 1 mnemonic"), }, - Operand::Word(w) => match reg { + Displacement::IWord(w) => match reg { 0b000 => Mnemonic::ADD_Iv(target, w), 0b001 => Mnemonic::OR_Iv(target, w), 0b010 => Mnemonic::ADC_Iv(target, w), @@ -454,25 +462,25 @@ impl Disassembler { // Group 1 0x80 => { let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); - let imm = self.parse_byte(); - Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) + let imm = self.parse_byte() as i8; + Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm)) } 0x81 => { let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - let imm = self.parse_word(); - Self::modrm_reg_to_grp1(reg, target, Operand::Word(imm)) + let imm = self.parse_word() as i16; + Self::modrm_reg_to_grp1(reg, target, Displacement::IWord(imm)) } 0x82 => { // same as 0x80 
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0)); - let imm = self.parse_byte(); - Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) + let imm = self.parse_byte() as i8; + Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm)) } 0x83 => { // byte extended version let (target, reg) = self.parse_modrm_byte(Operand::Word(0)); - let imm = self.parse_byte(); - Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm)) + let imm = self.parse_byte() as i8; + Self::modrm_reg_to_grp1(reg, target, Displacement::IByte(imm)) } 0x84 => modrmb!(self, TEST), diff --git a/src/instructions.rs b/src/instructions.rs index 7996d07..8e43ffa 100644 --- a/src/instructions.rs +++ b/src/instructions.rs @@ -3,7 +3,9 @@ use core::fmt; use crate::register::{Register, SegmentRegister}; pub type Byte = u8; // b +pub type IByte = i8; // used for displacements of memory access pub type Word = u16; // w or v +pub type IWord = i16; // used for displacement of memory access #[derive(Debug, Clone)] #[allow(dead_code)] @@ -79,8 +81,8 @@ pub enum Mnemonic { // ADD ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Reigster - ADD_Ib(ModRmTarget, Byte), // From Immediate into either Memory or Register - ADD_Iv(ModRmTarget, Word), // From Immediate into either Memory or Register + ADD_Ib(ModRmTarget, IByte), // From Immediate into either Memory or Register + ADD_Iv(ModRmTarget, IWord), // From Immediate into either Memory or Register ADD_ALIb(Byte), ADD_AXIv(Word), // PUSH @@ -94,29 +96,29 @@ pub enum Mnemonic { // OR OR_FromReg(ModRmTarget, Register), OR_ToReg(ModRmTarget, Register), - OR_Ib(ModRmTarget, Byte), - OR_Iv(ModRmTarget, Word), + OR_Ib(ModRmTarget, IByte), + OR_Iv(ModRmTarget, IWord), OR_ALIb(Byte), OR_AXIv(Word), // ADC ADC_FromReg(ModRmTarget, Register), ADC_ToReg(ModRmTarget, Register), - ADC_Ib(ModRmTarget, Byte), - ADC_Iv(ModRmTarget, Word), + 
ADC_Ib(ModRmTarget, IByte), + ADC_Iv(ModRmTarget, IWord), ADC_ALIb(Byte), ADC_AXIv(Word), // SBB SBB_FromReg(ModRmTarget, Register), SBB_ToReg(ModRmTarget, Register), - SBB_Ib(ModRmTarget, Byte), - SBB_Iv(ModRmTarget, Word), + SBB_Ib(ModRmTarget, IByte), + SBB_Iv(ModRmTarget, IWord), SBB_ALIb(Byte), SBB_AXIv(Word), // AND AND_FromReg(ModRmTarget, Register), AND_ToReg(ModRmTarget, Register), - AND_Ib(ModRmTarget, Byte), - AND_Iv(ModRmTarget, Word), + AND_Ib(ModRmTarget, IByte), + AND_Iv(ModRmTarget, IWord), AND_ALIb(Byte), AND_AXIv(Word), // Override @@ -129,22 +131,22 @@ pub enum Mnemonic { // SUB SUB_FromReg(ModRmTarget, Register), SUB_ToReg(ModRmTarget, Register), - SUB_Ib(ModRmTarget, Byte), - SUB_Iv(ModRmTarget, Word), + SUB_Ib(ModRmTarget, IByte), + SUB_Iv(ModRmTarget, IWord), SUB_ALIb(Byte), SUB_AXIv(Word), // XOR XOR_FromReg(ModRmTarget, Register), XOR_ToReg(ModRmTarget, Register), - XOR_Ib(ModRmTarget, Byte), - XOR_Iv(ModRmTarget, Word), + XOR_Ib(ModRmTarget, IByte), + XOR_Iv(ModRmTarget, IWord), XOR_ALIb(Byte), XOR_AXIv(Word), // CMP CMP_FromReg(ModRmTarget, Register), CMP_ToReg(ModRmTarget, Register), - CMP_Ib(ModRmTarget, Byte), - CMP_Iv(ModRmTarget, Word), + CMP_Ib(ModRmTarget, IByte), + CMP_Iv(ModRmTarget, IWord), CMP_ALIb(Byte), CMP_AXIv(Word), // INC @@ -307,13 +309,29 @@ impl std::fmt::Display for ModRmTarget { } } +#[derive(Debug, Clone)] +/// Displacements are signed versions of u8 and u16. +pub enum Displacement { + IByte(i8), + IWord(i16), +} + +impl fmt::LowerHex for Displacement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::IByte(b) => fmt::LowerHex::fmt(b, f), + Self::IWord(v) => fmt::LowerHex::fmt(v, f), + } + } +} + /// A memory index operand is usually created by ModRM bytes or words. /// e.g. 
[bx+si] #[derive(Debug, Clone)] pub struct MemoryIndex { pub base: Option, pub index: Option, - pub displacement: Option, + pub displacement: Option, } impl fmt::Display for MemoryIndex { @@ -336,7 +354,10 @@ impl fmt::Display for MemoryIndex { Some(displacement) => write!(f, "[{}+{:04x}]", index, displacement), None => write!(f, "[{}]", index), }, - None => panic!("Invalid MemoryIndex encountered"), + None => match &self.displacement { + Some(displacement) => write!(f, "[{:04x}]", displacement), + None => panic!("Memory Index without base, index and displacement"), + }, }, } }