ft: fixed modrm target calculation

While implementing some more mnemonics and testing
them, it became clear that modrm parsing was wrong.
Now reg to reg and immediates, together with
GPR1 interpretation, should work as expected.

GPR1 interpretation cannot currently be merged
into the modrm function: with the current
abstraction the REG field selects the mnemonic,
but constructing it also requires the second
operand, which is only parsed afterwards.
This will be incorporated at some point; this
commit just marks the first working state.
This commit is contained in:
2025-05-13 12:07:22 +09:00
parent 51b28b3bac
commit a25e0a3890
3 changed files with 450 additions and 117 deletions

View File

@@ -2,12 +2,12 @@ use core::fmt;
use std::{fs::File, io::Read, process::exit};
use crate::aout::Aout;
use crate::instructions::{Displacement, MemoryIndex, RegisterId, SegmentRegister};
use crate::modrm;
use crate::instructions::{MemoryIndex, ModRmTarget, OperandSize, RegisterId, SegmentRegister};
use crate::{
Args,
instructions::{ImmediateByte, ImmediateWord, Instruction, Opcode, Register},
instructions::{ImmediateByte, ImmediateWord, Instruction, Mnemonic, Register},
};
use crate::{modrmb, modrms, modrmv};
#[derive(Debug)]
/// Generic errors, which are encountered during parsing.
@@ -78,8 +78,6 @@ impl Disassembler {
// advance to operand
self.offset += 1;
let byte = self.text[self.offset];
// jump to right after operand
self.offset += 1;
self.instruction.raw.push(byte);
byte
}
@@ -91,51 +89,69 @@ impl Disassembler {
self.offset += 1;
let byte1 = self.text[self.offset];
let byte2 = self.text[self.offset + 1];
// jump right after operand
self.offset += 2;
// jump onto last operand
self.offset += 1;
self.instruction.raw.push(byte1);
self.instruction.raw.push(byte2);
u16::from_le_bytes([byte1, byte2])
}
/// Split a ModRM byte into its `(mod, reg, r/m)` bit fields.
///
/// Bit layout, most significant first: `mod` occupies bits 7-6,
/// `reg` bits 5-3 and `r/m` bits 2-0. Each field is returned
/// right-aligned, so `mod` lies in `0..=3` and the other two in `0..=7`.
fn deconstruct_modrm_byte(modrm: u8) -> (u8, u8, u8) {
    const TWO_BITS: u8 = 0b11;
    const THREE_BITS: u8 = 0b111;

    // Shift the wanted field down to bit 0, then mask off everything above it.
    let field = |shift: u8, mask: u8| (modrm >> shift) & mask;

    (field(6, TWO_BITS), field(3, THREE_BITS), field(0, THREE_BITS))
}
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
/// Returns the parsed modrm memory access and the source register
pub fn parse_modrm_byte(&mut self) -> (MemoryIndex, RegisterId) {
/// Returns the parsed modrm target and the source register
pub fn parse_modrm_byte(&mut self, size: OperandSize) -> (ModRmTarget, RegisterId) {
// advance to operand
self.offset += 1;
let modrm = self.text[self.offset];
self.instruction.raw.push(modrm);
// jump right after operand
self.offset += 1;
// Calculate ModRM byte with bitmask
let modulo = modrm >> 6;
let reg_id = (modrm >> 3) & 7;
let rm = modrm & 7;
let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm);
let displacement = match modulo {
log::debug!(
"0x{:04x} deconstructed into: 0b{:b}, 0b{:b}, 0b{:b}",
modrm,
mode,
reg,
rm
);
let mut displacement = None;
match mode {
0b00 => {
if rm == 0b110 {
log::debug!("Additional word during ModRM parsing was read with mod 0.");
Some(Displacement::Word(self.parse_word()))
displacement = Some(OperandSize::Word(self.parse_word()));
} else {
None
displacement = None;
}
}
0b01 => {
log::debug!("Additional byte during ModRM parsing was read.");
Some(Displacement::Byte(self.parse_byte()))
displacement = Some(OperandSize::Byte(self.parse_byte()))
}
0b10 => {
log::debug!("Additional word during ModRM parsing was read.");
Some(Displacement::Word(self.parse_word()))
displacement = Some(OperandSize::Word(self.parse_word()));
}
0b11 => {
// XXX is this correct?
log::debug!(
"No displacement, as reg to reg - maybe some implementation is missing here"
);
None
log::debug!("ModRM to reg");
let target = match size {
OperandSize::Byte(_) => {
ModRmTarget::Register(Register::by_id(OperandSize::Byte(rm)))
}
OperandSize::Word(_) => {
ModRmTarget::Register(Register::by_id(OperandSize::Word(rm.into())))
}
};
return (target, reg);
}
_ => panic!("Invalid ModRM byte encountered"),
};
@@ -184,7 +200,35 @@ impl Disassembler {
_ => panic!("Invalid ModRM byte encountered"),
};
(index, reg_id)
(ModRmTarget::Memory(index), reg)
}
/// Build the GPR1 mnemonic selected by the `reg` field of a ModRM byte.
///
/// `reg` picks the operation (`0b000` ADD, `0b001` OR, `0b010` ADC,
/// `0b011` SBB, `0b100` AND, `0b101` SUB, `0b110` XOR, `0b111` CMP), while
/// the variant of `imm` picks between the `_Ib` (byte immediate) and the
/// `_Iv` (word immediate) flavour. `target` is handed through unchanged as
/// the first operand of the resulting mnemonic.
///
/// # Panics
///
/// Panics if `reg` is greater than `0b111`.
pub fn modrm_reg_to_mnemonic(reg: u8, target: ModRmTarget, imm: OperandSize) -> Mnemonic {
    // Dispatch on immediate width and operation selector in one step.
    match (imm, reg) {
        // Byte-sized immediate.
        (OperandSize::Byte(b), 0b000) => Mnemonic::ADD_Ib(target, ImmediateByte(b)),
        (OperandSize::Byte(b), 0b001) => Mnemonic::OR_Ib(target, ImmediateByte(b)),
        (OperandSize::Byte(b), 0b010) => Mnemonic::ADC_Ib(target, ImmediateByte(b)),
        (OperandSize::Byte(b), 0b011) => Mnemonic::SBB_Ib(target, ImmediateByte(b)),
        (OperandSize::Byte(b), 0b100) => Mnemonic::AND_Ib(target, ImmediateByte(b)),
        (OperandSize::Byte(b), 0b101) => Mnemonic::SUB_Ib(target, ImmediateByte(b)),
        (OperandSize::Byte(b), 0b110) => Mnemonic::XOR_Ib(target, ImmediateByte(b)),
        (OperandSize::Byte(b), 0b111) => Mnemonic::CMP_Ib(target, ImmediateByte(b)),

        // Word-sized immediate.
        (OperandSize::Word(w), 0b000) => Mnemonic::ADD_Iv(target, ImmediateWord(w)),
        (OperandSize::Word(w), 0b001) => Mnemonic::OR_Iv(target, ImmediateWord(w)),
        (OperandSize::Word(w), 0b010) => Mnemonic::ADC_Iv(target, ImmediateWord(w)),
        (OperandSize::Word(w), 0b011) => Mnemonic::SBB_Iv(target, ImmediateWord(w)),
        (OperandSize::Word(w), 0b100) => Mnemonic::AND_Iv(target, ImmediateWord(w)),
        (OperandSize::Word(w), 0b101) => Mnemonic::SUB_Iv(target, ImmediateWord(w)),
        (OperandSize::Word(w), 0b110) => Mnemonic::XOR_Iv(target, ImmediateWord(w)),
        (OperandSize::Word(w), 0b111) => Mnemonic::CMP_Iv(target, ImmediateWord(w)),

        // `reg` is a plain u8, so values above 0b111 must be rejected explicitly.
        // Both widths are spelled out so a new `OperandSize` variant still
        // triggers a non-exhaustive-match compile error here.
        (OperandSize::Byte(_) | OperandSize::Word(_), _) => panic!("Illegal GPR1 mnemonic"),
    }
}
/// Decode instructions from the text section of the provided binary
@@ -204,30 +248,177 @@ impl Disassembler {
// additional raw bytes will be pushed by parse functions
self.instruction.raw.push(opcode);
// XXX: convert this copy and paste horror into a proc macro
self.instruction.opcode = match opcode {
// ADD
0x00 => modrm!(self, ADD_EbGb),
0x01 => modrm!(self, ADD_EvGv),
0x02 => modrm!(self, ADD_GbEb),
0x03 => modrm!(self, ADD_GvEv),
0x04 => Opcode::ADD_ALIb(ImmediateByte(self.parse_byte())),
0x05 => Opcode::ADD_AXIv(ImmediateWord(self.parse_word())),
// PUSH
0x06 => Opcode::PUSH(SegmentRegister::by_id(self.parse_modrm_byte().1)),
// POP
0x07 => Opcode::POP(SegmentRegister::by_id(self.parse_modrm_byte().1)),
// OR
0x08 => modrm!(self, OR_EbGb),
0x0A => modrm!(self, OR_GbEb),
// INT
0xCD => Opcode::INT(ImmediateByte(self.parse_byte())),
// MOV
0xBB => Opcode::MOV_BXIv(ImmediateWord(self.parse_word())),
0x00 => modrmb!(self, ADD_FromReg),
0x01 => modrmv!(self, ADD_FromReg),
0x02 => modrmb!(self, ADD_ToReg),
0x03 => modrmv!(self, ADD_ToReg),
0x04 => Mnemonic::ADD_ALIb(ImmediateByte(self.parse_byte())),
0x05 => Mnemonic::ADD_AXIv(ImmediateWord(self.parse_word())),
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
0x08 => modrmb!(self, OR_FromReg),
0x09 => modrmv!(self, OR_FromReg),
0x0A => modrmb!(self, OR_ToReg),
0x0B => modrmv!(self, OR_ToReg),
0x0C => Mnemonic::OR_ALIb(ImmediateByte(self.parse_byte())),
0x0D => Mnemonic::OR_AXIv(ImmediateWord(self.parse_word())),
0x0E => Mnemonic::PUSH_S(SegmentRegister::CS),
0x0F => panic!("Opcode 0x0F (POP CS) is considered undefined"),
0x10 => modrmb!(self, ADC_FromReg),
0x11 => modrmv!(self, ADC_FromReg),
0x12 => modrmb!(self, ADC_ToReg),
0x13 => modrmv!(self, ADC_ToReg),
0x14 => Mnemonic::ADC_ALIb(ImmediateByte(self.parse_byte())),
0x15 => Mnemonic::ADC_AXIv(ImmediateWord(self.parse_word())),
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
0x18 => modrmb!(self, SBB_FromReg),
0x19 => modrmv!(self, SBB_FromReg),
0x1A => modrmb!(self, SBB_ToReg),
0x1B => modrmv!(self, SBB_ToReg),
0x1C => Mnemonic::SBB_ALIb(ImmediateByte(self.parse_byte())),
0x1D => Mnemonic::SBB_AXIv(ImmediateWord(self.parse_word())),
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
0x20 => modrmb!(self, AND_FromReg),
0x21 => modrmv!(self, AND_FromReg),
0x22 => modrmb!(self, AND_ToReg),
0x23 => modrmv!(self, AND_ToReg),
0x24 => Mnemonic::AND_ALIb(ImmediateByte(self.parse_byte())),
0x25 => Mnemonic::AND_AXIv(ImmediateWord(self.parse_word())),
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
0x27 => Mnemonic::DAA,
0x28 => modrmb!(self, SUB_FromReg),
0x29 => modrmv!(self, SUB_FromReg),
0x2A => modrmb!(self, SUB_ToReg),
0x2B => modrmv!(self, SUB_ToReg),
0x2C => Mnemonic::SUB_ALIb(ImmediateByte(self.parse_byte())),
0x2D => Mnemonic::SUB_AXIv(ImmediateWord(self.parse_word())),
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
0x2F => Mnemonic::DAS,
0x30 => modrmb!(self, XOR_FromReg),
0x31 => modrmv!(self, XOR_FromReg),
0x32 => modrmb!(self, XOR_ToReg),
0x33 => modrmv!(self, XOR_ToReg),
0x34 => Mnemonic::XOR_ALIb(ImmediateByte(self.parse_byte())),
0x35 => Mnemonic::XOR_AXIv(ImmediateWord(self.parse_word())),
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
0x37 => Mnemonic::AAA,
0x38 => modrmb!(self, CMP_FromReg),
0x39 => modrmv!(self, CMP_FromReg),
0x3A => modrmb!(self, CMP_ToReg),
0x3B => modrmv!(self, CMP_ToReg),
0x3C => Mnemonic::CMP_ALIb(ImmediateByte(self.parse_byte())),
0x3D => Mnemonic::CMP_AXIv(ImmediateWord(self.parse_word())),
0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS),
0x3F => Mnemonic::AAS,
0x40 => Mnemonic::INC(Register::AX),
0x41 => Mnemonic::INC(Register::CX),
0x42 => Mnemonic::INC(Register::DX),
0x43 => Mnemonic::INC(Register::BX),
0x44 => Mnemonic::INC(Register::SP),
0x45 => Mnemonic::INC(Register::BP),
0x46 => Mnemonic::INC(Register::SI),
0x47 => Mnemonic::INC(Register::DI),
0x48 => Mnemonic::DEC(Register::AX),
0x49 => Mnemonic::DEC(Register::CX),
0x4A => Mnemonic::DEC(Register::DX),
0x4B => Mnemonic::DEC(Register::BX),
0x4C => Mnemonic::DEC(Register::SP),
0x4D => Mnemonic::DEC(Register::BP),
0x4E => Mnemonic::DEC(Register::SI),
0x4F => Mnemonic::DEC(Register::DI),
0x50 => Mnemonic::PUSH_R(Register::AX),
0x51 => Mnemonic::PUSH_R(Register::CX),
0x52 => Mnemonic::PUSH_R(Register::DX),
0x53 => Mnemonic::PUSH_R(Register::BX),
0x54 => Mnemonic::PUSH_R(Register::SP),
0x55 => Mnemonic::PUSH_R(Register::BP),
0x56 => Mnemonic::PUSH_R(Register::SI),
0x57 => Mnemonic::PUSH_R(Register::DI),
0x58 => Mnemonic::POP_R(Register::AX),
0x59 => Mnemonic::POP_R(Register::CX),
0x5A => Mnemonic::POP_R(Register::DX),
0x5B => Mnemonic::POP_R(Register::BX),
0x5C => Mnemonic::POP_R(Register::SP),
0x5D => Mnemonic::POP_R(Register::BP),
0x5E => Mnemonic::POP_R(Register::SI),
0x5F => Mnemonic::POP_R(Register::DI),
0x60..=0x6F => panic!("0x06 to 0x06F is considered undefined."),
0x70 => Mnemonic::JO(ImmediateByte(self.parse_byte())),
0x71 => Mnemonic::JNO(ImmediateByte(self.parse_byte())),
0x72 => Mnemonic::JB(ImmediateByte(self.parse_byte())),
0x73 => Mnemonic::JNB(ImmediateByte(self.parse_byte())),
0x74 => Mnemonic::JZ(ImmediateByte(self.parse_byte())),
0x75 => Mnemonic::JNZ(ImmediateByte(self.parse_byte())),
0x76 => Mnemonic::JBE(ImmediateByte(self.parse_byte())),
0x77 => Mnemonic::JA(ImmediateByte(self.parse_byte())),
0x78 => Mnemonic::JS(ImmediateByte(self.parse_byte())),
0x79 => Mnemonic::JNS(ImmediateByte(self.parse_byte())),
0x7A => Mnemonic::JPE(ImmediateByte(self.parse_byte())),
0x7B => Mnemonic::JPO(ImmediateByte(self.parse_byte())),
0x7C => Mnemonic::JL(ImmediateByte(self.parse_byte())),
0x7D => Mnemonic::JGE(ImmediateByte(self.parse_byte())),
0x7E => Mnemonic::JLE(ImmediateByte(self.parse_byte())),
0x7F => Mnemonic::JG(ImmediateByte(self.parse_byte())),
// 0x80..=0x83 => panic!("GRP1 not implemented"),
0x80 => {
let (target, reg) = self.parse_modrm_byte(OperandSize::Byte(0));
let imm = self.parse_byte();
Self::modrm_reg_to_mnemonic(reg, target, OperandSize::Byte(imm))
}
0x81 => {
let (target, reg) = self.parse_modrm_byte(OperandSize::Word(0));
let imm = self.parse_word();
Self::modrm_reg_to_mnemonic(reg, target, OperandSize::Word(imm))
}
0x82 => panic!("Same as 0x80"),
0x83 => panic!("Sign extented GPR1 not yet implemented"),
0x84 => modrmb!(self, TEST),
0x85 => modrmv!(self, TEST),
0x86 => modrmb!(self, XHCG),
0x87 => modrmv!(self, XHCG),
0x88 => modrmb!(self, MOV_FromReg),
0x89 => modrmv!(self, MOV_FromReg),
0x8A => modrmb!(self, MOV_ToReg),
0x8B => modrmv!(self, MOV_ToReg),
0x8C => modrms!(self, MOV_FromSReg),
0x8E => modrms!(self, MOV_ToSReg),
0x8D => modrmv!(self, LEA),
0xCD => Mnemonic::INT(ImmediateByte(self.parse_byte())),
0xBB => Mnemonic::MOV_BXIv(ImmediateWord(self.parse_word())),
_ => {
eprintln!(
"Encountered unknown self.instructionuction '0x{:x}'",
opcode
);
eprintln!("Encountered unknown instruction '0x{:x}'", opcode);
eprintln!("Offset might be misaligned and data is being interpreted.");
eprintln!("Existing to avoid further misinterpretation...");
exit(1);
@@ -237,6 +428,7 @@ impl Disassembler {
println!("{}", self.instruction);
instructions.push(self.instruction.clone());
self.instruction = Instruction::new();
self.offset += 1;
}
Ok(instructions)