ft: fixed modrm target calculation
While implementing and testing more mnemonics, it became clear that ModRM parsing was wrong. Register-to-register operands and immediates, together with GRP1 interpretation, should now work as expected. GRP1 interpretation currently cannot be merged into the ModRM function: with the current abstraction, the REG field has to select the correct mnemonic, and building that mnemonic also requires the second operand, which is only parsed afterwards. This will be incorporated at some point; this commit just marks the first working state.
This commit is contained in:
288
src/disasm.rs
288
src/disasm.rs
@@ -2,12 +2,12 @@ use core::fmt;
|
|||||||
use std::{fs::File, io::Read, process::exit};
|
use std::{fs::File, io::Read, process::exit};
|
||||||
|
|
||||||
use crate::aout::Aout;
|
use crate::aout::Aout;
|
||||||
use crate::instructions::{Displacement, MemoryIndex, RegisterId, SegmentRegister};
|
use crate::instructions::{MemoryIndex, ModRmTarget, OperandSize, RegisterId, SegmentRegister};
|
||||||
use crate::modrm;
|
|
||||||
use crate::{
|
use crate::{
|
||||||
Args,
|
Args,
|
||||||
instructions::{ImmediateByte, ImmediateWord, Instruction, Opcode, Register},
|
instructions::{ImmediateByte, ImmediateWord, Instruction, Mnemonic, Register},
|
||||||
};
|
};
|
||||||
|
use crate::{modrmb, modrms, modrmv};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
/// Generic errors, which are encountered during parsing.
|
/// Generic errors, which are encountered during parsing.
|
||||||
@@ -78,8 +78,6 @@ impl Disassembler {
|
|||||||
// advance to operand
|
// advance to operand
|
||||||
self.offset += 1;
|
self.offset += 1;
|
||||||
let byte = self.text[self.offset];
|
let byte = self.text[self.offset];
|
||||||
// jump to right after operand
|
|
||||||
self.offset += 1;
|
|
||||||
self.instruction.raw.push(byte);
|
self.instruction.raw.push(byte);
|
||||||
byte
|
byte
|
||||||
}
|
}
|
||||||
@@ -91,51 +89,69 @@ impl Disassembler {
|
|||||||
self.offset += 1;
|
self.offset += 1;
|
||||||
let byte1 = self.text[self.offset];
|
let byte1 = self.text[self.offset];
|
||||||
let byte2 = self.text[self.offset + 1];
|
let byte2 = self.text[self.offset + 1];
|
||||||
// jump right after operand
|
// jump onto last operand
|
||||||
self.offset += 2;
|
self.offset += 1;
|
||||||
self.instruction.raw.push(byte1);
|
self.instruction.raw.push(byte1);
|
||||||
self.instruction.raw.push(byte2);
|
self.instruction.raw.push(byte2);
|
||||||
u16::from_le_bytes([byte1, byte2])
|
u16::from_le_bytes([byte1, byte2])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Takes in a modrm byte and returns mod, reg and r/m.
|
||||||
|
fn deconstruct_modrm_byte(modrm: u8) -> (u8, u8, u8) {
|
||||||
|
let mode = (modrm >> 6) & 0b11;
|
||||||
|
let reg = (modrm >> 3) & 0b111;
|
||||||
|
let rm = modrm & 0b111;
|
||||||
|
|
||||||
|
(mode, reg, rm)
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
|
/// Parse a single modrm byte, return the resulting ModRmTarget and advance the offset.
|
||||||
/// Returns the parsed modrm memory access and the source register
|
/// Returns the parsed modrm target and the source register
|
||||||
pub fn parse_modrm_byte(&mut self) -> (MemoryIndex, RegisterId) {
|
pub fn parse_modrm_byte(&mut self, size: OperandSize) -> (ModRmTarget, RegisterId) {
|
||||||
// advance to operand
|
// advance to operand
|
||||||
self.offset += 1;
|
self.offset += 1;
|
||||||
let modrm = self.text[self.offset];
|
let modrm = self.text[self.offset];
|
||||||
self.instruction.raw.push(modrm);
|
self.instruction.raw.push(modrm);
|
||||||
// jump right after operand
|
|
||||||
self.offset += 1;
|
|
||||||
|
|
||||||
// Calculate ModRM byte with bitmask
|
let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm);
|
||||||
let modulo = modrm >> 6;
|
|
||||||
let reg_id = (modrm >> 3) & 7;
|
|
||||||
let rm = modrm & 7;
|
|
||||||
|
|
||||||
let displacement = match modulo {
|
log::debug!(
|
||||||
|
"0x{:04x} deconstructed into: 0b{:b}, 0b{:b}, 0b{:b}",
|
||||||
|
modrm,
|
||||||
|
mode,
|
||||||
|
reg,
|
||||||
|
rm
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut displacement = None;
|
||||||
|
match mode {
|
||||||
0b00 => {
|
0b00 => {
|
||||||
if rm == 0b110 {
|
if rm == 0b110 {
|
||||||
log::debug!("Additional word during ModRM parsing was read with mod 0.");
|
log::debug!("Additional word during ModRM parsing was read with mod 0.");
|
||||||
Some(Displacement::Word(self.parse_word()))
|
displacement = Some(OperandSize::Word(self.parse_word()));
|
||||||
} else {
|
} else {
|
||||||
None
|
displacement = None;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
0b01 => {
|
0b01 => {
|
||||||
log::debug!("Additional byte during ModRM parsing was read.");
|
log::debug!("Additional byte during ModRM parsing was read.");
|
||||||
Some(Displacement::Byte(self.parse_byte()))
|
displacement = Some(OperandSize::Byte(self.parse_byte()))
|
||||||
}
|
}
|
||||||
0b10 => {
|
0b10 => {
|
||||||
log::debug!("Additional word during ModRM parsing was read.");
|
log::debug!("Additional word during ModRM parsing was read.");
|
||||||
Some(Displacement::Word(self.parse_word()))
|
displacement = Some(OperandSize::Word(self.parse_word()));
|
||||||
}
|
}
|
||||||
0b11 => {
|
0b11 => {
|
||||||
// XXX is this correct?
|
log::debug!("ModRM to reg");
|
||||||
log::debug!(
|
let target = match size {
|
||||||
"No displacement, as reg to reg - maybe some implementation is missing here"
|
OperandSize::Byte(_) => {
|
||||||
);
|
ModRmTarget::Register(Register::by_id(OperandSize::Byte(rm)))
|
||||||
None
|
}
|
||||||
|
OperandSize::Word(_) => {
|
||||||
|
ModRmTarget::Register(Register::by_id(OperandSize::Word(rm.into())))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
return (target, reg);
|
||||||
}
|
}
|
||||||
_ => panic!("Invalid ModRM byte encountered"),
|
_ => panic!("Invalid ModRM byte encountered"),
|
||||||
};
|
};
|
||||||
@@ -184,7 +200,35 @@ impl Disassembler {
|
|||||||
_ => panic!("Invalid ModRM byte encountered"),
|
_ => panic!("Invalid ModRM byte encountered"),
|
||||||
};
|
};
|
||||||
|
|
||||||
(index, reg_id)
|
(ModRmTarget::Memory(index), reg)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Match the modrm reg bits to the GRP1 (opcode group 1) mnemonics.
|
||||||
|
pub fn modrm_reg_to_mnemonic(reg: u8, target: ModRmTarget, imm: OperandSize) -> Mnemonic {
|
||||||
|
match imm {
|
||||||
|
OperandSize::Byte(b) => match reg {
|
||||||
|
0b000 => Mnemonic::ADD_Ib(target, ImmediateByte(b)),
|
||||||
|
0b001 => Mnemonic::OR_Ib(target, ImmediateByte(b)),
|
||||||
|
0b010 => Mnemonic::ADC_Ib(target, ImmediateByte(b)),
|
||||||
|
0b011 => Mnemonic::SBB_Ib(target, ImmediateByte(b)),
|
||||||
|
0b100 => Mnemonic::AND_Ib(target, ImmediateByte(b)),
|
||||||
|
0b101 => Mnemonic::SUB_Ib(target, ImmediateByte(b)),
|
||||||
|
0b110 => Mnemonic::XOR_Ib(target, ImmediateByte(b)),
|
||||||
|
0b111 => Mnemonic::CMP_Ib(target, ImmediateByte(b)),
|
||||||
|
_ => panic!("Illegal GPR1 mnemonic"),
|
||||||
|
},
|
||||||
|
OperandSize::Word(w) => match reg {
|
||||||
|
0b000 => Mnemonic::ADD_Iv(target, ImmediateWord(w)),
|
||||||
|
0b001 => Mnemonic::OR_Iv(target, ImmediateWord(w)),
|
||||||
|
0b010 => Mnemonic::ADC_Iv(target, ImmediateWord(w)),
|
||||||
|
0b011 => Mnemonic::SBB_Iv(target, ImmediateWord(w)),
|
||||||
|
0b100 => Mnemonic::AND_Iv(target, ImmediateWord(w)),
|
||||||
|
0b101 => Mnemonic::SUB_Iv(target, ImmediateWord(w)),
|
||||||
|
0b110 => Mnemonic::XOR_Iv(target, ImmediateWord(w)),
|
||||||
|
0b111 => Mnemonic::CMP_Iv(target, ImmediateWord(w)),
|
||||||
|
_ => panic!("Illegal GPR1 mnemonic"),
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode instructions from the text section of the provided binary
|
/// Decode instructions from the text section of the provided binary
|
||||||
@@ -204,30 +248,177 @@ impl Disassembler {
|
|||||||
|
|
||||||
// additional raw bytes will be pushed by parse functions
|
// additional raw bytes will be pushed by parse functions
|
||||||
self.instruction.raw.push(opcode);
|
self.instruction.raw.push(opcode);
|
||||||
|
// XXX: convert this copy and paste horror into a proc macro
|
||||||
self.instruction.opcode = match opcode {
|
self.instruction.opcode = match opcode {
|
||||||
// ADD
|
0x00 => modrmb!(self, ADD_FromReg),
|
||||||
0x00 => modrm!(self, ADD_EbGb),
|
0x01 => modrmv!(self, ADD_FromReg),
|
||||||
0x01 => modrm!(self, ADD_EvGv),
|
0x02 => modrmb!(self, ADD_ToReg),
|
||||||
0x02 => modrm!(self, ADD_GbEb),
|
0x03 => modrmv!(self, ADD_ToReg),
|
||||||
0x03 => modrm!(self, ADD_GvEv),
|
0x04 => Mnemonic::ADD_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
0x04 => Opcode::ADD_ALIb(ImmediateByte(self.parse_byte())),
|
0x05 => Mnemonic::ADD_AXIv(ImmediateWord(self.parse_word())),
|
||||||
0x05 => Opcode::ADD_AXIv(ImmediateWord(self.parse_word())),
|
|
||||||
// PUSH
|
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
|
||||||
0x06 => Opcode::PUSH(SegmentRegister::by_id(self.parse_modrm_byte().1)),
|
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
|
||||||
// POP
|
|
||||||
0x07 => Opcode::POP(SegmentRegister::by_id(self.parse_modrm_byte().1)),
|
0x08 => modrmb!(self, OR_FromReg),
|
||||||
// OR
|
0x09 => modrmv!(self, OR_FromReg),
|
||||||
0x08 => modrm!(self, OR_EbGb),
|
0x0A => modrmb!(self, OR_ToReg),
|
||||||
0x0A => modrm!(self, OR_GbEb),
|
0x0B => modrmv!(self, OR_ToReg),
|
||||||
// INT
|
0x0C => Mnemonic::OR_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
0xCD => Opcode::INT(ImmediateByte(self.parse_byte())),
|
0x0D => Mnemonic::OR_AXIv(ImmediateWord(self.parse_word())),
|
||||||
// MOV
|
|
||||||
0xBB => Opcode::MOV_BXIv(ImmediateWord(self.parse_word())),
|
0x0E => Mnemonic::PUSH_S(SegmentRegister::CS),
|
||||||
|
0x0F => panic!("Opcode 0x0F (POP CS) is considered undefined"),
|
||||||
|
|
||||||
|
0x10 => modrmb!(self, ADC_FromReg),
|
||||||
|
0x11 => modrmv!(self, ADC_FromReg),
|
||||||
|
0x12 => modrmb!(self, ADC_ToReg),
|
||||||
|
0x13 => modrmv!(self, ADC_ToReg),
|
||||||
|
0x14 => Mnemonic::ADC_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
|
0x15 => Mnemonic::ADC_AXIv(ImmediateWord(self.parse_word())),
|
||||||
|
|
||||||
|
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
|
||||||
|
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
|
||||||
|
|
||||||
|
0x18 => modrmb!(self, SBB_FromReg),
|
||||||
|
0x19 => modrmv!(self, SBB_FromReg),
|
||||||
|
0x1A => modrmb!(self, SBB_ToReg),
|
||||||
|
0x1B => modrmv!(self, SBB_ToReg),
|
||||||
|
0x1C => Mnemonic::SBB_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
|
0x1D => Mnemonic::SBB_AXIv(ImmediateWord(self.parse_word())),
|
||||||
|
|
||||||
|
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
|
||||||
|
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
|
||||||
|
|
||||||
|
0x20 => modrmb!(self, AND_FromReg),
|
||||||
|
0x21 => modrmv!(self, AND_FromReg),
|
||||||
|
0x22 => modrmb!(self, AND_ToReg),
|
||||||
|
0x23 => modrmv!(self, AND_ToReg),
|
||||||
|
0x24 => Mnemonic::AND_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
|
0x25 => Mnemonic::AND_AXIv(ImmediateWord(self.parse_word())),
|
||||||
|
|
||||||
|
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
|
||||||
|
0x27 => Mnemonic::DAA,
|
||||||
|
|
||||||
|
0x28 => modrmb!(self, SUB_FromReg),
|
||||||
|
0x29 => modrmv!(self, SUB_FromReg),
|
||||||
|
0x2A => modrmb!(self, SUB_ToReg),
|
||||||
|
0x2B => modrmv!(self, SUB_ToReg),
|
||||||
|
0x2C => Mnemonic::SUB_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
|
0x2D => Mnemonic::SUB_AXIv(ImmediateWord(self.parse_word())),
|
||||||
|
|
||||||
|
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
|
||||||
|
0x2F => Mnemonic::DAS,
|
||||||
|
|
||||||
|
0x30 => modrmb!(self, XOR_FromReg),
|
||||||
|
0x31 => modrmv!(self, XOR_FromReg),
|
||||||
|
0x32 => modrmb!(self, XOR_ToReg),
|
||||||
|
0x33 => modrmv!(self, XOR_ToReg),
|
||||||
|
0x34 => Mnemonic::XOR_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
|
0x35 => Mnemonic::XOR_AXIv(ImmediateWord(self.parse_word())),
|
||||||
|
|
||||||
|
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
|
||||||
|
0x37 => Mnemonic::AAA,
|
||||||
|
|
||||||
|
0x38 => modrmb!(self, CMP_FromReg),
|
||||||
|
0x39 => modrmv!(self, CMP_FromReg),
|
||||||
|
0x3A => modrmb!(self, CMP_ToReg),
|
||||||
|
0x3B => modrmv!(self, CMP_ToReg),
|
||||||
|
0x3C => Mnemonic::CMP_ALIb(ImmediateByte(self.parse_byte())),
|
||||||
|
0x3D => Mnemonic::CMP_AXIv(ImmediateWord(self.parse_word())),
|
||||||
|
|
||||||
|
0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS),
|
||||||
|
0x3F => Mnemonic::AAS,
|
||||||
|
|
||||||
|
0x40 => Mnemonic::INC(Register::AX),
|
||||||
|
0x41 => Mnemonic::INC(Register::CX),
|
||||||
|
0x42 => Mnemonic::INC(Register::DX),
|
||||||
|
0x43 => Mnemonic::INC(Register::BX),
|
||||||
|
0x44 => Mnemonic::INC(Register::SP),
|
||||||
|
0x45 => Mnemonic::INC(Register::BP),
|
||||||
|
0x46 => Mnemonic::INC(Register::SI),
|
||||||
|
0x47 => Mnemonic::INC(Register::DI),
|
||||||
|
|
||||||
|
0x48 => Mnemonic::DEC(Register::AX),
|
||||||
|
0x49 => Mnemonic::DEC(Register::CX),
|
||||||
|
0x4A => Mnemonic::DEC(Register::DX),
|
||||||
|
0x4B => Mnemonic::DEC(Register::BX),
|
||||||
|
0x4C => Mnemonic::DEC(Register::SP),
|
||||||
|
0x4D => Mnemonic::DEC(Register::BP),
|
||||||
|
0x4E => Mnemonic::DEC(Register::SI),
|
||||||
|
0x4F => Mnemonic::DEC(Register::DI),
|
||||||
|
|
||||||
|
0x50 => Mnemonic::PUSH_R(Register::AX),
|
||||||
|
0x51 => Mnemonic::PUSH_R(Register::CX),
|
||||||
|
0x52 => Mnemonic::PUSH_R(Register::DX),
|
||||||
|
0x53 => Mnemonic::PUSH_R(Register::BX),
|
||||||
|
0x54 => Mnemonic::PUSH_R(Register::SP),
|
||||||
|
0x55 => Mnemonic::PUSH_R(Register::BP),
|
||||||
|
0x56 => Mnemonic::PUSH_R(Register::SI),
|
||||||
|
0x57 => Mnemonic::PUSH_R(Register::DI),
|
||||||
|
|
||||||
|
0x58 => Mnemonic::POP_R(Register::AX),
|
||||||
|
0x59 => Mnemonic::POP_R(Register::CX),
|
||||||
|
0x5A => Mnemonic::POP_R(Register::DX),
|
||||||
|
0x5B => Mnemonic::POP_R(Register::BX),
|
||||||
|
0x5C => Mnemonic::POP_R(Register::SP),
|
||||||
|
0x5D => Mnemonic::POP_R(Register::BP),
|
||||||
|
0x5E => Mnemonic::POP_R(Register::SI),
|
||||||
|
0x5F => Mnemonic::POP_R(Register::DI),
|
||||||
|
|
||||||
|
0x60..=0x6F => panic!("0x06 to 0x06F is considered undefined."),
|
||||||
|
|
||||||
|
0x70 => Mnemonic::JO(ImmediateByte(self.parse_byte())),
|
||||||
|
0x71 => Mnemonic::JNO(ImmediateByte(self.parse_byte())),
|
||||||
|
0x72 => Mnemonic::JB(ImmediateByte(self.parse_byte())),
|
||||||
|
0x73 => Mnemonic::JNB(ImmediateByte(self.parse_byte())),
|
||||||
|
0x74 => Mnemonic::JZ(ImmediateByte(self.parse_byte())),
|
||||||
|
0x75 => Mnemonic::JNZ(ImmediateByte(self.parse_byte())),
|
||||||
|
0x76 => Mnemonic::JBE(ImmediateByte(self.parse_byte())),
|
||||||
|
0x77 => Mnemonic::JA(ImmediateByte(self.parse_byte())),
|
||||||
|
0x78 => Mnemonic::JS(ImmediateByte(self.parse_byte())),
|
||||||
|
0x79 => Mnemonic::JNS(ImmediateByte(self.parse_byte())),
|
||||||
|
0x7A => Mnemonic::JPE(ImmediateByte(self.parse_byte())),
|
||||||
|
0x7B => Mnemonic::JPO(ImmediateByte(self.parse_byte())),
|
||||||
|
0x7C => Mnemonic::JL(ImmediateByte(self.parse_byte())),
|
||||||
|
0x7D => Mnemonic::JGE(ImmediateByte(self.parse_byte())),
|
||||||
|
0x7E => Mnemonic::JLE(ImmediateByte(self.parse_byte())),
|
||||||
|
0x7F => Mnemonic::JG(ImmediateByte(self.parse_byte())),
|
||||||
|
|
||||||
|
// 0x80..=0x83 => panic!("GRP1 not implemented"),
|
||||||
|
0x80 => {
|
||||||
|
let (target, reg) = self.parse_modrm_byte(OperandSize::Byte(0));
|
||||||
|
let imm = self.parse_byte();
|
||||||
|
Self::modrm_reg_to_mnemonic(reg, target, OperandSize::Byte(imm))
|
||||||
|
}
|
||||||
|
0x81 => {
|
||||||
|
let (target, reg) = self.parse_modrm_byte(OperandSize::Word(0));
|
||||||
|
let imm = self.parse_word();
|
||||||
|
Self::modrm_reg_to_mnemonic(reg, target, OperandSize::Word(imm))
|
||||||
|
}
|
||||||
|
0x82 => panic!("Same as 0x80"),
|
||||||
|
0x83 => panic!("Sign extented GPR1 not yet implemented"),
|
||||||
|
|
||||||
|
0x84 => modrmb!(self, TEST),
|
||||||
|
0x85 => modrmv!(self, TEST),
|
||||||
|
|
||||||
|
0x86 => modrmb!(self, XHCG),
|
||||||
|
0x87 => modrmv!(self, XHCG),
|
||||||
|
|
||||||
|
0x88 => modrmb!(self, MOV_FromReg),
|
||||||
|
0x89 => modrmv!(self, MOV_FromReg),
|
||||||
|
0x8A => modrmb!(self, MOV_ToReg),
|
||||||
|
0x8B => modrmv!(self, MOV_ToReg),
|
||||||
|
0x8C => modrms!(self, MOV_FromSReg),
|
||||||
|
0x8E => modrms!(self, MOV_ToSReg),
|
||||||
|
|
||||||
|
0x8D => modrmv!(self, LEA),
|
||||||
|
|
||||||
|
0xCD => Mnemonic::INT(ImmediateByte(self.parse_byte())),
|
||||||
|
|
||||||
|
0xBB => Mnemonic::MOV_BXIv(ImmediateWord(self.parse_word())),
|
||||||
_ => {
|
_ => {
|
||||||
eprintln!(
|
eprintln!("Encountered unknown instruction '0x{:x}'", opcode);
|
||||||
"Encountered unknown self.instructionuction '0x{:x}'",
|
|
||||||
opcode
|
|
||||||
);
|
|
||||||
eprintln!("Offset might be misaligned and data is being interpreted.");
|
eprintln!("Offset might be misaligned and data is being interpreted.");
|
||||||
eprintln!("Existing to avoid further misinterpretation...");
|
eprintln!("Existing to avoid further misinterpretation...");
|
||||||
exit(1);
|
exit(1);
|
||||||
@@ -237,6 +428,7 @@ impl Disassembler {
|
|||||||
println!("{}", self.instruction);
|
println!("{}", self.instruction);
|
||||||
instructions.push(self.instruction.clone());
|
instructions.push(self.instruction.clone());
|
||||||
self.instruction = Instruction::new();
|
self.instruction = Instruction::new();
|
||||||
|
self.offset += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(instructions)
|
Ok(instructions)
|
||||||
|
|||||||
@@ -1,8 +1,26 @@
|
|||||||
#[macro_export]
|
#[macro_export]
|
||||||
/// Generate an Opcode for 'normal' ModRM instructions with mem access and a reg
|
/// Generate a byte Opcode for 'normal' ModRM instructions with mem access and a reg
|
||||||
macro_rules! modrm {
|
macro_rules! modrmb {
|
||||||
($self:ident, $variant:ident) => {{
|
($self:ident, $variant:ident) => {{
|
||||||
let (idx, reg) = $self.parse_modrm_byte();
|
let (target, reg) = $self.parse_modrm_byte(OperandSize::Byte(0));
|
||||||
Opcode::$variant(idx, Register::by_id(reg))
|
Mnemonic::$variant(target, Register::by_id(OperandSize::Byte(reg)))
|
||||||
|
}};
|
||||||
|
}
|
||||||
|
|
||||||
|
#[macro_export]
|
||||||
|
/// Generate a word Opcode for 'normal' ModRM instructions with mem access and a reg
|
||||||
|
macro_rules! modrmv {
|
||||||
|
($self:ident, $variant:ident) => {{
|
||||||
|
let (target, reg) = $self.parse_modrm_byte(OperandSize::Word(0));
|
||||||
|
Mnemonic::$variant(target, Register::by_id(OperandSize::Word(reg.into())))
|
||||||
|
}};
|
||||||
|
}
|
||||||
|
|
||||||
|
#[macro_export]
|
||||||
|
/// Generate a word Opcode for 'normal' ModRM instructions with mem access and a segment reg
|
||||||
|
macro_rules! modrms {
|
||||||
|
($self:ident, $variant:ident) => {{
|
||||||
|
let (target, reg) = $self.parse_modrm_byte(OperandSize::Word(0));
|
||||||
|
Mnemonic::$variant(target, SegmentRegister::by_id(reg))
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -12,9 +12,9 @@ pub type w = u16;
|
|||||||
/// contains the `Opcode` that will be executed, alongside its starting offset
|
/// contains the `Opcode` that will be executed, alongside its starting offset
|
||||||
/// and the raw parsed bytes
|
/// and the raw parsed bytes
|
||||||
pub struct Instruction {
|
pub struct Instruction {
|
||||||
pub start: usize, // location of the instruction start
|
pub start: usize, // location of the instruction start
|
||||||
pub raw: Vec<u8>, // raw value of instruction
|
pub raw: Vec<u8>, // raw value of instruction
|
||||||
pub opcode: Opcode, // actual instruction
|
pub opcode: Mnemonic, // actual instruction
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Instruction {
|
impl Instruction {
|
||||||
@@ -22,7 +22,7 @@ impl Instruction {
|
|||||||
Instruction {
|
Instruction {
|
||||||
start: 0,
|
start: 0,
|
||||||
raw: Vec::new(),
|
raw: Vec::new(),
|
||||||
opcode: Opcode::NOP(),
|
opcode: Mnemonic::NOP(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -39,61 +39,163 @@ impl fmt::Display for Instruction {
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
#[allow(dead_code, non_camel_case_types)]
|
#[allow(dead_code, non_camel_case_types)]
|
||||||
pub enum Opcode {
|
/// All possible opcode variations.
|
||||||
|
// XXX: convert this copy and paste horror into a proc macro like
|
||||||
|
// enum Opcode {
|
||||||
|
// #[derive(default_variations)]
|
||||||
|
// ADD,
|
||||||
|
// ...
|
||||||
|
// }
|
||||||
|
// which would then add all variants and also create the matching logic for
|
||||||
|
// src/disasm.rs decode_instructions()
|
||||||
|
pub enum Mnemonic {
|
||||||
NOP(),
|
NOP(),
|
||||||
// ADD
|
// ADD
|
||||||
ADD_EbGb(MemoryIndex, Register),
|
ADD_FromReg(ModRmTarget, Register),
|
||||||
ADD_EvGv(MemoryIndex, Register),
|
ADD_ToReg(ModRmTarget, Register),
|
||||||
ADD_GbEb(MemoryIndex, Register),
|
ADD_Ib(ModRmTarget, ImmediateByte),
|
||||||
ADD_GvEv(MemoryIndex, Register),
|
ADD_Iv(ModRmTarget, ImmediateWord),
|
||||||
ADD_ALIb(ImmediateByte),
|
ADD_ALIb(ImmediateByte),
|
||||||
ADD_AXIv(ImmediateWord),
|
ADD_AXIv(ImmediateWord),
|
||||||
// PUSH
|
// PUSH
|
||||||
PUSH(SegmentRegister),
|
PUSH_R(Register),
|
||||||
|
PUSH_S(SegmentRegister),
|
||||||
// POP
|
// POP
|
||||||
POP(SegmentRegister),
|
POP_S(SegmentRegister),
|
||||||
|
POP_R(Register),
|
||||||
// OR
|
// OR
|
||||||
OR_EbGb(MemoryIndex, Register),
|
OR_FromReg(ModRmTarget, Register),
|
||||||
OR_GbEb(MemoryIndex, Register),
|
OR_ToReg(ModRmTarget, Register),
|
||||||
|
OR_Ib(ModRmTarget, ImmediateByte),
|
||||||
|
OR_Iv(ModRmTarget, ImmediateWord),
|
||||||
|
OR_ALIb(ImmediateByte),
|
||||||
|
OR_AXIv(ImmediateWord),
|
||||||
|
// ADC
|
||||||
|
ADC_FromReg(ModRmTarget, Register),
|
||||||
|
ADC_ToReg(ModRmTarget, Register),
|
||||||
|
ADC_Ib(ModRmTarget, ImmediateByte),
|
||||||
|
ADC_Iv(ModRmTarget, ImmediateWord),
|
||||||
|
ADC_ALIb(ImmediateByte),
|
||||||
|
ADC_AXIv(ImmediateWord),
|
||||||
|
// SBB
|
||||||
|
SBB_FromReg(ModRmTarget, Register),
|
||||||
|
SBB_ToReg(ModRmTarget, Register),
|
||||||
|
SBB_Ib(ModRmTarget, ImmediateByte),
|
||||||
|
SBB_Iv(ModRmTarget, ImmediateWord),
|
||||||
|
SBB_ALIb(ImmediateByte),
|
||||||
|
SBB_AXIv(ImmediateWord),
|
||||||
|
// AND
|
||||||
|
AND_FromReg(ModRmTarget, Register),
|
||||||
|
AND_ToReg(ModRmTarget, Register),
|
||||||
|
AND_Ib(ModRmTarget, ImmediateByte),
|
||||||
|
AND_Iv(ModRmTarget, ImmediateWord),
|
||||||
|
AND_ALIb(ImmediateByte),
|
||||||
|
AND_AXIv(ImmediateWord),
|
||||||
|
// Override
|
||||||
|
OVERRIDE(SegmentRegister),
|
||||||
|
// Decimal Adjustment
|
||||||
|
DAA,
|
||||||
|
DAS,
|
||||||
|
AAA,
|
||||||
|
AAS,
|
||||||
|
// SUB
|
||||||
|
SUB_FromReg(ModRmTarget, Register),
|
||||||
|
SUB_ToReg(ModRmTarget, Register),
|
||||||
|
SUB_Ib(ModRmTarget, ImmediateByte),
|
||||||
|
SUB_Iv(ModRmTarget, ImmediateWord),
|
||||||
|
SUB_ALIb(ImmediateByte),
|
||||||
|
SUB_AXIv(ImmediateWord),
|
||||||
|
// XOR
|
||||||
|
XOR_FromReg(ModRmTarget, Register),
|
||||||
|
XOR_ToReg(ModRmTarget, Register),
|
||||||
|
XOR_Ib(ModRmTarget, ImmediateByte),
|
||||||
|
XOR_Iv(ModRmTarget, ImmediateWord),
|
||||||
|
XOR_ALIb(ImmediateByte),
|
||||||
|
XOR_AXIv(ImmediateWord),
|
||||||
|
// CMP
|
||||||
|
CMP_FromReg(ModRmTarget, Register),
|
||||||
|
CMP_ToReg(ModRmTarget, Register),
|
||||||
|
CMP_Ib(ModRmTarget, ImmediateByte),
|
||||||
|
CMP_Iv(ModRmTarget, ImmediateWord),
|
||||||
|
CMP_ALIb(ImmediateByte),
|
||||||
|
CMP_AXIv(ImmediateWord),
|
||||||
|
// INC
|
||||||
|
INC(Register),
|
||||||
|
// DEC
|
||||||
|
DEC(Register),
|
||||||
|
// Jumps
|
||||||
|
JO(ImmediateByte),
|
||||||
|
JNO(ImmediateByte),
|
||||||
|
JB(ImmediateByte),
|
||||||
|
JNB(ImmediateByte),
|
||||||
|
JZ(ImmediateByte),
|
||||||
|
JNZ(ImmediateByte),
|
||||||
|
JBE(ImmediateByte),
|
||||||
|
JA(ImmediateByte),
|
||||||
|
JS(ImmediateByte),
|
||||||
|
JNS(ImmediateByte),
|
||||||
|
JPE(ImmediateByte),
|
||||||
|
JPO(ImmediateByte),
|
||||||
|
JL(ImmediateByte),
|
||||||
|
JGE(ImmediateByte),
|
||||||
|
JLE(ImmediateByte),
|
||||||
|
JG(ImmediateByte),
|
||||||
|
// TEST
|
||||||
|
TEST(ModRmTarget, Register),
|
||||||
|
//XHCG
|
||||||
|
XHCG(ModRmTarget, Register),
|
||||||
|
// MOV
|
||||||
|
MOV_FromReg(ModRmTarget, Register),
|
||||||
|
MOV_ToReg(ModRmTarget, Register),
|
||||||
|
MOV_FromSReg(ModRmTarget, SegmentRegister),
|
||||||
|
MOV_ToSReg(ModRmTarget, SegmentRegister),
|
||||||
|
MOV_BXIv(ImmediateWord),
|
||||||
|
// LEA
|
||||||
|
LEA(ModRmTarget, Register),
|
||||||
// INT
|
// INT
|
||||||
INT(ImmediateByte),
|
INT(ImmediateByte),
|
||||||
// MOV
|
|
||||||
MOV_BXIv(ImmediateWord),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Opcode {
|
impl fmt::Display for Mnemonic {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::INT(byte) => write!(f, "INT, {:x}", byte),
|
Self::INT(byte) => write!(f, "INT, {:x}", byte),
|
||||||
Self::ADD_EbGb(mem, reg) => write!(f, "ADD {}, {}", mem, reg),
|
Self::ADD_FromReg(mem, reg) => write!(f, "ADD {}, {}", mem, reg),
|
||||||
Self::ADD_GbEb(mem, reg) => write!(f, "ADD {}, {}", reg, mem),
|
Self::ADD_ToReg(mem, reg) => write!(f, "ADD {}, {}", reg, mem),
|
||||||
|
Self::CMP_Iv(mem, imm) => write!(f, "CMP {}, {:04x}", mem, imm),
|
||||||
|
Self::LEA(mem, reg) => write!(f, "LEA {}, {}", reg, mem),
|
||||||
Self::MOV_BXIv(word) => write!(f, "MOV BX, {:04x}", word),
|
Self::MOV_BXIv(word) => write!(f, "MOV BX, {:04x}", word),
|
||||||
_ => write!(f, "display not yet implemented"),
|
Self::XOR_FromReg(mem, reg) => write!(f, "XOR {}, {}", mem, reg),
|
||||||
|
_ => write!(f, "??? ??, ??"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Registers of a 8086 processor
|
/// Registers of a 8086 processor
|
||||||
/// -x are 16bit, -l are 8bit
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub enum Register {
|
pub enum Register {
|
||||||
AX,
|
// 8 bit
|
||||||
BX,
|
// low bytes
|
||||||
CX,
|
|
||||||
DX,
|
|
||||||
AH,
|
|
||||||
AL,
|
AL,
|
||||||
BL,
|
|
||||||
BH,
|
|
||||||
CH,
|
|
||||||
CL,
|
CL,
|
||||||
DH,
|
|
||||||
DL,
|
DL,
|
||||||
DI,
|
BL,
|
||||||
SI,
|
// high bytes
|
||||||
BP,
|
AH,
|
||||||
SP,
|
CH,
|
||||||
|
DH,
|
||||||
|
BH,
|
||||||
|
|
||||||
|
// 16 bit
|
||||||
|
AX, // accumulator
|
||||||
|
CX, // counter
|
||||||
|
DX, // data
|
||||||
|
BX, // base
|
||||||
|
SP, // stack pointer
|
||||||
|
BP, // base pointer
|
||||||
|
SI, // source index
|
||||||
|
DI, // destination index
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Selector for Register or Segment Register
|
/// Selector for Register or Segment Register
|
||||||
@@ -102,25 +204,30 @@ pub type RegisterId = u8;
|
|||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
impl Register {
|
impl Register {
|
||||||
/// Find the register corresponding to the 8086 bytecode ID
|
/// Find the register corresponding to the 8086 bytecode ID
|
||||||
pub fn by_id(id: RegisterId) -> Self {
|
pub fn by_id(id: OperandSize) -> Self {
|
||||||
match id {
|
match id {
|
||||||
0x00 => Self::AL,
|
OperandSize::Byte(b) => match b {
|
||||||
0x01 => Self::CL,
|
0b000 => Self::AL,
|
||||||
0x02 => Self::DL,
|
0b001 => Self::CL,
|
||||||
0x03 => Self::BL,
|
0b010 => Self::DL,
|
||||||
0x04 => Self::AH,
|
0b011 => Self::BL,
|
||||||
0x05 => Self::CH,
|
0b100 => Self::AH,
|
||||||
0x06 => Self::DH,
|
0b101 => Self::CH,
|
||||||
0x07 => Self::BH,
|
0b110 => Self::DH,
|
||||||
0x10 => Self::AX,
|
0b111 => Self::BH,
|
||||||
0x11 => Self::CX,
|
_ => panic!("Invalid 8bit register ID encountered"),
|
||||||
0x12 => Self::DX,
|
},
|
||||||
0x13 => Self::BX,
|
OperandSize::Word(w) => match w {
|
||||||
0x14 => Self::SP,
|
0b000 => Self::AX,
|
||||||
0x15 => Self::BP,
|
0b001 => Self::CX,
|
||||||
0x16 => Self::SI,
|
0b010 => Self::DX,
|
||||||
0x17 => Self::DI,
|
0b011 => Self::BX,
|
||||||
_ => panic!("Invalid register ID encountered"),
|
0b100 => Self::SP,
|
||||||
|
0b101 => Self::BP,
|
||||||
|
0b110 => Self::SI,
|
||||||
|
0b111 => Self::DI,
|
||||||
|
_ => panic!("Invalid 16bit register ID encountered"),
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -163,10 +270,10 @@ impl SegmentRegister {
|
|||||||
/// Find the SRegister corresponding to the 8086 bytecode ID
|
/// Find the SRegister corresponding to the 8086 bytecode ID
|
||||||
pub fn by_id(id: u8) -> Self {
|
pub fn by_id(id: u8) -> Self {
|
||||||
match id {
|
match id {
|
||||||
0x30 => Self::ES,
|
0x00 => Self::ES,
|
||||||
0x31 => Self::CS,
|
0x01 => Self::CS,
|
||||||
0x32 => Self::SS,
|
0x10 => Self::SS,
|
||||||
0x33 => Self::DS,
|
0x11 => Self::DS,
|
||||||
_ => panic!("Invalid segment register ID encountered"),
|
_ => panic!("Invalid segment register ID encountered"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -210,13 +317,29 @@ macro_rules! impl_display_and_lowerhex {
|
|||||||
impl_display_and_lowerhex!(ImmediateByte);
|
impl_display_and_lowerhex!(ImmediateByte);
|
||||||
impl_display_and_lowerhex!(ImmediateWord);
|
impl_display_and_lowerhex!(ImmediateWord);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
/// ModRM byte can either target a memory location or some register
|
||||||
|
pub enum ModRmTarget {
|
||||||
|
Memory(MemoryIndex),
|
||||||
|
Register(Register),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for ModRmTarget {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
Self::Memory(idx) => write!(f, "{}", idx),
|
||||||
|
Self::Register(reg) => write!(f, "{}", reg),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A memory index operand is usually created by ModRM bytes or words.
|
/// A memory index operand is usually created by ModRM bytes or words.
|
||||||
/// e.g. [bx+si]
|
/// e.g. [bx+si]
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct MemoryIndex {
|
pub struct MemoryIndex {
|
||||||
pub base: Option<Register>,
|
pub base: Option<Register>,
|
||||||
pub index: Option<Register>,
|
pub index: Option<Register>,
|
||||||
pub displacement: Option<Displacement>,
|
pub displacement: Option<OperandSize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for MemoryIndex {
|
impl fmt::Display for MemoryIndex {
|
||||||
@@ -224,18 +347,18 @@ impl fmt::Display for MemoryIndex {
|
|||||||
match &self.base {
|
match &self.base {
|
||||||
Some(base) => match &self.index {
|
Some(base) => match &self.index {
|
||||||
Some(index) => match &self.displacement {
|
Some(index) => match &self.displacement {
|
||||||
Some(displacement) => write!(f, "[{}+{}+{}]", base, index, displacement),
|
Some(displacement) => write!(f, "[{} + {} + {}]", base, index, displacement),
|
||||||
None => write!(f, "[{}+{}]", base, index),
|
None => write!(f, "[{} + {}]", base, index),
|
||||||
},
|
},
|
||||||
None => match &self.displacement {
|
None => match &self.displacement {
|
||||||
Some(displacement) => write!(f, "[{}+{}]", base, displacement),
|
Some(displacement) => write!(f, "[{} + {}]", base, displacement),
|
||||||
None => write!(f, "{}", base),
|
None => write!(f, "[{} + 0]", base),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
None => match &self.index {
|
None => match &self.index {
|
||||||
Some(index) => match &self.displacement {
|
Some(index) => match &self.displacement {
|
||||||
Some(displacement) => write!(f, "{}+{}", index, displacement),
|
Some(displacement) => write!(f, "{} + {}", index, displacement),
|
||||||
None => write!(f, "{}", index),
|
None => write!(f, "[{} + 0]", index),
|
||||||
},
|
},
|
||||||
None => panic!("Invalid MemoryIndex encountered"),
|
None => panic!("Invalid MemoryIndex encountered"),
|
||||||
},
|
},
|
||||||
@@ -245,13 +368,13 @@ impl fmt::Display for MemoryIndex {
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
/// Displacement for ModRM
|
/// Can be used to encode either byte or word operands
|
||||||
pub enum Displacement {
|
pub enum OperandSize {
|
||||||
Byte(u8),
|
Byte(u8),
|
||||||
Word(u16),
|
Word(u16),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Displacement {
|
impl fmt::Display for OperandSize {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::Byte(byte) => write!(f, "{}", byte),
|
Self::Byte(byte) => write!(f, "{}", byte),
|
||||||
|
|||||||
Reference in New Issue
Block a user