446 lines
11 KiB
Rust
446 lines
11 KiB
Rust
use core::fmt;
|
|
|
|
use crate::register::{Register, SegmentRegister};
|
|
|
|
/// Unsigned 8-bit value; the `b` in mnemonic variant names such as `ADD_Ib`.
pub type Byte = u8;

/// Signed 8-bit value, used for displacements of memory accesses.
pub type IByte = i8;

/// Unsigned 16-bit value; the `w` or `v` in mnemonic variant names such as
/// `RET_Iw` and `ADD_Iv`.
pub type Word = u16;

/// Signed 16-bit value, used for displacements of memory accesses.
pub type IWord = i16;
|
|
|
|
#[derive(Debug, Clone)]
|
|
#[allow(dead_code)]
|
|
/// Encodes either Byte- or Word-sized operands.
|
|
pub enum Operand {
|
|
Byte(Byte),
|
|
Word(Word),
|
|
}
|
|
|
|
impl fmt::Display for Operand {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match self {
|
|
Self::Byte(byte) => write!(f, "{}", byte),
|
|
Self::Word(word) => write!(f, "{}", word),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::LowerHex for Operand {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Self::Byte(b) => fmt::LowerHex::fmt(b, f),
|
|
Self::Word(v) => fmt::LowerHex::fmt(v, f),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
#[allow(dead_code)]
|
|
/// A single 'line' of executable ASM is called an Instruction, which
|
|
/// contains the `Mnemonic` that will be executed, alongside its starting offset
|
|
/// and the raw parsed bytes
|
|
pub struct Instruction {
|
|
pub start: usize, // location of the instruction start
|
|
pub raw: Vec<u8>, // raw value of instruction
|
|
pub opcode: Mnemonic, // actual instruction
|
|
}
|
|
|
|
impl Instruction {
|
|
pub fn new() -> Self {
|
|
Instruction {
|
|
start: 0,
|
|
raw: Vec::new(),
|
|
opcode: Mnemonic::NOP(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl fmt::Display for Instruction {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
write!(f, "{:04x}: ", self.start).unwrap();
|
|
|
|
write!(
|
|
f,
|
|
"{:<10}",
|
|
self.raw
|
|
.iter()
|
|
.map(|b| format!("{:02x}", b))
|
|
.collect::<Vec<String>>()
|
|
.join("")
|
|
)
|
|
.unwrap();
|
|
|
|
write!(f, "\t\t{}", self.opcode)
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
#[allow(dead_code, non_camel_case_types)]
|
|
/// All possible mnemonic variantions.
|
|
/// These are sorted by type and are not in hex-encoding order.
|
|
// XXX: convert this copy and paste horror in a proc macro like
|
|
// enum Opcode {
|
|
// #[derive(default_variations)]
|
|
// ADD,
|
|
// ...
|
|
// }
|
|
// which then add all variants and also create the matching logic for
|
|
// src/disasm.rs decode_instructions()
|
|
pub enum Mnemonic {
|
|
NOP(),
|
|
// ADD
|
|
ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register
|
|
ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Reigster
|
|
ADD_Ib(ModRmTarget, Byte), // From Immediate into either Memory or Register
|
|
ADD_Iv(ModRmTarget, Word), // From Immediate into either Memory or Register
|
|
ADD_ALIb(Byte),
|
|
ADD_AXIv(Word),
|
|
// PUSH
|
|
PUSH_R(Register),
|
|
PUSH_S(SegmentRegister),
|
|
PUSH_Mod(ModRmTarget),
|
|
// POP
|
|
POP_S(SegmentRegister), // POP to Segment Register
|
|
POP_R(Register), // POP to Register
|
|
POP_M(MemoryIndex), // POP to Memory
|
|
// OR
|
|
OR_FromReg(ModRmTarget, Register),
|
|
OR_ToReg(ModRmTarget, Register),
|
|
OR_Ib(ModRmTarget, Byte),
|
|
OR_Iv(ModRmTarget, Word),
|
|
OR_ALIb(Byte),
|
|
OR_AXIv(Word),
|
|
// ADC
|
|
ADC_FromReg(ModRmTarget, Register),
|
|
ADC_ToReg(ModRmTarget, Register),
|
|
ADC_Ib(ModRmTarget, Byte),
|
|
ADC_Iv(ModRmTarget, Word),
|
|
ADC_ALIb(Byte),
|
|
ADC_AXIv(Word),
|
|
// SBB
|
|
SBB_FromReg(ModRmTarget, Register),
|
|
SBB_ToReg(ModRmTarget, Register),
|
|
SBB_Ib(ModRmTarget, Byte),
|
|
SBB_Iv(ModRmTarget, Word),
|
|
SBB_ALIb(Byte),
|
|
SBB_AXIv(Word),
|
|
// AND
|
|
AND_FromReg(ModRmTarget, Register),
|
|
AND_ToReg(ModRmTarget, Register),
|
|
AND_Ib(ModRmTarget, Byte),
|
|
AND_Iv(ModRmTarget, Word),
|
|
AND_ALIb(Byte),
|
|
AND_AXIv(Word),
|
|
// Override
|
|
OVERRIDE(SegmentRegister),
|
|
// Decimal Adjustment
|
|
DAA,
|
|
DAS,
|
|
AAA,
|
|
AAS,
|
|
// SUB
|
|
SUB_FromReg(ModRmTarget, Register),
|
|
SUB_ToReg(ModRmTarget, Register),
|
|
SUB_Ib(ModRmTarget, Byte),
|
|
SUB_Iv(ModRmTarget, Word),
|
|
SUB_ALIb(Byte),
|
|
SUB_AXIv(Word),
|
|
// XOR
|
|
XOR_FromReg(ModRmTarget, Register),
|
|
XOR_ToReg(ModRmTarget, Register),
|
|
XOR_Ib(ModRmTarget, Byte),
|
|
XOR_Iv(ModRmTarget, Word),
|
|
XOR_ALIb(Byte),
|
|
XOR_AXIv(Word),
|
|
// CMP
|
|
CMP_FromReg(ModRmTarget, Register),
|
|
CMP_ToReg(ModRmTarget, Register),
|
|
CMP_Ib(ModRmTarget, Byte),
|
|
CMP_Iv(ModRmTarget, Word),
|
|
CMP_ALIb(Byte),
|
|
CMP_AXIv(Word),
|
|
// INC
|
|
INC_Reg(Register),
|
|
INC_Mod(ModRmTarget),
|
|
// DEC
|
|
DEC_Reg(Register),
|
|
DEC_Mod(ModRmTarget),
|
|
// Jumps
|
|
JO(IByte),
|
|
JNO(IByte),
|
|
JB(IByte),
|
|
JNB(IByte),
|
|
JZ(IByte),
|
|
JNZ(IByte),
|
|
JBE(IByte),
|
|
JA(IByte),
|
|
JS(IByte),
|
|
JNS(IByte),
|
|
JPE(IByte),
|
|
JPO(IByte),
|
|
JL(IByte),
|
|
JGE(IByte),
|
|
JLE(IByte),
|
|
JG(IByte),
|
|
LOOPNZ(IByte),
|
|
LOOPZ(IByte),
|
|
LOOP(IByte),
|
|
JCXZ(IByte),
|
|
|
|
// TEST
|
|
TEST(ModRmTarget, Register),
|
|
TEST_Ib(ModRmTarget, Byte),
|
|
TEST_Iv(ModRmTarget, Word),
|
|
TEST_ALIb(Byte),
|
|
TEST_AXIv(Word),
|
|
//XHCG
|
|
XHCG(ModRmTarget, Register),
|
|
XCHG_AX(Register), // from AX
|
|
// MOV
|
|
MOV_FromReg(ModRmTarget, Register),
|
|
MOV_ToReg(ModRmTarget, Register),
|
|
MOV_FromSReg(ModRmTarget, SegmentRegister),
|
|
MOV_ToSReg(ModRmTarget, SegmentRegister),
|
|
MOV_Ib(ModRmTarget, Byte),
|
|
MOV_Iv(ModRmTarget, Word),
|
|
|
|
MOV_AL0b(Byte),
|
|
MOV_AX0v(Word),
|
|
MOV_0bAL(Byte),
|
|
MOV_0vAX(Word),
|
|
|
|
MOV_ALIb(Byte),
|
|
MOV_CLIb(Byte),
|
|
MOV_DLIb(Byte),
|
|
MOV_BLIb(Byte),
|
|
MOV_AHIb(Byte),
|
|
MOV_CHIb(Byte),
|
|
MOV_DHIb(Byte),
|
|
MOV_BHIb(Byte),
|
|
MOV_AXIv(Word),
|
|
MOV_CXIv(Word),
|
|
MOV_DXIv(Word),
|
|
MOV_BXIv(Word),
|
|
MOV_SPIv(Word),
|
|
MOV_BPIv(Word),
|
|
MOV_SIIv(Word),
|
|
MOV_DIIv(Word),
|
|
// LEA
|
|
LEA(ModRmTarget, Register),
|
|
// Sign extensions
|
|
CBW,
|
|
CWD,
|
|
// CALL
|
|
CALL_p(Pointer),
|
|
CALL_v(Word),
|
|
CALL_Mod(ModRmTarget),
|
|
// JUMP
|
|
JMP_p(Pointer),
|
|
JMP_b(Byte),
|
|
JMP_v(Word),
|
|
JMP_Mod(ModRmTarget),
|
|
// WAIT
|
|
WAIT,
|
|
// Push/Pop Flags
|
|
PUSHF,
|
|
POPF,
|
|
SAHF,
|
|
LAHF,
|
|
// String Byte Operations
|
|
MOVSB,
|
|
MOVSW,
|
|
CMPSB,
|
|
CMPSW,
|
|
STOSB,
|
|
STOSW,
|
|
LODSB,
|
|
LODSW,
|
|
SCASB,
|
|
SCASW,
|
|
// RET
|
|
RET_Iw(Word),
|
|
RET,
|
|
RETF_Iw(Word),
|
|
RETF,
|
|
IRET,
|
|
// Load ES/DS Register
|
|
LES(ModRmTarget),
|
|
LDS(ModRmTarget),
|
|
// NOT
|
|
NOT(ModRmTarget),
|
|
// NEG
|
|
NEG(ModRmTarget),
|
|
// MUL
|
|
MUL(ModRmTarget),
|
|
IMUL(ModRmTarget),
|
|
// DIV
|
|
DIV(ModRmTarget),
|
|
IDIV(ModRmTarget),
|
|
// HALT
|
|
HLT,
|
|
// Shift and Rotate
|
|
ROL_b(ModRmTarget, Byte),
|
|
ROR_b(ModRmTarget, Byte),
|
|
RCL_b(ModRmTarget, Byte),
|
|
RCR_b(ModRmTarget, Byte),
|
|
SHL_b(ModRmTarget, Byte),
|
|
SHR_b(ModRmTarget, Byte),
|
|
SAR_b(ModRmTarget, Byte),
|
|
ROL_fromReg(ModRmTarget, Register),
|
|
ROR_fromReg(ModRmTarget, Register),
|
|
RCL_fromReg(ModRmTarget, Register),
|
|
RCR_fromReg(ModRmTarget, Register),
|
|
SHL_fromReg(ModRmTarget, Register),
|
|
SHR_fromReg(ModRmTarget, Register),
|
|
SAR_fromReg(ModRmTarget, Register),
|
|
// IN
|
|
IN_AL(Byte),
|
|
IN_AX(Byte),
|
|
IN_ALDX,
|
|
IN_AXDX,
|
|
// OUT
|
|
OUT_AL(Byte),
|
|
OUT_AX(Byte),
|
|
OUT_ALDX,
|
|
OUT_AXDX,
|
|
// INT
|
|
INT(Byte),
|
|
INTO,
|
|
// Flag Manipulation
|
|
CLC,
|
|
STC,
|
|
CLI,
|
|
STI,
|
|
CLD,
|
|
STD,
|
|
CMC,
|
|
// Repeat prefix
|
|
REPNZ,
|
|
REPZ,
|
|
// Adjust
|
|
AAM(Byte),
|
|
AAD(Byte),
|
|
// MISC
|
|
XLAT,
|
|
}
|
|
|
|
impl fmt::Display for Mnemonic {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match self {
|
|
Self::INT(byte) => write!(f, "int, {:x}", byte),
|
|
Self::ADD_FromReg(mem, reg) => write!(f, "add {}, {}", mem, reg),
|
|
Self::ADD_ToReg(mem, reg) => write!(f, "add {}, {}", reg, mem),
|
|
Self::CMP_Iv(mem, imm) => write!(f, "cmp {}, {:04x}", mem, imm),
|
|
Self::CMP_Ib(target, imm) => write!(f, "cmp {}, {:04x}", target, imm),
|
|
Self::LEA(mem, reg) => write!(f, "lea {}, {}", reg, mem),
|
|
Self::MOV_BXIv(word) => write!(f, "mov bx, {:04x}", word),
|
|
Self::MOV_FromReg(target, reg) => write!(f, "mov {}, {}", target, reg),
|
|
Self::XOR_FromReg(mem, reg) => write!(f, "xor {}, {}", mem, reg),
|
|
_ => write!(f, "??? ??, ??"),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
/// ModRM byte can either target a memory location or some register
|
|
pub enum ModRmTarget {
|
|
Memory(MemoryIndex),
|
|
Register(Register),
|
|
}
|
|
|
|
impl std::fmt::Display for ModRmTarget {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
match self {
|
|
Self::Memory(idx) => write!(f, "{}", idx),
|
|
Self::Register(reg) => write!(f, "{}", reg),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Displacements are signed versions of u8 and u16.
#[derive(Debug, Clone)]
pub enum Displacement {
    /// Signed 8-bit displacement.
    IByte(i8),
    /// Signed 16-bit displacement.
    IWord(i16),
}

impl fmt::LowerHex for Displacement {
    /// Forwards to the `LowerHex` implementation of the wrapped integer.
    ///
    /// Negative values are therefore printed in two's complement (for
    /// example `IByte(-1)` gives `ff`), and flags such as `#` are honoured.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::IByte(b) => fmt::LowerHex::fmt(b, f),
            Self::IWord(v) => fmt::LowerHex::fmt(v, f),
        }
    }
}

impl fmt::Display for Displacement {
    /// Prints the displacement as a signed offset suffix, e.g. ` + 0x5` or
    /// ` - 0x10`, ready to be appended after a base or index register.
    ///
    /// Zero is printed as ` + 0x0`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // `unsigned_abs` cannot overflow, unlike negating the signed value:
        // `i8::MIN * -1` and `i16::MIN * -1` do not fit their own type.
        let (sign, magnitude) = match *self {
            Self::IByte(b) => (if b < 0 { '-' } else { '+' }, u16::from(b.unsigned_abs())),
            Self::IWord(w) => (if w < 0 { '-' } else { '+' }, w.unsigned_abs()),
        };
        write!(f, " {} {:#x}", sign, magnitude)
    }
}
|
|
|
|
/// A memory index operand is usually created by ModRM bytes or words.
|
|
/// e.g. [bx+si]
|
|
#[derive(Debug, Clone)]
|
|
pub struct MemoryIndex {
|
|
pub base: Option<Register>,
|
|
pub index: Option<Register>,
|
|
pub displacement: Option<Displacement>,
|
|
}
|
|
|
|
impl fmt::Display for MemoryIndex {
|
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
|
match &self.base {
|
|
Some(base) => match &self.index {
|
|
Some(index) => match &self.displacement {
|
|
Some(displacement) => {
|
|
write!(f, "[{} + {}{}]", base, index, displacement)
|
|
}
|
|
None => write!(f, "[{} + {}]", base, index),
|
|
},
|
|
None => match &self.displacement {
|
|
Some(displacement) => write!(f, "[{}{}]", base, displacement),
|
|
None => write!(f, "[{}]", base),
|
|
},
|
|
},
|
|
None => match &self.index {
|
|
Some(index) => match &self.displacement {
|
|
Some(displacement) => write!(f, "[{}{}]", index, displacement),
|
|
None => write!(f, "[{}]", index),
|
|
},
|
|
None => match &self.displacement {
|
|
Some(displacement) => write!(f, "[{:#x}]", displacement),
|
|
None => panic!("Memory Index without base, index and displacement"),
|
|
},
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
/// 32-bit segment:offset pointer (e.g. for CALL instruction)
|
|
pub struct Pointer {
|
|
pub segment: Word,
|
|
pub offset: Word,
|
|
}
|
|
|
|
impl std::fmt::Display for Pointer {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
write!(f, "{}:{}", self.segment, self.offset)
|
|
}
|
|
}
|