Files
8086-rs/src/instructions.rs
2025-05-14 13:54:13 +09:00

357 lines
9.4 KiB
Rust

use core::fmt;
use crate::register::{Register, SegmentRegister};
/// 8-bit value; variant names that end in `b` carry one of these.
pub type Byte = u8; // b
/// 16-bit value; variant names that end in `w` or `v` carry one of these.
pub type Word = u16; // w or v

/// Encodes either Byte- or Word-sized operands.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum Operand {
    /// An 8-bit operand.
    Byte(Byte),
    /// A 16-bit operand.
    Word(Word),
}

/// Decimal rendering of the wrapped value.
///
/// The call is forwarded to the integer's own `Display` impl with the
/// caller's formatter, so width, fill and zero-padding flags such as
/// `{:>5}` or `{:04}` are honoured, exactly as the `LowerHex` impl does.
impl fmt::Display for Operand {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Byte(byte) => fmt::Display::fmt(byte, f),
            Self::Word(word) => fmt::Display::fmt(word, f),
        }
    }
}

/// Lower-case hexadecimal rendering of the wrapped value.
///
/// Formatter flags are passed through, so `{:04x}` pads both variants to
/// four digits.
impl fmt::LowerHex for Operand {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Byte(byte) => fmt::LowerHex::fmt(byte, f),
            Self::Word(word) => fmt::LowerHex::fmt(word, f),
        }
    }
}
/// One 'line' of executable ASM.
///
/// Bundles the [`Mnemonic`] that will be executed with the offset at which
/// the instruction starts and the raw bytes it was parsed from.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct Instruction {
    /// Location of the instruction start.
    pub start: usize,
    /// Raw value of the instruction.
    pub raw: Vec<u8>,
    /// The actual instruction.
    pub opcode: Mnemonic,
}
impl Instruction {
    /// Builds a placeholder instruction: start offset `0`, no raw bytes and
    /// `Mnemonic::NOP()` as the opcode.
    pub fn new() -> Self {
        let start = 0;
        let raw = Vec::new();
        let opcode = Mnemonic::NOP();
        Self { start, raw, opcode }
    }
}
/// Renders one disassembly line: the start offset as four hex digits and a
/// colon, the raw bytes as two hex digits each, two tabs, then the opcode.
///
/// Formatter errors are propagated with `?` rather than unwrapped, so a
/// failing sink yields `Err(fmt::Error)` instead of a panic.
impl fmt::Display for Instruction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:04x}: ", self.start)?;
        for byte in &self.raw {
            write!(f, "{:02x}", byte)?;
        }
        write!(f, "\t\t{}", self.opcode)
    }
}
#[derive(Debug, Clone)]
#[allow(dead_code, non_camel_case_types)]
/// All possible mnemonic variations.
/// These are sorted by type and are not in hex-encoding order.
///
/// Variant names follow the pattern `<MNEMONIC>_<operand form>`; the payload
/// of each variant holds the operands for that form (a `Byte`/`Word` value,
/// a register, a `ModRmTarget`, ...).
// XXX: convert this copy-and-paste horror into a proc macro like
// enum Opcode {
// #[derive(default_variations)]
// ADD,
// ...
// }
// which then adds all variants and also creates the matching logic for
// src/disasm.rs decode_instructions()
pub enum Mnemonic {
// Used as the placeholder opcode by `Instruction::new()`.
NOP(),
// ADD
ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register
ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Register
ADD_Ib(ModRmTarget, Byte), // From Immediate into either Memory or Register
ADD_Iv(ModRmTarget, Word), // From Immediate into either Memory or Register
ADD_ALIb(Byte),
ADD_AXIv(Word),
// PUSH
PUSH_R(Register),
PUSH_S(SegmentRegister),
PUSH_Mod(ModRmTarget),
// POP
POP_S(SegmentRegister), // POP to Segment Register
POP_R(Register), // POP to Register
POP_M(MemoryIndex), // POP to Memory
// OR (same operand forms as ADD)
OR_FromReg(ModRmTarget, Register),
OR_ToReg(ModRmTarget, Register),
OR_Ib(ModRmTarget, Byte),
OR_Iv(ModRmTarget, Word),
OR_ALIb(Byte),
OR_AXIv(Word),
// ADC (same operand forms as ADD)
ADC_FromReg(ModRmTarget, Register),
ADC_ToReg(ModRmTarget, Register),
ADC_Ib(ModRmTarget, Byte),
ADC_Iv(ModRmTarget, Word),
ADC_ALIb(Byte),
ADC_AXIv(Word),
// SBB (same operand forms as ADD)
SBB_FromReg(ModRmTarget, Register),
SBB_ToReg(ModRmTarget, Register),
SBB_Ib(ModRmTarget, Byte),
SBB_Iv(ModRmTarget, Word),
SBB_ALIb(Byte),
SBB_AXIv(Word),
// AND (same operand forms as ADD)
AND_FromReg(ModRmTarget, Register),
AND_ToReg(ModRmTarget, Register),
AND_Ib(ModRmTarget, Byte),
AND_Iv(ModRmTarget, Word),
AND_ALIb(Byte),
AND_AXIv(Word),
// Segment override
OVERRIDE(SegmentRegister),
// Decimal / ASCII Adjustment
DAA,
DAS,
AAA,
AAS,
// SUB (same operand forms as ADD)
SUB_FromReg(ModRmTarget, Register),
SUB_ToReg(ModRmTarget, Register),
SUB_Ib(ModRmTarget, Byte),
SUB_Iv(ModRmTarget, Word),
SUB_ALIb(Byte),
SUB_AXIv(Word),
// XOR (same operand forms as ADD)
XOR_FromReg(ModRmTarget, Register),
XOR_ToReg(ModRmTarget, Register),
XOR_Ib(ModRmTarget, Byte),
XOR_Iv(ModRmTarget, Word),
XOR_ALIb(Byte),
XOR_AXIv(Word),
// CMP (same operand forms as ADD)
CMP_FromReg(ModRmTarget, Register),
CMP_ToReg(ModRmTarget, Register),
CMP_Ib(ModRmTarget, Byte),
CMP_Iv(ModRmTarget, Word),
CMP_ALIb(Byte),
CMP_AXIv(Word),
// INC
INC_Reg(Register),
INC_Mod(ModRmTarget),
// DEC
DEC_Reg(Register),
DEC_Mod(ModRmTarget),
// Conditional jumps; each carries a single Byte operand
JO(Byte),
JNO(Byte),
JB(Byte),
JNB(Byte),
JZ(Byte),
JNZ(Byte),
JBE(Byte),
JA(Byte),
JS(Byte),
JNS(Byte),
JPE(Byte),
JPO(Byte),
JL(Byte),
JGE(Byte),
JLE(Byte),
JG(Byte),
// TEST
TEST(ModRmTarget, Register),
TEST_Ib(ModRmTarget, Byte),
TEST_Iv(ModRmTarget, Word),
TEST_ALIb(Byte),
TEST_AXIv(Word),
// XCHG
// NOTE(review): the first variant is spelled `XHCG`, presumably a typo for
// XCHG; the name is kept because renaming it would break existing users.
XHCG(ModRmTarget, Register),
XCHG_AX(Register), // from AX
// MOV
MOV_FromReg(ModRmTarget, Register),
MOV_ToReg(ModRmTarget, Register),
MOV_FromSReg(ModRmTarget, SegmentRegister),
MOV_ToSReg(ModRmTarget, SegmentRegister),
// NOTE(review): `0b`/`0v` presumably denote memory-offset operands to/from
// AL/AX -- confirm against the decoder.
MOV_AL0b(Byte),
MOV_AX0v(Word),
MOV_0bAL(Byte),
MOV_0vAX(Word),
// MOV of a Byte value, one variant per 8-bit register name
MOV_ALIb(Byte),
MOV_CLIb(Byte),
MOV_DLIb(Byte),
MOV_BLIb(Byte),
MOV_AHIb(Byte),
MOV_CHIb(Byte),
MOV_DHIb(Byte),
MOV_BHIb(Byte),
// MOV of a Word value, one variant per 16-bit register name
MOV_AXIv(Word),
MOV_CXIv(Word),
MOV_DXIv(Word),
MOV_BXIv(Word),
MOV_SPIv(Word),
MOV_BPIv(Word),
MOV_SIIv(Word),
MOV_DIIv(Word),
// LEA
LEA(ModRmTarget, Register),
// Sign extensions
CBW,
CWD,
// CALL
CALL_p(Pointer),
CALL_v(Word),
CALL_Mod(ModRmTarget),
// JUMP
JMP_p(Pointer),
JMP_b(Byte),
JMP_v(Word),
JMP_Mod(ModRmTarget),
// WAIT
WAIT,
// Push/Pop Flags and flag transfer through AH
PUSHF,
POPF,
SAHF,
LAHF,
// String Operations (`B` and `W` suffixed variants)
MOVSB,
MOVSW,
CMPSB,
CMPSW,
STOSB,
STOSW,
LODSB,
LODSW,
SCASB,
SCASW,
// RET
RETIw(Word),
RET,
// Load ES/DS Register
LES(ModRmTarget),
LDS(ModRmTarget),
// NOT
NOT(ModRmTarget),
// NEG
NEG(ModRmTarget),
// MUL
MUL(ModRmTarget),
IMUL(ModRmTarget),
// DIV
DIV(ModRmTarget),
IDIV(ModRmTarget),
// HALT
HLT,
// Shift and Rotate
// `_b` forms carry a Byte operand, `_fromReg` forms a Register operand.
ROL_b(ModRmTarget, Byte),
ROR_b(ModRmTarget, Byte),
RCL_b(ModRmTarget, Byte),
RCR_b(ModRmTarget, Byte),
SHL_b(ModRmTarget, Byte),
SHR_b(ModRmTarget, Byte),
SAR_b(ModRmTarget, Byte),
ROL_fromReg(ModRmTarget, Register),
ROR_fromReg(ModRmTarget, Register),
RCL_fromReg(ModRmTarget, Register),
RCR_fromReg(ModRmTarget, Register),
SHL_fromReg(ModRmTarget, Register),
SHR_fromReg(ModRmTarget, Register),
SAR_fromReg(ModRmTarget, Register),
// INT
INT(Byte),
}
/// Renders a mnemonic as assembly text: lower-case mnemonic, a space, then
/// the operands separated by `", "`.
///
/// Only a subset of variants is implemented so far; every other variant is
/// printed as the placeholder `??? ??, ??`.
impl fmt::Display for Mnemonic {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            // The operand follows the mnemonic directly; the previous
            // "int, xx" output had a stray comma that no other arm uses.
            Self::INT(byte) => write!(f, "int {:x}", byte),
            // `FromReg` prints the ModRM target first, `ToReg` the register first.
            Self::ADD_FromReg(mem, reg) => write!(f, "add {}, {}", mem, reg),
            Self::ADD_ToReg(mem, reg) => write!(f, "add {}, {}", reg, mem),
            Self::CMP_Iv(mem, imm) => write!(f, "cmp {}, {:04x}", mem, imm),
            // NOTE(review): the Byte immediate is padded to four hex digits
            // like the Word form -- confirm this is intended.
            Self::CMP_Ib(target, imm) => write!(f, "cmp {}, {:04x}", target, imm),
            Self::LEA(mem, reg) => write!(f, "lea {}, {}", reg, mem),
            Self::MOV_BXIv(word) => write!(f, "mov bx, {:04x}", word),
            Self::MOV_FromReg(target, reg) => write!(f, "mov {}, {}", target, reg),
            Self::XOR_FromReg(mem, reg) => write!(f, "xor {}, {}", mem, reg),
            // Placeholder for every variant without a rendering yet.
            _ => write!(f, "??? ??, ??"),
        }
    }
}
/// Operand selected by a ModRM byte: either a memory location or a register.
#[derive(Debug, Clone)]
pub enum ModRmTarget {
    /// The operand is a memory location.
    Memory(MemoryIndex),
    /// The operand is a register.
    Register(Register),
}
/// Prints whichever operand the target wraps, using that operand's own
/// `Display` output.
impl std::fmt::Display for ModRmTarget {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the wrapped operand first, then format it in a single place.
        let operand: &dyn std::fmt::Display = match self {
            Self::Memory(memory) => memory,
            Self::Register(register) => register,
        };
        write!(f, "{}", operand)
    }
}
/// Memory operand, usually produced from ModRM bytes or words,
/// e.g. `[bx+si]`.
///
/// Each component is optional.
#[derive(Debug, Clone)]
pub struct MemoryIndex {
    /// Base register, if present.
    pub base: Option<Register>,
    /// Index register, if present.
    pub index: Option<Register>,
    /// Displacement, if present.
    pub displacement: Option<Operand>,
}
/// Renders the operand in bracket notation.
///
/// The components that are present are joined with `+` in the order base,
/// index, displacement; the displacement is printed as zero-padded
/// lower-case hex (`{:04x}`). Examples: `[bx+si+0004]`, `[bx+si]`, `[bp+0002]`,
/// `[si]`.
///
/// A displacement with neither base nor index (a direct address) is rendered
/// as `[disp]` instead of panicking.
///
/// # Panics
///
/// Panics if base, index and displacement are all `None`, because such a
/// value does not describe any address.
impl fmt::Display for MemoryIndex {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match (&self.base, &self.index, &self.displacement) {
            (Some(base), Some(index), Some(disp)) => {
                write!(f, "[{}+{}+{:04x}]", base, index, disp)
            }
            (Some(base), Some(index), None) => write!(f, "[{}+{}]", base, index),
            // A lone base and a lone index are printed the same way.
            (Some(reg), None, Some(disp)) | (None, Some(reg), Some(disp)) => {
                write!(f, "[{}+{:04x}]", reg, disp)
            }
            (Some(reg), None, None) | (None, Some(reg), None) => write!(f, "[{}]", reg),
            // Direct addressing: only a displacement is present.
            (None, None, Some(disp)) => write!(f, "[{:04x}]", disp),
            (None, None, None) => panic!("Invalid MemoryIndex encountered"),
        }
    }
}
/// 32-bit `segment:offset` pointer (e.g. for the CALL instruction).
#[derive(Debug, Clone)]
pub struct Pointer {
    /// Segment part of the pointer.
    pub segment: Word,
    /// Offset part of the pointer.
    pub offset: Word,
}
/// Prints the pointer as `segment:offset`, both parts with the default
/// (decimal) integer formatting.
impl std::fmt::Display for Pointer {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self { segment, offset } = self;
        write!(f, "{}:{}", segment, offset)
    }
}