use core::fmt; use crate::register::{Register, SegmentRegister}; pub type Byte = u8; // b pub type Word = u16; // w or v #[derive(Debug, Clone)] #[allow(dead_code)] /// Encodes either Byte- or Word-sized operands. pub enum Operand { Byte(Byte), Word(Word), } impl fmt::Display for Operand { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Byte(byte) => write!(f, "{}", byte), Self::Word(word) => write!(f, "{}", word), } } } impl fmt::LowerHex for Operand { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Byte(b) => fmt::LowerHex::fmt(b, f), Self::Word(v) => fmt::LowerHex::fmt(v, f), } } } #[derive(Debug, Clone)] #[allow(dead_code)] /// A single 'line' of executable ASM is called an Instruction, which /// contains the `Mnemonic` that will be executed, alongside its starting offset /// and the raw parsed bytes pub struct Instruction { pub start: usize, // location of the instruction start pub raw: Vec, // raw value of instruction pub opcode: Mnemonic, // actual instruction } impl Instruction { pub fn new() -> Self { Instruction { start: 0, raw: Vec::new(), opcode: Mnemonic::NOP(), } } } impl fmt::Display for Instruction { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:04x}: ", self.start).unwrap(); for b in self.raw.iter() { write!(f, "{:02x}", b).unwrap(); } write!(f, "\t\t{}", self.opcode) } } #[derive(Debug, Clone)] #[allow(dead_code, non_camel_case_types)] /// All possible mnemonic variantions. /// These are sorted by type and are not in hex-encoding order. // XXX: convert this copy and paste horror in a proc macro like // enum Opcode { // #[derive(default_variations)] // ADD, // ... // } // which then add all variants and also create the matching logic for // src/disasm.rs decode_instructions() pub enum Mnemonic { NOP(), // ADD ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Reigster ADD_Ib(ModRmTarget, Byte), // From Immediate into either Memory or Register ADD_Iv(ModRmTarget, Word), // From Immediate into either Memory or Register ADD_ALIb(Byte), ADD_AXIv(Word), // PUSH PUSH_R(Register), PUSH_S(SegmentRegister), PUSH_Mod(ModRmTarget), // POP POP_S(SegmentRegister), // POP to Segment Register POP_R(Register), // POP to Register POP_M(MemoryIndex), // POP to Memory // OR OR_FromReg(ModRmTarget, Register), OR_ToReg(ModRmTarget, Register), OR_Ib(ModRmTarget, Byte), OR_Iv(ModRmTarget, Word), OR_ALIb(Byte), OR_AXIv(Word), // ADC ADC_FromReg(ModRmTarget, Register), ADC_ToReg(ModRmTarget, Register), ADC_Ib(ModRmTarget, Byte), ADC_Iv(ModRmTarget, Word), ADC_ALIb(Byte), ADC_AXIv(Word), // SBB SBB_FromReg(ModRmTarget, Register), SBB_ToReg(ModRmTarget, Register), SBB_Ib(ModRmTarget, Byte), SBB_Iv(ModRmTarget, Word), SBB_ALIb(Byte), SBB_AXIv(Word), // AND AND_FromReg(ModRmTarget, Register), AND_ToReg(ModRmTarget, Register), AND_Ib(ModRmTarget, Byte), AND_Iv(ModRmTarget, Word), AND_ALIb(Byte), AND_AXIv(Word), // Override OVERRIDE(SegmentRegister), // Decimal Adjustment DAA, DAS, AAA, AAS, // SUB SUB_FromReg(ModRmTarget, Register), SUB_ToReg(ModRmTarget, Register), SUB_Ib(ModRmTarget, Byte), SUB_Iv(ModRmTarget, Word), SUB_ALIb(Byte), SUB_AXIv(Word), // XOR XOR_FromReg(ModRmTarget, Register), XOR_ToReg(ModRmTarget, Register), XOR_Ib(ModRmTarget, Byte), XOR_Iv(ModRmTarget, Word), XOR_ALIb(Byte), XOR_AXIv(Word), // CMP CMP_FromReg(ModRmTarget, Register), CMP_ToReg(ModRmTarget, Register), CMP_Ib(ModRmTarget, Byte), CMP_Iv(ModRmTarget, Word), CMP_ALIb(Byte), CMP_AXIv(Word), // INC INC_Reg(Register), INC_Mod(ModRmTarget), // DEC DEC_Reg(Register), DEC_Mod(ModRmTarget), // Jumps JO(Byte), JNO(Byte), JB(Byte), JNB(Byte), JZ(Byte), JNZ(Byte), JBE(Byte), JA(Byte), JS(Byte), JNS(Byte), JPE(Byte), JPO(Byte), JL(Byte), JGE(Byte), JLE(Byte), JG(Byte), // TEST TEST(ModRmTarget, Register), TEST_Ib(ModRmTarget, Byte), TEST_Iv(ModRmTarget, Word), TEST_ALIb(Byte), TEST_AXIv(Word), //XHCG XHCG(ModRmTarget, Register), XCHG_AX(Register), // from AX // MOV MOV_FromReg(ModRmTarget, Register), MOV_ToReg(ModRmTarget, Register), MOV_FromSReg(ModRmTarget, SegmentRegister), MOV_ToSReg(ModRmTarget, SegmentRegister), MOV_AL0b(Byte), MOV_AX0v(Word), MOV_0bAL(Byte), MOV_0vAX(Word), MOV_ALIb(Byte), MOV_CLIb(Byte), MOV_DLIb(Byte), MOV_BLIb(Byte), MOV_AHIb(Byte), MOV_CHIb(Byte), MOV_DHIb(Byte), MOV_BHIb(Byte), MOV_AXIv(Word), MOV_CXIv(Word), MOV_DXIv(Word), MOV_BXIv(Word), MOV_SPIv(Word), MOV_BPIv(Word), MOV_SIIv(Word), MOV_DIIv(Word), // LEA LEA(ModRmTarget, Register), // Sign extensions CBW, CWD, // CALL CALL_p(Pointer), CALL_v(Word), CALL_Mod(ModRmTarget), // JUMP JMP_p(Pointer), JMP_b(Byte), JMP_v(Word), JMP_Mod(ModRmTarget), // WAIT WAIT, // Push/Pop Flags PUSHF, POPF, SAHF, LAHF, // String Byte Operations MOVSB, MOVSW, CMPSB, CMPSW, STOSB, STOSW, LODSB, LODSW, SCASB, SCASW, // RET RETIw(Word), RET, // Load ES/DS Register LES(ModRmTarget), LDS(ModRmTarget), // NOT NOT(ModRmTarget), // NEG NEG(ModRmTarget), // MUL MUL(ModRmTarget), IMUL(ModRmTarget), // DIV DIV(ModRmTarget), IDIV(ModRmTarget), // HALT HLT, // Shift and Rotate ROL_b(ModRmTarget, Byte), ROR_b(ModRmTarget, Byte), RCL_b(ModRmTarget, Byte), RCR_b(ModRmTarget, Byte), SHL_b(ModRmTarget, Byte), SHR_b(ModRmTarget, Byte), SAR_b(ModRmTarget, Byte), ROL_fromReg(ModRmTarget, Register), ROR_fromReg(ModRmTarget, Register), RCL_fromReg(ModRmTarget, Register), RCR_fromReg(ModRmTarget, Register), SHL_fromReg(ModRmTarget, Register), SHR_fromReg(ModRmTarget, Register), SAR_fromReg(ModRmTarget, Register), // INT INT(Byte), } impl fmt::Display for Mnemonic { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::INT(byte) => write!(f, "int, {:x}", byte), Self::ADD_FromReg(mem, reg) => write!(f, "add {}, {}", mem, reg), Self::ADD_ToReg(mem, reg) => write!(f, "add {}, {}", reg, mem), Self::CMP_Iv(mem, imm) => write!(f, "cmp {}, {:04x}", mem, imm), Self::CMP_Ib(target, imm) => write!(f, "cmp {}, {:04x}", target, imm), Self::LEA(mem, reg) => write!(f, "lea {}, {}", reg, mem), Self::MOV_BXIv(word) => write!(f, "mov bx, {:04x}", word), Self::MOV_FromReg(target, reg) => write!(f, "mov {}, {}", target, reg), Self::XOR_FromReg(mem, reg) => write!(f, "xor {}, {}", mem, reg), _ => write!(f, "??? ??, ??"), } } } #[derive(Debug, Clone)] /// ModRM byte can either target a memory location or some register pub enum ModRmTarget { Memory(MemoryIndex), Register(Register), } impl std::fmt::Display for ModRmTarget { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::Memory(idx) => write!(f, "{}", idx), Self::Register(reg) => write!(f, "{}", reg), } } } /// A memory index operand is usually created by ModRM bytes or words. /// e.g. [bx+si] #[derive(Debug, Clone)] pub struct MemoryIndex { pub base: Option, pub index: Option, pub displacement: Option, } impl fmt::Display for MemoryIndex { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match &self.base { Some(base) => match &self.index { Some(index) => match &self.displacement { Some(displacement) => { write!(f, "[{}+{}+{:04x}]", base, index, displacement) } None => write!(f, "[{}+{}]", base, index), }, None => match &self.displacement { Some(displacement) => write!(f, "[{}+{:04x}]", base, displacement), None => write!(f, "[{}]", base), }, }, None => match &self.index { Some(index) => match &self.displacement { Some(displacement) => write!(f, "[{}+{:04x}]", index, displacement), None => write!(f, "[{}]", index), }, None => panic!("Invalid MemoryIndex encountered"), }, } } } #[derive(Debug, Clone)] /// 32-bit segment:offset pointer (e.g. for CALL instruction) pub struct Pointer { pub segment: Word, pub offset: Word, } impl std::fmt::Display for Pointer { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}:{}", self.segment, self.offset) } }