use core::fmt; use crate::register::{Register, SegmentRegister}; pub type Byte = u8; // b pub type IByte = i8; // used for displacements of memory access pub type Word = u16; // w or v pub type IWord = i16; // used for displacement of memory access #[derive(Debug, Clone)] #[allow(dead_code)] /// Encodes either Byte- or Word-sized operands. pub enum Operand { Byte(Byte), Word(Word), } impl fmt::Display for Operand { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Byte(byte) => write!(f, "{}", byte), Self::Word(word) => write!(f, "{}", word), } } } impl fmt::LowerHex for Operand { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Byte(b) => fmt::LowerHex::fmt(b, f), Self::Word(v) => fmt::LowerHex::fmt(v, f), } } } #[derive(Debug, Clone)] #[allow(dead_code)] /// A single 'line' of executable ASM is called an Instruction, which /// contains the `Mnemonic` that will be executed, alongside its starting offset /// and the raw parsed bytes pub struct Instruction { pub start: usize, // location of the instruction start pub raw: Vec, // raw value of instruction pub opcode: Mnemonic, // actual instruction } impl Instruction { pub fn new() -> Self { Instruction { start: 0, raw: Vec::new(), opcode: Mnemonic::NOP(), } } } impl fmt::Display for Instruction { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{:04x}: ", self.start).unwrap(); write!( f, "{:<10}", self.raw .iter() .map(|b| format!("{:02x}", b)) .collect::>() .join("") ) .unwrap(); write!(f, "\t\t{}", self.opcode) } } #[derive(Debug, Clone)] #[allow(dead_code, non_camel_case_types)] /// All possible mnemonic variantions. /// These are sorted by type and are not in hex-encoding order. // XXX: convert this copy and paste horror in a proc macro like // enum Opcode { // #[derive(default_variations)] // ADD, // ... // } // which then add all variants and also create the matching logic for // src/disasm.rs decode_instructions() pub enum Mnemonic { NOP(), // ADD ADD_FromReg(ModRmTarget, Register), // From Register into either Memory or Register ADD_ToReg(ModRmTarget, Register), // From either Memory or Register into Reigster ADD_Ib(ModRmTarget, IByte), // From Immediate into either Memory or Register ADD_Iv(ModRmTarget, IWord), // From Immediate into either Memory or Register ADD_ALIb(Byte), ADD_AXIv(Word), // PUSH PUSH_R(Register), PUSH_S(SegmentRegister), PUSH_Mod(ModRmTarget), // POP POP_S(SegmentRegister), // POP to Segment Register POP_R(Register), // POP to Register POP_M(MemoryIndex), // POP to Memory // OR OR_FromReg(ModRmTarget, Register), OR_ToReg(ModRmTarget, Register), OR_Ib(ModRmTarget, IByte), OR_Iv(ModRmTarget, IWord), OR_ALIb(Byte), OR_AXIv(Word), // ADC ADC_FromReg(ModRmTarget, Register), ADC_ToReg(ModRmTarget, Register), ADC_Ib(ModRmTarget, IByte), ADC_Iv(ModRmTarget, IWord), ADC_ALIb(Byte), ADC_AXIv(Word), // SBB SBB_FromReg(ModRmTarget, Register), SBB_ToReg(ModRmTarget, Register), SBB_Ib(ModRmTarget, IByte), SBB_Iv(ModRmTarget, IWord), SBB_ALIb(Byte), SBB_AXIv(Word), // AND AND_FromReg(ModRmTarget, Register), AND_ToReg(ModRmTarget, Register), AND_Ib(ModRmTarget, IByte), AND_Iv(ModRmTarget, IWord), AND_ALIb(Byte), AND_AXIv(Word), // Override OVERRIDE(SegmentRegister), // Decimal Adjustment DAA, DAS, AAA, AAS, // SUB SUB_FromReg(ModRmTarget, Register), SUB_ToReg(ModRmTarget, Register), SUB_Ib(ModRmTarget, IByte), SUB_Iv(ModRmTarget, IWord), SUB_ALIb(Byte), SUB_AXIv(Word), // XOR XOR_FromReg(ModRmTarget, Register), XOR_ToReg(ModRmTarget, Register), XOR_Ib(ModRmTarget, IByte), XOR_Iv(ModRmTarget, IWord), XOR_ALIb(Byte), XOR_AXIv(Word), // CMP CMP_FromReg(ModRmTarget, Register), CMP_ToReg(ModRmTarget, Register), CMP_Ib(ModRmTarget, IByte), CMP_Iv(ModRmTarget, IWord), CMP_ALIb(Byte), CMP_AXIv(Word), // INC INC_Reg(Register), INC_Mod(ModRmTarget), // DEC DEC_Reg(Register), DEC_Mod(ModRmTarget), // Jumps JO(IByte), JNO(IByte), JB(IByte), JNB(IByte), JZ(IByte), JNZ(IByte), JBE(IByte), JA(IByte), JS(IByte), JNS(IByte), JPE(IByte), JPO(IByte), JL(IByte), JGE(IByte), JLE(IByte), JG(IByte), LOOPNZ(IByte), LOOPZ(IByte), LOOP(IByte), JCXZ(IByte), // TEST TEST(ModRmTarget, Register), TEST_Ib(ModRmTarget, Byte), TEST_Iv(ModRmTarget, Word), TEST_ALIb(Byte), TEST_AXIv(Word), //XHCG XHCG(ModRmTarget, Register), XCHG_AX(Register), // from AX // MOV MOV_FromReg(ModRmTarget, Register), MOV_ToReg(ModRmTarget, Register), MOV_FromSReg(ModRmTarget, SegmentRegister), MOV_ToSReg(ModRmTarget, SegmentRegister), MOV_Ib(ModRmTarget, Byte), MOV_Iv(ModRmTarget, Word), MOV_AL0b(Byte), MOV_AX0v(Word), MOV_0bAL(Byte), MOV_0vAX(Word), MOV_ALIb(Byte), MOV_CLIb(Byte), MOV_DLIb(Byte), MOV_BLIb(Byte), MOV_AHIb(Byte), MOV_CHIb(Byte), MOV_DHIb(Byte), MOV_BHIb(Byte), MOV_AXIv(Word), MOV_CXIv(Word), MOV_DXIv(Word), MOV_BXIv(Word), MOV_SPIv(Word), MOV_BPIv(Word), MOV_SIIv(Word), MOV_DIIv(Word), // LEA LEA(ModRmTarget, Register), // Sign extensions CBW, CWD, // CALL CALL_p(Pointer), CALL_v(Word), CALL_Mod(ModRmTarget), // JUMP JMP_p(Pointer), JMP_b(Byte), JMP_v(Word), JMP_Mod(ModRmTarget), // WAIT WAIT, // Push/Pop Flags PUSHF, POPF, SAHF, LAHF, // String Byte Operations MOVSB, MOVSW, CMPSB, CMPSW, STOSB, STOSW, LODSB, LODSW, SCASB, SCASW, // RET RETIw(Word), RET, RETF_Iw(Word), RETF, IRET, // Load ES/DS Register LES(ModRmTarget), LDS(ModRmTarget), // NOT NOT(ModRmTarget), // NEG NEG(ModRmTarget), // MUL MUL(ModRmTarget), IMUL(ModRmTarget), // DIV DIV(ModRmTarget), IDIV(ModRmTarget), // HALT HLT, // Shift and Rotate ROL_b(ModRmTarget, Byte), ROR_b(ModRmTarget, Byte), RCL_b(ModRmTarget, Byte), RCR_b(ModRmTarget, Byte), SHL_b(ModRmTarget, Byte), SHR_b(ModRmTarget, Byte), SAR_b(ModRmTarget, Byte), ROL_fromReg(ModRmTarget, Register), ROR_fromReg(ModRmTarget, Register), RCL_fromReg(ModRmTarget, Register), RCR_fromReg(ModRmTarget, Register), SHL_fromReg(ModRmTarget, Register), SHR_fromReg(ModRmTarget, Register), SAR_fromReg(ModRmTarget, Register), // IN IN_AL(Byte), IN_AX(Byte), IN_ALDX, IN_AXDX, // OUT OUT_AL(Byte), OUT_AX(Byte), OUT_ALDX, OUT_AXDX, // INT INT(Byte), INTO, // Flag Manipulation CLC, STC, CLI, STI, CLD, STD, CMC, // Repeat prefix REPNZ, REPZ, // Adjust AAM(Byte), AAD(Byte), // MISC XLAT, } impl fmt::Display for Mnemonic { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::INT(byte) => write!(f, "int, {:x}", byte), Self::ADD_FromReg(mem, reg) => write!(f, "add {}, {}", mem, reg), Self::ADD_ToReg(mem, reg) => write!(f, "add {}, {}", reg, mem), Self::CMP_Iv(mem, imm) => write!(f, "cmp {}, {:04x}", mem, imm), Self::CMP_Ib(target, imm) => write!(f, "cmp {}, {:04x}", target, imm), Self::LEA(mem, reg) => write!(f, "lea {}, {}", reg, mem), Self::MOV_BXIv(word) => write!(f, "mov bx, {:04x}", word), Self::MOV_FromReg(target, reg) => write!(f, "mov {}, {}", target, reg), Self::XOR_FromReg(mem, reg) => write!(f, "xor {}, {}", mem, reg), _ => write!(f, "??? ??, ??"), } } } #[derive(Debug, Clone)] /// ModRM byte can either target a memory location or some register pub enum ModRmTarget { Memory(MemoryIndex), Register(Register), } impl std::fmt::Display for ModRmTarget { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::Memory(idx) => write!(f, "{}", idx), Self::Register(reg) => write!(f, "{}", reg), } } } #[derive(Debug, Clone)] /// Displacements are signed versions of u8 and u16. pub enum Displacement { IByte(i8), IWord(i16), } impl fmt::LowerHex for Displacement { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::IByte(b) => fmt::LowerHex::fmt(b, f), Self::IWord(v) => fmt::LowerHex::fmt(v, f), } } } impl std::fmt::Display for Displacement { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { match self { Self::IByte(b) => { if *b > 0 { write!(f, " + {:#x}", b) } else { write!(f, " - {:#x}", b * -1) } } Self::IWord(w) => { if *w > 0 { write!(f, " + {:#x}", w) } else { write!(f, " - {:#x}", w * -1) } } } } } /// A memory index operand is usually created by ModRM bytes or words. /// e.g. [bx+si] #[derive(Debug, Clone)] pub struct MemoryIndex { pub base: Option, pub index: Option, pub displacement: Option, } impl fmt::Display for MemoryIndex { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match &self.base { Some(base) => match &self.index { Some(index) => match &self.displacement { Some(displacement) => { write!(f, "[{} + {}{}]", base, index, displacement) } None => write!(f, "[{} + {}]", base, index), }, None => match &self.displacement { Some(displacement) => write!(f, "[{}{}]", base, displacement), None => write!(f, "[{}]", base), }, }, None => match &self.index { Some(index) => match &self.displacement { Some(displacement) => write!(f, "[{}{}]", index, displacement), None => write!(f, "[{}]", index), }, None => match &self.displacement { Some(displacement) => write!(f, "[{:#x}]", displacement), None => panic!("Memory Index without base, index and displacement"), }, }, } } } #[derive(Debug, Clone)] /// 32-bit segment:offset pointer (e.g. for CALL instruction) pub struct Pointer { pub segment: Word, pub offset: Word, } impl std::fmt::Display for Pointer { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "{}:{}", self.segment, self.offset) } }