use core::fmt;

// Operand-size shorthands: b = 8 bit, w = 16 bit, v = 16 bit.
// `v` and `w` are treated as the same type, as long as nothing blows up.
#[allow(non_camel_case_types)]
pub type b = u8;
#[allow(non_camel_case_types)]
pub type w = u16;

/// A single 'line' of executable ASM is called an Instruction, which
/// contains the `Mnemonic` that will be executed, alongside its starting
/// offset and the raw parsed bytes.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub struct Instruction {
    /// Location of the instruction start.
    pub start: usize,
    /// Raw value of the instruction.
    // NOTE(review): the element type was lost when this file was extracted;
    // `u8` is assumed because `Display` prints each element as `{:02x}`.
    pub raw: Vec<u8>,
    /// The actual instruction.
    pub opcode: Mnemonic,
}

impl Instruction {
    /// Creates an empty instruction: offset 0, no raw bytes, `NOP` opcode.
    pub fn new() -> Self {
        Instruction {
            start: 0,
            raw: Vec::new(),
            opcode: Mnemonic::NOP(),
        }
    }
}

impl fmt::Display for Instruction {
    /// Formats as `<offset, 4 hex digits>: <raw bytes in hex>\t<opcode>`.
    ///
    /// Formatter errors are propagated to the caller instead of panicking.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:04x}: ", self.start)?;
        for byte in &self.raw {
            write!(f, "{:02x}", byte)?;
        }
        write!(f, "\t{}", self.opcode)
    }
}

/// All possible opcode variations.
// XXX: convert this copy and paste horror in a proc macro like
// enum Opcode {
//      #[derive(default_variations)]
//      ADD,
//      ...
// }
// which then add all variants and also create the matching logic for
// src/disasm.rs decode_instructions()
#[derive(Debug, Clone)]
#[allow(dead_code, non_camel_case_types)]
pub enum Mnemonic {
    NOP(),
    // ADD
    ADD_FromReg(ModRmTarget, Register),
    ADD_ToReg(ModRmTarget, Register),
    ADD_Ib(ModRmTarget, ImmediateByte),
    ADD_Iv(ModRmTarget, ImmediateWord),
    ADD_ALIb(ImmediateByte),
    ADD_AXIv(ImmediateWord),
    // PUSH
    PUSH_R(Register),
    PUSH_S(SegmentRegister),
    // POP
    POP_S(SegmentRegister),
    POP_R(Register),
    // OR
    OR_FromReg(ModRmTarget, Register),
    OR_ToReg(ModRmTarget, Register),
    OR_Ib(ModRmTarget, ImmediateByte),
    OR_Iv(ModRmTarget, ImmediateWord),
    OR_ALIb(ImmediateByte),
    OR_AXIv(ImmediateWord),
    // ADC
    ADC_FromReg(ModRmTarget, Register),
    ADC_ToReg(ModRmTarget, Register),
    ADC_Ib(ModRmTarget, ImmediateByte),
    ADC_Iv(ModRmTarget, ImmediateWord),
    ADC_ALIb(ImmediateByte),
    ADC_AXIv(ImmediateWord),
    // SBB
    SBB_FromReg(ModRmTarget, Register),
    SBB_ToReg(ModRmTarget, Register),
    SBB_Ib(ModRmTarget, ImmediateByte),
    SBB_Iv(ModRmTarget, ImmediateWord),
    SBB_ALIb(ImmediateByte),
    SBB_AXIv(ImmediateWord),
    // AND
    AND_FromReg(ModRmTarget, Register),
    AND_ToReg(ModRmTarget, Register),
    AND_Ib(ModRmTarget, ImmediateByte),
    AND_Iv(ModRmTarget, ImmediateWord),
    AND_ALIb(ImmediateByte),
    AND_AXIv(ImmediateWord),
    // Override
    OVERRIDE(SegmentRegister),
    // Decimal Adjustment
    DAA,
    DAS,
    AAA,
    AAS,
    // SUB
    SUB_FromReg(ModRmTarget, Register),
    SUB_ToReg(ModRmTarget, Register),
    SUB_Ib(ModRmTarget, ImmediateByte),
    SUB_Iv(ModRmTarget, ImmediateWord),
    SUB_ALIb(ImmediateByte),
    SUB_AXIv(ImmediateWord),
    // XOR
    XOR_FromReg(ModRmTarget, Register),
    XOR_ToReg(ModRmTarget, Register),
    XOR_Ib(ModRmTarget, ImmediateByte),
    XOR_Iv(ModRmTarget, ImmediateWord),
    XOR_ALIb(ImmediateByte),
    XOR_AXIv(ImmediateWord),
    // CMP
    CMP_FromReg(ModRmTarget, Register),
    CMP_ToReg(ModRmTarget, Register),
    CMP_Ib(ModRmTarget, ImmediateByte),
    CMP_Iv(ModRmTarget, ImmediateWord),
    CMP_ALIb(ImmediateByte),
    CMP_AXIv(ImmediateWord),
    // INC
    INC(Register),
    // DEC
    DEC(Register),
    // Jumps
    JO(ImmediateByte),
    JNO(ImmediateByte),
    JB(ImmediateByte),
    JNB(ImmediateByte),
    JZ(ImmediateByte),
    JNZ(ImmediateByte),
    JBE(ImmediateByte),
    JA(ImmediateByte),
    JS(ImmediateByte),
    JNS(ImmediateByte),
    JPE(ImmediateByte),
    JPO(ImmediateByte),
    JL(ImmediateByte),
    JGE(ImmediateByte),
    JLE(ImmediateByte),
    JG(ImmediateByte),
    // TEST
    TEST(ModRmTarget, Register),
    // XCHG (sic: the variant name is misspelled; kept so existing users
    // of `Mnemonic::XHCG` keep compiling)
    XHCG(ModRmTarget, Register),
    // MOV
    MOV_FromReg(ModRmTarget, Register),
    MOV_ToReg(ModRmTarget, Register),
    MOV_FromSReg(ModRmTarget, SegmentRegister),
    MOV_ToSReg(ModRmTarget, SegmentRegister),
    MOV_BXIv(ImmediateWord),
    // LEA
    LEA(ModRmTarget, Register),
    // INT
    INT(ImmediateByte),
}

impl fmt::Display for Mnemonic {
    /// Formats the mnemonic followed by its operands, destination first.
    ///
    /// Only a handful of variants have a textual form so far; every other
    /// variant is rendered as the placeholder `??? ??, ??`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // A single operand follows the mnemonic without a comma.
            Self::INT(byte) => write!(f, "INT {:x}", byte),
            Self::ADD_FromReg(mem, reg) => write!(f, "ADD {}, {}", mem, reg),
            Self::ADD_ToReg(mem, reg) => write!(f, "ADD {}, {}", reg, mem),
            Self::CMP_Iv(mem, imm) => write!(f, "CMP {}, {:04x}", mem, imm),
            Self::LEA(mem, reg) => write!(f, "LEA {}, {}", reg, mem),
            Self::MOV_BXIv(word) => write!(f, "MOV BX, {:04x}", word),
            Self::XOR_FromReg(mem, reg) => write!(f, "XOR {}, {}", mem, reg),
            _ => write!(f, "??? ??, ??"),
        }
    }
}

/// Registers of a 8086 processor
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum Register {
    // 8 bit
    // low bytes
    AL,
    CL,
    DL,
    BL,
    // high bytes
    AH,
    CH,
    DH,
    BH,
    // 16 bit
    AX, // accumulator
    CX, // counter
    DX, // data
    BX, // base
    SP, // stack pointer
    BP, // base pointer
    SI, // source index
    DI, // destination index
}

/// Selector for Register or Segment Register
pub type RegisterId = u8;

#[allow(dead_code)]
impl Register {
    /// Find the register corresponding to the 8086 bytecode ID.
    ///
    /// `OperandSize::Byte` selects one of the 8 bit registers,
    /// `OperandSize::Word` one of the 16 bit registers.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped ID is greater than `0b111`.
    pub fn by_id(id: OperandSize) -> Self {
        match id {
            OperandSize::Byte(reg_id) => match reg_id {
                0b000 => Self::AL,
                0b001 => Self::CL,
                0b010 => Self::DL,
                0b011 => Self::BL,
                0b100 => Self::AH,
                0b101 => Self::CH,
                0b110 => Self::DH,
                0b111 => Self::BH,
                _ => panic!("Invalid 8bit register ID encountered"),
            },
            OperandSize::Word(reg_id) => match reg_id {
                0b000 => Self::AX,
                0b001 => Self::CX,
                0b010 => Self::DX,
                0b011 => Self::BX,
                0b100 => Self::SP,
                0b101 => Self::BP,
                0b110 => Self::SI,
                0b111 => Self::DI,
                _ => panic!("Invalid 16bit register ID encountered"),
            },
        }
    }
}

impl fmt::Display for Register {
    /// Writes the upper-case register name, e.g. `AX`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Self::AX => "AX",
            Self::BX => "BX",
            Self::CX => "CX",
            Self::DX => "DX",
            Self::AH => "AH",
            Self::AL => "AL",
            Self::BL => "BL",
            Self::BH => "BH",
            Self::CH => "CH",
            Self::CL => "CL",
            Self::DH => "DH",
            Self::DL => "DL",
            Self::DI => "DI",
            Self::SI => "SI",
            Self::BP => "BP",
            Self::SP => "SP",
        };
        write!(f, "{}", name)
    }
}

/// Segment Registers of a 8086 processor
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum SegmentRegister {
    DS,
    ES,
    SS,
    CS,
}

#[allow(dead_code)]
impl SegmentRegister {
    /// Find the SRegister corresponding to the 8086 bytecode ID.
    ///
    /// The ID is the two-bit segment register field of the instruction:
    /// `0b00` = ES, `0b01` = CS, `0b10` = SS, `0b11` = DS.
    ///
    /// # Panics
    ///
    /// Panics if `id` is greater than `0b11`.
    pub fn by_id(id: u8) -> Self {
        match id {
            0b00 => Self::ES,
            0b01 => Self::CS,
            // These two were previously written as the hex literals 0x10 and
            // 0x11 (16 and 17), which a two-bit field can never hold.
            0b10 => Self::SS,
            0b11 => Self::DS,
            _ => panic!("Invalid segment register ID encountered"),
        }
    }
}

impl fmt::Display for SegmentRegister {
    /// Writes the upper-case segment register name, e.g. `DS`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Self::DS => "DS",
            Self::ES => "ES",
            Self::SS => "SS",
            Self::CS => "CS",
        };
        write!(f, "{}", name)
    }
}

/// An immediate byte value for an instruction.
#[derive(Debug, Clone)]
pub struct ImmediateByte(pub b);

/// An immediate word value for an instruction
#[derive(Debug, Clone)]
pub struct ImmediateWord(pub w);

/// Implements `Display` (decimal) and `LowerHex` for a newtype by forwarding
/// to its inner value, so width/fill flags such as `{:04x}` keep working.
macro_rules! impl_display_and_lowerhex {
    ($name:ident) => {
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                write!(f, "{}", self.0)
            }
        }

        impl std::fmt::LowerHex for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::LowerHex::fmt(&self.0, f)
            }
        }
    };
}

impl_display_and_lowerhex!(ImmediateByte);
impl_display_and_lowerhex!(ImmediateWord);

/// ModRM byte can either target a memory location or some register
#[derive(Debug, Clone)]
pub enum ModRmTarget {
    Memory(MemoryIndex),
    Register(Register),
}

impl std::fmt::Display for ModRmTarget {
    /// Delegates to the wrapped memory index or register.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Self::Memory(idx) => write!(f, "{}", idx),
            Self::Register(reg) => write!(f, "{}", reg),
        }
    }
}

/// A memory index operand is usually created by ModRM bytes or words.
/// e.g. [bx+si]
// NOTE(review): the `Option` type parameters were lost when this file was
// extracted. `Register` for base/index and `OperandSize` for the displacement
// are assumptions based on how the fields are displayed; confirm them against
// the decoder that builds this struct.
#[derive(Debug, Clone)]
pub struct MemoryIndex {
    pub base: Option<Register>,
    pub index: Option<Register>,
    pub displacement: Option<OperandSize>,
}

impl fmt::Display for MemoryIndex {
    /// Formats the operand as a bracketed sum of its parts, e.g.
    /// `[BX + SI + 4]`.
    ///
    /// A lone base or index register is printed with an explicit ` + 0`,
    /// and a lone displacement (a direct address) as `[disp]`.
    ///
    /// # Panics
    ///
    /// Panics if base, index and displacement are all `None`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match (&self.base, &self.index, &self.displacement) {
            (Some(base), Some(index), Some(disp)) => {
                write!(f, "[{} + {} + {}]", base, index, disp)
            }
            (Some(base), Some(index), None) => write!(f, "[{} + {}]", base, index),
            // One register plus displacement; this arm is always bracketed,
            // including the index-only case that used to lose its brackets.
            (Some(reg), None, Some(disp)) | (None, Some(reg), Some(disp)) => {
                write!(f, "[{} + {}]", reg, disp)
            }
            (Some(reg), None, None) | (None, Some(reg), None) => write!(f, "[{} + 0]", reg),
            // Direct addressing: nothing but a displacement.
            (None, None, Some(disp)) => write!(f, "[{}]", disp),
            (None, None, None) => panic!("Invalid MemoryIndex encountered"),
        }
    }
}

/// Can be used to encode either byte or word operands
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum OperandSize {
    Byte(u8),
    Word(u16),
}

impl fmt::Display for OperandSize {
    /// Writes the wrapped value in decimal.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Byte(byte) => write!(f, "{}", byte),
            Self::Word(word) => write!(f, "{}", word),
        }
    }
}