diff --git a/src/disasm.rs b/src/disasm.rs index b084e34..0337a58 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -2,12 +2,12 @@ use core::fmt; use std::{fs::File, io::Read, process::exit}; use crate::aout::Aout; -use crate::instructions::{Displacement, MemoryIndex, RegisterId, SegmentRegister}; -use crate::modrm; +use crate::instructions::{MemoryIndex, ModRmTarget, OperandSize, RegisterId, SegmentRegister}; use crate::{ Args, - instructions::{ImmediateByte, ImmediateWord, Instruction, Opcode, Register}, + instructions::{ImmediateByte, ImmediateWord, Instruction, Mnemonic, Register}, }; +use crate::{modrmb, modrms, modrmv}; #[derive(Debug)] /// Generic errors, which are encountered during parsing. @@ -78,8 +78,6 @@ impl Disassembler { // advance to operand self.offset += 1; let byte = self.text[self.offset]; - // jump to right after operand - self.offset += 1; self.instruction.raw.push(byte); byte } @@ -91,51 +89,69 @@ impl Disassembler { self.offset += 1; let byte1 = self.text[self.offset]; let byte2 = self.text[self.offset + 1]; - // jump right after operand - self.offset += 2; + // jump onto last operand + self.offset += 1; self.instruction.raw.push(byte1); self.instruction.raw.push(byte2); u16::from_le_bytes([byte1, byte2]) } + /// Takes in a modrm byte and returns mod, reg and r/m. + fn deconstruct_modrm_byte(modrm: u8) -> (u8, u8, u8) { + let mode = (modrm >> 6) & 0b11; + let reg = (modrm >> 3) & 0b111; + let rm = modrm & 0b111; + + (mode, reg, rm) + } + /// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset. 
- /// Returns the parsed modrm memory access and the source register - pub fn parse_modrm_byte(&mut self) -> (MemoryIndex, RegisterId) { + /// Returns the parsed modrm target and the source register + pub fn parse_modrm_byte(&mut self, size: OperandSize) -> (ModRmTarget, RegisterId) { // advance to operand self.offset += 1; let modrm = self.text[self.offset]; self.instruction.raw.push(modrm); - // jump right after operand - self.offset += 1; - // Calculate ModRM byte with bitmask - let modulo = modrm >> 6; - let reg_id = (modrm >> 3) & 7; - let rm = modrm & 7; + let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm); - let displacement = match modulo { + log::debug!( + "0x{:04x} deconstructed into: 0b{:b}, 0b{:b}, 0b{:b}", + modrm, + mode, + reg, + rm + ); + + let mut displacement = None; + match mode { 0b00 => { if rm == 0b110 { log::debug!("Additional word during ModRM parsing was read with mod 0."); - Some(Displacement::Word(self.parse_word())) + displacement = Some(OperandSize::Word(self.parse_word())); } else { - None + displacement = None; } } 0b01 => { log::debug!("Additional byte during ModRM parsing was read."); - Some(Displacement::Byte(self.parse_byte())) + displacement = Some(OperandSize::Byte(self.parse_byte())) } 0b10 => { log::debug!("Additional word during ModRM parsing was read."); - Some(Displacement::Word(self.parse_word())) + displacement = Some(OperandSize::Word(self.parse_word())); } 0b11 => { - // XXX is this correct? 
- log::debug!( - "No displacement, as reg to reg - maybe some implementation is missing here" - ); - None + log::debug!("ModRM to reg"); + let target = match size { + OperandSize::Byte(_) => { + ModRmTarget::Register(Register::by_id(OperandSize::Byte(rm))) + } + OperandSize::Word(_) => { + ModRmTarget::Register(Register::by_id(OperandSize::Word(rm.into()))) + } + }; + return (target, reg); } _ => panic!("Invalid ModRM byte encountered"), }; @@ -184,7 +200,35 @@ impl Disassembler { _ => panic!("Invalid ModRM byte encountered"), }; - (index, reg_id) + (ModRmTarget::Memory(index), reg) + } + + /// Match the modrm reg bits to the GPR1 mnemonics. + pub fn modrm_reg_to_mnemonic(reg: u8, target: ModRmTarget, imm: OperandSize) -> Mnemonic { + match imm { + OperandSize::Byte(b) => match reg { + 0b000 => Mnemonic::ADD_Ib(target, ImmediateByte(b)), + 0b001 => Mnemonic::OR_Ib(target, ImmediateByte(b)), + 0b010 => Mnemonic::ADC_Ib(target, ImmediateByte(b)), + 0b011 => Mnemonic::SBB_Ib(target, ImmediateByte(b)), + 0b100 => Mnemonic::AND_Ib(target, ImmediateByte(b)), + 0b101 => Mnemonic::SUB_Ib(target, ImmediateByte(b)), + 0b110 => Mnemonic::XOR_Ib(target, ImmediateByte(b)), + 0b111 => Mnemonic::CMP_Ib(target, ImmediateByte(b)), + _ => panic!("Illegal GPR1 mnemonic"), + }, + OperandSize::Word(w) => match reg { + 0b000 => Mnemonic::ADD_Iv(target, ImmediateWord(w)), + 0b001 => Mnemonic::OR_Iv(target, ImmediateWord(w)), + 0b010 => Mnemonic::ADC_Iv(target, ImmediateWord(w)), + 0b011 => Mnemonic::SBB_Iv(target, ImmediateWord(w)), + 0b100 => Mnemonic::AND_Iv(target, ImmediateWord(w)), + 0b101 => Mnemonic::SUB_Iv(target, ImmediateWord(w)), + 0b110 => Mnemonic::XOR_Iv(target, ImmediateWord(w)), + 0b111 => Mnemonic::CMP_Iv(target, ImmediateWord(w)), + _ => panic!("Illegal GPR1 mnemonic"), + }, + } } /// Decode instructions from the text section of the provided binary @@ -204,30 +248,177 @@ impl Disassembler { // additional raw bytes will be pushed by parse functions 
self.instruction.raw.push(opcode); + // XXX: convert this copy and paste horror into a proc macro self.instruction.opcode = match opcode { - // ADD - 0x00 => modrm!(self, ADD_EbGb), - 0x01 => modrm!(self, ADD_EvGv), - 0x02 => modrm!(self, ADD_GbEb), - 0x03 => modrm!(self, ADD_GvEv), - 0x04 => Opcode::ADD_ALIb(ImmediateByte(self.parse_byte())), - 0x05 => Opcode::ADD_AXIv(ImmediateWord(self.parse_word())), - // PUSH - 0x06 => Opcode::PUSH(SegmentRegister::by_id(self.parse_modrm_byte().1)), - // POP - 0x07 => Opcode::POP(SegmentRegister::by_id(self.parse_modrm_byte().1)), - // OR - 0x08 => modrm!(self, OR_EbGb), - 0x0A => modrm!(self, OR_GbEb), - // INT - 0xCD => Opcode::INT(ImmediateByte(self.parse_byte())), - // MOV - 0xBB => Opcode::MOV_BXIv(ImmediateWord(self.parse_word())), + 0x00 => modrmb!(self, ADD_FromReg), + 0x01 => modrmv!(self, ADD_FromReg), + 0x02 => modrmb!(self, ADD_ToReg), + 0x03 => modrmv!(self, ADD_ToReg), + 0x04 => Mnemonic::ADD_ALIb(ImmediateByte(self.parse_byte())), + 0x05 => Mnemonic::ADD_AXIv(ImmediateWord(self.parse_word())), + + 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), + 0x07 => Mnemonic::POP_S(SegmentRegister::ES), + + 0x08 => modrmb!(self, OR_FromReg), + 0x09 => modrmv!(self, OR_FromReg), + 0x0A => modrmb!(self, OR_ToReg), + 0x0B => modrmv!(self, OR_ToReg), + 0x0C => Mnemonic::OR_ALIb(ImmediateByte(self.parse_byte())), + 0x0D => Mnemonic::OR_AXIv(ImmediateWord(self.parse_word())), + + 0x0E => Mnemonic::PUSH_S(SegmentRegister::CS), + 0x0F => panic!("Opcode 0x0F (POP CS) is considered undefined"), + + 0x10 => modrmb!(self, ADC_FromReg), + 0x11 => modrmv!(self, ADC_FromReg), + 0x12 => modrmb!(self, ADC_ToReg), + 0x13 => modrmv!(self, ADC_ToReg), + 0x14 => Mnemonic::ADC_ALIb(ImmediateByte(self.parse_byte())), + 0x15 => Mnemonic::ADC_AXIv(ImmediateWord(self.parse_word())), + + 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), + 0x17 => Mnemonic::POP_S(SegmentRegister::SS), + + 0x18 => modrmb!(self, SBB_FromReg), + 0x19 => modrmv!(self, 
SBB_FromReg), + 0x1A => modrmb!(self, SBB_ToReg), + 0x1B => modrmv!(self, SBB_ToReg), + 0x1C => Mnemonic::SBB_ALIb(ImmediateByte(self.parse_byte())), + 0x1D => Mnemonic::SBB_AXIv(ImmediateWord(self.parse_word())), + + 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), + 0x1F => Mnemonic::POP_S(SegmentRegister::DS), + + 0x20 => modrmb!(self, AND_FromReg), + 0x21 => modrmv!(self, AND_FromReg), + 0x22 => modrmb!(self, AND_ToReg), + 0x23 => modrmv!(self, AND_ToReg), + 0x24 => Mnemonic::AND_ALIb(ImmediateByte(self.parse_byte())), + 0x25 => Mnemonic::AND_AXIv(ImmediateWord(self.parse_word())), + + 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), + 0x27 => Mnemonic::DAA, + + 0x28 => modrmb!(self, SUB_FromReg), + 0x29 => modrmv!(self, SUB_FromReg), + 0x2A => modrmb!(self, SUB_ToReg), + 0x2B => modrmv!(self, SUB_ToReg), + 0x2C => Mnemonic::SUB_ALIb(ImmediateByte(self.parse_byte())), + 0x2D => Mnemonic::SUB_AXIv(ImmediateWord(self.parse_word())), + + 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), + 0x2F => Mnemonic::DAS, + + 0x30 => modrmb!(self, XOR_FromReg), + 0x31 => modrmv!(self, XOR_FromReg), + 0x32 => modrmb!(self, XOR_ToReg), + 0x33 => modrmv!(self, XOR_ToReg), + 0x34 => Mnemonic::XOR_ALIb(ImmediateByte(self.parse_byte())), + 0x35 => Mnemonic::XOR_AXIv(ImmediateWord(self.parse_word())), + + 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), + 0x37 => Mnemonic::AAA, + + 0x38 => modrmb!(self, CMP_FromReg), + 0x39 => modrmv!(self, CMP_FromReg), + 0x3A => modrmb!(self, CMP_ToReg), + 0x3B => modrmv!(self, CMP_ToReg), + 0x3C => Mnemonic::CMP_ALIb(ImmediateByte(self.parse_byte())), + 0x3D => Mnemonic::CMP_AXIv(ImmediateWord(self.parse_word())), + + 0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS), + 0x3F => Mnemonic::AAS, + + 0x40 => Mnemonic::INC(Register::AX), + 0x41 => Mnemonic::INC(Register::CX), + 0x42 => Mnemonic::INC(Register::DX), + 0x43 => Mnemonic::INC(Register::BX), + 0x44 => Mnemonic::INC(Register::SP), + 0x45 => Mnemonic::INC(Register::BP), + 0x46 => 
Mnemonic::INC(Register::SI), + 0x47 => Mnemonic::INC(Register::DI), + + 0x48 => Mnemonic::DEC(Register::AX), + 0x49 => Mnemonic::DEC(Register::CX), + 0x4A => Mnemonic::DEC(Register::DX), + 0x4B => Mnemonic::DEC(Register::BX), + 0x4C => Mnemonic::DEC(Register::SP), + 0x4D => Mnemonic::DEC(Register::BP), + 0x4E => Mnemonic::DEC(Register::SI), + 0x4F => Mnemonic::DEC(Register::DI), + + 0x50 => Mnemonic::PUSH_R(Register::AX), + 0x51 => Mnemonic::PUSH_R(Register::CX), + 0x52 => Mnemonic::PUSH_R(Register::DX), + 0x53 => Mnemonic::PUSH_R(Register::BX), + 0x54 => Mnemonic::PUSH_R(Register::SP), + 0x55 => Mnemonic::PUSH_R(Register::BP), + 0x56 => Mnemonic::PUSH_R(Register::SI), + 0x57 => Mnemonic::PUSH_R(Register::DI), + + 0x58 => Mnemonic::POP_R(Register::AX), + 0x59 => Mnemonic::POP_R(Register::CX), + 0x5A => Mnemonic::POP_R(Register::DX), + 0x5B => Mnemonic::POP_R(Register::BX), + 0x5C => Mnemonic::POP_R(Register::SP), + 0x5D => Mnemonic::POP_R(Register::BP), + 0x5E => Mnemonic::POP_R(Register::SI), + 0x5F => Mnemonic::POP_R(Register::DI), + + 0x60..=0x6F => panic!("0x06 to 0x06F is considered undefined."), + + 0x70 => Mnemonic::JO(ImmediateByte(self.parse_byte())), + 0x71 => Mnemonic::JNO(ImmediateByte(self.parse_byte())), + 0x72 => Mnemonic::JB(ImmediateByte(self.parse_byte())), + 0x73 => Mnemonic::JNB(ImmediateByte(self.parse_byte())), + 0x74 => Mnemonic::JZ(ImmediateByte(self.parse_byte())), + 0x75 => Mnemonic::JNZ(ImmediateByte(self.parse_byte())), + 0x76 => Mnemonic::JBE(ImmediateByte(self.parse_byte())), + 0x77 => Mnemonic::JA(ImmediateByte(self.parse_byte())), + 0x78 => Mnemonic::JS(ImmediateByte(self.parse_byte())), + 0x79 => Mnemonic::JNS(ImmediateByte(self.parse_byte())), + 0x7A => Mnemonic::JPE(ImmediateByte(self.parse_byte())), + 0x7B => Mnemonic::JPO(ImmediateByte(self.parse_byte())), + 0x7C => Mnemonic::JL(ImmediateByte(self.parse_byte())), + 0x7D => Mnemonic::JGE(ImmediateByte(self.parse_byte())), + 0x7E => Mnemonic::JLE(ImmediateByte(self.parse_byte())), 
+ 0x7F => Mnemonic::JG(ImmediateByte(self.parse_byte())), + + // 0x80..=0x83 => panic!("GRP1 not implemented"), + 0x80 => { + let (target, reg) = self.parse_modrm_byte(OperandSize::Byte(0)); + let imm = self.parse_byte(); + Self::modrm_reg_to_mnemonic(reg, target, OperandSize::Byte(imm)) + } + 0x81 => { + let (target, reg) = self.parse_modrm_byte(OperandSize::Word(0)); + let imm = self.parse_word(); + Self::modrm_reg_to_mnemonic(reg, target, OperandSize::Word(imm)) + } + 0x82 => panic!("Same as 0x80"), + 0x83 => panic!("Sign extented GPR1 not yet implemented"), + + 0x84 => modrmb!(self, TEST), + 0x85 => modrmv!(self, TEST), + + 0x86 => modrmb!(self, XHCG), + 0x87 => modrmv!(self, XHCG), + + 0x88 => modrmb!(self, MOV_FromReg), + 0x89 => modrmv!(self, MOV_FromReg), + 0x8A => modrmb!(self, MOV_ToReg), + 0x8B => modrmv!(self, MOV_ToReg), + 0x8C => modrms!(self, MOV_FromSReg), + 0x8E => modrms!(self, MOV_ToSReg), + + 0x8D => modrmv!(self, LEA), + + 0xCD => Mnemonic::INT(ImmediateByte(self.parse_byte())), + + 0xBB => Mnemonic::MOV_BXIv(ImmediateWord(self.parse_word())), _ => { - eprintln!( - "Encountered unknown self.instructionuction '0x{:x}'", - opcode - ); + eprintln!("Encountered unknown instruction '0x{:x}'", opcode); eprintln!("Offset might be misaligned and data is being interpreted."); eprintln!("Existing to avoid further misinterpretation..."); exit(1); @@ -237,6 +428,7 @@ impl Disassembler { println!("{}", self.instruction); instructions.push(self.instruction.clone()); self.instruction = Instruction::new(); + self.offset += 1; } Ok(instructions) diff --git a/src/disasm_macros.rs b/src/disasm_macros.rs index 61d6d1e..1bd359e 100644 --- a/src/disasm_macros.rs +++ b/src/disasm_macros.rs @@ -1,8 +1,26 @@ #[macro_export] -/// Generate an Opcode for 'normal' ModRM instructions with mem access and a reg -macro_rules! modrm { +/// Generate a byte Opcode for 'normal' ModRM instructions with mem access and a reg +macro_rules! 
modrmb { ($self:ident, $variant:ident) => {{ - let (idx, reg) = $self.parse_modrm_byte(); - Opcode::$variant(idx, Register::by_id(reg)) + let (target, reg) = $self.parse_modrm_byte(OperandSize::Byte(0)); + Mnemonic::$variant(target, Register::by_id(OperandSize::Byte(reg))) + }}; +} + +#[macro_export] +/// Generate a word Opcode for 'normal' ModRM instructions with mem access and a reg +macro_rules! modrmv { + ($self:ident, $variant:ident) => {{ + let (target, reg) = $self.parse_modrm_byte(OperandSize::Word(0)); + Mnemonic::$variant(target, Register::by_id(OperandSize::Word(reg.into()))) + }}; +} + +#[macro_export] +/// Generate a word Opcode for 'normal' ModRM instructions with mem access and a segment reg +macro_rules! modrms { + ($self:ident, $variant:ident) => {{ + let (target, reg) = $self.parse_modrm_byte(OperandSize::Word(0)); + Mnemonic::$variant(target, SegmentRegister::by_id(reg)) }}; } diff --git a/src/instructions.rs b/src/instructions.rs index ad6e358..d0243bf 100644 --- a/src/instructions.rs +++ b/src/instructions.rs @@ -12,9 +12,9 @@ pub type w = u16; /// contains the `Opcode` that will be executed, alongside its starting offset /// and the raw parsed bytes pub struct Instruction { - pub start: usize, // location of the instruction start - pub raw: Vec, // raw value of instruction - pub opcode: Opcode, // actual instruction + pub start: usize, // location of the instruction start + pub raw: Vec, // raw value of instruction + pub opcode: Mnemonic, // actual instruction } impl Instruction { @@ -22,7 +22,7 @@ impl Instruction { Instruction { start: 0, raw: Vec::new(), - opcode: Opcode::NOP(), + opcode: Mnemonic::NOP(), } } } @@ -39,61 +39,163 @@ impl fmt::Display for Instruction { #[derive(Debug, Clone)] #[allow(dead_code, non_camel_case_types)] -pub enum Opcode { +/// All possible opcode variations. +// XXX: convert this copy and paste horror into a proc macro like +// enum Opcode { +// #[derive(default_variations)] +// ADD, +// ... 
+// } +// which then add all variants and also create the matching logic for +// src/disasm.rs decode_instructions() +pub enum Mnemonic { NOP(), // ADD - ADD_EbGb(MemoryIndex, Register), - ADD_EvGv(MemoryIndex, Register), - ADD_GbEb(MemoryIndex, Register), - ADD_GvEv(MemoryIndex, Register), + ADD_FromReg(ModRmTarget, Register), + ADD_ToReg(ModRmTarget, Register), + ADD_Ib(ModRmTarget, ImmediateByte), + ADD_Iv(ModRmTarget, ImmediateWord), ADD_ALIb(ImmediateByte), ADD_AXIv(ImmediateWord), // PUSH - PUSH(SegmentRegister), + PUSH_R(Register), + PUSH_S(SegmentRegister), // POP - POP(SegmentRegister), + POP_S(SegmentRegister), + POP_R(Register), // OR - OR_EbGb(MemoryIndex, Register), - OR_GbEb(MemoryIndex, Register), + OR_FromReg(ModRmTarget, Register), + OR_ToReg(ModRmTarget, Register), + OR_Ib(ModRmTarget, ImmediateByte), + OR_Iv(ModRmTarget, ImmediateWord), + OR_ALIb(ImmediateByte), + OR_AXIv(ImmediateWord), + // ADC + ADC_FromReg(ModRmTarget, Register), + ADC_ToReg(ModRmTarget, Register), + ADC_Ib(ModRmTarget, ImmediateByte), + ADC_Iv(ModRmTarget, ImmediateWord), + ADC_ALIb(ImmediateByte), + ADC_AXIv(ImmediateWord), + // SBB + SBB_FromReg(ModRmTarget, Register), + SBB_ToReg(ModRmTarget, Register), + SBB_Ib(ModRmTarget, ImmediateByte), + SBB_Iv(ModRmTarget, ImmediateWord), + SBB_ALIb(ImmediateByte), + SBB_AXIv(ImmediateWord), + // AND + AND_FromReg(ModRmTarget, Register), + AND_ToReg(ModRmTarget, Register), + AND_Ib(ModRmTarget, ImmediateByte), + AND_Iv(ModRmTarget, ImmediateWord), + AND_ALIb(ImmediateByte), + AND_AXIv(ImmediateWord), + // Override + OVERRIDE(SegmentRegister), + // Decimal Adjustment + DAA, + DAS, + AAA, + AAS, + // SUB + SUB_FromReg(ModRmTarget, Register), + SUB_ToReg(ModRmTarget, Register), + SUB_Ib(ModRmTarget, ImmediateByte), + SUB_Iv(ModRmTarget, ImmediateWord), + SUB_ALIb(ImmediateByte), + SUB_AXIv(ImmediateWord), + // XOR + XOR_FromReg(ModRmTarget, Register), + XOR_ToReg(ModRmTarget, Register), + XOR_Ib(ModRmTarget, ImmediateByte), + 
XOR_Iv(ModRmTarget, ImmediateWord), + XOR_ALIb(ImmediateByte), + XOR_AXIv(ImmediateWord), + // CMP + CMP_FromReg(ModRmTarget, Register), + CMP_ToReg(ModRmTarget, Register), + CMP_Ib(ModRmTarget, ImmediateByte), + CMP_Iv(ModRmTarget, ImmediateWord), + CMP_ALIb(ImmediateByte), + CMP_AXIv(ImmediateWord), + // INC + INC(Register), + // DEC + DEC(Register), + // Jumps + JO(ImmediateByte), + JNO(ImmediateByte), + JB(ImmediateByte), + JNB(ImmediateByte), + JZ(ImmediateByte), + JNZ(ImmediateByte), + JBE(ImmediateByte), + JA(ImmediateByte), + JS(ImmediateByte), + JNS(ImmediateByte), + JPE(ImmediateByte), + JPO(ImmediateByte), + JL(ImmediateByte), + JGE(ImmediateByte), + JLE(ImmediateByte), + JG(ImmediateByte), + // TEST + TEST(ModRmTarget, Register), + //XHCG + XHCG(ModRmTarget, Register), + // MOV + MOV_FromReg(ModRmTarget, Register), + MOV_ToReg(ModRmTarget, Register), + MOV_FromSReg(ModRmTarget, SegmentRegister), + MOV_ToSReg(ModRmTarget, SegmentRegister), + MOV_BXIv(ImmediateWord), + // LEA + LEA(ModRmTarget, Register), // INT INT(ImmediateByte), - // MOV - MOV_BXIv(ImmediateWord), } -impl fmt::Display for Opcode { +impl fmt::Display for Mnemonic { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::INT(byte) => write!(f, "INT, {:x}", byte), - Self::ADD_EbGb(mem, reg) => write!(f, "ADD {}, {}", mem, reg), - Self::ADD_GbEb(mem, reg) => write!(f, "ADD {}, {}", reg, mem), + Self::ADD_FromReg(mem, reg) => write!(f, "ADD {}, {}", mem, reg), + Self::ADD_ToReg(mem, reg) => write!(f, "ADD {}, {}", reg, mem), + Self::CMP_Iv(mem, imm) => write!(f, "CMP {}, {:04x}", mem, imm), + Self::LEA(mem, reg) => write!(f, "LEA {}, {}", reg, mem), Self::MOV_BXIv(word) => write!(f, "MOV BX, {:04x}", word), - _ => write!(f, "display not yet implemented"), + Self::XOR_FromReg(mem, reg) => write!(f, "XOR {}, {}", mem, reg), + _ => write!(f, "??? 
??, ??"), } } } /// Registers of a 8086 processor -/// -x are 16bit, -l are 8bit #[derive(Debug, Clone)] #[allow(dead_code)] pub enum Register { - AX, - BX, - CX, - DX, - AH, + // 8 bit + // low bytes AL, - BL, - BH, - CH, CL, - DH, DL, - DI, - SI, - BP, - SP, + BL, + // high bytes + AH, + CH, + DH, + BH, + + // 16 bit + AX, // accumulator + CX, // counter + DX, // data + BX, // base + SP, // stack pointer + BP, // base pointer + SI, // source index + DI, // destination index } /// Selector for Register or Segment Register @@ -102,25 +204,30 @@ pub type RegisterId = u8; #[allow(dead_code)] impl Register { /// Find the register corresponding to the 8086 bytecode ID - pub fn by_id(id: RegisterId) -> Self { + pub fn by_id(id: OperandSize) -> Self { match id { - 0x00 => Self::AL, - 0x01 => Self::CL, - 0x02 => Self::DL, - 0x03 => Self::BL, - 0x04 => Self::AH, - 0x05 => Self::CH, - 0x06 => Self::DH, - 0x07 => Self::BH, - 0x10 => Self::AX, - 0x11 => Self::CX, - 0x12 => Self::DX, - 0x13 => Self::BX, - 0x14 => Self::SP, - 0x15 => Self::BP, - 0x16 => Self::SI, - 0x17 => Self::DI, - _ => panic!("Invalid register ID encountered"), + OperandSize::Byte(b) => match b { + 0b000 => Self::AL, + 0b001 => Self::CL, + 0b010 => Self::DL, + 0b011 => Self::BL, + 0b100 => Self::AH, + 0b101 => Self::CH, + 0b110 => Self::DH, + 0b111 => Self::BH, + _ => panic!("Invalid 8bit register ID encountered"), + }, + OperandSize::Word(w) => match w { + 0b000 => Self::AX, + 0b001 => Self::CX, + 0b010 => Self::DX, + 0b011 => Self::BX, + 0b100 => Self::SP, + 0b101 => Self::BP, + 0b110 => Self::SI, + 0b111 => Self::DI, + _ => panic!("Invalid 16bit register ID encountered"), + }, } } } @@ -163,10 +270,10 @@ impl SegmentRegister { /// Find the SRegister corresponding to the 8086 bytecode ID pub fn by_id(id: u8) -> Self { match id { - 0x30 => Self::ES, - 0x31 => Self::CS, - 0x32 => Self::SS, - 0x33 => Self::DS, + 0x00 => Self::ES, + 0x01 => Self::CS, + 0x10 => Self::SS, + 0x11 => Self::DS, _ => panic!("Invalid 
segment register ID encountered"), } } @@ -210,13 +317,29 @@ macro_rules! impl_display_and_lowerhex { impl_display_and_lowerhex!(ImmediateByte); impl_display_and_lowerhex!(ImmediateWord); +#[derive(Debug, Clone)] +/// ModRM byte can either target a memory location or some register +pub enum ModRmTarget { + Memory(MemoryIndex), + Register(Register), +} + +impl std::fmt::Display for ModRmTarget { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + match self { + Self::Memory(idx) => write!(f, "{}", idx), + Self::Register(reg) => write!(f, "{}", reg), + } + } +} + /// A memory index operand is usually created by ModRM bytes or words. /// e.g. [bx+si] #[derive(Debug, Clone)] pub struct MemoryIndex { pub base: Option, pub index: Option, - pub displacement: Option, + pub displacement: Option, } impl fmt::Display for MemoryIndex { @@ -224,18 +347,18 @@ impl fmt::Display for MemoryIndex { match &self.base { Some(base) => match &self.index { Some(index) => match &self.displacement { - Some(displacement) => write!(f, "[{}+{}+{}]", base, index, displacement), - None => write!(f, "[{}+{}]", base, index), + Some(displacement) => write!(f, "[{} + {} + {}]", base, index, displacement), + None => write!(f, "[{} + {}]", base, index), }, None => match &self.displacement { - Some(displacement) => write!(f, "[{}+{}]", base, displacement), - None => write!(f, "{}", base), + Some(displacement) => write!(f, "[{} + {}]", base, displacement), + None => write!(f, "[{} + 0]", base), }, }, None => match &self.index { Some(index) => match &self.displacement { - Some(displacement) => write!(f, "{}+{}", index, displacement), - None => write!(f, "{}", index), + Some(displacement) => write!(f, "{} + {}", index, displacement), + None => write!(f, "[{} + 0]", index), }, None => panic!("Invalid MemoryIndex encountered"), }, @@ -245,13 +368,13 @@ impl fmt::Display for MemoryIndex { #[derive(Debug, Clone)] #[allow(dead_code)] -/// Displacement for ModRM -pub enum Displacement { +/// 
Can be used to encode either byte or word operands +pub enum OperandSize { Byte(u8), Word(u16), } -impl fmt::Display for Displacement { +impl fmt::Display for OperandSize { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::Byte(byte) => write!(f, "{}", byte),