diff --git a/src/disasm.rs b/src/disasm.rs index feb6edf..4fc95f5 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -2,7 +2,9 @@ use core::fmt; use std::{fs::File, io::Read, process::exit}; use crate::aout::Aout; -use crate::instructions::{MemoryIndex, ModRmTarget, OperandWidth, RegisterId, SegmentRegister}; +use crate::instructions::{ + ImmediateOperand, MemoryIndex, ModRmTarget, RegisterId, SegmentRegister, +}; use crate::{ Args, instructions::{ImmediateByte, ImmediateWord, Instruction, Mnemonic, Register}, @@ -107,7 +109,7 @@ impl Disassembler { /// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset. /// Returns the parsed modrm target and the source register - pub fn parse_modrm_byte(&mut self, width: OperandWidth) -> (ModRmTarget, RegisterId) { + pub fn parse_modrm_byte(&mut self, width: ImmediateOperand) -> (ModRmTarget, RegisterId) { // advance to operand self.offset += 1; let modrm = self.text[self.offset]; @@ -128,27 +130,28 @@ impl Disassembler { 0b00 => { if rm == 0b110 { log::debug!("Additional word during ModRM parsing was read with mod 0."); - displacement = Some(OperandWidth::Word(self.parse_word())); + displacement = Some(ImmediateOperand::Word(self.parse_word())); } else { displacement = None; } } 0b01 => { log::debug!("Additional byte during ModRM parsing was read."); - displacement = Some(OperandWidth::Byte(self.parse_byte())) + displacement = Some(ImmediateOperand::Byte(self.parse_byte())) } 0b10 => { log::debug!("Additional word during ModRM parsing was read."); - displacement = Some(OperandWidth::Word(self.parse_word())); + displacement = Some(ImmediateOperand::Word(self.parse_word())); } 0b11 => { log::debug!("ModRM to reg"); + // XXX: find a nicer way instead of using Byte(0) and Word(0) let target = match width { - OperandWidth::Byte(_) => { - ModRmTarget::Register(Register::by_id(OperandWidth::Byte(rm))) + ImmediateOperand::Byte(_) => { + ModRmTarget::Register(Register::by_id(ImmediateOperand::Byte(rm))) } - 
OperandWidth::Word(_) => { - ModRmTarget::Register(Register::by_id(OperandWidth::Word(rm.into()))) + ImmediateOperand::Word(_) => { + ModRmTarget::Register(Register::by_id(ImmediateOperand::Word(rm.into()))) } }; return (target, reg); @@ -206,28 +209,28 @@ impl Disassembler { /// Match the modrm reg bits to the GPR1 mnemonics. /// GPR always has an imm value as second operand, but is available in both /// Byte and Word length. - pub fn modrm_reg_to_mnemonic(reg: u8, target: ModRmTarget, imm: OperandWidth) -> Mnemonic { + pub fn modrm_reg_to_mnemonic(reg: u8, target: ModRmTarget, imm: ImmediateOperand) -> Mnemonic { match imm { - OperandWidth::Byte(b) => match reg { - 0b000 => Mnemonic::ADD_Ib(target, ImmediateByte(b)), - 0b001 => Mnemonic::OR_Ib(target, ImmediateByte(b)), - 0b010 => Mnemonic::ADC_Ib(target, ImmediateByte(b)), - 0b011 => Mnemonic::SBB_Ib(target, ImmediateByte(b)), - 0b100 => Mnemonic::AND_Ib(target, ImmediateByte(b)), - 0b101 => Mnemonic::SUB_Ib(target, ImmediateByte(b)), - 0b110 => Mnemonic::XOR_Ib(target, ImmediateByte(b)), - 0b111 => Mnemonic::CMP_Ib(target, ImmediateByte(b)), + ImmediateOperand::Byte(b) => match reg { + 0b000 => Mnemonic::ADD_Ib(target, b), + 0b001 => Mnemonic::OR_Ib(target, b), + 0b010 => Mnemonic::ADC_Ib(target, b), + 0b011 => Mnemonic::SBB_Ib(target, b), + 0b100 => Mnemonic::AND_Ib(target, b), + 0b101 => Mnemonic::SUB_Ib(target, b), + 0b110 => Mnemonic::XOR_Ib(target, b), + 0b111 => Mnemonic::CMP_Ib(target, b), _ => panic!("Illegal GPR1 mnemonic"), }, - OperandWidth::Word(w) => match reg { - 0b000 => Mnemonic::ADD_Iv(target, ImmediateWord(w)), - 0b001 => Mnemonic::OR_Iv(target, ImmediateWord(w)), - 0b010 => Mnemonic::ADC_Iv(target, ImmediateWord(w)), - 0b011 => Mnemonic::SBB_Iv(target, ImmediateWord(w)), - 0b100 => Mnemonic::AND_Iv(target, ImmediateWord(w)), - 0b101 => Mnemonic::SUB_Iv(target, ImmediateWord(w)), - 0b110 => Mnemonic::XOR_Iv(target, ImmediateWord(w)), - 0b111 => Mnemonic::CMP_Iv(target, ImmediateWord(w)), + 
ImmediateOperand::Word(w) => match reg { + 0b000 => Mnemonic::ADD_Iv(target, w), + 0b001 => Mnemonic::OR_Iv(target, w), + 0b010 => Mnemonic::ADC_Iv(target, w), + 0b011 => Mnemonic::SBB_Iv(target, w), + 0b100 => Mnemonic::AND_Iv(target, w), + 0b101 => Mnemonic::SUB_Iv(target, w), + 0b110 => Mnemonic::XOR_Iv(target, w), + 0b111 => Mnemonic::CMP_Iv(target, w), _ => panic!("Illegal GPR1 mnemonic"), }, } @@ -256,8 +259,8 @@ impl Disassembler { 0x01 => modrmv!(self, ADD_FromReg), 0x02 => modrmb!(self, ADD_ToReg), 0x03 => modrmv!(self, ADD_ToReg), - 0x04 => Mnemonic::ADD_ALIb(ImmediateByte(self.parse_byte())), - 0x05 => Mnemonic::ADD_AXIv(ImmediateWord(self.parse_word())), + 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()), + 0x05 => Mnemonic::ADD_AXIv(self.parse_word()), 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), 0x07 => Mnemonic::POP_S(SegmentRegister::ES), @@ -266,8 +269,8 @@ impl Disassembler { 0x09 => modrmv!(self, OR_FromReg), 0x0A => modrmb!(self, OR_ToReg), 0x0B => modrmv!(self, OR_ToReg), - 0x0C => Mnemonic::OR_ALIb(ImmediateByte(self.parse_byte())), - 0x0D => Mnemonic::OR_AXIv(ImmediateWord(self.parse_word())), + 0x0C => Mnemonic::OR_ALIb(self.parse_byte()), + 0x0D => Mnemonic::OR_AXIv(self.parse_word()), 0x0E => Mnemonic::PUSH_S(SegmentRegister::CS), 0x0F => panic!("Opcode 0x0F (POP CS) is considered undefined"), @@ -276,8 +279,8 @@ impl Disassembler { 0x11 => modrmv!(self, ADC_FromReg), 0x12 => modrmb!(self, ADC_ToReg), 0x13 => modrmv!(self, ADC_ToReg), - 0x14 => Mnemonic::ADC_ALIb(ImmediateByte(self.parse_byte())), - 0x15 => Mnemonic::ADC_AXIv(ImmediateWord(self.parse_word())), + 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()), + 0x15 => Mnemonic::ADC_AXIv(self.parse_word()), 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), 0x17 => Mnemonic::POP_S(SegmentRegister::SS), @@ -286,8 +289,8 @@ impl Disassembler { 0x19 => modrmv!(self, SBB_FromReg), 0x1A => modrmb!(self, SBB_ToReg), 0x1B => modrmv!(self, SBB_ToReg), - 0x1C => 
Mnemonic::SBB_ALIb(ImmediateByte(self.parse_byte())), - 0x1D => Mnemonic::SBB_AXIv(ImmediateWord(self.parse_word())), + 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()), + 0x1D => Mnemonic::SBB_AXIv(self.parse_word()), 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), 0x1F => Mnemonic::POP_S(SegmentRegister::DS), @@ -296,8 +299,8 @@ impl Disassembler { 0x21 => modrmv!(self, AND_FromReg), 0x22 => modrmb!(self, AND_ToReg), 0x23 => modrmv!(self, AND_ToReg), - 0x24 => Mnemonic::AND_ALIb(ImmediateByte(self.parse_byte())), - 0x25 => Mnemonic::AND_AXIv(ImmediateWord(self.parse_word())), + 0x24 => Mnemonic::AND_ALIb(self.parse_byte()), + 0x25 => Mnemonic::AND_AXIv(self.parse_word()), 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), 0x27 => Mnemonic::DAA, @@ -306,8 +309,8 @@ impl Disassembler { 0x29 => modrmv!(self, SUB_FromReg), 0x2A => modrmb!(self, SUB_ToReg), 0x2B => modrmv!(self, SUB_ToReg), - 0x2C => Mnemonic::SUB_ALIb(ImmediateByte(self.parse_byte())), - 0x2D => Mnemonic::SUB_AXIv(ImmediateWord(self.parse_word())), + 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()), + 0x2D => Mnemonic::SUB_AXIv(self.parse_word()), 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), 0x2F => Mnemonic::DAS, @@ -316,8 +319,8 @@ impl Disassembler { 0x31 => modrmv!(self, XOR_FromReg), 0x32 => modrmb!(self, XOR_ToReg), 0x33 => modrmv!(self, XOR_ToReg), - 0x34 => Mnemonic::XOR_ALIb(ImmediateByte(self.parse_byte())), - 0x35 => Mnemonic::XOR_AXIv(ImmediateWord(self.parse_word())), + 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()), + 0x35 => Mnemonic::XOR_AXIv(self.parse_word()), 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), 0x37 => Mnemonic::AAA, @@ -326,8 +329,8 @@ impl Disassembler { 0x39 => modrmv!(self, CMP_FromReg), 0x3A => modrmb!(self, CMP_ToReg), 0x3B => modrmv!(self, CMP_ToReg), - 0x3C => Mnemonic::CMP_ALIb(ImmediateByte(self.parse_byte())), - 0x3D => Mnemonic::CMP_AXIv(ImmediateWord(self.parse_word())), + 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()), + 0x3D => 
Mnemonic::CMP_AXIv(self.parse_word()), 0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS), 0x3F => Mnemonic::AAS, @@ -370,22 +373,22 @@ impl Disassembler { 0x60..=0x6F => panic!("0x06 to 0x06F is considered undefined."), - 0x70 => Mnemonic::JO(ImmediateByte(self.parse_byte())), - 0x71 => Mnemonic::JNO(ImmediateByte(self.parse_byte())), - 0x72 => Mnemonic::JB(ImmediateByte(self.parse_byte())), - 0x73 => Mnemonic::JNB(ImmediateByte(self.parse_byte())), - 0x74 => Mnemonic::JZ(ImmediateByte(self.parse_byte())), - 0x75 => Mnemonic::JNZ(ImmediateByte(self.parse_byte())), - 0x76 => Mnemonic::JBE(ImmediateByte(self.parse_byte())), - 0x77 => Mnemonic::JA(ImmediateByte(self.parse_byte())), - 0x78 => Mnemonic::JS(ImmediateByte(self.parse_byte())), - 0x79 => Mnemonic::JNS(ImmediateByte(self.parse_byte())), - 0x7A => Mnemonic::JPE(ImmediateByte(self.parse_byte())), - 0x7B => Mnemonic::JPO(ImmediateByte(self.parse_byte())), - 0x7C => Mnemonic::JL(ImmediateByte(self.parse_byte())), - 0x7D => Mnemonic::JGE(ImmediateByte(self.parse_byte())), - 0x7E => Mnemonic::JLE(ImmediateByte(self.parse_byte())), - 0x7F => Mnemonic::JG(ImmediateByte(self.parse_byte())), + 0x70 => Mnemonic::JO(self.parse_byte()), + 0x71 => Mnemonic::JNO(self.parse_byte()), + 0x72 => Mnemonic::JB(self.parse_byte()), + 0x73 => Mnemonic::JNB(self.parse_byte()), + 0x74 => Mnemonic::JZ(self.parse_byte()), + 0x75 => Mnemonic::JNZ(self.parse_byte()), + 0x76 => Mnemonic::JBE(self.parse_byte()), + 0x77 => Mnemonic::JA(self.parse_byte()), + 0x78 => Mnemonic::JS(self.parse_byte()), + 0x79 => Mnemonic::JNS(self.parse_byte()), + 0x7A => Mnemonic::JPE(self.parse_byte()), + 0x7B => Mnemonic::JPO(self.parse_byte()), + 0x7C => Mnemonic::JL(self.parse_byte()), + 0x7D => Mnemonic::JGE(self.parse_byte()), + 0x7E => Mnemonic::JLE(self.parse_byte()), + 0x7F => Mnemonic::JG(self.parse_byte()), 0x80 => modrmgprb!(self), 0x81 => modrmgprv!(self), @@ -407,9 +410,9 @@ impl Disassembler { 0x8D => modrmv!(self, LEA), - 0xCD => 
Mnemonic::INT(ImmediateByte(self.parse_byte())), + 0xCD => Mnemonic::INT(self.parse_byte()), - 0xBB => Mnemonic::MOV_BXIv(ImmediateWord(self.parse_word())), + 0xBB => Mnemonic::MOV_BXIv(self.parse_word()), _ => { eprintln!("Encountered unknown instruction '0x{:x}'", opcode); eprintln!("Offset might be misaligned and data is being interpreted."); diff --git a/src/disasm_macros.rs b/src/disasm_macros.rs index 022c81d..a853e10 100644 --- a/src/disasm_macros.rs +++ b/src/disasm_macros.rs @@ -2,8 +2,8 @@ /// Generate a byte Opcode for 'normal' ModRM instructions with mem access and a reg macro_rules! modrmb { ($self:ident, $variant:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(OperandWidth::Byte(0)); - Mnemonic::$variant(target, Register::by_id(OperandWidth::Byte(reg))) + let (target, reg) = $self.parse_modrm_byte(ImmediateOperand::Byte(0)); + Mnemonic::$variant(target, Register::by_id(ImmediateOperand::Byte(reg))) }}; } @@ -11,8 +11,8 @@ macro_rules! modrmb { /// Generate a word Opcode for 'normal' ModRM instructions with mem access and a reg macro_rules! modrmv { ($self:ident, $variant:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(OperandWidth::Word(0)); - Mnemonic::$variant(target, Register::by_id(OperandWidth::Word(reg.into()))) + let (target, reg) = $self.parse_modrm_byte(ImmediateOperand::Word(0)); + Mnemonic::$variant(target, Register::by_id(ImmediateOperand::Word(reg.into()))) }}; } @@ -20,7 +20,7 @@ macro_rules! modrmv { /// Generate a word Opcode for 'normal' ModRM instructions with mem access and a segment reg macro_rules! modrms { ($self:ident, $variant:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(OperandWidth::Word(0)); + let (target, reg) = $self.parse_modrm_byte(ImmediateOperand::Word(0)); Mnemonic::$variant(target, SegmentRegister::by_id(reg)) }}; } @@ -30,9 +30,9 @@ macro_rules! modrms { /// GPR always has an imm value as second operand. macro_rules! 
modrmgprb { ($self:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(OperandWidth::Byte(0)); + let (target, reg) = $self.parse_modrm_byte(ImmediateOperand::Byte(0)); let imm = $self.parse_byte(); - Self::modrm_reg_to_mnemonic(reg, target, OperandWidth::Byte(imm)) + Self::modrm_reg_to_mnemonic(reg, target, ImmediateOperand::Byte(imm)) }}; } @@ -41,8 +41,8 @@ macro_rules! modrmgprb { /// GPR always has an imm value as second operand. macro_rules! modrmgprv { ($self:ident) => {{ - let (target, reg) = $self.parse_modrm_byte(OperandWidth::Word(0)); + let (target, reg) = $self.parse_modrm_byte(ImmediateOperand::Word(0)); let imm = $self.parse_word(); - Self::modrm_reg_to_mnemonic(reg, target, OperandWidth::Word(imm)) + Self::modrm_reg_to_mnemonic(reg, target, ImmediateOperand::Word(imm)) }}; } diff --git a/src/instructions.rs b/src/instructions.rs index 404b086..a66e745 100644 --- a/src/instructions.rs +++ b/src/instructions.rs @@ -1,10 +1,24 @@ use core::fmt; -// b: 8, w: 16, v: 16 -> i just treat v and w the same, if nothing blows up -#[allow(non_camel_case_types)] -pub type b = u8; -#[allow(non_camel_case_types)] -pub type w = u16; +pub type ImmediateByte = u8; +pub type ImmediateWord = u16; + +#[derive(Debug, Clone)] +#[allow(dead_code)] +/// Can be used to encode either byte or word operands +pub enum ImmediateOperand { + Byte(ImmediateByte), + Word(ImmediateWord), +} + +impl fmt::Display for ImmediateOperand { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Byte(byte) => write!(f, "{}", byte), + Self::Word(word) => write!(f, "{}", word), + } + } +} #[derive(Debug, Clone)] #[allow(dead_code)] @@ -204,9 +218,9 @@ pub type RegisterId = u8; #[allow(dead_code)] impl Register { /// Find the register corresponding to the 8086 bytecode ID - pub fn by_id(id: OperandWidth) -> Self { + pub fn by_id(id: ImmediateOperand) -> Self { match id { - OperandWidth::Byte(b) => match b { + ImmediateOperand::Byte(b) => match b { 0b000 => Self::AL, 
0b001 => Self::CL, 0b010 => Self::DL, @@ -217,7 +231,7 @@ impl Register { 0b111 => Self::BH, _ => panic!("Invalid 8bit register ID encountered"), }, - OperandWidth::Word(w) => match w { + ImmediateOperand::Word(w) => match w { 0b000 => Self::AX, 0b001 => Self::CX, 0b010 => Self::DX, @@ -290,33 +304,6 @@ impl fmt::Display for SegmentRegister { } } -/// An immediate byte value for an instruction. -#[derive(Debug, Clone)] -pub struct ImmediateByte(pub b); - -/// An immediate word value for an instruction -#[derive(Debug, Clone)] -pub struct ImmediateWord(pub w); - -macro_rules! impl_display_and_lowerhex { - ($name:ident) => { - impl std::fmt::Display for $name { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0) - } - } - - impl std::fmt::LowerHex for $name { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - std::fmt::LowerHex::fmt(&self.0, f) - } - } - }; -} - -impl_display_and_lowerhex!(ImmediateByte); -impl_display_and_lowerhex!(ImmediateWord); - #[derive(Debug, Clone)] /// ModRM byte can either target a memory location or some register pub enum ModRmTarget { @@ -339,7 +326,7 @@ impl std::fmt::Display for ModRmTarget { pub struct MemoryIndex { pub base: Option, pub index: Option, - pub displacement: Option<OperandWidth>, + pub displacement: Option<ImmediateOperand>, } impl fmt::Display for MemoryIndex { @@ -365,20 +352,3 @@ impl fmt::Display for MemoryIndex { } } } - -#[derive(Debug, Clone)] -#[allow(dead_code)] -/// Can be used to encode either byte or word operands -pub enum OperandWidth { - Byte(u8), - Word(u16), -} - -impl fmt::Display for OperandWidth { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Byte(byte) => write!(f, "{}", byte), - Self::Word(word) => write!(f, "{}", word), - } - } -}