use core::fmt; use std::{fs::File, io::Read, process::exit}; use crate::aout::Aout; use crate::instructions::{MemoryIndex, ModRmTarget, Operand, Pointer}; use crate::register::{Register, RegisterId, SegmentRegister}; use crate::{ Args, instructions::{Instruction, Mnemonic}, }; use crate::{modrmb, modrmgprb, modrmgprv, modrms, modrmv}; #[derive(Debug)] /// Generic errors, which are encountered during parsing. pub enum DisasmError { NoFile(Option), IoError(std::io::Error), } impl From for DisasmError { fn from(error: std::io::Error) -> Self { DisasmError::IoError(error) } } impl fmt::Display for DisasmError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DisasmError::NoFile(msg) => write!(f, "No file error: {:?}", msg), DisasmError::IoError(msg) => write!(f, "{}", msg), } } } /// Disassemble the binary in `path` into a vector of instructions. /// Main entry point to the disassembly. pub fn disasm(args: &Args) -> Result, DisasmError> { let contents = path_to_buf(args)?; let aout = Aout::new(contents); log::debug!("{:?}", aout); let mut disasm = Disassembler::new(aout); disasm.decode_instructions() } /// Read a filepath into a u8 buffer. fn path_to_buf(args: &Args) -> Result, DisasmError> { let path = args .path .clone() .ok_or(DisasmError::NoFile(args.path.clone()))?; let mut file = File::open(path)?; let mut buf = Vec::new(); file.read_to_end(&mut buf)?; Ok(buf) } #[derive(Debug)] struct Disassembler { pub offset: usize, // the current offset in the disasm process pub text: Vec, // the aout binary pub instruction: Instruction, // the instruction, which is currently being parsed } impl Disassembler { pub fn new(aout: Aout) -> Self { Disassembler { offset: 0, text: aout.text, instruction: Instruction::new(), } } /// Parse a single byte of binary, return it and advance the offset. /// Returns the read byte. pub fn parse_byte(&mut self) -> u8 { // advance to operand self.offset += 1; let byte = self.text[self.offset]; self.instruction.raw.push(byte); byte } /// Parse a single word of binary, return it and advance the offset. /// Returns the read word. pub fn parse_word(&mut self) -> u16 { // advance to operand self.offset += 1; let byte1 = self.text[self.offset]; let byte2 = self.text[self.offset + 1]; // jump onto last operand self.offset += 1; self.instruction.raw.push(byte1); self.instruction.raw.push(byte2); u16::from_le_bytes([byte1, byte2]) } /// Takes in a modrm byte and returns mod, reg and r/m. fn deconstruct_modrm_byte(modrm: u8) -> (u8, u8, u8) { let mode = (modrm >> 6) & 0b11; let reg = (modrm >> 3) & 0b111; let rm = modrm & 0b111; (mode, reg, rm) } /// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset. /// Returns the parsed modrm target and the source register pub fn parse_modrm_byte(&mut self, width: Operand) -> (ModRmTarget, RegisterId) { // advance to operand self.offset += 1; let modrm = self.text[self.offset]; self.instruction.raw.push(modrm); let (mode, reg, rm) = Self::deconstruct_modrm_byte(modrm); log::debug!( "0x{:04x} deconstructed into: 0b{:b}, 0b{:b}, 0b{:b}", modrm, mode, reg, rm ); let mut displacement = None; match mode { 0b00 => { if rm == 0b110 { log::debug!("Additional word during ModRM parsing was read with mod 0."); displacement = Some(Operand::Word(self.parse_word())); } else { displacement = None; } } 0b01 => { log::debug!("Additional byte during ModRM parsing was read."); displacement = Some(Operand::Byte(self.parse_byte())) } 0b10 => { log::debug!("Additional word during ModRM parsing was read."); displacement = Some(Operand::Word(self.parse_word())); } 0b11 => { log::debug!("ModRM (0b{:b}) to/from Register (0b{:b})", rm, reg); // XXX: find a nicer way instead of using Byte(0) and Word(0) let target = match width { Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))), Operand::Word(_) => { ModRmTarget::Register(Register::by_id(Operand::Word(rm.into()))) } }; return (target, reg); } _ => panic!("Invalid ModRM byte encountered"), }; let index = match rm { 0b0000 => MemoryIndex { base: Some(Register::BX), index: Some(Register::SI), displacement, }, 0b0001 => MemoryIndex { base: Some(Register::BX), index: Some(Register::DI), displacement, }, 0b0010 => MemoryIndex { base: Some(Register::BP), index: Some(Register::SI), displacement, }, 0b0011 => MemoryIndex { base: Some(Register::BP), index: Some(Register::DI), displacement, }, 0b0100 => MemoryIndex { base: None, index: Some(Register::SI), displacement, }, 0b0101 => MemoryIndex { base: None, index: Some(Register::DI), displacement, }, 0b0110 => MemoryIndex { base: Some(Register::BP), index: None, displacement, }, 0b0111 => MemoryIndex { base: Some(Register::BX), index: None, displacement, }, _ => panic!("Invalid ModRM byte encountered"), }; (ModRmTarget::Memory(index), reg) } /// Match the modrm reg bits to the GPR1 mnemonics. /// GPR always has an imm value as second operand, but is available in both /// Byte and Word length. pub fn modrm_reg_to_mnemonic(reg: u8, target: ModRmTarget, imm: Operand) -> Mnemonic { match imm { Operand::Byte(b) => match reg { 0b000 => Mnemonic::ADD_Ib(target, b), 0b001 => Mnemonic::OR_Ib(target, b), 0b010 => Mnemonic::ADC_Ib(target, b), 0b011 => Mnemonic::SBB_Ib(target, b), 0b100 => Mnemonic::AND_Ib(target, b), 0b101 => Mnemonic::SUB_Ib(target, b), 0b110 => Mnemonic::XOR_Ib(target, b), 0b111 => Mnemonic::CMP_Ib(target, b), _ => panic!("Illegal GPR1 mnemonic"), }, Operand::Word(w) => match reg { 0b000 => Mnemonic::ADD_Iv(target, w), 0b001 => Mnemonic::OR_Iv(target, w), 0b010 => Mnemonic::ADC_Iv(target, w), 0b011 => Mnemonic::SBB_Iv(target, w), 0b100 => Mnemonic::AND_Iv(target, w), 0b101 => Mnemonic::SUB_Iv(target, w), 0b110 => Mnemonic::XOR_Iv(target, w), 0b111 => Mnemonic::CMP_Iv(target, w), _ => panic!("Illegal GPR1 mnemonic"), }, } } /// Decode instructions from the text section of the provided binary pub fn decode_instructions(&mut self) -> Result, DisasmError> { // naive approach: // 1. read byte // 2. pattern match to see which instruction it is // 3. read as many bytes as this instruction needs (registers, immidiates, ...) // repeat until no bytes left let mut instructions = Vec::new(); while self.offset < self.text.len() { self.instruction.start = self.offset; let opcode = self.text[self.offset]; // additional raw bytes will be pushed by parse functions self.instruction.raw.push(opcode); // XXX: convert this copy and paste horror into a proc macro self.instruction.opcode = match opcode { 0x00 => modrmb!(self, ADD_FromReg), 0x01 => modrmv!(self, ADD_FromReg), 0x02 => modrmb!(self, ADD_ToReg), 0x03 => modrmv!(self, ADD_ToReg), 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()), 0x05 => Mnemonic::ADD_AXIv(self.parse_word()), 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), 0x07 => Mnemonic::POP_S(SegmentRegister::ES), 0x08 => modrmb!(self, OR_FromReg), 0x09 => modrmv!(self, OR_FromReg), 0x0A => modrmb!(self, OR_ToReg), 0x0B => modrmv!(self, OR_ToReg), 0x0C => Mnemonic::OR_ALIb(self.parse_byte()), 0x0D => Mnemonic::OR_AXIv(self.parse_word()), 0x0E => Mnemonic::PUSH_S(SegmentRegister::CS), 0x0F => panic!("Opcode 0x0F (POP CS) is considered undefined"), 0x10 => modrmb!(self, ADC_FromReg), 0x11 => modrmv!(self, ADC_FromReg), 0x12 => modrmb!(self, ADC_ToReg), 0x13 => modrmv!(self, ADC_ToReg), 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()), 0x15 => Mnemonic::ADC_AXIv(self.parse_word()), 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), 0x17 => Mnemonic::POP_S(SegmentRegister::SS), 0x18 => modrmb!(self, SBB_FromReg), 0x19 => modrmv!(self, SBB_FromReg), 0x1A => modrmb!(self, SBB_ToReg), 0x1B => modrmv!(self, SBB_ToReg), 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()), 0x1D => Mnemonic::SBB_AXIv(self.parse_word()), 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), 0x1F => Mnemonic::POP_S(SegmentRegister::DS), 0x20 => modrmb!(self, AND_FromReg), 0x21 => modrmv!(self, AND_FromReg), 0x22 => modrmb!(self, AND_ToReg), 0x23 => modrmv!(self, AND_ToReg), 0x24 => Mnemonic::AND_ALIb(self.parse_byte()), 0x25 => Mnemonic::AND_AXIv(self.parse_word()), 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), 0x27 => Mnemonic::DAA, 0x28 => modrmb!(self, SUB_FromReg), 0x29 => modrmv!(self, SUB_FromReg), 0x2A => modrmb!(self, SUB_ToReg), 0x2B => modrmv!(self, SUB_ToReg), 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()), 0x2D => Mnemonic::SUB_AXIv(self.parse_word()), 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), 0x2F => Mnemonic::DAS, 0x30 => modrmb!(self, XOR_FromReg), 0x31 => modrmv!(self, XOR_FromReg), 0x32 => modrmb!(self, XOR_ToReg), 0x33 => modrmv!(self, XOR_ToReg), 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()), 0x35 => Mnemonic::XOR_AXIv(self.parse_word()), 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), 0x37 => Mnemonic::AAA, 0x38 => modrmb!(self, CMP_FromReg), 0x39 => modrmv!(self, CMP_FromReg), 0x3A => modrmb!(self, CMP_ToReg), 0x3B => modrmv!(self, CMP_ToReg), 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()), 0x3D => Mnemonic::CMP_AXIv(self.parse_word()), 0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS), 0x3F => Mnemonic::AAS, 0x40 => Mnemonic::INC(Register::AX), 0x41 => Mnemonic::INC(Register::CX), 0x42 => Mnemonic::INC(Register::DX), 0x43 => Mnemonic::INC(Register::BX), 0x44 => Mnemonic::INC(Register::SP), 0x45 => Mnemonic::INC(Register::BP), 0x46 => Mnemonic::INC(Register::SI), 0x47 => Mnemonic::INC(Register::DI), 0x48 => Mnemonic::DEC(Register::AX), 0x49 => Mnemonic::DEC(Register::CX), 0x4A => Mnemonic::DEC(Register::DX), 0x4B => Mnemonic::DEC(Register::BX), 0x4C => Mnemonic::DEC(Register::SP), 0x4D => Mnemonic::DEC(Register::BP), 0x4E => Mnemonic::DEC(Register::SI), 0x4F => Mnemonic::DEC(Register::DI), 0x50 => Mnemonic::PUSH_R(Register::AX), 0x51 => Mnemonic::PUSH_R(Register::CX), 0x52 => Mnemonic::PUSH_R(Register::DX), 0x53 => Mnemonic::PUSH_R(Register::BX), 0x54 => Mnemonic::PUSH_R(Register::SP), 0x55 => Mnemonic::PUSH_R(Register::BP), 0x56 => Mnemonic::PUSH_R(Register::SI), 0x57 => Mnemonic::PUSH_R(Register::DI), 0x58 => Mnemonic::POP_R(Register::AX), 0x59 => Mnemonic::POP_R(Register::CX), 0x5A => Mnemonic::POP_R(Register::DX), 0x5B => Mnemonic::POP_R(Register::BX), 0x5C => Mnemonic::POP_R(Register::SP), 0x5D => Mnemonic::POP_R(Register::BP), 0x5E => Mnemonic::POP_R(Register::SI), 0x5F => Mnemonic::POP_R(Register::DI), 0x60..=0x6F => panic!("0x06 to 0x06F is considered undefined."), 0x70 => Mnemonic::JO(self.parse_byte()), 0x71 => Mnemonic::JNO(self.parse_byte()), 0x72 => Mnemonic::JB(self.parse_byte()), 0x73 => Mnemonic::JNB(self.parse_byte()), 0x74 => Mnemonic::JZ(self.parse_byte()), 0x75 => Mnemonic::JNZ(self.parse_byte()), 0x76 => Mnemonic::JBE(self.parse_byte()), 0x77 => Mnemonic::JA(self.parse_byte()), 0x78 => Mnemonic::JS(self.parse_byte()), 0x79 => Mnemonic::JNS(self.parse_byte()), 0x7A => Mnemonic::JPE(self.parse_byte()), 0x7B => Mnemonic::JPO(self.parse_byte()), 0x7C => Mnemonic::JL(self.parse_byte()), 0x7D => Mnemonic::JGE(self.parse_byte()), 0x7E => Mnemonic::JLE(self.parse_byte()), 0x7F => Mnemonic::JG(self.parse_byte()), 0x80 => modrmgprb!(self), 0x81 => modrmgprv!(self), 0x82 => modrmgprb!(self), // same as 0x80 0x83 => panic!("Sign extented GPR1 not yet implemented"), 0x84 => modrmb!(self, TEST), 0x85 => modrmv!(self, TEST), 0x86 => modrmb!(self, XHCG), 0x87 => modrmv!(self, XHCG), 0x88 => modrmb!(self, MOV_FromReg), 0x89 => modrmv!(self, MOV_FromReg), 0x8A => modrmb!(self, MOV_ToReg), 0x8B => modrmv!(self, MOV_ToReg), 0x8C => modrms!(self, MOV_FromSReg), 0x8E => modrms!(self, MOV_ToSReg), 0x8D => modrmv!(self, LEA), 0x8F => { let target = self.parse_modrm_byte(Operand::Word(0)).0; let mem = match target { ModRmTarget::Memory(idx) => idx, _ => panic!("POP_M instruction given a register to pop into"), }; Mnemonic::POP_M(mem) } 0x90 => Mnemonic::NOP(), 0x91 => Mnemonic::XCHG_AX(Register::CX), 0x92 => Mnemonic::XCHG_AX(Register::DX), 0x93 => Mnemonic::XCHG_AX(Register::BX), 0x94 => Mnemonic::XCHG_AX(Register::SP), 0x95 => Mnemonic::XCHG_AX(Register::BP), 0x96 => Mnemonic::XCHG_AX(Register::SI), 0x97 => Mnemonic::XCHG_AX(Register::DI), 0x98 => Mnemonic::CBW, 0x99 => Mnemonic::CWD, 0x9A => Mnemonic::CALL(Pointer { segment: self.parse_word(), offset: self.parse_word(), }), 0x9B => Mnemonic::WAIT, 0x9C => Mnemonic::PUSHF, 0x9D => Mnemonic::POPF, 0x9E => Mnemonic::SAHF, 0x9F => Mnemonic::LAHF, 0xCD => Mnemonic::INT(self.parse_byte()), 0xBB => Mnemonic::MOV_BXIv(self.parse_word()), _ => { eprintln!("Encountered unknown instruction '0x{:x}'", opcode); eprintln!("Offset might be misaligned and data is being interpreted."); eprintln!("Existing to avoid further misinterpretation..."); exit(1); } }; println!("{}", self.instruction); instructions.push(self.instruction.clone()); self.instruction = Instruction::new(); self.offset += 1; } Ok(instructions) } }