From a5cffa485220550a8c81ff90376acd412453b03e Mon Sep 17 00:00:00 2001 From: Marco Thomas Date: Tue, 1 Jul 2025 12:04:20 +0900 Subject: [PATCH] fix(interpreter): impl fetch and decode I parsed all instructions before executing, but this is not how an Intel CPU works. We need to fetch and decode the instruction pointed to by IP on the fly. --- src/aout.rs | 22 +- src/disasm.rs | 839 ++++++++++++++++----------------- src/instructions.rs | 10 +- src/interpreter/interpreter.rs | 168 +++---- src/main.rs | 11 +- 5 files changed, 511 insertions(+), 539 deletions(-) diff --git a/src/aout.rs b/src/aout.rs index 9642564..3c6789f 100644 --- a/src/aout.rs +++ b/src/aout.rs @@ -1,9 +1,13 @@ //! Internal a.out File abstraction. use core::fmt; -use std::ffi::{c_uchar, c_ushort}; +use std::{ + ffi::{c_uchar, c_ushort}, + fs::File, + io::Read, +}; -use crate::operands::Byte; +use crate::{Args, disasm::DisasmError, operands::Byte}; #[allow(non_camel_case_types)] pub type c_long = i32; // we use a a.out with 32 byte @@ -25,6 +29,20 @@ impl fmt::Display for Aout { } impl Aout { + pub fn new_from_args(args: &Args) -> Self { + let path = args + .path + .clone() + .ok_or(DisasmError::NoFile(args.path.clone())) + .unwrap(); + let mut file = File::open(path).unwrap(); + let mut buf = Vec::new(); + file.read_to_end(&mut buf).unwrap(); + let aout = Aout::new(buf); + log::debug!("{:?}", aout); + aout + } + pub fn new(buf: Vec) -> Self { let hdr = Header { magic: [buf[0], buf[1]], diff --git a/src/disasm.rs b/src/disasm.rs index d716ec5..3742bca 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -12,7 +12,6 @@ use crate::{ }; use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister}; use core::fmt; -use std::{fs::File, io::Read}; #[derive(Debug, Clone)] /// Select, wheter 8, or 16-bit Registers should be selected. 
@@ -84,24 +83,15 @@ impl fmt::Display for DisasmError { #[derive(Debug, Clone)] pub struct Disassembler { - offset: usize, // the current offset in the disasm process + pub offset: usize, // the current offset in the disasm process pub aout: Aout, // the aout binary - instruction: Instruction, // the instruction, which is currently being parsed + pub instruction: Instruction, // the instruction, which is currently being parsed instructions: Vec, // all parsed instructions } impl Disassembler { pub fn new(args: &Args) -> Self { - let path = args - .path - .clone() - .ok_or(DisasmError::NoFile(args.path.clone())) - .unwrap(); - let mut file = File::open(path).unwrap(); - let mut buf = Vec::new(); - file.read_to_end(&mut buf).unwrap(); - let aout = Aout::new(buf); - log::debug!("{:?}", aout); + let aout = Aout::new_from_args(args); Disassembler { offset: 0, @@ -472,7 +462,7 @@ impl Disassembler { fn remove_trailing_padding(&mut self) { let mut until = self.instructions.len(); for i in self.instructions.iter().rev() { - match i.opcode { + match i.mnemonic { // 0x00 0x00 in binary Mnemonic::ADD_FromReg( ModRmTarget::Memory(MemoryIndex { @@ -493,416 +483,9 @@ impl Disassembler { self.instructions.truncate(until); } - /// Decode instructions by matching byte signature to their mnemonics and - /// depending on the instruction, parsing some operands afterwards. - /// All parsing is done in capsulated functions, here everything just - /// gets consolodated. 
fn decode_instructions(&mut self) -> Result<(), DisasmError> { - log::debug!("Starting to decode text of length {}", self.aout.text.len()); while self.offset < self.aout.text.len() { - // reset mutable current instruction - self.instruction = Instruction::new(); - self.instruction.addr = self.offset; - - // fetch next opcode - let opcode = self.aout.text[self.offset]; - - // additional raw bytes will be pushed by parse functions - self.instruction.raw.push(opcode); - - log::debug!("Parsing next opcode with opcode: {opcode:#04x}"); - self.instruction.opcode = match opcode { - 0x00 => modrm_8b_register!(self, ADD_FromReg), - 0x01 => modrm_16b_register!(self, ADD_FromReg), - 0x02 => modrm_8b_register!(self, ADD_ToReg), - 0x03 => modrm_16b_register!(self, ADD_ToReg), - 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?), - 0x05 => Mnemonic::ADD_AXIv(self.parse_word()?), - - 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), - 0x07 => Mnemonic::POP_S(SegmentRegister::ES), - - 0x08 => modrm_8b_register!(self, OR_FromReg), - 0x09 => modrm_16b_register!(self, OR_FromReg), - 0x0A => modrm_8b_register!(self, OR_ToReg), - 0x0B => modrm_16b_register!(self, OR_ToReg), - 0x0C => Mnemonic::OR_ALIb(self.parse_byte()?), - 0x0D => Mnemonic::OR_AXIv(self.parse_word()?), - - 0x0E => Mnemonic::PUSH_S(SegmentRegister::CS), - - 0x0F => return Err(DisasmError::OpcodeUndefined(opcode)), - - 0x10 => modrm_8b_register!(self, ADC_FromReg), - 0x11 => modrm_16b_register!(self, ADC_FromReg), - 0x12 => modrm_8b_register!(self, ADC_ToReg), - 0x13 => modrm_16b_register!(self, ADC_ToReg), - 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?), - 0x15 => Mnemonic::ADC_AXIv(self.parse_word()?), - - 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), - 0x17 => Mnemonic::POP_S(SegmentRegister::SS), - - 0x18 => modrm_8b_register!(self, SBB_FromReg), - 0x19 => modrm_16b_register!(self, SBB_FromReg), - 0x1A => modrm_8b_register!(self, SBB_ToReg), - 0x1B => modrm_16b_register!(self, SBB_ToReg), - 0x1C => 
Mnemonic::SBB_ALIb(self.parse_byte()?), - 0x1D => Mnemonic::SBB_AXIv(self.parse_word()?), - - 0x1E => Mnemonic::PUSH_S(SegmentRegister::DS), - 0x1F => Mnemonic::POP_S(SegmentRegister::DS), - - 0x20 => modrm_8b_register!(self, AND_FromReg), - 0x21 => modrm_16b_register!(self, AND_FromReg), - 0x22 => modrm_8b_register!(self, AND_ToReg), - 0x23 => modrm_16b_register!(self, AND_ToReg), - 0x24 => Mnemonic::AND_ALIb(self.parse_byte()?), - 0x25 => Mnemonic::AND_AXIv(self.parse_word()?), - - 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), - 0x27 => Mnemonic::DAA, - - 0x28 => modrm_8b_register!(self, SUB_FromReg), - 0x29 => modrm_16b_register!(self, SUB_FromReg), - 0x2A => modrm_8b_register!(self, SUB_ToReg), - 0x2B => modrm_16b_register!(self, SUB_ToReg), - 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?), - 0x2D => Mnemonic::SUB_AXIv(self.parse_word()?), - - 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), - 0x2F => Mnemonic::DAS, - - 0x30 => modrm_8b_register!(self, XOR_FromReg), - 0x31 => modrm_16b_register!(self, XOR_FromReg), - 0x32 => modrm_8b_register!(self, XOR_ToReg), - 0x33 => modrm_16b_register!(self, XOR_ToReg), - 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?), - 0x35 => Mnemonic::XOR_AXIv(self.parse_word()?), - - 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), - 0x37 => Mnemonic::AAA, - - 0x38 => modrm_8b_register!(self, CMP_FromReg), - 0x39 => modrm_16b_register!(self, CMP_FromReg), - 0x3A => modrm_8b_register!(self, CMP_ToReg), - 0x3B => modrm_16b_register!(self, CMP_ToReg), - 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?), - 0x3D => Mnemonic::CMP_AXIv(self.parse_word()?), - - 0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS), - 0x3F => Mnemonic::AAS, - - 0x40 => Mnemonic::INC_Reg(Register::AX), - 0x41 => Mnemonic::INC_Reg(Register::CX), - 0x42 => Mnemonic::INC_Reg(Register::DX), - 0x43 => Mnemonic::INC_Reg(Register::BX), - 0x44 => Mnemonic::INC_Reg(Register::SP), - 0x45 => Mnemonic::INC_Reg(Register::BP), - 0x46 => Mnemonic::INC_Reg(Register::SI), - 0x47 => 
Mnemonic::INC_Reg(Register::DI), - - 0x48 => Mnemonic::DEC_Reg(Register::AX), - 0x49 => Mnemonic::DEC_Reg(Register::CX), - 0x4A => Mnemonic::DEC_Reg(Register::DX), - 0x4B => Mnemonic::DEC_Reg(Register::BX), - 0x4C => Mnemonic::DEC_Reg(Register::SP), - 0x4D => Mnemonic::DEC_Reg(Register::BP), - 0x4E => Mnemonic::DEC_Reg(Register::SI), - 0x4F => Mnemonic::DEC_Reg(Register::DI), - - 0x50 => Mnemonic::PUSH_R(Register::AX), - 0x51 => Mnemonic::PUSH_R(Register::CX), - 0x52 => Mnemonic::PUSH_R(Register::DX), - 0x53 => Mnemonic::PUSH_R(Register::BX), - 0x54 => Mnemonic::PUSH_R(Register::SP), - 0x55 => Mnemonic::PUSH_R(Register::BP), - 0x56 => Mnemonic::PUSH_R(Register::SI), - 0x57 => Mnemonic::PUSH_R(Register::DI), - - 0x58 => Mnemonic::POP_R(Register::AX), - 0x59 => Mnemonic::POP_R(Register::CX), - 0x5A => Mnemonic::POP_R(Register::DX), - 0x5B => Mnemonic::POP_R(Register::BX), - 0x5C => Mnemonic::POP_R(Register::SP), - 0x5D => Mnemonic::POP_R(Register::BP), - 0x5E => Mnemonic::POP_R(Register::SI), - 0x5F => Mnemonic::POP_R(Register::DI), - - 0x60..=0x6F => return Err(DisasmError::OpcodeUndefined(opcode)), - - 0x70 => Mnemonic::JO(self.parse_j_byte()?), - 0x71 => Mnemonic::JNO(self.parse_j_byte()?), - 0x72 => Mnemonic::JB(self.parse_j_byte()?), - 0x73 => Mnemonic::JNB(self.parse_j_byte()?), - 0x74 => Mnemonic::JZ(self.parse_j_byte()?), - 0x75 => Mnemonic::JNZ(self.parse_j_byte()?), - 0x76 => Mnemonic::JBE(self.parse_j_byte()?), - 0x77 => Mnemonic::JA(self.parse_j_byte()?), - 0x78 => Mnemonic::JS(self.parse_j_byte()?), - 0x79 => Mnemonic::JNS(self.parse_j_byte()?), - 0x7A => Mnemonic::JPE(self.parse_j_byte()?), - 0x7B => Mnemonic::JPO(self.parse_j_byte()?), - 0x7C => Mnemonic::JL(self.parse_j_byte()?), - 0x7D => Mnemonic::JGE(self.parse_j_byte()?), - 0x7E => Mnemonic::JLE(self.parse_j_byte()?), - 0x7F => Mnemonic::JG(self.parse_j_byte()?), - - // Group 1 - 0x80 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; - let imm = self.parse_byte()?; - 
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))? - } - 0x81 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - let imm = self.parse_word()?; - Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Word(imm))? - } - 0x82 => { - // same as 0x80 - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; - let imm = self.parse_byte()?; - Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))? - } - 0x83 => { - // byte extended version - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - let imm = self.parse_byte()?; - Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))? - } - - 0x84 => modrm_8b_register!(self, TEST), - 0x85 => modrm_16b_register!(self, TEST), - - 0x86 => modrm_8b_register!(self, XCHG), - 0x87 => modrm_16b_register!(self, XCHG), - - 0x88 => modrm_8b_register!(self, MOV_FromReg), - 0x89 => modrm_16b_register!(self, MOV_FromReg), - 0x8A => modrm_8b_register!(self, MOV_ToReg), - 0x8B => modrm_16b_register!(self, MOV_ToReg), - 0x8C => modrm_sregister!(self, MOV_FromSReg), - 0x8E => modrm_sregister!(self, MOV_ToSReg), - - 0x8D => modrm_16b_register!(self, LEA), - - 0x8F => { - let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - let mem = match target { - ModRmTarget::Memory(idx) => idx, - _ => { - return Err(DisasmError::IllegalOperand( - "POP (memory) instruction given a register to pop into".into(), - )); - } - }; - Mnemonic::POP_M(mem) - } - 0x90 => Mnemonic::NOP(), - - 0x91 => Mnemonic::XCHG_AX(Register::CX), - 0x92 => Mnemonic::XCHG_AX(Register::DX), - 0x93 => Mnemonic::XCHG_AX(Register::BX), - 0x94 => Mnemonic::XCHG_AX(Register::SP), - 0x95 => Mnemonic::XCHG_AX(Register::BP), - 0x96 => Mnemonic::XCHG_AX(Register::SI), - 0x97 => Mnemonic::XCHG_AX(Register::DI), - - 0x98 => Mnemonic::CBW, - 0x99 => Mnemonic::CWD, - - 0x9A => Mnemonic::CALL_p(self.parse_ptr()?), - - 0x9B => Mnemonic::WAIT, - - 0x9C => Mnemonic::PUSHF, 
- 0x9D => Mnemonic::POPF, - 0x9E => Mnemonic::SAHF, - 0x9F => Mnemonic::LAHF, - - 0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()?), - 0xA1 => Mnemonic::MOV_AX0v(self.parse_word()?), - 0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()?), - 0xA3 => Mnemonic::MOV_0vAX(self.parse_word()?), - 0xA4 => Mnemonic::MOVSB, - 0xA5 => Mnemonic::MOVSW, - - 0xA6 => Mnemonic::CMPSB, - 0xA7 => Mnemonic::CMPSW, - - 0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()?), - 0xA9 => Mnemonic::TEST_AXIv(self.parse_word()?), - - 0xAA => Mnemonic::STOSB, - 0xAB => Mnemonic::STOSW, - 0xAC => Mnemonic::LODSB, - 0xAD => Mnemonic::LODSW, - 0xAE => Mnemonic::SCASB, - 0xAF => Mnemonic::SCASW, - - 0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()?), - 0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()?), - 0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()?), - 0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()?), - 0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()?), - 0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()?), - 0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()?), - 0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()?), - 0xB8 => Mnemonic::MOV_AXIv(self.parse_word()?), - 0xB9 => Mnemonic::MOV_CXIv(self.parse_word()?), - 0xBA => Mnemonic::MOV_DXIv(self.parse_word()?), - 0xBB => Mnemonic::MOV_BXIv(self.parse_word()?), - 0xBC => Mnemonic::MOV_SPIv(self.parse_word()?), - 0xBD => Mnemonic::MOV_BPIv(self.parse_word()?), - 0xBE => Mnemonic::MOV_SIIv(self.parse_word()?), - 0xBF => Mnemonic::MOV_DIIv(self.parse_word()?), - - 0xC0..=0xC1 => return Err(DisasmError::OpcodeUndefined(opcode)), - - 0xC2 => Mnemonic::RET_Iw(self.parse_word()?), - 0xC3 => Mnemonic::RET, - - 0xC4 => { - let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?; - let ptr = Pointer16::try_from(target)?; - Mnemonic::LES(reg, ptr) - } - 0xC5 => { - let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?; 
- let ptr = Pointer16::try_from(target)?; - Mnemonic::LDS(reg, ptr) - } - - 0xC6 => { - let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; - Mnemonic::MOV_Ib(target, self.parse_byte()?) - } - 0xC7 => { - let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - Mnemonic::MOV_Iv(target, self.parse_word()?) - } - - 0xC8..=0xC9 => return Err(DisasmError::OpcodeUndefined(opcode)), - - 0xCA => Mnemonic::RETF_Iw(self.parse_word()?), - 0xCB => Mnemonic::RETF, - - 0xCC => Mnemonic::INT(3), - 0xCD => Mnemonic::INT(self.parse_byte()?), - - 0xCE => Mnemonic::INTO, - 0xCF => Mnemonic::IRET, - - // Group 2 - 0xD0 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; - Self::modrm_reg_to_grp2_1(reg, target)? - } - 0xD1 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - Self::modrm_reg_to_grp2_1(reg, target)? - } - 0xD2 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; - Self::modrm_reg_to_grp2_cl(reg, target)? - } - 0xD3 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - Self::modrm_reg_to_grp2_cl(reg, target)? 
- } - - 0xD4 => Mnemonic::AAM(self.parse_byte()?), - 0xD5 => Mnemonic::AAD(self.parse_byte()?), - - 0xD6 => return Err(DisasmError::OpcodeUndefined(opcode)), - - 0xD7 => Mnemonic::XLAT, - - 0xD8..=0xDF => return Err(DisasmError::OpcodeUndefined(opcode)), - - 0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()?), - 0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()?), - 0xE2 => Mnemonic::LOOP(self.parse_j_byte()?), - 0xE3 => Mnemonic::JCXZ(self.parse_j_byte()?), - - 0xE4 => Mnemonic::IN_AL(self.parse_byte()?), - 0xE5 => Mnemonic::IN_AX(self.parse_byte()?), - 0xE6 => Mnemonic::OUT_AL(self.parse_byte()?), - 0xE7 => Mnemonic::OUT_AX(self.parse_byte()?), - - 0xE8 => Mnemonic::CALL_v(self.parse_j_word()?), - - 0xE9 => Mnemonic::JMP_v(self.parse_j_word()?), - 0xEA => Mnemonic::JMP_p(self.parse_ptr()?), - 0xEB => Mnemonic::JMP_b(self.parse_j_byte()?), - - 0xEC => Mnemonic::IN_ALDX, - 0xED => Mnemonic::IN_AXDX, - - 0xEE => Mnemonic::OUT_ALDX, - 0xEF => Mnemonic::OUT_AXDX, - - 0xF0 => Mnemonic::LOCK, - 0xF1 => return Err(DisasmError::OpcodeUndefined(opcode)), - - 0xF2 => Mnemonic::REPNZ, - 0xF3 => Mnemonic::REPZ, - - 0xF4 => Mnemonic::HLT, - - 0xF5 => Mnemonic::CMC, - - // Group 3a - 0xF6 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; - self.modrm_reg_to_grp3(reg, target, InstructionWidth::Byte)? - } - // Group 3b - 0xF7 => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - self.modrm_reg_to_grp3(reg, target, InstructionWidth::Word)? 
- } - - 0xF8 => Mnemonic::CLC, - 0xF9 => Mnemonic::STC, - 0xFA => Mnemonic::CLI, - 0xFB => Mnemonic::STI, - 0xFC => Mnemonic::CLD, - 0xFD => Mnemonic::STD, - - // Group 4 - 0xFE => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; - match reg { - 0b0 => Mnemonic::INC_Mod(target), - 0b1 => Mnemonic::DEC_Mod(target), - _ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)), - } - } - - // Group 5 - 0xFF => { - let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; - match reg { - 0b000 => Mnemonic::INC_Mod(target), - 0b001 => Mnemonic::DEC_Mod(target), - 0b010 => Mnemonic::CALL_Mod(target), - 0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?), - 0b100 => Mnemonic::JMP_Mod(target), - 0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?), - 0b110 => Mnemonic::PUSH_Mod(target), - // 0b111 => unused - _ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)), - } - } - }; - - // Save parsed instruction - log::debug!("{}", self.instruction); - self.instructions.push(self.instruction.clone()); + self.decode_instruction()?; // Advance offset to hover the next potential opcode self.offset += 1; @@ -910,6 +493,418 @@ impl Disassembler { Ok(()) } + + /// Decode an instruction by matching byte signature to their mnemonics and + /// depending on the instruction, parsing some operands afterwards. + /// All parsing is done in encapsulated functions, here everything just + /// gets consolidated. 
+ pub fn decode_instruction(&mut self) -> Result<(), DisasmError> { + // reset mutable current instruction + self.instruction = Instruction::new(); + self.instruction.addr = self.offset; + + // fetch next opcode + let opcode = self.aout.text[self.offset]; + log::debug!("Parsing next opcode with opcode: {opcode:#04x}"); + + // additional raw bytes will be pushed by parse functions + self.instruction.raw.push(opcode); + + self.instruction.mnemonic = match opcode { + 0x00 => modrm_8b_register!(self, ADD_FromReg), + 0x01 => modrm_16b_register!(self, ADD_FromReg), + 0x02 => modrm_8b_register!(self, ADD_ToReg), + 0x03 => modrm_16b_register!(self, ADD_ToReg), + 0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?), + 0x05 => Mnemonic::ADD_AXIv(self.parse_word()?), + + 0x06 => Mnemonic::PUSH_S(SegmentRegister::ES), + 0x07 => Mnemonic::POP_S(SegmentRegister::ES), + + 0x08 => modrm_8b_register!(self, OR_FromReg), + 0x09 => modrm_16b_register!(self, OR_FromReg), + 0x0A => modrm_8b_register!(self, OR_ToReg), + 0x0B => modrm_16b_register!(self, OR_ToReg), + 0x0C => Mnemonic::OR_ALIb(self.parse_byte()?), + 0x0D => Mnemonic::OR_AXIv(self.parse_word()?), + + 0x0E => Mnemonic::PUSH_S(SegmentRegister::CS), + + 0x0F => return Err(DisasmError::OpcodeUndefined(opcode)), + + 0x10 => modrm_8b_register!(self, ADC_FromReg), + 0x11 => modrm_16b_register!(self, ADC_FromReg), + 0x12 => modrm_8b_register!(self, ADC_ToReg), + 0x13 => modrm_16b_register!(self, ADC_ToReg), + 0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?), + 0x15 => Mnemonic::ADC_AXIv(self.parse_word()?), + + 0x16 => Mnemonic::PUSH_S(SegmentRegister::SS), + 0x17 => Mnemonic::POP_S(SegmentRegister::SS), + + 0x18 => modrm_8b_register!(self, SBB_FromReg), + 0x19 => modrm_16b_register!(self, SBB_FromReg), + 0x1A => modrm_8b_register!(self, SBB_ToReg), + 0x1B => modrm_16b_register!(self, SBB_ToReg), + 0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?), + 0x1D => Mnemonic::SBB_AXIv(self.parse_word()?), + + 0x1E => 
Mnemonic::PUSH_S(SegmentRegister::DS), + 0x1F => Mnemonic::POP_S(SegmentRegister::DS), + + 0x20 => modrm_8b_register!(self, AND_FromReg), + 0x21 => modrm_16b_register!(self, AND_FromReg), + 0x22 => modrm_8b_register!(self, AND_ToReg), + 0x23 => modrm_16b_register!(self, AND_ToReg), + 0x24 => Mnemonic::AND_ALIb(self.parse_byte()?), + 0x25 => Mnemonic::AND_AXIv(self.parse_word()?), + + 0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES), + 0x27 => Mnemonic::DAA, + + 0x28 => modrm_8b_register!(self, SUB_FromReg), + 0x29 => modrm_16b_register!(self, SUB_FromReg), + 0x2A => modrm_8b_register!(self, SUB_ToReg), + 0x2B => modrm_16b_register!(self, SUB_ToReg), + 0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?), + 0x2D => Mnemonic::SUB_AXIv(self.parse_word()?), + + 0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS), + 0x2F => Mnemonic::DAS, + + 0x30 => modrm_8b_register!(self, XOR_FromReg), + 0x31 => modrm_16b_register!(self, XOR_FromReg), + 0x32 => modrm_8b_register!(self, XOR_ToReg), + 0x33 => modrm_16b_register!(self, XOR_ToReg), + 0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?), + 0x35 => Mnemonic::XOR_AXIv(self.parse_word()?), + + 0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS), + 0x37 => Mnemonic::AAA, + + 0x38 => modrm_8b_register!(self, CMP_FromReg), + 0x39 => modrm_16b_register!(self, CMP_FromReg), + 0x3A => modrm_8b_register!(self, CMP_ToReg), + 0x3B => modrm_16b_register!(self, CMP_ToReg), + 0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?), + 0x3D => Mnemonic::CMP_AXIv(self.parse_word()?), + + 0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS), + 0x3F => Mnemonic::AAS, + + 0x40 => Mnemonic::INC_Reg(Register::AX), + 0x41 => Mnemonic::INC_Reg(Register::CX), + 0x42 => Mnemonic::INC_Reg(Register::DX), + 0x43 => Mnemonic::INC_Reg(Register::BX), + 0x44 => Mnemonic::INC_Reg(Register::SP), + 0x45 => Mnemonic::INC_Reg(Register::BP), + 0x46 => Mnemonic::INC_Reg(Register::SI), + 0x47 => Mnemonic::INC_Reg(Register::DI), + + 0x48 => Mnemonic::DEC_Reg(Register::AX), + 0x49 => 
Mnemonic::DEC_Reg(Register::CX), + 0x4A => Mnemonic::DEC_Reg(Register::DX), + 0x4B => Mnemonic::DEC_Reg(Register::BX), + 0x4C => Mnemonic::DEC_Reg(Register::SP), + 0x4D => Mnemonic::DEC_Reg(Register::BP), + 0x4E => Mnemonic::DEC_Reg(Register::SI), + 0x4F => Mnemonic::DEC_Reg(Register::DI), + + 0x50 => Mnemonic::PUSH_R(Register::AX), + 0x51 => Mnemonic::PUSH_R(Register::CX), + 0x52 => Mnemonic::PUSH_R(Register::DX), + 0x53 => Mnemonic::PUSH_R(Register::BX), + 0x54 => Mnemonic::PUSH_R(Register::SP), + 0x55 => Mnemonic::PUSH_R(Register::BP), + 0x56 => Mnemonic::PUSH_R(Register::SI), + 0x57 => Mnemonic::PUSH_R(Register::DI), + + 0x58 => Mnemonic::POP_R(Register::AX), + 0x59 => Mnemonic::POP_R(Register::CX), + 0x5A => Mnemonic::POP_R(Register::DX), + 0x5B => Mnemonic::POP_R(Register::BX), + 0x5C => Mnemonic::POP_R(Register::SP), + 0x5D => Mnemonic::POP_R(Register::BP), + 0x5E => Mnemonic::POP_R(Register::SI), + 0x5F => Mnemonic::POP_R(Register::DI), + + 0x60..=0x6F => return Err(DisasmError::OpcodeUndefined(opcode)), + + 0x70 => Mnemonic::JO(self.parse_j_byte()?), + 0x71 => Mnemonic::JNO(self.parse_j_byte()?), + 0x72 => Mnemonic::JB(self.parse_j_byte()?), + 0x73 => Mnemonic::JNB(self.parse_j_byte()?), + 0x74 => Mnemonic::JZ(self.parse_j_byte()?), + 0x75 => Mnemonic::JNZ(self.parse_j_byte()?), + 0x76 => Mnemonic::JBE(self.parse_j_byte()?), + 0x77 => Mnemonic::JA(self.parse_j_byte()?), + 0x78 => Mnemonic::JS(self.parse_j_byte()?), + 0x79 => Mnemonic::JNS(self.parse_j_byte()?), + 0x7A => Mnemonic::JPE(self.parse_j_byte()?), + 0x7B => Mnemonic::JPO(self.parse_j_byte()?), + 0x7C => Mnemonic::JL(self.parse_j_byte()?), + 0x7D => Mnemonic::JGE(self.parse_j_byte()?), + 0x7E => Mnemonic::JLE(self.parse_j_byte()?), + 0x7F => Mnemonic::JG(self.parse_j_byte()?), + + // Group 1 + 0x80 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; + let imm = self.parse_byte()?; + Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))? 
+ } + 0x81 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + let imm = self.parse_word()?; + Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Word(imm))? + } + 0x82 => { + // same as 0x80 + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; + let imm = self.parse_byte()?; + Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))? + } + 0x83 => { + // byte extended version + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + let imm = self.parse_byte()?; + Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))? + } + + 0x84 => modrm_8b_register!(self, TEST), + 0x85 => modrm_16b_register!(self, TEST), + + 0x86 => modrm_8b_register!(self, XCHG), + 0x87 => modrm_16b_register!(self, XCHG), + + 0x88 => modrm_8b_register!(self, MOV_FromReg), + 0x89 => modrm_16b_register!(self, MOV_FromReg), + 0x8A => modrm_8b_register!(self, MOV_ToReg), + 0x8B => modrm_16b_register!(self, MOV_ToReg), + 0x8C => modrm_sregister!(self, MOV_FromSReg), + 0x8E => modrm_sregister!(self, MOV_ToSReg), + + 0x8D => modrm_16b_register!(self, LEA), + + 0x8F => { + let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + let mem = match target { + ModRmTarget::Memory(idx) => idx, + _ => { + return Err(DisasmError::IllegalOperand( + "POP (memory) instruction given a register to pop into".into(), + )); + } + }; + Mnemonic::POP_M(mem) + } + 0x90 => Mnemonic::NOP(), + + 0x91 => Mnemonic::XCHG_AX(Register::CX), + 0x92 => Mnemonic::XCHG_AX(Register::DX), + 0x93 => Mnemonic::XCHG_AX(Register::BX), + 0x94 => Mnemonic::XCHG_AX(Register::SP), + 0x95 => Mnemonic::XCHG_AX(Register::BP), + 0x96 => Mnemonic::XCHG_AX(Register::SI), + 0x97 => Mnemonic::XCHG_AX(Register::DI), + + 0x98 => Mnemonic::CBW, + 0x99 => Mnemonic::CWD, + + 0x9A => Mnemonic::CALL_p(self.parse_ptr()?), + + 0x9B => Mnemonic::WAIT, + + 0x9C => Mnemonic::PUSHF, + 0x9D => Mnemonic::POPF, + 0x9E => Mnemonic::SAHF, + 0x9F => 
Mnemonic::LAHF, + + 0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()?), + 0xA1 => Mnemonic::MOV_AX0v(self.parse_word()?), + 0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()?), + 0xA3 => Mnemonic::MOV_0vAX(self.parse_word()?), + 0xA4 => Mnemonic::MOVSB, + 0xA5 => Mnemonic::MOVSW, + + 0xA6 => Mnemonic::CMPSB, + 0xA7 => Mnemonic::CMPSW, + + 0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()?), + 0xA9 => Mnemonic::TEST_AXIv(self.parse_word()?), + + 0xAA => Mnemonic::STOSB, + 0xAB => Mnemonic::STOSW, + 0xAC => Mnemonic::LODSB, + 0xAD => Mnemonic::LODSW, + 0xAE => Mnemonic::SCASB, + 0xAF => Mnemonic::SCASW, + + 0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()?), + 0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()?), + 0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()?), + 0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()?), + 0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()?), + 0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()?), + 0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()?), + 0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()?), + 0xB8 => Mnemonic::MOV_AXIv(self.parse_word()?), + 0xB9 => Mnemonic::MOV_CXIv(self.parse_word()?), + 0xBA => Mnemonic::MOV_DXIv(self.parse_word()?), + 0xBB => Mnemonic::MOV_BXIv(self.parse_word()?), + 0xBC => Mnemonic::MOV_SPIv(self.parse_word()?), + 0xBD => Mnemonic::MOV_BPIv(self.parse_word()?), + 0xBE => Mnemonic::MOV_SIIv(self.parse_word()?), + 0xBF => Mnemonic::MOV_DIIv(self.parse_word()?), + + 0xC0..=0xC1 => return Err(DisasmError::OpcodeUndefined(opcode)), + + 0xC2 => Mnemonic::RET_Iw(self.parse_word()?), + 0xC3 => Mnemonic::RET, + + 0xC4 => { + let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?; + let ptr = Pointer16::try_from(target)?; + Mnemonic::LES(reg, ptr) + } + 0xC5 => { + let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?; + let ptr = Pointer16::try_from(target)?; + 
Mnemonic::LDS(reg, ptr) + } + + 0xC6 => { + let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; + Mnemonic::MOV_Ib(target, self.parse_byte()?) + } + 0xC7 => { + let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + Mnemonic::MOV_Iv(target, self.parse_word()?) + } + + 0xC8..=0xC9 => return Err(DisasmError::OpcodeUndefined(opcode)), + + 0xCA => Mnemonic::RETF_Iw(self.parse_word()?), + 0xCB => Mnemonic::RETF, + + 0xCC => Mnemonic::INT(3), + 0xCD => Mnemonic::INT(self.parse_byte()?), + + 0xCE => Mnemonic::INTO, + 0xCF => Mnemonic::IRET, + + // Group 2 + 0xD0 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; + Self::modrm_reg_to_grp2_1(reg, target)? + } + 0xD1 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + Self::modrm_reg_to_grp2_1(reg, target)? + } + 0xD2 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; + Self::modrm_reg_to_grp2_cl(reg, target)? + } + 0xD3 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + Self::modrm_reg_to_grp2_cl(reg, target)? 
+ } + + 0xD4 => Mnemonic::AAM(self.parse_byte()?), + 0xD5 => Mnemonic::AAD(self.parse_byte()?), + + 0xD6 => return Err(DisasmError::OpcodeUndefined(opcode)), + + 0xD7 => Mnemonic::XLAT, + + 0xD8..=0xDF => return Err(DisasmError::OpcodeUndefined(opcode)), + + 0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()?), + 0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()?), + 0xE2 => Mnemonic::LOOP(self.parse_j_byte()?), + 0xE3 => Mnemonic::JCXZ(self.parse_j_byte()?), + + 0xE4 => Mnemonic::IN_AL(self.parse_byte()?), + 0xE5 => Mnemonic::IN_AX(self.parse_byte()?), + 0xE6 => Mnemonic::OUT_AL(self.parse_byte()?), + 0xE7 => Mnemonic::OUT_AX(self.parse_byte()?), + + 0xE8 => Mnemonic::CALL_v(self.parse_j_word()?), + + 0xE9 => Mnemonic::JMP_v(self.parse_j_word()?), + 0xEA => Mnemonic::JMP_p(self.parse_ptr()?), + 0xEB => Mnemonic::JMP_b(self.parse_j_byte()?), + + 0xEC => Mnemonic::IN_ALDX, + 0xED => Mnemonic::IN_AXDX, + + 0xEE => Mnemonic::OUT_ALDX, + 0xEF => Mnemonic::OUT_AXDX, + + 0xF0 => Mnemonic::LOCK, + 0xF1 => return Err(DisasmError::OpcodeUndefined(opcode)), + + 0xF2 => Mnemonic::REPNZ, + 0xF3 => Mnemonic::REPZ, + + 0xF4 => Mnemonic::HLT, + + 0xF5 => Mnemonic::CMC, + + // Group 3a + 0xF6 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; + self.modrm_reg_to_grp3(reg, target, InstructionWidth::Byte)? + } + // Group 3b + 0xF7 => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + self.modrm_reg_to_grp3(reg, target, InstructionWidth::Word)? 
+ } + + 0xF8 => Mnemonic::CLC, + 0xF9 => Mnemonic::STC, + 0xFA => Mnemonic::CLI, + 0xFB => Mnemonic::STI, + 0xFC => Mnemonic::CLD, + 0xFD => Mnemonic::STD, + + // Group 4 + 0xFE => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?; + match reg { + 0b0 => Mnemonic::INC_Mod(target), + 0b1 => Mnemonic::DEC_Mod(target), + _ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)), + } + } + + // Group 5 + 0xFF => { + let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?; + match reg { + 0b000 => Mnemonic::INC_Mod(target), + 0b001 => Mnemonic::DEC_Mod(target), + 0b010 => Mnemonic::CALL_Mod(target), + 0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?), + 0b100 => Mnemonic::JMP_Mod(target), + 0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?), + 0b110 => Mnemonic::PUSH_Mod(target), + // 0b111 => unused + _ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)), + } + } + }; + + // Save parsed instruction + log::debug!("Parsed {}", self.instruction); + self.instructions.push(self.instruction.clone()); + + Ok(()) + } } // #[cfg(test)] diff --git a/src/instructions.rs b/src/instructions.rs index 4626274..1034894 100644 --- a/src/instructions.rs +++ b/src/instructions.rs @@ -11,9 +11,9 @@ use core::fmt; /// contains the `Mnemonic` that will be executed, alongside its starting offset /// and the raw parsed bytes pub struct Instruction { - pub addr: usize, // location of the instruction start - pub raw: Vec, // raw value of instruction - pub opcode: Mnemonic, // actual instruction + pub addr: usize, // location of the instruction start + pub raw: Vec, // raw value of instruction + pub mnemonic: Mnemonic, // actual instruction } impl Instruction { @@ -21,7 +21,7 @@ impl Instruction { Instruction { addr: 0, raw: Vec::new(), - opcode: Mnemonic::NOP(), + mnemonic: Mnemonic::NOP(), } } } @@ -41,7 +41,7 @@ impl fmt::Display for Instruction { ) .unwrap(); - write!(f, "\t{}", self.opcode) + write!(f, "\t{}", self.mnemonic) } } 
diff --git a/src/interpreter/interpreter.rs b/src/interpreter/interpreter.rs index 3ae41b9..b3ddf33 100644 --- a/src/interpreter/interpreter.rs +++ b/src/interpreter/interpreter.rs @@ -2,11 +2,13 @@ use core::fmt; use std::{fmt::Debug, process::exit}; use crate::{ + Args, + aout::Aout, + disasm::Disassembler, instructions::{Instruction, Mnemonic}, interpreter::{ computer::{CarryUsage, RotationDirection}, interrupt::Mess1, - register::SegmentRegister, }, operands::{Byte, ImmediateOperand, ModRmTarget, Word}, }; @@ -16,12 +18,9 @@ use super::{ interrupt::InterruptMessage, }; -type InstructionPointer<'a> = std::slice::Iter<'a, Instruction>; - #[derive(Debug, Clone)] pub enum InterpreterError { InvalidSyscall(Byte), - InstructionNotFound(Word), MemoryOutOfBound(Word), } @@ -31,9 +30,6 @@ impl fmt::Display for InterpreterError { InterpreterError::InvalidSyscall(id) => { write!(f, "The syscall with ID {} is unknown", id) } - InterpreterError::InstructionNotFound(addr) => { - write!(f, "IP({addr}) points at invalid instruction") - } InterpreterError::MemoryOutOfBound(addr) => { write!( f, @@ -47,30 +43,47 @@ impl fmt::Display for InterpreterError { #[derive(Debug, Clone)] pub struct Interpreter { computer: Computer, - instructions: Vec, + text: Vec, + ip: usize, + disassembler: Disassembler, } impl Interpreter { - pub fn new(instructions: Vec, data: Vec) -> Self { + pub fn new(args: &Args) -> Self { + let aout = Aout::new_from_args(args); Self { - computer: Computer::new(data), - instructions, + computer: Computer::new(aout.data), + text: aout.text, + ip: 0, + disassembler: Disassembler::new(args), } } - pub fn interpret(&mut self) -> Result<(), InterpreterError> { - let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs) - .ok_or(InterpreterError::InstructionNotFound(0))?; + /// Sets instruction pointer in compliance with [`Register::CS`]. 
+ pub fn set_ip(&mut self, ip: usize) { + self.ip = ip + (self.computer.sregs.cs * 16) as usize + } + + /// Gets instruction pointer in compliance with [`Register::CS`]. + pub fn get_ip(&self) -> usize { + self.ip + (self.computer.sregs.cs * 16) as usize + } + + pub fn interpret(&mut self) -> Result<(), InterpreterError> { + while self.ip < self.text.len() { + self.disassembler.offset = self.ip; + // XXX remove unwrap + self.disassembler.decode_instruction().unwrap(); + let current_instruction = self.disassembler.instruction.clone(); - while let Some(cur_instr) = ip.next() { log::info!( "{} IP({:04x})\t {:<32}", self.computer, - cur_instr.addr, - cur_instr.opcode.to_string(), + current_instruction.addr, + current_instruction.mnemonic.to_string(), ); - match cur_instr.opcode { + match current_instruction.mnemonic { /* * ADD */ @@ -381,7 +394,7 @@ impl Interpreter { | Mnemonic::JMP_b(offset) | Mnemonic::JMP_v(offset) => { let flags = self.computer.flags.clone(); - let flag = match cur_instr.opcode { + let flag = match current_instruction.mnemonic { Mnemonic::JO(_) => flags.of, Mnemonic::JNO(_) => !flags.of, Mnemonic::JB(_) => flags.cf, @@ -402,7 +415,8 @@ impl Interpreter { _ => panic!("unreachable"), }; if flag { - Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset); + self.set_ip(offset); + continue; } } @@ -411,66 +425,35 @@ impl Interpreter { */ Mnemonic::JMP_p(ptr) => { self.computer.sregs.cs = ptr.segment; - Self::ip_jump( - &self.instructions, - &mut ip, - &self.computer.sregs, - ptr.offset.into(), - ); + self.set_ip(ptr.offset.into()); + continue; } Mnemonic::JMP_Mp(ptr) => { - Self::ip_jump( - &self.instructions, - &mut ip, - &self.computer.sregs, - ptr.word.into(), - ); + self.set_ip(ptr.word.into()); + continue; } - Mnemonic::JMP_Mod(target) => Self::ip_jump( - &self.instructions, - &mut ip, - &self.computer.sregs, - self.computer.read_modrm(target)?.into(), - ), + Mnemonic::JMP_Mod(target) => 
self.set_ip(self.computer.read_modrm(target)?.into()), Mnemonic::CALL_p(ptr) => { - if let Some(next_instr) = ip.next() { - self.computer.push_stack(next_instr.addr.into())?; - } + self.save_next_instruction_into_stack(&current_instruction)?; + self.computer.sregs.cs = ptr.segment; - Self::ip_jump( - &self.instructions, - &mut ip, - &self.computer.sregs, - ptr.offset.into(), - ); + self.set_ip(ptr.offset.into()); + continue; } Mnemonic::CALL_v(offset) => { - if let Some(next_instr) = ip.next() { - self.computer.push_stack(next_instr.addr.into())?; - } - Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset); + self.save_next_instruction_into_stack(&current_instruction)?; + self.set_ip(offset); + continue; } Mnemonic::CALL_Mod(target) => { - if let Some(next_instr) = ip.next() { - self.computer.push_stack(next_instr.addr.into())?; - } - Self::ip_jump( - &self.instructions, - &mut ip, - &self.computer.sregs, - self.computer.read_modrm(target)?.into(), - ); + self.save_next_instruction_into_stack(&current_instruction)?; + self.set_ip(self.computer.read_modrm(target)?.into()); + continue; } Mnemonic::CALL_Mp(ptr) => { - if let Some(next_instr) = ip.next() { - self.computer.push_stack(next_instr.addr.into())?; - } - Self::ip_jump( - &self.instructions, - &mut ip, - &self.computer.sregs, - ptr.word.into(), - ); + self.save_next_instruction_into_stack(&current_instruction)?; + self.set_ip(ptr.word.into()); + continue; } /* @@ -601,13 +584,9 @@ impl Interpreter { * RET */ Mnemonic::RET => { - let offset = self.computer.pop_stack()?; - Self::ip_jump( - &self.instructions, - &mut ip, - &self.computer.sregs, - offset as usize, - ); + let return_addr = self.computer.pop_stack()?; + self.set_ip(return_addr as usize); + continue; } /* @@ -771,6 +750,9 @@ impl Interpreter { } _ => log::info!("no action done"), } + + // Go to next instruction + self.ip += current_instruction.raw.len(); } Ok(()) @@ -824,31 +806,15 @@ impl Interpreter { Ok(()) } - /// Find the starting addr of an 
instruction in the list of all parsed - /// instructions and return the iterator to that matching instruction, to - /// allow for further traversal from that point on. - /// I bet, that this is not really fast, but I could'nt come up with a - /// better idea so far. - fn find_instruction<'a>( - items: &'a Vec, - ip_addr: usize, - sregs: &SegmentRegister, - ) -> Option> { - items - .iter() - .position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize) - .map(|index| items[index..].iter()) - } + /// Used for CALL and JUMP instructions. + fn save_next_instruction_into_stack( + &mut self, + current_instruction: &Instruction, + ) -> Result<(), InterpreterError> { + let instruction_size_in_bytes = current_instruction.raw.len(); + self.computer + .push_stack((self.get_ip() + instruction_size_in_bytes).into())?; - /// Jump [`InstructionPointer`] `ip` to an `offset`. - fn ip_jump<'a>( - instructions: &'a Vec, - ip: &mut InstructionPointer<'a>, - sregs: &SegmentRegister, - offset: usize, - ) { - if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) { - *ip = next_instr; - } + Ok(()) } } diff --git a/src/main.rs b/src/main.rs index 018f034..34ec5af 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,15 +69,8 @@ fn main() { } } Command::Interpret => { - let mut disasm = Disassembler::new(&args); - let instructions = disasm.disassemble(args.dump); - match instructions { - Ok(instrs) => { - let mut interpreter = Interpreter::new(instrs, disasm.aout.data); - interpreter.interpret().unwrap(); - } - _ => {} - } + let mut interpreter = Interpreter::new(&args); + interpreter.interpret().unwrap(); } } }