fix(interpreter): impl fetch and decode
Previously, all instructions were parsed up front before execution started, but that is not how an Intel CPU works. Instead, the instruction that IP points to has to be fetched and decoded on the fly.
This commit is contained in:
22
src/aout.rs
22
src/aout.rs
@@ -1,9 +1,13 @@
|
||||
//! Internal a.out File abstraction.
|
||||
|
||||
use core::fmt;
|
||||
use std::ffi::{c_uchar, c_ushort};
|
||||
use std::{
|
||||
ffi::{c_uchar, c_ushort},
|
||||
fs::File,
|
||||
io::Read,
|
||||
};
|
||||
|
||||
use crate::operands::Byte;
|
||||
use crate::{Args, disasm::DisasmError, operands::Byte};
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type c_long = i32; // the a.out format we use has 32-bit longs
|
||||
@@ -25,6 +29,20 @@ impl fmt::Display for Aout {
|
||||
}
|
||||
|
||||
impl Aout {
|
||||
pub fn new_from_args(args: &Args) -> Self {
|
||||
let path = args
|
||||
.path
|
||||
.clone()
|
||||
.ok_or(DisasmError::NoFile(args.path.clone()))
|
||||
.unwrap();
|
||||
let mut file = File::open(path).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf).unwrap();
|
||||
let aout = Aout::new(buf);
|
||||
log::debug!("{:?}", aout);
|
||||
aout
|
||||
}
|
||||
|
||||
pub fn new(buf: Vec<u8>) -> Self {
|
||||
let hdr = Header {
|
||||
magic: [buf[0], buf[1]],
|
||||
|
||||
839
src/disasm.rs
839
src/disasm.rs
@@ -12,7 +12,6 @@ use crate::{
|
||||
};
|
||||
use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister};
|
||||
use core::fmt;
|
||||
use std::{fs::File, io::Read};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Selects whether 8-bit or 16-bit registers should be used.
|
||||
@@ -84,24 +83,15 @@ impl fmt::Display for DisasmError {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Disassembler {
|
||||
offset: usize, // the current offset in the disasm process
|
||||
pub offset: usize, // the current offset in the disasm process
|
||||
pub aout: Aout, // the aout binary
|
||||
instruction: Instruction, // the instruction, which is currently being parsed
|
||||
pub instruction: Instruction, // the instruction, which is currently being parsed
|
||||
instructions: Vec<Instruction>, // all parsed instructions
|
||||
}
|
||||
|
||||
impl Disassembler {
|
||||
pub fn new(args: &Args) -> Self {
|
||||
let path = args
|
||||
.path
|
||||
.clone()
|
||||
.ok_or(DisasmError::NoFile(args.path.clone()))
|
||||
.unwrap();
|
||||
let mut file = File::open(path).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf).unwrap();
|
||||
let aout = Aout::new(buf);
|
||||
log::debug!("{:?}", aout);
|
||||
let aout = Aout::new_from_args(args);
|
||||
|
||||
Disassembler {
|
||||
offset: 0,
|
||||
@@ -472,7 +462,7 @@ impl Disassembler {
|
||||
fn remove_trailing_padding(&mut self) {
|
||||
let mut until = self.instructions.len();
|
||||
for i in self.instructions.iter().rev() {
|
||||
match i.opcode {
|
||||
match i.mnemonic {
|
||||
// 0x00 0x00 in binary
|
||||
Mnemonic::ADD_FromReg(
|
||||
ModRmTarget::Memory(MemoryIndex {
|
||||
@@ -493,416 +483,9 @@ impl Disassembler {
|
||||
self.instructions.truncate(until);
|
||||
}
|
||||
|
||||
/// Decode instructions by matching byte signature to their mnemonics and
|
||||
/// depending on the instruction, parsing some operands afterwards.
|
||||
/// All parsing is done in encapsulated functions; here everything just
|
||||
/// gets consolidated.
|
||||
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
|
||||
log::debug!("Starting to decode text of length {}", self.aout.text.len());
|
||||
while self.offset < self.aout.text.len() {
|
||||
// reset mutable current instruction
|
||||
self.instruction = Instruction::new();
|
||||
self.instruction.addr = self.offset;
|
||||
|
||||
// fetch next opcode
|
||||
let opcode = self.aout.text[self.offset];
|
||||
|
||||
// additional raw bytes will be pushed by parse functions
|
||||
self.instruction.raw.push(opcode);
|
||||
|
||||
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
|
||||
self.instruction.opcode = match opcode {
|
||||
0x00 => modrm_8b_register!(self, ADD_FromReg),
|
||||
0x01 => modrm_16b_register!(self, ADD_FromReg),
|
||||
0x02 => modrm_8b_register!(self, ADD_ToReg),
|
||||
0x03 => modrm_16b_register!(self, ADD_ToReg),
|
||||
0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
|
||||
0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
|
||||
|
||||
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
|
||||
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
|
||||
|
||||
0x08 => modrm_8b_register!(self, OR_FromReg),
|
||||
0x09 => modrm_16b_register!(self, OR_FromReg),
|
||||
0x0A => modrm_8b_register!(self, OR_ToReg),
|
||||
0x0B => modrm_16b_register!(self, OR_ToReg),
|
||||
0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
|
||||
0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
|
||||
|
||||
0x0E => Mnemonic::PUSH_S(SegmentRegister::CS),
|
||||
|
||||
0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0x10 => modrm_8b_register!(self, ADC_FromReg),
|
||||
0x11 => modrm_16b_register!(self, ADC_FromReg),
|
||||
0x12 => modrm_8b_register!(self, ADC_ToReg),
|
||||
0x13 => modrm_16b_register!(self, ADC_ToReg),
|
||||
0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
|
||||
0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
|
||||
|
||||
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
|
||||
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
|
||||
|
||||
0x18 => modrm_8b_register!(self, SBB_FromReg),
|
||||
0x19 => modrm_16b_register!(self, SBB_FromReg),
|
||||
0x1A => modrm_8b_register!(self, SBB_ToReg),
|
||||
0x1B => modrm_16b_register!(self, SBB_ToReg),
|
||||
0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
|
||||
0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
|
||||
|
||||
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
|
||||
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
|
||||
|
||||
0x20 => modrm_8b_register!(self, AND_FromReg),
|
||||
0x21 => modrm_16b_register!(self, AND_FromReg),
|
||||
0x22 => modrm_8b_register!(self, AND_ToReg),
|
||||
0x23 => modrm_16b_register!(self, AND_ToReg),
|
||||
0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
|
||||
0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
|
||||
|
||||
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
|
||||
0x27 => Mnemonic::DAA,
|
||||
|
||||
0x28 => modrm_8b_register!(self, SUB_FromReg),
|
||||
0x29 => modrm_16b_register!(self, SUB_FromReg),
|
||||
0x2A => modrm_8b_register!(self, SUB_ToReg),
|
||||
0x2B => modrm_16b_register!(self, SUB_ToReg),
|
||||
0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
|
||||
0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
|
||||
|
||||
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
|
||||
0x2F => Mnemonic::DAS,
|
||||
|
||||
0x30 => modrm_8b_register!(self, XOR_FromReg),
|
||||
0x31 => modrm_16b_register!(self, XOR_FromReg),
|
||||
0x32 => modrm_8b_register!(self, XOR_ToReg),
|
||||
0x33 => modrm_16b_register!(self, XOR_ToReg),
|
||||
0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
|
||||
0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
|
||||
|
||||
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
|
||||
0x37 => Mnemonic::AAA,
|
||||
|
||||
0x38 => modrm_8b_register!(self, CMP_FromReg),
|
||||
0x39 => modrm_16b_register!(self, CMP_FromReg),
|
||||
0x3A => modrm_8b_register!(self, CMP_ToReg),
|
||||
0x3B => modrm_16b_register!(self, CMP_ToReg),
|
||||
0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
|
||||
0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
|
||||
|
||||
0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS),
|
||||
0x3F => Mnemonic::AAS,
|
||||
|
||||
0x40 => Mnemonic::INC_Reg(Register::AX),
|
||||
0x41 => Mnemonic::INC_Reg(Register::CX),
|
||||
0x42 => Mnemonic::INC_Reg(Register::DX),
|
||||
0x43 => Mnemonic::INC_Reg(Register::BX),
|
||||
0x44 => Mnemonic::INC_Reg(Register::SP),
|
||||
0x45 => Mnemonic::INC_Reg(Register::BP),
|
||||
0x46 => Mnemonic::INC_Reg(Register::SI),
|
||||
0x47 => Mnemonic::INC_Reg(Register::DI),
|
||||
|
||||
0x48 => Mnemonic::DEC_Reg(Register::AX),
|
||||
0x49 => Mnemonic::DEC_Reg(Register::CX),
|
||||
0x4A => Mnemonic::DEC_Reg(Register::DX),
|
||||
0x4B => Mnemonic::DEC_Reg(Register::BX),
|
||||
0x4C => Mnemonic::DEC_Reg(Register::SP),
|
||||
0x4D => Mnemonic::DEC_Reg(Register::BP),
|
||||
0x4E => Mnemonic::DEC_Reg(Register::SI),
|
||||
0x4F => Mnemonic::DEC_Reg(Register::DI),
|
||||
|
||||
0x50 => Mnemonic::PUSH_R(Register::AX),
|
||||
0x51 => Mnemonic::PUSH_R(Register::CX),
|
||||
0x52 => Mnemonic::PUSH_R(Register::DX),
|
||||
0x53 => Mnemonic::PUSH_R(Register::BX),
|
||||
0x54 => Mnemonic::PUSH_R(Register::SP),
|
||||
0x55 => Mnemonic::PUSH_R(Register::BP),
|
||||
0x56 => Mnemonic::PUSH_R(Register::SI),
|
||||
0x57 => Mnemonic::PUSH_R(Register::DI),
|
||||
|
||||
0x58 => Mnemonic::POP_R(Register::AX),
|
||||
0x59 => Mnemonic::POP_R(Register::CX),
|
||||
0x5A => Mnemonic::POP_R(Register::DX),
|
||||
0x5B => Mnemonic::POP_R(Register::BX),
|
||||
0x5C => Mnemonic::POP_R(Register::SP),
|
||||
0x5D => Mnemonic::POP_R(Register::BP),
|
||||
0x5E => Mnemonic::POP_R(Register::SI),
|
||||
0x5F => Mnemonic::POP_R(Register::DI),
|
||||
|
||||
0x60..=0x6F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0x70 => Mnemonic::JO(self.parse_j_byte()?),
|
||||
0x71 => Mnemonic::JNO(self.parse_j_byte()?),
|
||||
0x72 => Mnemonic::JB(self.parse_j_byte()?),
|
||||
0x73 => Mnemonic::JNB(self.parse_j_byte()?),
|
||||
0x74 => Mnemonic::JZ(self.parse_j_byte()?),
|
||||
0x75 => Mnemonic::JNZ(self.parse_j_byte()?),
|
||||
0x76 => Mnemonic::JBE(self.parse_j_byte()?),
|
||||
0x77 => Mnemonic::JA(self.parse_j_byte()?),
|
||||
0x78 => Mnemonic::JS(self.parse_j_byte()?),
|
||||
0x79 => Mnemonic::JNS(self.parse_j_byte()?),
|
||||
0x7A => Mnemonic::JPE(self.parse_j_byte()?),
|
||||
0x7B => Mnemonic::JPO(self.parse_j_byte()?),
|
||||
0x7C => Mnemonic::JL(self.parse_j_byte()?),
|
||||
0x7D => Mnemonic::JGE(self.parse_j_byte()?),
|
||||
0x7E => Mnemonic::JLE(self.parse_j_byte()?),
|
||||
0x7F => Mnemonic::JG(self.parse_j_byte()?),
|
||||
|
||||
// Group 1
|
||||
0x80 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
let imm = self.parse_byte()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))?
|
||||
}
|
||||
0x81 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let imm = self.parse_word()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Word(imm))?
|
||||
}
|
||||
0x82 => {
|
||||
// same as 0x80
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
let imm = self.parse_byte()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))?
|
||||
}
|
||||
0x83 => {
|
||||
// byte extended version
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let imm = self.parse_byte()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))?
|
||||
}
|
||||
|
||||
0x84 => modrm_8b_register!(self, TEST),
|
||||
0x85 => modrm_16b_register!(self, TEST),
|
||||
|
||||
0x86 => modrm_8b_register!(self, XCHG),
|
||||
0x87 => modrm_16b_register!(self, XCHG),
|
||||
|
||||
0x88 => modrm_8b_register!(self, MOV_FromReg),
|
||||
0x89 => modrm_16b_register!(self, MOV_FromReg),
|
||||
0x8A => modrm_8b_register!(self, MOV_ToReg),
|
||||
0x8B => modrm_16b_register!(self, MOV_ToReg),
|
||||
0x8C => modrm_sregister!(self, MOV_FromSReg),
|
||||
0x8E => modrm_sregister!(self, MOV_ToSReg),
|
||||
|
||||
0x8D => modrm_16b_register!(self, LEA),
|
||||
|
||||
0x8F => {
|
||||
let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let mem = match target {
|
||||
ModRmTarget::Memory(idx) => idx,
|
||||
_ => {
|
||||
return Err(DisasmError::IllegalOperand(
|
||||
"POP (memory) instruction given a register to pop into".into(),
|
||||
));
|
||||
}
|
||||
};
|
||||
Mnemonic::POP_M(mem)
|
||||
}
|
||||
0x90 => Mnemonic::NOP(),
|
||||
|
||||
0x91 => Mnemonic::XCHG_AX(Register::CX),
|
||||
0x92 => Mnemonic::XCHG_AX(Register::DX),
|
||||
0x93 => Mnemonic::XCHG_AX(Register::BX),
|
||||
0x94 => Mnemonic::XCHG_AX(Register::SP),
|
||||
0x95 => Mnemonic::XCHG_AX(Register::BP),
|
||||
0x96 => Mnemonic::XCHG_AX(Register::SI),
|
||||
0x97 => Mnemonic::XCHG_AX(Register::DI),
|
||||
|
||||
0x98 => Mnemonic::CBW,
|
||||
0x99 => Mnemonic::CWD,
|
||||
|
||||
0x9A => Mnemonic::CALL_p(self.parse_ptr()?),
|
||||
|
||||
0x9B => Mnemonic::WAIT,
|
||||
|
||||
0x9C => Mnemonic::PUSHF,
|
||||
0x9D => Mnemonic::POPF,
|
||||
0x9E => Mnemonic::SAHF,
|
||||
0x9F => Mnemonic::LAHF,
|
||||
|
||||
0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()?),
|
||||
0xA1 => Mnemonic::MOV_AX0v(self.parse_word()?),
|
||||
0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()?),
|
||||
0xA3 => Mnemonic::MOV_0vAX(self.parse_word()?),
|
||||
0xA4 => Mnemonic::MOVSB,
|
||||
0xA5 => Mnemonic::MOVSW,
|
||||
|
||||
0xA6 => Mnemonic::CMPSB,
|
||||
0xA7 => Mnemonic::CMPSW,
|
||||
|
||||
0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()?),
|
||||
0xA9 => Mnemonic::TEST_AXIv(self.parse_word()?),
|
||||
|
||||
0xAA => Mnemonic::STOSB,
|
||||
0xAB => Mnemonic::STOSW,
|
||||
0xAC => Mnemonic::LODSB,
|
||||
0xAD => Mnemonic::LODSW,
|
||||
0xAE => Mnemonic::SCASB,
|
||||
0xAF => Mnemonic::SCASW,
|
||||
|
||||
0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()?),
|
||||
0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()?),
|
||||
0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()?),
|
||||
0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()?),
|
||||
0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()?),
|
||||
0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()?),
|
||||
0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()?),
|
||||
0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()?),
|
||||
0xB8 => Mnemonic::MOV_AXIv(self.parse_word()?),
|
||||
0xB9 => Mnemonic::MOV_CXIv(self.parse_word()?),
|
||||
0xBA => Mnemonic::MOV_DXIv(self.parse_word()?),
|
||||
0xBB => Mnemonic::MOV_BXIv(self.parse_word()?),
|
||||
0xBC => Mnemonic::MOV_SPIv(self.parse_word()?),
|
||||
0xBD => Mnemonic::MOV_BPIv(self.parse_word()?),
|
||||
0xBE => Mnemonic::MOV_SIIv(self.parse_word()?),
|
||||
0xBF => Mnemonic::MOV_DIIv(self.parse_word()?),
|
||||
|
||||
0xC0..=0xC1 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xC2 => Mnemonic::RET_Iw(self.parse_word()?),
|
||||
0xC3 => Mnemonic::RET,
|
||||
|
||||
0xC4 => {
|
||||
let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?;
|
||||
let ptr = Pointer16::try_from(target)?;
|
||||
Mnemonic::LES(reg, ptr)
|
||||
}
|
||||
0xC5 => {
|
||||
let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?;
|
||||
let ptr = Pointer16::try_from(target)?;
|
||||
Mnemonic::LDS(reg, ptr)
|
||||
}
|
||||
|
||||
0xC6 => {
|
||||
let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
Mnemonic::MOV_Ib(target, self.parse_byte()?)
|
||||
}
|
||||
0xC7 => {
|
||||
let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
Mnemonic::MOV_Iv(target, self.parse_word()?)
|
||||
}
|
||||
|
||||
0xC8..=0xC9 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xCA => Mnemonic::RETF_Iw(self.parse_word()?),
|
||||
0xCB => Mnemonic::RETF,
|
||||
|
||||
0xCC => Mnemonic::INT(3),
|
||||
0xCD => Mnemonic::INT(self.parse_byte()?),
|
||||
|
||||
0xCE => Mnemonic::INTO,
|
||||
0xCF => Mnemonic::IRET,
|
||||
|
||||
// Group 2
|
||||
0xD0 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
Self::modrm_reg_to_grp2_1(reg, target)?
|
||||
}
|
||||
0xD1 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
Self::modrm_reg_to_grp2_1(reg, target)?
|
||||
}
|
||||
0xD2 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
Self::modrm_reg_to_grp2_cl(reg, target)?
|
||||
}
|
||||
0xD3 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
Self::modrm_reg_to_grp2_cl(reg, target)?
|
||||
}
|
||||
|
||||
0xD4 => Mnemonic::AAM(self.parse_byte()?),
|
||||
0xD5 => Mnemonic::AAD(self.parse_byte()?),
|
||||
|
||||
0xD6 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xD7 => Mnemonic::XLAT,
|
||||
|
||||
0xD8..=0xDF => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()?),
|
||||
0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()?),
|
||||
0xE2 => Mnemonic::LOOP(self.parse_j_byte()?),
|
||||
0xE3 => Mnemonic::JCXZ(self.parse_j_byte()?),
|
||||
|
||||
0xE4 => Mnemonic::IN_AL(self.parse_byte()?),
|
||||
0xE5 => Mnemonic::IN_AX(self.parse_byte()?),
|
||||
0xE6 => Mnemonic::OUT_AL(self.parse_byte()?),
|
||||
0xE7 => Mnemonic::OUT_AX(self.parse_byte()?),
|
||||
|
||||
0xE8 => Mnemonic::CALL_v(self.parse_j_word()?),
|
||||
|
||||
0xE9 => Mnemonic::JMP_v(self.parse_j_word()?),
|
||||
0xEA => Mnemonic::JMP_p(self.parse_ptr()?),
|
||||
0xEB => Mnemonic::JMP_b(self.parse_j_byte()?),
|
||||
|
||||
0xEC => Mnemonic::IN_ALDX,
|
||||
0xED => Mnemonic::IN_AXDX,
|
||||
|
||||
0xEE => Mnemonic::OUT_ALDX,
|
||||
0xEF => Mnemonic::OUT_AXDX,
|
||||
|
||||
0xF0 => Mnemonic::LOCK,
|
||||
0xF1 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xF2 => Mnemonic::REPNZ,
|
||||
0xF3 => Mnemonic::REPZ,
|
||||
|
||||
0xF4 => Mnemonic::HLT,
|
||||
|
||||
0xF5 => Mnemonic::CMC,
|
||||
|
||||
// Group 3a
|
||||
0xF6 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
self.modrm_reg_to_grp3(reg, target, InstructionWidth::Byte)?
|
||||
}
|
||||
// Group 3b
|
||||
0xF7 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
self.modrm_reg_to_grp3(reg, target, InstructionWidth::Word)?
|
||||
}
|
||||
|
||||
0xF8 => Mnemonic::CLC,
|
||||
0xF9 => Mnemonic::STC,
|
||||
0xFA => Mnemonic::CLI,
|
||||
0xFB => Mnemonic::STI,
|
||||
0xFC => Mnemonic::CLD,
|
||||
0xFD => Mnemonic::STD,
|
||||
|
||||
// Group 4
|
||||
0xFE => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
match reg {
|
||||
0b0 => Mnemonic::INC_Mod(target),
|
||||
0b1 => Mnemonic::DEC_Mod(target),
|
||||
_ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)),
|
||||
}
|
||||
}
|
||||
|
||||
// Group 5
|
||||
0xFF => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
match reg {
|
||||
0b000 => Mnemonic::INC_Mod(target),
|
||||
0b001 => Mnemonic::DEC_Mod(target),
|
||||
0b010 => Mnemonic::CALL_Mod(target),
|
||||
0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?),
|
||||
0b100 => Mnemonic::JMP_Mod(target),
|
||||
0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?),
|
||||
0b110 => Mnemonic::PUSH_Mod(target),
|
||||
// 0b111 => unused
|
||||
_ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Save parsed instruction
|
||||
log::debug!("{}", self.instruction);
|
||||
self.instructions.push(self.instruction.clone());
|
||||
self.decode_instruction()?;
|
||||
|
||||
// Advance offset to point at the next potential opcode
|
||||
self.offset += 1;
|
||||
@@ -910,6 +493,418 @@ impl Disassembler {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Decode an instruction by matching byte signature to their mnemonics and
|
||||
/// depending on the instruction, parsing some operands afterwards.
|
||||
/// All parsing is done in encapsulated functions; here everything just
|
||||
/// gets consolidated.
|
||||
pub fn decode_instruction(&mut self) -> Result<(), DisasmError> {
|
||||
// reset mutable current instruction
|
||||
self.instruction = Instruction::new();
|
||||
self.instruction.addr = self.offset;
|
||||
|
||||
// fetch next opcode
|
||||
let opcode = self.aout.text[self.offset];
|
||||
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
|
||||
|
||||
// additional raw bytes will be pushed by parse functions
|
||||
self.instruction.raw.push(opcode);
|
||||
|
||||
self.instruction.mnemonic = match opcode {
|
||||
0x00 => modrm_8b_register!(self, ADD_FromReg),
|
||||
0x01 => modrm_16b_register!(self, ADD_FromReg),
|
||||
0x02 => modrm_8b_register!(self, ADD_ToReg),
|
||||
0x03 => modrm_16b_register!(self, ADD_ToReg),
|
||||
0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
|
||||
0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
|
||||
|
||||
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
|
||||
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
|
||||
|
||||
0x08 => modrm_8b_register!(self, OR_FromReg),
|
||||
0x09 => modrm_16b_register!(self, OR_FromReg),
|
||||
0x0A => modrm_8b_register!(self, OR_ToReg),
|
||||
0x0B => modrm_16b_register!(self, OR_ToReg),
|
||||
0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
|
||||
0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
|
||||
|
||||
0x0E => Mnemonic::PUSH_S(SegmentRegister::CS),
|
||||
|
||||
0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0x10 => modrm_8b_register!(self, ADC_FromReg),
|
||||
0x11 => modrm_16b_register!(self, ADC_FromReg),
|
||||
0x12 => modrm_8b_register!(self, ADC_ToReg),
|
||||
0x13 => modrm_16b_register!(self, ADC_ToReg),
|
||||
0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
|
||||
0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
|
||||
|
||||
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
|
||||
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
|
||||
|
||||
0x18 => modrm_8b_register!(self, SBB_FromReg),
|
||||
0x19 => modrm_16b_register!(self, SBB_FromReg),
|
||||
0x1A => modrm_8b_register!(self, SBB_ToReg),
|
||||
0x1B => modrm_16b_register!(self, SBB_ToReg),
|
||||
0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
|
||||
0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
|
||||
|
||||
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
|
||||
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
|
||||
|
||||
0x20 => modrm_8b_register!(self, AND_FromReg),
|
||||
0x21 => modrm_16b_register!(self, AND_FromReg),
|
||||
0x22 => modrm_8b_register!(self, AND_ToReg),
|
||||
0x23 => modrm_16b_register!(self, AND_ToReg),
|
||||
0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
|
||||
0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
|
||||
|
||||
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
|
||||
0x27 => Mnemonic::DAA,
|
||||
|
||||
0x28 => modrm_8b_register!(self, SUB_FromReg),
|
||||
0x29 => modrm_16b_register!(self, SUB_FromReg),
|
||||
0x2A => modrm_8b_register!(self, SUB_ToReg),
|
||||
0x2B => modrm_16b_register!(self, SUB_ToReg),
|
||||
0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
|
||||
0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
|
||||
|
||||
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
|
||||
0x2F => Mnemonic::DAS,
|
||||
|
||||
0x30 => modrm_8b_register!(self, XOR_FromReg),
|
||||
0x31 => modrm_16b_register!(self, XOR_FromReg),
|
||||
0x32 => modrm_8b_register!(self, XOR_ToReg),
|
||||
0x33 => modrm_16b_register!(self, XOR_ToReg),
|
||||
0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
|
||||
0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
|
||||
|
||||
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
|
||||
0x37 => Mnemonic::AAA,
|
||||
|
||||
0x38 => modrm_8b_register!(self, CMP_FromReg),
|
||||
0x39 => modrm_16b_register!(self, CMP_FromReg),
|
||||
0x3A => modrm_8b_register!(self, CMP_ToReg),
|
||||
0x3B => modrm_16b_register!(self, CMP_ToReg),
|
||||
0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
|
||||
0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
|
||||
|
||||
0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS),
|
||||
0x3F => Mnemonic::AAS,
|
||||
|
||||
0x40 => Mnemonic::INC_Reg(Register::AX),
|
||||
0x41 => Mnemonic::INC_Reg(Register::CX),
|
||||
0x42 => Mnemonic::INC_Reg(Register::DX),
|
||||
0x43 => Mnemonic::INC_Reg(Register::BX),
|
||||
0x44 => Mnemonic::INC_Reg(Register::SP),
|
||||
0x45 => Mnemonic::INC_Reg(Register::BP),
|
||||
0x46 => Mnemonic::INC_Reg(Register::SI),
|
||||
0x47 => Mnemonic::INC_Reg(Register::DI),
|
||||
|
||||
0x48 => Mnemonic::DEC_Reg(Register::AX),
|
||||
0x49 => Mnemonic::DEC_Reg(Register::CX),
|
||||
0x4A => Mnemonic::DEC_Reg(Register::DX),
|
||||
0x4B => Mnemonic::DEC_Reg(Register::BX),
|
||||
0x4C => Mnemonic::DEC_Reg(Register::SP),
|
||||
0x4D => Mnemonic::DEC_Reg(Register::BP),
|
||||
0x4E => Mnemonic::DEC_Reg(Register::SI),
|
||||
0x4F => Mnemonic::DEC_Reg(Register::DI),
|
||||
|
||||
0x50 => Mnemonic::PUSH_R(Register::AX),
|
||||
0x51 => Mnemonic::PUSH_R(Register::CX),
|
||||
0x52 => Mnemonic::PUSH_R(Register::DX),
|
||||
0x53 => Mnemonic::PUSH_R(Register::BX),
|
||||
0x54 => Mnemonic::PUSH_R(Register::SP),
|
||||
0x55 => Mnemonic::PUSH_R(Register::BP),
|
||||
0x56 => Mnemonic::PUSH_R(Register::SI),
|
||||
0x57 => Mnemonic::PUSH_R(Register::DI),
|
||||
|
||||
0x58 => Mnemonic::POP_R(Register::AX),
|
||||
0x59 => Mnemonic::POP_R(Register::CX),
|
||||
0x5A => Mnemonic::POP_R(Register::DX),
|
||||
0x5B => Mnemonic::POP_R(Register::BX),
|
||||
0x5C => Mnemonic::POP_R(Register::SP),
|
||||
0x5D => Mnemonic::POP_R(Register::BP),
|
||||
0x5E => Mnemonic::POP_R(Register::SI),
|
||||
0x5F => Mnemonic::POP_R(Register::DI),
|
||||
|
||||
0x60..=0x6F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0x70 => Mnemonic::JO(self.parse_j_byte()?),
|
||||
0x71 => Mnemonic::JNO(self.parse_j_byte()?),
|
||||
0x72 => Mnemonic::JB(self.parse_j_byte()?),
|
||||
0x73 => Mnemonic::JNB(self.parse_j_byte()?),
|
||||
0x74 => Mnemonic::JZ(self.parse_j_byte()?),
|
||||
0x75 => Mnemonic::JNZ(self.parse_j_byte()?),
|
||||
0x76 => Mnemonic::JBE(self.parse_j_byte()?),
|
||||
0x77 => Mnemonic::JA(self.parse_j_byte()?),
|
||||
0x78 => Mnemonic::JS(self.parse_j_byte()?),
|
||||
0x79 => Mnemonic::JNS(self.parse_j_byte()?),
|
||||
0x7A => Mnemonic::JPE(self.parse_j_byte()?),
|
||||
0x7B => Mnemonic::JPO(self.parse_j_byte()?),
|
||||
0x7C => Mnemonic::JL(self.parse_j_byte()?),
|
||||
0x7D => Mnemonic::JGE(self.parse_j_byte()?),
|
||||
0x7E => Mnemonic::JLE(self.parse_j_byte()?),
|
||||
0x7F => Mnemonic::JG(self.parse_j_byte()?),
|
||||
|
||||
// Group 1
|
||||
0x80 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
let imm = self.parse_byte()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))?
|
||||
}
|
||||
0x81 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let imm = self.parse_word()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Word(imm))?
|
||||
}
|
||||
0x82 => {
|
||||
// same as 0x80
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
let imm = self.parse_byte()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))?
|
||||
}
|
||||
0x83 => {
|
||||
// byte extended version
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let imm = self.parse_byte()?;
|
||||
Self::modrm_reg_to_grp1(reg, target, ImmediateOperand::Byte(imm))?
|
||||
}
|
||||
|
||||
0x84 => modrm_8b_register!(self, TEST),
|
||||
0x85 => modrm_16b_register!(self, TEST),
|
||||
|
||||
0x86 => modrm_8b_register!(self, XCHG),
|
||||
0x87 => modrm_16b_register!(self, XCHG),
|
||||
|
||||
0x88 => modrm_8b_register!(self, MOV_FromReg),
|
||||
0x89 => modrm_16b_register!(self, MOV_FromReg),
|
||||
0x8A => modrm_8b_register!(self, MOV_ToReg),
|
||||
0x8B => modrm_16b_register!(self, MOV_ToReg),
|
||||
0x8C => modrm_sregister!(self, MOV_FromSReg),
|
||||
0x8E => modrm_sregister!(self, MOV_ToSReg),
|
||||
|
||||
0x8D => modrm_16b_register!(self, LEA),
|
||||
|
||||
0x8F => {
|
||||
let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let mem = match target {
|
||||
ModRmTarget::Memory(idx) => idx,
|
||||
_ => {
|
||||
return Err(DisasmError::IllegalOperand(
|
||||
"POP (memory) instruction given a register to pop into".into(),
|
||||
));
|
||||
}
|
||||
};
|
||||
Mnemonic::POP_M(mem)
|
||||
}
|
||||
0x90 => Mnemonic::NOP(),
|
||||
|
||||
0x91 => Mnemonic::XCHG_AX(Register::CX),
|
||||
0x92 => Mnemonic::XCHG_AX(Register::DX),
|
||||
0x93 => Mnemonic::XCHG_AX(Register::BX),
|
||||
0x94 => Mnemonic::XCHG_AX(Register::SP),
|
||||
0x95 => Mnemonic::XCHG_AX(Register::BP),
|
||||
0x96 => Mnemonic::XCHG_AX(Register::SI),
|
||||
0x97 => Mnemonic::XCHG_AX(Register::DI),
|
||||
|
||||
0x98 => Mnemonic::CBW,
|
||||
0x99 => Mnemonic::CWD,
|
||||
|
||||
0x9A => Mnemonic::CALL_p(self.parse_ptr()?),
|
||||
|
||||
0x9B => Mnemonic::WAIT,
|
||||
|
||||
0x9C => Mnemonic::PUSHF,
|
||||
0x9D => Mnemonic::POPF,
|
||||
0x9E => Mnemonic::SAHF,
|
||||
0x9F => Mnemonic::LAHF,
|
||||
|
||||
0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()?),
|
||||
0xA1 => Mnemonic::MOV_AX0v(self.parse_word()?),
|
||||
0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()?),
|
||||
0xA3 => Mnemonic::MOV_0vAX(self.parse_word()?),
|
||||
0xA4 => Mnemonic::MOVSB,
|
||||
0xA5 => Mnemonic::MOVSW,
|
||||
|
||||
0xA6 => Mnemonic::CMPSB,
|
||||
0xA7 => Mnemonic::CMPSW,
|
||||
|
||||
0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()?),
|
||||
0xA9 => Mnemonic::TEST_AXIv(self.parse_word()?),
|
||||
|
||||
0xAA => Mnemonic::STOSB,
|
||||
0xAB => Mnemonic::STOSW,
|
||||
0xAC => Mnemonic::LODSB,
|
||||
0xAD => Mnemonic::LODSW,
|
||||
0xAE => Mnemonic::SCASB,
|
||||
0xAF => Mnemonic::SCASW,
|
||||
|
||||
0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()?),
|
||||
0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()?),
|
||||
0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()?),
|
||||
0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()?),
|
||||
0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()?),
|
||||
0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()?),
|
||||
0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()?),
|
||||
0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()?),
|
||||
0xB8 => Mnemonic::MOV_AXIv(self.parse_word()?),
|
||||
0xB9 => Mnemonic::MOV_CXIv(self.parse_word()?),
|
||||
0xBA => Mnemonic::MOV_DXIv(self.parse_word()?),
|
||||
0xBB => Mnemonic::MOV_BXIv(self.parse_word()?),
|
||||
0xBC => Mnemonic::MOV_SPIv(self.parse_word()?),
|
||||
0xBD => Mnemonic::MOV_BPIv(self.parse_word()?),
|
||||
0xBE => Mnemonic::MOV_SIIv(self.parse_word()?),
|
||||
0xBF => Mnemonic::MOV_DIIv(self.parse_word()?),
|
||||
|
||||
0xC0..=0xC1 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xC2 => Mnemonic::RET_Iw(self.parse_word()?),
|
||||
0xC3 => Mnemonic::RET,
|
||||
|
||||
0xC4 => {
|
||||
let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?;
|
||||
let ptr = Pointer16::try_from(target)?;
|
||||
Mnemonic::LES(reg, ptr)
|
||||
}
|
||||
0xC5 => {
|
||||
let (target, reg_id) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
let reg = Register::by_id(ImmediateOperand::Word(reg_id as Word))?;
|
||||
let ptr = Pointer16::try_from(target)?;
|
||||
Mnemonic::LDS(reg, ptr)
|
||||
}
|
||||
|
||||
0xC6 => {
|
||||
let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
Mnemonic::MOV_Ib(target, self.parse_byte()?)
|
||||
}
|
||||
0xC7 => {
|
||||
let (target, _) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
Mnemonic::MOV_Iv(target, self.parse_word()?)
|
||||
}
|
||||
|
||||
0xC8..=0xC9 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xCA => Mnemonic::RETF_Iw(self.parse_word()?),
|
||||
0xCB => Mnemonic::RETF,
|
||||
|
||||
0xCC => Mnemonic::INT(3),
|
||||
0xCD => Mnemonic::INT(self.parse_byte()?),
|
||||
|
||||
0xCE => Mnemonic::INTO,
|
||||
0xCF => Mnemonic::IRET,
|
||||
|
||||
// Group 2
|
||||
0xD0 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
Self::modrm_reg_to_grp2_1(reg, target)?
|
||||
}
|
||||
0xD1 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
Self::modrm_reg_to_grp2_1(reg, target)?
|
||||
}
|
||||
0xD2 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
Self::modrm_reg_to_grp2_cl(reg, target)?
|
||||
}
|
||||
0xD3 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
Self::modrm_reg_to_grp2_cl(reg, target)?
|
||||
}
|
||||
|
||||
0xD4 => Mnemonic::AAM(self.parse_byte()?),
|
||||
0xD5 => Mnemonic::AAD(self.parse_byte()?),
|
||||
|
||||
0xD6 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xD7 => Mnemonic::XLAT,
|
||||
|
||||
0xD8..=0xDF => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()?),
|
||||
0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()?),
|
||||
0xE2 => Mnemonic::LOOP(self.parse_j_byte()?),
|
||||
0xE3 => Mnemonic::JCXZ(self.parse_j_byte()?),
|
||||
|
||||
0xE4 => Mnemonic::IN_AL(self.parse_byte()?),
|
||||
0xE5 => Mnemonic::IN_AX(self.parse_byte()?),
|
||||
0xE6 => Mnemonic::OUT_AL(self.parse_byte()?),
|
||||
0xE7 => Mnemonic::OUT_AX(self.parse_byte()?),
|
||||
|
||||
0xE8 => Mnemonic::CALL_v(self.parse_j_word()?),
|
||||
|
||||
0xE9 => Mnemonic::JMP_v(self.parse_j_word()?),
|
||||
0xEA => Mnemonic::JMP_p(self.parse_ptr()?),
|
||||
0xEB => Mnemonic::JMP_b(self.parse_j_byte()?),
|
||||
|
||||
0xEC => Mnemonic::IN_ALDX,
|
||||
0xED => Mnemonic::IN_AXDX,
|
||||
|
||||
0xEE => Mnemonic::OUT_ALDX,
|
||||
0xEF => Mnemonic::OUT_AXDX,
|
||||
|
||||
0xF0 => Mnemonic::LOCK,
|
||||
0xF1 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0xF2 => Mnemonic::REPNZ,
|
||||
0xF3 => Mnemonic::REPZ,
|
||||
|
||||
0xF4 => Mnemonic::HLT,
|
||||
|
||||
0xF5 => Mnemonic::CMC,
|
||||
|
||||
// Group 3a
|
||||
0xF6 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
self.modrm_reg_to_grp3(reg, target, InstructionWidth::Byte)?
|
||||
}
|
||||
// Group 3b
|
||||
0xF7 => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
self.modrm_reg_to_grp3(reg, target, InstructionWidth::Word)?
|
||||
}
|
||||
|
||||
0xF8 => Mnemonic::CLC,
|
||||
0xF9 => Mnemonic::STC,
|
||||
0xFA => Mnemonic::CLI,
|
||||
0xFB => Mnemonic::STI,
|
||||
0xFC => Mnemonic::CLD,
|
||||
0xFD => Mnemonic::STD,
|
||||
|
||||
// Group 4
|
||||
0xFE => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Byte))?;
|
||||
match reg {
|
||||
0b0 => Mnemonic::INC_Mod(target),
|
||||
0b1 => Mnemonic::DEC_Mod(target),
|
||||
_ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)),
|
||||
}
|
||||
}
|
||||
|
||||
// Group 5
|
||||
0xFF => {
|
||||
let (target, reg) = self.parse_modrm_byte(Some(InstructionWidth::Word))?;
|
||||
match reg {
|
||||
0b000 => Mnemonic::INC_Mod(target),
|
||||
0b001 => Mnemonic::DEC_Mod(target),
|
||||
0b010 => Mnemonic::CALL_Mod(target),
|
||||
0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?),
|
||||
0b100 => Mnemonic::JMP_Mod(target),
|
||||
0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?),
|
||||
0b110 => Mnemonic::PUSH_Mod(target),
|
||||
// 0b111 => unused
|
||||
_ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)),
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Save parsed instruction
|
||||
log::debug!("Parsed {}", self.instruction);
|
||||
self.instructions.push(self.instruction.clone());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// #[cfg(test)]
|
||||
|
||||
@@ -11,9 +11,9 @@ use core::fmt;
|
||||
/// contains the `Mnemonic` that will be executed, alongside its starting offset
|
||||
/// and the raw parsed bytes
|
||||
pub struct Instruction {
|
||||
pub addr: usize, // location of the instruction start
|
||||
pub raw: Vec<u8>, // raw value of instruction
|
||||
pub opcode: Mnemonic, // actual instruction
|
||||
pub addr: usize, // location of the instruction start
|
||||
pub raw: Vec<u8>, // raw value of instruction
|
||||
pub mnemonic: Mnemonic, // actual instruction
|
||||
}
|
||||
|
||||
impl Instruction {
|
||||
@@ -21,7 +21,7 @@ impl Instruction {
|
||||
Instruction {
|
||||
addr: 0,
|
||||
raw: Vec::new(),
|
||||
opcode: Mnemonic::NOP(),
|
||||
mnemonic: Mnemonic::NOP(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -41,7 +41,7 @@ impl fmt::Display for Instruction {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
write!(f, "\t{}", self.opcode)
|
||||
write!(f, "\t{}", self.mnemonic)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,11 +2,13 @@ use core::fmt;
|
||||
use std::{fmt::Debug, process::exit};
|
||||
|
||||
use crate::{
|
||||
Args,
|
||||
aout::Aout,
|
||||
disasm::Disassembler,
|
||||
instructions::{Instruction, Mnemonic},
|
||||
interpreter::{
|
||||
computer::{CarryUsage, RotationDirection},
|
||||
interrupt::Mess1,
|
||||
register::SegmentRegister,
|
||||
},
|
||||
operands::{Byte, ImmediateOperand, ModRmTarget, Word},
|
||||
};
|
||||
@@ -16,12 +18,9 @@ use super::{
|
||||
interrupt::InterruptMessage,
|
||||
};
|
||||
|
||||
type InstructionPointer<'a> = std::slice::Iter<'a, Instruction>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum InterpreterError {
|
||||
InvalidSyscall(Byte),
|
||||
InstructionNotFound(Word),
|
||||
MemoryOutOfBound(Word),
|
||||
}
|
||||
|
||||
@@ -31,9 +30,6 @@ impl fmt::Display for InterpreterError {
|
||||
InterpreterError::InvalidSyscall(id) => {
|
||||
write!(f, "The syscall with ID {} is unknown", id)
|
||||
}
|
||||
InterpreterError::InstructionNotFound(addr) => {
|
||||
write!(f, "IP({addr}) points at invalid instruction")
|
||||
}
|
||||
InterpreterError::MemoryOutOfBound(addr) => {
|
||||
write!(
|
||||
f,
|
||||
@@ -47,30 +43,47 @@ impl fmt::Display for InterpreterError {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Interpreter {
|
||||
computer: Computer,
|
||||
instructions: Vec<Instruction>,
|
||||
text: Vec<u8>,
|
||||
ip: usize,
|
||||
disassembler: Disassembler,
|
||||
}
|
||||
|
||||
impl Interpreter {
|
||||
pub fn new(instructions: Vec<Instruction>, data: Vec<Byte>) -> Self {
|
||||
pub fn new(args: &Args) -> Self {
|
||||
let aout = Aout::new_from_args(args);
|
||||
Self {
|
||||
computer: Computer::new(data),
|
||||
instructions,
|
||||
computer: Computer::new(aout.data),
|
||||
text: aout.text,
|
||||
ip: 0,
|
||||
disassembler: Disassembler::new(args),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
|
||||
let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs)
|
||||
.ok_or(InterpreterError::InstructionNotFound(0))?;
|
||||
/// Sets instruction pointer in compliance with [`Register::CS`].
|
||||
pub fn set_ip(&mut self, ip: usize) {
|
||||
self.ip = ip + (self.computer.sregs.cs * 16) as usize
|
||||
}
|
||||
|
||||
/// Gets instruction pointer in compliance with [`Register::CS`].
|
||||
pub fn get_ip(&self) -> usize {
|
||||
self.ip + (self.computer.sregs.cs * 16) as usize
|
||||
}
|
||||
|
||||
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
|
||||
while self.ip < self.text.len() {
|
||||
self.disassembler.offset = self.ip;
|
||||
// XXX remove unwrap
|
||||
self.disassembler.decode_instruction().unwrap();
|
||||
let current_instruction = self.disassembler.instruction.clone();
|
||||
|
||||
while let Some(cur_instr) = ip.next() {
|
||||
log::info!(
|
||||
"{} IP({:04x})\t {:<32}",
|
||||
self.computer,
|
||||
cur_instr.addr,
|
||||
cur_instr.opcode.to_string(),
|
||||
current_instruction.addr,
|
||||
current_instruction.mnemonic.to_string(),
|
||||
);
|
||||
|
||||
match cur_instr.opcode {
|
||||
match current_instruction.mnemonic {
|
||||
/*
|
||||
* ADD
|
||||
*/
|
||||
@@ -381,7 +394,7 @@ impl Interpreter {
|
||||
| Mnemonic::JMP_b(offset)
|
||||
| Mnemonic::JMP_v(offset) => {
|
||||
let flags = self.computer.flags.clone();
|
||||
let flag = match cur_instr.opcode {
|
||||
let flag = match current_instruction.mnemonic {
|
||||
Mnemonic::JO(_) => flags.of,
|
||||
Mnemonic::JNO(_) => !flags.of,
|
||||
Mnemonic::JB(_) => flags.cf,
|
||||
@@ -402,7 +415,8 @@ impl Interpreter {
|
||||
_ => panic!("unreachable"),
|
||||
};
|
||||
if flag {
|
||||
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||
self.set_ip(offset);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -411,66 +425,35 @@ impl Interpreter {
|
||||
*/
|
||||
Mnemonic::JMP_p(ptr) => {
|
||||
self.computer.sregs.cs = ptr.segment;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.offset.into(),
|
||||
);
|
||||
self.set_ip(ptr.offset.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::JMP_Mp(ptr) => {
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.word.into(),
|
||||
);
|
||||
self.set_ip(ptr.word.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::JMP_Mod(target) => Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
self.computer.read_modrm(target)?.into(),
|
||||
),
|
||||
Mnemonic::JMP_Mod(target) => self.set_ip(self.computer.read_modrm(target)?.into()),
|
||||
Mnemonic::CALL_p(ptr) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
self.save_next_instruction_into_stack(&current_instruction)?;
|
||||
|
||||
self.computer.sregs.cs = ptr.segment;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.offset.into(),
|
||||
);
|
||||
self.set_ip(ptr.offset.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::CALL_v(offset) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||
self.save_next_instruction_into_stack(&current_instruction)?;
|
||||
self.set_ip(offset);
|
||||
continue;
|
||||
}
|
||||
Mnemonic::CALL_Mod(target) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
self.computer.read_modrm(target)?.into(),
|
||||
);
|
||||
self.save_next_instruction_into_stack(&current_instruction)?;
|
||||
self.set_ip(self.computer.read_modrm(target)?.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::CALL_Mp(ptr) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.word.into(),
|
||||
);
|
||||
self.save_next_instruction_into_stack(&current_instruction)?;
|
||||
self.set_ip(ptr.word.into());
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -601,13 +584,9 @@ impl Interpreter {
|
||||
* RET
|
||||
*/
|
||||
Mnemonic::RET => {
|
||||
let offset = self.computer.pop_stack()?;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
offset as usize,
|
||||
);
|
||||
let return_addr = self.computer.pop_stack()?;
|
||||
self.set_ip(return_addr as usize);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -771,6 +750,9 @@ impl Interpreter {
|
||||
}
|
||||
_ => log::info!("no action done"),
|
||||
}
|
||||
|
||||
// Go to next instruction
|
||||
self.ip += current_instruction.raw.len();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -824,31 +806,15 @@ impl Interpreter {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find the starting addr of an instruction in the list of all parsed
|
||||
/// instructions and return the iterator to that matching instruction, to
|
||||
/// allow for further traversal from that point on.
|
||||
/// I bet that this is not really fast, but I couldn't come up with a
|
||||
/// better idea so far.
|
||||
fn find_instruction<'a>(
|
||||
items: &'a Vec<Instruction>,
|
||||
ip_addr: usize,
|
||||
sregs: &SegmentRegister,
|
||||
) -> Option<InstructionPointer<'a>> {
|
||||
items
|
||||
.iter()
|
||||
.position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize)
|
||||
.map(|index| items[index..].iter())
|
||||
}
|
||||
/// Used for CALL and JUMP instructions.
|
||||
fn save_next_instruction_into_stack(
|
||||
&mut self,
|
||||
current_instruction: &Instruction,
|
||||
) -> Result<(), InterpreterError> {
|
||||
let instruction_size_in_bytes = current_instruction.raw.len();
|
||||
self.computer
|
||||
.push_stack((self.get_ip() + instruction_size_in_bytes).into())?;
|
||||
|
||||
/// Jump [`InstructionPointer`] `ip` to an `offset`.
|
||||
fn ip_jump<'a>(
|
||||
instructions: &'a Vec<Instruction>,
|
||||
ip: &mut InstructionPointer<'a>,
|
||||
sregs: &SegmentRegister,
|
||||
offset: usize,
|
||||
) {
|
||||
if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
|
||||
*ip = next_instr;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
11
src/main.rs
11
src/main.rs
@@ -69,15 +69,8 @@ fn main() {
|
||||
}
|
||||
}
|
||||
Command::Interpret => {
|
||||
let mut disasm = Disassembler::new(&args);
|
||||
let instructions = disasm.disassemble(args.dump);
|
||||
match instructions {
|
||||
Ok(instrs) => {
|
||||
let mut interpreter = Interpreter::new(instrs, disasm.aout.data);
|
||||
interpreter.interpret().unwrap();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
let mut interpreter = Interpreter::new(&args);
|
||||
interpreter.interpret().unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user