fix(interpreter): impl fetch and decode
Previously, all instructions were parsed up front before execution, but this is not how an Intel CPU works. Instead, the instruction pointed to by IP must be fetched and decoded on the fly.
This commit is contained in:
22
src/aout.rs
22
src/aout.rs
@@ -1,9 +1,13 @@
|
||||
//! Internal a.out File abstraction.
|
||||
|
||||
use core::fmt;
|
||||
use std::ffi::{c_uchar, c_ushort};
|
||||
use std::{
|
||||
ffi::{c_uchar, c_ushort},
|
||||
fs::File,
|
||||
io::Read,
|
||||
};
|
||||
|
||||
use crate::operands::Byte;
|
||||
use crate::{Args, disasm::DisasmError, operands::Byte};
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type c_long = i32; // we use a a.out with 32 byte
|
||||
@@ -25,6 +29,20 @@ impl fmt::Display for Aout {
|
||||
}
|
||||
|
||||
impl Aout {
|
||||
pub fn new_from_args(args: &Args) -> Self {
|
||||
let path = args
|
||||
.path
|
||||
.clone()
|
||||
.ok_or(DisasmError::NoFile(args.path.clone()))
|
||||
.unwrap();
|
||||
let mut file = File::open(path).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf).unwrap();
|
||||
let aout = Aout::new(buf);
|
||||
log::debug!("{:?}", aout);
|
||||
aout
|
||||
}
|
||||
|
||||
pub fn new(buf: Vec<u8>) -> Self {
|
||||
let hdr = Header {
|
||||
magic: [buf[0], buf[1]],
|
||||
|
||||
@@ -12,7 +12,6 @@ use crate::{
|
||||
};
|
||||
use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister};
|
||||
use core::fmt;
|
||||
use std::{fs::File, io::Read};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
/// Select whether 8-bit or 16-bit registers should be used.
|
||||
@@ -84,24 +83,15 @@ impl fmt::Display for DisasmError {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Disassembler {
|
||||
offset: usize, // the current offset in the disasm process
|
||||
pub offset: usize, // the current offset in the disasm process
|
||||
pub aout: Aout, // the aout binary
|
||||
instruction: Instruction, // the instruction, which is currently being parsed
|
||||
pub instruction: Instruction, // the instruction, which is currently being parsed
|
||||
instructions: Vec<Instruction>, // all parsed instructions
|
||||
}
|
||||
|
||||
impl Disassembler {
|
||||
pub fn new(args: &Args) -> Self {
|
||||
let path = args
|
||||
.path
|
||||
.clone()
|
||||
.ok_or(DisasmError::NoFile(args.path.clone()))
|
||||
.unwrap();
|
||||
let mut file = File::open(path).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf).unwrap();
|
||||
let aout = Aout::new(buf);
|
||||
log::debug!("{:?}", aout);
|
||||
let aout = Aout::new_from_args(args);
|
||||
|
||||
Disassembler {
|
||||
offset: 0,
|
||||
@@ -472,7 +462,7 @@ impl Disassembler {
|
||||
fn remove_trailing_padding(&mut self) {
|
||||
let mut until = self.instructions.len();
|
||||
for i in self.instructions.iter().rev() {
|
||||
match i.opcode {
|
||||
match i.mnemonic {
|
||||
// 0x00 0x00 in binary
|
||||
Mnemonic::ADD_FromReg(
|
||||
ModRmTarget::Memory(MemoryIndex {
|
||||
@@ -493,25 +483,34 @@ impl Disassembler {
|
||||
self.instructions.truncate(until);
|
||||
}
|
||||
|
||||
/// Decode instructions by matching byte signature to their mnemonics and
|
||||
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
|
||||
while self.offset < self.aout.text.len() {
|
||||
self.decode_instruction()?;
|
||||
|
||||
// Advance offset to hover the next potential opcode
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Decode an instruction by matching byte signature to their mnemonics and
|
||||
/// depending on the instruction, parsing some operands afterwards.
|
||||
/// All parsing is done in encapsulated functions; here everything just
|
||||
/// gets consolidated.
|
||||
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
|
||||
log::debug!("Starting to decode text of length {}", self.aout.text.len());
|
||||
while self.offset < self.aout.text.len() {
|
||||
pub fn decode_instruction(&mut self) -> Result<(), DisasmError> {
|
||||
// reset mutable current instruction
|
||||
self.instruction = Instruction::new();
|
||||
self.instruction.addr = self.offset;
|
||||
|
||||
// fetch next opcode
|
||||
let opcode = self.aout.text[self.offset];
|
||||
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
|
||||
|
||||
// additional raw bytes will be pushed by parse functions
|
||||
self.instruction.raw.push(opcode);
|
||||
|
||||
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
|
||||
self.instruction.opcode = match opcode {
|
||||
self.instruction.mnemonic = match opcode {
|
||||
0x00 => modrm_8b_register!(self, ADD_FromReg),
|
||||
0x01 => modrm_16b_register!(self, ADD_FromReg),
|
||||
0x02 => modrm_8b_register!(self, ADD_ToReg),
|
||||
@@ -901,13 +900,9 @@ impl Disassembler {
|
||||
};
|
||||
|
||||
// Save parsed instruction
|
||||
log::debug!("{}", self.instruction);
|
||||
log::debug!("Parsed {}", self.instruction);
|
||||
self.instructions.push(self.instruction.clone());
|
||||
|
||||
// Advance offset to hover the next potential opcode
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ use core::fmt;
|
||||
pub struct Instruction {
|
||||
pub addr: usize, // location of the instruction start
|
||||
pub raw: Vec<u8>, // raw value of instruction
|
||||
pub opcode: Mnemonic, // actual instruction
|
||||
pub mnemonic: Mnemonic, // actual instruction
|
||||
}
|
||||
|
||||
impl Instruction {
|
||||
@@ -21,7 +21,7 @@ impl Instruction {
|
||||
Instruction {
|
||||
addr: 0,
|
||||
raw: Vec::new(),
|
||||
opcode: Mnemonic::NOP(),
|
||||
mnemonic: Mnemonic::NOP(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -41,7 +41,7 @@ impl fmt::Display for Instruction {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
write!(f, "\t{}", self.opcode)
|
||||
write!(f, "\t{}", self.mnemonic)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2,11 +2,13 @@ use core::fmt;
|
||||
use std::{fmt::Debug, process::exit};
|
||||
|
||||
use crate::{
|
||||
Args,
|
||||
aout::Aout,
|
||||
disasm::Disassembler,
|
||||
instructions::{Instruction, Mnemonic},
|
||||
interpreter::{
|
||||
computer::{CarryUsage, RotationDirection},
|
||||
interrupt::Mess1,
|
||||
register::SegmentRegister,
|
||||
},
|
||||
operands::{Byte, ImmediateOperand, ModRmTarget, Word},
|
||||
};
|
||||
@@ -16,12 +18,9 @@ use super::{
|
||||
interrupt::InterruptMessage,
|
||||
};
|
||||
|
||||
type InstructionPointer<'a> = std::slice::Iter<'a, Instruction>;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum InterpreterError {
|
||||
InvalidSyscall(Byte),
|
||||
InstructionNotFound(Word),
|
||||
MemoryOutOfBound(Word),
|
||||
}
|
||||
|
||||
@@ -31,9 +30,6 @@ impl fmt::Display for InterpreterError {
|
||||
InterpreterError::InvalidSyscall(id) => {
|
||||
write!(f, "The syscall with ID {} is unknown", id)
|
||||
}
|
||||
InterpreterError::InstructionNotFound(addr) => {
|
||||
write!(f, "IP({addr}) points at invalid instruction")
|
||||
}
|
||||
InterpreterError::MemoryOutOfBound(addr) => {
|
||||
write!(
|
||||
f,
|
||||
@@ -47,30 +43,47 @@ impl fmt::Display for InterpreterError {
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Interpreter {
|
||||
computer: Computer,
|
||||
instructions: Vec<Instruction>,
|
||||
text: Vec<u8>,
|
||||
ip: usize,
|
||||
disassembler: Disassembler,
|
||||
}
|
||||
|
||||
impl Interpreter {
|
||||
pub fn new(instructions: Vec<Instruction>, data: Vec<Byte>) -> Self {
|
||||
pub fn new(args: &Args) -> Self {
|
||||
let aout = Aout::new_from_args(args);
|
||||
Self {
|
||||
computer: Computer::new(data),
|
||||
instructions,
|
||||
computer: Computer::new(aout.data),
|
||||
text: aout.text,
|
||||
ip: 0,
|
||||
disassembler: Disassembler::new(args),
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets instruction pointer in compliance with [`Register::CS`].
|
||||
pub fn set_ip(&mut self, ip: usize) {
|
||||
self.ip = ip + (self.computer.sregs.cs * 16) as usize
|
||||
}
|
||||
|
||||
/// Gets instruction pointer in compliance with [`Register::CS`].
|
||||
pub fn get_ip(&self) -> usize {
|
||||
self.ip + (self.computer.sregs.cs * 16) as usize
|
||||
}
|
||||
|
||||
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
|
||||
let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs)
|
||||
.ok_or(InterpreterError::InstructionNotFound(0))?;
|
||||
while self.ip < self.text.len() {
|
||||
self.disassembler.offset = self.ip;
|
||||
// XXX remove unwrap
|
||||
self.disassembler.decode_instruction().unwrap();
|
||||
let current_instruction = self.disassembler.instruction.clone();
|
||||
|
||||
while let Some(cur_instr) = ip.next() {
|
||||
log::info!(
|
||||
"{} IP({:04x})\t {:<32}",
|
||||
self.computer,
|
||||
cur_instr.addr,
|
||||
cur_instr.opcode.to_string(),
|
||||
current_instruction.addr,
|
||||
current_instruction.mnemonic.to_string(),
|
||||
);
|
||||
|
||||
match cur_instr.opcode {
|
||||
match current_instruction.mnemonic {
|
||||
/*
|
||||
* ADD
|
||||
*/
|
||||
@@ -381,7 +394,7 @@ impl Interpreter {
|
||||
| Mnemonic::JMP_b(offset)
|
||||
| Mnemonic::JMP_v(offset) => {
|
||||
let flags = self.computer.flags.clone();
|
||||
let flag = match cur_instr.opcode {
|
||||
let flag = match current_instruction.mnemonic {
|
||||
Mnemonic::JO(_) => flags.of,
|
||||
Mnemonic::JNO(_) => !flags.of,
|
||||
Mnemonic::JB(_) => flags.cf,
|
||||
@@ -402,7 +415,8 @@ impl Interpreter {
|
||||
_ => panic!("unreachable"),
|
||||
};
|
||||
if flag {
|
||||
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||
self.set_ip(offset);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -411,66 +425,35 @@ impl Interpreter {
|
||||
*/
|
||||
Mnemonic::JMP_p(ptr) => {
|
||||
self.computer.sregs.cs = ptr.segment;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.offset.into(),
|
||||
);
|
||||
self.set_ip(ptr.offset.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::JMP_Mp(ptr) => {
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.word.into(),
|
||||
);
|
||||
self.set_ip(ptr.word.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::JMP_Mod(target) => Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
self.computer.read_modrm(target)?.into(),
|
||||
),
|
||||
Mnemonic::JMP_Mod(target) => self.set_ip(self.computer.read_modrm(target)?.into()),
|
||||
Mnemonic::CALL_p(ptr) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
self.save_next_instruction_into_stack(¤t_instruction)?;
|
||||
|
||||
self.computer.sregs.cs = ptr.segment;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.offset.into(),
|
||||
);
|
||||
self.set_ip(ptr.offset.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::CALL_v(offset) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||
self.save_next_instruction_into_stack(¤t_instruction)?;
|
||||
self.set_ip(offset);
|
||||
continue;
|
||||
}
|
||||
Mnemonic::CALL_Mod(target) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
self.computer.read_modrm(target)?.into(),
|
||||
);
|
||||
self.save_next_instruction_into_stack(¤t_instruction)?;
|
||||
self.set_ip(self.computer.read_modrm(target)?.into());
|
||||
continue;
|
||||
}
|
||||
Mnemonic::CALL_Mp(ptr) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.word.into(),
|
||||
);
|
||||
self.save_next_instruction_into_stack(¤t_instruction)?;
|
||||
self.set_ip(ptr.word.into());
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -601,13 +584,9 @@ impl Interpreter {
|
||||
* RET
|
||||
*/
|
||||
Mnemonic::RET => {
|
||||
let offset = self.computer.pop_stack()?;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
offset as usize,
|
||||
);
|
||||
let return_addr = self.computer.pop_stack()?;
|
||||
self.set_ip(return_addr as usize);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -771,6 +750,9 @@ impl Interpreter {
|
||||
}
|
||||
_ => log::info!("no action done"),
|
||||
}
|
||||
|
||||
// Go to next instruction
|
||||
self.ip += current_instruction.raw.len();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -824,31 +806,15 @@ impl Interpreter {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Find the starting addr of an instruction in the list of all parsed
|
||||
/// instructions and return the iterator to that matching instruction, to
|
||||
/// allow for further traversal from that point on.
|
||||
/// I bet, that this is not really fast, but I could'nt come up with a
|
||||
/// better idea so far.
|
||||
fn find_instruction<'a>(
|
||||
items: &'a Vec<Instruction>,
|
||||
ip_addr: usize,
|
||||
sregs: &SegmentRegister,
|
||||
) -> Option<InstructionPointer<'a>> {
|
||||
items
|
||||
.iter()
|
||||
.position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize)
|
||||
.map(|index| items[index..].iter())
|
||||
}
|
||||
/// Used for CALL and JUMP instructions.
|
||||
fn save_next_instruction_into_stack(
|
||||
&mut self,
|
||||
current_instruction: &Instruction,
|
||||
) -> Result<(), InterpreterError> {
|
||||
let instruction_size_in_bytes = current_instruction.raw.len();
|
||||
self.computer
|
||||
.push_stack((self.get_ip() + instruction_size_in_bytes).into())?;
|
||||
|
||||
/// Jump [`InstructionPointer`] `ip` to an `offset`.
|
||||
fn ip_jump<'a>(
|
||||
instructions: &'a Vec<Instruction>,
|
||||
ip: &mut InstructionPointer<'a>,
|
||||
sregs: &SegmentRegister,
|
||||
offset: usize,
|
||||
) {
|
||||
if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
|
||||
*ip = next_instr;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,15 +69,8 @@ fn main() {
|
||||
}
|
||||
}
|
||||
Command::Interpret => {
|
||||
let mut disasm = Disassembler::new(&args);
|
||||
let instructions = disasm.disassemble(args.dump);
|
||||
match instructions {
|
||||
Ok(instrs) => {
|
||||
let mut interpreter = Interpreter::new(instrs, disasm.aout.data);
|
||||
let mut interpreter = Interpreter::new(&args);
|
||||
interpreter.interpret().unwrap();
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user