fix(interpreter): impl fetch and decode

Previously, all instructions were parsed up front before execution started,
but this is not how an Intel CPU works.
The instruction pointed to by IP has to be fetched and decoded on the fly.
This commit is contained in:
2025-07-01 12:04:20 +09:00
parent f3fd655908
commit a5cffa4852
5 changed files with 511 additions and 539 deletions

View File

@@ -1,9 +1,13 @@
//! Internal a.out File abstraction.
use core::fmt;
use std::ffi::{c_uchar, c_ushort};
use std::{
ffi::{c_uchar, c_ushort},
fs::File,
io::Read,
};
use crate::operands::Byte;
use crate::{Args, disasm::DisasmError, operands::Byte};
/// Stand-in for C's `long`: the a.out binaries handled here use a 32-bit long.
#[allow(non_camel_case_types)]
pub type c_long = i32; // 32-bit `long`, as used by the a.out files we read
@@ -25,6 +29,20 @@ impl fmt::Display for Aout {
}
impl Aout {
/// Load the a.out binary named in `args` and parse it into an [`Aout`].
///
/// The whole file is read into memory and handed to [`Aout::new`]; the
/// parsed result is logged at debug level before it is returned.
///
/// # Panics
///
/// Panics if `args.path` is `None`, or if the file cannot be opened or read.
/// The panic message names the offending path and the underlying I/O error.
pub fn new_from_args(args: &Args) -> Self {
    // Build the error lazily: the extra clone is only needed on the failure path.
    let path = args
        .path
        .clone()
        .ok_or_else(|| DisasmError::NoFile(args.path.clone()))
        .expect("no a.out binary path was given");

    let mut buf = Vec::new();
    File::open(&path)
        .unwrap_or_else(|err| panic!("failed to open a.out binary {path:?}: {err}"))
        .read_to_end(&mut buf)
        .unwrap_or_else(|err| panic!("failed to read a.out binary {path:?}: {err}"));

    let aout = Aout::new(buf);
    log::debug!("{:?}", aout);
    aout
}
pub fn new(buf: Vec<u8>) -> Self {
let hdr = Header {
magic: [buf[0], buf[1]],

View File

@@ -12,7 +12,6 @@ use crate::{
};
use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister};
use core::fmt;
use std::{fs::File, io::Read};
#[derive(Debug, Clone)]
/// Selects whether 8-bit or 16-bit registers should be used.
@@ -84,24 +83,15 @@ impl fmt::Display for DisasmError {
#[derive(Debug, Clone)]
pub struct Disassembler {
offset: usize, // the current offset in the disasm process
pub offset: usize, // the current offset in the disasm process
pub aout: Aout, // the aout binary
instruction: Instruction, // the instruction, which is currently being parsed
pub instruction: Instruction, // the instruction, which is currently being parsed
instructions: Vec<Instruction>, // all parsed instructions
}
impl Disassembler {
pub fn new(args: &Args) -> Self {
let path = args
.path
.clone()
.ok_or(DisasmError::NoFile(args.path.clone()))
.unwrap();
let mut file = File::open(path).unwrap();
let mut buf = Vec::new();
file.read_to_end(&mut buf).unwrap();
let aout = Aout::new(buf);
log::debug!("{:?}", aout);
let aout = Aout::new_from_args(args);
Disassembler {
offset: 0,
@@ -472,7 +462,7 @@ impl Disassembler {
fn remove_trailing_padding(&mut self) {
let mut until = self.instructions.len();
for i in self.instructions.iter().rev() {
match i.opcode {
match i.mnemonic {
// 0x00 0x00 in binary
Mnemonic::ADD_FromReg(
ModRmTarget::Memory(MemoryIndex {
@@ -493,25 +483,34 @@ impl Disassembler {
self.instructions.truncate(until);
}
/// Decode the text segment instruction by instruction.
///
/// Starting at the current `offset`, [`Self::decode_instruction`] is called
/// repeatedly until `offset` reaches the end of `aout.text`. The first
/// decoding error stops the loop and is returned to the caller.
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
    loop {
        if self.offset >= self.aout.text.len() {
            return Ok(());
        }
        self.decode_instruction()?;
        // Move on so that `offset` points at the next potential opcode.
        self.offset += 1;
    }
}
/// Decode an instruction by matching byte signature to their mnemonics and
/// depending on the instruction, parsing some operands afterwards.
/// All parsing is done in encapsulated functions; here everything just
/// gets consolidated.
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
log::debug!("Starting to decode text of length {}", self.aout.text.len());
while self.offset < self.aout.text.len() {
pub fn decode_instruction(&mut self) -> Result<(), DisasmError> {
// reset mutable current instruction
self.instruction = Instruction::new();
self.instruction.addr = self.offset;
// fetch next opcode
let opcode = self.aout.text[self.offset];
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
// additional raw bytes will be pushed by parse functions
self.instruction.raw.push(opcode);
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
self.instruction.opcode = match opcode {
self.instruction.mnemonic = match opcode {
0x00 => modrm_8b_register!(self, ADD_FromReg),
0x01 => modrm_16b_register!(self, ADD_FromReg),
0x02 => modrm_8b_register!(self, ADD_ToReg),
@@ -901,13 +900,9 @@ impl Disassembler {
};
// Save parsed instruction
log::debug!("{}", self.instruction);
log::debug!("Parsed {}", self.instruction);
self.instructions.push(self.instruction.clone());
// Advance offset to hover the next potential opcode
self.offset += 1;
}
Ok(())
}
}

View File

@@ -13,7 +13,7 @@ use core::fmt;
pub struct Instruction {
pub addr: usize, // location of the instruction start
pub raw: Vec<u8>, // raw value of instruction
pub opcode: Mnemonic, // actual instruction
pub mnemonic: Mnemonic, // actual instruction
}
impl Instruction {
@@ -21,7 +21,7 @@ impl Instruction {
Instruction {
addr: 0,
raw: Vec::new(),
opcode: Mnemonic::NOP(),
mnemonic: Mnemonic::NOP(),
}
}
}
@@ -41,7 +41,7 @@ impl fmt::Display for Instruction {
)
.unwrap();
write!(f, "\t{}", self.opcode)
write!(f, "\t{}", self.mnemonic)
}
}

View File

@@ -2,11 +2,13 @@ use core::fmt;
use std::{fmt::Debug, process::exit};
use crate::{
Args,
aout::Aout,
disasm::Disassembler,
instructions::{Instruction, Mnemonic},
interpreter::{
computer::{CarryUsage, RotationDirection},
interrupt::Mess1,
register::SegmentRegister,
},
operands::{Byte, ImmediateOperand, ModRmTarget, Word},
};
@@ -16,12 +18,9 @@ use super::{
interrupt::InterruptMessage,
};
type InstructionPointer<'a> = std::slice::Iter<'a, Instruction>;
/// Errors returned while interpreting a program.
#[derive(Debug, Clone)]
pub enum InterpreterError {
/// A syscall was requested whose ID is unknown; carries that ID.
InvalidSyscall(Byte),
/// The instruction pointer points at an invalid instruction; carries the
/// address the instruction pointer held.
InstructionNotFound(Word),
/// A memory access was out of bounds. The payload is bound as `addr` in the
/// `Display` impl, so it is presumably the offending address.
MemoryOutOfBound(Word),
}
@@ -31,9 +30,6 @@ impl fmt::Display for InterpreterError {
InterpreterError::InvalidSyscall(id) => {
write!(f, "The syscall with ID {} is unknown", id)
}
InterpreterError::InstructionNotFound(addr) => {
write!(f, "IP({addr}) points at invalid instruction")
}
InterpreterError::MemoryOutOfBound(addr) => {
write!(
f,
@@ -47,30 +43,47 @@ impl fmt::Display for InterpreterError {
#[derive(Debug, Clone)]
pub struct Interpreter {
computer: Computer,
instructions: Vec<Instruction>,
text: Vec<u8>,
ip: usize,
disassembler: Disassembler,
}
impl Interpreter {
pub fn new(instructions: Vec<Instruction>, data: Vec<Byte>) -> Self {
pub fn new(args: &Args) -> Self {
let aout = Aout::new_from_args(args);
Self {
computer: Computer::new(data),
instructions,
computer: Computer::new(aout.data),
text: aout.text,
ip: 0,
disassembler: Disassembler::new(args),
}
}
/// Sets the instruction pointer in compliance with [`Register::CS`].
///
/// `ip` is treated as an offset into the current code segment: the value
/// stored is `ip + CS * 16`.
pub fn set_ip(&mut self, ip: usize) {
    // Widen CS *before* scaling it. Multiplying in the register's own
    // (presumably 16-bit) integer type overflows for large segment values.
    let segment_base = (self.computer.sregs.cs as usize) * 16;
    self.ip = ip + segment_base;
}
/// Gets the instruction pointer in compliance with [`Register::CS`].
///
/// Returns the stored instruction pointer plus `CS * 16`.
///
/// NOTE(review): `set_ip` already adds `CS * 16` to the value it stores, so
/// for a non-zero CS the segment base looks like it is applied twice here.
/// Confirm which of the two is meant to hold the linear address.
pub fn get_ip(&self) -> usize {
    // Widen CS *before* scaling it. Multiplying in the register's own
    // (presumably 16-bit) integer type overflows for large segment values.
    let segment_base = (self.computer.sregs.cs as usize) * 16;
    self.ip + segment_base
}
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs)
.ok_or(InterpreterError::InstructionNotFound(0))?;
while self.ip < self.text.len() {
self.disassembler.offset = self.ip;
// XXX remove unwrap
self.disassembler.decode_instruction().unwrap();
let current_instruction = self.disassembler.instruction.clone();
while let Some(cur_instr) = ip.next() {
log::info!(
"{} IP({:04x})\t {:<32}",
self.computer,
cur_instr.addr,
cur_instr.opcode.to_string(),
current_instruction.addr,
current_instruction.mnemonic.to_string(),
);
match cur_instr.opcode {
match current_instruction.mnemonic {
/*
* ADD
*/
@@ -381,7 +394,7 @@ impl Interpreter {
| Mnemonic::JMP_b(offset)
| Mnemonic::JMP_v(offset) => {
let flags = self.computer.flags.clone();
let flag = match cur_instr.opcode {
let flag = match current_instruction.mnemonic {
Mnemonic::JO(_) => flags.of,
Mnemonic::JNO(_) => !flags.of,
Mnemonic::JB(_) => flags.cf,
@@ -402,7 +415,8 @@ impl Interpreter {
_ => panic!("unreachable"),
};
if flag {
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
self.set_ip(offset);
continue;
}
}
@@ -411,66 +425,35 @@ impl Interpreter {
*/
Mnemonic::JMP_p(ptr) => {
self.computer.sregs.cs = ptr.segment;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
self.set_ip(ptr.offset.into());
continue;
}
Mnemonic::JMP_Mp(ptr) => {
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
self.set_ip(ptr.word.into());
continue;
}
Mnemonic::JMP_Mod(target) => Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
self.computer.read_modrm(target)?.into(),
),
Mnemonic::JMP_Mod(target) => self.set_ip(self.computer.read_modrm(target)?.into()),
Mnemonic::CALL_p(ptr) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
self.save_next_instruction_into_stack(&current_instruction)?;
self.computer.sregs.cs = ptr.segment;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
self.set_ip(ptr.offset.into());
continue;
}
Mnemonic::CALL_v(offset) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
self.save_next_instruction_into_stack(&current_instruction)?;
self.set_ip(offset);
continue;
}
Mnemonic::CALL_Mod(target) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
self.computer.read_modrm(target)?.into(),
);
self.save_next_instruction_into_stack(&current_instruction)?;
self.set_ip(self.computer.read_modrm(target)?.into());
continue;
}
Mnemonic::CALL_Mp(ptr) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
self.save_next_instruction_into_stack(&current_instruction)?;
self.set_ip(ptr.word.into());
continue;
}
/*
@@ -601,13 +584,9 @@ impl Interpreter {
* RET
*/
Mnemonic::RET => {
let offset = self.computer.pop_stack()?;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
offset as usize,
);
let return_addr = self.computer.pop_stack()?;
self.set_ip(return_addr as usize);
continue;
}
/*
@@ -771,6 +750,9 @@ impl Interpreter {
}
_ => log::info!("no action done"),
}
// Go to next instruction
self.ip += current_instruction.raw.len();
}
Ok(())
@@ -824,31 +806,15 @@ impl Interpreter {
Ok(())
}
/// Find the instruction that starts at `ip_addr` (an offset into the code
/// segment selected by `sregs`) in the list of all parsed instructions.
///
/// Returns an iterator positioned at the matching instruction, to allow
/// further traversal from that point on, or `None` if no instruction starts
/// at that address.
///
/// This is a linear scan over `items`, so it is probably not fast; no better
/// idea has come up so far.
fn find_instruction<'a>(
    items: &'a [Instruction],
    ip_addr: usize,
    sregs: &SegmentRegister,
) -> Option<InstructionPointer<'a>> {
    // Computed once instead of once per element. CS is widened before it is
    // scaled so the multiplication cannot overflow the register's own type.
    let target_addr = ip_addr + (sregs.cs as usize) * 16;
    items
        .iter()
        .position(|instruction| instruction.addr == target_addr)
        .map(|index| items[index..].iter())
}
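/// Pushes the address of the instruction following `current_instruction` onto the stack (used by the CALL handlers, so that RET can resume there).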
fn save_next_instruction_into_stack(
&mut self,
current_instruction: &Instruction,
) -> Result<(), InterpreterError> {
let instruction_size_in_bytes = current_instruction.raw.len();
self.computer
.push_stack((self.get_ip() + instruction_size_in_bytes).into())?;
/// Jump [`InstructionPointer`] `ip` to an `offset`.
fn ip_jump<'a>(
instructions: &'a Vec<Instruction>,
ip: &mut InstructionPointer<'a>,
sregs: &SegmentRegister,
offset: usize,
) {
if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
*ip = next_instr;
}
Ok(())
}
}

View File

@@ -69,15 +69,8 @@ fn main() {
}
}
Command::Interpret => {
let mut disasm = Disassembler::new(&args);
let instructions = disasm.disassemble(args.dump);
match instructions {
Ok(instrs) => {
let mut interpreter = Interpreter::new(instrs, disasm.aout.data);
let mut interpreter = Interpreter::new(&args);
interpreter.interpret().unwrap();
}
_ => {}
}
}
}
}