fix(interpreter): impl fetch and decode

Previously, all instructions were parsed up front before execution, but
this is not how an Intel CPU works.
Instead, the instruction pointed to by IP must be fetched and decoded on the fly.
This commit is contained in:
2025-07-01 12:04:20 +09:00
parent f3fd655908
commit a5cffa4852
5 changed files with 511 additions and 539 deletions

View File

@@ -1,9 +1,13 @@
//! Internal a.out File abstraction. //! Internal a.out File abstraction.
use core::fmt; use core::fmt;
use std::ffi::{c_uchar, c_ushort}; use std::{
ffi::{c_uchar, c_ushort},
fs::File,
io::Read,
};
use crate::operands::Byte; use crate::{Args, disasm::DisasmError, operands::Byte};
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
pub type c_long = i32; // we use a a.out with 32 byte pub type c_long = i32; // we use a a.out with 32 byte
@@ -25,6 +29,20 @@ impl fmt::Display for Aout {
} }
impl Aout { impl Aout {
/// Builds an [`Aout`] from the binary file named in `args.path`.
///
/// The whole file is read into memory and handed to [`Aout::new`]; the
/// parsed result is logged at debug level before being returned.
///
/// # Panics
///
/// Panics if `args.path` is `None` (reported as [`DisasmError::NoFile`]),
/// or if the file cannot be opened or read.
pub fn new_from_args(args: &Args) -> Self {
    // Borrow the path instead of cloning it, and only build the error value
    // (which needs an owned copy) when the path is actually missing.
    let path = args
        .path
        .as_ref()
        .ok_or_else(|| DisasmError::NoFile(args.path.clone()))
        .expect("no a.out file was given on the command line");

    let mut file = File::open(path).expect("failed to open the a.out file");
    let mut buf = Vec::new();
    file.read_to_end(&mut buf)
        .expect("failed to read the a.out file");

    let aout = Aout::new(buf);
    log::debug!("{:?}", aout);
    aout
}
pub fn new(buf: Vec<u8>) -> Self { pub fn new(buf: Vec<u8>) -> Self {
let hdr = Header { let hdr = Header {
magic: [buf[0], buf[1]], magic: [buf[0], buf[1]],

View File

@@ -12,7 +12,6 @@ use crate::{
}; };
use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister}; use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister};
use core::fmt; use core::fmt;
use std::{fs::File, io::Read};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
/// Select, wheter 8, or 16-bit Registers should be selected. /// Select, wheter 8, or 16-bit Registers should be selected.
@@ -84,24 +83,15 @@ impl fmt::Display for DisasmError {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Disassembler { pub struct Disassembler {
offset: usize, // the current offset in the disasm process pub offset: usize, // the current offset in the disasm process
pub aout: Aout, // the aout binary pub aout: Aout, // the aout binary
instruction: Instruction, // the instruction, which is currently being parsed pub instruction: Instruction, // the instruction, which is currently being parsed
instructions: Vec<Instruction>, // all parsed instructions instructions: Vec<Instruction>, // all parsed instructions
} }
impl Disassembler { impl Disassembler {
pub fn new(args: &Args) -> Self { pub fn new(args: &Args) -> Self {
let path = args let aout = Aout::new_from_args(args);
.path
.clone()
.ok_or(DisasmError::NoFile(args.path.clone()))
.unwrap();
let mut file = File::open(path).unwrap();
let mut buf = Vec::new();
file.read_to_end(&mut buf).unwrap();
let aout = Aout::new(buf);
log::debug!("{:?}", aout);
Disassembler { Disassembler {
offset: 0, offset: 0,
@@ -472,7 +462,7 @@ impl Disassembler {
fn remove_trailing_padding(&mut self) { fn remove_trailing_padding(&mut self) {
let mut until = self.instructions.len(); let mut until = self.instructions.len();
for i in self.instructions.iter().rev() { for i in self.instructions.iter().rev() {
match i.opcode { match i.mnemonic {
// 0x00 0x00 in binary // 0x00 0x00 in binary
Mnemonic::ADD_FromReg( Mnemonic::ADD_FromReg(
ModRmTarget::Memory(MemoryIndex { ModRmTarget::Memory(MemoryIndex {
@@ -493,25 +483,34 @@ impl Disassembler {
self.instructions.truncate(until); self.instructions.truncate(until);
} }
/// Decode instructions by matching byte signature to their mnemonics and fn decode_instructions(&mut self) -> Result<(), DisasmError> {
while self.offset < self.aout.text.len() {
self.decode_instruction()?;
// Advance offset to hover the next potential opcode
self.offset += 1;
}
Ok(())
}
/// Decode an instruction by matching byte signature to their mnemonics and
/// depending on the instruction, parsing some operands afterwards. /// depending on the instruction, parsing some operands afterwards.
/// All parsing is done in capsulated functions, here everything just /// All parsing is done in capsulated functions, here everything just
/// gets consolodated. /// gets consolodated.
fn decode_instructions(&mut self) -> Result<(), DisasmError> { pub fn decode_instruction(&mut self) -> Result<(), DisasmError> {
log::debug!("Starting to decode text of length {}", self.aout.text.len());
while self.offset < self.aout.text.len() {
// reset mutable current instruction // reset mutable current instruction
self.instruction = Instruction::new(); self.instruction = Instruction::new();
self.instruction.addr = self.offset; self.instruction.addr = self.offset;
// fetch next opcode // fetch next opcode
let opcode = self.aout.text[self.offset]; let opcode = self.aout.text[self.offset];
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
// additional raw bytes will be pushed by parse functions // additional raw bytes will be pushed by parse functions
self.instruction.raw.push(opcode); self.instruction.raw.push(opcode);
log::debug!("Parsing next opcode with opcode: {opcode:#04x}"); self.instruction.mnemonic = match opcode {
self.instruction.opcode = match opcode {
0x00 => modrm_8b_register!(self, ADD_FromReg), 0x00 => modrm_8b_register!(self, ADD_FromReg),
0x01 => modrm_16b_register!(self, ADD_FromReg), 0x01 => modrm_16b_register!(self, ADD_FromReg),
0x02 => modrm_8b_register!(self, ADD_ToReg), 0x02 => modrm_8b_register!(self, ADD_ToReg),
@@ -901,13 +900,9 @@ impl Disassembler {
}; };
// Save parsed instruction // Save parsed instruction
log::debug!("{}", self.instruction); log::debug!("Parsed {}", self.instruction);
self.instructions.push(self.instruction.clone()); self.instructions.push(self.instruction.clone());
// Advance offset to hover the next potential opcode
self.offset += 1;
}
Ok(()) Ok(())
} }
} }

View File

@@ -13,7 +13,7 @@ use core::fmt;
pub struct Instruction { pub struct Instruction {
pub addr: usize, // location of the instruction start pub addr: usize, // location of the instruction start
pub raw: Vec<u8>, // raw value of instruction pub raw: Vec<u8>, // raw value of instruction
pub opcode: Mnemonic, // actual instruction pub mnemonic: Mnemonic, // actual instruction
} }
impl Instruction { impl Instruction {
@@ -21,7 +21,7 @@ impl Instruction {
Instruction { Instruction {
addr: 0, addr: 0,
raw: Vec::new(), raw: Vec::new(),
opcode: Mnemonic::NOP(), mnemonic: Mnemonic::NOP(),
} }
} }
} }
@@ -41,7 +41,7 @@ impl fmt::Display for Instruction {
) )
.unwrap(); .unwrap();
write!(f, "\t{}", self.opcode) write!(f, "\t{}", self.mnemonic)
} }
} }

View File

@@ -2,11 +2,13 @@ use core::fmt;
use std::{fmt::Debug, process::exit}; use std::{fmt::Debug, process::exit};
use crate::{ use crate::{
Args,
aout::Aout,
disasm::Disassembler,
instructions::{Instruction, Mnemonic}, instructions::{Instruction, Mnemonic},
interpreter::{ interpreter::{
computer::{CarryUsage, RotationDirection}, computer::{CarryUsage, RotationDirection},
interrupt::Mess1, interrupt::Mess1,
register::SegmentRegister,
}, },
operands::{Byte, ImmediateOperand, ModRmTarget, Word}, operands::{Byte, ImmediateOperand, ModRmTarget, Word},
}; };
@@ -16,12 +18,9 @@ use super::{
interrupt::InterruptMessage, interrupt::InterruptMessage,
}; };
type InstructionPointer<'a> = std::slice::Iter<'a, Instruction>;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum InterpreterError { pub enum InterpreterError {
InvalidSyscall(Byte), InvalidSyscall(Byte),
InstructionNotFound(Word),
MemoryOutOfBound(Word), MemoryOutOfBound(Word),
} }
@@ -31,9 +30,6 @@ impl fmt::Display for InterpreterError {
InterpreterError::InvalidSyscall(id) => { InterpreterError::InvalidSyscall(id) => {
write!(f, "The syscall with ID {} is unknown", id) write!(f, "The syscall with ID {} is unknown", id)
} }
InterpreterError::InstructionNotFound(addr) => {
write!(f, "IP({addr}) points at invalid instruction")
}
InterpreterError::MemoryOutOfBound(addr) => { InterpreterError::MemoryOutOfBound(addr) => {
write!( write!(
f, f,
@@ -47,30 +43,47 @@ impl fmt::Display for InterpreterError {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Interpreter { pub struct Interpreter {
computer: Computer, computer: Computer,
instructions: Vec<Instruction>, text: Vec<u8>,
ip: usize,
disassembler: Disassembler,
} }
impl Interpreter { impl Interpreter {
pub fn new(instructions: Vec<Instruction>, data: Vec<Byte>) -> Self { pub fn new(args: &Args) -> Self {
let aout = Aout::new_from_args(args);
Self { Self {
computer: Computer::new(data), computer: Computer::new(aout.data),
instructions, text: aout.text,
ip: 0,
disassembler: Disassembler::new(args),
} }
} }
/// Sets the instruction pointer to `ip`, offset by the code-segment base
/// (`CS * 16`), in compliance with [`Register::CS`].
///
/// The segment value is widened to `usize` *before* the multiplication, so
/// the product cannot overflow if the segment register type is narrower than
/// `usize` (presumably a 16-bit word — confirm against `SegmentRegister`).
///
/// NOTE(review): the value stored in `self.ip` already includes the segment
/// base, while `get_ip` adds the base again on read — confirm this is intended.
pub fn set_ip(&mut self, ip: usize) {
    let segment_base = (self.computer.sregs.cs as usize) * 16;
    self.ip = ip + segment_base;
}
/// Gets the instruction pointer, offset by the code-segment base
/// (`CS * 16`), in compliance with [`Register::CS`].
///
/// The segment value is widened to `usize` *before* the multiplication, so
/// the product cannot overflow if the segment register type is narrower than
/// `usize` (presumably a 16-bit word — confirm against `SegmentRegister`).
pub fn get_ip(&self) -> usize {
    let segment_base = (self.computer.sregs.cs as usize) * 16;
    self.ip + segment_base
}
pub fn interpret(&mut self) -> Result<(), InterpreterError> { pub fn interpret(&mut self) -> Result<(), InterpreterError> {
let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs) while self.ip < self.text.len() {
.ok_or(InterpreterError::InstructionNotFound(0))?; self.disassembler.offset = self.ip;
// XXX remove unwrap
self.disassembler.decode_instruction().unwrap();
let current_instruction = self.disassembler.instruction.clone();
while let Some(cur_instr) = ip.next() {
log::info!( log::info!(
"{} IP({:04x})\t {:<32}", "{} IP({:04x})\t {:<32}",
self.computer, self.computer,
cur_instr.addr, current_instruction.addr,
cur_instr.opcode.to_string(), current_instruction.mnemonic.to_string(),
); );
match cur_instr.opcode { match current_instruction.mnemonic {
/* /*
* ADD * ADD
*/ */
@@ -381,7 +394,7 @@ impl Interpreter {
| Mnemonic::JMP_b(offset) | Mnemonic::JMP_b(offset)
| Mnemonic::JMP_v(offset) => { | Mnemonic::JMP_v(offset) => {
let flags = self.computer.flags.clone(); let flags = self.computer.flags.clone();
let flag = match cur_instr.opcode { let flag = match current_instruction.mnemonic {
Mnemonic::JO(_) => flags.of, Mnemonic::JO(_) => flags.of,
Mnemonic::JNO(_) => !flags.of, Mnemonic::JNO(_) => !flags.of,
Mnemonic::JB(_) => flags.cf, Mnemonic::JB(_) => flags.cf,
@@ -402,7 +415,8 @@ impl Interpreter {
_ => panic!("unreachable"), _ => panic!("unreachable"),
}; };
if flag { if flag {
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset); self.set_ip(offset);
continue;
} }
} }
@@ -411,66 +425,35 @@ impl Interpreter {
*/ */
Mnemonic::JMP_p(ptr) => { Mnemonic::JMP_p(ptr) => {
self.computer.sregs.cs = ptr.segment; self.computer.sregs.cs = ptr.segment;
Self::ip_jump( self.set_ip(ptr.offset.into());
&self.instructions, continue;
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
} }
Mnemonic::JMP_Mp(ptr) => { Mnemonic::JMP_Mp(ptr) => {
Self::ip_jump( self.set_ip(ptr.word.into());
&self.instructions, continue;
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
} }
Mnemonic::JMP_Mod(target) => Self::ip_jump( Mnemonic::JMP_Mod(target) => self.set_ip(self.computer.read_modrm(target)?.into()),
&self.instructions,
&mut ip,
&self.computer.sregs,
self.computer.read_modrm(target)?.into(),
),
Mnemonic::CALL_p(ptr) => { Mnemonic::CALL_p(ptr) => {
if let Some(next_instr) = ip.next() { self.save_next_instruction_into_stack(&current_instruction)?;
self.computer.push_stack(next_instr.addr.into())?;
}
self.computer.sregs.cs = ptr.segment; self.computer.sregs.cs = ptr.segment;
Self::ip_jump( self.set_ip(ptr.offset.into());
&self.instructions, continue;
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
} }
Mnemonic::CALL_v(offset) => { Mnemonic::CALL_v(offset) => {
if let Some(next_instr) = ip.next() { self.save_next_instruction_into_stack(&current_instruction)?;
self.computer.push_stack(next_instr.addr.into())?; self.set_ip(offset);
} continue;
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
} }
Mnemonic::CALL_Mod(target) => { Mnemonic::CALL_Mod(target) => {
if let Some(next_instr) = ip.next() { self.save_next_instruction_into_stack(&current_instruction)?;
self.computer.push_stack(next_instr.addr.into())?; self.set_ip(self.computer.read_modrm(target)?.into());
} continue;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
self.computer.read_modrm(target)?.into(),
);
} }
Mnemonic::CALL_Mp(ptr) => { Mnemonic::CALL_Mp(ptr) => {
if let Some(next_instr) = ip.next() { self.save_next_instruction_into_stack(&current_instruction)?;
self.computer.push_stack(next_instr.addr.into())?; self.set_ip(ptr.word.into());
} continue;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
} }
/* /*
@@ -601,13 +584,9 @@ impl Interpreter {
* RET * RET
*/ */
Mnemonic::RET => { Mnemonic::RET => {
let offset = self.computer.pop_stack()?; let return_addr = self.computer.pop_stack()?;
Self::ip_jump( self.set_ip(return_addr as usize);
&self.instructions, continue;
&mut ip,
&self.computer.sregs,
offset as usize,
);
} }
/* /*
@@ -771,6 +750,9 @@ impl Interpreter {
} }
_ => log::info!("no action done"), _ => log::info!("no action done"),
} }
// Go to next instruction
self.ip += current_instruction.raw.len();
} }
Ok(()) Ok(())
@@ -824,31 +806,15 @@ impl Interpreter {
Ok(()) Ok(())
} }
/// Find the starting addr of an instruction in the list of all parsed /// Used for CALL and JUMP instructions.
/// instructions and return the iterator to that matching instruction, to fn save_next_instruction_into_stack(
/// allow for further traversal from that point on. &mut self,
/// I bet, that this is not really fast, but I could'nt come up with a current_instruction: &Instruction,
/// better idea so far. ) -> Result<(), InterpreterError> {
fn find_instruction<'a>( let instruction_size_in_bytes = current_instruction.raw.len();
items: &'a Vec<Instruction>, self.computer
ip_addr: usize, .push_stack((self.get_ip() + instruction_size_in_bytes).into())?;
sregs: &SegmentRegister,
) -> Option<InstructionPointer<'a>> {
items
.iter()
.position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize)
.map(|index| items[index..].iter())
}
/// Jump [`InstructionPointer`] `ip` to an `offset`. Ok(())
fn ip_jump<'a>(
instructions: &'a Vec<Instruction>,
ip: &mut InstructionPointer<'a>,
sregs: &SegmentRegister,
offset: usize,
) {
if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
*ip = next_instr;
}
} }
} }

View File

@@ -69,15 +69,8 @@ fn main() {
} }
} }
Command::Interpret => { Command::Interpret => {
let mut disasm = Disassembler::new(&args); let mut interpreter = Interpreter::new(&args);
let instructions = disasm.disassemble(args.dump);
match instructions {
Ok(instrs) => {
let mut interpreter = Interpreter::new(instrs, disasm.aout.data);
interpreter.interpret().unwrap(); interpreter.interpret().unwrap();
} }
_ => {}
}
}
} }
} }