ft: initial work in interpreter

This commit is contained in:
2025-06-03 21:31:28 +09:00
parent 5ee80c9364
commit ac69d75273
8 changed files with 344 additions and 51 deletions

View File

@@ -8,12 +8,12 @@ use crate::operands::{Byte, Word};
#[allow(non_camel_case_types)] #[allow(non_camel_case_types)]
pub type c_long = i32; // we use a a.out with 32 byte pub type c_long = i32; // we use a a.out with 32 byte
#[derive(Debug)] #[derive(Debug, Clone)]
/// Internal representation of the a.out binary format. /// Internal representation of the a.out binary format.
pub struct Aout { pub struct Aout {
pub header: Header, pub header: Header,
pub text: Vec<Byte>, pub text: Vec<Byte>,
pub data: Vec<Word>, pub data: Vec<Byte>,
} }
impl fmt::Display for Aout { impl fmt::Display for Aout {
@@ -48,20 +48,16 @@ impl Aout {
let text_section = &buf[text_start..text_end]; let text_section = &buf[text_start..text_end];
let data_section = &buf[data_start..data_end]; let data_section = &buf[data_start..data_end];
let data_words: Vec<Word> = data_section
.chunks_exact(2)
.map(|chunk| u16::from_le_bytes(chunk.try_into().unwrap()))
.collect();
Aout { Aout {
header: hdr, header: hdr,
text: Vec::from(text_section), text: Vec::from(text_section),
data: Vec::from(data_words), data: Vec::from(data_section),
} }
} }
} }
#[derive(Debug)] #[derive(Debug, Clone)]
pub struct Header { pub struct Header {
pub magic: [c_uchar; 2], // magic number pub magic: [c_uchar; 2], // magic number
pub flags: c_uchar, // flags, see below pub flags: c_uchar, // flags, see below

View File

@@ -77,10 +77,10 @@ impl fmt::Display for DisasmError {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Disassembler { pub struct Disassembler {
pub offset: usize, // the current offset in the disasm process offset: usize, // the current offset in the disasm process
pub text: Vec<u8>, // the aout binary pub aout: Aout, // the aout binary
pub instruction: Instruction, // the instruction, which is currently being parsed instruction: Instruction, // the instruction, which is currently being parsed
pub instructions: Vec<Instruction>, // all parsed instructions instructions: Vec<Instruction>, // all parsed instructions
} }
impl Disassembler { impl Disassembler {
@@ -98,7 +98,7 @@ impl Disassembler {
Disassembler { Disassembler {
offset: 0, offset: 0,
text: aout.text, aout,
instruction: Instruction::new(), instruction: Instruction::new(),
instructions: Vec::new(), instructions: Vec::new(),
} }
@@ -147,9 +147,9 @@ impl Disassembler {
fn parse_byte(&mut self) -> Result<Byte, DisasmError> { fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
log::debug!("Attempting to parse byte at {:#04x} ...", self.offset); log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
// check if the byte would be out of bounds // check if the byte would be out of bounds
if self.offset + 1 == self.text.len() { if self.offset + 1 == self.aout.text.len() {
// check if text section ends with single 0x00 padding byte // check if text section ends with single 0x00 padding byte
if self.text[self.offset] == 0 { if self.aout.text[self.offset] == 0 {
return Err(DisasmError::EndOfTextSection); return Err(DisasmError::EndOfTextSection);
// else its just an out of bounds read // else its just an out of bounds read
} else { } else {
@@ -161,6 +161,7 @@ impl Disassembler {
} }
let byte = self let byte = self
.aout
.text .text
.get(self.offset) .get(self.offset)
.ok_or(DisasmError::ReadBeyondTextSection)?; .ok_or(DisasmError::ReadBeyondTextSection)?;
@@ -472,7 +473,7 @@ impl Disassembler {
} }
log::debug!( log::debug!(
"Truncated file by {} bytes by removing trailing padding bytes.", "Truncated file by {} bytes by removing trailing padding bytes.",
self.text.len() - until self.aout.text.len() - until
); );
self.instructions.truncate(until); self.instructions.truncate(until);
} }
@@ -482,14 +483,14 @@ impl Disassembler {
/// All parsing is done in encapsulated functions, here everything just /// All parsing is done in encapsulated functions, here everything just
/// gets consolidated. /// gets consolidated.
fn decode_instructions(&mut self) -> Result<(), DisasmError> { fn decode_instructions(&mut self) -> Result<(), DisasmError> {
log::debug!("Starting to decode text of length {}", self.text.len()); log::debug!("Starting to decode text of length {}", self.aout.text.len());
while self.offset < self.text.len() { while self.offset < self.aout.text.len() {
// reset mutable current instruction // reset mutable current instruction
self.instruction = Instruction::new(); self.instruction = Instruction::new();
self.instruction.start = self.offset; self.instruction.start = self.offset;
// fetch next opcode // fetch next opcode
let opcode = self.text[self.offset]; let opcode = self.aout.text[self.offset];
// additional raw bytes will be pushed by parse functions // additional raw bytes will be pushed by parse functions
self.instruction.raw.push(opcode); self.instruction.raw.push(opcode);
@@ -896,35 +897,35 @@ impl Disassembler {
} }
} }
#[cfg(test)] // #[cfg(test)]
mod tests { // mod tests {
use super::*; // use super::*;
#[test] // #[test]
fn test_basic() { // fn test_basic() {
let text = Vec::from([0x0, 0x0]); // let text = Vec::from([0x0, 0x0]);
let mut disassembler = Disassembler { // let mut disassembler = Disassembler {
offset: 0, // offset: 0,
text, // text,
instruction: Instruction::new(), // instruction: Instruction::new(),
instructions: Vec::new(), // instructions: Vec::new(),
}; // };
disassembler.decode_instructions().unwrap(); // disassembler.decode_instructions().unwrap();
let instructions = disassembler.instructions; // let instructions = disassembler.instructions;
assert_eq!( // assert_eq!(
instructions[0], // instructions[0],
Instruction { // Instruction {
start: 0, // start: 0,
raw: Vec::from([0, 0]), // raw: Vec::from([0, 0]),
opcode: Mnemonic::ADD_FromReg( // opcode: Mnemonic::ADD_FromReg(
ModRmTarget::Memory(MemoryIndex { // ModRmTarget::Memory(MemoryIndex {
base: Some(Register::BX), // base: Some(Register::BX),
index: Some(Register::SI), // index: Some(Register::SI),
displacement: None // displacement: None
}), // }),
Register::AL // Register::AL
) // )
} // }
) // )
} // }
} // }

View File

@@ -0,0 +1,28 @@
use core::fmt;
use crate::operands::Byte;
use super::{flags::Flags, register::Register};
/// State of the emulated machine: registers, flags and a flat block of memory.
///
/// The memory array is stored inline, so a `Computer` value is a little over
/// 64 KiB in size and cloning it copies the whole array.
#[derive(Debug, Clone)]
pub struct Computer {
    /// Register file of the machine.
    pub regs: Register,
    /// Flag bits of the machine.
    pub flags: Flags,
    /// Byte-addressable memory, 65536 bytes.
    pub memory: [Byte; 65536],
}

impl Computer {
    /// Creates a machine with freshly constructed registers and flags and
    /// zero-filled memory.
    pub fn new() -> Self {
        Self {
            regs: Register::new(),
            flags: Flags::new(),
            memory: [0; 65536],
        }
    }
}

impl Default for Computer {
    /// Equivalent to [`Computer::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl fmt::Display for Computer {
    /// Writes the registers and the flags separated by ` | `.
    /// The memory contents are not part of the output.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{} | {}", self.regs, self.flags)
    }
}

48
src/interpreter/flags.rs Normal file
View File

@@ -0,0 +1,48 @@
use core::fmt;
/// Set of boolean processor flags, all cleared on construction.
///
/// The field names presumably follow the 8086 FLAGS register abbreviations
/// (overflow, direction, interrupt, trap, sign, zero, parity, carry) —
/// confirm against the instruction implementations once they exist.
#[derive(Debug, Clone, Default)]
pub struct Flags {
    pub of: bool,
    pub df: bool,
    /// Raw identifier because `if` is a Rust keyword.
    pub r#if: bool,
    pub tf: bool,
    pub sf: bool,
    pub zf: bool,
    // NOTE(review): the 8086 has no "NF"; this is presumably meant to be the
    // auxiliary-carry flag (AF). Renaming would change the public field and
    // the `Display` output, so it is left as is — confirm.
    pub nf: bool,
    pub pf: bool,
    pub cf: bool,
}

impl Flags {
    /// Creates a flag set with every flag cleared (same as `Flags::default()`).
    pub fn new() -> Self {
        Self::default()
    }
}

impl fmt::Display for Flags {
    /// Writes every flag as `NAME(0)` or `NAME(1)`, separated by single spaces.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "OF({}) DF({}) IF({}) TF({}) SF({}) ZF({}) NF({}) PF({}) CF({})",
            u8::from(self.of),
            u8::from(self.df),
            u8::from(self.r#if),
            u8::from(self.tf),
            u8::from(self.sf),
            u8::from(self.zf),
            u8::from(self.nf),
            u8::from(self.pf),
            u8::from(self.cf),
        )
    }
}

View File

@@ -0,0 +1,129 @@
use core::fmt;
use std::{fmt::Debug, process::exit};
use crate::{
instructions::{Instruction, Mnemonic},
operands::{Byte, Word},
};
use super::computer::Computer;
/// Errors the interpreter can report while executing a program.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum InterpreterError {
    /// A read reached past the end of the program's data section.
    EndOfData,
    /// An interrupt with a number the interpreter does not handle was raised;
    /// the payload is that number.
    InvalidSyscall(u8),
}

impl fmt::Display for InterpreterError {
    /// Writes a one-line, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            InterpreterError::EndOfData => write!(f, "Read beyond the available data section"),
            InterpreterError::InvalidSyscall(id) => {
                write!(f, "The syscall with ID {} is unknown", id)
            }
        }
    }
}

// Lets callers box the error or propagate it with `?` into `Box<dyn Error>`.
impl std::error::Error for InterpreterError {}
/// Executes a list of decoded instructions against an emulated [`Computer`].
#[derive(Debug, Clone)]
pub struct Interpreter {
    /// Machine state that the instructions operate on.
    computer: Computer,
    /// Instructions to execute, in the order they are stored.
    instructions: Vec<Instruction>,
    /// Raw bytes of the program's data section.
    data: Vec<Byte>,
}

impl Interpreter {
    /// Creates an interpreter with a fresh [`Computer`] for the given
    /// instructions and data section.
    pub fn new(instructions: Vec<Instruction>, data: Vec<Byte>) -> Self {
        Self {
            computer: Computer::new(),
            instructions,
            data,
        }
    }

    /// Executes all instructions once, front to back, logging the machine
    /// state before each one.
    ///
    /// # Errors
    /// Propagates any [`InterpreterError`] raised while handling an `INT`.
    ///
    /// # Panics
    /// Panics via `todo!()` on any instruction other than `MOV BX, imm16`
    /// and `INT`.
    pub fn interpret(&mut self) -> Result<(), InterpreterError> {
        for instr in &self.instructions {
            log::info!(
                "IP({:04x})\t {:<15} | {}",
                instr.start,
                instr.opcode.to_string(),
                self.computer
            );

            match instr.opcode {
                Mnemonic::MOV_BXIv(word) => self.computer.regs.bx.write(word),
                Mnemonic::INT(id) => self.handle_int(id)?,
                _ => todo!(),
            }
        }

        Ok(())
    }

    /// Handles interrupt `id` using the message block that `BX` points to in
    /// the data section.
    ///
    /// Only interrupt `0x20` is supported. The message's `interrupt_id` field
    /// selects the operation: `0x04` prints bytes from the data section to
    /// stdout, `0x01` terminates the process.
    ///
    /// # Errors
    /// * [`InterpreterError::EndOfData`] if the message block or the buffer
    ///   address named in it lies outside the data section.
    /// * [`InterpreterError::InvalidSyscall`] if `id` is not `0x20`.
    ///
    /// # Panics
    /// Panics via `todo!()` on any operation other than `0x04` and `0x01`.
    fn handle_int(&self, id: u8) -> Result<(), InterpreterError> {
        // the message block is read as 8 words (16 bytes)
        const MESSAGE_LEN: usize = 2 * 8;

        let bx = usize::from(self.computer.regs.bx.read());
        let message = self
            .data
            .get(bx..bx + MESSAGE_LEN)
            .ok_or(InterpreterError::EndOfData)?
            .to_vec();
        let interrupt_data = InterruptData::new(message);

        // simulate interrupt handler code of MINIX
        match id {
            // software interrupts
            0x20 => match interrupt_data.interrupt_id {
                0x04 => {
                    let fd = interrupt_data.m_type;
                    let location = interrupt_data.data_position;
                    let len = interrupt_data.count;
                    log::info!("executing write({}, {}, {})", fd, location, len);

                    // The address comes from the interpreted program, so an
                    // out-of-range value must be an error, not a panic.
                    let buffer = self
                        .data
                        .get(usize::from(location)..)
                        .ok_or(InterpreterError::EndOfData)?;

                    // NOTE(review): output stops at the first NUL byte; `fd`
                    // and `len` are only logged. Confirm whether `len` should
                    // bound the write instead.
                    let text: String = buffer
                        .iter()
                        .take_while(|&&byte| byte != 0x00)
                        .map(|&byte| char::from(byte))
                        .collect();
                    print!("{}", text);
                }
                0x01 => {
                    let exit_code = interrupt_data.data_position;
                    log::info!("executing exit({})", exit_code);
                    exit(exit_code.into())
                }
                _ => todo!(),
            },
            _ => return Err(InterpreterError::InvalidSyscall(id)),
        }

        Ok(())
    }
}
/// Six 16-bit fields decoded from the start of an interrupt message block.
///
/// Message layout reference:
/// https://cse.unl.edu/~goddard/Courses/CSCE351/Lectures/Lecture8.pdf
#[derive(Debug, Clone)]
pub struct InterruptData {
    /// Word at byte offset 0.
    pub m_type: Word,
    /// Word at byte offset 2.
    pub interrupt_id: Word,
    /// Word at byte offset 4.
    pub proc_nr: Word,
    /// Word at byte offset 6.
    pub count: Word,
    /// Word at byte offset 8.
    pub position: Word,
    /// Word at byte offset 10.
    pub data_position: Word,
}

impl InterruptData {
    /// Decodes the first twelve bytes of `data` as six consecutive
    /// little-endian words; any further bytes are ignored.
    ///
    /// # Panics
    /// Panics if `data` holds fewer than twelve bytes.
    pub fn new(data: Vec<u8>) -> Self {
        // Reads the little-endian word that starts at byte `offset`.
        let word_at = |offset: usize| Word::from_le_bytes([data[offset], data[offset + 1]]);

        Self {
            m_type: word_at(0),
            interrupt_id: word_at(2),
            proc_nr: word_at(4),
            count: word_at(6),
            position: word_at(8),
            data_position: word_at(10),
        }
    }
}

4
src/interpreter/mod.rs Normal file
View File

@@ -0,0 +1,4 @@
mod computer;
mod flags;
pub mod interpreter;
mod register;

View File

@@ -0,0 +1,75 @@
use crate::operands::{Byte, Word};
use core::fmt;
/// Register file of the emulated machine.
///
/// `ax`, `bx`, `cx` and `dx` use dedicated wrapper types; the remaining
/// registers are plain words.
#[derive(Debug, Clone, Copy)]
pub struct Register {
    pub ax: AX,
    pub bx: BX,
    pub cx: CX,
    pub dx: DX,
    pub sp: Word,
    pub bp: Word,
    pub si: Word,
    pub di: Word,
}

impl Register {
    /// Creates a register file with every register set to zero.
    pub fn new() -> Self {
        Self {
            ax: AX::new(),
            bx: BX::new(),
            cx: CX::new(),
            dx: DX::new(),
            sp: 0,
            bp: 0,
            si: 0,
            di: 0,
        }
    }
}

impl Default for Register {
    /// Equivalent to [`Register::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl fmt::Display for Register {
    /// Writes every register as `NAME(value)`, separated by single spaces;
    /// the plain-word registers are printed as four lowercase hex digits.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "AX({}) BX({}) CX({}) DX({}) SP({:04x}) BP({:04x}) SI({:04x}) DI({:04x})",
            self.ax, self.bx, self.cx, self.dx, self.sp, self.bp, self.si, self.di
        )
    }
}
macro_rules! gen_regs {
($ident:ident) => {
#[derive(Debug, Clone, Copy)]
pub struct $ident {
upper: Byte,
lower: Byte,
}
impl $ident {
pub fn new() -> Self {
Self { upper: 0, lower: 0 }
}
pub fn read(self) -> Word {
Word::from_le_bytes([self.lower, self.upper])
}
pub fn write(&mut self, word: Word) {
let [low, high]: [u8; 2] = word.to_le_bytes();
self.lower = low;
self.upper = high;
}
}
impl fmt::Display for $ident {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:04x}", Word::from_le_bytes([self.lower, self.upper]))
}
}
};
}
gen_regs!(AX);
gen_regs!(BX);
gen_regs!(CX);
gen_regs!(DX);

View File

@@ -1,10 +1,12 @@
use clap::{Parser, Subcommand}; use clap::{Parser, Subcommand};
use disasm::Disassembler; use disasm::Disassembler;
use interpreter::interpreter::Interpreter;
mod aout; mod aout;
mod disasm; mod disasm;
mod disasm_macros; mod disasm_macros;
mod instructions; mod instructions;
mod interpreter;
mod operands; mod operands;
mod register; mod register;
@@ -49,6 +51,16 @@ fn main() {
_ => {} _ => {}
} }
} }
_ => panic!("Command not yet implemented"), Command::Interpret => {
let mut disasm = Disassembler::new(&args);
let instructions = disasm.disassemble(args.dump);
match instructions {
Ok(instrs) => {
let mut interpreter = Interpreter::new(instrs, disasm.aout.data);
interpreter.interpret().unwrap();
}
_ => {}
}
}
} }
} }