ft(interpreter): impl far jumps with correct CS addressing

This commit is contained in:
2025-06-18 16:41:49 +09:00
parent 6678a1ef4a
commit 4aeacc649a
8 changed files with 116 additions and 30 deletions

View File

@@ -49,9 +49,13 @@ This project is under active development and primarily used by me to explore som
Expect bugs and some missing features.
I mainly test with 'official' binaries from the MINIX source tree.
Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics and would allow for more generic usage of this 8086 (e.g. implementing your own simple BIOS or OS).
E.g. currently the interrupt handler is hardcoded to support only Minix 1.x interrupts.
But first I want to implement all features correctly and add tests for all of them, before I want to move to that.
Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics (e.g. the currently hardcoded interrupt handler) and would allow for more generic usage of this 8086 (e.g. implementing your own simple BIOS or OS).
But first I want to implement all features correctly and add tests for all of them, before I want to move to that.
## Caveats
Interpreted code is disassembled into a Vector, which will also be used for execution.
This means that the code is not actually loaded into memory, but the `CS:IP` addressing scheme is still being used.
## Documentation
@@ -62,7 +66,9 @@ $ cargo doc
$ firefox target/doc/8086_rs/index.html
```
For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
For the implementation of the disassembly, I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
For the implementation of the interpreter, I used the "Intel® 64 and IA-32 Architectures Software Developer's Manual, Volume 2 (2A, 2B, 2C & 2D): Instruction Set Reference, A-Z".
## FAQ

View File

@@ -502,7 +502,7 @@ impl Disassembler {
while self.offset < self.aout.text.len() {
// reset mutable current instruction
self.instruction = Instruction::new();
self.instruction.start = self.offset;
self.instruction.addr = self.offset;
// fetch next opcode
let opcode = self.aout.text[self.offset];

View File

@@ -11,7 +11,7 @@ use core::fmt;
/// contains the `Mnemonic` that will be executed, alongside its starting offset
/// and the raw parsed bytes
pub struct Instruction {
pub start: usize, // location of the instruction start
pub addr: usize, // location of the instruction start
pub raw: Vec<u8>, // raw value of instruction
pub opcode: Mnemonic, // actual instruction
}
@@ -19,7 +19,7 @@ pub struct Instruction {
impl Instruction {
pub fn new() -> Self {
Instruction {
start: 0,
addr: 0,
raw: Vec::new(),
opcode: Mnemonic::NOP(),
}
@@ -28,7 +28,7 @@ impl Instruction {
impl fmt::Display for Instruction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:04x}:\t", self.start).unwrap();
write!(f, "{:04x}:\t", self.addr).unwrap();
write!(
f,

View File

@@ -2,7 +2,10 @@ use core::fmt;
use crate::operands::{ImmediateOperand, ModRmTarget, Word};
use super::{flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register};
use super::{
flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register,
register::SegmentRegister,
};
/// Wrapper for easier argument passing of polymorph arithmetic operations.
#[derive(Debug, Clone)]
@@ -18,6 +21,7 @@ type Rhs = ImmediateOperand;
#[derive(Debug, Clone)]
pub struct Computer {
pub regs: Register, // register state (see `Register`)
pub sregs: SegmentRegister, // segment registers DS, ES, SS and CS
pub flags: Flags, // flag state (see `Flags`)
pub memory: Memory, // byte-array backed memory (see `Memory`)
}
@@ -26,6 +30,7 @@ impl Computer {
pub fn new() -> Self {
Self {
regs: Register::new(),
sregs: SegmentRegister::new(),
flags: Flags::new(),
memory: Memory::new(),
}
@@ -280,7 +285,7 @@ pub enum CarryUsage {
impl fmt::Display for Computer {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.regs, self.flags)
write!(f, "{} {} {}", self.regs, self.sregs, self.flags)
}
}

View File

@@ -7,6 +7,7 @@ use crate::{
computer::{CarryUsage, RotationDirection},
interrupt::Mess1,
memory::Memory,
register::SegmentRegister,
},
operands::{Byte, ImmediateOperand, ModRmTarget, Word},
};
@@ -67,14 +68,14 @@ impl Interpreter {
}
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
let mut ip = Self::find_instruction(&self.instructions, 0)
let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs)
.ok_or(InterpreterError::InstructionNotFound(0))?;
while let Some(cur_instr) = ip.next() {
log::info!(
"{} IP({:04x})\t {:<32}",
self.computer,
cur_instr.start,
cur_instr.addr,
cur_instr.opcode.to_string(),
);
@@ -400,49 +401,84 @@ impl Interpreter {
_ => panic!("unreachable"),
};
if flag {
Self::ip_jump(&self.instructions, &mut ip, offset);
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
}
}
/*
* Long jumps and calls
*/
Mnemonic::JMP_p(_) => {
todo!()
Mnemonic::JMP_p(ptr) => {
self.computer.sregs.cs = ptr.segment;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
}
Mnemonic::JMP_Mp(_) => {
todo!()
Mnemonic::JMP_Mp(ptr) => {
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
}
Mnemonic::JMP_Mod(target) => match target {
ModRmTarget::Memory(idx) => Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
self.computer.memory.read(&self.computer.regs, idx).into(),
),
ModRmTarget::Register(register) => Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
self.computer.regs.read(register).into(),
),
},
Mnemonic::CALL_p(_) => todo!(),
Mnemonic::CALL_p(ptr) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
self.computer.sregs.cs = ptr.segment;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
}
Mnemonic::CALL_v(offset) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.start.into())?;
self.computer.push_stack(next_instr.addr.into())?;
}
Self::ip_jump(&self.instructions, &mut ip, offset);
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
}
Mnemonic::CALL_Mod(target) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.start.into())?;
self.computer.push_stack(next_instr.addr.into())?;
}
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
self.computer.read_modrm(target).into(),
);
}
Mnemonic::CALL_Mp(_) => todo!(),
Mnemonic::CALL_Mp(ptr) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
}
/*
* Test
@@ -572,7 +608,12 @@ impl Interpreter {
*/
Mnemonic::RET => {
let offset = self.computer.pop_stack()?;
Self::ip_jump(&self.instructions, &mut ip, offset as usize);
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
offset as usize,
);
}
/*
@@ -781,11 +822,12 @@ impl Interpreter {
/// better idea so far.
///
/// An instruction matches when its `addr` equals `ip_addr + cs * 16`, with `cs`
/// read from `sregs`. Returns an iterator positioned at the matching
/// instruction, or `None` when no instruction starts at that address.
fn find_instruction<'a>(
items: &'a Vec<Instruction>,
addr: usize,
ip_addr: usize,
sregs: &SegmentRegister,
) -> Option<InstructionPointer<'a>> {
items
.iter()
.position(|i| i.start == addr)
// Widen CS to `usize` *before* scaling by 16: doing the multiplication in
// `Word` width overflows as soon as `cs * 16` no longer fits in a `Word`
// (panic in debug builds, silently wrapped address in release builds).
.position(|instruction| instruction.addr == ip_addr + (sregs.cs as usize) * 16)
.map(|index| items[index..].iter())
}
@@ -793,9 +835,10 @@ impl Interpreter {
fn ip_jump<'a>(
instructions: &'a Vec<Instruction>,
ip: &mut InstructionPointer<'a>,
sregs: &SegmentRegister,
offset: usize,
) {
// Repoint `ip` at the instruction that `find_instruction` resolves for `offset`.
// NOTE(review): when no instruction matches, `ip` is left untouched, so the
// jump is silently dropped and execution continues with whatever `ip`
// yields next — confirm this is intended rather than an error condition.
if let Some(next_instr) = Self::find_instruction(&instructions, offset) {
if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
*ip = next_instr;
}
}

View File

@@ -2,21 +2,24 @@ use crate::operands::{Byte, Displacement, ImmediateOperand, MemoryIndex, Word};
use super::interpreter::InterpreterError;
/// Number of bytes in the [`Memory`] backing array: 2^20 = 1 MiB.
const MEMORY_SIZE: usize = 1048576;
#[derive(Debug, Clone, Copy)]
pub struct Memory {
memory: [Byte; Word::MAX as usize],
memory: [Byte; MEMORY_SIZE as usize],
}
impl Memory {
pub fn new() -> Self {
Self {
memory: [0; Word::MAX as usize],
memory: [0; MEMORY_SIZE as usize],
}
}
/// Safely writes a [`Word`] into an index of memory.
pub fn write_raw(&mut self, idx: Word, val: Word) -> Result<(), InterpreterError> {
if idx + 1 > Word::MAX {
if (idx + 1) as usize > MEMORY_SIZE {
return Err(InterpreterError::MemoryOutOfBound(idx));
} else {
let [low, high] = val.to_le_bytes();

View File

@@ -162,3 +162,32 @@ gen_regs!(AX);
gen_regs!(BX);
gen_regs!(CX);
gen_regs!(DX);
/// Segment registers (DS, ES, SS, CS) of the emulated CPU.
///
/// All registers start out as `0`, both via [`SegmentRegister::new`] and via
/// [`Default`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SegmentRegister {
    /// Data segment.
    pub ds: Word,
    /// Extra segment.
    pub es: Word,
    /// Stack segment.
    pub ss: Word,
    /// Code segment.
    pub cs: Word,
}

impl SegmentRegister {
    /// Creates a register set with every segment set to `0`.
    ///
    /// Equivalent to [`SegmentRegister::default`].
    pub fn new() -> Self {
        Self::default()
    }
}

impl fmt::Display for SegmentRegister {
    /// Writes the registers as `DS(..) ES(..) SS(..) CS(..)`, each value
    /// rendered with its plain `Display` formatting.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "DS({}) ES({}) SS({}) CS({})",
            self.ds, self.es, self.ss, self.cs
        )
    }
}

View File

@@ -567,7 +567,7 @@ pub struct Pointer16 {
impl std::fmt::Display for Pointer16 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "ptr [{:#04x}]", self.word)
write!(f, "ptr word [{:#04x}]", self.word)
}
}