ft(interpreter): impl far jumps with correct CS addressing

This commit is contained in:
2025-06-18 16:41:49 +09:00
parent 6678a1ef4a
commit 4aeacc649a
8 changed files with 116 additions and 30 deletions

View File

@@ -49,10 +49,14 @@ This project is under active development and primarily used by me to explore som
Expect bugs and some missing features. Expect bugs and some missing features.
I mainly test with 'official' binaries from the MINIX source tree. I mainly test with 'official' binaries from the MINIX source tree.
Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics and would allow for more generic usage of this 8086 (e.g. implementing your own simple BIOS or OS). Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics (e.g. currently with a hardcoded interrupt handler) and would allow for more generic usage of this 8086 (e.g. implementing your own simple BIOS or OS).
E.g. currently the interrupt handler is hardcoded to support only Minix 1.x interrupts.
But first I want to implement all features correctly and add tests for all of them, before I want to move to that. But first I want to implement all features correctly and add tests for all of them, before I want to move to that.
## Caveats
Interpreted code is disassembled into a Vector, which will also be used for execution.
This means that the code is not actually loaded into memory, but the `CS:IP` addressing scheme is still used.
## Documentation ## Documentation
@@ -62,7 +66,9 @@ $ cargo doc
$ firefox target/doc/8086_rs/index.html $ firefox target/doc/8086_rs/index.html
``` ```
For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix. For the implementation of the disassembly, I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
For the implementation of the interpreter, I used the "Intel® 64 and IA-32 Architectures Software Developer's Manual, Volume 2 (2A, 2B, 2C & 2D): Instruction Set Reference, A-Z".
## FAQ ## FAQ

View File

@@ -502,7 +502,7 @@ impl Disassembler {
while self.offset < self.aout.text.len() { while self.offset < self.aout.text.len() {
// reset mutable current instruction // reset mutable current instruction
self.instruction = Instruction::new(); self.instruction = Instruction::new();
self.instruction.start = self.offset; self.instruction.addr = self.offset;
// fetch next opcode // fetch next opcode
let opcode = self.aout.text[self.offset]; let opcode = self.aout.text[self.offset];

View File

@@ -11,7 +11,7 @@ use core::fmt;
/// contains the `Mnemonic` that will be executed, alongside its starting offset /// contains the `Mnemonic` that will be executed, alongside its starting offset
/// and the raw parsed bytes /// and the raw parsed bytes
pub struct Instruction { pub struct Instruction {
pub start: usize, // location of the instruction start pub addr: usize, // location of the instruction start
pub raw: Vec<u8>, // raw value of instruction pub raw: Vec<u8>, // raw value of instruction
pub opcode: Mnemonic, // actual instruction pub opcode: Mnemonic, // actual instruction
} }
@@ -19,7 +19,7 @@ pub struct Instruction {
impl Instruction { impl Instruction {
pub fn new() -> Self { pub fn new() -> Self {
Instruction { Instruction {
start: 0, addr: 0,
raw: Vec::new(), raw: Vec::new(),
opcode: Mnemonic::NOP(), opcode: Mnemonic::NOP(),
} }
@@ -28,7 +28,7 @@ impl Instruction {
impl fmt::Display for Instruction { impl fmt::Display for Instruction {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:04x}:\t", self.start).unwrap(); write!(f, "{:04x}:\t", self.addr).unwrap();
write!( write!(
f, f,

View File

@@ -2,7 +2,10 @@ use core::fmt;
use crate::operands::{ImmediateOperand, ModRmTarget, Word}; use crate::operands::{ImmediateOperand, ModRmTarget, Word};
use super::{flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register}; use super::{
flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register,
register::SegmentRegister,
};
/// Wrapper for easier argument passing of polymorphic arithmetic operations. /// Wrapper for easier argument passing of polymorphic arithmetic operations.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@@ -18,6 +21,7 @@ type Rhs = ImmediateOperand;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Computer { pub struct Computer {
pub regs: Register, pub regs: Register,
pub sregs: SegmentRegister,
pub flags: Flags, pub flags: Flags,
pub memory: Memory, pub memory: Memory,
} }
@@ -26,6 +30,7 @@ impl Computer {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
regs: Register::new(), regs: Register::new(),
sregs: SegmentRegister::new(),
flags: Flags::new(), flags: Flags::new(),
memory: Memory::new(), memory: Memory::new(),
} }
@@ -280,7 +285,7 @@ pub enum CarryUsage {
impl fmt::Display for Computer { impl fmt::Display for Computer {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{} {}", self.regs, self.flags) write!(f, "{} {} {}", self.regs, self.sregs, self.flags)
} }
} }

View File

@@ -7,6 +7,7 @@ use crate::{
computer::{CarryUsage, RotationDirection}, computer::{CarryUsage, RotationDirection},
interrupt::Mess1, interrupt::Mess1,
memory::Memory, memory::Memory,
register::SegmentRegister,
}, },
operands::{Byte, ImmediateOperand, ModRmTarget, Word}, operands::{Byte, ImmediateOperand, ModRmTarget, Word},
}; };
@@ -67,14 +68,14 @@ impl Interpreter {
} }
pub fn interpret(&mut self) -> Result<(), InterpreterError> { pub fn interpret(&mut self) -> Result<(), InterpreterError> {
let mut ip = Self::find_instruction(&self.instructions, 0) let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs)
.ok_or(InterpreterError::InstructionNotFound(0))?; .ok_or(InterpreterError::InstructionNotFound(0))?;
while let Some(cur_instr) = ip.next() { while let Some(cur_instr) = ip.next() {
log::info!( log::info!(
"{} IP({:04x})\t {:<32}", "{} IP({:04x})\t {:<32}",
self.computer, self.computer,
cur_instr.start, cur_instr.addr,
cur_instr.opcode.to_string(), cur_instr.opcode.to_string(),
); );
@@ -400,49 +401,84 @@ impl Interpreter {
_ => panic!("unreachable"), _ => panic!("unreachable"),
}; };
if flag { if flag {
Self::ip_jump(&self.instructions, &mut ip, offset); Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
} }
} }
/* /*
* Long jumps and calls * Long jumps and calls
*/ */
Mnemonic::JMP_p(_) => { Mnemonic::JMP_p(ptr) => {
todo!() self.computer.sregs.cs = ptr.segment;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
} }
Mnemonic::JMP_Mp(_) => { Mnemonic::JMP_Mp(ptr) => {
todo!() Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
} }
Mnemonic::JMP_Mod(target) => match target { Mnemonic::JMP_Mod(target) => match target {
ModRmTarget::Memory(idx) => Self::ip_jump( ModRmTarget::Memory(idx) => Self::ip_jump(
&self.instructions, &self.instructions,
&mut ip, &mut ip,
&self.computer.sregs,
self.computer.memory.read(&self.computer.regs, idx).into(), self.computer.memory.read(&self.computer.regs, idx).into(),
), ),
ModRmTarget::Register(register) => Self::ip_jump( ModRmTarget::Register(register) => Self::ip_jump(
&self.instructions, &self.instructions,
&mut ip, &mut ip,
&self.computer.sregs,
self.computer.regs.read(register).into(), self.computer.regs.read(register).into(),
), ),
}, },
Mnemonic::CALL_p(_) => todo!(), Mnemonic::CALL_p(ptr) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
self.computer.sregs.cs = ptr.segment;
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.offset.into(),
);
}
Mnemonic::CALL_v(offset) => { Mnemonic::CALL_v(offset) => {
if let Some(next_instr) = ip.next() { if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.start.into())?; self.computer.push_stack(next_instr.addr.into())?;
} }
Self::ip_jump(&self.instructions, &mut ip, offset); Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
} }
Mnemonic::CALL_Mod(target) => { Mnemonic::CALL_Mod(target) => {
if let Some(next_instr) = ip.next() { if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.start.into())?; self.computer.push_stack(next_instr.addr.into())?;
} }
Self::ip_jump( Self::ip_jump(
&self.instructions, &self.instructions,
&mut ip, &mut ip,
&self.computer.sregs,
self.computer.read_modrm(target).into(), self.computer.read_modrm(target).into(),
); );
} }
Mnemonic::CALL_Mp(_) => todo!(), Mnemonic::CALL_Mp(ptr) => {
if let Some(next_instr) = ip.next() {
self.computer.push_stack(next_instr.addr.into())?;
}
Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
ptr.word.into(),
);
}
/* /*
* Test * Test
@@ -572,7 +608,12 @@ impl Interpreter {
*/ */
Mnemonic::RET => { Mnemonic::RET => {
let offset = self.computer.pop_stack()?; let offset = self.computer.pop_stack()?;
Self::ip_jump(&self.instructions, &mut ip, offset as usize); Self::ip_jump(
&self.instructions,
&mut ip,
&self.computer.sregs,
offset as usize,
);
} }
/* /*
@@ -781,11 +822,12 @@ impl Interpreter {
/// better idea so far. /// better idea so far.
fn find_instruction<'a>( fn find_instruction<'a>(
items: &'a Vec<Instruction>, items: &'a Vec<Instruction>,
addr: usize, ip_addr: usize,
sregs: &SegmentRegister,
) -> Option<InstructionPointer<'a>> { ) -> Option<InstructionPointer<'a>> {
items items
.iter() .iter()
.position(|i| i.start == addr) .position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize)
.map(|index| items[index..].iter()) .map(|index| items[index..].iter())
} }
@@ -793,9 +835,10 @@ impl Interpreter {
fn ip_jump<'a>( fn ip_jump<'a>(
instructions: &'a Vec<Instruction>, instructions: &'a Vec<Instruction>,
ip: &mut InstructionPointer<'a>, ip: &mut InstructionPointer<'a>,
sregs: &SegmentRegister,
offset: usize, offset: usize,
) { ) {
if let Some(next_instr) = Self::find_instruction(&instructions, offset) { if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
*ip = next_instr; *ip = next_instr;
} }
} }

View File

@@ -2,21 +2,24 @@ use crate::operands::{Byte, Displacement, ImmediateOperand, MemoryIndex, Word};
use super::interpreter::InterpreterError; use super::interpreter::InterpreterError;
/// 2^20 bytes = 1 MiB
const MEMORY_SIZE: usize = 1048576;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct Memory { pub struct Memory {
memory: [Byte; Word::MAX as usize], memory: [Byte; MEMORY_SIZE as usize],
} }
impl Memory { impl Memory {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
memory: [0; Word::MAX as usize], memory: [0; MEMORY_SIZE as usize],
} }
} }
/// Safely writes a [`Word`] into an index of memory. /// Safely writes a [`Word`] into an index of memory.
pub fn write_raw(&mut self, idx: Word, val: Word) -> Result<(), InterpreterError> { pub fn write_raw(&mut self, idx: Word, val: Word) -> Result<(), InterpreterError> {
if idx + 1 > Word::MAX { if (idx + 1) as usize > MEMORY_SIZE {
return Err(InterpreterError::MemoryOutOfBound(idx)); return Err(InterpreterError::MemoryOutOfBound(idx));
} else { } else {
let [low, high] = val.to_le_bytes(); let [low, high] = val.to_le_bytes();

View File

@@ -162,3 +162,32 @@ gen_regs!(AX);
gen_regs!(BX); gen_regs!(BX);
gen_regs!(CX); gen_regs!(CX);
gen_regs!(DX); gen_regs!(DX);
/// The four segment registers of the emulated CPU: `DS`, `ES`, `SS` and `CS`.
///
/// Every register is zero after construction, both via [`SegmentRegister::new`]
/// and via [`Default`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct SegmentRegister {
    pub ds: Word,
    pub es: Word,
    pub ss: Word,
    pub cs: Word,
}

impl SegmentRegister {
    /// Creates a register set with all segment registers set to zero.
    ///
    /// Equivalent to [`SegmentRegister::default`].
    pub fn new() -> Self {
        Self::default()
    }
}

impl fmt::Display for SegmentRegister {
    /// Writes the registers as `DS(..) ES(..) SS(..) CS(..)`, each value
    /// formatted with the plain `{}` formatter.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { ds, es, ss, cs } = self;
        write!(f, "DS({}) ES({}) SS({}) CS({})", ds, es, ss, cs)
    }
}

View File

@@ -567,7 +567,7 @@ pub struct Pointer16 {
impl std::fmt::Display for Pointer16 { impl std::fmt::Display for Pointer16 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "ptr [{:#04x}]", self.word) write!(f, "ptr word [{:#04x}]", self.word)
} }
} }