ft(interpreter): impl far jumps with correct CS addressing
This commit is contained in:
14
README.md
14
README.md
@@ -49,9 +49,13 @@ This project is under active development and primarily used by me to explore som
|
||||
Expect bugs and some missing features.
|
||||
I mainly test with 'official' binaries from the MINIX source tree.
|
||||
|
||||
Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics and would allow for more generic usage of this 8086 (e.g. implementing a custom simple BIOS or OS).
|
||||
E.g. currently the interrupt handler is hardcoded to support only Minix 1.x interrupts.
|
||||
But first I want to implement all features correctly and add tests for all of them before I move on to that.
|
||||
Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics (e.g. currently with a hardcoded interrupt handler) and would allow for more generic usage of this 8086 (e.g. implementing a custom simple BIOS or OS).
|
||||
But first I want to implement all features correctly and add tests for all of them before I move on to that.
|
||||
|
||||
## Caveats
|
||||
|
||||
Interpreted code is disassembled into a Vector, which will also be used for execution.
|
||||
This means that the code is not actually loaded into memory, but the `CS:IP` addressing scheme is still being used.
|
||||
|
||||
|
||||
## Documentation
|
||||
@@ -62,7 +66,9 @@ $ cargo doc
|
||||
$ firefox target/doc/8086_rs/index.html
|
||||
```
|
||||
|
||||
For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
|
||||
For the implementation of the disassembly, I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
|
||||
|
||||
For the implementation of the interpreter, I used the "Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 2 (2A, 2B, 2C & 2D): Instruction Set Reference, A-Z".
|
||||
|
||||
|
||||
## FAQ
|
||||
|
||||
@@ -502,7 +502,7 @@ impl Disassembler {
|
||||
while self.offset < self.aout.text.len() {
|
||||
// reset mutable current instruction
|
||||
self.instruction = Instruction::new();
|
||||
self.instruction.start = self.offset;
|
||||
self.instruction.addr = self.offset;
|
||||
|
||||
// fetch next opcode
|
||||
let opcode = self.aout.text[self.offset];
|
||||
|
||||
@@ -11,7 +11,7 @@ use core::fmt;
|
||||
/// contains the `Mnemonic` that will be executed, alongside its starting offset
|
||||
/// and the raw parsed bytes
|
||||
pub struct Instruction {
|
||||
pub start: usize, // location of the instruction start
|
||||
pub addr: usize, // location of the instruction start
|
||||
pub raw: Vec<u8>, // raw value of instruction
|
||||
pub opcode: Mnemonic, // actual instruction
|
||||
}
|
||||
@@ -19,7 +19,7 @@ pub struct Instruction {
|
||||
impl Instruction {
|
||||
pub fn new() -> Self {
|
||||
Instruction {
|
||||
start: 0,
|
||||
addr: 0,
|
||||
raw: Vec::new(),
|
||||
opcode: Mnemonic::NOP(),
|
||||
}
|
||||
@@ -28,7 +28,7 @@ impl Instruction {
|
||||
|
||||
impl fmt::Display for Instruction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{:04x}:\t", self.start).unwrap();
|
||||
write!(f, "{:04x}:\t", self.addr).unwrap();
|
||||
|
||||
write!(
|
||||
f,
|
||||
|
||||
@@ -2,7 +2,10 @@ use core::fmt;
|
||||
|
||||
use crate::operands::{ImmediateOperand, ModRmTarget, Word};
|
||||
|
||||
use super::{flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register};
|
||||
use super::{
|
||||
flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register,
|
||||
register::SegmentRegister,
|
||||
};
|
||||
|
||||
/// Wrapper for easier argument passing of polymorph arithmetic operations.
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -18,6 +21,7 @@ type Rhs = ImmediateOperand;
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Computer {
|
||||
pub regs: Register,
|
||||
pub sregs: SegmentRegister,
|
||||
pub flags: Flags,
|
||||
pub memory: Memory,
|
||||
}
|
||||
@@ -26,6 +30,7 @@ impl Computer {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
regs: Register::new(),
|
||||
sregs: SegmentRegister::new(),
|
||||
flags: Flags::new(),
|
||||
memory: Memory::new(),
|
||||
}
|
||||
@@ -280,7 +285,7 @@ pub enum CarryUsage {
|
||||
|
||||
impl fmt::Display for Computer {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{} {}", self.regs, self.flags)
|
||||
write!(f, "{} {} {}", self.regs, self.sregs, self.flags)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ use crate::{
|
||||
computer::{CarryUsage, RotationDirection},
|
||||
interrupt::Mess1,
|
||||
memory::Memory,
|
||||
register::SegmentRegister,
|
||||
},
|
||||
operands::{Byte, ImmediateOperand, ModRmTarget, Word},
|
||||
};
|
||||
@@ -67,14 +68,14 @@ impl Interpreter {
|
||||
}
|
||||
|
||||
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
|
||||
let mut ip = Self::find_instruction(&self.instructions, 0)
|
||||
let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs)
|
||||
.ok_or(InterpreterError::InstructionNotFound(0))?;
|
||||
|
||||
while let Some(cur_instr) = ip.next() {
|
||||
log::info!(
|
||||
"{} IP({:04x})\t {:<32}",
|
||||
self.computer,
|
||||
cur_instr.start,
|
||||
cur_instr.addr,
|
||||
cur_instr.opcode.to_string(),
|
||||
);
|
||||
|
||||
@@ -400,49 +401,84 @@ impl Interpreter {
|
||||
_ => panic!("unreachable"),
|
||||
};
|
||||
if flag {
|
||||
Self::ip_jump(&self.instructions, &mut ip, offset);
|
||||
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Long jumps and calls
|
||||
*/
|
||||
Mnemonic::JMP_p(_) => {
|
||||
todo!()
|
||||
Mnemonic::JMP_p(ptr) => {
|
||||
self.computer.sregs.cs = ptr.segment;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.offset.into(),
|
||||
);
|
||||
}
|
||||
Mnemonic::JMP_Mp(_) => {
|
||||
todo!()
|
||||
Mnemonic::JMP_Mp(ptr) => {
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.word.into(),
|
||||
);
|
||||
}
|
||||
Mnemonic::JMP_Mod(target) => match target {
|
||||
ModRmTarget::Memory(idx) => Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
self.computer.memory.read(&self.computer.regs, idx).into(),
|
||||
),
|
||||
ModRmTarget::Register(register) => Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
self.computer.regs.read(register).into(),
|
||||
),
|
||||
},
|
||||
Mnemonic::CALL_p(_) => todo!(),
|
||||
Mnemonic::CALL_p(ptr) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
self.computer.sregs.cs = ptr.segment;
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.offset.into(),
|
||||
);
|
||||
}
|
||||
Mnemonic::CALL_v(offset) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.start.into())?;
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(&self.instructions, &mut ip, offset);
|
||||
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||
}
|
||||
Mnemonic::CALL_Mod(target) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.start.into())?;
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
self.computer.read_modrm(target).into(),
|
||||
);
|
||||
}
|
||||
Mnemonic::CALL_Mp(_) => todo!(),
|
||||
Mnemonic::CALL_Mp(ptr) => {
|
||||
if let Some(next_instr) = ip.next() {
|
||||
self.computer.push_stack(next_instr.addr.into())?;
|
||||
}
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
ptr.word.into(),
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test
|
||||
@@ -572,7 +608,12 @@ impl Interpreter {
|
||||
*/
|
||||
Mnemonic::RET => {
|
||||
let offset = self.computer.pop_stack()?;
|
||||
Self::ip_jump(&self.instructions, &mut ip, offset as usize);
|
||||
Self::ip_jump(
|
||||
&self.instructions,
|
||||
&mut ip,
|
||||
&self.computer.sregs,
|
||||
offset as usize,
|
||||
);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -781,11 +822,12 @@ impl Interpreter {
|
||||
/// better idea so far.
|
||||
fn find_instruction<'a>(
|
||||
items: &'a Vec<Instruction>,
|
||||
addr: usize,
|
||||
ip_addr: usize,
|
||||
sregs: &SegmentRegister,
|
||||
) -> Option<InstructionPointer<'a>> {
|
||||
items
|
||||
.iter()
|
||||
.position(|i| i.start == addr)
|
||||
.position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize)
|
||||
.map(|index| items[index..].iter())
|
||||
}
|
||||
|
||||
@@ -793,9 +835,10 @@ impl Interpreter {
|
||||
fn ip_jump<'a>(
|
||||
instructions: &'a Vec<Instruction>,
|
||||
ip: &mut InstructionPointer<'a>,
|
||||
sregs: &SegmentRegister,
|
||||
offset: usize,
|
||||
) {
|
||||
if let Some(next_instr) = Self::find_instruction(&instructions, offset) {
|
||||
if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
|
||||
*ip = next_instr;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,21 +2,24 @@ use crate::operands::{Byte, Displacement, ImmediateOperand, MemoryIndex, Word};
|
||||
|
||||
use super::interpreter::InterpreterError;
|
||||
|
||||
/// 2^20 bytes = 1 MiB
|
||||
const MEMORY_SIZE: usize = 1048576;
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Memory {
|
||||
memory: [Byte; Word::MAX as usize],
|
||||
memory: [Byte; MEMORY_SIZE as usize],
|
||||
}
|
||||
|
||||
impl Memory {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
memory: [0; Word::MAX as usize],
|
||||
memory: [0; MEMORY_SIZE as usize],
|
||||
}
|
||||
}
|
||||
|
||||
/// Safely writes a [`Word`] into an index of memory.
|
||||
pub fn write_raw(&mut self, idx: Word, val: Word) -> Result<(), InterpreterError> {
|
||||
if idx + 1 > Word::MAX {
|
||||
if (idx + 1) as usize > MEMORY_SIZE {
|
||||
return Err(InterpreterError::MemoryOutOfBound(idx));
|
||||
} else {
|
||||
let [low, high] = val.to_le_bytes();
|
||||
|
||||
@@ -162,3 +162,32 @@ gen_regs!(AX);
|
||||
gen_regs!(BX);
|
||||
gen_regs!(CX);
|
||||
gen_regs!(DX);
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct SegmentRegister {
|
||||
pub ds: Word,
|
||||
pub es: Word,
|
||||
pub ss: Word,
|
||||
pub cs: Word,
|
||||
}
|
||||
|
||||
impl SegmentRegister {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
ds: 0,
|
||||
es: 0,
|
||||
ss: 0,
|
||||
cs: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SegmentRegister {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"DS({}) ES({}) SS({}) CS({})",
|
||||
self.ds, self.es, self.ss, self.cs
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -567,7 +567,7 @@ pub struct Pointer16 {
|
||||
|
||||
impl std::fmt::Display for Pointer16 {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
write!(f, "ptr [{:#04x}]", self.word)
|
||||
write!(f, "ptr word [{:#04x}]", self.word)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user