ft(interpreter): impl far jumps with correct CS addressing
This commit is contained in:
12
README.md
12
README.md
@@ -49,10 +49,14 @@ This project is under active development and primarily used by me to explore som
|
|||||||
Expect bugs and some missing features.
|
Expect bugs and some missing features.
|
||||||
I mainly test with 'official' binaries from the MINIX source tree.
|
I mainly test with 'official' binaries from the MINIX source tree.
|
||||||
|
|
||||||
Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics and would allow for more generic usage of this 8086 (e.g. implenting an own simple BIOS or OS).
|
Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics (e.g. currently with a hardcoded interrupt handler) and would allow for more generic usage of this 8086 (e.g. implementing your own simple BIOS or OS).
|
||||||
E.g. currently the interrupt handler is hardcoded to support only Minix 1.x interrupts.
|
|
||||||
But first I want to implement all features correctly and add tests for all of them, before I want to move to that.
|
But first I want to implement all features correctly and add tests for all of them, before I want to move to that.
|
||||||
|
|
||||||
|
## Caveats
|
||||||
|
|
||||||
|
Interpreted code is disassembled into a Vector, which will also be used for execution.
|
||||||
|
This means that the code is not actually loaded into memory, but the `CS:IP` addressing scheme is still being used.
|
||||||
|
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
@@ -62,7 +66,9 @@ $ cargo doc
|
|||||||
$ firefox target/doc/8086_rs/index.html
|
$ firefox target/doc/8086_rs/index.html
|
||||||
```
|
```
|
||||||
|
|
||||||
For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
|
For the implementation of the disassembly, I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix.
|
||||||
|
|
||||||
|
For the implementation of the interpreter, I used the "Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 2 (2A, 2B, 2C & 2D): Instruction Set Reference, A-Z".
|
||||||
|
|
||||||
|
|
||||||
## FAQ
|
## FAQ
|
||||||
|
|||||||
@@ -502,7 +502,7 @@ impl Disassembler {
|
|||||||
while self.offset < self.aout.text.len() {
|
while self.offset < self.aout.text.len() {
|
||||||
// reset mutable current instruction
|
// reset mutable current instruction
|
||||||
self.instruction = Instruction::new();
|
self.instruction = Instruction::new();
|
||||||
self.instruction.start = self.offset;
|
self.instruction.addr = self.offset;
|
||||||
|
|
||||||
// fetch next opcode
|
// fetch next opcode
|
||||||
let opcode = self.aout.text[self.offset];
|
let opcode = self.aout.text[self.offset];
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ use core::fmt;
|
|||||||
/// contains the `Mnemonic` that will be executed, alongside its starting offset
|
/// contains the `Mnemonic` that will be executed, alongside its starting offset
|
||||||
/// and the raw parsed bytes
|
/// and the raw parsed bytes
|
||||||
pub struct Instruction {
|
pub struct Instruction {
|
||||||
pub start: usize, // location of the instruction start
|
pub addr: usize, // location of the instruction start
|
||||||
pub raw: Vec<u8>, // raw value of instruction
|
pub raw: Vec<u8>, // raw value of instruction
|
||||||
pub opcode: Mnemonic, // actual instruction
|
pub opcode: Mnemonic, // actual instruction
|
||||||
}
|
}
|
||||||
@@ -19,7 +19,7 @@ pub struct Instruction {
|
|||||||
impl Instruction {
|
impl Instruction {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Instruction {
|
Instruction {
|
||||||
start: 0,
|
addr: 0,
|
||||||
raw: Vec::new(),
|
raw: Vec::new(),
|
||||||
opcode: Mnemonic::NOP(),
|
opcode: Mnemonic::NOP(),
|
||||||
}
|
}
|
||||||
@@ -28,7 +28,7 @@ impl Instruction {
|
|||||||
|
|
||||||
impl fmt::Display for Instruction {
|
impl fmt::Display for Instruction {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(f, "{:04x}:\t", self.start).unwrap();
|
write!(f, "{:04x}:\t", self.addr).unwrap();
|
||||||
|
|
||||||
write!(
|
write!(
|
||||||
f,
|
f,
|
||||||
|
|||||||
@@ -2,7 +2,10 @@ use core::fmt;
|
|||||||
|
|
||||||
use crate::operands::{ImmediateOperand, ModRmTarget, Word};
|
use crate::operands::{ImmediateOperand, ModRmTarget, Word};
|
||||||
|
|
||||||
use super::{flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register};
|
use super::{
|
||||||
|
flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register,
|
||||||
|
register::SegmentRegister,
|
||||||
|
};
|
||||||
|
|
||||||
/// Wrapper for easier argument passing of polymorph arithmetic operations.
|
/// Wrapper for easier argument passing of polymorph arithmetic operations.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -18,6 +21,7 @@ type Rhs = ImmediateOperand;
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Computer {
|
pub struct Computer {
|
||||||
pub regs: Register,
|
pub regs: Register,
|
||||||
|
pub sregs: SegmentRegister,
|
||||||
pub flags: Flags,
|
pub flags: Flags,
|
||||||
pub memory: Memory,
|
pub memory: Memory,
|
||||||
}
|
}
|
||||||
@@ -26,6 +30,7 @@ impl Computer {
|
|||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
regs: Register::new(),
|
regs: Register::new(),
|
||||||
|
sregs: SegmentRegister::new(),
|
||||||
flags: Flags::new(),
|
flags: Flags::new(),
|
||||||
memory: Memory::new(),
|
memory: Memory::new(),
|
||||||
}
|
}
|
||||||
@@ -280,7 +285,7 @@ pub enum CarryUsage {
|
|||||||
|
|
||||||
impl fmt::Display for Computer {
|
impl fmt::Display for Computer {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
write!(f, "{} {}", self.regs, self.flags)
|
write!(f, "{} {} {}", self.regs, self.sregs, self.flags)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ use crate::{
|
|||||||
computer::{CarryUsage, RotationDirection},
|
computer::{CarryUsage, RotationDirection},
|
||||||
interrupt::Mess1,
|
interrupt::Mess1,
|
||||||
memory::Memory,
|
memory::Memory,
|
||||||
|
register::SegmentRegister,
|
||||||
},
|
},
|
||||||
operands::{Byte, ImmediateOperand, ModRmTarget, Word},
|
operands::{Byte, ImmediateOperand, ModRmTarget, Word},
|
||||||
};
|
};
|
||||||
@@ -67,14 +68,14 @@ impl Interpreter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
|
pub fn interpret(&mut self) -> Result<(), InterpreterError> {
|
||||||
let mut ip = Self::find_instruction(&self.instructions, 0)
|
let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs)
|
||||||
.ok_or(InterpreterError::InstructionNotFound(0))?;
|
.ok_or(InterpreterError::InstructionNotFound(0))?;
|
||||||
|
|
||||||
while let Some(cur_instr) = ip.next() {
|
while let Some(cur_instr) = ip.next() {
|
||||||
log::info!(
|
log::info!(
|
||||||
"{} IP({:04x})\t {:<32}",
|
"{} IP({:04x})\t {:<32}",
|
||||||
self.computer,
|
self.computer,
|
||||||
cur_instr.start,
|
cur_instr.addr,
|
||||||
cur_instr.opcode.to_string(),
|
cur_instr.opcode.to_string(),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -400,49 +401,84 @@ impl Interpreter {
|
|||||||
_ => panic!("unreachable"),
|
_ => panic!("unreachable"),
|
||||||
};
|
};
|
||||||
if flag {
|
if flag {
|
||||||
Self::ip_jump(&self.instructions, &mut ip, offset);
|
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Long jumps and calls
|
* Long jumps and calls
|
||||||
*/
|
*/
|
||||||
Mnemonic::JMP_p(_) => {
|
Mnemonic::JMP_p(ptr) => {
|
||||||
todo!()
|
self.computer.sregs.cs = ptr.segment;
|
||||||
|
Self::ip_jump(
|
||||||
|
&self.instructions,
|
||||||
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
|
ptr.offset.into(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
Mnemonic::JMP_Mp(_) => {
|
Mnemonic::JMP_Mp(ptr) => {
|
||||||
todo!()
|
Self::ip_jump(
|
||||||
|
&self.instructions,
|
||||||
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
|
ptr.word.into(),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
Mnemonic::JMP_Mod(target) => match target {
|
Mnemonic::JMP_Mod(target) => match target {
|
||||||
ModRmTarget::Memory(idx) => Self::ip_jump(
|
ModRmTarget::Memory(idx) => Self::ip_jump(
|
||||||
&self.instructions,
|
&self.instructions,
|
||||||
&mut ip,
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
self.computer.memory.read(&self.computer.regs, idx).into(),
|
self.computer.memory.read(&self.computer.regs, idx).into(),
|
||||||
),
|
),
|
||||||
ModRmTarget::Register(register) => Self::ip_jump(
|
ModRmTarget::Register(register) => Self::ip_jump(
|
||||||
&self.instructions,
|
&self.instructions,
|
||||||
&mut ip,
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
self.computer.regs.read(register).into(),
|
self.computer.regs.read(register).into(),
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
Mnemonic::CALL_p(_) => todo!(),
|
Mnemonic::CALL_p(ptr) => {
|
||||||
|
if let Some(next_instr) = ip.next() {
|
||||||
|
self.computer.push_stack(next_instr.addr.into())?;
|
||||||
|
}
|
||||||
|
self.computer.sregs.cs = ptr.segment;
|
||||||
|
Self::ip_jump(
|
||||||
|
&self.instructions,
|
||||||
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
|
ptr.offset.into(),
|
||||||
|
);
|
||||||
|
}
|
||||||
Mnemonic::CALL_v(offset) => {
|
Mnemonic::CALL_v(offset) => {
|
||||||
if let Some(next_instr) = ip.next() {
|
if let Some(next_instr) = ip.next() {
|
||||||
self.computer.push_stack(next_instr.start.into())?;
|
self.computer.push_stack(next_instr.addr.into())?;
|
||||||
}
|
}
|
||||||
Self::ip_jump(&self.instructions, &mut ip, offset);
|
Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset);
|
||||||
}
|
}
|
||||||
Mnemonic::CALL_Mod(target) => {
|
Mnemonic::CALL_Mod(target) => {
|
||||||
if let Some(next_instr) = ip.next() {
|
if let Some(next_instr) = ip.next() {
|
||||||
self.computer.push_stack(next_instr.start.into())?;
|
self.computer.push_stack(next_instr.addr.into())?;
|
||||||
}
|
}
|
||||||
Self::ip_jump(
|
Self::ip_jump(
|
||||||
&self.instructions,
|
&self.instructions,
|
||||||
&mut ip,
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
self.computer.read_modrm(target).into(),
|
self.computer.read_modrm(target).into(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Mnemonic::CALL_Mp(_) => todo!(),
|
Mnemonic::CALL_Mp(ptr) => {
|
||||||
|
if let Some(next_instr) = ip.next() {
|
||||||
|
self.computer.push_stack(next_instr.addr.into())?;
|
||||||
|
}
|
||||||
|
Self::ip_jump(
|
||||||
|
&self.instructions,
|
||||||
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
|
ptr.word.into(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Test
|
* Test
|
||||||
@@ -572,7 +608,12 @@ impl Interpreter {
|
|||||||
*/
|
*/
|
||||||
Mnemonic::RET => {
|
Mnemonic::RET => {
|
||||||
let offset = self.computer.pop_stack()?;
|
let offset = self.computer.pop_stack()?;
|
||||||
Self::ip_jump(&self.instructions, &mut ip, offset as usize);
|
Self::ip_jump(
|
||||||
|
&self.instructions,
|
||||||
|
&mut ip,
|
||||||
|
&self.computer.sregs,
|
||||||
|
offset as usize,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -781,11 +822,12 @@ impl Interpreter {
|
|||||||
/// better idea so far.
|
/// better idea so far.
|
||||||
fn find_instruction<'a>(
|
fn find_instruction<'a>(
|
||||||
items: &'a Vec<Instruction>,
|
items: &'a Vec<Instruction>,
|
||||||
addr: usize,
|
ip_addr: usize,
|
||||||
|
sregs: &SegmentRegister,
|
||||||
) -> Option<InstructionPointer<'a>> {
|
) -> Option<InstructionPointer<'a>> {
|
||||||
items
|
items
|
||||||
.iter()
|
.iter()
|
||||||
.position(|i| i.start == addr)
|
.position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize)
|
||||||
.map(|index| items[index..].iter())
|
.map(|index| items[index..].iter())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -793,9 +835,10 @@ impl Interpreter {
|
|||||||
fn ip_jump<'a>(
|
fn ip_jump<'a>(
|
||||||
instructions: &'a Vec<Instruction>,
|
instructions: &'a Vec<Instruction>,
|
||||||
ip: &mut InstructionPointer<'a>,
|
ip: &mut InstructionPointer<'a>,
|
||||||
|
sregs: &SegmentRegister,
|
||||||
offset: usize,
|
offset: usize,
|
||||||
) {
|
) {
|
||||||
if let Some(next_instr) = Self::find_instruction(&instructions, offset) {
|
if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) {
|
||||||
*ip = next_instr;
|
*ip = next_instr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,21 +2,24 @@ use crate::operands::{Byte, Displacement, ImmediateOperand, MemoryIndex, Word};
|
|||||||
|
|
||||||
use super::interpreter::InterpreterError;
|
use super::interpreter::InterpreterError;
|
||||||
|
|
||||||
|
/// 2^20 bytes = 1 MiB
|
||||||
|
const MEMORY_SIZE: usize = 1048576;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
pub struct Memory {
|
pub struct Memory {
|
||||||
memory: [Byte; Word::MAX as usize],
|
memory: [Byte; MEMORY_SIZE as usize],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Memory {
|
impl Memory {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
memory: [0; Word::MAX as usize],
|
memory: [0; MEMORY_SIZE as usize],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Safely writes a [`Word`] into an index of memory.
|
/// Safely writes a [`Word`] into an index of memory.
|
||||||
pub fn write_raw(&mut self, idx: Word, val: Word) -> Result<(), InterpreterError> {
|
pub fn write_raw(&mut self, idx: Word, val: Word) -> Result<(), InterpreterError> {
|
||||||
if idx + 1 > Word::MAX {
|
if (idx + 1) as usize > MEMORY_SIZE {
|
||||||
return Err(InterpreterError::MemoryOutOfBound(idx));
|
return Err(InterpreterError::MemoryOutOfBound(idx));
|
||||||
} else {
|
} else {
|
||||||
let [low, high] = val.to_le_bytes();
|
let [low, high] = val.to_le_bytes();
|
||||||
|
|||||||
@@ -162,3 +162,32 @@ gen_regs!(AX);
|
|||||||
gen_regs!(BX);
|
gen_regs!(BX);
|
||||||
gen_regs!(CX);
|
gen_regs!(CX);
|
||||||
gen_regs!(DX);
|
gen_regs!(DX);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
pub struct SegmentRegister {
|
||||||
|
pub ds: Word,
|
||||||
|
pub es: Word,
|
||||||
|
pub ss: Word,
|
||||||
|
pub cs: Word,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SegmentRegister {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
ds: 0,
|
||||||
|
es: 0,
|
||||||
|
ss: 0,
|
||||||
|
cs: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for SegmentRegister {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"DS({}) ES({}) SS({}) CS({})",
|
||||||
|
self.ds, self.es, self.ss, self.cs
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -567,7 +567,7 @@ pub struct Pointer16 {
|
|||||||
|
|
||||||
impl std::fmt::Display for Pointer16 {
|
impl std::fmt::Display for Pointer16 {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||||
write!(f, "ptr [{:#04x}]", self.word)
|
write!(f, "ptr word [{:#04x}]", self.word)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user