diff --git a/README.md b/README.md index be8ceea..c77bd6f 100644 --- a/README.md +++ b/README.md @@ -49,9 +49,13 @@ This project is under active development and primarily used by me to explore som Expect bugs and some missing features. I mainly test with 'official' binaries from the MINIX source tree. -Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics and would allow for more generic usage of this 8086 (e.g. implenting an own simple BIOS or OS). -E.g. currently the interrupt handler is hardcoded to support only Minix 1.x interrupts. -But first I want to implement all features correctly and add tests for all of them, before I want to move to that. +Currently, everything is in the binary, but I want to move some parts to a lib, which would make it much easier to ignore the Minix 1.x specifics (e.g. currently with a hardcoded interrupt handler) and would allow for more generic usage of this 8086 (e.g. implementing your own simple BIOS or OS). +But first I want to implement all features correctly and add tests for all of them, before I want to move to that. + +## Caveats + +Interpreted code is disassembled into a Vector, which will also be used for execution. +This means that the code is not actually loaded into memory, but the `CS:IP` addressing scheme is still being used. ## Documentation @@ -62,7 +66,9 @@ $ cargo doc $ firefox target/doc/8086_rs/index.html ``` -For the implementation of all instructions I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix. 
+For the implementation of the disassembly, I used the Intel "8086 16-BIT HMOS MICROPROCESSOR" Spec, as well as [this](http://www.mlsite.net/8086/8086_table.txt) overview of all Opcode variants used in conjunction with [this](http://www.mlsite.net/8086/) decoding matrix. + +For the implementation of the interpreter, I used the "Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 2 (2A, 2B, 2C & 2D): Instruction Set Reference, A-Z". ## FAQ diff --git a/src/disasm.rs b/src/disasm.rs index 3027054..01f2ff6 100644 --- a/src/disasm.rs +++ b/src/disasm.rs @@ -502,7 +502,7 @@ impl Disassembler { while self.offset < self.aout.text.len() { // reset mutable current instruction self.instruction = Instruction::new(); - self.instruction.start = self.offset; + self.instruction.addr = self.offset; // fetch next opcode let opcode = self.aout.text[self.offset]; diff --git a/src/instructions.rs b/src/instructions.rs index fa84432..4626274 100644 --- a/src/instructions.rs +++ b/src/instructions.rs @@ -11,7 +11,7 @@ use core::fmt; /// contains the `Mnemonic` that will be executed, alongside its starting offset /// and the raw parsed bytes pub struct Instruction { - pub start: usize, // location of the instruction start + pub addr: usize, // location of the instruction start pub raw: Vec, // raw value of instruction pub opcode: Mnemonic, // actual instruction } @@ -19,7 +19,7 @@ pub struct Instruction { impl Instruction { pub fn new() -> Self { Instruction { - start: 0, + addr: 0, raw: Vec::new(), opcode: Mnemonic::NOP(), } @@ -28,7 +28,7 @@ impl Instruction { impl fmt::Display for Instruction { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{:04x}:\t", self.start).unwrap(); + write!(f, "{:04x}:\t", self.addr).unwrap(); write!( f, diff --git a/src/interpreter/computer.rs b/src/interpreter/computer.rs index 6dbda18..4627bf4 100644 --- a/src/interpreter/computer.rs +++ b/src/interpreter/computer.rs @@ -2,7 +2,10 @@ use core::fmt; use 
crate::operands::{ImmediateOperand, ModRmTarget, Word}; -use super::{flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register}; +use super::{ + flags::Flags, interpreter::InterpreterError, memory::Memory, register::Register, + register::SegmentRegister, +}; /// Wrapper for easier argument passing of polymorph arithmetic operations. #[derive(Debug, Clone)] @@ -18,6 +21,7 @@ type Rhs = ImmediateOperand; #[derive(Debug, Clone)] pub struct Computer { pub regs: Register, + pub sregs: SegmentRegister, pub flags: Flags, pub memory: Memory, } @@ -26,6 +30,7 @@ impl Computer { pub fn new() -> Self { Self { regs: Register::new(), + sregs: SegmentRegister::new(), flags: Flags::new(), memory: Memory::new(), } @@ -280,7 +285,7 @@ pub enum CarryUsage { impl fmt::Display for Computer { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{} {}", self.regs, self.flags) + write!(f, "{} {} {}", self.regs, self.sregs, self.flags) } } diff --git a/src/interpreter/interpreter.rs b/src/interpreter/interpreter.rs index bc1aa61..2ea4713 100644 --- a/src/interpreter/interpreter.rs +++ b/src/interpreter/interpreter.rs @@ -7,6 +7,7 @@ use crate::{ computer::{CarryUsage, RotationDirection}, interrupt::Mess1, memory::Memory, + register::SegmentRegister, }, operands::{Byte, ImmediateOperand, ModRmTarget, Word}, }; @@ -67,14 +68,14 @@ impl Interpreter { } pub fn interpret(&mut self) -> Result<(), InterpreterError> { - let mut ip = Self::find_instruction(&self.instructions, 0) + let mut ip = Self::find_instruction(&self.instructions, 0, &self.computer.sregs) .ok_or(InterpreterError::InstructionNotFound(0))?; while let Some(cur_instr) = ip.next() { log::info!( "{} IP({:04x})\t {:<32}", self.computer, - cur_instr.start, + cur_instr.addr, cur_instr.opcode.to_string(), ); @@ -400,49 +401,84 @@ impl Interpreter { _ => panic!("unreachable"), }; if flag { - Self::ip_jump(&self.instructions, &mut ip, offset); + Self::ip_jump(&self.instructions, &mut ip, 
&self.computer.sregs, offset); } } /* * Long jumps and calls */ - Mnemonic::JMP_p(_) => { - todo!() + Mnemonic::JMP_p(ptr) => { + self.computer.sregs.cs = ptr.segment; + Self::ip_jump( + &self.instructions, + &mut ip, + &self.computer.sregs, + ptr.offset.into(), + ); } - Mnemonic::JMP_Mp(_) => { - todo!() + Mnemonic::JMP_Mp(ptr) => { + Self::ip_jump( + &self.instructions, + &mut ip, + &self.computer.sregs, + ptr.word.into(), + ); } Mnemonic::JMP_Mod(target) => match target { ModRmTarget::Memory(idx) => Self::ip_jump( &self.instructions, &mut ip, + &self.computer.sregs, self.computer.memory.read(&self.computer.regs, idx).into(), ), ModRmTarget::Register(register) => Self::ip_jump( &self.instructions, &mut ip, + &self.computer.sregs, self.computer.regs.read(register).into(), ), }, - Mnemonic::CALL_p(_) => todo!(), + Mnemonic::CALL_p(ptr) => { + if let Some(next_instr) = ip.next() { + self.computer.push_stack(next_instr.addr.into())?; + } + self.computer.sregs.cs = ptr.segment; + Self::ip_jump( + &self.instructions, + &mut ip, + &self.computer.sregs, + ptr.offset.into(), + ); + } Mnemonic::CALL_v(offset) => { if let Some(next_instr) = ip.next() { - self.computer.push_stack(next_instr.start.into())?; + self.computer.push_stack(next_instr.addr.into())?; } - Self::ip_jump(&self.instructions, &mut ip, offset); + Self::ip_jump(&self.instructions, &mut ip, &self.computer.sregs, offset); } Mnemonic::CALL_Mod(target) => { if let Some(next_instr) = ip.next() { - self.computer.push_stack(next_instr.start.into())?; + self.computer.push_stack(next_instr.addr.into())?; } Self::ip_jump( &self.instructions, &mut ip, + &self.computer.sregs, self.computer.read_modrm(target).into(), ); } - Mnemonic::CALL_Mp(_) => todo!(), + Mnemonic::CALL_Mp(ptr) => { + if let Some(next_instr) = ip.next() { + self.computer.push_stack(next_instr.addr.into())?; + } + Self::ip_jump( + &self.instructions, + &mut ip, + &self.computer.sregs, + ptr.word.into(), + ); + } /* * Test @@ -572,7 +608,12 @@ impl 
Interpreter { */ Mnemonic::RET => { let offset = self.computer.pop_stack()?; - Self::ip_jump(&self.instructions, &mut ip, offset as usize); + Self::ip_jump( + &self.instructions, + &mut ip, + &self.computer.sregs, + offset as usize, + ); } /* @@ -781,11 +822,12 @@ impl Interpreter { /// better idea so far. fn find_instruction<'a>( items: &'a Vec, - addr: usize, + ip_addr: usize, + sregs: &SegmentRegister, ) -> Option> { items .iter() - .position(|i| i.start == addr) + .position(|instruction| instruction.addr == ip_addr + (sregs.cs * 16) as usize) .map(|index| items[index..].iter()) } @@ -793,9 +835,10 @@ impl Interpreter { fn ip_jump<'a>( instructions: &'a Vec, ip: &mut InstructionPointer<'a>, + sregs: &SegmentRegister, offset: usize, ) { - if let Some(next_instr) = Self::find_instruction(&instructions, offset) { + if let Some(next_instr) = Self::find_instruction(&instructions, offset, sregs) { *ip = next_instr; } } diff --git a/src/interpreter/memory.rs b/src/interpreter/memory.rs index 84f7033..a2a398c 100644 --- a/src/interpreter/memory.rs +++ b/src/interpreter/memory.rs @@ -2,21 +2,24 @@ use crate::operands::{Byte, Displacement, ImmediateOperand, MemoryIndex, Word}; use super::interpreter::InterpreterError; +/// 2^20 bytes = 1 MiB +const MEMORY_SIZE: usize = 1048576; + #[derive(Debug, Clone, Copy)] pub struct Memory { - memory: [Byte; Word::MAX as usize], + memory: [Byte; MEMORY_SIZE as usize], } impl Memory { pub fn new() -> Self { Self { - memory: [0; Word::MAX as usize], + memory: [0; MEMORY_SIZE as usize], } } /// Safely writes a [`Word`] into an index of memory. 
pub fn write_raw(&mut self, idx: Word, val: Word) -> Result<(), InterpreterError> { - if idx + 1 > Word::MAX { + if (idx + 1) as usize > MEMORY_SIZE { return Err(InterpreterError::MemoryOutOfBound(idx)); } else { let [low, high] = val.to_le_bytes(); diff --git a/src/interpreter/register.rs b/src/interpreter/register.rs index 38385e1..e60fc43 100644 --- a/src/interpreter/register.rs +++ b/src/interpreter/register.rs @@ -162,3 +162,32 @@ gen_regs!(AX); gen_regs!(BX); gen_regs!(CX); gen_regs!(DX); + +#[derive(Debug, Clone, Copy)] +pub struct SegmentRegister { + pub ds: Word, + pub es: Word, + pub ss: Word, + pub cs: Word, +} + +impl SegmentRegister { + pub fn new() -> Self { + Self { + ds: 0, + es: 0, + ss: 0, + cs: 0, + } + } +} + +impl fmt::Display for SegmentRegister { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "DS({}) ES({}) SS({}) CS({})", + self.ds, self.es, self.ss, self.cs + ) + } +} diff --git a/src/operands.rs b/src/operands.rs index e506b37..c2bbf92 100644 --- a/src/operands.rs +++ b/src/operands.rs @@ -567,7 +567,7 @@ pub struct Pointer16 { impl std::fmt::Display for Pointer16 { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "ptr [{:#04x}]", self.word) + write!(f, "ptr word [{:#04x}]", self.word) } }