Files
8086-rs/src/instructions.rs
Marco Thomas a25e0a3890 ft: fixed modrm target calculation
While implementing some more mnemonics and testing
them, it was clear that modrm parsing was wrong.
Now reg to reg and immediates, together with
GPR1 interpretation should work as expected.

GPR1 interpretation can currently not be merged
into the modrm function, as with the current
abstraction the REG needs to select the correct
mnemonic, for which we need to also know the second
operand, which will only be parsed afterwards.
But this will be incorporated at some point, this
just marks the first working state.
2025-05-13 12:07:22 +09:00

385 lines
11 KiB
Rust

use core::fmt;
// Operand-width aliases: `b` is an 8-bit operand and `w` a 16-bit operand.
// A `v` operand is 16 bits wide as well, so it is simply treated the same as
// `w` for now (as long as nothing blows up).
#[allow(non_camel_case_types)]
/// 8-bit (byte) operand value.
pub type b = u8;
#[allow(non_camel_case_types)]
/// 16-bit (word) operand value.
pub type w = u16;
#[derive(Debug, Clone)]
#[allow(dead_code)]
/// A single 'line' of executable ASM is called an Instruction, which
/// contains the `Opcode` that will be executed, alongside its starting offset
/// and the raw parsed bytes.
///
/// The decoded operation is stored in [`Instruction::opcode`] as a
/// [`Mnemonic`] value.
pub struct Instruction {
/// Location at which the instruction starts.
pub start: usize, // location of the instruction start
/// Raw bytes that make up the instruction.
pub raw: Vec<u8>, // raw value of instruction
/// The decoded operation together with its operands.
pub opcode: Mnemonic, // actual instruction
}
impl Instruction {
    /// Builds a placeholder instruction: start offset `0`, no raw bytes and
    /// a `NOP` opcode.
    pub fn new() -> Self {
        let start = 0;
        let raw = Vec::new();
        let opcode = Mnemonic::NOP();
        Self { start, raw, opcode }
    }
}
impl fmt::Display for Instruction {
    /// Renders the instruction as one listing line:
    /// `<start as 4 hex digits>: <raw bytes as hex>\t<opcode>`.
    ///
    /// # Errors
    ///
    /// Returns the formatter's error if any write fails. Errors are
    /// propagated with `?` rather than unwrapped, so a failing sink no
    /// longer causes a panic inside `fmt`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:04x}: ", self.start)?;
        // Raw bytes are printed back to back, two hex digits each.
        for byte in &self.raw {
            write!(f, "{:02x}", byte)?;
        }
        write!(f, "\t{}", self.opcode)
    }
}
#[derive(Debug, Clone)]
#[allow(dead_code, non_camel_case_types)]
/// All possible opcode variations.
///
/// Variant names follow the pattern `<MNEMONIC>_<operand form>`:
/// - `_FromReg(ModRmTarget, Register)` and `_ToReg(ModRmTarget, Register)`
///   pair a ModRM operand with a register. For the variants covered by the
///   `Display` impl, `_FromReg` is printed as `OP target, reg` and `_ToReg`
///   as `OP reg, target`.
/// - `_Ib(ModRmTarget, ImmediateByte)` and `_Iv(ModRmTarget, ImmediateWord)`
///   pair a ModRM operand with a byte/word immediate.
/// - `_ALIb(ImmediateByte)` and `_AXIv(ImmediateWord)` carry only an
///   immediate; presumably the other operand is the implicit AL/AX register
///   (TODO confirm against the decoder).
// XXX: convert this copy and paste horror into a proc macro like
// enum Opcode {
// #[derive(default_variations)]
// ADD,
// ...
// }
// which then adds all variants and also creates the matching logic for
// src/disasm.rs decode_instructions()
pub enum Mnemonic {
NOP(),
// ADD
ADD_FromReg(ModRmTarget, Register),
ADD_ToReg(ModRmTarget, Register),
ADD_Ib(ModRmTarget, ImmediateByte),
ADD_Iv(ModRmTarget, ImmediateWord),
ADD_ALIb(ImmediateByte),
ADD_AXIv(ImmediateWord),
// PUSH
PUSH_R(Register),
PUSH_S(SegmentRegister),
// POP
POP_S(SegmentRegister),
POP_R(Register),
// OR
OR_FromReg(ModRmTarget, Register),
OR_ToReg(ModRmTarget, Register),
OR_Ib(ModRmTarget, ImmediateByte),
OR_Iv(ModRmTarget, ImmediateWord),
OR_ALIb(ImmediateByte),
OR_AXIv(ImmediateWord),
// ADC
ADC_FromReg(ModRmTarget, Register),
ADC_ToReg(ModRmTarget, Register),
ADC_Ib(ModRmTarget, ImmediateByte),
ADC_Iv(ModRmTarget, ImmediateWord),
ADC_ALIb(ImmediateByte),
ADC_AXIv(ImmediateWord),
// SBB
SBB_FromReg(ModRmTarget, Register),
SBB_ToReg(ModRmTarget, Register),
SBB_Ib(ModRmTarget, ImmediateByte),
SBB_Iv(ModRmTarget, ImmediateWord),
SBB_ALIb(ImmediateByte),
SBB_AXIv(ImmediateWord),
// AND
AND_FromReg(ModRmTarget, Register),
AND_ToReg(ModRmTarget, Register),
AND_Ib(ModRmTarget, ImmediateByte),
AND_Iv(ModRmTarget, ImmediateWord),
AND_ALIb(ImmediateByte),
AND_AXIv(ImmediateWord),
// Segment override
OVERRIDE(SegmentRegister),
// Decimal / ASCII adjustment
DAA,
DAS,
AAA,
AAS,
// SUB
SUB_FromReg(ModRmTarget, Register),
SUB_ToReg(ModRmTarget, Register),
SUB_Ib(ModRmTarget, ImmediateByte),
SUB_Iv(ModRmTarget, ImmediateWord),
SUB_ALIb(ImmediateByte),
SUB_AXIv(ImmediateWord),
// XOR
XOR_FromReg(ModRmTarget, Register),
XOR_ToReg(ModRmTarget, Register),
XOR_Ib(ModRmTarget, ImmediateByte),
XOR_Iv(ModRmTarget, ImmediateWord),
XOR_ALIb(ImmediateByte),
XOR_AXIv(ImmediateWord),
// CMP
CMP_FromReg(ModRmTarget, Register),
CMP_ToReg(ModRmTarget, Register),
CMP_Ib(ModRmTarget, ImmediateByte),
CMP_Iv(ModRmTarget, ImmediateWord),
CMP_ALIb(ImmediateByte),
CMP_AXIv(ImmediateWord),
// INC
INC(Register),
// DEC
DEC(Register),
// Conditional jumps, each carrying a byte immediate
JO(ImmediateByte),
JNO(ImmediateByte),
JB(ImmediateByte),
JNB(ImmediateByte),
JZ(ImmediateByte),
JNZ(ImmediateByte),
JBE(ImmediateByte),
JA(ImmediateByte),
JS(ImmediateByte),
JNS(ImmediateByte),
JPE(ImmediateByte),
JPO(ImmediateByte),
JL(ImmediateByte),
JGE(ImmediateByte),
JLE(ImmediateByte),
JG(ImmediateByte),
// TEST
TEST(ModRmTarget, Register),
// XCHG
// NOTE(review): the variant is spelled `XHCG`, while the 8086 mnemonic is
// `XCHG`; renaming it would also require touching every user of the variant.
XHCG(ModRmTarget, Register),
// MOV
MOV_FromReg(ModRmTarget, Register),
MOV_ToReg(ModRmTarget, Register),
MOV_FromSReg(ModRmTarget, SegmentRegister),
MOV_ToSReg(ModRmTarget, SegmentRegister),
MOV_BXIv(ImmediateWord),
// LEA
LEA(ModRmTarget, Register),
// INT
INT(ImmediateByte),
}
impl fmt::Display for Mnemonic {
    /// Renders the mnemonic followed by its operands in `OP dst, src` form.
    ///
    /// Only a subset of the variants has a dedicated rendering so far; every
    /// other variant is printed as the placeholder `??? ??, ??`.
    ///
    /// Immediates are printed in lowercase hex; word immediates are
    /// zero-padded to four digits.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            // The mnemonic and its single operand are separated by a space
            // only, like every other arm; a comma belongs between operands.
            Self::INT(byte) => write!(f, "INT {:x}", byte),
            Self::ADD_FromReg(mem, reg) => write!(f, "ADD {}, {}", mem, reg),
            Self::ADD_ToReg(mem, reg) => write!(f, "ADD {}, {}", reg, mem),
            Self::CMP_Iv(mem, imm) => write!(f, "CMP {}, {:04x}", mem, imm),
            Self::LEA(mem, reg) => write!(f, "LEA {}, {}", reg, mem),
            Self::MOV_BXIv(word) => write!(f, "MOV BX, {:04x}", word),
            Self::XOR_FromReg(mem, reg) => write!(f, "XOR {}, {}", mem, reg),
            // Placeholder for variants without a dedicated rendering yet.
            _ => write!(f, "??? ??, ??"),
        }
    }
}
/// Registers of a 8086 processor
///
/// Within the 8-bit group and within the 16-bit group, the variants are
/// listed in the same order in which `Register::by_id` assigns them to the
/// IDs `0b000` through `0b111`.
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum Register {
// 8 bit
// low bytes
AL,
CL,
DL,
BL,
// high bytes
AH,
CH,
DH,
BH,
// 16 bit
AX, // accumulator
CX, // counter
DX, // data
BX, // base
SP, // stack pointer
BP, // base pointer
SI, // source index
DI, // destination index
}
/// Numeric selector for a [`Register`] or a [`SegmentRegister`].
// NOTE(review): the `by_id` constructors visible in this file take an
// `OperandSize` or a plain `u8` rather than this alias — confirm where it is
// actually used.
pub type RegisterId = u8;
#[allow(dead_code)]
impl Register {
    /// Maps a register ID to the matching [`Register`].
    ///
    /// The width of `id` selects the register file: an `OperandSize::Byte`
    /// yields one of the 8-bit registers, an `OperandSize::Word` one of the
    /// 16-bit registers.
    ///
    /// # Panics
    ///
    /// Panics if the wrapped ID is larger than `0b111`.
    pub fn by_id(id: OperandSize) -> Self {
        match id {
            // 8-bit register file
            OperandSize::Byte(0b000) => Self::AL,
            OperandSize::Byte(0b001) => Self::CL,
            OperandSize::Byte(0b010) => Self::DL,
            OperandSize::Byte(0b011) => Self::BL,
            OperandSize::Byte(0b100) => Self::AH,
            OperandSize::Byte(0b101) => Self::CH,
            OperandSize::Byte(0b110) => Self::DH,
            OperandSize::Byte(0b111) => Self::BH,
            OperandSize::Byte(_) => panic!("Invalid 8bit register ID encountered"),
            // 16-bit register file
            OperandSize::Word(0b000) => Self::AX,
            OperandSize::Word(0b001) => Self::CX,
            OperandSize::Word(0b010) => Self::DX,
            OperandSize::Word(0b011) => Self::BX,
            OperandSize::Word(0b100) => Self::SP,
            OperandSize::Word(0b101) => Self::BP,
            OperandSize::Word(0b110) => Self::SI,
            OperandSize::Word(0b111) => Self::DI,
            OperandSize::Word(_) => panic!("Invalid 16bit register ID encountered"),
        }
    }
}
impl fmt::Display for Register {
    /// Writes the register's upper-case name (e.g. `AX`, `BL`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Look the name up first, then emit it with a single write.
        let name = match self {
            Self::AL => "AL",
            Self::CL => "CL",
            Self::DL => "DL",
            Self::BL => "BL",
            Self::AH => "AH",
            Self::CH => "CH",
            Self::DH => "DH",
            Self::BH => "BH",
            Self::AX => "AX",
            Self::CX => "CX",
            Self::DX => "DX",
            Self::BX => "BX",
            Self::SP => "SP",
            Self::BP => "BP",
            Self::SI => "SI",
            Self::DI => "DI",
        };
        f.write_str(name)
    }
}
/// Segment Registers of a 8086 processor
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum SegmentRegister {
    /// Data segment.
    DS,
    /// Extra segment.
    ES,
    /// Stack segment.
    SS,
    /// Code segment.
    CS,
}

#[allow(dead_code)]
impl SegmentRegister {
    /// Find the SRegister corresponding to the 8086 bytecode ID.
    ///
    /// The ID is the two-bit segment register field of an instruction:
    /// `0b00` = ES, `0b01` = CS, `0b10` = SS, `0b11` = DS.
    ///
    /// # Panics
    ///
    /// Panics if `id` does not fit into two bits (i.e. is greater than 3).
    pub fn by_id(id: u8) -> Self {
        match id {
            // Binary literals on purpose: hex `0x10`/`0x11` would be 16/17
            // and could never match a two-bit field.
            0b00 => Self::ES,
            0b01 => Self::CS,
            0b10 => Self::SS,
            0b11 => Self::DS,
            _ => panic!("Invalid segment register ID encountered"),
        }
    }
}
impl fmt::Display for SegmentRegister {
    /// Writes the segment register's upper-case name (e.g. `DS`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Self::ES => "ES",
            Self::CS => "CS",
            Self::SS => "SS",
            Self::DS => "DS",
        };
        f.write_str(name)
    }
}
/// An immediate byte value for an instruction.
///
/// Newtype around the 8-bit alias `b`; the wrapped value is public.
#[derive(Debug, Clone)]
pub struct ImmediateByte(pub b);
/// An immediate word value for an instruction.
///
/// Newtype around the 16-bit alias `w`; the wrapped value is public.
#[derive(Debug, Clone)]
pub struct ImmediateWord(pub w);
// Implements `Display` and `LowerHex` for a single-field tuple struct by
// forwarding to the wrapped value (`self.0`).
//
// Both impls hand the caller's formatter straight to the inner value, so
// width, fill, alignment and `#` flags given at the call site (for example
// `{:>5}` or `{:04x}`) are honoured for both traits instead of being
// dropped by an intermediate `write!`.
macro_rules! impl_display_and_lowerhex {
    ($name:ident) => {
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(&self.0, f)
            }
        }
        impl std::fmt::LowerHex for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::LowerHex::fmt(&self.0, f)
            }
        }
    };
}
// Give both immediate wrappers their `Display` and `LowerHex` impls.
impl_display_and_lowerhex!(ImmediateByte);
impl_display_and_lowerhex!(ImmediateWord);
#[derive(Debug, Clone)]
/// ModRM byte can either target a memory location or some register
pub enum ModRmTarget {
/// The operand is a memory location described by a [`MemoryIndex`].
Memory(MemoryIndex),
/// The operand is a [`Register`].
Register(Register),
}
impl std::fmt::Display for ModRmTarget {
    /// Prints the wrapped operand using that operand's own `Display` impl.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        // Pick whichever operand is present, then render it in one place.
        let operand: &dyn std::fmt::Display = match self {
            Self::Memory(location) => location,
            Self::Register(register) => register,
        };
        write!(f, "{}", operand)
    }
}
/// A memory index operand is usually created by ModRM bytes or words.
/// e.g. [bx+si]
///
/// Every component is optional, so the same type can hold any combination
/// of base register, index register and displacement.
#[derive(Debug, Clone)]
pub struct MemoryIndex {
/// Base register of the address expression, if any.
pub base: Option<Register>,
/// Index register of the address expression, if any.
pub index: Option<Register>,
/// Byte- or word-sized displacement of the address expression, if any.
pub displacement: Option<OperandSize>,
}
impl fmt::Display for MemoryIndex {
    /// Renders the operand as a bracketed sum of its present components,
    /// e.g. `[BX + SI + 4]`, `[BP + 2]` or `[SI + 0]`.
    ///
    /// A lone register is printed with an explicit `+ 0`. An operand that
    /// consists only of a displacement (a direct address) is printed as
    /// `[disp]`.
    ///
    /// # Panics
    ///
    /// Panics if base, index and displacement are all `None`, since such an
    /// operand does not describe any address.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match (&self.base, &self.index, &self.displacement) {
            (Some(base), Some(index), Some(disp)) => {
                write!(f, "[{} + {} + {}]", base, index, disp)
            }
            (Some(base), Some(index), None) => write!(f, "[{} + {}]", base, index),
            // Exactly one register plus a displacement. The index-only case
            // is bracketed like every other form.
            (Some(reg), None, Some(disp)) | (None, Some(reg), Some(disp)) => {
                write!(f, "[{} + {}]", reg, disp)
            }
            // Exactly one register and nothing else.
            (Some(reg), None, None) | (None, Some(reg), None) => write!(f, "[{} + 0]", reg),
            // Displacement only: a direct address.
            (None, None, Some(disp)) => write!(f, "[{}]", disp),
            (None, None, None) => panic!("Invalid MemoryIndex encountered"),
        }
    }
}
/// Can be used to encode either byte or word operands
#[derive(Debug, Clone)]
#[allow(dead_code)]
pub enum OperandSize {
    /// An 8-bit value.
    Byte(u8),
    /// A 16-bit value.
    Word(u16),
}

impl fmt::Display for OperandSize {
    /// Prints the wrapped value in decimal, whatever its width.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Widening a byte to a word does not change its decimal rendering,
        // so both variants can share a single write.
        let value = match *self {
            Self::Byte(byte) => u16::from(byte),
            Self::Word(word) => word,
        };
        write!(f, "{}", value)
    }
}