ft: add modrm parsing

This commit is contained in:
2025-05-07 22:46:58 +09:00
parent 2af4578c8b
commit 849895a437
3 changed files with 550 additions and 63 deletions

View File

@@ -1,5 +1,7 @@
use core::fmt;
use crate::disasm::Displacement;
pub type MemAddress = u8;
#[derive(Debug)]
@@ -23,18 +25,6 @@ impl MetaInstruction {
instruction: Instruction::NOP(),
}
}
/// Copies up to `size` bytes of `text`, starting at `*offset`, into `self.raw`,
/// stores `size` in `self.size`, and advances `*offset` by `size`.
/// Used to get the operands.
///
/// If fewer than `size` bytes remain after `*offset`, `self.raw` holds only the
/// bytes that were available, while `self.size` and `*offset` still move by the
/// full `size`.
///
/// # Panics
///
/// Panics if `*offset` is greater than `text.len()`.
pub fn take_n_bytes(&mut self, size: usize, offset: &mut usize, text: &Vec<u8>) {
    self.size = size;
    let start = *offset;
    let remaining = &text[start..];
    self.raw = remaining.iter().cloned().take(size).collect();
    *offset = start + size;
}
}
impl fmt::Display for MetaInstruction {
@@ -47,26 +37,45 @@ impl fmt::Display for MetaInstruction {
}
}
/// A memory operand made of an optional base register, an optional index
/// register and an optional displacement.
#[derive(Debug)]
pub struct MemoryIndex {
    /// Base register, if any.
    pub base: Option<Register>,
    /// Index register, if any.
    pub index: Option<Register>,
    /// Displacement added to the register part, if any.
    pub displacement: Option<Displacement>,
}

impl fmt::Display for MemoryIndex {
    /// Writes the operand as `[part+part+...]`, listing the base, index and
    /// displacement (in that order) that are present.
    ///
    /// # Panics
    ///
    /// Panics when neither `base` nor `index` is set, because such a value is
    /// treated as invalid here.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match (&self.base, &self.index, &self.displacement) {
            (Some(base), Some(index), Some(disp)) => write!(f, "[{}+{}+{}]", base, index, disp),
            (Some(base), Some(index), None) => write!(f, "[{}+{}]", base, index),
            (Some(base), None, Some(disp)) => write!(f, "[{}+{}]", base, disp),
            (Some(base), None, None) => write!(f, "[{}]", base),
            // This form used to be printed without the surrounding brackets,
            // unlike every other form.
            (None, Some(index), Some(disp)) => write!(f, "[{}+{}]", index, disp),
            (None, Some(index), None) => write!(f, "[{}]", index),
            (None, None, _) => panic!("Invalid MemoryIndex encountered"),
        }
    }
}
/// Instruction forms known to this module.
///
/// Each variant names a mnemonic followed by a suffix for its operand form;
/// the payload types list the operands in order.
#[derive(Debug)]
#[allow(dead_code, non_camel_case_types)]
pub enum Instruction {
/// Takes no operands.
NOP(),
// ADD
ADD_RM(Register, Memory),
ADD_MR(Memory, Register),
ADD_RR(Register, Register),
ADD_MI(Memory, ImmediateByte),
ADD_RI(Register, ImmediateByte),
/// Memory-index destination with a register source.
ADD_EbGb(MemoryIndex, Register),
// MOV
MOV_RM(Register, Memory),
MOV_MR(Memory, Register),
MOV_RR(Register, Register),
MOV_MI(Memory, ImmediateByte),
MOV_RI(Register, ImmediateWord),
MOV_SM(SRegister, Memory),
MOV_MS(Memory, SRegister),
MOV_RS(Register, SRegister),
MOV_SR(SRegister, Register),
/// Register destination with a 16-bit immediate source.
MOV_BXIv(Register, ImmediateWord),
// INT
/// Carries a single immediate byte.
INT(ImmediateByte),
}
@@ -75,7 +84,8 @@ impl fmt::Display for Instruction {
/// Writes the textual form of the instruction.
///
/// Only `INT`, `MOV_RI`, `ADD_EbGb` and `MOV_BXIv` have dedicated output;
/// every other variant prints a fixed placeholder message.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
// NOTE(review): the comma after the mnemonic is unlike the other arms — confirm it is intended.
Self::INT(byte) => write!(f, "INT, {:x}", byte),
// Immediate words are printed as four zero-padded lowercase hex digits.
Self::MOV_RI(reg, word) => write!(f, "MOV {}, {:04x}", reg, word),
Self::ADD_EbGb(mem, reg) => write!(f, "ADD {}, {}", mem, reg),
Self::MOV_BXIv(reg, word) => write!(f, "MOV {}, {:04x}", reg, word),
// Fallback for variants that have no formatting yet.
_ => write!(f, "display not yet implemented"),
}
}
@@ -84,38 +94,43 @@ impl fmt::Display for Instruction {
// Types for operand encoding
/// Memory operand; wraps a [`MemAddress`].
#[derive(Debug)]
pub struct Memory(pub MemAddress);
// Operand-size suffixes: b = 8 bits, w = 16 bits, v = 16 bits. `v` and `w` are
// treated identically here, on the assumption that nothing depends on the difference.
/// Immediate operand stored as a `u8`.
#[derive(Debug)]
pub struct ImmediateByte(pub u8);
/// Immediate operand stored as a `u16`.
#[derive(Debug)]
pub struct ImmediateWord(pub u16);
// ... and the displays for all of them
impl fmt::Display for Memory {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
}
/// Implements `Display` for a single-field tuple struct by delegating to the
/// `Display` implementation of its inner value (`self.0`).
///
/// Delegating to `Display::fmt` instead of `write!(f, "{}", ..)` keeps the
/// caller's width, fill and alignment flags, so `{:>4}` pads the wrapper the
/// same way it pads the inner value.
macro_rules! impl_display {
    ($name:ident) => {
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                std::fmt::Display::fmt(&self.0, f)
            }
        }
    };
}
impl fmt::Display for ImmediateByte {
    /// Formats the wrapped byte with its own `Display` implementation.
    ///
    /// The formatter is passed through unchanged so width, fill and alignment
    /// flags apply, matching how the `LowerHex` implementation forwards them.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
impl fmt::LowerHex for ImmediateByte {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.0, f)
}
}
impl fmt::Display for ImmediateWord {
    /// Formats the wrapped word with its own `Display` implementation.
    ///
    /// The formatter is passed through unchanged so width, fill and alignment
    /// flags apply to the wrapper exactly as they do to the inner `u16`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        fmt::Display::fmt(&self.0, f)
    }
}
impl fmt::LowerHex for ImmediateWord {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.0, f)
}
/// Implements both `Display` and `LowerHex` for a single-field tuple struct by
/// delegating to the matching implementation of its inner value (`self.0`).
///
/// Both impls hand the caller's formatter straight to the inner value, so
/// width, fill, alignment and `#` flags work for `{}` as well as for `{:x}`.
macro_rules! impl_display_and_lowerhex {
    ($name:ident) => {
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
                std::fmt::Display::fmt(&self.0, f)
            }
        }

        impl std::fmt::LowerHex for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::LowerHex::fmt(&self.0, f)
            }
        }
    };
}
impl_display!(Memory);
impl_display_and_lowerhex!(ImmediateByte);
impl_display_and_lowerhex!(ImmediateWord);
/// Registers of a 8086 processor
#[derive(Debug)]
#[allow(dead_code)]
@@ -138,6 +153,32 @@ pub enum Register {
SP,
}
#[allow(dead_code)]
impl Register {
    /// Maps a register ID to its [`Register`] variant.
    ///
    /// IDs `0x00..=0x07` select `AL, CL, DL, BL, AH, CH, DH, BH` and IDs
    /// `0x10..=0x17` select `AX, CX, DX, BX, SP, BP, SI, DI`.
    ///
    /// # Panics
    ///
    /// Panics on any other ID.
    pub fn by_id(id: u8) -> Self {
        // Split the ID into its group (bits above the low three) and the slot
        // inside that group (low three bits).
        let group = id >> 3;
        let slot = id & 0b111;
        match (group, slot) {
            (0, 0) => Self::AL,
            (0, 1) => Self::CL,
            (0, 2) => Self::DL,
            (0, 3) => Self::BL,
            (0, 4) => Self::AH,
            (0, 5) => Self::CH,
            (0, 6) => Self::DH,
            (0, 7) => Self::BH,
            (2, 0) => Self::AX,
            (2, 1) => Self::CX,
            (2, 2) => Self::DX,
            (2, 3) => Self::BX,
            (2, 4) => Self::SP,
            (2, 5) => Self::BP,
            (2, 6) => Self::SI,
            (2, 7) => Self::DI,
            _ => panic!("Invalid register ID encountered"),
        }
    }
}
impl fmt::Display for Register {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
@@ -161,17 +202,31 @@ impl fmt::Display for Register {
}
}
/// SRegisters of a 8086 processor
/// Segment Registers of a 8086 processor
#[derive(Debug)]
#[allow(dead_code)]
pub enum SRegister {
pub enum SegmentRegister {
DS,
ES,
SS,
CS,
}
impl fmt::Display for SRegister {
#[allow(dead_code)]
impl SegmentRegister {
    /// Maps a segment-register ID to its [`SegmentRegister`] variant.
    ///
    /// IDs `0x30`, `0x31`, `0x32` and `0x33` select `ES`, `CS`, `SS` and `DS`
    /// respectively.
    ///
    /// # Panics
    ///
    /// Panics on any other ID.
    pub fn by_id(id: u8) -> Self {
        // Rebase the ID so the four valid values become 0..=3; IDs below 0x30
        // yield `None` and fall through to the panic.
        match id.checked_sub(0x30) {
            Some(0) => Self::ES,
            Some(1) => Self::CS,
            Some(2) => Self::SS,
            Some(3) => Self::DS,
            _ => panic!("Invalid segment register ID encountered"),
        }
    }
}
impl fmt::Display for SegmentRegister {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::DS => write!(f, "DS"),