ft: add modrm parsing
This commit is contained in:
146
src/disasm.rs
146
src/disasm.rs
@@ -2,6 +2,7 @@ use core::fmt;
|
||||
use std::{fs::File, io::Read, process::exit};
|
||||
|
||||
use crate::aout::Aout;
|
||||
use crate::instructions::MemoryIndex;
|
||||
use crate::{
|
||||
Args,
|
||||
instructions::{ImmediateByte, ImmediateWord, Instruction, MetaInstruction, Register},
|
||||
@@ -63,7 +64,7 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
|
||||
// 3. read as many bytes as this instruction needs (registers, immediates, ...)
|
||||
// repeat until no bytes left
|
||||
|
||||
let instructions = Vec::new();
|
||||
let mut instructions = Vec::new();
|
||||
let mut offset = 0;
|
||||
|
||||
let text = &aout.text;
|
||||
@@ -73,19 +74,29 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
|
||||
|
||||
let opcode = text[offset];
|
||||
match opcode {
|
||||
// 0x00 => {} // ADD
|
||||
// ADD
|
||||
0x00 => {
|
||||
let (mem_index, mut raw) = parse_modrm_byte(&mut offset, text);
|
||||
let reg = parse_byte(&mut offset, text);
|
||||
instr.size = 2 + raw.len();
|
||||
instr.raw = Vec::from([opcode]);
|
||||
instr.raw.append(&mut raw);
|
||||
instr.raw.push(reg);
|
||||
instr.instruction = Instruction::ADD_EbGb(mem_index, Register::by_id(reg));
|
||||
}
|
||||
// INT
|
||||
0xCD => {
|
||||
instr.take_n_bytes(2, &mut offset, text);
|
||||
instr.instruction = Instruction::INT(ImmediateByte(instr.raw[1]));
|
||||
let byte = parse_byte(&mut offset, text);
|
||||
instr.size = 2;
|
||||
instr.raw = Vec::from([opcode, byte]);
|
||||
instr.instruction = Instruction::INT(ImmediateByte(byte));
|
||||
}
|
||||
// MOV
|
||||
0xBB => {
|
||||
instr.take_n_bytes(3, &mut offset, text);
|
||||
instr.instruction = Instruction::MOV_RI(
|
||||
Register::BX,
|
||||
ImmediateWord(u16::from_le_bytes([instr.raw[1], instr.raw[2]])),
|
||||
);
|
||||
let (word, raw) = parse_word(&mut offset, text);
|
||||
instr.size = 3;
|
||||
instr.raw = Vec::from([opcode, raw.0, raw.1]);
|
||||
instr.instruction = Instruction::MOV_BXIv(Register::BX, ImmediateWord(word));
|
||||
}
|
||||
_ => {
|
||||
eprintln!("Encountered unknown instruction '0x{:x}'", opcode);
|
||||
@@ -96,8 +107,123 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
|
||||
};
|
||||
|
||||
println!("{}", instr);
|
||||
// dbg!(&instr);
|
||||
instructions.push(instr);
|
||||
}
|
||||
|
||||
Ok(instructions)
|
||||
}
|
||||
|
||||
/// Read the byte that follows the current position and step past it.
///
/// `*offset` is expected to index the byte *before* the one to read (the
/// decoder calls this while `offset` still sits on the opcode). The function
/// skips that byte, reads the next one and leaves `*offset` directly behind
/// the byte it returned, so `*offset` grows by 2 in total.
///
/// Because of the leading skip, two back-to-back calls do not read two
/// adjacent bytes; the second call skips one byte first.
///
/// # Panics
///
/// Panics if `*offset + 1` is not a valid index into `text`.
pub fn parse_byte(offset: &mut usize, text: &[u8]) -> u8 {
    // Skip the byte `offset` currently points at.
    *offset += 1;
    let byte = text[*offset];
    // Land on the first byte after the one just read.
    *offset += 1;
    byte
}
|
||||
/// Read the little-endian 16-bit word that follows the current position.
///
/// `*offset` is expected to index the byte *before* the word (the decoder
/// calls this while `offset` still sits on the opcode). The function skips
/// that byte, reads the next two bytes and leaves `*offset` directly behind
/// them, so `*offset` grows by 3 in total.
///
/// Returns `(word, (low, high))`: the assembled value plus the two raw bytes
/// in the order they appear in `text`.
///
/// # Panics
///
/// Panics if `*offset + 1` or `*offset + 2` is not a valid index into `text`.
pub fn parse_word(offset: &mut usize, text: &[u8]) -> (u16, (u8, u8)) {
    // Skip the byte `offset` currently points at.
    *offset += 1;
    let low = text[*offset];
    let high = text[*offset + 1];
    // Land on the first byte after the word.
    *offset += 2;
    (u16::from_le_bytes([low, high]), (low, high))
}
|
||||
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
|
||||
pub fn parse_modrm_byte(offset: &mut usize, text: &Vec<u8>) -> (MemoryIndex, Vec<u8>) {
|
||||
// Calculate ModRM byte with bitmask
|
||||
let opcode = text[*offset];
|
||||
let modulo = opcode >> 6;
|
||||
let reg = (opcode >> 3) & 7;
|
||||
let rm = opcode & 7;
|
||||
|
||||
let mut displacement_raw = Vec::new();
|
||||
let displacement = match modulo {
|
||||
0 => {
|
||||
if rm == 6 {
|
||||
// XXX: handle special case
|
||||
panic!("Handle modulo == 0, rm == 6");
|
||||
}
|
||||
None
|
||||
}
|
||||
1 => {
|
||||
*offset += 2; // one additional byte was read
|
||||
let byte = parse_byte(offset, text);
|
||||
displacement_raw.push(byte);
|
||||
log::debug!("Additional byte during ModRM parsing was read.");
|
||||
Some(Displacement::Byte(byte))
|
||||
}
|
||||
2 => {
|
||||
*offset += 3; // two additional bytes (word) was read
|
||||
let (word, raw) = parse_word(offset, text);
|
||||
displacement_raw.push(raw.0);
|
||||
displacement_raw.push(raw.1);
|
||||
log::debug!("Additional two bytes during ModRM parsing was read.");
|
||||
Some(Displacement::Word(word))
|
||||
}
|
||||
3 => panic!("TODO: handle modulo == 3"),
|
||||
_ => panic!("Invalid ModRM byte encountered"),
|
||||
};
|
||||
|
||||
let index = match rm {
|
||||
0 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
1 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
2 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
3 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
4 => MemoryIndex {
|
||||
base: None,
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
5 => MemoryIndex {
|
||||
base: None,
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
6 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: None,
|
||||
displacement,
|
||||
},
|
||||
7 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: None,
|
||||
displacement,
|
||||
},
|
||||
_ => panic!("Invalid ModRM byte encountered"),
|
||||
};
|
||||
|
||||
return (index, displacement_raw);
|
||||
}
|
||||
|
||||
/// Displacement that accompanies a ModRM byte.
///
/// The ModRM parser produces [`Displacement::Byte`] for `mod == 1` and
/// [`Displacement::Word`] for `mod == 2`.
///
/// NOTE(review): values are stored and printed as unsigned integers —
/// confirm whether 8-bit displacements should be shown sign-extended.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[allow(dead_code)]
pub enum Displacement {
    /// One displacement byte.
    Byte(u8),
    /// One displacement word, assembled from two little-endian bytes.
    Word(u16),
}

impl fmt::Display for Displacement {
    /// Writes the displacement as a plain decimal number.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::Byte(byte) => write!(f, "{}", byte),
            Self::Word(word) => write!(f, "{}", word),
        }
    }
}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
use core::fmt;
|
||||
|
||||
use crate::disasm::Displacement;
|
||||
|
||||
pub type MemAddress = u8;
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -23,18 +25,6 @@ impl MetaInstruction {
|
||||
instruction: Instruction::NOP(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse n bytes from text section and advance offset.
|
||||
/// Used to get the operands.
|
||||
pub fn take_n_bytes(&mut self, size: usize, offset: &mut usize, text: &Vec<u8>) {
|
||||
self.size = size;
|
||||
self.raw = text[*offset as usize..]
|
||||
.iter()
|
||||
.take(size)
|
||||
.cloned()
|
||||
.collect();
|
||||
*offset += size;
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MetaInstruction {
|
||||
@@ -47,26 +37,45 @@ impl fmt::Display for MetaInstruction {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MemoryIndex {
|
||||
pub base: Option<Register>,
|
||||
pub index: Option<Register>,
|
||||
pub displacement: Option<Displacement>,
|
||||
}
|
||||
|
||||
impl fmt::Display for MemoryIndex {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match &self.base {
|
||||
Some(base) => match &self.index {
|
||||
Some(index) => match &self.displacement {
|
||||
Some(displacement) => write!(f, "[{}+{}+{}]", base, index, displacement),
|
||||
None => write!(f, "[{}+{}]", base, index),
|
||||
},
|
||||
None => match &self.displacement {
|
||||
Some(displacement) => write!(f, "[{}+{}]", base, displacement),
|
||||
None => write!(f, "[{}]", base),
|
||||
},
|
||||
},
|
||||
None => match &self.index {
|
||||
Some(index) => match &self.displacement {
|
||||
Some(displacement) => write!(f, "{}+{}", index, displacement),
|
||||
None => write!(f, "[{}]", index),
|
||||
},
|
||||
None => panic!("Invalid MemoryIndex encountered"),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code, non_camel_case_types)]
|
||||
pub enum Instruction {
|
||||
NOP(),
|
||||
// ADD
|
||||
ADD_RM(Register, Memory),
|
||||
ADD_MR(Memory, Register),
|
||||
ADD_RR(Register, Register),
|
||||
ADD_MI(Memory, ImmediateByte),
|
||||
ADD_RI(Register, ImmediateByte),
|
||||
ADD_EbGb(MemoryIndex, Register),
|
||||
// MOV
|
||||
MOV_RM(Register, Memory),
|
||||
MOV_MR(Memory, Register),
|
||||
MOV_RR(Register, Register),
|
||||
MOV_MI(Memory, ImmediateByte),
|
||||
MOV_RI(Register, ImmediateWord),
|
||||
MOV_SM(SRegister, Memory),
|
||||
MOV_MS(Memory, SRegister),
|
||||
MOV_RS(Register, SRegister),
|
||||
MOV_SR(SRegister, Register),
|
||||
MOV_BXIv(Register, ImmediateWord),
|
||||
// INT
|
||||
INT(ImmediateByte),
|
||||
}
|
||||
@@ -75,7 +84,8 @@ impl fmt::Display for Instruction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::INT(byte) => write!(f, "INT, {:x}", byte),
|
||||
Self::MOV_RI(reg, word) => write!(f, "MOV {}, {:04x}", reg, word),
|
||||
Self::ADD_EbGb(mem, reg) => write!(f, "ADD {}, {}", mem, reg),
|
||||
Self::MOV_BXIv(reg, word) => write!(f, "MOV {}, {:04x}", reg, word),
|
||||
_ => write!(f, "display not yet implemented"),
|
||||
}
|
||||
}
|
||||
@@ -84,38 +94,43 @@ impl fmt::Display for Instruction {
|
||||
// Types for operand encoding
|
||||
#[derive(Debug)]
|
||||
pub struct Memory(pub MemAddress);
|
||||
// b: 8, w: 16, v: 16 -> i just treat v and w the same, if nothing blows up
|
||||
#[derive(Debug)]
|
||||
pub struct ImmediateByte(pub u8);
|
||||
#[derive(Debug)]
|
||||
pub struct ImmediateWord(pub u16);
|
||||
|
||||
// ... and the displays for all of them
|
||||
impl fmt::Display for Memory {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
/// Implements [`std::fmt::Display`] for a single-field tuple struct by
/// forwarding to the `Display` impl of field `.0`.
///
/// Forwarding (instead of `write!(f, "{}", self.0)`) keeps the caller's
/// formatting flags such as width, fill and zero-padding; plain `{}` output is
/// unchanged.
macro_rules! impl_display {
    ($name:ident) => {
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(&self.0, f)
            }
        }
    };
}
|
||||
impl fmt::Display for ImmediateByte {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
impl fmt::LowerHex for ImmediateByte {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fmt::LowerHex::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
impl fmt::Display for ImmediateWord {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
impl fmt::LowerHex for ImmediateWord {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fmt::LowerHex::fmt(&self.0, f)
|
||||
}
|
||||
|
||||
/// Implements [`std::fmt::Display`] and [`std::fmt::LowerHex`] for a
/// single-field tuple struct by forwarding both to field `.0`.
///
/// Both impls forward the formatter itself, so width, fill and zero-padding
/// (e.g. `{:04x}` or `{:>5}`) are honoured for either trait; plain `{}` and
/// `{:x}` output is unchanged.
macro_rules! impl_display_and_lowerhex {
    ($name:ident) => {
        impl std::fmt::Display for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Display::fmt(&self.0, f)
            }
        }

        impl std::fmt::LowerHex for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::LowerHex::fmt(&self.0, f)
            }
        }
    };
}
|
||||
|
||||
impl_display!(Memory);
|
||||
impl_display_and_lowerhex!(ImmediateByte);
|
||||
impl_display_and_lowerhex!(ImmediateWord);
|
||||
|
||||
/// Registers of a 8086 processor
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
@@ -138,6 +153,32 @@ pub enum Register {
|
||||
SP,
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl Register {
|
||||
/// Find the register corresponding to the 8086 bytecode ID
|
||||
pub fn by_id(id: u8) -> Self {
|
||||
match id {
|
||||
0x00 => Self::AL,
|
||||
0x01 => Self::CL,
|
||||
0x02 => Self::DL,
|
||||
0x03 => Self::BL,
|
||||
0x04 => Self::AH,
|
||||
0x05 => Self::CH,
|
||||
0x06 => Self::DH,
|
||||
0x07 => Self::BH,
|
||||
0x10 => Self::AX,
|
||||
0x11 => Self::CX,
|
||||
0x12 => Self::DX,
|
||||
0x13 => Self::BX,
|
||||
0x14 => Self::SP,
|
||||
0x15 => Self::BP,
|
||||
0x16 => Self::SI,
|
||||
0x17 => Self::DI,
|
||||
_ => panic!("Invalid register ID encountered"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Register {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
@@ -161,17 +202,31 @@ impl fmt::Display for Register {
|
||||
}
|
||||
}
|
||||
|
||||
/// SRegisters of a 8086 processor
|
||||
/// Segment Registers of a 8086 processor
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
pub enum SRegister {
|
||||
pub enum SegmentRegister {
|
||||
DS,
|
||||
ES,
|
||||
SS,
|
||||
CS,
|
||||
}
|
||||
|
||||
impl fmt::Display for SRegister {
|
||||
#[allow(dead_code)]
|
||||
impl SegmentRegister {
|
||||
/// Find the SRegister corresponding to the 8086 bytecode ID
|
||||
pub fn by_id(id: u8) -> Self {
|
||||
match id {
|
||||
0x30 => Self::ES,
|
||||
0x31 => Self::CS,
|
||||
0x32 => Self::SS,
|
||||
0x33 => Self::DS,
|
||||
_ => panic!("Invalid segment register ID encountered"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SegmentRegister {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::DS => write!(f, "DS"),
|
||||
|
||||
Reference in New Issue
Block a user