ft: add modrm parsing

This commit is contained in:
2025-05-07 22:46:58 +09:00
parent 2af4578c8b
commit 849895a437
3 changed files with 550 additions and 63 deletions

View File

@@ -2,6 +2,7 @@ use core::fmt;
use std::{fs::File, io::Read, process::exit};
use crate::aout::Aout;
use crate::instructions::MemoryIndex;
use crate::{
Args,
instructions::{ImmediateByte, ImmediateWord, Instruction, MetaInstruction, Register},
@@ -63,7 +64,7 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
// 3. read as many bytes as this instruction needs (registers, immediates, ...)
// repeat until no bytes left
let instructions = Vec::new();
let mut instructions = Vec::new();
let mut offset = 0;
let text = &aout.text;
@@ -73,19 +74,29 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
let opcode = text[offset];
match opcode {
// 0x00 => {} // ADD
// ADD
0x00 => {
let (mem_index, mut raw) = parse_modrm_byte(&mut offset, text);
let reg = parse_byte(&mut offset, text);
instr.size = 2 + raw.len();
instr.raw = Vec::from([opcode]);
instr.raw.append(&mut raw);
instr.raw.push(reg);
instr.instruction = Instruction::ADD_EbGb(mem_index, Register::by_id(reg));
}
// INT
0xCD => {
instr.take_n_bytes(2, &mut offset, text);
instr.instruction = Instruction::INT(ImmediateByte(instr.raw[1]));
let byte = parse_byte(&mut offset, text);
instr.size = 2;
instr.raw = Vec::from([opcode, byte]);
instr.instruction = Instruction::INT(ImmediateByte(byte));
}
// MOV
0xBB => {
instr.take_n_bytes(3, &mut offset, text);
instr.instruction = Instruction::MOV_RI(
Register::BX,
ImmediateWord(u16::from_le_bytes([instr.raw[1], instr.raw[2]])),
);
let (word, raw) = parse_word(&mut offset, text);
instr.size = 3;
instr.raw = Vec::from([opcode, raw.0, raw.1]);
instr.instruction = Instruction::MOV_BXIv(Register::BX, ImmediateWord(word));
}
_ => {
eprintln!("Encountered unknown instruction '0x{:x}'", opcode);
@@ -96,8 +107,123 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
};
println!("{}", instr);
// dbg!(&instr);
instructions.push(instr);
}
Ok(instructions)
}
/// Skips the byte at `*offset` and returns the byte that follows it.
///
/// On entry `*offset` is expected to index a byte that has already been
/// looked at (in `decode_instructions` this is the opcode). The function
/// returns `text[*offset + 1]` and leaves `*offset` two positions further,
/// i.e. just past the returned byte.
///
/// `text` is taken as a slice, so a `&Vec<u8>`, an array or any sub-slice can
/// be passed.
///
/// # Panics
///
/// Panics if `*offset + 1` is out of bounds for `text`.
pub fn parse_byte(offset: &mut usize, text: &[u8]) -> u8 {
    // The byte of interest sits right after the one `*offset` points at.
    let byte = text[*offset + 1];
    // One step for the skipped byte, one for the byte just read.
    *offset += 2;
    byte
}
/// Skips the byte at `*offset` and returns the little-endian word that follows it.
///
/// On entry `*offset` is expected to index a byte that has already been
/// looked at (in `decode_instructions` this is the opcode). The two bytes at
/// `*offset + 1` (low) and `*offset + 2` (high) are read, and `*offset` is
/// left three positions further, i.e. just past the word.
///
/// Returns `(word, (low_byte, high_byte))`: the decoded value plus the raw
/// bytes in the order they appear in `text`.
///
/// `text` is taken as a slice, so a `&Vec<u8>`, an array or any sub-slice can
/// be passed.
///
/// # Panics
///
/// Panics if `*offset + 1` or `*offset + 2` is out of bounds for `text`.
pub fn parse_word(offset: &mut usize, text: &[u8]) -> (u16, (u8, u8)) {
    let low = text[*offset + 1];
    let high = text[*offset + 2];
    // One step for the skipped byte, two for the word just read.
    *offset += 3;
    (u16::from_le_bytes([low, high]), (low, high))
}
/// Decodes the addressing-mode bits of the byte at `*offset` into a `MemoryIndex`.
///
/// The byte is split into `mod` (bits 7-6) and `r/m` (bits 2-0):
///
/// * `r/m` selects the base/index register pair of the returned `MemoryIndex`.
/// * `mod` selects the displacement: `0` = none, `1` = one byte read with
///   `parse_byte`, `2` = one word read with `parse_word`.
///
/// The `reg` field (bits 5-3) is not decoded by this function.
///
/// Returns the `MemoryIndex` together with the raw displacement bytes that
/// were read (empty when there is no displacement).
///
/// Effect on `*offset` (relative to its value on entry):
///
/// * `mod == 0`: unchanged.
/// * `mod == 1`: displacement is read from `text[entry + 3]`, offset ends at `entry + 4`.
/// * `mod == 2`: displacement is read from `text[entry + 4..=entry + 5]`, offset ends at `entry + 6`.
///
/// NOTE(review): the byte decoded here is `text[*offset]`, while the visible
/// caller in `decode_instructions` passes an offset that still points at the
/// opcode — confirm which byte is meant to be the ModRM byte.
/// NOTE(review): `parse_byte`/`parse_word` already skip one byte themselves, so
/// the extra `+= 2` / `+= 3` below look like they advance further than a
/// displacement directly following the ModRM byte would need — confirm the
/// intended offset contract before relying on the `mod == 1`/`2` paths.
///
/// `text` stays a `&Vec<u8>` because it is forwarded unchanged to
/// `parse_byte` and `parse_word`.
///
/// # Panics
///
/// * `mod == 0 && r/m == 6` (direct address form) — not implemented yet.
/// * `mod == 3` (register form) — not implemented yet.
/// * Any index into `text` that is out of bounds.
pub fn parse_modrm_byte(offset: &mut usize, text: &Vec<u8>) -> (MemoryIndex, Vec<u8>) {
    // Split the byte into its bit fields with shifts and masks.
    let modrm = text[*offset];
    let modulo = modrm >> 6;
    let rm = modrm & 7;

    let mut displacement_raw = Vec::new();
    let displacement = match modulo {
        0 => {
            if rm == 6 {
                // XXX: handle special case
                panic!("Handle modulo == 0, rm == 6");
            }
            None
        }
        1 => {
            // NOTE(review): see the offset remarks in the function docs.
            *offset += 2;
            let byte = parse_byte(offset, text);
            displacement_raw.push(byte);
            log::debug!("Additional byte during ModRM parsing was read.");
            Some(Displacement::Byte(byte))
        }
        2 => {
            // NOTE(review): see the offset remarks in the function docs.
            *offset += 3;
            let (word, raw) = parse_word(offset, text);
            displacement_raw.push(raw.0);
            displacement_raw.push(raw.1);
            log::debug!("Additional two bytes during ModRM parsing was read.");
            Some(Displacement::Word(word))
        }
        3 => panic!("TODO: handle modulo == 3"),
        // `modulo` is a u8 shifted right by 6, so only 0..=3 can occur.
        _ => panic!("Invalid ModRM byte encountered"),
    };

    // Register pair selected by r/m, as (base, index).
    let (base, index) = match rm {
        0 => (Some(Register::BX), Some(Register::SI)),
        1 => (Some(Register::BX), Some(Register::DI)),
        2 => (Some(Register::BP), Some(Register::SI)),
        3 => (Some(Register::BP), Some(Register::DI)),
        4 => (None, Some(Register::SI)),
        5 => (None, Some(Register::DI)),
        6 => (Some(Register::BP), None),
        7 => (Some(Register::BX), None),
        // `rm` is masked with 7, so only 0..=7 can occur.
        _ => panic!("Invalid ModRM byte encountered"),
    };

    (
        MemoryIndex {
            base,
            index,
            displacement,
        },
        displacement_raw,
    )
}
/// Displacement that accompanies a ModRM byte.
///
/// `parse_modrm_byte` produces `Byte` when the `mod` field is 1 and `Word`
/// when it is 2. The payload is the raw value as read from the instruction
/// stream.
///
/// The type is plain data, so it is `Copy` and comparable.
///
/// NOTE(review): on the 8086 an 8-bit displacement is sign-extended before it
/// is added to the address; this type stores and prints it as an unsigned
/// value — confirm that is the intended rendering.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
// NOTE(review): presumably present because the payloads are only read through
// the `Debug`/`Display` impls — confirm whether the allow is still needed.
#[allow(dead_code)]
pub enum Displacement {
    /// One-byte displacement.
    Byte(u8),
    /// Two-byte displacement, already decoded from little-endian.
    Word(u16),
}

impl fmt::Display for Displacement {
    /// Writes the displacement as an unsigned decimal number.
    ///
    /// Width, fill and other formatter flags of `f` are not applied to the
    /// number; it is always written with a plain `{}`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Displacement::Byte(value) => write!(f, "{}", value),
            Displacement::Word(value) => write!(f, "{}", value),
        }
    }
}