fix: cleanup structs into correct files
This commit is contained in:
266
src/disasm.rs
266
src/disasm.rs
@@ -2,13 +2,14 @@ use core::fmt;
|
||||
use std::{fs::File, io::Read, process::exit};
|
||||
|
||||
use crate::aout::Aout;
|
||||
use crate::instructions::MemoryIndex;
|
||||
use crate::instructions::{Displacement, MemoryIndex};
|
||||
use crate::{
|
||||
Args,
|
||||
instructions::{ImmediateByte, ImmediateWord, Instruction, MetaInstruction, Register},
|
||||
instructions::{ImmediateByte, ImmediateWord, Instruction, Opcode, Register},
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
/// Generic errors, which are encountered during parsing.
|
||||
pub enum DisasmError {
|
||||
NoFile(Option<String>),
|
||||
IoError(std::io::Error),
|
||||
@@ -29,13 +30,12 @@ impl fmt::Display for DisasmError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Disassemble the binary in `path` into a vector of instructions
|
||||
/// This function just calls all other
|
||||
pub fn disasm(args: &Args) -> Result<Vec<MetaInstruction>, DisasmError> {
|
||||
/// Disassemble the binary in `path` into a vector of instructions.
|
||||
/// Main entry point to the disassembly.
|
||||
pub fn disasm(args: &Args) -> Result<Vec<Instruction>, DisasmError> {
|
||||
let contents = path_to_buf(args)?;
|
||||
let aout = Aout::new(contents);
|
||||
|
||||
// XXX: 00 is just 0, maybe this could be a problem?
|
||||
log::debug!("{:?}", aout);
|
||||
|
||||
let instructions = decode_instructions(&aout)?;
|
||||
@@ -43,7 +43,7 @@ pub fn disasm(args: &Args) -> Result<Vec<MetaInstruction>, DisasmError> {
|
||||
Ok(instructions)
|
||||
}
|
||||
|
||||
/// Read a filepath into a buffer
|
||||
/// Read a filepath into a u8 buffer.
|
||||
fn path_to_buf(args: &Args) -> Result<Vec<u8>, DisasmError> {
|
||||
let path = args
|
||||
.path
|
||||
@@ -57,7 +57,7 @@ fn path_to_buf(args: &Args) -> Result<Vec<u8>, DisasmError> {
|
||||
}
|
||||
|
||||
/// Decode instructions from the text section of the provided binary
|
||||
fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError> {
|
||||
fn decode_instructions(aout: &Aout) -> Result<Vec<Instruction>, DisasmError> {
|
||||
// naive approach:
|
||||
// 1. read byte
|
||||
// 2. pattern match to see which instruction it is
|
||||
@@ -65,38 +65,38 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
|
||||
// repeat until no bytes left
|
||||
|
||||
let mut instructions = Vec::new();
|
||||
let mut offset = 0;
|
||||
let mut disassembler = Disassembler {
|
||||
offset: 0,
|
||||
text: aout.text.clone(),
|
||||
};
|
||||
|
||||
let text = &aout.text;
|
||||
while offset < aout.text.len() {
|
||||
let mut instr = MetaInstruction::new();
|
||||
instr.start = offset;
|
||||
while disassembler.offset < disassembler.text.len() {
|
||||
let mut instr = Instruction::new();
|
||||
instr.start = disassembler.offset;
|
||||
|
||||
let opcode = text[offset];
|
||||
let opcode = disassembler.text[disassembler.offset];
|
||||
instr.raw.push(opcode);
|
||||
match opcode {
|
||||
// ADD
|
||||
0x00 => {
|
||||
let (mem_index, mut raw) = parse_modrm_byte(&mut offset, text);
|
||||
let reg = parse_byte(&mut offset, text);
|
||||
instr.size = 2 + raw.len();
|
||||
instr.raw = Vec::from([opcode]);
|
||||
let (mem_index, mut raw) = disassembler.parse_modrm_byte();
|
||||
let reg = disassembler.parse_byte();
|
||||
instr.raw.append(&mut raw);
|
||||
instr.raw.push(reg);
|
||||
instr.instruction = Instruction::ADD_EbGb(mem_index, Register::by_id(reg));
|
||||
instr.opcode = Opcode::ADD_EbGb(mem_index, Register::by_id(reg));
|
||||
}
|
||||
// INT
|
||||
0xCD => {
|
||||
let byte = parse_byte(&mut offset, text);
|
||||
instr.size = 2;
|
||||
instr.raw = Vec::from([opcode, byte]);
|
||||
instr.instruction = Instruction::INT(ImmediateByte(byte));
|
||||
let byte = disassembler.parse_byte();
|
||||
instr.raw.push(byte);
|
||||
instr.opcode = Opcode::INT(ImmediateByte(byte));
|
||||
}
|
||||
// MOV
|
||||
0xBB => {
|
||||
let (word, raw) = parse_word(&mut offset, text);
|
||||
instr.size = 3;
|
||||
instr.raw = Vec::from([opcode, raw.0, raw.1]);
|
||||
instr.instruction = Instruction::MOV_BXIv(Register::BX, ImmediateWord(word));
|
||||
let (word, raw) = disassembler.parse_word();
|
||||
instr.raw.push(raw.0);
|
||||
instr.raw.push(raw.1);
|
||||
instr.opcode = Opcode::MOV_BXIv(Register::BX, ImmediateWord(word));
|
||||
}
|
||||
_ => {
|
||||
eprintln!("Encountered unknown instruction '0x{:x}'", opcode);
|
||||
@@ -113,117 +113,111 @@ fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError>
|
||||
Ok(instructions)
|
||||
}
|
||||
|
||||
/// Read the byte that follows the current position and step past it.
///
/// `offset` is first moved forward by one, the byte at that new position is
/// read, and `offset` is then moved forward by one more. On return `offset`
/// is therefore two higher than on entry and points one past the byte that
/// was returned.
///
/// Accepts any byte slice; a `&Vec<u8>` argument coerces automatically.
///
/// # Panics
///
/// Panics if `*offset + 1` is not a valid index into `text`.
pub fn parse_byte(offset: &mut usize, text: &[u8]) -> u8 {
    let position = *offset + 1;
    let byte = text[position];
    *offset = position + 1;
    byte
}
|
||||
/// Read the little-endian word that follows the current position and step past it.
///
/// `offset` is first moved forward by one; the two bytes starting at that new
/// position are read and `offset` is then moved forward by two more. On
/// return `offset` is three higher than on entry and points one past the
/// second byte that was read.
///
/// Returns the decoded word together with the two raw bytes in the order
/// they appear in `text` (low byte first).
///
/// Accepts any byte slice; a `&Vec<u8>` argument coerces automatically.
///
/// # Panics
///
/// Panics if `*offset + 2` is not a valid index into `text`.
pub fn parse_word(offset: &mut usize, text: &[u8]) -> (u16, (u8, u8)) {
    let start = *offset + 1;
    let (low, high) = (text[start], text[start + 1]);
    *offset = start + 2;
    (u16::from_le_bytes([low, high]), (low, high))
}
|
||||
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
|
||||
pub fn parse_modrm_byte(offset: &mut usize, text: &Vec<u8>) -> (MemoryIndex, Vec<u8>) {
|
||||
// Calculate ModRM byte with bitmask
|
||||
let opcode = text[*offset];
|
||||
let modulo = opcode >> 6;
|
||||
let reg = (opcode >> 3) & 7;
|
||||
let rm = opcode & 7;
|
||||
|
||||
let mut displacement_raw = Vec::new();
|
||||
let displacement = match modulo {
|
||||
0 => {
|
||||
if rm == 6 {
|
||||
// XXX: handle special case
|
||||
panic!("Handle modulo == 0, rm == 6");
|
||||
}
|
||||
None
|
||||
}
|
||||
1 => {
|
||||
*offset += 2; // one additional byte was read
|
||||
let byte = parse_byte(offset, text);
|
||||
displacement_raw.push(byte);
|
||||
log::debug!("Additional byte during ModRM parsing was read.");
|
||||
Some(Displacement::Byte(byte))
|
||||
}
|
||||
2 => {
|
||||
*offset += 3; // two additional bytes (word) was read
|
||||
let (word, raw) = parse_word(offset, text);
|
||||
displacement_raw.push(raw.0);
|
||||
displacement_raw.push(raw.1);
|
||||
log::debug!("Additional two bytes during ModRM parsing was read.");
|
||||
Some(Displacement::Word(word))
|
||||
}
|
||||
3 => panic!("TODO: handle modulo == 3"),
|
||||
_ => panic!("Invalid ModRM byte encountered"),
|
||||
};
|
||||
|
||||
let index = match rm {
|
||||
0 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
1 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
2 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
3 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
4 => MemoryIndex {
|
||||
base: None,
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
5 => MemoryIndex {
|
||||
base: None,
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
6 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: None,
|
||||
displacement,
|
||||
},
|
||||
7 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: None,
|
||||
displacement,
|
||||
},
|
||||
_ => panic!("Invalid ModRM byte encountered"),
|
||||
};
|
||||
|
||||
return (index, displacement_raw);
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
/// Displacement for ModRM
|
||||
pub enum Displacement {
|
||||
Byte(u8),
|
||||
Word(u16),
|
||||
struct Disassembler {
|
||||
pub offset: usize,
|
||||
pub text: Vec<u8>,
|
||||
}
|
||||
|
||||
impl fmt::Display for Displacement {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::Byte(byte) => write!(f, "{}", byte),
|
||||
Self::Word(word) => write!(f, "{}", word),
|
||||
}
|
||||
impl Disassembler {
|
||||
/// Parse a single byte of binary, return it and advance the offset.
|
||||
/// Returns the read byte.
|
||||
pub fn parse_byte(&mut self) -> u8 {
|
||||
self.offset += 1;
|
||||
let byte = self.text[self.offset];
|
||||
self.offset += 1;
|
||||
byte
|
||||
}
|
||||
/// Parse a single word of binary, return it and advance the offset.
|
||||
/// Returns the read word and a tuple of the read raw bytes
|
||||
pub fn parse_word(&mut self) -> (u16, (u8, u8)) {
|
||||
self.offset += 1;
|
||||
let byte1 = self.text[self.offset];
|
||||
let byte2 = self.text[self.offset + 1];
|
||||
self.offset += 2;
|
||||
(u16::from_le_bytes([byte1, byte2]), (byte1, byte2))
|
||||
}
|
||||
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
|
||||
/// Returns the parsed modrm memory access, as well as all read raw bytes
|
||||
pub fn parse_modrm_byte(&mut self) -> (MemoryIndex, Vec<u8>) {
|
||||
// Calculate ModRM byte with bitmask
|
||||
let opcode = self.text[self.offset];
|
||||
let modulo = opcode >> 6;
|
||||
let reg = (opcode >> 3) & 7;
|
||||
let rm = opcode & 7;
|
||||
|
||||
let mut displacement_raw = Vec::new();
|
||||
let displacement = match modulo {
|
||||
0 => {
|
||||
if rm == 6 {
|
||||
// XXX: handle special case
|
||||
panic!("Handle modulo == 0, rm == 6");
|
||||
}
|
||||
None
|
||||
}
|
||||
1 => {
|
||||
self.offset += 2; // one additional byte was read
|
||||
let byte = self.parse_byte();
|
||||
displacement_raw.push(byte);
|
||||
log::debug!("Additional byte during ModRM parsing was read.");
|
||||
Some(Displacement::Byte(byte))
|
||||
}
|
||||
2 => {
|
||||
self.offset += 3; // two additional bytes (word) was read
|
||||
let (word, raw) = self.parse_word();
|
||||
displacement_raw.push(raw.0);
|
||||
displacement_raw.push(raw.1);
|
||||
log::debug!("Additional two bytes during ModRM parsing was read.");
|
||||
Some(Displacement::Word(word))
|
||||
}
|
||||
3 => panic!("TODO: handle modulo == 3"),
|
||||
_ => panic!("Invalid ModRM byte encountered"),
|
||||
};
|
||||
|
||||
let index = match rm {
|
||||
0 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
1 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
2 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
3 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
4 => MemoryIndex {
|
||||
base: None,
|
||||
index: Some(Register::SI),
|
||||
displacement,
|
||||
},
|
||||
5 => MemoryIndex {
|
||||
base: None,
|
||||
index: Some(Register::DI),
|
||||
displacement,
|
||||
},
|
||||
6 => MemoryIndex {
|
||||
base: Some(Register::BP),
|
||||
index: None,
|
||||
displacement,
|
||||
},
|
||||
7 => MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: None,
|
||||
displacement,
|
||||
},
|
||||
_ => panic!("Invalid ModRM byte encountered"),
|
||||
};
|
||||
|
||||
return (index, displacement_raw);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user