fix: don't interpret padding as instructions
a.out pads the text section with 0-bytes, which were interpreted as a `0x00 0x00` instruction and occasionally as a single 0x00 byte. Add logic to ignore single 0x00 bytes and to remove dangling `0x00 0x00` instructions at the end of the instruction vec, so only the 'actual' instructions are presented in the end. Also adjust the visibility of methods, so only the truncated instructions will ever be presented. Of course, this could remove an actual `0x00 0x00` instruction from the end, but it would not have any effect on execution anyway.
This commit is contained in:
@@ -33,14 +33,11 @@ impl Aout {
|
||||
|
||||
let text_start = hdr.hdrlen as usize;
|
||||
let text_end = text_start + hdr.text as usize;
|
||||
let data_start = text_end + 1;
|
||||
let data_start = text_end;
|
||||
let data_end = data_start + hdr.data as usize;
|
||||
|
||||
dbg!(&hdr);
|
||||
|
||||
let text_section = &buf[text_start..text_end];
|
||||
// let data_section = &buf[data_start..data_end];
|
||||
let data_section = [];
|
||||
let data_section = &buf[data_start..data_end];
|
||||
|
||||
Aout {
|
||||
header: hdr,
|
||||
|
||||
262
src/disasm.rs
262
src/disasm.rs
@@ -1,7 +1,5 @@
|
||||
//! The main disassembling logic.
|
||||
|
||||
use env_logger::Target;
|
||||
|
||||
use crate::aout::Aout;
|
||||
use crate::operands::{
|
||||
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word,
|
||||
@@ -11,7 +9,7 @@ use crate::{
|
||||
Args,
|
||||
instructions::{Instruction, Mnemonic},
|
||||
};
|
||||
use crate::{modrmb, modrms, modrmv};
|
||||
use crate::{modrm_instruction_sregister, modrm_instruction_wordwidth, modrm_target_bytewidth};
|
||||
use core::fmt;
|
||||
use std::{fs::File, io::Read, process::exit};
|
||||
|
||||
@@ -25,7 +23,9 @@ pub enum DisasmError {
|
||||
IllegalModRMByteMode(u8),
|
||||
IllegalModRMByteIndex(u8),
|
||||
IllegalOperand(String),
|
||||
ReadBeyondTextSection(),
|
||||
ReadBeyondTextSection,
|
||||
// not an error per se, it indicates a single 0x00 byte padding
|
||||
EndOfTextSection,
|
||||
UnknownRegister(usize),
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@ impl fmt::Display for DisasmError {
|
||||
modrm
|
||||
),
|
||||
DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). {}", msg),
|
||||
DisasmError::ReadBeyondTextSection() => write!(
|
||||
DisasmError::ReadBeyondTextSection => write!(
|
||||
f,
|
||||
"Error (Out of bounds access). Wanted to paese an additional byte, but there is no more text section.",
|
||||
),
|
||||
@@ -69,61 +69,62 @@ impl fmt::Display for DisasmError {
|
||||
f,
|
||||
"Error (Unknown register). The register with ID {id} is unknown",
|
||||
),
|
||||
DisasmError::EndOfTextSection => write!(f, "Warning. End of text section reached."),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Disassemble the binary in `path` into a vector of instructions.
|
||||
/// Main entry point to the disassembly.
|
||||
pub fn disasm(args: &Args) -> Result<Vec<Instruction>, DisasmError> {
|
||||
let contents = path_to_buf(args)?;
|
||||
let aout = Aout::new(contents);
|
||||
|
||||
log::debug!("{:?}", aout);
|
||||
|
||||
let mut disasm = Disassembler::new(aout);
|
||||
disasm.decode_instructions()
|
||||
}
|
||||
|
||||
/// Read a filepath into a u8 buffer.
|
||||
fn path_to_buf(args: &Args) -> Result<Vec<u8>, DisasmError> {
|
||||
let path = args
|
||||
.path
|
||||
.clone()
|
||||
.ok_or(DisasmError::NoFile(args.path.clone()))?;
|
||||
let mut file = File::open(path)?;
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf)?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Disassembler {
|
||||
pub offset: usize, // the current offset in the disasm process
|
||||
pub text: Vec<u8>, // the aout binary
|
||||
pub instruction: Instruction, // the instruction, which is currently being parsed
|
||||
pub instructions: Vec<Instruction>, // all parsed instructions
|
||||
}
|
||||
|
||||
impl Disassembler {
|
||||
pub fn new(aout: Aout) -> Self {
|
||||
pub fn new(args: &Args) -> Self {
|
||||
let path = args
|
||||
.path
|
||||
.clone()
|
||||
.ok_or(DisasmError::NoFile(args.path.clone()))
|
||||
.unwrap();
|
||||
let mut file = File::open(path).unwrap();
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf).unwrap();
|
||||
let aout = Aout::new(buf);
|
||||
log::debug!("{:?}", aout);
|
||||
|
||||
Disassembler {
|
||||
offset: 0,
|
||||
text: aout.text,
|
||||
instruction: Instruction::new(),
|
||||
instructions: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a single byte of binary, return it and advance the offset.
|
||||
/// Returns the read byte.
|
||||
pub fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
|
||||
fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
|
||||
log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
|
||||
// advance to operand
|
||||
// check if the byte would be out of bounds
|
||||
if self.offset + 1 == self.text.len() {
|
||||
// check if text section ends with single 0x00 padding byte
|
||||
if self.text[self.offset] == 0 {
|
||||
return Err(DisasmError::EndOfTextSection);
|
||||
// else its just an out of bounds read
|
||||
} else {
|
||||
return Err(DisasmError::ReadBeyondTextSection);
|
||||
}
|
||||
// if not, advance offset to next byte
|
||||
} else {
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
let byte = self
|
||||
.text
|
||||
.get(self.offset)
|
||||
.ok_or(DisasmError::ReadBeyondTextSection())?;
|
||||
.ok_or(DisasmError::ReadBeyondTextSection)?;
|
||||
log::debug!("Parsed byte {byte:#04x}");
|
||||
self.instruction.raw.push(*byte);
|
||||
Ok(*byte)
|
||||
@@ -132,7 +133,7 @@ impl Disassembler {
|
||||
/// Parse a single word of binary.
|
||||
/// Just a wrapper for parsing a byte twice.
|
||||
/// Returns the read word.
|
||||
pub fn parse_word(&mut self) -> Result<Word, DisasmError> {
|
||||
fn parse_word(&mut self) -> Result<Word, DisasmError> {
|
||||
log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
|
||||
let byte1 = self.parse_byte()?;
|
||||
let byte2 = self.parse_byte()?;
|
||||
@@ -142,7 +143,7 @@ impl Disassembler {
|
||||
/// Parse a single byte of binary and interpret as as signed.
|
||||
/// The isize contains a relative offset to be added to the address
|
||||
/// of the subsequent instruction.
|
||||
pub fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
|
||||
fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
|
||||
log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
|
||||
// first interpret as 2-complement, then cast for addition
|
||||
let byte = self.parse_byte()? as IByte as isize;
|
||||
@@ -170,7 +171,7 @@ impl Disassembler {
|
||||
}
|
||||
|
||||
/// Parse a pointer type.
|
||||
pub fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
|
||||
fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
|
||||
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
|
||||
let byte0 = self.parse_byte()?;
|
||||
let byte1 = self.parse_byte()?;
|
||||
@@ -195,7 +196,7 @@ impl Disassembler {
|
||||
|
||||
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
|
||||
/// Returns the parsed modrm target and the source register
|
||||
pub fn parse_modrm_byte(
|
||||
fn parse_modrm_byte(
|
||||
&mut self,
|
||||
register_width: Operand,
|
||||
) -> Result<(ModRmTarget, RegisterId), DisasmError> {
|
||||
@@ -307,7 +308,7 @@ impl Disassembler {
|
||||
/// Group 1 always have an ModRM target (all modrm bits, without reg) as
|
||||
/// first and an imm value as second operand (which has to be parsed before
|
||||
/// call to this function), but is available in both Byte and Word length.
|
||||
pub fn modrm_reg_to_grp1(
|
||||
fn modrm_reg_to_grp1(
|
||||
modrm_reg_byte: u8,
|
||||
target: ModRmTarget,
|
||||
register_id: Operand,
|
||||
@@ -342,7 +343,7 @@ impl Disassembler {
|
||||
/// Group 2 only has a single operand, the other one is either a constant
|
||||
/// 1 (not present in the binary) or the CL register.
|
||||
/// This function assumes the operand to be 1
|
||||
pub fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
||||
fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
||||
match reg {
|
||||
0b000 => Ok(Mnemonic::ROL_b(target, 1)),
|
||||
0b001 => Ok(Mnemonic::ROR_b(target, 1)),
|
||||
@@ -360,7 +361,7 @@ impl Disassembler {
|
||||
/// Group 2 only has a single operand, the other one is either a constant
|
||||
/// 1 (not present in the binary) or the CL register.
|
||||
/// This function assumes the operand to be CL register.
|
||||
pub fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
||||
fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
||||
match reg {
|
||||
0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)),
|
||||
0b001 => Ok(Mnemonic::ROR_fromReg(target, Register::CL)),
|
||||
@@ -377,7 +378,7 @@ impl Disassembler {
|
||||
/// Match the modrm reg bits to the GPR3a/b mnemonics.
|
||||
/// Group 3 only has a single operand, which is the ModRmTarget selected
|
||||
/// by modrm bits.
|
||||
pub fn modrm_reg_to_grp3(
|
||||
fn modrm_reg_to_grp3(
|
||||
&mut self,
|
||||
reg: u8,
|
||||
target: ModRmTarget,
|
||||
@@ -402,46 +403,99 @@ impl Disassembler {
|
||||
/// Parse an Mp Operand (Memory Pointer).
|
||||
/// An Mp is a ModRM byte with the `reg` bits ignored and an additional
|
||||
/// 2 words parsed for a `Pointer` type.
|
||||
pub fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
|
||||
fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
|
||||
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
|
||||
let ptr = self.parse_ptr()?;
|
||||
Ok((target, ptr))
|
||||
}
|
||||
|
||||
/// Decode instructions from the text section of the provided binary
|
||||
pub fn decode_instructions(&mut self) -> Result<Vec<Instruction>, DisasmError> {
|
||||
// naive approach:
|
||||
// 1. read byte
|
||||
// 2. pattern match to see which instruction it is
|
||||
// 3. read as many bytes as this instruction needs (registers, immidiates, ...)
|
||||
// repeat until no bytes left
|
||||
/// a.out pads the text section with 0x00 bytes. During parsing, these get
|
||||
/// interpreted as `0x00 0x00`, which have to get removed for an authentic
|
||||
/// disassembly.
|
||||
/// This is done in favor of removing all 0x00 bytes in the beginning,
|
||||
/// as this could remove an actual 0x00 byte as operand of the final
|
||||
/// instruction. Of course, this could remove an actual `0x00 0x00`
|
||||
/// instruction from the end, but they would not have any effect on
|
||||
/// execution anyway.
|
||||
fn remove_trailing_padding(&mut self) {
|
||||
let mut until = self.instructions.len();
|
||||
for i in self.instructions.iter().rev() {
|
||||
match i.opcode {
|
||||
// 0x00 0x00 in binary
|
||||
Mnemonic::ADD_FromReg(
|
||||
ModRmTarget::Memory(MemoryIndex {
|
||||
base: Some(Register::BX),
|
||||
index: Some(Register::SI),
|
||||
displacement: None,
|
||||
}),
|
||||
Register::AL,
|
||||
) => until -= 1,
|
||||
// stop when another instruction is hit
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
log::debug!(
|
||||
"Truncated file by {} bytes by removing trailing padding bytes.",
|
||||
self.text.len() - until
|
||||
);
|
||||
self.instructions.truncate(until);
|
||||
}
|
||||
|
||||
let mut instructions = Vec::new();
|
||||
/// Start the disassmble and allow for some error handling wrapped around
|
||||
/// the actual decoding function.
|
||||
pub fn disassemble(&mut self) -> Result<Vec<Instruction>, DisasmError> {
|
||||
let parsing = self.decode_instructions();
|
||||
|
||||
// a.out pads the text section to byte align, so the fasely interpreted
|
||||
// instructions have to be removed.
|
||||
self.remove_trailing_padding();
|
||||
let instructions = self.instructions.clone();
|
||||
|
||||
// allow for warning-type errors to pass through, as they are not fatal
|
||||
match parsing {
|
||||
Ok(_) => Ok(instructions),
|
||||
Err(e) => match e {
|
||||
DisasmError::EndOfTextSection => {
|
||||
log::debug!("Solo padded 0-byte at end of file was found. Ignoring.");
|
||||
Ok(instructions)
|
||||
}
|
||||
_ => {
|
||||
println!("Encountered error during disassembly: {e}");
|
||||
Err(e)
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Decode instructions by matching their byte signature to their mnemonics.
|
||||
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
|
||||
log::debug!("Starting to decode text of length {}", self.text.len());
|
||||
while self.offset < self.text.len() {
|
||||
// reset mutable current instruction
|
||||
self.instruction = Instruction::new();
|
||||
self.instruction.start = self.offset;
|
||||
|
||||
// fetch next opcode
|
||||
let opcode = self.text[self.offset];
|
||||
|
||||
// additional raw bytes will be pushed by parse functions
|
||||
self.instruction.raw.push(opcode);
|
||||
// XXX: convert this copy and paste horror into a proc macro
|
||||
|
||||
self.instruction.opcode = match opcode {
|
||||
0x00 => modrmb!(self, ADD_FromReg),
|
||||
0x01 => modrmv!(self, ADD_FromReg),
|
||||
0x02 => modrmb!(self, ADD_ToReg),
|
||||
0x03 => modrmv!(self, ADD_ToReg),
|
||||
0x00 => modrm_target_bytewidth!(self, ADD_FromReg),
|
||||
0x01 => modrm_instruction_wordwidth!(self, ADD_FromReg),
|
||||
0x02 => modrm_target_bytewidth!(self, ADD_ToReg),
|
||||
0x03 => modrm_instruction_wordwidth!(self, ADD_ToReg),
|
||||
0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
|
||||
0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
|
||||
|
||||
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
|
||||
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
|
||||
|
||||
0x08 => modrmb!(self, OR_FromReg),
|
||||
0x09 => modrmv!(self, OR_FromReg),
|
||||
0x0A => modrmb!(self, OR_ToReg),
|
||||
0x0B => modrmv!(self, OR_ToReg),
|
||||
0x08 => modrm_target_bytewidth!(self, OR_FromReg),
|
||||
0x09 => modrm_instruction_wordwidth!(self, OR_FromReg),
|
||||
0x0A => modrm_target_bytewidth!(self, OR_ToReg),
|
||||
0x0B => modrm_instruction_wordwidth!(self, OR_ToReg),
|
||||
0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
|
||||
0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
|
||||
|
||||
@@ -449,60 +503,60 @@ impl Disassembler {
|
||||
|
||||
0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||
|
||||
0x10 => modrmb!(self, ADC_FromReg),
|
||||
0x11 => modrmv!(self, ADC_FromReg),
|
||||
0x12 => modrmb!(self, ADC_ToReg),
|
||||
0x13 => modrmv!(self, ADC_ToReg),
|
||||
0x10 => modrm_target_bytewidth!(self, ADC_FromReg),
|
||||
0x11 => modrm_instruction_wordwidth!(self, ADC_FromReg),
|
||||
0x12 => modrm_target_bytewidth!(self, ADC_ToReg),
|
||||
0x13 => modrm_instruction_wordwidth!(self, ADC_ToReg),
|
||||
0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
|
||||
0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
|
||||
|
||||
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
|
||||
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
|
||||
|
||||
0x18 => modrmb!(self, SBB_FromReg),
|
||||
0x19 => modrmv!(self, SBB_FromReg),
|
||||
0x1A => modrmb!(self, SBB_ToReg),
|
||||
0x1B => modrmv!(self, SBB_ToReg),
|
||||
0x18 => modrm_target_bytewidth!(self, SBB_FromReg),
|
||||
0x19 => modrm_instruction_wordwidth!(self, SBB_FromReg),
|
||||
0x1A => modrm_target_bytewidth!(self, SBB_ToReg),
|
||||
0x1B => modrm_instruction_wordwidth!(self, SBB_ToReg),
|
||||
0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
|
||||
0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
|
||||
|
||||
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
|
||||
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
|
||||
|
||||
0x20 => modrmb!(self, AND_FromReg),
|
||||
0x21 => modrmv!(self, AND_FromReg),
|
||||
0x22 => modrmb!(self, AND_ToReg),
|
||||
0x23 => modrmv!(self, AND_ToReg),
|
||||
0x20 => modrm_target_bytewidth!(self, AND_FromReg),
|
||||
0x21 => modrm_instruction_wordwidth!(self, AND_FromReg),
|
||||
0x22 => modrm_target_bytewidth!(self, AND_ToReg),
|
||||
0x23 => modrm_instruction_wordwidth!(self, AND_ToReg),
|
||||
0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
|
||||
0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
|
||||
|
||||
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
|
||||
0x27 => Mnemonic::DAA,
|
||||
|
||||
0x28 => modrmb!(self, SUB_FromReg),
|
||||
0x29 => modrmv!(self, SUB_FromReg),
|
||||
0x2A => modrmb!(self, SUB_ToReg),
|
||||
0x2B => modrmv!(self, SUB_ToReg),
|
||||
0x28 => modrm_target_bytewidth!(self, SUB_FromReg),
|
||||
0x29 => modrm_instruction_wordwidth!(self, SUB_FromReg),
|
||||
0x2A => modrm_target_bytewidth!(self, SUB_ToReg),
|
||||
0x2B => modrm_instruction_wordwidth!(self, SUB_ToReg),
|
||||
0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
|
||||
0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
|
||||
|
||||
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
|
||||
0x2F => Mnemonic::DAS,
|
||||
|
||||
0x30 => modrmb!(self, XOR_FromReg),
|
||||
0x31 => modrmv!(self, XOR_FromReg),
|
||||
0x32 => modrmb!(self, XOR_ToReg),
|
||||
0x33 => modrmv!(self, XOR_ToReg),
|
||||
0x30 => modrm_target_bytewidth!(self, XOR_FromReg),
|
||||
0x31 => modrm_instruction_wordwidth!(self, XOR_FromReg),
|
||||
0x32 => modrm_target_bytewidth!(self, XOR_ToReg),
|
||||
0x33 => modrm_instruction_wordwidth!(self, XOR_ToReg),
|
||||
0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
|
||||
0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
|
||||
|
||||
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
|
||||
0x37 => Mnemonic::AAA,
|
||||
|
||||
0x38 => modrmb!(self, CMP_FromReg),
|
||||
0x39 => modrmv!(self, CMP_FromReg),
|
||||
0x3A => modrmb!(self, CMP_ToReg),
|
||||
0x3B => modrmv!(self, CMP_ToReg),
|
||||
0x38 => modrm_target_bytewidth!(self, CMP_FromReg),
|
||||
0x39 => modrm_instruction_wordwidth!(self, CMP_FromReg),
|
||||
0x3A => modrm_target_bytewidth!(self, CMP_ToReg),
|
||||
0x3B => modrm_instruction_wordwidth!(self, CMP_ToReg),
|
||||
0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
|
||||
0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
|
||||
|
||||
@@ -588,20 +642,20 @@ impl Disassembler {
|
||||
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
|
||||
}
|
||||
|
||||
0x84 => modrmb!(self, TEST),
|
||||
0x85 => modrmv!(self, TEST),
|
||||
0x84 => modrm_target_bytewidth!(self, TEST),
|
||||
0x85 => modrm_instruction_wordwidth!(self, TEST),
|
||||
|
||||
0x86 => modrmb!(self, XCHG),
|
||||
0x87 => modrmv!(self, XCHG),
|
||||
0x86 => modrm_target_bytewidth!(self, XCHG),
|
||||
0x87 => modrm_instruction_wordwidth!(self, XCHG),
|
||||
|
||||
0x88 => modrmb!(self, MOV_FromReg),
|
||||
0x89 => modrmv!(self, MOV_FromReg),
|
||||
0x8A => modrmb!(self, MOV_ToReg),
|
||||
0x8B => modrmv!(self, MOV_ToReg),
|
||||
0x8C => modrms!(self, MOV_FromSReg),
|
||||
0x8E => modrms!(self, MOV_ToSReg),
|
||||
0x88 => modrm_target_bytewidth!(self, MOV_FromReg),
|
||||
0x89 => modrm_instruction_wordwidth!(self, MOV_FromReg),
|
||||
0x8A => modrm_target_bytewidth!(self, MOV_ToReg),
|
||||
0x8B => modrm_instruction_wordwidth!(self, MOV_ToReg),
|
||||
0x8C => modrm_instruction_sregister!(self, MOV_FromSReg),
|
||||
0x8E => modrm_instruction_sregister!(self, MOV_ToSReg),
|
||||
|
||||
0x8D => modrmv!(self, LEA),
|
||||
0x8D => modrm_instruction_wordwidth!(self, LEA),
|
||||
|
||||
0x8F => {
|
||||
let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
|
||||
@@ -818,13 +872,15 @@ impl Disassembler {
|
||||
}
|
||||
};
|
||||
|
||||
println!("{}", self.instruction);
|
||||
instructions.push(self.instruction.clone());
|
||||
self.instruction = Instruction::new();
|
||||
// Save parsed instruction
|
||||
log::debug!("{}", self.instruction);
|
||||
self.instructions.push(self.instruction.clone());
|
||||
|
||||
// Advance offset to hover the next potential opcode
|
||||
self.offset += 1;
|
||||
}
|
||||
|
||||
Ok(instructions)
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -839,11 +895,12 @@ mod tests {
|
||||
offset: 0,
|
||||
text,
|
||||
instruction: Instruction::new(),
|
||||
instructions: Vec::new(),
|
||||
};
|
||||
let instructions = disassembler.decode_instructions().ok();
|
||||
if let Some(instrs) = instructions {
|
||||
disassembler.decode_instructions().unwrap();
|
||||
let instructions = disassembler.instructions;
|
||||
assert_eq!(
|
||||
instrs[0],
|
||||
instructions[0],
|
||||
Instruction {
|
||||
start: 0,
|
||||
raw: Vec::from([0, 0]),
|
||||
@@ -859,4 +916,3 @@ mod tests {
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#[macro_export]
|
||||
/// Generate a Mnemonic for an 8-bit Register from a ModRM byte.
|
||||
macro_rules! modrmb {
|
||||
macro_rules! modrm_target_bytewidth {
|
||||
($self:ident, $variant:ident) => {{
|
||||
let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?;
|
||||
Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?)
|
||||
@@ -11,7 +11,7 @@ macro_rules! modrmb {
|
||||
|
||||
#[macro_export]
|
||||
/// Generate a Mnemonic for a 16-bit Register from a ModRM byte.
|
||||
macro_rules! modrmv {
|
||||
macro_rules! modrm_instruction_wordwidth {
|
||||
($self:ident, $variant:ident) => {{
|
||||
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
|
||||
Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?)
|
||||
@@ -20,7 +20,7 @@ macro_rules! modrmv {
|
||||
|
||||
#[macro_export]
|
||||
/// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte.
|
||||
macro_rules! modrms {
|
||||
macro_rules! modrm_instruction_sregister {
|
||||
($self:ident, $variant:ident) => {{
|
||||
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
|
||||
Mnemonic::$variant(target, SegmentRegister::by_id(reg)?)
|
||||
|
||||
@@ -296,6 +296,8 @@ pub enum Mnemonic {
|
||||
AAD(Byte),
|
||||
// MISC
|
||||
XLAT,
|
||||
// Not part of 8086:
|
||||
EOT, // End of Text Section
|
||||
}
|
||||
|
||||
impl fmt::Display for Mnemonic {
|
||||
|
||||
11
src/main.rs
11
src/main.rs
@@ -1,4 +1,5 @@
|
||||
use clap::{Parser, Subcommand};
|
||||
use disasm::Disassembler;
|
||||
|
||||
mod aout;
|
||||
mod disasm;
|
||||
@@ -37,14 +38,12 @@ fn main() {
|
||||
|
||||
match args.command {
|
||||
Command::Disasm => {
|
||||
let instructions = disasm::disasm(&args);
|
||||
let mut disasm = Disassembler::new(&args);
|
||||
let instructions = disasm.disassemble();
|
||||
match instructions {
|
||||
Err(e) => {
|
||||
println!("(undefined)");
|
||||
println!("Encountered error during parsing: {e}")
|
||||
}
|
||||
Ok(instrs) => instrs.iter().for_each(|i| println!("{i}")),
|
||||
_ => {}
|
||||
};
|
||||
}
|
||||
}
|
||||
_ => panic!("Command not yet implemented"),
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user