fix: don't interpret padding as instructions
a.out pads the text section with 0-bytes, which were interpreted as `0x00 0x00` instructions and occasionally as a single 0x00 byte. Add logic to ignore single 0x00 bytes and to remove dangling `0x00 0x00` instructions at the end of the instruction vec, so only the 'actual' instructions are presented in the end. Also adjust visibility of methods, so only the truncated instructions will ever be presented. Of course, this could remove an actual `0x00 0x00` instruction from the end, but it would not have any effect on execution anyway.
This commit is contained in:
@@ -33,14 +33,11 @@ impl Aout {
|
|||||||
|
|
||||||
let text_start = hdr.hdrlen as usize;
|
let text_start = hdr.hdrlen as usize;
|
||||||
let text_end = text_start + hdr.text as usize;
|
let text_end = text_start + hdr.text as usize;
|
||||||
let data_start = text_end + 1;
|
let data_start = text_end;
|
||||||
let data_end = data_start + hdr.data as usize;
|
let data_end = data_start + hdr.data as usize;
|
||||||
|
|
||||||
dbg!(&hdr);
|
|
||||||
|
|
||||||
let text_section = &buf[text_start..text_end];
|
let text_section = &buf[text_start..text_end];
|
||||||
// let data_section = &buf[data_start..data_end];
|
let data_section = &buf[data_start..data_end];
|
||||||
let data_section = [];
|
|
||||||
|
|
||||||
Aout {
|
Aout {
|
||||||
header: hdr,
|
header: hdr,
|
||||||
|
|||||||
298
src/disasm.rs
298
src/disasm.rs
@@ -1,7 +1,5 @@
|
|||||||
//! The main dissembling logic.
|
//! The main dissembling logic.
|
||||||
|
|
||||||
use env_logger::Target;
|
|
||||||
|
|
||||||
use crate::aout::Aout;
|
use crate::aout::Aout;
|
||||||
use crate::operands::{
|
use crate::operands::{
|
||||||
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word,
|
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer, Word,
|
||||||
@@ -11,7 +9,7 @@ use crate::{
|
|||||||
Args,
|
Args,
|
||||||
instructions::{Instruction, Mnemonic},
|
instructions::{Instruction, Mnemonic},
|
||||||
};
|
};
|
||||||
use crate::{modrmb, modrms, modrmv};
|
use crate::{modrm_instruction_sregister, modrm_instruction_wordwidth, modrm_target_bytewidth};
|
||||||
use core::fmt;
|
use core::fmt;
|
||||||
use std::{fs::File, io::Read, process::exit};
|
use std::{fs::File, io::Read, process::exit};
|
||||||
|
|
||||||
@@ -25,7 +23,9 @@ pub enum DisasmError {
|
|||||||
IllegalModRMByteMode(u8),
|
IllegalModRMByteMode(u8),
|
||||||
IllegalModRMByteIndex(u8),
|
IllegalModRMByteIndex(u8),
|
||||||
IllegalOperand(String),
|
IllegalOperand(String),
|
||||||
ReadBeyondTextSection(),
|
ReadBeyondTextSection,
|
||||||
|
// not an error per se, it indicates a single 0x00 byte padding
|
||||||
|
EndOfTextSection,
|
||||||
UnknownRegister(usize),
|
UnknownRegister(usize),
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -61,7 +61,7 @@ impl fmt::Display for DisasmError {
|
|||||||
modrm
|
modrm
|
||||||
),
|
),
|
||||||
DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). {}", msg),
|
DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). {}", msg),
|
||||||
DisasmError::ReadBeyondTextSection() => write!(
|
DisasmError::ReadBeyondTextSection => write!(
|
||||||
f,
|
f,
|
||||||
"Error (Out of bounds access). Wanted to paese an additional byte, but there is no more text section.",
|
"Error (Out of bounds access). Wanted to paese an additional byte, but there is no more text section.",
|
||||||
),
|
),
|
||||||
@@ -69,61 +69,62 @@ impl fmt::Display for DisasmError {
|
|||||||
f,
|
f,
|
||||||
"Error (Unknown register). The register with ID {id} is unknown",
|
"Error (Unknown register). The register with ID {id} is unknown",
|
||||||
),
|
),
|
||||||
|
DisasmError::EndOfTextSection => write!(f, "Warning. End of text section reached."),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Disassemble the binary in `path` into a vector of instructions.
|
|
||||||
/// Main entry point to the disassembly.
|
|
||||||
pub fn disasm(args: &Args) -> Result<Vec<Instruction>, DisasmError> {
|
|
||||||
let contents = path_to_buf(args)?;
|
|
||||||
let aout = Aout::new(contents);
|
|
||||||
|
|
||||||
log::debug!("{:?}", aout);
|
|
||||||
|
|
||||||
let mut disasm = Disassembler::new(aout);
|
|
||||||
disasm.decode_instructions()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Read a filepath into a u8 buffer.
|
|
||||||
fn path_to_buf(args: &Args) -> Result<Vec<u8>, DisasmError> {
|
|
||||||
let path = args
|
|
||||||
.path
|
|
||||||
.clone()
|
|
||||||
.ok_or(DisasmError::NoFile(args.path.clone()))?;
|
|
||||||
let mut file = File::open(path)?;
|
|
||||||
let mut buf = Vec::new();
|
|
||||||
file.read_to_end(&mut buf)?;
|
|
||||||
|
|
||||||
Ok(buf)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Disassembler {
|
pub struct Disassembler {
|
||||||
pub offset: usize, // the current offset in the disasm process
|
pub offset: usize, // the current offset in the disasm process
|
||||||
pub text: Vec<u8>, // the aout binary
|
pub text: Vec<u8>, // the aout binary
|
||||||
pub instruction: Instruction, // the instruction, which is currently being parsed
|
pub instruction: Instruction, // the instruction, which is currently being parsed
|
||||||
|
pub instructions: Vec<Instruction>, // all parsed instructions
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Disassembler {
|
impl Disassembler {
|
||||||
pub fn new(aout: Aout) -> Self {
|
pub fn new(args: &Args) -> Self {
|
||||||
|
let path = args
|
||||||
|
.path
|
||||||
|
.clone()
|
||||||
|
.ok_or(DisasmError::NoFile(args.path.clone()))
|
||||||
|
.unwrap();
|
||||||
|
let mut file = File::open(path).unwrap();
|
||||||
|
let mut buf = Vec::new();
|
||||||
|
file.read_to_end(&mut buf).unwrap();
|
||||||
|
let aout = Aout::new(buf);
|
||||||
|
log::debug!("{:?}", aout);
|
||||||
|
|
||||||
Disassembler {
|
Disassembler {
|
||||||
offset: 0,
|
offset: 0,
|
||||||
text: aout.text,
|
text: aout.text,
|
||||||
instruction: Instruction::new(),
|
instruction: Instruction::new(),
|
||||||
|
instructions: Vec::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a single byte of binary, return it and advance the offset.
|
/// Parse a single byte of binary, return it and advance the offset.
|
||||||
/// Returns the read byte.
|
/// Returns the read byte.
|
||||||
pub fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
|
fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
|
||||||
log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
|
log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
|
||||||
// advance to operand
|
// check if the byte would be out of bounds
|
||||||
self.offset += 1;
|
if self.offset + 1 == self.text.len() {
|
||||||
|
// check if text section ends with single 0x00 padding byte
|
||||||
|
if self.text[self.offset] == 0 {
|
||||||
|
return Err(DisasmError::EndOfTextSection);
|
||||||
|
// else its just an out of bounds read
|
||||||
|
} else {
|
||||||
|
return Err(DisasmError::ReadBeyondTextSection);
|
||||||
|
}
|
||||||
|
// if not, advance offset to next byte
|
||||||
|
} else {
|
||||||
|
self.offset += 1;
|
||||||
|
}
|
||||||
|
|
||||||
let byte = self
|
let byte = self
|
||||||
.text
|
.text
|
||||||
.get(self.offset)
|
.get(self.offset)
|
||||||
.ok_or(DisasmError::ReadBeyondTextSection())?;
|
.ok_or(DisasmError::ReadBeyondTextSection)?;
|
||||||
log::debug!("Parsed byte {byte:#04x}");
|
log::debug!("Parsed byte {byte:#04x}");
|
||||||
self.instruction.raw.push(*byte);
|
self.instruction.raw.push(*byte);
|
||||||
Ok(*byte)
|
Ok(*byte)
|
||||||
@@ -132,7 +133,7 @@ impl Disassembler {
|
|||||||
/// Parse a single word of binary.
|
/// Parse a single word of binary.
|
||||||
/// Just a wrapper for parsing a byte twice.
|
/// Just a wrapper for parsing a byte twice.
|
||||||
/// Returns the read word.
|
/// Returns the read word.
|
||||||
pub fn parse_word(&mut self) -> Result<Word, DisasmError> {
|
fn parse_word(&mut self) -> Result<Word, DisasmError> {
|
||||||
log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
|
log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
|
||||||
let byte1 = self.parse_byte()?;
|
let byte1 = self.parse_byte()?;
|
||||||
let byte2 = self.parse_byte()?;
|
let byte2 = self.parse_byte()?;
|
||||||
@@ -142,7 +143,7 @@ impl Disassembler {
|
|||||||
/// Parse a single byte of binary and interpret as as signed.
|
/// Parse a single byte of binary and interpret as as signed.
|
||||||
/// The isize contains a relative offset to be added to the address
|
/// The isize contains a relative offset to be added to the address
|
||||||
/// of the subsequent instruction.
|
/// of the subsequent instruction.
|
||||||
pub fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
|
fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
|
||||||
log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
|
log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
|
||||||
// first interpret as 2-complement, then cast for addition
|
// first interpret as 2-complement, then cast for addition
|
||||||
let byte = self.parse_byte()? as IByte as isize;
|
let byte = self.parse_byte()? as IByte as isize;
|
||||||
@@ -170,7 +171,7 @@ impl Disassembler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Parse a pointer type.
|
/// Parse a pointer type.
|
||||||
pub fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
|
fn parse_ptr(&mut self) -> Result<Pointer, DisasmError> {
|
||||||
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
|
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
|
||||||
let byte0 = self.parse_byte()?;
|
let byte0 = self.parse_byte()?;
|
||||||
let byte1 = self.parse_byte()?;
|
let byte1 = self.parse_byte()?;
|
||||||
@@ -195,7 +196,7 @@ impl Disassembler {
|
|||||||
|
|
||||||
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
|
/// Parse a single modrm byte, return the resulting MemoryIndex and advance the offset.
|
||||||
/// Returns the parsed modrm target and the source register
|
/// Returns the parsed modrm target and the source register
|
||||||
pub fn parse_modrm_byte(
|
fn parse_modrm_byte(
|
||||||
&mut self,
|
&mut self,
|
||||||
register_width: Operand,
|
register_width: Operand,
|
||||||
) -> Result<(ModRmTarget, RegisterId), DisasmError> {
|
) -> Result<(ModRmTarget, RegisterId), DisasmError> {
|
||||||
@@ -307,7 +308,7 @@ impl Disassembler {
|
|||||||
/// Group 1 always have an ModRM target (all modrm bits, without reg) as
|
/// Group 1 always have an ModRM target (all modrm bits, without reg) as
|
||||||
/// first and an imm value as second operand (which has to be parsed before
|
/// first and an imm value as second operand (which has to be parsed before
|
||||||
/// call to this function), but is available in both Byte and Word length.
|
/// call to this function), but is available in both Byte and Word length.
|
||||||
pub fn modrm_reg_to_grp1(
|
fn modrm_reg_to_grp1(
|
||||||
modrm_reg_byte: u8,
|
modrm_reg_byte: u8,
|
||||||
target: ModRmTarget,
|
target: ModRmTarget,
|
||||||
register_id: Operand,
|
register_id: Operand,
|
||||||
@@ -342,7 +343,7 @@ impl Disassembler {
|
|||||||
/// Group 2 only has a single operand, the other one is either a constant
|
/// Group 2 only has a single operand, the other one is either a constant
|
||||||
/// 1 (not present in the binary) or the CL register.
|
/// 1 (not present in the binary) or the CL register.
|
||||||
/// This function assumes the operand to be 1
|
/// This function assumes the operand to be 1
|
||||||
pub fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
||||||
match reg {
|
match reg {
|
||||||
0b000 => Ok(Mnemonic::ROL_b(target, 1)),
|
0b000 => Ok(Mnemonic::ROL_b(target, 1)),
|
||||||
0b001 => Ok(Mnemonic::ROR_b(target, 1)),
|
0b001 => Ok(Mnemonic::ROR_b(target, 1)),
|
||||||
@@ -360,7 +361,7 @@ impl Disassembler {
|
|||||||
/// Group 2 only has a single operand, the other one is either a constant
|
/// Group 2 only has a single operand, the other one is either a constant
|
||||||
/// 1 (not present in the binary) or the CL register.
|
/// 1 (not present in the binary) or the CL register.
|
||||||
/// This function assumes the operand to be CL register.
|
/// This function assumes the operand to be CL register.
|
||||||
pub fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
||||||
match reg {
|
match reg {
|
||||||
0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)),
|
0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)),
|
||||||
0b001 => Ok(Mnemonic::ROR_fromReg(target, Register::CL)),
|
0b001 => Ok(Mnemonic::ROR_fromReg(target, Register::CL)),
|
||||||
@@ -377,7 +378,7 @@ impl Disassembler {
|
|||||||
/// Match the modrm reg bits to the GPR3a/b mnemonics.
|
/// Match the modrm reg bits to the GPR3a/b mnemonics.
|
||||||
/// Group 3 only has a single operand, which is the ModRmTarget selected
|
/// Group 3 only has a single operand, which is the ModRmTarget selected
|
||||||
/// by modrm bits.
|
/// by modrm bits.
|
||||||
pub fn modrm_reg_to_grp3(
|
fn modrm_reg_to_grp3(
|
||||||
&mut self,
|
&mut self,
|
||||||
reg: u8,
|
reg: u8,
|
||||||
target: ModRmTarget,
|
target: ModRmTarget,
|
||||||
@@ -402,46 +403,99 @@ impl Disassembler {
|
|||||||
/// Parse an Mp Operand (Memory Pointer).
|
/// Parse an Mp Operand (Memory Pointer).
|
||||||
/// An Mp is a ModRM byte with the `reg` bits ignored and an additional
|
/// An Mp is a ModRM byte with the `reg` bits ignored and an additional
|
||||||
/// 2 words parsed for a `Pointer` type.
|
/// 2 words parsed for a `Pointer` type.
|
||||||
pub fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
|
fn modrm_mp(&mut self) -> Result<(ModRmTarget, Pointer), DisasmError> {
|
||||||
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
|
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
|
||||||
let ptr = self.parse_ptr()?;
|
let ptr = self.parse_ptr()?;
|
||||||
Ok((target, ptr))
|
Ok((target, ptr))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode instructions from the text section of the provided binary
|
/// a.out pads the text section with 0x00 bytes. During parsing, these get
|
||||||
pub fn decode_instructions(&mut self) -> Result<Vec<Instruction>, DisasmError> {
|
/// interpreted as `0x00 0x00`, which have to get removed for an authentic
|
||||||
// naive approach:
|
/// disassembly.
|
||||||
// 1. read byte
|
/// This is done in favor of removing all 0x00 bytes in the beginning,
|
||||||
// 2. pattern match to see which instruction it is
|
/// as this could remove an actual 0x00 byte as operand of the final
|
||||||
// 3. read as many bytes as this instruction needs (registers, immidiates, ...)
|
/// instruction. Of course, this could remove an actual `0x00 0x00`
|
||||||
// repeat until no bytes left
|
/// instruction from the end, but they would not have any effect on
|
||||||
|
/// execution anyway.
|
||||||
|
fn remove_trailing_padding(&mut self) {
|
||||||
|
let mut until = self.instructions.len();
|
||||||
|
for i in self.instructions.iter().rev() {
|
||||||
|
match i.opcode {
|
||||||
|
// 0x00 0x00 in binary
|
||||||
|
Mnemonic::ADD_FromReg(
|
||||||
|
ModRmTarget::Memory(MemoryIndex {
|
||||||
|
base: Some(Register::BX),
|
||||||
|
index: Some(Register::SI),
|
||||||
|
displacement: None,
|
||||||
|
}),
|
||||||
|
Register::AL,
|
||||||
|
) => until -= 1,
|
||||||
|
// stop when another instruction is hit
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
log::debug!(
|
||||||
|
"Truncated file by {} bytes by removing trailing padding bytes.",
|
||||||
|
self.text.len() - until
|
||||||
|
);
|
||||||
|
self.instructions.truncate(until);
|
||||||
|
}
|
||||||
|
|
||||||
let mut instructions = Vec::new();
|
/// Start the disassembly and allow for some error handling wrapped around
|
||||||
|
/// the actual decoding function.
|
||||||
|
pub fn disassemble(&mut self) -> Result<Vec<Instruction>, DisasmError> {
|
||||||
|
let parsing = self.decode_instructions();
|
||||||
|
|
||||||
|
// a.out pads the text section to byte align, so the falsely interpreted
|
||||||
|
// instructions have to be removed.
|
||||||
|
self.remove_trailing_padding();
|
||||||
|
let instructions = self.instructions.clone();
|
||||||
|
|
||||||
|
// allow for warning-type errors to pass through, as they are not fatal
|
||||||
|
match parsing {
|
||||||
|
Ok(_) => Ok(instructions),
|
||||||
|
Err(e) => match e {
|
||||||
|
DisasmError::EndOfTextSection => {
|
||||||
|
log::debug!("Solo padded 0-byte at end of file was found. Ignoring.");
|
||||||
|
Ok(instructions)
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
println!("Encountered error during disassembly: {e}");
|
||||||
|
Err(e)
|
||||||
|
}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Decode instructions by matching their byte signature to their mnemonics.
|
||||||
|
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
|
||||||
log::debug!("Starting to decode text of length {}", self.text.len());
|
log::debug!("Starting to decode text of length {}", self.text.len());
|
||||||
while self.offset < self.text.len() {
|
while self.offset < self.text.len() {
|
||||||
|
// reset mutable current instruction
|
||||||
|
self.instruction = Instruction::new();
|
||||||
self.instruction.start = self.offset;
|
self.instruction.start = self.offset;
|
||||||
|
|
||||||
|
// fetch next opcode
|
||||||
let opcode = self.text[self.offset];
|
let opcode = self.text[self.offset];
|
||||||
|
|
||||||
// additional raw bytes will be pushed by parse functions
|
// additional raw bytes will be pushed by parse functions
|
||||||
self.instruction.raw.push(opcode);
|
self.instruction.raw.push(opcode);
|
||||||
// XXX: convert this copy and paste horror into a proc macro
|
|
||||||
self.instruction.opcode = match opcode {
|
self.instruction.opcode = match opcode {
|
||||||
0x00 => modrmb!(self, ADD_FromReg),
|
0x00 => modrm_target_bytewidth!(self, ADD_FromReg),
|
||||||
0x01 => modrmv!(self, ADD_FromReg),
|
0x01 => modrm_instruction_wordwidth!(self, ADD_FromReg),
|
||||||
0x02 => modrmb!(self, ADD_ToReg),
|
0x02 => modrm_target_bytewidth!(self, ADD_ToReg),
|
||||||
0x03 => modrmv!(self, ADD_ToReg),
|
0x03 => modrm_instruction_wordwidth!(self, ADD_ToReg),
|
||||||
0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
|
0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
|
||||||
0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
|
0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
|
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
|
||||||
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
|
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
|
||||||
|
|
||||||
0x08 => modrmb!(self, OR_FromReg),
|
0x08 => modrm_target_bytewidth!(self, OR_FromReg),
|
||||||
0x09 => modrmv!(self, OR_FromReg),
|
0x09 => modrm_instruction_wordwidth!(self, OR_FromReg),
|
||||||
0x0A => modrmb!(self, OR_ToReg),
|
0x0A => modrm_target_bytewidth!(self, OR_ToReg),
|
||||||
0x0B => modrmv!(self, OR_ToReg),
|
0x0B => modrm_instruction_wordwidth!(self, OR_ToReg),
|
||||||
0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
|
0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
|
||||||
0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
|
0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
@@ -449,60 +503,60 @@ impl Disassembler {
|
|||||||
|
|
||||||
0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
||||||
|
|
||||||
0x10 => modrmb!(self, ADC_FromReg),
|
0x10 => modrm_target_bytewidth!(self, ADC_FromReg),
|
||||||
0x11 => modrmv!(self, ADC_FromReg),
|
0x11 => modrm_instruction_wordwidth!(self, ADC_FromReg),
|
||||||
0x12 => modrmb!(self, ADC_ToReg),
|
0x12 => modrm_target_bytewidth!(self, ADC_ToReg),
|
||||||
0x13 => modrmv!(self, ADC_ToReg),
|
0x13 => modrm_instruction_wordwidth!(self, ADC_ToReg),
|
||||||
0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
|
0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
|
||||||
0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
|
0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
|
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
|
||||||
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
|
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
|
||||||
|
|
||||||
0x18 => modrmb!(self, SBB_FromReg),
|
0x18 => modrm_target_bytewidth!(self, SBB_FromReg),
|
||||||
0x19 => modrmv!(self, SBB_FromReg),
|
0x19 => modrm_instruction_wordwidth!(self, SBB_FromReg),
|
||||||
0x1A => modrmb!(self, SBB_ToReg),
|
0x1A => modrm_target_bytewidth!(self, SBB_ToReg),
|
||||||
0x1B => modrmv!(self, SBB_ToReg),
|
0x1B => modrm_instruction_wordwidth!(self, SBB_ToReg),
|
||||||
0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
|
0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
|
||||||
0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
|
0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
|
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
|
||||||
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
|
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
|
||||||
|
|
||||||
0x20 => modrmb!(self, AND_FromReg),
|
0x20 => modrm_target_bytewidth!(self, AND_FromReg),
|
||||||
0x21 => modrmv!(self, AND_FromReg),
|
0x21 => modrm_instruction_wordwidth!(self, AND_FromReg),
|
||||||
0x22 => modrmb!(self, AND_ToReg),
|
0x22 => modrm_target_bytewidth!(self, AND_ToReg),
|
||||||
0x23 => modrmv!(self, AND_ToReg),
|
0x23 => modrm_instruction_wordwidth!(self, AND_ToReg),
|
||||||
0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
|
0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
|
||||||
0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
|
0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
|
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
|
||||||
0x27 => Mnemonic::DAA,
|
0x27 => Mnemonic::DAA,
|
||||||
|
|
||||||
0x28 => modrmb!(self, SUB_FromReg),
|
0x28 => modrm_target_bytewidth!(self, SUB_FromReg),
|
||||||
0x29 => modrmv!(self, SUB_FromReg),
|
0x29 => modrm_instruction_wordwidth!(self, SUB_FromReg),
|
||||||
0x2A => modrmb!(self, SUB_ToReg),
|
0x2A => modrm_target_bytewidth!(self, SUB_ToReg),
|
||||||
0x2B => modrmv!(self, SUB_ToReg),
|
0x2B => modrm_instruction_wordwidth!(self, SUB_ToReg),
|
||||||
0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
|
0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
|
||||||
0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
|
0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
|
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
|
||||||
0x2F => Mnemonic::DAS,
|
0x2F => Mnemonic::DAS,
|
||||||
|
|
||||||
0x30 => modrmb!(self, XOR_FromReg),
|
0x30 => modrm_target_bytewidth!(self, XOR_FromReg),
|
||||||
0x31 => modrmv!(self, XOR_FromReg),
|
0x31 => modrm_instruction_wordwidth!(self, XOR_FromReg),
|
||||||
0x32 => modrmb!(self, XOR_ToReg),
|
0x32 => modrm_target_bytewidth!(self, XOR_ToReg),
|
||||||
0x33 => modrmv!(self, XOR_ToReg),
|
0x33 => modrm_instruction_wordwidth!(self, XOR_ToReg),
|
||||||
0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
|
0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
|
||||||
0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
|
0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
|
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
|
||||||
0x37 => Mnemonic::AAA,
|
0x37 => Mnemonic::AAA,
|
||||||
|
|
||||||
0x38 => modrmb!(self, CMP_FromReg),
|
0x38 => modrm_target_bytewidth!(self, CMP_FromReg),
|
||||||
0x39 => modrmv!(self, CMP_FromReg),
|
0x39 => modrm_instruction_wordwidth!(self, CMP_FromReg),
|
||||||
0x3A => modrmb!(self, CMP_ToReg),
|
0x3A => modrm_target_bytewidth!(self, CMP_ToReg),
|
||||||
0x3B => modrmv!(self, CMP_ToReg),
|
0x3B => modrm_instruction_wordwidth!(self, CMP_ToReg),
|
||||||
0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
|
0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
|
||||||
0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
|
0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
|
||||||
|
|
||||||
@@ -588,20 +642,20 @@ impl Disassembler {
|
|||||||
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
|
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
|
||||||
}
|
}
|
||||||
|
|
||||||
0x84 => modrmb!(self, TEST),
|
0x84 => modrm_target_bytewidth!(self, TEST),
|
||||||
0x85 => modrmv!(self, TEST),
|
0x85 => modrm_instruction_wordwidth!(self, TEST),
|
||||||
|
|
||||||
0x86 => modrmb!(self, XCHG),
|
0x86 => modrm_target_bytewidth!(self, XCHG),
|
||||||
0x87 => modrmv!(self, XCHG),
|
0x87 => modrm_instruction_wordwidth!(self, XCHG),
|
||||||
|
|
||||||
0x88 => modrmb!(self, MOV_FromReg),
|
0x88 => modrm_target_bytewidth!(self, MOV_FromReg),
|
||||||
0x89 => modrmv!(self, MOV_FromReg),
|
0x89 => modrm_instruction_wordwidth!(self, MOV_FromReg),
|
||||||
0x8A => modrmb!(self, MOV_ToReg),
|
0x8A => modrm_target_bytewidth!(self, MOV_ToReg),
|
||||||
0x8B => modrmv!(self, MOV_ToReg),
|
0x8B => modrm_instruction_wordwidth!(self, MOV_ToReg),
|
||||||
0x8C => modrms!(self, MOV_FromSReg),
|
0x8C => modrm_instruction_sregister!(self, MOV_FromSReg),
|
||||||
0x8E => modrms!(self, MOV_ToSReg),
|
0x8E => modrm_instruction_sregister!(self, MOV_ToSReg),
|
||||||
|
|
||||||
0x8D => modrmv!(self, LEA),
|
0x8D => modrm_instruction_wordwidth!(self, LEA),
|
||||||
|
|
||||||
0x8F => {
|
0x8F => {
|
||||||
let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
|
let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
|
||||||
@@ -818,13 +872,15 @@ impl Disassembler {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
println!("{}", self.instruction);
|
// Save parsed instruction
|
||||||
instructions.push(self.instruction.clone());
|
log::debug!("{}", self.instruction);
|
||||||
self.instruction = Instruction::new();
|
self.instructions.push(self.instruction.clone());
|
||||||
|
|
||||||
|
// Advance offset to hover the next potential opcode
|
||||||
self.offset += 1;
|
self.offset += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(instructions)
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -839,24 +895,24 @@ mod tests {
|
|||||||
offset: 0,
|
offset: 0,
|
||||||
text,
|
text,
|
||||||
instruction: Instruction::new(),
|
instruction: Instruction::new(),
|
||||||
|
instructions: Vec::new(),
|
||||||
};
|
};
|
||||||
let instructions = disassembler.decode_instructions().ok();
|
disassembler.decode_instructions().unwrap();
|
||||||
if let Some(instrs) = instructions {
|
let instructions = disassembler.instructions;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
instrs[0],
|
instructions[0],
|
||||||
Instruction {
|
Instruction {
|
||||||
start: 0,
|
start: 0,
|
||||||
raw: Vec::from([0, 0]),
|
raw: Vec::from([0, 0]),
|
||||||
opcode: Mnemonic::ADD_FromReg(
|
opcode: Mnemonic::ADD_FromReg(
|
||||||
ModRmTarget::Memory(MemoryIndex {
|
ModRmTarget::Memory(MemoryIndex {
|
||||||
base: Some(Register::BX),
|
base: Some(Register::BX),
|
||||||
index: Some(Register::SI),
|
index: Some(Register::SI),
|
||||||
displacement: None
|
displacement: None
|
||||||
}),
|
}),
|
||||||
Register::AL
|
Register::AL
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
/// Generate a Mnemonic for an 8-bit Register from a ModRM byte.
|
/// Generate a Mnemonic for an 8-bit Register from a ModRM byte.
|
||||||
macro_rules! modrmb {
|
macro_rules! modrm_target_bytewidth {
|
||||||
($self:ident, $variant:ident) => {{
|
($self:ident, $variant:ident) => {{
|
||||||
let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?;
|
let (target, reg) = $self.parse_modrm_byte(Operand::Byte(0))?;
|
||||||
Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?)
|
Mnemonic::$variant(target, Register::by_id(Operand::Byte(reg))?)
|
||||||
@@ -11,7 +11,7 @@ macro_rules! modrmb {
|
|||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
/// Generate a Mnemonic for a 16-bit Register from a ModRM byte.
|
/// Generate a Mnemonic for a 16-bit Register from a ModRM byte.
|
||||||
macro_rules! modrmv {
|
macro_rules! modrm_instruction_wordwidth {
|
||||||
($self:ident, $variant:ident) => {{
|
($self:ident, $variant:ident) => {{
|
||||||
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
|
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
|
||||||
Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?)
|
Mnemonic::$variant(target, Register::by_id(Operand::Word(reg.into()))?)
|
||||||
@@ -20,7 +20,7 @@ macro_rules! modrmv {
|
|||||||
|
|
||||||
#[macro_export]
|
#[macro_export]
|
||||||
/// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte.
|
/// Generate a Mnemonic for a 16-bit Segment Register from a ModRM byte.
|
||||||
macro_rules! modrms {
|
macro_rules! modrm_instruction_sregister {
|
||||||
($self:ident, $variant:ident) => {{
|
($self:ident, $variant:ident) => {{
|
||||||
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
|
let (target, reg) = $self.parse_modrm_byte(Operand::Word(0))?;
|
||||||
Mnemonic::$variant(target, SegmentRegister::by_id(reg)?)
|
Mnemonic::$variant(target, SegmentRegister::by_id(reg)?)
|
||||||
|
|||||||
@@ -296,6 +296,8 @@ pub enum Mnemonic {
|
|||||||
AAD(Byte),
|
AAD(Byte),
|
||||||
// MISC
|
// MISC
|
||||||
XLAT,
|
XLAT,
|
||||||
|
// Not part of 8086:
|
||||||
|
EOT, // End of Text Section
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for Mnemonic {
|
impl fmt::Display for Mnemonic {
|
||||||
|
|||||||
11
src/main.rs
11
src/main.rs
@@ -1,4 +1,5 @@
|
|||||||
use clap::{Parser, Subcommand};
|
use clap::{Parser, Subcommand};
|
||||||
|
use disasm::Disassembler;
|
||||||
|
|
||||||
mod aout;
|
mod aout;
|
||||||
mod disasm;
|
mod disasm;
|
||||||
@@ -37,14 +38,12 @@ fn main() {
|
|||||||
|
|
||||||
match args.command {
|
match args.command {
|
||||||
Command::Disasm => {
|
Command::Disasm => {
|
||||||
let instructions = disasm::disasm(&args);
|
let mut disasm = Disassembler::new(&args);
|
||||||
|
let instructions = disasm.disassemble();
|
||||||
match instructions {
|
match instructions {
|
||||||
Err(e) => {
|
Ok(instrs) => instrs.iter().for_each(|i| println!("{i}")),
|
||||||
println!("(undefined)");
|
|
||||||
println!("Encountered error during parsing: {e}")
|
|
||||||
}
|
|
||||||
_ => {}
|
_ => {}
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
_ => panic!("Command not yet implemented"),
|
_ => panic!("Command not yet implemented"),
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user