931 lines
38 KiB
Rust
931 lines
38 KiB
Rust
//! The main disassembling logic.
|
|
|
|
use crate::aout::Aout;
|
|
use crate::operands::{
|
|
Byte, DWord, Displacement, IByte, IWord, MemoryIndex, ModRmTarget, Operand, Pointer16,
|
|
Pointer32, Word,
|
|
};
|
|
use crate::register::{Register, RegisterId, SegmentRegister};
|
|
use crate::{
|
|
Args,
|
|
instructions::{Instruction, Mnemonic},
|
|
};
|
|
use crate::{modrm_8b_register, modrm_16b_register, modrm_sregister};
|
|
use core::fmt;
|
|
use std::{fs::File, io::Read};
|
|
|
|
/// Generic errors, which are encountered during parsing.
#[derive(Debug)]
pub enum DisasmError {
    /// No input path was supplied; carries the path option that was given.
    NoFile(Option<String>),
    /// An I/O error, converted from [`std::io::Error`].
    IoError(std::io::Error),
    /// The opcode byte is treated as undefined for the 8086.
    OpcodeUndefined(u8),
    /// The ModRM `reg` field selects no known mnemonic of a group
    /// (group number, `reg` bits).
    IllegalGroupMnemonic(u8, u8),
    /// The mode bits of a ModRM byte are unknown.
    IllegalModRMByteMode(u8),
    /// The index bits of a ModRM byte are unknown.
    IllegalModRMByteIndex(u8),
    /// An operand is not allowed for the instruction; carries a description.
    IllegalOperand(String),
    /// Another byte was requested, but there is no more text section.
    ReadBeyondTextSection,
    /// Not an error per se, it indicates a single 0x00 byte padding.
    EndOfTextSection,
    /// No register is known for the given ID.
    UnknownRegister(usize),
}

impl From<std::io::Error> for DisasmError {
    /// Wrap an I/O error, so `?` can be used on I/O results.
    fn from(error: std::io::Error) -> Self {
        DisasmError::IoError(error)
    }
}

impl fmt::Display for DisasmError {
    /// Write the user-facing message of the error.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            DisasmError::NoFile(msg) => write!(f, "No file error: {:?}", msg),
            DisasmError::IoError(msg) => write!(f, "{}", msg),
            // the opcode is quoted on both sides (the closing quote was missing)
            DisasmError::OpcodeUndefined(opcode) => write!(
                f,
                "Error (Undefined Opcode). '{:#04x}' is considered undefined by the Spec for 8086.\nMaybe you are trying to interpret a x86 binary?\nAborting to stop misinterpretation of following instructions.",
                opcode
            ),
            DisasmError::IllegalGroupMnemonic(group, mnemonic) => write!(
                f,
                "Error (Illegal group mnemonic). While parsing the ModRM reg field for groups, the following bit-combination for GRP{group} is unknown: {}",
                mnemonic
            ),
            DisasmError::IllegalModRMByteMode(modrm) => write!(
                f,
                "Error (Illegal modrm byte). While deconstructing a ModRM byte, the following mode is unknown: {}",
                modrm
            ),
            DisasmError::IllegalModRMByteIndex(modrm) => write!(
                f,
                "Error (Illegal modrm byte). While deconstructing a ModRM byte, the following index is unknown: {}",
                modrm
            ),
            DisasmError::IllegalOperand(msg) => write!(f, "Error (Illegal operand). {}", msg),
            DisasmError::ReadBeyondTextSection => write!(
                f,
                "Error (Out of bounds access). Wanted to parse an additional byte, but there is no more text section.",
            ),
            DisasmError::UnknownRegister(id) => write!(
                f,
                "Error (Unknown register). The register with ID {id} is unknown",
            ),
            DisasmError::EndOfTextSection => write!(f, "Warning. End of text section reached."),
        }
    }
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct Disassembler {
|
|
pub offset: usize, // the current offset in the disasm process
|
|
pub text: Vec<u8>, // the aout binary
|
|
pub instruction: Instruction, // the instruction, which is currently being parsed
|
|
pub instructions: Vec<Instruction>, // all parsed instructions
|
|
}
|
|
|
|
impl Disassembler {
|
|
pub fn new(args: &Args) -> Self {
|
|
let path = args
|
|
.path
|
|
.clone()
|
|
.ok_or(DisasmError::NoFile(args.path.clone()))
|
|
.unwrap();
|
|
let mut file = File::open(path).unwrap();
|
|
let mut buf = Vec::new();
|
|
file.read_to_end(&mut buf).unwrap();
|
|
let aout = Aout::new(buf);
|
|
log::debug!("{:?}", aout);
|
|
|
|
Disassembler {
|
|
offset: 0,
|
|
text: aout.text,
|
|
instruction: Instruction::new(),
|
|
instructions: Vec::new(),
|
|
}
|
|
}
|
|
|
|
/// Start the disassmble and allow for some error handling wrapped around
|
|
/// the actual decoding function.
|
|
pub fn disassemble(&mut self, dump: bool) -> Result<Vec<Instruction>, DisasmError> {
|
|
let is_ok = self.decode_instructions();
|
|
|
|
// a.out pads the text section to byte align, so the fasely interpreted
|
|
// instructions have to be removed.
|
|
self.remove_trailing_padding();
|
|
|
|
// read instructions from disassembler object instead of decode function
|
|
// to allow some error's to act as warnings (see below)
|
|
let instructions = self.instructions.clone();
|
|
|
|
// allow for warning-type errors to pass through, as they are not fatal
|
|
match is_ok {
|
|
Ok(_) => Ok(instructions),
|
|
Err(e) => match e {
|
|
DisasmError::EndOfTextSection => {
|
|
log::debug!("Solo padded 0-byte at end of file was found. Ignoring.");
|
|
Ok(instructions)
|
|
}
|
|
_ => {
|
|
if dump {
|
|
self.instructions.iter().for_each(|i| println!("{i}"));
|
|
println!(
|
|
"Encountered error during disassembly, but this is the process so far...\n{e}\nRun with RUST_LOG=debug for furhter information."
|
|
);
|
|
} else {
|
|
println!(
|
|
"Encountered error during disassembly.\nRun with --dump to get sucessfully parsed instructions.\n{e}"
|
|
);
|
|
}
|
|
Err(e)
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Parse a single byte of the binary and advance the offset.
|
|
/// Returns the read byte (Intel b operand).
|
|
fn parse_byte(&mut self) -> Result<Byte, DisasmError> {
|
|
log::debug!("Attempting to parse byte at {:#04x} ...", self.offset);
|
|
// check if the byte would be out of bounds
|
|
if self.offset + 1 == self.text.len() {
|
|
// check if text section ends with single 0x00 padding byte
|
|
if self.text[self.offset] == 0 {
|
|
return Err(DisasmError::EndOfTextSection);
|
|
// else its just an out of bounds read
|
|
} else {
|
|
return Err(DisasmError::ReadBeyondTextSection);
|
|
}
|
|
// if not, advance offset to next byte
|
|
} else {
|
|
self.offset += 1;
|
|
}
|
|
|
|
let byte = self
|
|
.text
|
|
.get(self.offset)
|
|
.ok_or(DisasmError::ReadBeyondTextSection)?;
|
|
log::debug!("Parsed byte {byte:#04x}");
|
|
self.instruction.raw.push(*byte);
|
|
Ok(*byte)
|
|
}
|
|
|
|
/// Parse a single word of the binary and advance the offset.
|
|
/// Just a wrapper for parsing a byte twice.
|
|
/// Returns the read word (Intel w/v operand).
|
|
fn parse_word(&mut self) -> Result<Word, DisasmError> {
|
|
log::debug!("Attempting to parse word at {:#04x} ...", self.offset);
|
|
let byte1 = self.parse_byte()?;
|
|
let byte2 = self.parse_byte()?;
|
|
Ok(u16::from_le_bytes([byte1, byte2]))
|
|
}
|
|
|
|
/// Parse a single of the binary, interpret it as signed and advance the
|
|
/// offset.
|
|
/// Returns the read byte added to the address of the subsequent instruction
|
|
/// to act as a relative offset (Intel Jb operand).
|
|
fn parse_j_byte(&mut self) -> Result<isize, DisasmError> {
|
|
log::debug!("Attempting to parse Jb at {:#04x} ...", self.offset);
|
|
// first interpret as 2-complement, then cast for addition
|
|
let byte = self.parse_byte()? as IByte as isize;
|
|
let next_addr = (self.offset + 1) as isize;
|
|
log::debug!(
|
|
"Parsed Jb consists of {byte:#04x} + {next_addr:#04x} = {:#04x}",
|
|
byte + next_addr
|
|
);
|
|
Ok(byte + next_addr)
|
|
}
|
|
|
|
/// Parse a word of the binary, interpret it as signed and advance the
|
|
/// offset.
|
|
/// Returns the read word added to the address of the subsequent instruction
|
|
/// to act as a relative offset (Intel Jw/Jv operand).
|
|
pub fn parse_j_word(&mut self) -> Result<isize, DisasmError> {
|
|
log::debug!("Attempting to parse Jv at {:#04x} ...", self.offset);
|
|
// first interpret as 2-complement, then cast for addition
|
|
let word = self.parse_word()? as IWord as isize;
|
|
let next_addr = (self.offset + 1) as isize;
|
|
log::debug!(
|
|
"Parsed Jv consists of {word:#04x} + {next_addr:#04x} = {:#04x}",
|
|
word + next_addr
|
|
);
|
|
Ok(word + next_addr)
|
|
}
|
|
|
|
/// Parse a single pointer of the binary and advance the offset.
|
|
/// Just a wrapper for parsing a byte 4 types and constructing a pointer
|
|
/// type.
|
|
/// Returns the read pointer (Intel p operand).
|
|
fn parse_ptr(&mut self) -> Result<Pointer32, DisasmError> {
|
|
log::debug!("Attempting to parse pointer at {:#04x} ...", self.offset);
|
|
let byte0 = self.parse_byte()?;
|
|
let byte1 = self.parse_byte()?;
|
|
let byte2 = self.parse_byte()?;
|
|
let byte3 = self.parse_byte()?;
|
|
|
|
Ok(Pointer32 {
|
|
raw: DWord::from_le_bytes([byte0, byte1, byte2, byte3]),
|
|
segment: Word::from_le_bytes([byte2, byte3]),
|
|
offset: Word::from_le_bytes([byte0, byte1]),
|
|
})
|
|
}
|
|
|
|
/// Parse a single ModRM byte, calculate the [`ModRmTarget`] (Memory or
|
|
/// Register) from that byte and advance the offset.
|
|
/// It is always just a single byte, even for word-width instructions.
|
|
/// Returns the [`ModRmTarget`] (either memory or a register) as well as the
|
|
/// `reg` bitfield, which will later be used to determine another register
|
|
/// or even mnemonic in the group-type instructions.
|
|
fn parse_modrm_byte(
|
|
&mut self,
|
|
register_width: Operand,
|
|
) -> Result<(ModRmTarget, RegisterId), DisasmError> {
|
|
let modrm = self.parse_byte()?;
|
|
|
|
let mode = (modrm >> 6) & 0b11;
|
|
let reg = (modrm >> 3) & 0b111;
|
|
let rm = modrm & 0b111;
|
|
|
|
log::debug!(
|
|
"{:#04x} deconstructed into: {:#b}, {:#b}, {:#b}",
|
|
modrm,
|
|
mode,
|
|
reg,
|
|
rm
|
|
);
|
|
|
|
// not unused, but overwritten before first read
|
|
#[allow(unused_assignments)]
|
|
let mut displacement = None;
|
|
match mode {
|
|
0b00 => {
|
|
if rm == 0b110 {
|
|
let word = Displacement::IWord(self.parse_word()? as IWord);
|
|
log::debug!("ModRM direct memory read at {word:?}");
|
|
displacement = Some(word);
|
|
return Ok((
|
|
ModRmTarget::Memory(MemoryIndex {
|
|
base: None,
|
|
index: None,
|
|
displacement,
|
|
}),
|
|
reg,
|
|
));
|
|
} else {
|
|
log::debug!("ModRM does not have a displacement");
|
|
displacement = None;
|
|
}
|
|
}
|
|
0b01 => {
|
|
let byte = Displacement::IByte(self.parse_byte()? as IByte);
|
|
log::debug!("ModRM has a single byte of displacement: {byte}.");
|
|
displacement = Some(byte);
|
|
}
|
|
0b10 => {
|
|
let word = Displacement::IWord(self.parse_word()? as IWord);
|
|
log::debug!("ModRM has a single word of displacement: {word}");
|
|
displacement = Some(word);
|
|
}
|
|
0b11 => {
|
|
log::debug!(
|
|
"ModRM selected Register to Register: ({rm:#b}) to/from RegID ({reg:#b})"
|
|
);
|
|
let target = match register_width {
|
|
Operand::Byte(_) => ModRmTarget::Register(Register::by_id(Operand::Byte(rm))?),
|
|
Operand::Word(_) => {
|
|
ModRmTarget::Register(Register::by_id(Operand::Word(rm as Word))?)
|
|
}
|
|
};
|
|
return Ok((target, reg));
|
|
}
|
|
_ => return Err(DisasmError::IllegalModRMByteMode(mode)),
|
|
};
|
|
|
|
let index = match rm {
|
|
0b0000 => MemoryIndex {
|
|
base: Some(Register::BX),
|
|
index: Some(Register::SI),
|
|
displacement,
|
|
},
|
|
0b0001 => MemoryIndex {
|
|
base: Some(Register::BX),
|
|
index: Some(Register::DI),
|
|
displacement,
|
|
},
|
|
0b0010 => MemoryIndex {
|
|
base: Some(Register::BP),
|
|
index: Some(Register::SI),
|
|
displacement,
|
|
},
|
|
0b0011 => MemoryIndex {
|
|
base: Some(Register::BP),
|
|
index: Some(Register::DI),
|
|
displacement,
|
|
},
|
|
0b0100 => MemoryIndex {
|
|
base: None,
|
|
index: Some(Register::SI),
|
|
displacement,
|
|
},
|
|
0b0101 => MemoryIndex {
|
|
base: None,
|
|
index: Some(Register::DI),
|
|
displacement,
|
|
},
|
|
0b0110 => MemoryIndex {
|
|
base: Some(Register::BP),
|
|
index: None,
|
|
displacement,
|
|
},
|
|
0b0111 => MemoryIndex {
|
|
base: Some(Register::BX),
|
|
index: None,
|
|
displacement,
|
|
},
|
|
_ => return Err(DisasmError::IllegalModRMByteIndex(rm)),
|
|
};
|
|
|
|
Ok((ModRmTarget::Memory(index), reg))
|
|
}
|
|
|
|
/// Match the ModRM `reg` bitfield to Intel Group 1-type instructions. Group
|
|
/// 1 always has an [`ModRmTarget`] as first and a [`Register`] as second
|
|
/// operand, which is determined by the ModRM `reg` field, aswell as the
|
|
/// bit-width of the instruction currently being parsed.
|
|
fn modrm_reg_to_grp1(
|
|
reg: u8,
|
|
target: ModRmTarget,
|
|
instruction_width: Operand,
|
|
) -> Result<Mnemonic, DisasmError> {
|
|
match instruction_width {
|
|
Operand::Byte(b) => match reg {
|
|
0b000 => Ok(Mnemonic::ADD_Ib(target, b)),
|
|
0b001 => Ok(Mnemonic::OR_Ib(target, b)),
|
|
0b010 => Ok(Mnemonic::ADC_Ib(target, b)),
|
|
0b011 => Ok(Mnemonic::SBB_Ib(target, b)),
|
|
0b100 => Ok(Mnemonic::AND_Ib(target, b)),
|
|
0b101 => Ok(Mnemonic::SUB_Ib(target, b)),
|
|
0b110 => Ok(Mnemonic::XOR_Ib(target, b)),
|
|
0b111 => Ok(Mnemonic::CMP_Ib(target, b)),
|
|
_ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
|
|
},
|
|
Operand::Word(w) => match reg {
|
|
0b000 => Ok(Mnemonic::ADD_Iv(target, w)),
|
|
0b001 => Ok(Mnemonic::OR_Iv(target, w)),
|
|
0b010 => Ok(Mnemonic::ADC_Iv(target, w)),
|
|
0b011 => Ok(Mnemonic::SBB_Iv(target, w)),
|
|
0b100 => Ok(Mnemonic::AND_Iv(target, w)),
|
|
0b101 => Ok(Mnemonic::SUB_Iv(target, w)),
|
|
0b110 => Ok(Mnemonic::XOR_Iv(target, w)),
|
|
0b111 => Ok(Mnemonic::CMP_Iv(target, w)),
|
|
_ => return Err(DisasmError::IllegalGroupMnemonic(1, reg)),
|
|
},
|
|
}
|
|
}
|
|
|
|
/// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2
|
|
/// always only has a single operand, the other is either `1` or the `CL`
|
|
/// register.
|
|
/// This function assumes the operand to be `1`.
|
|
/// See [`Self::modrm_reg_to_grp2_cl`] for the counter part.
|
|
fn modrm_reg_to_grp2_1(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
|
match reg {
|
|
0b000 => Ok(Mnemonic::ROL_b(target, 1)),
|
|
0b001 => Ok(Mnemonic::ROR_b(target, 1)),
|
|
0b010 => Ok(Mnemonic::RCL_b(target, 1)),
|
|
0b011 => Ok(Mnemonic::RCR_b(target, 1)),
|
|
0b100 => Ok(Mnemonic::SHL_b(target, 1)),
|
|
0b101 => Ok(Mnemonic::SHR_b(target, 1)),
|
|
0b110 => Ok(Mnemonic::SAR_b(target, 1)),
|
|
0b111 => Ok(Mnemonic::SAR_b(target, 1)),
|
|
_ => return Err(DisasmError::IllegalGroupMnemonic(2, reg)),
|
|
}
|
|
}
|
|
|
|
/// Match the ModRM `reg` bits to Intel Group 2-type instructions. Group 2
|
|
/// always only has a single operand, the other is either `1` or the `CL`
|
|
/// register.
|
|
/// This function assumes the operand to be [`Register::CL`].
|
|
/// See [`Self::modrm_reg_to_grp2_cl`] for the counter part.
|
|
fn modrm_reg_to_grp2_cl(reg: u8, target: ModRmTarget) -> Result<Mnemonic, DisasmError> {
|
|
match reg {
|
|
0b000 => Ok(Mnemonic::ROL_fromReg(target, Register::CL)),
|
|
0b001 => Ok(Mnemonic::ROR_fromReg(target, Register::CL)),
|
|
0b010 => Ok(Mnemonic::RCL_fromReg(target, Register::CL)),
|
|
0b011 => Ok(Mnemonic::RCR_fromReg(target, Register::CL)),
|
|
0b100 => Ok(Mnemonic::SHL_fromReg(target, Register::CL)),
|
|
0b101 => Ok(Mnemonic::SHR_fromReg(target, Register::CL)),
|
|
0b110 => Ok(Mnemonic::SAR_fromReg(target, Register::CL)),
|
|
0b111 => Ok(Mnemonic::SAR_fromReg(target, Register::CL)),
|
|
_ => return Err(DisasmError::IllegalGroupMnemonic(2, reg)),
|
|
}
|
|
}
|
|
|
|
/// Match the ModRM `reg` bits to Intel Group 3a/b-type instructions.
|
|
/// Group 3 selects an unary mnemonic with the `reg` bit fiels. The operand
|
|
/// is the [`ModRmTarget`].
|
|
fn modrm_reg_to_grp3(
|
|
&mut self,
|
|
reg: u8,
|
|
target: ModRmTarget,
|
|
width: Operand,
|
|
) -> Result<Mnemonic, DisasmError> {
|
|
match reg {
|
|
0b000 => match width {
|
|
Operand::Byte(_) => Ok(Mnemonic::TEST_Ib(target, self.parse_byte()?)),
|
|
Operand::Word(_) => Ok(Mnemonic::TEST_Iv(target, self.parse_word()?)),
|
|
},
|
|
// 0b001 => // unused
|
|
0b010 => Ok(Mnemonic::NOT(target)),
|
|
0b011 => Ok(Mnemonic::NEG(target)),
|
|
0b100 => Ok(Mnemonic::MUL(target)),
|
|
0b101 => Ok(Mnemonic::IMUL(target)),
|
|
0b110 => Ok(Mnemonic::DIV(target)),
|
|
0b111 => Ok(Mnemonic::IDIV(target)),
|
|
_ => Err(DisasmError::IllegalGroupMnemonic(3, reg)),
|
|
}
|
|
}
|
|
|
|
/// a.out pads the text section with 0x00 bytes. During parsing, these get
|
|
/// interpreted as `0x00 0x00`, which have to get removed for an authentic
|
|
/// disassembly.
|
|
/// This is done in favor of removing all 0x00 bytes in the beginning,
|
|
/// as this could remove an actual `0x00` byte as operand of the final
|
|
/// real instruction. Of course, this could remove an actual `0x00 0x00`
|
|
/// instruction from the end, but they would not have any effect on
|
|
/// execution anyway.
|
|
fn remove_trailing_padding(&mut self) {
|
|
let mut until = self.instructions.len();
|
|
for i in self.instructions.iter().rev() {
|
|
match i.opcode {
|
|
// 0x00 0x00 in binary
|
|
Mnemonic::ADD_FromReg(
|
|
ModRmTarget::Memory(MemoryIndex {
|
|
base: Some(Register::BX),
|
|
index: Some(Register::SI),
|
|
displacement: None,
|
|
}),
|
|
Register::AL,
|
|
) => until -= 1,
|
|
// stop when another instruction is hit
|
|
_ => break,
|
|
}
|
|
}
|
|
log::debug!(
|
|
"Truncated file by {} bytes by removing trailing padding bytes.",
|
|
self.text.len() - until
|
|
);
|
|
self.instructions.truncate(until);
|
|
}
|
|
|
|
/// Decode instructions by matching byte signature to their mnemonics and
|
|
/// depending on the instruction, parsing some operands afterwards.
|
|
/// All parsing is done in capsulated functions, here everything just
|
|
/// gets consolodated.
|
|
fn decode_instructions(&mut self) -> Result<(), DisasmError> {
|
|
log::debug!("Starting to decode text of length {}", self.text.len());
|
|
while self.offset < self.text.len() {
|
|
// reset mutable current instruction
|
|
self.instruction = Instruction::new();
|
|
self.instruction.start = self.offset;
|
|
|
|
// fetch next opcode
|
|
let opcode = self.text[self.offset];
|
|
|
|
// additional raw bytes will be pushed by parse functions
|
|
self.instruction.raw.push(opcode);
|
|
|
|
log::debug!("Parsing next opcode with opcode: {opcode:#04x}");
|
|
self.instruction.opcode = match opcode {
|
|
0x00 => modrm_8b_register!(self, ADD_FromReg),
|
|
0x01 => modrm_16b_register!(self, ADD_FromReg),
|
|
0x02 => modrm_8b_register!(self, ADD_ToReg),
|
|
0x03 => modrm_16b_register!(self, ADD_ToReg),
|
|
0x04 => Mnemonic::ADD_ALIb(self.parse_byte()?),
|
|
0x05 => Mnemonic::ADD_AXIv(self.parse_word()?),
|
|
|
|
0x06 => Mnemonic::PUSH_S(SegmentRegister::ES),
|
|
0x07 => Mnemonic::POP_S(SegmentRegister::ES),
|
|
|
|
0x08 => modrm_8b_register!(self, OR_FromReg),
|
|
0x09 => modrm_16b_register!(self, OR_FromReg),
|
|
0x0A => modrm_8b_register!(self, OR_ToReg),
|
|
0x0B => modrm_16b_register!(self, OR_ToReg),
|
|
0x0C => Mnemonic::OR_ALIb(self.parse_byte()?),
|
|
0x0D => Mnemonic::OR_AXIv(self.parse_word()?),
|
|
|
|
0x0E => Mnemonic::PUSH_S(SegmentRegister::CS),
|
|
|
|
0x0F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
|
|
|
0x10 => modrm_8b_register!(self, ADC_FromReg),
|
|
0x11 => modrm_16b_register!(self, ADC_FromReg),
|
|
0x12 => modrm_8b_register!(self, ADC_ToReg),
|
|
0x13 => modrm_16b_register!(self, ADC_ToReg),
|
|
0x14 => Mnemonic::ADC_ALIb(self.parse_byte()?),
|
|
0x15 => Mnemonic::ADC_AXIv(self.parse_word()?),
|
|
|
|
0x16 => Mnemonic::PUSH_S(SegmentRegister::SS),
|
|
0x17 => Mnemonic::POP_S(SegmentRegister::SS),
|
|
|
|
0x18 => modrm_8b_register!(self, SBB_FromReg),
|
|
0x19 => modrm_16b_register!(self, SBB_FromReg),
|
|
0x1A => modrm_8b_register!(self, SBB_ToReg),
|
|
0x1B => modrm_16b_register!(self, SBB_ToReg),
|
|
0x1C => Mnemonic::SBB_ALIb(self.parse_byte()?),
|
|
0x1D => Mnemonic::SBB_AXIv(self.parse_word()?),
|
|
|
|
0x1E => Mnemonic::PUSH_S(SegmentRegister::DS),
|
|
0x1F => Mnemonic::POP_S(SegmentRegister::DS),
|
|
|
|
0x20 => modrm_8b_register!(self, AND_FromReg),
|
|
0x21 => modrm_16b_register!(self, AND_FromReg),
|
|
0x22 => modrm_8b_register!(self, AND_ToReg),
|
|
0x23 => modrm_16b_register!(self, AND_ToReg),
|
|
0x24 => Mnemonic::AND_ALIb(self.parse_byte()?),
|
|
0x25 => Mnemonic::AND_AXIv(self.parse_word()?),
|
|
|
|
0x26 => Mnemonic::OVERRIDE(SegmentRegister::ES),
|
|
0x27 => Mnemonic::DAA,
|
|
|
|
0x28 => modrm_8b_register!(self, SUB_FromReg),
|
|
0x29 => modrm_16b_register!(self, SUB_FromReg),
|
|
0x2A => modrm_8b_register!(self, SUB_ToReg),
|
|
0x2B => modrm_16b_register!(self, SUB_ToReg),
|
|
0x2C => Mnemonic::SUB_ALIb(self.parse_byte()?),
|
|
0x2D => Mnemonic::SUB_AXIv(self.parse_word()?),
|
|
|
|
0x2E => Mnemonic::OVERRIDE(SegmentRegister::CS),
|
|
0x2F => Mnemonic::DAS,
|
|
|
|
0x30 => modrm_8b_register!(self, XOR_FromReg),
|
|
0x31 => modrm_16b_register!(self, XOR_FromReg),
|
|
0x32 => modrm_8b_register!(self, XOR_ToReg),
|
|
0x33 => modrm_16b_register!(self, XOR_ToReg),
|
|
0x34 => Mnemonic::XOR_ALIb(self.parse_byte()?),
|
|
0x35 => Mnemonic::XOR_AXIv(self.parse_word()?),
|
|
|
|
0x36 => Mnemonic::OVERRIDE(SegmentRegister::SS),
|
|
0x37 => Mnemonic::AAA,
|
|
|
|
0x38 => modrm_8b_register!(self, CMP_FromReg),
|
|
0x39 => modrm_16b_register!(self, CMP_FromReg),
|
|
0x3A => modrm_8b_register!(self, CMP_ToReg),
|
|
0x3B => modrm_16b_register!(self, CMP_ToReg),
|
|
0x3C => Mnemonic::CMP_ALIb(self.parse_byte()?),
|
|
0x3D => Mnemonic::CMP_AXIv(self.parse_word()?),
|
|
|
|
0x3E => Mnemonic::OVERRIDE(SegmentRegister::DS),
|
|
0x3F => Mnemonic::AAS,
|
|
|
|
0x40 => Mnemonic::INC_Reg(Register::AX),
|
|
0x41 => Mnemonic::INC_Reg(Register::CX),
|
|
0x42 => Mnemonic::INC_Reg(Register::DX),
|
|
0x43 => Mnemonic::INC_Reg(Register::BX),
|
|
0x44 => Mnemonic::INC_Reg(Register::SP),
|
|
0x45 => Mnemonic::INC_Reg(Register::BP),
|
|
0x46 => Mnemonic::INC_Reg(Register::SI),
|
|
0x47 => Mnemonic::INC_Reg(Register::DI),
|
|
|
|
0x48 => Mnemonic::DEC_Reg(Register::AX),
|
|
0x49 => Mnemonic::DEC_Reg(Register::CX),
|
|
0x4A => Mnemonic::DEC_Reg(Register::DX),
|
|
0x4B => Mnemonic::DEC_Reg(Register::BX),
|
|
0x4C => Mnemonic::DEC_Reg(Register::SP),
|
|
0x4D => Mnemonic::DEC_Reg(Register::BP),
|
|
0x4E => Mnemonic::DEC_Reg(Register::SI),
|
|
0x4F => Mnemonic::DEC_Reg(Register::DI),
|
|
|
|
0x50 => Mnemonic::PUSH_R(Register::AX),
|
|
0x51 => Mnemonic::PUSH_R(Register::CX),
|
|
0x52 => Mnemonic::PUSH_R(Register::DX),
|
|
0x53 => Mnemonic::PUSH_R(Register::BX),
|
|
0x54 => Mnemonic::PUSH_R(Register::SP),
|
|
0x55 => Mnemonic::PUSH_R(Register::BP),
|
|
0x56 => Mnemonic::PUSH_R(Register::SI),
|
|
0x57 => Mnemonic::PUSH_R(Register::DI),
|
|
|
|
0x58 => Mnemonic::POP_R(Register::AX),
|
|
0x59 => Mnemonic::POP_R(Register::CX),
|
|
0x5A => Mnemonic::POP_R(Register::DX),
|
|
0x5B => Mnemonic::POP_R(Register::BX),
|
|
0x5C => Mnemonic::POP_R(Register::SP),
|
|
0x5D => Mnemonic::POP_R(Register::BP),
|
|
0x5E => Mnemonic::POP_R(Register::SI),
|
|
0x5F => Mnemonic::POP_R(Register::DI),
|
|
|
|
0x60..=0x6F => return Err(DisasmError::OpcodeUndefined(opcode)),
|
|
|
|
0x70 => Mnemonic::JO(self.parse_j_byte()?),
|
|
0x71 => Mnemonic::JNO(self.parse_j_byte()?),
|
|
0x72 => Mnemonic::JB(self.parse_j_byte()?),
|
|
0x73 => Mnemonic::JNB(self.parse_j_byte()?),
|
|
0x74 => Mnemonic::JZ(self.parse_j_byte()?),
|
|
0x75 => Mnemonic::JNZ(self.parse_j_byte()?),
|
|
0x76 => Mnemonic::JBE(self.parse_j_byte()?),
|
|
0x77 => Mnemonic::JA(self.parse_j_byte()?),
|
|
0x78 => Mnemonic::JS(self.parse_j_byte()?),
|
|
0x79 => Mnemonic::JNS(self.parse_j_byte()?),
|
|
0x7A => Mnemonic::JPE(self.parse_j_byte()?),
|
|
0x7B => Mnemonic::JPO(self.parse_j_byte()?),
|
|
0x7C => Mnemonic::JL(self.parse_j_byte()?),
|
|
0x7D => Mnemonic::JGE(self.parse_j_byte()?),
|
|
0x7E => Mnemonic::JLE(self.parse_j_byte()?),
|
|
0x7F => Mnemonic::JG(self.parse_j_byte()?),
|
|
|
|
// Group 1
|
|
0x80 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
let imm = self.parse_byte()?;
|
|
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
|
|
}
|
|
0x81 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
let imm = self.parse_word()?;
|
|
Self::modrm_reg_to_grp1(reg, target, Operand::Word(imm))?
|
|
}
|
|
0x82 => {
|
|
// same as 0x80
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
let imm = self.parse_byte()?;
|
|
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
|
|
}
|
|
0x83 => {
|
|
// byte extended version
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
let imm = self.parse_byte()?;
|
|
Self::modrm_reg_to_grp1(reg, target, Operand::Byte(imm))?
|
|
}
|
|
|
|
0x84 => modrm_8b_register!(self, TEST),
|
|
0x85 => modrm_16b_register!(self, TEST),
|
|
|
|
0x86 => modrm_8b_register!(self, XCHG),
|
|
0x87 => modrm_16b_register!(self, XCHG),
|
|
|
|
0x88 => modrm_8b_register!(self, MOV_FromReg),
|
|
0x89 => modrm_16b_register!(self, MOV_FromReg),
|
|
0x8A => modrm_8b_register!(self, MOV_ToReg),
|
|
0x8B => modrm_16b_register!(self, MOV_ToReg),
|
|
0x8C => modrm_sregister!(self, MOV_FromSReg),
|
|
0x8E => modrm_sregister!(self, MOV_ToSReg),
|
|
|
|
0x8D => modrm_16b_register!(self, LEA),
|
|
|
|
0x8F => {
|
|
let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
let mem = match target {
|
|
ModRmTarget::Memory(idx) => idx,
|
|
_ => {
|
|
return Err(DisasmError::IllegalOperand(
|
|
"POP (memory) instruction given a register to pop into".into(),
|
|
));
|
|
}
|
|
};
|
|
Mnemonic::POP_M(mem)
|
|
}
|
|
0x90 => Mnemonic::NOP(),
|
|
|
|
0x91 => Mnemonic::XCHG_AX(Register::CX),
|
|
0x92 => Mnemonic::XCHG_AX(Register::DX),
|
|
0x93 => Mnemonic::XCHG_AX(Register::BX),
|
|
0x94 => Mnemonic::XCHG_AX(Register::SP),
|
|
0x95 => Mnemonic::XCHG_AX(Register::BP),
|
|
0x96 => Mnemonic::XCHG_AX(Register::SI),
|
|
0x97 => Mnemonic::XCHG_AX(Register::DI),
|
|
|
|
0x98 => Mnemonic::CBW,
|
|
0x99 => Mnemonic::CWD,
|
|
|
|
0x9A => Mnemonic::CALL_p(self.parse_ptr()?),
|
|
|
|
0x9B => Mnemonic::WAIT,
|
|
|
|
0x9C => Mnemonic::PUSHF,
|
|
0x9D => Mnemonic::POPF,
|
|
0x9E => Mnemonic::SAHF,
|
|
0x9F => Mnemonic::LAHF,
|
|
|
|
0xA0 => Mnemonic::MOV_AL0b(self.parse_byte()?),
|
|
0xA1 => Mnemonic::MOV_AX0v(self.parse_word()?),
|
|
0xA2 => Mnemonic::MOV_0bAL(self.parse_byte()?),
|
|
0xA3 => Mnemonic::MOV_0vAX(self.parse_word()?),
|
|
0xA4 => Mnemonic::MOVSB,
|
|
0xA5 => Mnemonic::MOVSW,
|
|
|
|
0xA6 => Mnemonic::CMPSB,
|
|
0xA7 => Mnemonic::CMPSW,
|
|
|
|
0xA8 => Mnemonic::TEST_ALIb(self.parse_byte()?),
|
|
0xA9 => Mnemonic::TEST_AXIv(self.parse_word()?),
|
|
|
|
0xAA => Mnemonic::STOSB,
|
|
0xAB => Mnemonic::STOSW,
|
|
0xAC => Mnemonic::LODSB,
|
|
0xAD => Mnemonic::LODSW,
|
|
0xAE => Mnemonic::SCASB,
|
|
0xAF => Mnemonic::SCASW,
|
|
|
|
0xB0 => Mnemonic::MOV_ALIb(self.parse_byte()?),
|
|
0xB1 => Mnemonic::MOV_CLIb(self.parse_byte()?),
|
|
0xB2 => Mnemonic::MOV_DLIb(self.parse_byte()?),
|
|
0xB3 => Mnemonic::MOV_BLIb(self.parse_byte()?),
|
|
0xB4 => Mnemonic::MOV_AHIb(self.parse_byte()?),
|
|
0xB5 => Mnemonic::MOV_CHIb(self.parse_byte()?),
|
|
0xB6 => Mnemonic::MOV_DHIb(self.parse_byte()?),
|
|
0xB7 => Mnemonic::MOV_BHIb(self.parse_byte()?),
|
|
0xB8 => Mnemonic::MOV_AXIv(self.parse_word()?),
|
|
0xB9 => Mnemonic::MOV_CXIv(self.parse_word()?),
|
|
0xBA => Mnemonic::MOV_DXIv(self.parse_word()?),
|
|
0xBB => Mnemonic::MOV_BXIv(self.parse_word()?),
|
|
0xBC => Mnemonic::MOV_SPIv(self.parse_word()?),
|
|
0xBD => Mnemonic::MOV_BPIv(self.parse_word()?),
|
|
0xBE => Mnemonic::MOV_SIIv(self.parse_word()?),
|
|
0xBF => Mnemonic::MOV_DIIv(self.parse_word()?),
|
|
|
|
0xC0..=0xC1 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
|
|
|
0xC2 => Mnemonic::RET_Iw(self.parse_word()?),
|
|
0xC3 => Mnemonic::RET,
|
|
|
|
0xC4 => {
|
|
let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
let reg = Register::by_id(Operand::Word(reg_id as Word))?;
|
|
let ptr = Pointer16::try_from(target)?;
|
|
Mnemonic::LES(reg, ptr)
|
|
}
|
|
0xC5 => {
|
|
let (target, reg_id) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
let reg = Register::by_id(Operand::Word(reg_id as Word))?;
|
|
let ptr = Pointer16::try_from(target)?;
|
|
Mnemonic::LDS(reg, ptr)
|
|
}
|
|
|
|
0xC6 => {
|
|
let (target, _) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
Mnemonic::MOV_Ib(target, self.parse_byte()?)
|
|
}
|
|
0xC7 => {
|
|
let (target, _) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
Mnemonic::MOV_Iv(target, self.parse_word()?)
|
|
}
|
|
|
|
0xC8..=0xC9 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
|
|
|
0xCA => Mnemonic::RETF_Iw(self.parse_word()?),
|
|
0xCB => Mnemonic::RETF,
|
|
|
|
0xCC => Mnemonic::INT(3),
|
|
0xCD => Mnemonic::INT(self.parse_byte()?),
|
|
|
|
0xCE => Mnemonic::INTO,
|
|
0xCF => Mnemonic::IRET,
|
|
|
|
// Group 2
|
|
0xD0 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
Self::modrm_reg_to_grp2_1(reg, target)?
|
|
}
|
|
0xD1 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
Self::modrm_reg_to_grp2_1(reg, target)?
|
|
}
|
|
0xD2 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
Self::modrm_reg_to_grp2_cl(reg, target)?
|
|
}
|
|
0xD3 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
Self::modrm_reg_to_grp2_cl(reg, target)?
|
|
}
|
|
|
|
0xD4 => Mnemonic::AAM(self.parse_byte()?),
|
|
0xD5 => Mnemonic::AAD(self.parse_byte()?),
|
|
|
|
0xD6 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
|
|
|
0xD7 => Mnemonic::XLAT,
|
|
|
|
0xD8..=0xDF => return Err(DisasmError::OpcodeUndefined(opcode)),
|
|
|
|
0xE0 => Mnemonic::LOOPNZ(self.parse_j_byte()?),
|
|
0xE1 => Mnemonic::LOOPZ(self.parse_j_byte()?),
|
|
0xE2 => Mnemonic::LOOP(self.parse_j_byte()?),
|
|
0xE3 => Mnemonic::JCXZ(self.parse_j_byte()?),
|
|
|
|
0xE4 => Mnemonic::IN_AL(self.parse_byte()?),
|
|
0xE5 => Mnemonic::IN_AX(self.parse_byte()?),
|
|
0xE6 => Mnemonic::OUT_AL(self.parse_byte()?),
|
|
0xE7 => Mnemonic::OUT_AX(self.parse_byte()?),
|
|
|
|
0xE8 => Mnemonic::CALL_v(self.parse_j_word()?),
|
|
|
|
0xE9 => Mnemonic::JMP_v(self.parse_j_word()?),
|
|
0xEA => Mnemonic::JMP_p(self.parse_ptr()?),
|
|
0xEB => Mnemonic::JMP_b(self.parse_j_byte()?),
|
|
|
|
0xEC => Mnemonic::IN_ALDX,
|
|
0xED => Mnemonic::IN_AXDX,
|
|
|
|
0xEE => Mnemonic::OUT_ALDX,
|
|
0xEF => Mnemonic::OUT_AXDX,
|
|
|
|
0xF0 => Mnemonic::LOCK,
|
|
0xF1 => return Err(DisasmError::OpcodeUndefined(opcode)),
|
|
|
|
0xF2 => Mnemonic::REPNZ,
|
|
0xF3 => Mnemonic::REPZ,
|
|
|
|
0xF4 => Mnemonic::HLT,
|
|
|
|
0xF5 => Mnemonic::CMC,
|
|
|
|
// Group 3a
|
|
0xF6 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
self.modrm_reg_to_grp3(reg, target, Operand::Byte(0))?
|
|
}
|
|
// Group 3b
|
|
0xF7 => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
self.modrm_reg_to_grp3(reg, target, Operand::Word(0))?
|
|
}
|
|
|
|
0xF8 => Mnemonic::CLC,
|
|
0xF9 => Mnemonic::STC,
|
|
0xFA => Mnemonic::CLI,
|
|
0xFB => Mnemonic::STI,
|
|
0xFC => Mnemonic::CLD,
|
|
0xFD => Mnemonic::STD,
|
|
|
|
// Group 4
|
|
0xFE => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Byte(0))?;
|
|
match reg {
|
|
0b0 => Mnemonic::INC_Mod(target),
|
|
0b1 => Mnemonic::DEC_Mod(target),
|
|
_ => return Err(DisasmError::IllegalGroupMnemonic(4, reg)),
|
|
}
|
|
}
|
|
|
|
// Group 5
|
|
0xFF => {
|
|
let (target, reg) = self.parse_modrm_byte(Operand::Word(0))?;
|
|
match reg {
|
|
0b000 => Mnemonic::INC_Mod(target),
|
|
0b001 => Mnemonic::DEC_Mod(target),
|
|
0b010 => Mnemonic::CALL_Mod(target),
|
|
0b011 => Mnemonic::CALL_Mp(Pointer16::try_from(target)?),
|
|
0b100 => Mnemonic::JMP_Mod(target),
|
|
0b101 => Mnemonic::JMP_Mp(Pointer16::try_from(target)?),
|
|
0b110 => Mnemonic::PUSH_Mod(target),
|
|
// 0b111 => unused
|
|
_ => return Err(DisasmError::IllegalGroupMnemonic(5, reg)),
|
|
}
|
|
}
|
|
};
|
|
|
|
// Save parsed instruction
|
|
log::debug!("{}", self.instruction);
|
|
self.instructions.push(self.instruction.clone());
|
|
|
|
// Advance offset to hover the next potential opcode
|
|
self.offset += 1;
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Two zero bytes decode to a single `ADD [BX+SI], AL` instruction.
    #[test]
    fn test_basic() {
        let mut disassembler = Disassembler {
            offset: 0,
            text: vec![0x0, 0x0],
            instruction: Instruction::new(),
            instructions: Vec::new(),
        };
        disassembler.decode_instructions().unwrap();

        let expected = Instruction {
            start: 0,
            raw: vec![0, 0],
            opcode: Mnemonic::ADD_FromReg(
                ModRmTarget::Memory(MemoryIndex {
                    base: Some(Register::BX),
                    index: Some(Register::SI),
                    displacement: None,
                }),
                Register::AL,
            ),
        };
        assert_eq!(disassembler.instructions[0], expected)
    }
}
|