ft: initial disasm of example data
This only contains two instructions of which I know the correct output.
This commit is contained in:
62
src/aout.rs
Normal file
62
src/aout.rs
Normal file
@@ -0,0 +1,62 @@
|
||||
use std::ffi::{c_uchar, c_ushort};
|
||||
|
||||
/// Integer type used for the C `long` fields of the a.out header.
/// The a.out flavour handled here stores them as 32-bit values.
#[allow(non_camel_case_types)]
pub type c_long = i32;

/// An a.out image split into its header, text segment and data segment.
#[derive(Debug)]
#[allow(dead_code)]
pub struct Aout {
    /// Parsed file header.
    pub header: Header,
    /// Raw bytes of the text segment.
    pub text: Vec<u8>,
    /// Raw bytes of the data segment.
    pub data: Vec<u8>,
}

impl Aout {
    /// Parse a raw a.out image.
    ///
    /// The first 32 bytes of `buf` are read as the [`Header`]. The text
    /// segment starts at offset `header.hdrlen` and is `header.text` bytes
    /// long; the data segment starts directly after the text segment and is
    /// `header.data` bytes long.
    ///
    /// # Panics
    ///
    /// Panics if `buf` is shorter than the 32 header bytes, if the header
    /// declares a negative text or data size, or if the declared segments
    /// extend past the end of `buf`.
    pub fn new(buf: Vec<u8>) -> Self {
        // Number of bytes covered by the header fields read below.
        const FIXED_HEADER_LEN: usize = 32;
        assert!(
            buf.len() >= FIXED_HEADER_LEN,
            "a.out image too short: {} bytes, header needs {}",
            buf.len(),
            FIXED_HEADER_LEN
        );

        // All multi-byte header fields are read little-endian.
        let long_at = |at: usize| -> c_long {
            c_long::from_le_bytes([buf[at], buf[at + 1], buf[at + 2], buf[at + 3]])
        };

        let hdr = Header {
            magic: [buf[0], buf[1]],
            flags: buf[2],
            cpu: buf[3],
            hdrlen: buf[4],
            unused: buf[5],
            // Same byte order as the `long` fields; this was previously read
            // big-endian, unlike every other field of the header.
            version: c_ushort::from_le_bytes([buf[6], buf[7]]),
            text: long_at(8),
            data: long_at(12),
            bss: long_at(16),
            entry: long_at(20),
            total: long_at(24),
            syms: long_at(28),
        };

        // A negative size would turn into a huge `usize` when cast, so it is
        // rejected explicitly instead.
        let segment_len = |len: c_long, what: &str| -> usize {
            assert!(
                len >= 0,
                "negative {} segment size in a.out header: {}",
                what,
                len
            );
            len as usize
        };

        let text_start = usize::from(hdr.hdrlen);
        let text_end = text_start + segment_len(hdr.text, "text");
        // The data segment begins at the byte right after the text segment.
        // The former `text_end + 1` dropped the first data byte and read one
        // byte too far.
        let data_start = text_end;
        let data_end = data_start + segment_len(hdr.data, "data");
        assert!(
            data_end <= buf.len(),
            "a.out header describes {} bytes but the image has only {}",
            data_end,
            buf.len()
        );

        Aout {
            text: buf[text_start..text_end].to_vec(),
            data: buf[data_start..data_end].to_vec(),
            header: hdr,
        }
    }
}

/// The header found at the start of an a.out image.
#[derive(Debug)]
#[allow(dead_code)]
pub struct Header {
    /// Magic number.
    pub magic: [c_uchar; 2],
    /// Flags.
    pub flags: c_uchar,
    /// CPU id.
    pub cpu: c_uchar,
    /// Length of the header.
    pub hdrlen: c_uchar,
    /// Reserved for future use.
    pub unused: c_uchar,
    /// Version stamp.
    pub version: c_ushort,
    /// Size of the text segment in bytes.
    pub text: c_long,
    /// Size of the data segment in bytes.
    pub data: c_long,
    /// Size of the bss segment in bytes.
    pub bss: c_long,
    /// Entry point.
    pub entry: c_long,
    /// Total memory allocated.
    pub total: c_long,
    /// Size of the symbol table.
    pub syms: c_long,
}
|
||||
1
src/decode.rs
Normal file
1
src/decode.rs
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
103
src/disasm.rs
Normal file
103
src/disasm.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
use core::fmt;
|
||||
use std::{fs::File, io::Read, process::exit};
|
||||
|
||||
use crate::aout::Aout;
|
||||
use crate::{
|
||||
Args,
|
||||
instructions::{ImmediateByte, ImmediateWord, Instruction, MetaInstruction, Register},
|
||||
};
|
||||
|
||||
/// Errors that can occur while disassembling a binary.
#[derive(Debug)]
pub enum DisasmError {
    /// No input file could be determined; carries the path option involved.
    NoFile(Option<String>),
    /// An I/O operation on the input file failed.
    IoError(std::io::Error),
}

/// Lets `?` turn an I/O error into a [`DisasmError::IoError`].
impl From<std::io::Error> for DisasmError {
    fn from(error: std::io::Error) -> Self {
        Self::IoError(error)
    }
}

/// Human-readable rendering of the error.
impl fmt::Display for DisasmError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::NoFile(path) => write!(f, "No file error: {:?}", path),
            Self::IoError(cause) => write!(f, "{}", cause),
        }
    }
}
|
||||
|
||||
/// Disassemble the binary in `path` into a vector of instructions
|
||||
/// This function just calls all other
|
||||
pub fn disasm(args: &Args) -> Result<Vec<MetaInstruction>, DisasmError> {
|
||||
let contents = path_to_buf(args)?;
|
||||
let aout = Aout::new(contents);
|
||||
|
||||
// XXX: 00 is just 0, maybe this could be a problem?
|
||||
log::debug!("{:?}", aout);
|
||||
|
||||
let instructions = decode_instructions(&aout)?;
|
||||
|
||||
Ok(instructions)
|
||||
}
|
||||
|
||||
/// Read a filepath into a buffer
|
||||
fn path_to_buf(args: &Args) -> Result<Vec<u8>, DisasmError> {
|
||||
let path = args
|
||||
.path
|
||||
.clone()
|
||||
.ok_or(DisasmError::NoFile(args.path.clone()))?;
|
||||
let mut file = File::open(path)?;
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf)?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
/// Decode instructions from the text section of the provided binary
|
||||
fn decode_instructions(aout: &Aout) -> Result<Vec<MetaInstruction>, DisasmError> {
|
||||
// naive approach:
|
||||
// 1. read byte
|
||||
// 2. pattern match to see which instruction it is
|
||||
// 3. read as many bytes as this instruction needs (registers, immidiates, ...)
|
||||
// repeat until no bytes left
|
||||
|
||||
let instructions = Vec::new();
|
||||
let mut offset = 0;
|
||||
|
||||
let text = &aout.text;
|
||||
while offset < aout.text.len() {
|
||||
let mut instr = MetaInstruction::new();
|
||||
instr.start = offset;
|
||||
|
||||
let opcode = text[offset];
|
||||
match opcode {
|
||||
// 0x00 => {} // ADD
|
||||
// INT
|
||||
0xCD => {
|
||||
instr.take_n_bytes(2, &mut offset, text);
|
||||
instr.instruction = Instruction::INT(ImmediateByte(instr.raw[1]));
|
||||
}
|
||||
// MOV
|
||||
0xBB => {
|
||||
instr.take_n_bytes(3, &mut offset, text);
|
||||
instr.instruction = Instruction::MOV_RI(
|
||||
Register::BX,
|
||||
ImmediateWord(u16::from_le_bytes([instr.raw[1], instr.raw[2]])),
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
eprintln!("Encountered unknown instruction '0x{:x}'", opcode);
|
||||
eprintln!("Offset might be misaligned and data is being interpreted.");
|
||||
eprintln!("Existing to avoid further misinterpretation...");
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
println!("{}", instr);
|
||||
// dbg!(&instr);
|
||||
}
|
||||
|
||||
Ok(instructions)
|
||||
}
|
||||
183
src/instructions.rs
Normal file
183
src/instructions.rs
Normal file
@@ -0,0 +1,183 @@
|
||||
use core::fmt;
|
||||
|
||||
/// Address type wrapped by the `Memory` operand; currently a single byte.
// NOTE(review): one byte looks narrow for an 8086 memory operand - confirm once memory operands are decoded.
pub type MemAddress = u8;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code)]
|
||||
/// A single 'line' of executable ASM is called a MetaInstruction, which
|
||||
/// contains the `Instruction`, which will be executed, alongside some Meta
|
||||
/// Informations.
|
||||
pub struct MetaInstruction {
|
||||
pub start: usize, // location of the instruction start
|
||||
pub size: usize, // size of the instruction in bytes
|
||||
pub raw: Vec<u8>, // raw value of instruction
|
||||
pub instruction: Instruction, // actual instruction
|
||||
}
|
||||
|
||||
impl MetaInstruction {
|
||||
pub fn new() -> Self {
|
||||
MetaInstruction {
|
||||
start: 0,
|
||||
size: 0,
|
||||
raw: Vec::new(),
|
||||
instruction: Instruction::NOP(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse n bytes from text section and advance offet.
|
||||
/// Used to get the operands.
|
||||
pub fn take_n_bytes(&mut self, size: usize, offset: &mut usize, text: &Vec<u8>) {
|
||||
self.size = size;
|
||||
self.raw = text[*offset as usize..]
|
||||
.iter()
|
||||
.take(size)
|
||||
.cloned()
|
||||
.collect();
|
||||
*offset += size;
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for MetaInstruction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{:04x}: ", self.start).unwrap();
|
||||
for b in self.raw.iter() {
|
||||
write!(f, "{:02x}", b).unwrap();
|
||||
}
|
||||
write!(f, "\t{}", self.instruction)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[allow(dead_code, non_camel_case_types)]
|
||||
pub enum Instruction {
|
||||
NOP(),
|
||||
// ADD
|
||||
ADD_RM(Register, Memory),
|
||||
ADD_MR(Memory, Register),
|
||||
ADD_RR(Register, Register),
|
||||
ADD_MI(Memory, ImmediateByte),
|
||||
ADD_RI(Register, ImmediateByte),
|
||||
// MOV
|
||||
MOV_RM(Register, Memory),
|
||||
MOV_MR(Memory, Register),
|
||||
MOV_RR(Register, Register),
|
||||
MOV_MI(Memory, ImmediateByte),
|
||||
MOV_RI(Register, ImmediateWord),
|
||||
MOV_SM(SRegister, Memory),
|
||||
MOV_MS(Memory, SRegister),
|
||||
MOV_RS(Register, SRegister),
|
||||
MOV_SR(SRegister, Register),
|
||||
// INT
|
||||
INT(ImmediateByte),
|
||||
}
|
||||
|
||||
impl fmt::Display for Instruction {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
Self::INT(byte) => write!(f, "INT, {:x}", byte),
|
||||
Self::MOV_RI(reg, word) => write!(f, "MOV {}, {:04x}", reg, word),
|
||||
_ => write!(f, "display not yet implemented"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Types for operand encoding
|
||||
#[derive(Debug)]
|
||||
pub struct Memory(pub MemAddress);
|
||||
#[derive(Debug)]
|
||||
pub struct ImmediateByte(pub u8);
|
||||
#[derive(Debug)]
|
||||
pub struct ImmediateWord(pub u16);
|
||||
|
||||
// ... and the displays for all of them
|
||||
impl fmt::Display for Memory {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
impl fmt::Display for ImmediateByte {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
impl fmt::LowerHex for ImmediateByte {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fmt::LowerHex::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
impl fmt::Display for ImmediateWord {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}", self.0)
|
||||
}
|
||||
}
|
||||
impl fmt::LowerHex for ImmediateWord {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fmt::LowerHex::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers of a 8086 processor
#[derive(Debug)]
#[allow(dead_code)]
pub enum Register {
    AX,
    BX,
    CX,
    DX,
    AH,
    AL,
    BL,
    BH,
    CH,
    CL,
    DH,
    DL,
    DI,
    SI,
    BP,
    SP,
}

/// Prints the register's name, which is the same text as its variant name.
impl fmt::Display for Register {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Self::AX => "AX",
            Self::BX => "BX",
            Self::CX => "CX",
            Self::DX => "DX",
            Self::AH => "AH",
            Self::AL => "AL",
            Self::BL => "BL",
            Self::BH => "BH",
            Self::CH => "CH",
            Self::CL => "CL",
            Self::DH => "DH",
            Self::DL => "DL",
            Self::DI => "DI",
            Self::SI => "SI",
            Self::BP => "BP",
            Self::SP => "SP",
        };
        f.write_str(name)
    }
}
|
||||
|
||||
/// SRegisters of a 8086 processor
#[derive(Debug)]
#[allow(dead_code)]
pub enum SRegister {
    DS,
    ES,
    SS,
    CS,
}

/// Prints the register's name, which is the same text as its variant name.
impl fmt::Display for SRegister {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match self {
            Self::DS => "DS",
            Self::ES => "ES",
            Self::SS => "SS",
            Self::CS => "CS",
        };
        f.write_str(name)
    }
}
|
||||
41
src/main.rs
Normal file
41
src/main.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
mod aout;
|
||||
mod decode;
|
||||
mod disasm;
|
||||
mod instructions;
|
||||
|
||||
#[derive(Subcommand, Debug)]
|
||||
enum Command {
|
||||
/// Disassemble the binary into 8086 instructions
|
||||
Disasm,
|
||||
|
||||
/// Interpret the binary as 8086 Minix
|
||||
Interpret,
|
||||
}
|
||||
|
||||
/// Simple prgram to diasm and interpret Minix binaries
|
||||
#[derive(Parser, Debug)]
|
||||
#[command(version, about, long_about = None)]
|
||||
struct Args {
|
||||
#[command(subcommand)]
|
||||
command: Command,
|
||||
|
||||
/// Path of the binary
|
||||
#[arg(short, long, global = true)]
|
||||
path: Option<String>,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
env_logger::init();
|
||||
|
||||
let args = Args::parse();
|
||||
log::debug!("{:?}", args);
|
||||
|
||||
match args.command {
|
||||
Command::Disasm => {
|
||||
let _instructions = disasm::disasm(&args).unwrap();
|
||||
}
|
||||
_ => panic!("Command not yet implemented"),
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user