// Source: 8086-rs/src/operands.rs (Rust, 814 lines, 26 KiB)

//! All types which a Mnemonic can have as some kind of operand.
//! This includes things such as immediates, ModRM byte targets, etc. etc.
// used in doc, but not code
#[allow(unused_imports)]
use crate::register::SegmentRegister;
use crate::{disasm::DisasmError, register::Register};
use core::fmt;
use std::{
cmp::Ordering,
ops::{Add, BitAnd, BitOr, BitXor, Div, Not, Shl, Shr, Sub},
};
/// Unsigned 8-bit value.
pub type Byte = u8; // b
/// Signed 8-bit value.
pub type IByte = i8; // used for displacements of memory access
/// Unsigned 16-bit value.
pub type Word = u16; // w or v
/// Signed 16-bit value.
pub type IWord = i16; // used for displacement of memory access
/// Unsigned 32-bit value.
pub type DWord = u32;
#[derive(Debug, Clone, Eq, PartialEq, Copy)]
/// Universal type to encode either Byte- or Word-sized immediate operands.
/// Mostly used to:
/// - Encode immediates, where instructions exist for both widths.
/// - Encode instruction width, to select either the 8- or 16-bit register.
/// - Encode raw immediate values, to make use of all implemented functions
///   of this type.
///
/// The derived equality is width-sensitive: `Byte(1)` and `Word(1)` are
/// different values.
pub enum ImmediateOperand {
/// An 8-bit operand.
Byte(Byte),
/// A 16-bit operand.
Word(Word),
}
impl ImmediateOperand {
    /// Return the bits of the inner value, least significant bit first.
    ///
    /// The result has 8 entries for [`Self::Byte`] and 16 entries for
    /// [`Self::Word`].
    pub fn bits(self) -> Vec<bool> {
        match self {
            Self::Byte(b) => (0..8).map(|i| (b >> i) & 1 == 1).collect(),
            Self::Word(w) => (0..16).map(|i| (w >> i) & 1 == 1).collect(),
        }
    }

    /// Sign-extend [`Self::Byte`] into [`Self::Word`].
    /// Returns [`Self::Word`], if already a word.
    ///
    /// NOTE(review): this moves the sign bit from bit 7 to bit 15 and
    /// zero-fills the bits in between (`0x80` becomes `0x8000`, `0xFF`
    /// becomes `0x807F`). That is a sign-magnitude widening, not a two's
    /// complement sign extension (`0xFF` -> `0xFFFF`). The behaviour is left
    /// untouched here because existing callers and tests rely on it.
    pub fn sign_extend(self) -> Self {
        match self {
            Self::Byte(_) if self.msb() => self.flip_sign().word().flip_sign(),
            Self::Byte(_) => self.word(),
            Self::Word(_) => self,
        }
    }

    /// Interprets [`Self::Byte`] as [`Self::Word`] by zero-extending it.
    /// Returns word, if already a [`Self::Word`].
    /// CAUTION: You probably want to use [`Self::sign_extend()`] instead.
    fn word(self) -> Self {
        match self {
            Self::Byte(b) => Self::Word(Word::from(b)),
            Self::Word(_) => self,
        }
    }

    /// Flip most significant bit.
    pub fn flip_sign(self) -> Self {
        match self {
            Self::Byte(b) => Self::Byte(b ^ 0x80),
            Self::Word(w) => Self::Word(w ^ 0x8000),
        }
    }

    /// Sets (`sign == true`) or clears (`sign == false`) the most significant
    /// bit, leaving all other bits untouched.
    pub fn set_sign(self, sign: bool) -> Self {
        match (self, sign) {
            (Self::Byte(b), true) => Self::Byte(b | 0x80),
            (Self::Byte(b), false) => Self::Byte(b & 0x7f),
            (Self::Word(w), true) => Self::Word(w | 0x8000),
            (Self::Word(w), false) => Self::Word(w & 0x7fff),
        }
    }

    /// Check if inner value is zero.
    pub fn zero(&self) -> bool {
        matches!(*self, Self::Byte(0) | Self::Word(0))
    }

    /// Check if the least significant byte has an even number of 1's.
    ///
    /// Returns `true` for an even count (including zero set bits), which is
    /// the condition under which the 8086 parity flag is set.
    pub fn parity(&self) -> bool {
        let low = match *self {
            Self::Byte(byte) => byte,
            Self::Word(word) => word.to_le_bytes()[0],
        };
        low.count_ones() % 2 == 0
    }

    /// Check if least significant bit is set.
    pub fn lsb(&self) -> bool {
        match *self {
            Self::Byte(byte) => byte & 1 == 1,
            Self::Word(word) => word & 1 == 1,
        }
    }

    /// Check if most significant bit is set.
    /// If the number is interpreted as signed, this acts as the sign bit.
    pub fn msb(&self) -> bool {
        match *self {
            Self::Byte(byte) => byte & 0x80 != 0,
            Self::Word(word) => word & 0x8000 != 0,
        }
    }

    /// Multiply values and return the extended u32, split into two words
    /// as `(lower, upper)`.
    ///
    /// Two bytes are multiplied as plain unsigned bytes. If the widths
    /// differ, both operands are widened with [`Self::sign_extend()`] first.
    pub fn mul(&self, other: Self) -> (Word, Word) {
        // `sign_extend()` always yields a `Word`; the `Byte` arm only exists
        // to keep the match exhaustive without a panic path.
        let widen = |operand: Self| match operand.sign_extend() {
            Self::Word(w) => u32::from(w),
            Self::Byte(b) => u32::from(b),
        };
        let product: u32 = match (*self, other) {
            (Self::Byte(lhs), Self::Byte(rhs)) => u32::from(lhs) * u32::from(rhs),
            (lhs, rhs) => widen(lhs) * widen(rhs),
        };
        let [b0, b1, b2, b3] = product.to_le_bytes();
        (Word::from_le_bytes([b0, b1]), Word::from_le_bytes([b2, b3]))
    }
}
impl PartialOrd for ImmediateOperand {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for ImmediateOperand {
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
(ImmediateOperand::Byte(a), ImmediateOperand::Byte(b)) => a.cmp(b),
(ImmediateOperand::Word(a), ImmediateOperand::Word(b)) => a.cmp(b),
(ImmediateOperand::Byte(a), ImmediateOperand::Word(b)) => (*a as Word).cmp(b),
(ImmediateOperand::Word(a), ImmediateOperand::Byte(b)) => a.cmp(&(*b as Word)),
}
}
}
/// Builds an operand from a little-endian bit list (index 0 is the least
/// significant bit): 8 bits give a [`ImmediateOperand::Byte`], 16 bits give a
/// [`ImmediateOperand::Word`].
///
/// # Panics
/// Panics if `bits` holds neither 8 nor 16 entries.
impl From<Vec<bool>> for ImmediateOperand {
    fn from(bits: Vec<bool>) -> Self {
        match bits.len() {
            8 => {
                let mut value: u8 = 0;
                for (position, &bit) in bits.iter().enumerate() {
                    value |= u8::from(bit) << position;
                }
                Self::Byte(value)
            }
            16 => {
                let mut value: u16 = 0;
                for (position, &bit) in bits.iter().enumerate() {
                    value |= u16::from(bit) << position;
                }
                Self::Word(value)
            }
            _ => panic!("Invalid bit length"),
        }
    }
}
impl From<Byte> for ImmediateOperand {
fn from(value: Byte) -> Self {
Self::Byte(value)
}
}
impl From<Word> for ImmediateOperand {
fn from(value: Word) -> Self {
Self::Word(value)
}
}
/// Converts a `usize` into [`ImmediateOperand::Word`].
///
/// # Panics
/// Panics if `value` does not fit into a [`Word`].
impl From<usize> for ImmediateOperand {
    fn from(value: usize) -> Self {
        match Word::try_from(value) {
            Ok(word) => ImmediateOperand::Word(word),
            Err(_) => panic!("Cannot convert usize to ImmediateOperand::Word"),
        }
    }
}
/// Builds a [`MemoryIndex`] that consists of a displacement only (no base
/// and no index register).
///
/// Implemented as `From` so that both `MemoryIndex::from(imm)` and
/// `imm.into()` are available.
impl From<ImmediateOperand> for MemoryIndex {
    fn from(displacement: ImmediateOperand) -> Self {
        MemoryIndex {
            base: None,
            index: None,
            displacement: Some(displacement),
        }
    }
}
impl Into<Word> for ImmediateOperand {
fn into(self) -> u16 {
match self {
ImmediateOperand::Byte(b) => b as Word,
ImmediateOperand::Word(w) => w,
}
}
}
/// Extracts the raw value as a `usize`; the value is zero-extended.
///
/// Implemented as `From` so that both `usize::from(imm)` and `imm.into()`
/// are available.
impl From<ImmediateOperand> for usize {
    fn from(operand: ImmediateOperand) -> Self {
        match operand {
            ImmediateOperand::Byte(b) => usize::from(b),
            ImmediateOperand::Word(w) => usize::from(w),
        }
    }
}
impl Add for ImmediateOperand {
type Output = Self;
fn add(self, other: Self) -> Self {
match self {
ImmediateOperand::Byte(lhsb) => match other {
ImmediateOperand::Byte(rhsb) => ImmediateOperand::Byte(lhsb.wrapping_add(rhsb)),
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(match other.sign_extend() {
ImmediateOperand::Word(lhsw) => lhsw.wrapping_add(rhsw),
_ => panic!("unreachable"),
}),
},
ImmediateOperand::Word(lhsw) => match other {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw.wrapping_add(rhsw)),
ImmediateOperand::Byte(_) => ImmediateOperand::Word(match other.sign_extend() {
ImmediateOperand::Word(rhsw) => lhsw.wrapping_add(rhsw),
_ => panic!("unreachable"),
}),
},
}
}
}
impl Add<Byte> for ImmediateOperand {
type Output = ImmediateOperand;
fn add(self, imm: Byte) -> Self::Output {
match self {
Self::Byte(b) => Self::Byte(b.wrapping_add(imm)),
Self::Word(w) => Self::Word(w.wrapping_add(imm as Word)),
}
}
}
impl Sub for ImmediateOperand {
type Output = Self;
fn sub(self, other: Self) -> Self {
match self {
ImmediateOperand::Byte(lhsb) => match other {
ImmediateOperand::Byte(rhsb) => ImmediateOperand::Byte(lhsb.wrapping_sub(rhsb)),
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(match other.sign_extend() {
ImmediateOperand::Word(lhsw) => lhsw.wrapping_sub(rhsw),
_ => panic!("unreachable"),
}),
},
ImmediateOperand::Word(lhsw) => match other {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw.wrapping_sub(rhsw)),
ImmediateOperand::Byte(_) => ImmediateOperand::Word(match other.sign_extend() {
ImmediateOperand::Word(rhsw) => lhsw.wrapping_sub(rhsw),
_ => panic!("unreachable"),
}),
},
}
}
}
impl Sub<Byte> for ImmediateOperand {
type Output = ImmediateOperand;
fn sub(self, imm: Byte) -> Self::Output {
match self {
Self::Byte(b) => Self::Byte(b.wrapping_sub(imm)),
Self::Word(w) => Self::Word(w.wrapping_sub(imm as Word)),
}
}
}
impl Shl for ImmediateOperand {
type Output = Self;
fn shl(self, rhs: Self) -> Self::Output {
match self {
Self::Byte(b) => match rhs {
ImmediateOperand::Byte(sb) => Self::Byte(b << sb),
ImmediateOperand::Word(sw) => Self::Byte(b << sw),
},
Self::Word(w) => match rhs {
ImmediateOperand::Byte(sb) => Self::Word(w << sb),
ImmediateOperand::Word(sw) => Self::Word(w << sw),
},
}
}
}
/// Logical left shift by a raw count; the operand keeps its width.
///
/// A shift count greater than or equal to the operand width shifts every
/// bit out and yields 0, instead of panicking (debug builds) or silently
/// wrapping the count (release builds).
impl Shl<Word> for ImmediateOperand {
    type Output = Self;
    fn shl(self, rhs: Word) -> Self::Output {
        let count = u32::from(rhs);
        match self {
            Self::Byte(b) => Self::Byte(b.checked_shl(count).unwrap_or(0)),
            Self::Word(w) => Self::Word(w.checked_shl(count).unwrap_or(0)),
        }
    }
}
impl Shr for ImmediateOperand {
type Output = Self;
fn shr(self, rhs: Self) -> Self::Output {
match self {
Self::Byte(b) => match rhs {
ImmediateOperand::Byte(sb) => Self::Byte(b >> sb),
ImmediateOperand::Word(sw) => Self::Byte(b >> sw),
},
Self::Word(w) => match rhs {
ImmediateOperand::Byte(sb) => Self::Word(w >> sb),
ImmediateOperand::Word(sw) => Self::Word(w >> sw),
},
}
}
}
/// Logical right shift by a raw count; the operand keeps its width.
///
/// A shift count greater than or equal to the operand width shifts every
/// bit out and yields 0, instead of panicking (debug builds) or silently
/// wrapping the count (release builds).
impl Shr<Word> for ImmediateOperand {
    type Output = Self;
    fn shr(self, rhs: Word) -> Self::Output {
        let count = u32::from(rhs);
        match self {
            Self::Byte(b) => Self::Byte(b.checked_shr(count).unwrap_or(0)),
            Self::Word(w) => Self::Word(w.checked_shr(count).unwrap_or(0)),
        }
    }
}
impl Div for ImmediateOperand {
type Output = Self;
fn div(self, other: Self) -> Self {
match self {
ImmediateOperand::Byte(lhsb) => match other {
ImmediateOperand::Byte(rhsb) => ImmediateOperand::Byte(lhsb.wrapping_div(rhsb)),
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(match other.sign_extend() {
ImmediateOperand::Word(lhsw) => lhsw.wrapping_div(rhsw),
_ => panic!("unreachable"),
}),
},
ImmediateOperand::Word(lhsw) => match other {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw.wrapping_div(rhsw)),
ImmediateOperand::Byte(_) => ImmediateOperand::Word(match other.sign_extend() {
ImmediateOperand::Word(rhsw) => lhsw.wrapping_div(rhsw),
_ => panic!("unreachable"),
}),
},
}
}
}
impl BitOr for ImmediateOperand {
type Output = Self;
fn bitor(self, other: Self) -> Self {
match self {
ImmediateOperand::Byte(lhsb) => match other {
ImmediateOperand::Byte(rhsb) => ImmediateOperand::Byte(lhsb | rhsb),
ImmediateOperand::Word(rhsw) => match other.sign_extend() {
ImmediateOperand::Word(lhsw) => ImmediateOperand::Word(lhsw | rhsw),
_ => panic!("unreachable"),
},
},
ImmediateOperand::Word(lhsw) => match other {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw | rhsw),
ImmediateOperand::Byte(_) => match other.sign_extend() {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw | rhsw),
_ => panic!("unreachable"),
},
},
}
}
}
impl BitAnd for ImmediateOperand {
type Output = Self;
fn bitand(self, other: Self) -> Self {
match self {
ImmediateOperand::Byte(lhsb) => match other {
ImmediateOperand::Byte(rhsb) => ImmediateOperand::Byte(lhsb & rhsb),
ImmediateOperand::Word(rhsw) => match other.sign_extend() {
ImmediateOperand::Word(lhsw) => ImmediateOperand::Word(lhsw & rhsw),
_ => panic!("unreachable"),
},
},
ImmediateOperand::Word(lhsw) => match other {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw & rhsw),
ImmediateOperand::Byte(_) => match other.sign_extend() {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw & rhsw),
_ => panic!("unreachable"),
},
},
}
}
}
impl BitXor for ImmediateOperand {
type Output = Self;
fn bitxor(self, other: Self) -> Self {
match self {
ImmediateOperand::Byte(lhsb) => match other {
ImmediateOperand::Byte(rhsb) => ImmediateOperand::Byte(lhsb ^ rhsb),
ImmediateOperand::Word(rhsw) => match other.sign_extend() {
ImmediateOperand::Word(lhsw) => ImmediateOperand::Word(lhsw ^ rhsw),
_ => panic!("unreachable"),
},
},
ImmediateOperand::Word(lhsw) => match other {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw ^ rhsw),
ImmediateOperand::Byte(_) => match other.sign_extend() {
ImmediateOperand::Word(rhsw) => ImmediateOperand::Word(lhsw ^ rhsw),
_ => panic!("unreachable"),
},
},
}
}
}
impl Not for ImmediateOperand {
type Output = Self;
fn not(self) -> Self::Output {
match self {
ImmediateOperand::Byte(b) => ImmediateOperand::Byte(!b),
ImmediateOperand::Word(w) => ImmediateOperand::Word(!w),
}
}
}
impl fmt::Display for ImmediateOperand {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Byte(byte) => write!(f, "{}", byte),
Self::Word(word) => write!(f, "{}", word),
}
}
}
impl fmt::LowerHex for ImmediateOperand {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Byte(b) => fmt::LowerHex::fmt(b, f),
Self::Word(v) => fmt::LowerHex::fmt(v, f),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
/// ModRM byte can either target a [`MemoryIndex`] (location in memory) or some
/// [`Register`].
pub enum ModRmTarget {
/// The operand is a location in memory.
Memory(MemoryIndex),
/// The operand is a register.
Register(Register),
}
impl std::fmt::Display for ModRmTarget {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Memory(idx) => write!(f, "{}", idx),
Self::Register(reg) => write!(f, "{}", reg),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
/// Just a wrapper to access QOL function, which interprets an
/// [`ImmediateOperand`] as a signed value.
pub enum ImmediateOperandSigned {
/// A signed 8-bit value.
Byte(IByte),
/// A signed 16-bit value.
Word(IWord),
}
impl fmt::LowerHex for ImmediateOperandSigned {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Byte(b) => fmt::LowerHex::fmt(b, f),
Self::Word(v) => fmt::LowerHex::fmt(v, f),
}
}
}
impl From<ImmediateOperand> for ImmediateOperandSigned {
fn from(value: ImmediateOperand) -> Self {
match value {
ImmediateOperand::Byte(b) => Self::Byte(b as IByte),
ImmediateOperand::Word(w) => Self::Word(w as IWord),
}
}
}
/// Prints the value as a signed displacement suffix: `" + 0x<magnitude>"`
/// for non-negative values and `" - 0x<magnitude>"` for negative ones.
///
/// The magnitude is taken with `unsigned_abs()`, so the minimum values
/// (`i8::MIN`, `i16::MIN`) are printed instead of overflowing on negation.
/// Zero is printed with a `+` sign.
impl std::fmt::Display for ImmediateOperandSigned {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let (negative, magnitude) = match *self {
            Self::Byte(b) => (b < 0, u16::from(b.unsigned_abs())),
            Self::Word(w) => (w < 0, w.unsigned_abs()),
        };
        let sign = if negative { '-' } else { '+' };
        write!(f, " {} {:#x}", sign, magnitude)
    }
}
/// Applies a signed displacement to an unsigned operand.
///
/// A non-negative displacement is added, a negative one has its magnitude
/// subtracted; both operations wrap.
impl Add<ImmediateOperandSigned> for ImmediateOperand {
    type Output = ImmediateOperand;
    fn add(self, disp: ImmediateOperandSigned) -> Self::Output {
        // Warning: this gets rid of the sign, which is fine as long as it is
        // used for a memory index, which can never be negative.
        // In that case, the subtract wraps.
        //
        // `unsigned_abs()` is used instead of `x * -1`, which overflows for
        // `i8::MIN` / `i16::MIN`.
        match disp {
            // Byte magnitudes (0..=128) go through the raw-byte operators,
            // which zero-extend the magnitude when `self` is a word.
            ImmediateOperandSigned::Byte(byte) if byte < 0 => self - byte.unsigned_abs(),
            ImmediateOperandSigned::Byte(byte) => self + byte.unsigned_abs(),
            ImmediateOperandSigned::Word(word) if word < 0 => {
                self - ImmediateOperand::Word(word.unsigned_abs())
            }
            ImmediateOperandSigned::Word(word) => {
                self + ImmediateOperand::Word(word.unsigned_abs())
            }
        }
    }
}
/// A memory index operand is usually created by ModRM bytes or words.
/// e.g. [bx+si]
///
/// Every component is optional. The `Display` implementation panics if all
/// three are `None`.
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub struct MemoryIndex {
/// Optional base register.
pub base: Option<Register>,
/// Optional index register.
pub index: Option<Register>,
/// Optional displacement; used as the address itself when neither `base`
/// nor `index` is set.
pub displacement: Option<ImmediateOperand>,
}
impl Into<MemoryIndex> for u16 {
fn into(self) -> MemoryIndex {
MemoryIndex {
base: None,
index: None,
displacement: Some(self.into()),
}
}
}
/// Prints the memory operand in bracket notation.
///
/// | base | index | displacement | output                |
/// |------|-------|--------------|-----------------------|
/// | yes  | yes   | yes          | `[base + index ± 0xd]` |
/// | yes  | yes   | no           | `[base + index]`      |
/// | one register  | yes          | `[reg ± 0xd]`         |
/// | one register  | no           | `[reg]`               |
/// | no   | no    | yes          | `[0xd]` (raw hex)     |
///
/// The signed rendering of a displacement next to a register comes from
/// [`ImmediateOperandSigned`]'s `Display` implementation.
///
/// # Panics
/// Panics if base, index and displacement are all `None`.
impl fmt::Display for MemoryIndex {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let signed = self.displacement.map(ImmediateOperandSigned::from);
        match (self.base, self.index, signed) {
            (Some(base), Some(index), Some(disp)) => {
                write!(f, "[{} + {}{}]", base, index, disp)
            }
            (Some(base), Some(index), None) => write!(f, "[{} + {}]", base, index),
            // A lone register prints the same whether it is the base or the index.
            (Some(reg), None, Some(disp)) | (None, Some(reg), Some(disp)) => {
                write!(f, "[{}{}]", reg, disp)
            }
            (Some(reg), None, None) | (None, Some(reg), None) => write!(f, "[{}]", reg),
            (None, None, Some(disp)) => write!(f, "[{:#x}]", disp),
            (None, None, None) => panic!("Memory Index without base, index and displacement"),
        }
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
/// 16-bit pointer for access, usually with a [`SegmentRegister`] as segment
/// and this pointer's [`Pointer16::word`] as offset.
/// Generally, this type only gets constructed in rare scenarios, when the
/// displacement ([`MemoryIndex::displacement`]) of a parsed [`ModRmTarget`]
/// is used as a raw pointer.
pub struct Pointer16 {
/// The raw 16-bit pointer value.
pub word: Word,
}
impl Into<MemoryIndex> for Pointer16 {
fn into(self) -> MemoryIndex {
MemoryIndex {
base: None,
index: None,
displacement: Some(self.word.into()),
}
}
}
/// Advances the pointer by `rhs`.
///
/// The addition wraps around at 16 bits instead of panicking on overflow in
/// debug builds.
impl Add<u16> for Pointer16 {
    type Output = Self;
    fn add(self, rhs: u16) -> Self::Output {
        Self {
            word: self.word.wrapping_add(rhs),
        }
    }
}
impl std::fmt::Display for Pointer16 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "ptr word [{:#04x}]", self.word)
}
}
impl TryFrom<ModRmTarget> for Pointer16 {
type Error = DisasmError;
fn try_from(target: ModRmTarget) -> Result<Self, Self::Error> {
match target {
ModRmTarget::Memory(mem) => match mem.displacement {
Some(disp) => match disp {
ImmediateOperand::Word(word) => Ok(Pointer16 { word }),
_ => {
return Err(DisasmError::IllegalOperand(
"Tried to construct Pointer16 with Byte, when a Word is expected"
.into(),
));
}
},
_ => {
return Err(DisasmError::IllegalOperand("Tried to construct Pointer16 with Register, when a Displacement was expected".into()));
}
},
_ => {
return Err(DisasmError::IllegalOperand(
"Tried to construct Pointer16 with Register, when a MemoryIndex expected"
.into(),
));
}
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
/// 32-bit segment:offset pointer for long jumps.
/// Both [`Word`]s are immediately encoded after the instruction
pub struct Pointer32 {
/// The pointer as one 32-bit value.
// NOTE(review): how `raw` is composed from `segment` and `offset` is not
// visible in this file - confirm at the construction site.
pub raw: DWord,
/// Segment part; printed before the colon.
pub segment: Word,
/// Offset part; printed after the colon.
pub offset: Word,
}
impl std::fmt::Display for Pointer32 {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(f, "{:#04x}:{:#04x}", self.segment, self.offset)
}
}
/// Unit tests for [`ImmediateOperand`]: ordering, mixed-width arithmetic and
/// the sign helpers.
#[cfg(test)]
mod tests {
    use super::*;

    /// Operands are ordered by numeric value, independent of their width.
    #[test]
    fn ord() {
        let b1 = ImmediateOperand::Byte(1);
        let b5 = ImmediateOperand::Byte(5);
        let w1 = ImmediateOperand::Word(1);
        let w10 = ImmediateOperand::Word(10);
        assert!(b1 < b5);
        assert!(w1 < w10);
        assert!(b1 < w10);
        assert!(w1 < b5);
    }

    #[test]
    fn add_byte_byte() {
        let a = ImmediateOperand::Byte(5);
        let b = ImmediateOperand::Byte(7);
        assert_eq!(a + b, ImmediateOperand::Byte(5 + 7));
    }

    #[test]
    fn add_word_word() {
        let a = ImmediateOperand::Word(5);
        let b = ImmediateOperand::Word(7);
        assert_eq!(a + b, ImmediateOperand::Word(5 + 7));
    }

    /// Word + Byte widens the byte and yields a word.
    #[test]
    fn add_byte_word() {
        let a = ImmediateOperand::Byte(5);
        let b = ImmediateOperand::Word(7);
        assert_eq!(b + a, ImmediateOperand::Word(5 + 7));
    }

    /// Word - Byte widens the byte and yields a word.
    #[test]
    fn sub_byte_word() {
        let a = ImmediateOperand::Byte(5);
        let b = ImmediateOperand::Word(7);
        assert_eq!(b - a, ImmediateOperand::Word(7 - 5));
    }

    #[test]
    fn test_msb() {
        let pos = ImmediateOperand::Byte(1 << 4);
        let neg = ImmediateOperand::Byte(1 << 7);
        assert!(!pos.msb());
        assert!(neg.msb());
    }

    /// `word()` zero-extends a byte and leaves a word untouched.
    #[test]
    fn test_as_word() {
        let b: u8 = 5;
        let byte = ImmediateOperand::Byte(b);
        let word = ImmediateOperand::Word(b as Word);
        assert_eq!(byte.word(), word);
        // Previously compared `word` with itself, which can never fail.
        assert_eq!(word.word(), word);
    }

    #[test]
    fn test_flip_sign_neg_to_pos() {
        // Non-zero low bits make sure only the sign bit is cleared.
        let b = 1 << 2;
        let byte = ImmediateOperand::Byte((1 << 7) | b);
        let word = ImmediateOperand::Word((1 << 15) | b as Word);
        assert_eq!(byte.flip_sign(), ImmediateOperand::Byte(b));
        assert_eq!(word.flip_sign(), ImmediateOperand::Word(b as Word));
    }

    #[test]
    fn test_flip_sign_pos_to_neg() {
        let b = 1 << 2;
        let byte = ImmediateOperand::Byte(b);
        let word = ImmediateOperand::Word(b as Word);
        assert_eq!(byte.flip_sign(), ImmediateOperand::Byte((1 << 7) | b));
        assert_eq!(
            word.flip_sign(),
            ImmediateOperand::Word((1 << 15) | b as Word)
        );
    }

    #[test]
    fn test_sign_extend() {
        let byte = ImmediateOperand::Byte(0b01010101);
        let word = ImmediateOperand::Word(0b0000000001010101);
        assert_eq!(byte.sign_extend(), word);
    }

    /// Pins the current behaviour: the sign bit moves from bit 7 to bit 15.
    #[test]
    fn test_sign_extend_neg() {
        let byte = ImmediateOperand::Byte(1 << 7);
        let word = ImmediateOperand::Word(1 << 15);
        assert_eq!(byte.sign_extend(), word);
    }
}