File: //usr/include/hphp/util/asm-x64.h
/*
+----------------------------------------------------------------------+
| HipHop for PHP |
+----------------------------------------------------------------------+
| Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
*/
#ifndef incl_HPHP_UTIL_ASM_X64_H_
#define incl_HPHP_UTIL_ASM_X64_H_
#include <type_traits>
#include "hphp/util/atomic.h"
#include "hphp/util/data-block.h"
#include "hphp/util/immed.h"
#include "hphp/util/safe-cast.h"
#include "hphp/util/trace.h"
/*
* An experimental macro assembler for x64, that strives for low coupling to
* the runtime environment.
*
* There are more complete assemblers out there; if you use this one
* yourself, expect not to find all the instructions you wanted to use. You
* may have to go spelunking in the Intel manuals:
*
* http://www.intel.com/products/processor/manuals/
*
* If you're looking for something more fully baked, here are some options
* to consider:
*
* 1. Nanojit or llvm, both of which translate abstract virtual machine
* instructions to the native target architecture, or
* 2. The embedded assemblers from v8, the Sun JVM, etc.
*/
/*
* Some members cannot be const because their values aren't known in
* an initialization list. Like the opposite of the "mutable" keyword.
* This declares this property to readers.
*/
#define logical_const /* nothing */
namespace HPHP { namespace jit {
#define TRACEMOD ::HPHP::Trace::asmx64
//////////////////////////////////////////////////////////////////////
struct MemoryRef;
struct RIPRelativeRef;
struct ScaledIndex;
struct ScaledIndexDisp;
struct DispReg;
const uint8_t kOpsizePrefix = 0x66;
struct Reg64 {
explicit constexpr Reg64(int rn) : rn(rn) {}
// Integer conversion is allowed but only explicitly. (It's not
// unusual to want to printf registers, etc. Just cast it first.)
explicit constexpr operator int() const { return rn; }
MemoryRef operator[](intptr_t disp) const;
MemoryRef operator[](Reg64) const;
MemoryRef operator[](ScaledIndex) const;
MemoryRef operator[](ScaledIndexDisp) const;
MemoryRef operator[](DispReg) const;
constexpr bool operator==(Reg64 o) const { return rn == o.rn; }
constexpr bool operator!=(Reg64 o) const { return rn != o.rn; }
private:
int rn;
};
#define SIMPLE_REGTYPE(What) \
struct What { \
explicit constexpr What(int rn) : rn(rn) {} \
explicit constexpr operator int() const { return rn; } \
constexpr bool operator==(What o) const { return rn == o.rn; } \
constexpr bool operator!=(What o) const { return rn != o.rn; } \
private: \
int rn; \
}
SIMPLE_REGTYPE(Reg32);
SIMPLE_REGTYPE(Reg16);
SIMPLE_REGTYPE(Reg8);
SIMPLE_REGTYPE(RegXMM);
SIMPLE_REGTYPE(RegSF);
#undef SIMPLE_REGTYPE
struct RegRIP {
RIPRelativeRef operator[](intptr_t disp) const;
};
// Convert between physical registers of different sizes
inline Reg8 rbyte(Reg32 r) { return Reg8(int(r)); }
inline Reg8 rbyte(Reg64 r) { return Reg8(int(r)); }
inline Reg16 r16(Reg8 r) { return Reg16(int(r)); }
inline Reg32 r32(Reg8 r) { return Reg32(int(r)); }
inline Reg32 r32(Reg16 r) { return Reg32(int(r)); }
inline Reg32 r32(Reg32 r) { return r; }
inline Reg32 r32(Reg64 r) { return Reg32(int(r)); }
inline Reg64 r64(Reg8 r) { return Reg64(int(r)); }
inline Reg64 r64(Reg16 r) { return Reg64(int(r)); }
inline Reg64 r64(Reg32 r) { return Reg64(int(r)); }
inline Reg64 r64(Reg64 r) { return r; }
//////////////////////////////////////////////////////////////////////
/*
* The following structures define intermediate types for various
* addressing modes. They overload some operators to allow using
* registers to look somewhat like pointers.
*
* E.g. rax[rbx*2 + 3] or *(rax + rbx*2 + 3).
*
* These operators are not defined commutatively; the thought is it
* mandates the order you normally write them in a .S, but it could be
* changed if this proves undesirable.
*/
// reg*x
struct ScaledIndex {
explicit ScaledIndex(Reg64 index, intptr_t scale)
: index(index)
, scale(scale)
{
assert((scale == 0x1 || scale == 0x2 || scale == 0x4 || scale == 0x8) &&
"Invalid index register scaling (must be 1,2,4 or 8).");
assert(int(index) != -1 && "invalid register");
}
Reg64 index;
intptr_t scale;
};
// reg*x + disp
struct ScaledIndexDisp {
explicit ScaledIndexDisp(ScaledIndex si, intptr_t disp)
: si(si)
, disp(disp)
{}
ScaledIndexDisp operator+(intptr_t x) const {
return ScaledIndexDisp(si, disp + x);
}
ScaledIndexDisp operator-(intptr_t x) const {
return ScaledIndexDisp(si, disp - x);
}
ScaledIndex si;
intptr_t disp;
};
// reg+x
struct DispReg {
explicit DispReg(Reg64 base, intptr_t disp = 0)
: base(base)
, disp(disp)
{
assert(int(base) != -1 && "invalid register");
}
// Constructor for baseless().
explicit DispReg(intptr_t disp)
: base(-1)
, disp(disp)
{}
MemoryRef operator*() const;
MemoryRef operator[](intptr_t) const;
DispReg operator+(intptr_t x) const {
return DispReg(base, disp + x);
}
DispReg operator-(intptr_t x) const {
return DispReg(base, disp - x);
}
Reg64 base;
intptr_t disp;
};
// reg + reg*x + y
struct IndexedDispReg {
explicit IndexedDispReg(Reg64 base, ScaledIndex sr)
: base(base)
, index(sr.index)
, scale(sr.scale)
, disp(0)
{}
explicit IndexedDispReg(DispReg r)
: base(r.base)
, index(-1)
, scale(1)
, disp(r.disp)
{}
// Constructor for baseless()
explicit IndexedDispReg(ScaledIndexDisp sid)
: base(-1)
, index(sid.si.index)
, scale(sid.si.scale)
, disp(sid.disp)
{}
MemoryRef operator*() const;
MemoryRef operator[](intptr_t disp) const;
IndexedDispReg operator+(intptr_t disp) const {
auto ret = *this;
ret.disp += disp;
return ret;
}
IndexedDispReg operator-(intptr_t disp) const {
auto ret = *this;
ret.disp -= disp;
return ret;
}
Reg64 base;
Reg64 index;
int scale;
intptr_t disp; // TODO #4613274: should be int32_t
};
// rip+x
struct DispRIP {
explicit DispRIP(intptr_t disp) : disp(disp) {}
RIPRelativeRef operator*() const;
RIPRelativeRef operator[](intptr_t x) const;
DispRIP operator+(intptr_t x) const {
return DispRIP(disp + x);
}
DispRIP operator-(intptr_t x) const {
return DispRIP(disp - x);
}
intptr_t disp;
};
// *(reg + x)
struct MemoryRef {
explicit MemoryRef(DispReg dr) : r(dr) {}
explicit MemoryRef(IndexedDispReg idr) : r(idr) {}
IndexedDispReg r;
};
// *(rip + x)
struct RIPRelativeRef {
explicit RIPRelativeRef(DispRIP r) : r(r) {}
DispRIP r;
};
inline MemoryRef IndexedDispReg::operator*() const {
return MemoryRef(*this);
}
inline MemoryRef IndexedDispReg::operator[](intptr_t x) const {
return *(*this + x);
}
inline MemoryRef DispReg::operator*() const {
return MemoryRef(*this);
}
inline MemoryRef DispReg::operator[](intptr_t x) const {
return *(*this + x);
}
inline RIPRelativeRef DispRIP::operator*() const {
return RIPRelativeRef(*this);
}
inline RIPRelativeRef DispRIP::operator[](intptr_t x) const {
return *(*this + x);
}
inline DispReg operator+(Reg64 r, intptr_t d) { return DispReg(r, d); }
inline DispReg operator-(Reg64 r, intptr_t d) { return DispReg(r, -d); }
inline DispRIP operator+(RegRIP r, intptr_t d) { return DispRIP(d); }
inline DispRIP operator-(RegRIP r, intptr_t d) { return DispRIP(d); }
inline ScaledIndex operator*(Reg64 r, int scale) {
return ScaledIndex(r, scale);
}
inline IndexedDispReg operator+(Reg64 base, ScaledIndex sr) {
return IndexedDispReg(base, sr);
}
inline ScaledIndexDisp operator+(ScaledIndex si, intptr_t disp) {
return ScaledIndexDisp(si, disp);
}
inline IndexedDispReg operator+(Reg64 b, Reg64 i) {
return b + ScaledIndex(i, 0x1);
}
inline MemoryRef operator*(Reg64 r) { return MemoryRef(DispReg(r)); }
inline DispRIP operator*(RegRIP r) { return DispRIP(0); }
inline MemoryRef Reg64::operator[](intptr_t disp) const {
return *(*this + disp);
}
inline MemoryRef Reg64::operator[](Reg64 idx) const {
return *(*this + idx * 1);
}
inline MemoryRef Reg64::operator[](ScaledIndex si) const {
return *(*this + si);
}
inline MemoryRef Reg64::operator[](DispReg dr) const {
return *(*this + ScaledIndex(dr.base, 0x1) + dr.disp);
}
inline MemoryRef Reg64::operator[](ScaledIndexDisp sid) const {
return *(*this + sid.si + sid.disp);
}
inline RIPRelativeRef RegRIP::operator[](intptr_t disp) const {
return *(*this + disp);
}
/*
* Used for the x64 addressing mode where there is a displacement,
* possibly with a scaled index, but no base register.
*/
inline MemoryRef baseless(intptr_t disp) { return *(DispReg { disp }); }
inline MemoryRef baseless(ScaledIndexDisp sid) {
return *(IndexedDispReg { sid });
}
//////////////////////////////////////////////////////////////////////
namespace reg {
constexpr Reg64 rax(0);
constexpr Reg64 rcx(1);
constexpr Reg64 rdx(2);
constexpr Reg64 rbx(3);
constexpr Reg64 rsp(4);
constexpr Reg64 rbp(5);
constexpr Reg64 rsi(6);
constexpr Reg64 rdi(7);
constexpr Reg64 r8 (8);
constexpr Reg64 r9 (9);
constexpr Reg64 r10(10);
constexpr Reg64 r11(11);
constexpr Reg64 r12(12);
constexpr Reg64 r13(13);
constexpr Reg64 r14(14);
constexpr Reg64 r15(15);
constexpr RegRIP rip = RegRIP();
constexpr Reg32 eax (0);
constexpr Reg32 ecx (1);
constexpr Reg32 edx (2);
constexpr Reg32 ebx (3);
constexpr Reg32 esp (4);
constexpr Reg32 ebp (5);
constexpr Reg32 esi (6);
constexpr Reg32 edi (7);
constexpr Reg32 r8d (8);
constexpr Reg32 r9d (9);
constexpr Reg32 r10d(10);
constexpr Reg32 r11d(11);
constexpr Reg32 r12d(12);
constexpr Reg32 r13d(13);
constexpr Reg32 r14d(14);
constexpr Reg32 r15d(15);
constexpr Reg16 ax (0);
constexpr Reg16 cx (1);
constexpr Reg16 dx (2);
constexpr Reg16 bx (3);
constexpr Reg16 sp (4);
constexpr Reg16 bp (5);
constexpr Reg16 si (6);
constexpr Reg16 di (7);
constexpr Reg16 r8w (8);
constexpr Reg16 r9w (9);
constexpr Reg16 r10w(10);
constexpr Reg16 r11w(11);
constexpr Reg16 r12w(12);
constexpr Reg16 r13w(13);
constexpr Reg16 r14w(14);
constexpr Reg16 r15w(15);
constexpr Reg8 al (0);
constexpr Reg8 cl (1);
constexpr Reg8 dl (2);
constexpr Reg8 bl (3);
constexpr Reg8 spl (4);
constexpr Reg8 bpl (5);
constexpr Reg8 sil (6);
constexpr Reg8 dil (7);
constexpr Reg8 r8b (8);
constexpr Reg8 r9b (9);
constexpr Reg8 r10b(10);
constexpr Reg8 r11b(11);
constexpr Reg8 r12b(12);
constexpr Reg8 r13b(13);
constexpr Reg8 r14b(14);
constexpr Reg8 r15b(15);
// Reminder: these registers may not be mixed in any instruction
// using a REX prefix (i.e. anything using r8-r15, spl, bpl, sil,
// dil, etc).
constexpr Reg8 ah(0x80 | 4);
constexpr Reg8 ch(0x80 | 5);
constexpr Reg8 dh(0x80 | 6);
constexpr Reg8 bh(0x80 | 7);
constexpr RegXMM xmm0(0);
constexpr RegXMM xmm1(1);
constexpr RegXMM xmm2(2);
constexpr RegXMM xmm3(3);
constexpr RegXMM xmm4(4);
constexpr RegXMM xmm5(5);
constexpr RegXMM xmm6(6);
constexpr RegXMM xmm7(7);
constexpr RegXMM xmm8(8);
constexpr RegXMM xmm9(9);
constexpr RegXMM xmm10(10);
constexpr RegXMM xmm11(11);
constexpr RegXMM xmm12(12);
constexpr RegXMM xmm13(13);
constexpr RegXMM xmm14(14);
constexpr RegXMM xmm15(15);
#define X(x) if (r == x) return "%"#x
inline const char* regname(Reg64 r) {
X(rax); X(rbx); X(rcx); X(rdx); X(rsp); X(rbp); X(rsi); X(rdi);
X(r8); X(r9); X(r10); X(r11); X(r12); X(r13); X(r14); X(r15);
return nullptr;
}
inline const char* regname(Reg32 r) {
X(eax); X(ecx); X(edx); X(ebx); X(esp); X(ebp); X(esi); X(edi);
X(r8d); X(r9d); X(r10d); X(r11d); X(r12d); X(r13d); X(r14d); X(r15d);
return nullptr;
}
inline const char* regname(Reg16 r) {
X(ax); X(cx); X(dx); X(bx); X(sp); X(bp); X(si); X(di);
X(r8w); X(r9w); X(r10w); X(r11w); X(r12w); X(r13w); X(r14w); X(r15w);
return nullptr;
}
inline const char* regname(Reg8 r) {
X(al); X(cl); X(dl); X(bl); X(spl); X(bpl); X(sil); X(dil);
X(r8b); X(r9b); X(r10b); X(r11b); X(r12b); X(r13b); X(r14b); X(r15b);
X(ah); X(ch); X(dh); X(bh);
return nullptr;
}
inline const char* regname(RegXMM r) {
X(xmm0); X(xmm1); X(xmm2); X(xmm3); X(xmm4); X(xmm5); X(xmm6);
X(xmm7); X(xmm8); X(xmm9); X(xmm10); X(xmm11); X(xmm12); X(xmm13);
X(xmm14); X(xmm15);
return nullptr;
}
inline const char* regname(RegSF r) {
return "%flags";
}
#undef X
}
//////////////////////////////////////////////////////////////////////
enum X64InstrFlags {
IF_REVERSE = 0x0001, // The operand encoding for some instructions are
// "backwards" in x64; these instructions are
// called "reverse" instructions. There are a few
// details about emitting "reverse" instructions:
// (1) for the R_M address mode, we use the MR
// opcode, (2) for M_R and R address modes, we use
// the RM opcode, and (3) for the R_R address mode,
// we still use MR opcode, but we have to swap the
// first argument and the second argument.
IF_TWOBYTEOP = 0x0002, // Some instructions have two byte opcodes. For
// these instructions, an additional byte (0x0F) is
// emitted before the standard opcode byte.
IF_JCC = 0x0004, // instruction is jcc
IF_IMUL = 0x0008, // instruction is imul
IF_HAS_IMM8 = 0x0010, // instruction has an encoding that takes an 8-bit
// immediate
IF_SHIFT = 0x0020, // instruction is rol, ror, rcl, rcr, shl, shr, sar
IF_RET = 0x0040, // instruction is ret
IF_SHIFTD = 0x0080, // instruction is shld, shrd
IF_NO_REXW = 0x0100, // rexW prefix is not needed
IF_MOV = 0x0200, // instruction is mov
IF_COMPACTR = 0x0400, // instruction supports compact-R encoding
IF_RAX = 0x0800, // instruction supports special rax encoding
IF_XCHG = 0x1000, // instruction is xchg (not xchgb)
IF_BYTEREG = 0x2000, // instruction is movzbq, movsbq
IF_66PREFIXED = 0x4000, // instruction requires a manditory 0x66 prefix
IF_F3PREFIXED = 0x8000, // instruction requires a manditory 0xf3 prefix
IF_F2PREFIXED = 0x10000, // instruction requires a manditory 0xf2 prefix
IF_THREEBYTEOP = 0x20000, // instruction requires a 0x0F 0x3A prefix
IF_ROUND = 0x40000, // instruction is round(sp)d
};
/*
Address mode to table index map:
Table index 0 <- R_R / M_R(n) / R_M(r) / R(n)
Table index 1 <- R_M(n) / M_R(r) / R(r)
Table index 2 <- I / R_I / M_I / R_R_I / M_R_I / R_M_I
Table index 3 <- "/digit" value used by the above address modes
Table index 4 <- special R_I (for rax)
Table index 5 <- compact-R / none
(n) - for normal instructions only (IF_REVERSE flag is not set)
(r) - for reverse instructions only (IF_REVERSE flag is set)
0xF1 is used to indicate invalid opcodes.
*/
struct X64Instr {
unsigned char table[6];
unsigned long flags;
};
// 0 1 2 3 4 5 flags
const X64Instr instr_divsd = { { 0x5E,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_movups = { { 0x10,0x11,0xF1,0x00,0xF1,0xF1 }, 0x0103 };
const X64Instr instr_movdqa = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x4103 };
const X64Instr instr_movdqu = { { 0x6F,0x7F,0xF1,0x00,0xF1,0xF1 }, 0x8103 };
const X64Instr instr_movsd = { { 0x11,0x10,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_gpr2xmm = { { 0x6e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_xmm2gpr = { { 0x7e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4002 };
const X64Instr instr_xmmsub = { { 0x5c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmadd = { { 0x58,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmmul = { { 0x59,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_xmmsqrt = { { 0x51,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10102 };
const X64Instr instr_ucomisd = { { 0x2e,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102 };
const X64Instr instr_pxor= { { 0xef,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102 };
const X64Instr instr_psrlq= { { 0xF1,0xF1,0x73,0x02,0xF1,0xF1 }, 0x4112 };
const X64Instr instr_psllq= { { 0xF1,0xF1,0x73,0x06,0xF1,0xF1 }, 0x4112 };
const X64Instr instr_cvtsi2sd= { { 0x2a,0x2a,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_cvttsd2si={ { 0x2c,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10002 };
const X64Instr instr_lddqu = { { 0xF0,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x10103 };
const X64Instr instr_unpcklpd ={ { 0x14,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x4102 };
const X64Instr instr_jmp = { { 0xFF,0xF1,0xE9,0x04,0xE9,0xF1 }, 0x0910 };
const X64Instr instr_call = { { 0xFF,0xF1,0xE8,0x02,0xE8,0xF1 }, 0x0900 };
const X64Instr instr_push = { { 0xFF,0xF1,0x68,0x06,0xF1,0x50 }, 0x0510 };
const X64Instr instr_pop = { { 0x8F,0xF1,0xF1,0x00,0xF1,0x58 }, 0x0500 };
const X64Instr instr_inc = { { 0xFF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_dec = { { 0xFF,0xF1,0xF1,0x01,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_not = { { 0xF7,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_notb = { { 0xF6,0xF1,0xF1,0x02,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_neg = { { 0xF7,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_negb = { { 0xF6,0xF1,0xF1,0x03,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_add = { { 0x01,0x03,0x81,0x00,0x05,0xF1 }, 0x0810 };
const X64Instr instr_addb = { { 0x00,0x02,0x80,0x00,0x04,0xF1 }, 0x0810 };
const X64Instr instr_sub = { { 0x29,0x2B,0x81,0x05,0x2D,0xF1 }, 0x0810 };
const X64Instr instr_subb = { { 0x28,0x2A,0x80,0x05,0x2C,0xF1 }, 0x0810 };
const X64Instr instr_and = { { 0x21,0x23,0x81,0x04,0x25,0xF1 }, 0x0810 };
const X64Instr instr_andb = { { 0x20,0x22,0x80,0x04,0x24,0xF1 }, 0x0810 };
const X64Instr instr_or = { { 0x09,0x0B,0x81,0x01,0x0D,0xF1 }, 0x0810 };
const X64Instr instr_orb = { { 0x08,0x0A,0x80,0x01,0x0C,0xF1 }, 0x0810 };
const X64Instr instr_xor = { { 0x31,0x33,0x81,0x06,0x35,0xF1 }, 0x0810 };
const X64Instr instr_xorb = { { 0x30,0x32,0x80,0x06,0x34,0xF1 }, 0x0810 };
const X64Instr instr_mov = { { 0x89,0x8B,0xC7,0x00,0xF1,0xB8 }, 0x0600 };
const X64Instr instr_movb = { { 0x88,0x8A,0xC6,0x00,0xF1,0xB0 }, 0x0610 };
const X64Instr instr_test = { { 0x85,0x85,0xF7,0x00,0xA9,0xF1 }, 0x0800 };
const X64Instr instr_testb = { { 0x84,0x84,0xF6,0x00,0xA8,0xF1 }, 0x0810 };
const X64Instr instr_cmp = { { 0x39,0x3B,0x81,0x07,0x3D,0xF1 }, 0x0810 };
const X64Instr instr_cmpb = { { 0x38,0x3A,0x80,0x07,0x3C,0xF1 }, 0x0810 };
const X64Instr instr_sbb = { { 0x19,0x1B,0x81,0x03,0x1D,0xF1 }, 0x0810 };
const X64Instr instr_sbbb = { { 0x18,0x1A,0x80,0x03,0x1C,0xF1 }, 0x0810 };
const X64Instr instr_adc = { { 0x11,0x13,0x81,0x02,0x15,0xF1 }, 0x0810 };
const X64Instr instr_lea = { { 0xF1,0x8D,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_xchgb = { { 0x86,0x86,0xF1,0x00,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_xchg = { { 0x87,0x87,0xF1,0x00,0xF1,0x90 }, 0x1000 };
const X64Instr instr_imul = { { 0xAF,0xF7,0x69,0x05,0xF1,0xF1 }, 0x0019 };
const X64Instr instr_mul = { { 0xF7,0xF1,0xF1,0x04,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_div = { { 0xF7,0xF1,0xF1,0x06,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_idiv = { { 0xF7,0xF1,0xF1,0x07,0xF1,0xF1 }, 0x0000 };
const X64Instr instr_cdq = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0400 };
const X64Instr instr_ret = { { 0xF1,0xF1,0xC2,0x00,0xF1,0xC3 }, 0x0540 };
const X64Instr instr_jcc = { { 0xF1,0xF1,0x80,0x00,0xF1,0xF1 }, 0x0114 };
const X64Instr instr_cmovcc = { { 0x40,0x40,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_setcc = { { 0x90,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0102 };
const X64Instr instr_movswx = { { 0xBF,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_movsbx = { { 0xBE,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
const X64Instr instr_movzwx = { { 0xB7,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0003 };
const X64Instr instr_movzbx = { { 0xB6,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x2003 };
const X64Instr instr_cwde = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x98 }, 0x0400 };
const X64Instr instr_cqo = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x99 }, 0x0000 };
const X64Instr instr_rol = { { 0xD3,0xF1,0xC1,0x00,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_ror = { { 0xD3,0xF1,0xC1,0x01,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_rcl = { { 0xD3,0xF1,0xC1,0x02,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_rcr = { { 0xD3,0xF1,0xC1,0x03,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_shl = { { 0xD3,0xF1,0xC1,0x04,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_shr = { { 0xD3,0xF1,0xC1,0x05,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_sar = { { 0xD3,0xF1,0xC1,0x07,0xF1,0xF1 }, 0x0020 };
const X64Instr instr_xadd = { { 0xC1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
const X64Instr instr_cmpxchg = { { 0xB1,0xF1,0xF1,0x00,0xF1,0xF1 }, 0x0002 };
const X64Instr instr_nop = { { 0xF1,0xF1,0xF1,0x00,0xF1,0x90 }, 0x0500 };
const X64Instr instr_shld = { { 0xA5,0xF1,0xA4,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_shrd = { { 0xAD,0xF1,0xAC,0x00,0xF1,0xF1 }, 0x0082 };
const X64Instr instr_int3 = { { 0xF1,0xF1,0xF1,0x00,0xF1,0xCC }, 0x0500 };
const X64Instr instr_roundsd = { { 0xF1,0xF1,0x0b,0x00,0xF1,0xF1 }, 0x64112 };
const X64Instr instr_cmpsd = { { 0xF1,0xF1,0xC2,0xF1,0xF1,0xF1 }, 0x10112 };
enum class RoundDirection : ssize_t {
nearest = 0,
floor = 1,
ceil = 2,
truncate = 3,
};
const char* show(RoundDirection);
enum class ComparisonPred : uint8_t {
// True if...
eq_ord = 0, // ...operands are ordered AND equal
ne_unord = 4, // ...operands are unordered OR unequal
};
enum ConditionCode {
CC_None = -1,
CC_O = 0x00,
CC_NO = 0x01,
CC_B = 0x02,
CC_NAE = 0x02,
CC_AE = 0x03,
CC_NB = 0x03,
CC_NC = 0x03,
CC_E = 0x04,
CC_Z = 0x04,
CC_NE = 0x05,
CC_NZ = 0x05,
CC_BE = 0x06,
CC_NA = 0x06,
CC_A = 0x07,
CC_NBE = 0x07,
CC_S = 0x08,
CC_NS = 0x09,
CC_P = 0x0A,
CC_NP = 0x0B,
CC_L = 0x0C,
CC_NGE = 0x0C,
CC_GE = 0x0D,
CC_NL = 0x0D,
CC_LE = 0x0E,
CC_NG = 0x0E,
CC_G = 0x0F,
CC_NLE = 0x0F,
};
// names of condition codes, indexable by the ConditionCode enum value.
extern const char* cc_names[];
inline ConditionCode ccNegate(ConditionCode c) {
return ConditionCode(int(c) ^ 1); // And you thought x86 was irregular!
}
///////////////////////////////////////////////////////////////////////////////
struct Label;
/**
* Copyright (c) 2009, Andrew J. Paroski
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * The names of the contributors may not be used to endorse or promote
* products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL ANDREW J. PAROSKI BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
struct X64Assembler {
private:
friend struct Label;
/*
* Type for register numbers, independent of the size we're going to
* be using it as. Also, the same register number may mean different
* physical registers for different instructions (e.g. xmm0 and rax
* are both 0). Only for internal use in X64Assembler.
*/
enum class RegNumber : int {};
static const RegNumber noreg = RegNumber(-1);
public:
explicit X64Assembler(CodeBlock& cb) : codeBlock(cb) {}
X64Assembler(const X64Assembler&) = delete;
X64Assembler& operator=(const X64Assembler&) = delete;
CodeBlock& code() const { return codeBlock; }
CodeAddress base() const {
return codeBlock.base();
}
CodeAddress frontier() const {
return codeBlock.frontier();
}
CodeAddress toDestAddress(CodeAddress addr) const {
return codeBlock.toDestAddress(addr);
}
void setFrontier(CodeAddress newFrontier) {
codeBlock.setFrontier(newFrontier);
}
size_t capacity() const {
return codeBlock.capacity();
}
size_t used() const {
return codeBlock.used();
}
size_t available() const {
return codeBlock.available();
}
bool contains(CodeAddress addr) const {
return codeBlock.contains(addr);
}
bool empty() const {
return codeBlock.empty();
}
void clear() {
codeBlock.clear();
}
bool canEmit(size_t nBytes) const {
assert(capacity() >= used());
return nBytes < (capacity() - used());
}
/*
* The following section defines the main interface for emitting
* x64.
*
* Simple Examples:
*
* a. movq (rax, rbx); // order is AT&T: src, dest
* a. loadq (*rax, rbx); // loads from *rax
* a. loadq (rax[0], rbx); // also loads from *rax
* a. storeq (rcx, rax[0xc]); // store to rax + 0xc
* a. addq (0x1, rbx); // increment rbx
*
* Addressing with index registers:
*
* a. movl (index, ecx);
* a. loadq (*rax, rbx);
* a. storeq (rbx, rbx[rcx*8]);
* a. call (rax); // indirect call
*
*/
#define BYTE_LOAD_OP(name, instr) \
void name##b(MemoryRef m, Reg8 r) { instrMR(instr, m, r); } \
#define LOAD_OP(name, instr) \
void name##q(MemoryRef m, Reg64 r) { instrMR(instr, m, r); } \
void name##l(MemoryRef m, Reg32 r) { instrMR(instr, m, r); } \
void name##w(MemoryRef m, Reg16 r) { instrMR(instr, m, r); } \
void name##q(RIPRelativeRef m, Reg64 r) { instrMR(instr, m, r); } \
BYTE_LOAD_OP(name, instr##b)
#define BYTE_STORE_OP(name, instr) \
void name##b(Reg8 r, MemoryRef m) { instrRM(instr, r, m); } \
void name##b(Immed i, MemoryRef m) { instrIM8(instr, i, m); } \
#define STORE_OP(name, instr) \
void name##w(Immed i, MemoryRef m) { instrIM16(instr, i, m); } \
void name##l(Immed i, MemoryRef m) { instrIM32(instr, i, m); } \
void name##w(Reg16 r, MemoryRef m) { instrRM(instr, r, m); } \
void name##l(Reg32 r, MemoryRef m) { instrRM(instr, r, m); } \
void name##q(Reg64 r, MemoryRef m) { instrRM(instr, r, m); } \
BYTE_STORE_OP(name, instr ## b)
#define BYTE_REG_OP(name, instr) \
void name##b(Reg8 r1, Reg8 r2) { instrRR(instr, r1, r2); } \
void name##b(Immed i, Reg8 r) { instrIR(instr, i, r); } \
#define REG_OP(name, instr) \
void name##q(Reg64 r1, Reg64 r2) { instrRR(instr, r1, r2); } \
void name##l(Reg32 r1, Reg32 r2) { instrRR(instr, r1, r2); } \
void name##w(Reg16 r1, Reg16 r2) { instrRR(instr, r1, r2); } \
void name##l(Immed i, Reg32 r) { instrIR(instr, i, r); } \
void name##w(Immed i, Reg16 r) { instrIR(instr, i, r); } \
BYTE_REG_OP(name, instr##b)
/*
* For when we a have a memory operand and the operand size is
* 64-bits, only a 32-bit (sign-extended) immediate is supported.
*/
#define IMM64_STORE_OP(name, instr) \
void name##q(Immed i, MemoryRef m) { \
return instrIM(instr, i, m); \
}
/*
* For instructions other than movq, even when the operand size is
* 64 bits only a 32-bit (sign-extended) immediate is supported.
*/
#define IMM64R_OP(name, instr) \
void name##q(Immed imm, Reg64 r) { \
always_assert(imm.fits(sz::dword)); \
return instrIR(instr, imm, r); \
}
#define FULL_OP(name, instr) \
LOAD_OP(name, instr) \
STORE_OP(name, instr) \
REG_OP(name, instr) \
IMM64_STORE_OP(name, instr) \
IMM64R_OP(name, instr)
// We rename x64's mov to store and load for improved code
// readability.
LOAD_OP (load, instr_mov)
STORE_OP (store, instr_mov)
IMM64_STORE_OP (store, instr_mov)
REG_OP (mov, instr_mov)
FULL_OP(add, instr_add)
FULL_OP(xor, instr_xor)
FULL_OP(sub, instr_sub)
FULL_OP(and, instr_and)
FULL_OP(or, instr_or)
FULL_OP(test,instr_test)
FULL_OP(cmp, instr_cmp)
FULL_OP(sbb, instr_sbb)
#undef IMM64_OP
#undef IMM64R_OP
#undef FULL_OP
#undef REG_OP
#undef STORE_OP
#undef LOAD_OP
#undef BYTE_LOAD_OP
#undef BYTE_STORE_OP
#undef BYTE_REG_OP
// 64-bit immediates work with mov to a register.
void movq(Immed64 imm, Reg64 r) { instrIR(instr_mov, imm, r); }
// movzbx is a special snowflake. We don't have movzbq because it behaves
// exactly the same as movzbl but takes an extra byte.
void loadzbl(MemoryRef m, Reg32 r) { instrMR(instr_movzbx,
m, rbyte(r)); }
void movzbl(Reg8 src, Reg32 dest) { emitRR32(instr_movzbx,
rn(src), rn(dest)); }
void movsbl(Reg8 src, Reg32 dest) { emitRR(instr_movsbx,
rn(src), rn(dest)); }
void movzwl(Reg16 src, Reg32 dest) { emitRR32(instr_movzwx,
rn(src), rn(dest)); }
void loadsbq(MemoryRef m, Reg64 r) { instrMR(instr_movsbx,
m, r); }
void movsbq(Reg8 src, Reg64 dest) { emitRR(instr_movsbx,
rn(src), rn(dest)); }
void lea(MemoryRef p, Reg64 reg) { instrMR(instr_lea, p, reg); }
void lea(RIPRelativeRef p, Reg64 reg) { instrMR(instr_lea, p, reg); }
void xchgq(Reg64 r1, Reg64 r2) { instrRR(instr_xchg, r1, r2); }
void xchgl(Reg32 r1, Reg32 r2) { instrRR(instr_xchg, r1, r2); }
void xchgb(Reg8 r1, Reg8 r2) { instrRR(instr_xchgb, r1, r2); }
void imul(Reg64 r1, Reg64 r2) { instrRR(instr_imul, r1, r2); }
void push(Reg64 r) { instrR(instr_push, r); }
void pushl(Reg32 r) { instrR(instr_push, r); }
void pop (Reg64 r) { instrR(instr_pop, r); }
void idiv(Reg64 r) { instrR(instr_idiv, r); }
void incq(Reg64 r) { instrR(instr_inc, r); }
void incl(Reg32 r) { instrR(instr_inc, r); }
void incw(Reg16 r) { instrR(instr_inc, r); }
void decq(Reg64 r) { instrR(instr_dec, r); }
void decl(Reg32 r) { instrR(instr_dec, r); }
void decw(Reg16 r) { instrR(instr_dec, r); }
void notb(Reg8 r) { instrR(instr_notb, r); }
void not(Reg64 r) { instrR(instr_not, r); }
void neg(Reg64 r) { instrR(instr_neg, r); }
void negb(Reg8 r) { instrR(instr_negb, r); }
void ret() { emit(instr_ret); }
void ret(Immed i) { emitI(instr_ret, i.w(), sz::word); }
void cqo() { emit(instr_cqo); }
void nop() { emit(instr_nop); }
void int3() { emit(instr_int3); }
void ud2() { byte(0x0f); byte(0x0b); }
void pushf() { byte(0x9c); }
void popf() { byte(0x9d); }
void lock() { byte(0xF0); }
void push(MemoryRef m) { instrM(instr_push, m); }
void pop (MemoryRef m) { instrM(instr_pop, m); }
void incq(MemoryRef m) { instrM(instr_inc, m); }
void incl(MemoryRef m) { instrM32(instr_inc, m); }
void incw(MemoryRef m) { instrM16(instr_inc, m); }
void decq(MemoryRef m) { instrM(instr_dec, m); }
void decl(MemoryRef m) { instrM32(instr_dec, m); }
void decw(MemoryRef m) { instrM16(instr_dec, m); }
void push(Immed64 i) { emitI(instr_push, i.q()); }
void movups(RegXMM x, MemoryRef m) { instrRM(instr_movups, x, m); }
void movups(MemoryRef m, RegXMM x) { instrMR(instr_movups, m, x); }
void movdqu(RegXMM x, MemoryRef m) { instrRM(instr_movdqu, x, m); }
void movdqu(MemoryRef m, RegXMM x) { instrMR(instr_movdqu, m, x); }
void movdqa(RegXMM x, RegXMM y) { instrRR(instr_movdqa, x, y); }
void movdqa(RegXMM x, MemoryRef m) { instrRM(instr_movdqa, x, m); }
void movdqa(MemoryRef m, RegXMM x) { instrMR(instr_movdqa, m, x); }
void movsd (RegXMM x, RegXMM y) { instrRR(instr_movsd, x, y); }
void movsd (RegXMM x, MemoryRef m) { instrRM(instr_movsd, x, m); }
void movsd (MemoryRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
void movsd (RIPRelativeRef m, RegXMM x) { instrMR(instr_movsd, m, x); }
void lddqu (MemoryRef m, RegXMM x) { instrMR(instr_lddqu, m, x); }
void unpcklpd(RegXMM s, RegXMM d) { instrRR(instr_unpcklpd, d, s); }
void rorq (Immed i, Reg64 r) { instrIR(instr_ror, i, r); }
void shlq (Immed i, Reg64 r) { instrIR(instr_shl, i, r); }
void shrq (Immed i, Reg64 r) { instrIR(instr_shr, i, r); }
void sarq (Immed i, Reg64 r) { instrIR(instr_sar, i, r); }
void shll (Immed i, Reg32 r) { instrIR(instr_shl, i, r); }
void shrl (Immed i, Reg32 r) { instrIR(instr_shr, i, r); }
void shlw (Immed i, Reg16 r) { instrIR(instr_shl, i, r); }
void shrw (Immed i, Reg16 r) { instrIR(instr_shr, i, r); }
void shlq (Reg64 r) { instrR(instr_shl, r); }
void sarq (Reg64 r) { instrR(instr_sar, r); }
void roundsd (RoundDirection d, RegXMM src, RegXMM dst) {
emitIRR(instr_roundsd, rn(dst), rn(src), ssize_t(d));
}
void cmpsd(RegXMM src, RegXMM dst, ComparisonPred pred) {
emitIRR(instr_cmpsd, rn(dst), rn(src), ssize_t(pred));
}
/*
* Control-flow directives. Primitive labeling/patching facilities
* are available, as well as slightly higher-level ones via the
* Label class.
*/
bool jmpDeltaFits(CodeAddress dest) {
int64_t delta = dest - (codeBlock.frontier() + 5);
return deltaFits(delta, sz::dword);
}
void jmp(Reg64 r) { instrR(instr_jmp, r); }
void jmp(MemoryRef m) { instrM(instr_jmp, m); }
void jmp(RIPRelativeRef m) { instrM(instr_jmp, m); }
void call(Reg64 r) { instrR(instr_call, r); }
void call(MemoryRef m) { instrM(instr_call, m); }
void call(RIPRelativeRef m) { instrM(instr_call, m); }
void jmp8(CodeAddress dest) { emitJ8(instr_jmp, ssize_t(dest)); }
void jmp(CodeAddress dest) {
always_assert_flog(dest && jmpDeltaFits(dest), "Bad Jmp: {}", dest);
emitJ32(instr_jmp, ssize_t(dest));
}
void call(CodeAddress dest) {
always_assert(dest && jmpDeltaFits(dest));
emitJ32(instr_call, ssize_t(dest));
}
void jcc(ConditionCode cond, CodeAddress dest) {
emitCJ32(instr_jcc, cond, (ssize_t)dest);
}
void jcc8(ConditionCode cond, CodeAddress dest) {
emitCJ8(instr_jcc, cond, (ssize_t)dest);
}
void jmpAuto(CodeAddress dest) {
auto delta = dest - (codeBlock.frontier() + 2);
if (deltaFits(delta, sz::byte)) {
jmp8(dest);
} else {
jmp(dest);
}
}
void jccAuto(ConditionCode cc, CodeAddress dest) {
auto delta = dest - (codeBlock.frontier() + 2);
if (deltaFits(delta, sz::byte)) {
jcc8(cc, dest);
} else {
jcc(cc, dest);
}
}
void call(Label&);
void jmp(Label&);
void jmp8(Label&);
void jcc(ConditionCode, Label&);
void jcc8(ConditionCode, Label&);
#define CCS \
CC(o, CC_O) \
CC(no, CC_NO) \
CC(nae, CC_NAE) \
CC(ae, CC_AE) \
CC(nb, CC_NB) \
CC(e, CC_E) \
CC(z, CC_Z) \
CC(ne, CC_NE) \
CC(nz, CC_NZ) \
CC(b, CC_B) \
CC(be, CC_BE) \
CC(nbe, CC_NBE) \
CC(s, CC_S) \
CC(ns, CC_NS) \
CC(p, CC_P) \
CC(np, CC_NP) \
CC(nge, CC_NGE) \
CC(g, CC_G) \
CC(l, CC_L) \
CC(ge, CC_GE) \
CC(nl, CC_NL) \
CC(ng, CC_NG) \
CC(le, CC_LE) \
CC(nle, CC_NLE)
#define CC(_nm, _code) \
void j ## _nm(CodeAddress dest) { jcc(_code, dest); } \
void j ## _nm ## 8(CodeAddress dest) { jcc8(_code, dest); } \
void j ## _nm(Label&); \
void j ## _nm ## 8(Label&);
CCS
#undef CC
void setcc(int cc, Reg8 byteReg) {
emitCR(instr_setcc, cc, rn(byteReg), sz::byte);
}
#define CC(_nm, _cond) \
void set ## _nm(Reg8 byteReg) { \
setcc(_cond, byteReg); \
}
CCS
#undef CC
void psllq(Immed i, RegXMM r) { emitIR(instr_psllq, rn(r), i.b()); }
void psrlq(Immed i, RegXMM r) { emitIR(instr_psrlq, rn(r), i.b()); }
void movq_rx(Reg64 rSrc, RegXMM rdest) {
emitRR(instr_gpr2xmm, rn(rdest), rn(rSrc));
}
void movq_xr(RegXMM rSrc, Reg64 rdest) {
emitRR(instr_xmm2gpr, rn(rSrc), rn(rdest));
}
void addsd(RegXMM src, RegXMM srcdest) {
emitRR(instr_xmmadd, rn(srcdest), rn(src));
}
void mulsd(RegXMM src, RegXMM srcdest) {
emitRR(instr_xmmmul, rn(srcdest), rn(src));
}
void subsd(RegXMM src, RegXMM srcdest) {
emitRR(instr_xmmsub, rn(srcdest), rn(src));
}
void pxor(RegXMM src, RegXMM srcdest) {
emitRR(instr_pxor, rn(srcdest), rn(src));
}
void cvtsi2sd(Reg64 src, RegXMM dest) {
emitRR(instr_cvtsi2sd, rn(dest), rn(src));
}
void cvtsi2sd(MemoryRef m, RegXMM dest) {
instrMR(instr_cvtsi2sd, m, dest);
}
void ucomisd(RegXMM l, RegXMM r) {
emitRR(instr_ucomisd, rn(l), rn(r));
}
void sqrtsd(RegXMM src, RegXMM dest) {
emitRR(instr_xmmsqrt, rn(dest), rn(src));
}
void divsd(RegXMM src, RegXMM srcdest) {
emitRR(instr_divsd, rn(srcdest), rn(src));
}
void cvttsd2siq(RegXMM src, Reg64 dest) {
emitRR(instr_cvttsd2si, rn(dest), rn(src));
}
/*
* The following utility functions do more than emit specific code.
* (E.g. combine common idioms or patterns, smash code, etc.)
*/
void emitImmReg(Immed64 imm, Reg64 dest) {
if (imm.q() == 0) {
// Zeros the top bits also.
xorl (r32(dest), r32(dest));
return;
}
if (LIKELY(imm.q() > 0 && imm.fits(sz::dword))) {
// This will zero out the high-order bits.
movl (imm.l(), r32(dest));
return;
}
movq (imm.q(), dest);
}
static void patchJcc(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
assert(jmp[0] == 0x0F && (jmp[1] & 0xF0) == 0x80);
ssize_t diff = dest - (from + 6);
*(int32_t*)(jmp + 2) = safe_cast<int32_t>(diff);
}
static void patchJcc8(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
assert((jmp[0] & 0xF0) == 0x70);
ssize_t diff = dest - (from + 2); // one for opcode, one for offset
*(int8_t*)(jmp + 1) = safe_cast<int8_t>(diff);
}
static void patchJmp(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
assert(jmp[0] == 0xE9);
ssize_t diff = dest - (from + 5);
*(int32_t*)(jmp + 1) = safe_cast<int32_t>(diff);
}
static void patchJmp8(CodeAddress jmp, CodeAddress from, CodeAddress dest) {
assert(jmp[0] == 0xEB);
ssize_t diff = dest - (from + 2); // one for opcode, one for offset
*(int8_t*)(jmp + 1) = safe_cast<int8_t>(diff);
}
static void patchCall(CodeAddress call, CodeAddress from, CodeAddress dest) {
assert(call[0] == 0xE8);
ssize_t diff = dest - (from + 5);
*(int32_t*)(call + 1) = safe_cast<int32_t>(diff);
}
void emitInt3s(int n) {
for (auto i = 0; i < n; ++i) {
byte(0xcc);
}
}
void emitNop(int n) {
if (n == 0) return;
static const uint8_t nops[][9] = {
{ },
{ 0x90 },
{ 0x66, 0x90 },
{ 0x0f, 0x1f, 0x00 },
{ 0x0f, 0x1f, 0x40, 0x00 },
{ 0x0f, 0x1f, 0x44, 0x00, 0x00 },
{ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00 },
{ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00 },
{ 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
{ 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
};
// While n >= 9, emit 9 byte NOPs
while (n >= 9) {
bytes(9, nops[9]);
n -= 9;
}
bytes(n, nops[n]);
}
/*
* Low-level emitter functions.
*
* These functions are the core of the assembler, and can also be
* used directly.
*/
void byte(uint8_t b) {
codeBlock.byte(b);
}
void word(uint16_t w) {
codeBlock.word(w);
}
void dword(uint32_t dw) {
codeBlock.dword(dw);
}
void qword(uint64_t qw) {
codeBlock.qword(qw);
}
void bytes(size_t n, const uint8_t* bs) {
codeBlock.bytes(n, bs);
}
// op %r
// ------
// Restrictions:
// r cannot be set to 'none'
ALWAYS_INLINE
void emitCR(X64Instr op, int jcond, RegNumber regN, int opSz = sz::qword) {
assert(regN != noreg);
int r = int(regN);
// Opsize prefix
if (opSz == sz::word) {
byte(kOpsizePrefix);
}
// REX
unsigned char rex = 0;
bool highByteReg = false;
if (opSz == sz::byte) {
if (byteRegNeedsRex(r)) {
rex |= 0x40;
}
r = byteRegEncodeNumber(r, highByteReg);
}
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
if (r & 8) rex |= 1;
if (rex) {
byte(0x40 | rex);
if (highByteReg) byteRegMisuse();
}
// If the instruction supports compact-R mode, use that
if (op.flags & IF_COMPACTR) {
byte(op.table[5] | (r & 7));
return;
}
char opcode = (op.flags & IF_REVERSE) ? op.table[1] : op.table[0];
char rval = op.table[3];
// Handle two byte opcodes
if (op.flags & IF_TWOBYTEOP) byte(0x0F);
byte(opcode | jcond);
emitModrm(3, rval, r);
}
ALWAYS_INLINE
void emitR(X64Instr op, RegNumber r, int opSz = sz::qword) {
emitCR(op, 0, r, opSz);
}
ALWAYS_INLINE
void emitR32(X64Instr op, RegNumber r) {
emitCR(op, 0, r, sz::dword);
}
ALWAYS_INLINE
void emitR16(X64Instr op, RegNumber r) {
emitCR(op, 0, r, sz::word);
}
// op %r2, %r1
// -----------
// Restrictions:
// r1 cannot be set to noreg
// r2 cannot be set to noreg
ALWAYS_INLINE
void emitCRR(X64Instr op, int jcond, RegNumber rn1, RegNumber rn2,
int opSz = sz::qword) {
assert(rn1 != noreg && rn2 != noreg);
int r1 = int(rn1);
int r2 = int(rn2);
bool reverse = ((op.flags & IF_REVERSE) != 0);
prefixBytes(op.flags, opSz);
// The xchg instruction is special; we have compact encodings for
// exchanging with rax or eax.
if (op.flags & IF_XCHG) {
if (r1 == int(reg::rax)) {
// REX
unsigned char rex = 0;
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
assert(!(op.flags & IF_BYTEREG));
if (r2 & 8) rex |= (reverse ? 4 : 1);
if (rex) byte(0x40 | rex);
// If the second register is rax, emit opcode with the first
// register id embedded
byte(op.table[5] | (r2 & 7));
return;
} else if (r2 == int(reg::rax)) {
reverse = !reverse;
// REX
unsigned char rex = 0;
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) {
rex |= 8;
}
if (r1 & 8) rex |= (reverse ? 1 : 4);
if (rex) byte(0x40 | rex);
// If the first register is rax, emit opcode with the second
// register id embedded
byte(op.table[5] | (r1 & 7));
return;
}
}
// REX
unsigned char rex = 0;
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
bool highByteReg = false;
// movzbx's first operand is a bytereg regardless of operand size
if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
if (byteRegNeedsRex(r1) ||
(!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
rex |= 0x40;
}
r1 = byteRegEncodeNumber(r1, highByteReg);
r2 = byteRegEncodeNumber(r2, highByteReg);
}
if (r1 & 8) rex |= (reverse ? 1 : 4);
if (r2 & 8) rex |= (reverse ? 4 : 1);
if (rex) {
byte(0x40 | rex);
if (highByteReg) byteRegMisuse();
}
// For two byte opcodes
if ((op.flags & (IF_TWOBYTEOP | IF_IMUL)) != 0) byte(0x0F);
byte(op.table[0] | jcond);
if (reverse) {
emitModrm(3, r2, r1);
} else {
emitModrm(3, r1, r2);
}
}
ALWAYS_INLINE
void emitCRR32(X64Instr op, int jcond, RegNumber r1, RegNumber r2) {
emitCRR(op, jcond, r1, r2, sz::dword);
}
ALWAYS_INLINE
void emitRR(X64Instr op, RegNumber r1, RegNumber r2, int opSz = sz::qword) {
emitCRR(op, 0, r1, r2, opSz);
}
ALWAYS_INLINE
void emitRR32(X64Instr op, RegNumber r1, RegNumber r2) {
emitCRR(op, 0, r1, r2, sz::dword);
}
ALWAYS_INLINE
void emitRR16(X64Instr op, RegNumber r1, RegNumber r2) {
emitCRR(op, 0, r1, r2, sz::word);
}
ALWAYS_INLINE
void emitRR8(X64Instr op, RegNumber r1, RegNumber r2) {
emitCRR(op, 0, r1, r2, sz::byte);
}
// op $imm, %r
// -----------
// Restrictions:
// r cannot be set to noreg
ALWAYS_INLINE
void emitIR(X64Instr op, RegNumber rname, ssize_t imm,
int opSz = sz::qword) {
assert(rname != noreg);
int r = int(rname);
// Opsize prefix
prefixBytes(op.flags, opSz);
// Determine the size of the immediate. This might change opSz so
// do it first.
int immSize;
if ((op.flags & IF_MOV) && opSz == sz::qword) {
immSize = computeImmediateSizeForMovRI64(op, imm, opSz);
} else {
immSize = computeImmediateSize(op, imm, opSz);
}
// REX
unsigned char rex = 0;
bool highByteReg = false;
if (opSz == sz::byte) {
if (byteRegNeedsRex(r)) {
rex |= 0x40;
}
r = byteRegEncodeNumber(r, highByteReg);
}
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
if (r & 8) rex |= 1;
if (rex) {
byte(0x40 | rex);
if (highByteReg) byteRegMisuse();
}
// Use the special rax encoding if the instruction supports it
if (r == int(reg::rax) && immSize == sz::dword &&
(op.flags & IF_RAX)) {
byte(op.table[4]);
emitImmediate(op, imm, immSize);
return;
}
// Use the compact-R encoding if the operand size and the immediate
// size are the same
if ((op.flags & IF_COMPACTR) && immSize == opSz) {
byte(op.table[5] | (r & 7));
emitImmediate(op, imm, immSize);
return;
}
// For two byte opcodes
if ((op.flags & (IF_TWOBYTEOP | IF_IMUL)) != 0) byte(0x0F);
int rval = op.table[3];
// shift/rotate instructions have special opcode when
// immediate is 1
if ((op.flags & IF_SHIFT) != 0 && imm == 1) {
byte(0xd1);
emitModrm(3, rval, r);
// don't emit immediate
return;
}
int opcode = (immSize == sz::byte && opSz != sz::byte) ?
(op.table[2] | 2) : op.table[2];
byte(opcode);
emitModrm(3, rval, r);
emitImmediate(op, imm, immSize);
}
ALWAYS_INLINE
void emitIR32(X64Instr op, RegNumber r, ssize_t imm) {
emitIR(op, r, imm, sz::dword);
}
ALWAYS_INLINE
void emitIR16(X64Instr op, RegNumber r, ssize_t imm) {
emitIR(op, r, safe_cast<int16_t>(imm), sz::word);
}
ALWAYS_INLINE
void emitIR8(X64Instr op, RegNumber r, ssize_t imm) {
emitIR(op, r, safe_cast<int8_t>(imm), sz::byte);
}
// op $imm, %r2, %r1
// -----------------
// Restrictions:
// r1 cannot be set to noreg
// r2 cannot be set to noreg
ALWAYS_INLINE
void emitIRR(X64Instr op, RegNumber rn1, RegNumber rn2, ssize_t imm,
int opSz = sz::qword) {
assert(rn1 != noreg && rn2 != noreg);
int r1 = int(rn1);
int r2 = int(rn2);
bool reverse = ((op.flags & IF_REVERSE) != 0);
// Opsize prefix
prefixBytes(op.flags, opSz);
// REX
unsigned char rex = 0;
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
bool highByteReg = false;
if (opSz == sz::byte || (op.flags & IF_BYTEREG)) {
if (byteRegNeedsRex(r1) ||
(!(op.flags & IF_BYTEREG) && byteRegNeedsRex(r2))) {
rex |= 0x40;
}
r1 = byteRegEncodeNumber(r1, highByteReg);
r2 = byteRegEncodeNumber(r2, highByteReg);
}
if (r1 & 8) rex |= (reverse ? 1 : 4);
if (r2 & 8) rex |= (reverse ? 4 : 1);
if (rex) {
byte(0x40 | rex);
if (highByteReg) byteRegMisuse();
}
// Determine the size of the immediate
int immSize = computeImmediateSize(op, imm, opSz);
if (op.flags & IF_TWOBYTEOP || op.flags & IF_THREEBYTEOP) byte(0x0F);
if (op.flags & IF_THREEBYTEOP) byte(0x3a);
int opcode = (immSize == sz::byte && opSz != sz::byte &&
(op.flags & IF_ROUND) == 0) ?
(op.table[2] | 2) : op.table[2];
byte(opcode);
if (reverse) {
emitModrm(3, r2, r1);
} else {
emitModrm(3, r1, r2);
}
emitImmediate(op, imm, immSize);
}
ALWAYS_INLINE
void emitCI(X64Instr op, int jcond, ssize_t imm, int opSz = sz::qword) {
// Opsize prefix
prefixBytes(op.flags, opSz);
// REX
if ((op.flags & IF_NO_REXW) == 0) {
byte(0x48);
}
// Determine the size of the immediate
int immSize = computeImmediateSize(op, imm, opSz);
// Emit opcode
if ((op.flags & IF_JCC) != 0) {
// jcc is weird so we handle it separately
if (immSize != sz::byte) {
byte(0x0F);
byte(jcond | 0x80);
} else {
byte(jcond | 0x70);
}
} else {
int opcode = (immSize == sz::byte && opSz != sz::byte) ?
(op.table[2] | 2) : op.table[2];
byte(jcond | opcode);
}
emitImmediate(op, imm, immSize);
}
ALWAYS_INLINE
void emitI(X64Instr op, ssize_t imm, int opSz = sz::qword) {
emitCI(op, 0, imm, opSz);
}
ALWAYS_INLINE
void emitJ8(X64Instr op, ssize_t imm) {
assert((op.flags & IF_JCC) == 0);
ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
// Emit opcode and 8-bit immediate
byte(0xEB);
byte(safe_cast<int8_t>(delta));
}
ALWAYS_INLINE
void emitCJ8(X64Instr op, int jcond, ssize_t imm) {
// this is for jcc only
assert(op.flags & IF_JCC);
ssize_t delta = imm - ((ssize_t)codeBlock.frontier() + 2);
// Emit opcode
byte(jcond | 0x70);
// Emit 8-bit offset
byte(safe_cast<int8_t>(delta));
}
ALWAYS_INLINE
void emitJ32(X64Instr op, ssize_t imm) {
// call and jmp are supported, jcc is not supported
assert((op.flags & IF_JCC) == 0);
int32_t delta =
safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 5));
uint8_t *bdelta = (uint8_t*)δ
uint8_t instr[] = { op.table[2],
bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
bytes(5, instr);
}
ALWAYS_INLINE
void emitCJ32(X64Instr op, int jcond, ssize_t imm) {
// jcc is supported, call and jmp are not supported
assert(op.flags & IF_JCC);
int32_t delta =
safe_cast<int32_t>(imm - ((ssize_t)codeBlock.frontier() + 6));
uint8_t* bdelta = (uint8_t*)δ
uint8_t instr[6] = { 0x0f, uint8_t(0x80 | jcond),
bdelta[0], bdelta[1], bdelta[2], bdelta[3] };
bytes(6, instr);
}
// op disp(%br,%ir,s)
// (for reverse == false, hasImmediate == false, r == noreg)
// op $imm, disp(%br,%ir,s)
// (for reverse == false, hasImmediate == true, r == noreg)
// op %r, disp(%br,%ir,s)
// (for reverse == false, hasImmediate == false, r != noreg)
// op $imm, %r, disp(%br,%ir,s)
// (for reverse == false, hasImmediate == true, r != noreg)
// op disp(%br,%ir,s), %r
// (for reverse == true, hasImmediate == false, r != noreg)
// op $imm, disp(%br,%ir,s), %r
// (for reverse == true, hasImmediate == true, r != noreg)
// -----------------------------------------------------------------
// Restrictions:
// ir cannot be set to 'sp'
ALWAYS_INLINE
void emitCMX(X64Instr op, int jcond, RegNumber brName, RegNumber irName,
int s, int64_t disp,
RegNumber rName,
bool reverse = false,
ssize_t imm = 0,
bool hasImmediate = false,
int opSz = sz::qword,
bool ripRelative = false) {
assert(irName != rn(reg::rsp));
int ir = int(irName);
int r = int(rName);
int br = int(brName);
// The opsize prefix can be placed here, if the instruction
// deals with words.
// When an instruction has a manditory prefix, it goes before the
// REX byte if we end up needing one.
prefixBytes(op.flags, opSz);
// Determine immSize from the 'hasImmediate' flag
int immSize = sz::nosize;
if (hasImmediate) {
immSize = computeImmediateSize(op, imm, opSz);
}
if ((op.flags & IF_REVERSE) != 0) reverse = !reverse;
// Determine if we need to use a two byte opcode;
// imul is weird so we have a special case for it
bool twoByteOpcode = ((op.flags & IF_TWOBYTEOP) != 0) ||
((op.flags & IF_IMUL) != 0 && rName != noreg &&
immSize == sz::nosize);
// Again, imul is weird
if ((op.flags & IF_IMUL) != 0 && rName != noreg) {
reverse = !reverse;
}
// The wily rex byte, a multipurpose extension to the opcode space for x64
unsigned char rex = 0;
if ((op.flags & IF_NO_REXW) == 0 && opSz == sz::qword) rex |= 8;
bool highByteReg = false;
// XXX: This IF_BYTEREG check is a special case for movzbl: we currently
// encode it using an opSz of sz::byte but it doesn't actually have a
// byte-sized operand like other instructions can.
if (!(op.flags & IF_BYTEREG) && opSz == sz::byte && rName != noreg) {
if (byteRegNeedsRex(r)) {
rex |= 0x40;
}
r = byteRegEncodeNumber(r, highByteReg);
}
if (rName != noreg && (r & 8)) rex |= 4;
if (irName != noreg && (ir & 8)) rex |= 2;
if (brName != noreg && (br & 8)) rex |= 1;
if (rex) {
byte(0x40 | rex);
if (highByteReg) byteRegMisuse();
}
// Emit the opcode
if (immSize != sz::nosize) {
if (twoByteOpcode) byte(0x0F);
if (immSize == sz::byte && opSz != sz::byte) {
byte(op.table[2] | 2 | jcond);
} else {
byte(op.table[2] | jcond);
}
} else {
if (twoByteOpcode) byte(0x0F);
int opcode;
if ((op.flags & IF_IMUL) != 0) {
opcode = (rName == noreg) ? op.table[1] : op.table[0];
} else {
opcode = reverse ? op.table[1] : op.table[0];
}
byte(opcode | jcond);
}
// SIB byte if:
// 1. We're using an index register.
// 2. The base register is rsp-like.
// 3. We're doing a baseless disp access and it is not rip-relative.
bool sibIsNeeded =
ir != int(noreg) || /* 1 */
br == int(reg::rsp) || br == int(reg::r12) || /* 2 */
(br == int(noreg) && !ripRelative);
// If there is no register and no immediate, use the /r value
if (r == int(noreg)) r = op.table[3];
// If noreg was specified for 'ir', we use
// the encoding for the sp register
if (ir == int(noreg)) ir = 4;
int dispSize = sz::nosize;
if (disp != 0) {
if (!ripRelative && disp <= 127 && disp >= -128) {
dispSize = sz::byte;
} else {
dispSize = sz::dword;
}
}
// Set 'mod' based on the size of the displacement
int mod;
switch (dispSize) {
case sz::nosize: mod = 0; break;
case sz::byte: mod = 1; break;
default: mod = 2; break;
}
// Handle special cases for 'br'
if (br == int(noreg)) {
// If noreg was specified for 'br', we use the encoding
// for the rbp register (or rip, if we're emitting a
// rip-relative instruction), and we must set mod=0 and
// "upgrade" to a DWORD-sized displacement
br = 5;
mod = 0;
dispSize = sz::dword;
} else if ((br & 7) == 5 && dispSize == sz::nosize) {
// If br == rbp and no displacement was specified, we
// must "upgrade" to using a 1-byte displacement value
dispSize = sz::byte;
mod = 1;
}
// Emit modr/m and the sib
if (sibIsNeeded) {
// s: 0 1 2 3 4 5 6 7 8
static const int scaleLookup[] = { -1, 0, 1, -1, 2, -1, -1, -1, 3 };
assert(s > 0 && s <= 8);
int scale = scaleLookup[s];
assert(scale != -1);
emitModrm(mod, r, 4);
byte((scale << 6) | ((ir & 7) << 3) | (br & 7));
} else {
emitModrm(mod, r, br);
}
// Emit displacement if needed
if (dispSize == sz::dword) {
if (ripRelative) {
disp -= (int64_t)codeBlock.frontier() + immSize + dispSize;
}
dword(disp);
} else if (dispSize == sz::byte) {
byte(disp & 0xff);
}
// Emit immediate if needed
if (immSize != sz::nosize) {
emitImmediate(op, imm, immSize);
}
}
ALWAYS_INLINE
void emitIM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
ssize_t imm, int opSz = sz::qword) {
emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, opSz);
}
ALWAYS_INLINE
void emitIM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
ssize_t imm) {
emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
sz::byte);
}
ALWAYS_INLINE
void emitIM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
ssize_t imm) {
emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true,
sz::word);
}
ALWAYS_INLINE
void emitIM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
ssize_t imm) {
emitCMX(op, 0, br, ir, s, disp, noreg, false, imm, true, sz::dword);
}
ALWAYS_INLINE
void emitRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r, int opSz = sz::qword) {
emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, opSz);
}
ALWAYS_INLINE
void emitRM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r) {
emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::dword);
}
ALWAYS_INLINE
void emitRM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r) {
emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::word);
}
ALWAYS_INLINE
void emitRM8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r) {
emitCMX(op, 0, br, ir, s, disp, r, false, 0, false, sz::byte);
}
ALWAYS_INLINE
void emitCMR(X64Instr op, int jcond, RegNumber br, RegNumber ir,
int s, int disp, RegNumber r, int opSz = sz::qword) {
emitCMX(op, jcond, br, ir, s, disp, r, true, 0, false, opSz);
}
ALWAYS_INLINE
void emitMR(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
RegNumber r, int opSz = sz::qword, bool ripRelative = false) {
emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, opSz, ripRelative);
}
ALWAYS_INLINE
void emitMR32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r) {
emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::dword);
}
ALWAYS_INLINE
void emitMR16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r) {
emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::word);
}
ALWAYS_INLINE
void emitMR8(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r) {
emitCMX(op, 0, br, ir, s, disp, r, true, 0, false, sz::byte);
}
ALWAYS_INLINE
void emitIRM(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r, ssize_t imm, int opSz = sz::qword) {
emitCMX(op, 0, br, ir, s, disp, r, false, imm, true, opSz);
}
ALWAYS_INLINE
void emitIMR(X64Instr op, RegNumber br, RegNumber ir, int s, int disp,
RegNumber r, ssize_t imm, int opSz = sz::qword) {
emitCMX(op, 0, br, ir, s, disp, r, true, imm, true, opSz);
}
ALWAYS_INLINE
void emitM(X64Instr op, RegNumber br, RegNumber ir, int s, int64_t disp,
int opSz = sz::qword, bool ripRelative = false) {
emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, opSz,
ripRelative);
}
ALWAYS_INLINE
void emitM32(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::dword);
}
ALWAYS_INLINE
void emitM16(X64Instr op, RegNumber br, RegNumber ir, int s, int disp) {
emitCMX(op, 0, br, ir, s, disp, noreg, false, 0, false, sz::word);
}
ALWAYS_INLINE
void emitCM(X64Instr op, int jcond, RegNumber br,
RegNumber ir, int s, int disp, int opSz = sz::qword) {
emitCMX(op, jcond, br, ir, s, disp, noreg, false, 0, false, opSz);
}
// emit (with no arguments)
ALWAYS_INLINE
void emit(X64Instr op) {
if ((op.flags & IF_NO_REXW) == 0) {
byte(0x48);
}
byte(op.table[5]);
}
// Segment register prefixes.
X64Assembler& fs() { byte(0x64); return *this; }
X64Assembler& gs() { byte(0x65); return *this; }
public:
/*
* The following functions use a naming convention for an older API
* to the assembler; conditional loads and moves haven't yet been
* ported.
*/
// CMOVcc [rbase + off], rdest
inline void cload_reg64_disp_reg64(ConditionCode cc, Reg64 rbase,
int off, Reg64 rdest) {
emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
false /*reverse*/);
}
inline void cload_reg64_disp_reg32(ConditionCode cc, Reg64 rbase,
int off, Reg32 rdest) {
emitCMX(instr_cmovcc, cc, rn(rbase), noreg, sz::byte, off, rn(rdest),
false /*reverse*/,
0 /*imm*/,
false /*hasImmediate*/,
sz::dword /*opSz*/);
}
inline void cmov_reg64_reg64(ConditionCode cc, Reg64 rsrc, Reg64 rdest) {
emitCRR(instr_cmovcc, cc, rn(rsrc), rn(rdest));
}
private:
bool byteRegNeedsRex(int rn) const {
// Without a rex, 4 through 7 mean the high 8-bit byte registers.
return rn >= 4 && rn <= 7;
}
int byteRegEncodeNumber(int rn, bool& seenHigh) const {
// We flag a bit in ah, ch, dh, bh so byteRegNeedsRex doesn't
// trigger.
if (rn & 0x80) seenHigh = true;
return rn & ~0x80;
}
// In 64-bit mode, you can't mix accesses to high byte registers
// with low byte registers other than al,cl,bl,dl. We assert this.
void byteRegMisuse() const {
assert(!"High byte registers can't be used with new x64 registers, or"
" anything requiring a REX prefix");
}
int computeImmediateSize(X64Instr op,
ssize_t imm,
int opsize = sz::dword) {
// Most instructions take a 32-bit or 16-bit immediate,
// depending on the presence of the opsize prefix (0x66).
int immSize = opsize == sz::word ? sz::word : sz::dword;
// ret always takes a 16-bit immediate.
if (op.flags & IF_RET) {
immSize = sz::word;
}
// Use an 8-bit immediate if the instruction supports it and if
// the immediate value fits in a byte
if (deltaFits(imm, sz::byte) && (op.flags & IF_HAS_IMM8) != 0) {
immSize = sz::byte;
}
return immSize;
}
void emitModrm(int x, int y, int z) {
byte((x << 6) | ((y & 7) << 3) | (z & 7));
}
/*
* The mov instruction supports an 8 byte immediate for the RI
* address mode when opSz is qword. It also supports a 4-byte
* immediate with opSz qword (the immediate is sign-extended).
*
* On the other hand, if it fits in 32-bits as an unsigned, we can
* change opSz to dword, which will zero the top 4 bytes instead of
* sign-extending.
*/
int computeImmediateSizeForMovRI64(X64Instr op, ssize_t imm, int& opSz) {
assert(opSz == sz::qword);
if (deltaFits(imm, sz::dword)) {
return computeImmediateSize(op, imm);
}
if (magFits(imm, sz::dword)) {
opSz = sz::dword;
return sz::dword;
}
return sz::qword;
}
void emitImmediate(X64Instr op, ssize_t imm, int immSize) {
if (immSize == sz::nosize) {
return;
}
if ((op.flags & (IF_SHIFT | IF_SHIFTD)) == 0) {
if (immSize == sz::dword) {
dword(imm);
} else if (immSize == sz::byte) {
byte(imm);
} else if (immSize == sz::word) {
word(imm);
} else {
qword(imm);
}
} else {
// we always use a byte-sized immediate for shift instructions
byte(imm);
}
}
void prefixBytes(unsigned long flags, int opSz) {
if (opSz == sz::word && !(flags & IF_RET)) byte(kOpsizePrefix);
if (flags & IF_66PREFIXED) byte(0x66);
if (flags & IF_F2PREFIXED) byte(0xF2);
if (flags & IF_F3PREFIXED) byte(0xF3);
}
private:
RegNumber rn(Reg8 r) { return RegNumber(int(r)); }
RegNumber rn(Reg16 r) { return RegNumber(int(r)); }
RegNumber rn(Reg32 r) { return RegNumber(int(r)); }
RegNumber rn(Reg64 r) { return RegNumber(int(r)); }
RegNumber rn(RegXMM r) { return RegNumber(int(r)); }
// Wraps a bunch of the emit* functions to make using them with the
// typed wrappers more terse. We should have these replace
// the emit functions eventually.
#define UMR(m) rn(m.r.base), rn(m.r.index), m.r.scale, m.r.disp
#define URIP(m) noreg, noreg, sz::byte, m.r.disp
void instrR(X64Instr op, Reg64 r) { emitR(op, rn(r)); }
void instrR(X64Instr op, Reg32 r) { emitR32(op, rn(r)); }
void instrR(X64Instr op, Reg16 r) { emitR16(op, rn(r)); }
void instrR(X64Instr op, Reg8 r) { emitR(op, rn(r), sz::byte); }
void instrRR(X64Instr op, Reg64 x, Reg64 y) { emitRR(op, rn(x), rn(y)); }
void instrRR(X64Instr op, Reg32 x, Reg32 y) { emitRR32(op, rn(x), rn(y)); }
void instrRR(X64Instr op, Reg16 x, Reg16 y) { emitRR16(op, rn(x), rn(y)); }
void instrRR(X64Instr op, Reg8 x, Reg8 y) { emitRR8(op, rn(x), rn(y)); }
void instrRR(X64Instr op, RegXMM x, RegXMM y) { emitRR(op, rn(x), rn(y)); }
void instrM(X64Instr op, MemoryRef m) { emitM(op, UMR(m)); }
void instrM(X64Instr op, RIPRelativeRef m) { emitM(op, URIP(m),
sz::qword, true); }
void instrM32(X64Instr op, MemoryRef m) { emitM32(op, UMR(m)); }
void instrM16(X64Instr op, MemoryRef m) { emitM16(op, UMR(m)); }
void instrRM(X64Instr op,
Reg64 r,
MemoryRef m) { emitRM(op, UMR(m), rn(r)); }
void instrRM(X64Instr op,
Reg32 r,
MemoryRef m) { emitRM32(op, UMR(m), rn(r)); }
void instrRM(X64Instr op,
Reg16 r,
MemoryRef m) { emitRM16(op, UMR(m), rn(r)); }
void instrRM(X64Instr op,
Reg8 r,
MemoryRef m) { emitRM8(op, UMR(m), rn(r)); }
void instrRM(X64Instr op,
RegXMM x,
MemoryRef m) { emitRM(op, UMR(m), rn(x)); }
void instrMR(X64Instr op,
MemoryRef m,
Reg64 r) { emitMR(op, UMR(m), rn(r)); }
void instrMR(X64Instr op,
MemoryRef m,
Reg32 r) { emitMR32(op, UMR(m), rn(r)); }
void instrMR(X64Instr op,
MemoryRef m,
Reg16 r) { emitMR16(op, UMR(m), rn(r)); }
void instrMR(X64Instr op,
MemoryRef m,
Reg8 r) { emitMR8(op, UMR(m), rn(r)); }
void instrMR(X64Instr op,
MemoryRef m,
RegXMM x) { emitMR(op, UMR(m), rn(x)); }
void instrMR(X64Instr op,
RIPRelativeRef m,
Reg64 r) { emitMR(op, URIP(m), rn(r),
sz::qword, true); }
void instrMR(X64Instr op,
RIPRelativeRef m,
RegXMM r) { emitMR(op, URIP(m), rn(r),
sz::qword, true); }
void instrIR(X64Instr op, Immed64 i, Reg64 r) {
emitIR(op, rn(r), i.q());
}
void instrIR(X64Instr op, Immed i, Reg64 r) {
emitIR(op, rn(r), i.q());
}
void instrIR(X64Instr op, Immed i, Reg32 r) {
emitIR32(op, rn(r), i.l());
}
void instrIR(X64Instr op, Immed i, Reg16 r) {
emitIR16(op, rn(r), i.w());
}
void instrIR(X64Instr op, Immed i, Reg8 r) {
emitIR8(op, rn(r), i.b());
}
void instrIM(X64Instr op, Immed i, MemoryRef m) {
emitIM(op, UMR(m), i.q());
}
void instrIM32(X64Instr op, Immed i, MemoryRef m) {
emitIM32(op, UMR(m), i.l());
}
void instrIM16(X64Instr op, Immed i, MemoryRef m) {
emitIM16(op, UMR(m), i.w());
}
void instrIM8(X64Instr op, Immed i, MemoryRef m) {
emitIM8(op, UMR(m), i.b());
}
#undef UMR
#undef URIP
CodeBlock& codeBlock;
};
//////////////////////////////////////////////////////////////////////
struct Label {
explicit Label()
: m_a(nullptr)
, m_address(nullptr)
{}
~Label() {
if (!m_toPatch.empty()) {
assert(m_a && m_address && "Label had jumps but was never set");
}
for (auto& ji : m_toPatch) {
auto realSrc = ji.a->toDestAddress(ji.addr);
switch (ji.type) {
case Branch::Jmp: ji.a->patchJmp(realSrc, ji.addr, m_address); break;
case Branch::Jmp8: ji.a->patchJmp8(realSrc, ji.addr, m_address); break;
case Branch::Jcc: ji.a->patchJcc(realSrc, ji.addr, m_address); break;
case Branch::Jcc8: ji.a->patchJcc8(realSrc, ji.addr, m_address); break;
case Branch::Call: ji.a->patchCall(realSrc, ji.addr, m_address); break;
}
}
}
Label(const Label&) = delete;
Label& operator=(const Label&) = delete;
void jmp(X64Assembler& a) {
addJump(&a, Branch::Jmp);
a.jmp(m_address ? m_address : a.frontier());
}
void jmp8(X64Assembler& a) {
addJump(&a, Branch::Jmp8);
a.jmp8(m_address ? m_address : a.frontier());
}
void jcc(X64Assembler& a, ConditionCode cc) {
addJump(&a, Branch::Jcc);
a.jcc(cc, m_address ? m_address : a.frontier());
}
void jcc8(X64Assembler& a, ConditionCode cc) {
addJump(&a, Branch::Jcc8);
a.jcc8(cc, m_address ? m_address : a.frontier());
}
void call(X64Assembler& a) {
addJump(&a, Branch::Call);
a.call(m_address ? m_address : a.frontier());
}
void jmpAuto(X64Assembler& a) {
assert(m_address);
auto delta = m_address - (a.frontier() + 2);
if (deltaFits(delta, sz::byte)) {
jmp8(a);
} else {
jmp(a);
}
}
void jccAuto(X64Assembler& a, ConditionCode cc) {
assert(m_address);
auto delta = m_address - (a.frontier() + 2);
if (deltaFits(delta, sz::byte)) {
jcc8(a, cc);
} else {
jcc(a, cc);
}
}
friend void asm_label(X64Assembler& a, Label& l) {
assert(!l.m_address && !l.m_a && "Label was already set");
l.m_a = &a;
l.m_address = a.frontier();
}
private:
enum class Branch {
Jcc,
Jcc8,
Jmp,
Jmp8,
Call
};
struct JumpInfo {
Branch type;
X64Assembler* a;
CodeAddress addr;
};
private:
void addJump(X64Assembler* a, Branch type) {
if (m_address) return;
JumpInfo info;
info.type = type;
info.a = a;
info.addr = a->codeBlock.frontier();
m_toPatch.push_back(info);
}
private:
X64Assembler* m_a;
CodeAddress m_address;
std::vector<JumpInfo> m_toPatch;
};
inline void X64Assembler::jmp(Label& l) { l.jmp(*this); }
inline void X64Assembler::jmp8(Label& l) { l.jmp8(*this); }
inline void X64Assembler::jcc(ConditionCode c, Label& l) { l.jcc(*this, c); }
inline void X64Assembler::jcc8(ConditionCode c, Label& l) { l.jcc8(*this, c); }
inline void X64Assembler::call(Label& l) { l.call(*this); }
#define CC(nm, code) \
inline void X64Assembler::j##nm(Label& l) { l.jcc(*this, code); } \
inline void X64Assembler::j##nm##8(Label& l) { l.jcc8(*this, code); }
CCS
#undef CC
//////////////////////////////////////////////////////////////////////
/*
* Select the assembler which contains a given address.
*
* E.g.:
*
* Asm& a = codeBlockChoose(toPatch, a, acold);
* a.patchJmp(...);
*/
inline CodeBlock& codeBlockChoose(CodeAddress addr) {
always_assert_flog(false,
"address {} was not part of any known code block", addr);
}
template<class... Blocks>
CodeBlock& codeBlockChoose(CodeAddress addr, CodeBlock& a, Blocks&... as) {
if (a.contains(addr)) return a;
return codeBlockChoose(addr, as...);
}
//////////////////////////////////////////////////////////////////////
namespace x64 {
struct DecodedInstruction {
DecodedInstruction(uint8_t* ip, uint8_t* base)
: m_base(base)
{ decode(ip); }
explicit DecodedInstruction(uint8_t* ip) : DecodedInstruction(ip, ip) {}
std::string toString();
size_t size() { return m_size; }
bool hasPicOffset() const { return m_flags.picOff; }
uint8_t* picAddress() const;
bool setPicAddress(uint8_t* target);
bool hasOffset() const { return m_offSz != 0; }
int32_t offset() const;
bool hasImmediate() const { return m_immSz; }
int64_t immediate() const;
bool setImmediate(int64_t value);
bool isNop() const;
bool isBranch(bool allowCond = true) const;
bool isCall() const;
bool isJmp() const;
bool isLea() const;
ConditionCode jccCondCode() const;
bool shrinkBranch();
void widenBranch();
uint8_t getModRm() const;
private:
void decode(uint8_t* ip);
bool decodePrefix(uint8_t* ip);
int decodeRexVexXop(uint8_t* ip);
int decodeOpcode(uint8_t* ip);
void determineOperandsMap0(uint8_t* ip);
void determineOperandsMap1(uint8_t* ip);
void determineOperandsMap2(uint8_t* ip);
void determineOperandsMap3(uint8_t* ip);
int decodeModRm(uint8_t* ip);
int decodeImm(uint8_t* ip);
// We may wish to decode an instruction whose address is m_ip, but treat all
// PIC references as relative to m_base.
uint8_t* m_base;
uint8_t* m_ip;
uint32_t m_size;
union {
uint32_t m_flagsVal;
struct {
uint32_t lock : 1;
uint32_t repNE : 1;
uint32_t rep : 1;
uint32_t cs : 1;
uint32_t ss : 1;
uint32_t ds : 1;
uint32_t es : 1;
uint32_t fs : 1;
uint32_t gs : 1;
uint32_t bTaken : 1;
uint32_t bNotTaken : 1;
uint32_t opndSzOvr : 1;
uint32_t addrSzOvr : 1;
uint32_t rex : 1;
uint32_t vex : 1;
uint32_t xop : 1;
uint32_t w : 1;
uint32_t r : 1;
uint32_t x : 1;
uint32_t b : 1;
uint32_t l : 1;
uint32_t def64 : 1;
uint32_t immIsAddr : 1;
uint32_t picOff : 1;
uint32_t hasModRm : 1;
uint32_t hasSib : 1;
} m_flags;
};
uint8_t m_map_select;
uint8_t m_xtra_op;
uint8_t m_opcode;
uint8_t m_immSz;
uint8_t m_offSz;
};
#undef TRACEMOD
#undef logical_const
#undef CCS
}}}
#endif