Skip to content

Commit 8c5aeba

Browse files
committed
mishegos: Construct VEX/XOP/EVEX opcodes
The VEX/XOP/EVEX "prefixes" actually behave like part of the opcode (the opcode byte must immediately follow the "prefix"). Explicitly constructing these opcodes significantly increases the likelihood of correctly hitting vector instructions. This is valuable not only for coverage, but also for exercising the many #UD (undefined-opcode) constraints in AVX-512.
1 parent a34416b commit 8c5aeba

File tree

2 files changed

+49
-11
lines changed

2 files changed

+49
-11
lines changed

src/mishegos/mutator.c

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,13 +94,16 @@ static uint8_t rand_byte() {
9494
#endif
9595

9696
/* Creates a random (potentially invalid) opcode.
97-
* Opcodes are 1-3 bytes long, and come in three formats:
97+
* Opcodes are 1-5 bytes long, and come in six formats:
9898
* 1. Single byte (raw opcode)
9999
* 2. Two bytes (escape byte, opcode)
100100
* 3. Three bytes (escape byte 1, escape byte 2, opcode)
101+
* 4. 2-byte VEX (VEX 0xc5, VEX byte 2, opcode)
102+
* 5. 3-byte VEX/XOP (VEX 0xc4/XOP 0x8f, VEX/XOP byte 2, VEX/XOP byte 3, opcode)
103+
* 6. 4-byte EVEX (EVEX 0x62, EVEX bytes 2-4, opcode)
101104
*/
102105
static void rand_opcode(opcode *opc) {
103-
switch (rand_byte() % 4) {
106+
switch (rand_byte() % 8) {
104107
case 0: {
105108
opc->len = 1;
106109
opc->op[0] = rand_byte();
@@ -126,6 +129,38 @@ static void rand_opcode(opcode *opc) {
126129
opc->op[2] = rand_byte();
127130
break;
128131
}
132+
case 4: { // VEX 2-byte
133+
opc->len = 3;
134+
opc->op[0] = 0xc5;
135+
opc->op[1] = rand_byte();
136+
opc->op[2] = rand_byte();
137+
break;
138+
}
139+
case 5: { // VEX 3-byte
140+
opc->len = 4;
141+
opc->op[0] = 0xc4;
142+
opc->op[1] = rand_byte();
143+
opc->op[2] = rand_byte();
144+
opc->op[3] = rand_byte();
145+
break;
146+
}
147+
case 6: { // XOP
148+
opc->len = 4;
149+
opc->op[0] = 0x8f;
150+
opc->op[1] = rand_byte() | 0x08;
151+
opc->op[2] = rand_byte();
152+
opc->op[3] = rand_byte();
153+
break;
154+
}
155+
case 7: { // EVEX
156+
opc->len = 5;
157+
opc->op[0] = 0x62;
158+
opc->op[1] = rand_byte();
159+
opc->op[2] = rand_byte();
160+
opc->op[3] = rand_byte();
161+
opc->op[4] = rand_byte();
162+
break;
163+
}
129164
}
130165
}
131166

@@ -162,7 +197,7 @@ static void build_sliding_candidate() {
162197
}
163198
}
164199

165-
/* Opcode, up to 3 bytes.
200+
/* Opcode, up to 5 bytes.
166201
*/
167202
opcode opc;
168203
rand_opcode(&opc);
@@ -269,7 +304,7 @@ static bool structured_candidate(input_slot *slot) {
269304
}
270305

271306
/* Random (but structured) opcode. Same as sliding.
272-
* Running max: 8
307+
* Running max: 10
273308
*/
274309
opcode opc;
275310
rand_opcode(&opc);
@@ -278,7 +313,7 @@ static bool structured_candidate(input_slot *slot) {
278313

279314
/* One or none ModR/M bytes, and one or none SIB bytes.
280315
* Both of these are just 8-bit LUTs, so they can be fully random.
281-
* Running max: 10.
316+
* Running max: 12.
282317
*/
283318
if (rand_byte() % 2) {
284319
slot->raw_insn[len] = rand_byte();
@@ -290,11 +325,14 @@ static bool structured_candidate(input_slot *slot) {
290325
len++;
291326
}
292327

293-
/* Finally, we have up to 5 bytes to play with for the immediate and
328+
/* Finally, we have at least 3 bytes to play with for the immediate and
294329
* displacement. Fill some amount of that (maybe not all) with randomness.
295330
*/
296331
uint64_t tail = rand_long();
297332
uint8_t tail_size = rand_byte() % 6;
333+
if (len + tail_size > MISHEGOS_INSN_MAXLEN) {
334+
tail_size = MISHEGOS_INSN_MAXLEN - len;
335+
}
298336
memcpy(slot->raw_insn + len, &tail, tail_size);
299337
len += tail_size;
300338

src/mishegos/mutator.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
*/
77
typedef struct __attribute__((packed)) {
88
uint8_t len;
9-
uint8_t op[3];
9+
uint8_t op[5];
1010
} opcode;
11-
static_assert(sizeof(opcode) == 4, "opcode should be 4 bytes");
11+
static_assert(sizeof(opcode) == 6, "opcode should be 6 bytes");
1212

1313
/* An x86 instruction is no longer than 15 bytes,
1414
* but the longest (potentially) structurally valid x86 instruction
15-
* is 26 bytes:
15+
* is 28 bytes:
1616
* 4 byte legacy prefix
1717
* 1 byte prefix
18-
* 3 byte opcode
18+
* 5 byte opcode (including VEX/XOP/EVEX prefix)
1919
* 1 byte ModR/M
2020
* 1 byte SIB
2121
* 8 byte displacement
@@ -28,7 +28,7 @@ static_assert(sizeof(opcode) == 4, "opcode should be 4 bytes");
2828
typedef struct {
2929
uint8_t off;
3030
uint8_t len;
31-
uint8_t insn[26];
31+
uint8_t insn[28];
3232
} insn_candidate;
3333

3434
void mutator_init();

0 commit comments

Comments
 (0)