/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1755 - (show annotations)
Thu Jul 18 06:13:14 2019 UTC (2 months, 3 weeks ago) by zherczeg
File MIME type: text/plain
File size: 83936 byte(s)
Error occurred while calculating annotation data.
JIT compiler update.
1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
30 return "x86" SLJIT_CPUINFO " ABI:fastcall";
31 #else
32 return "x86" SLJIT_CPUINFO;
33 #endif
34 }
35
36 /*
37 32b register indexes:
38 0 - EAX
39 1 - ECX
40 2 - EDX
41 3 - EBX
42 4 - ESP
43 5 - EBP
44 6 - ESI
45 7 - EDI
46 */
47
48 /*
49 64b register indexes:
50 0 - RAX
51 1 - RCX
52 2 - RDX
53 3 - RBX
54 4 - RSP
55 5 - RBP
56 6 - RSI
57 7 - RDI
58 8 - R8 - From now on REX prefix is required
59 9 - R9
60 10 - R10
61 11 - R11
62 12 - R12
63 13 - R13
64 14 - R14
65 15 - R15
66 */
67
/* Per-architecture register configuration: temporary register numbers,
   the sljit-register -> machine-register maps, and helper macros. */
68 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
69
70 /* Last register + 1. */
71 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
72
/* Indexed by sljit register number (TMP_REG1 is the last entry); each value
   is the machine register index that gets encoded into instructions, using
   the numbering of the "32b register indexes" table above. */
73 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
74 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
75 };
76
/* If register operand p lies in [SLJIT_R3, SLJIT_S3], rewrite the operand
   pair (p, w) in place into a memory operand based on SLJIT_SP: w becomes an
   offset derived from compiler->saveds_offset (when p <= compiler->scratches)
   or compiler->locals_offset (otherwise), and p becomes SLJIT_MEM1(SLJIT_SP).
   The statement passed as `do` then runs. Expects a variable named
   `compiler` in scope and evaluates p several times.
   NOTE(review): presumably these are the registers that have no machine
   register on x86-32 (their reg_map entries are 0) - confirm. */
77 #define CHECK_EXTRA_REGS(p, w, do) \
78 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
79 if (p <= compiler->scratches) \
80 w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
81 else \
82 w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
83 p = SLJIT_MEM1(SLJIT_SP); \
84 do; \
85 }
86
87 #else /* SLJIT_CONFIG_X86_32 */
88
89 /* Last register + 1. */
90 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
91 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
92
93 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
94 Note: avoid using r12 and r13 for memory addressing,
95 therefore r12 is better used as a higher saved register. */
96 #ifndef _WIN64
97 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
98 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
99 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
100 };
101 /* low-map. reg_map & 0x7. */
102 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
103 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
104 };
105 #else
106 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
107 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
108 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
109 };
110 /* low-map. reg_map & 0x7. */
111 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
112 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
113 };
114 #endif
115
116 /* Args: xmm0-xmm3 */
117 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
118 4, 0, 1, 2, 3, 5, 6
119 };
120 /* low-map. freg_map & 0x7. */
121 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
122 4, 0, 1, 2, 3, 5, 6
123 };
124
/* REX prefix bytes: REX is the base value, the others add the W, R, X or B bit. */
125 #define REX_W 0x48
126 #define REX_R 0x44
127 #define REX_X 0x42
128 #define REX_B 0x41
129 #define REX 0x40
130
/* Limits of a signed 32-bit value, written as a wider integer constant.
   NOTE(review): the ll suffix on _WIN64 is presumably needed because long is
   32 bits wide there - confirm. */
131 #ifndef _WIN64
132 #define HALFWORD_MAX 0x7fffffffl
133 #define HALFWORD_MIN -0x80000000l
134 #else
135 #define HALFWORD_MAX 0x7fffffffll
136 #define HALFWORD_MIN -0x80000000ll
137 #endif
138
/* Range tests against the limits above; x is evaluated twice. */
139 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
140 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
141
/* Expands to nothing in this configuration. */
142 #define CHECK_EXTRA_REGS(p, w, do)
143
144 #endif /* SLJIT_CONFIG_X86_32 */
145
/* Float register number 0.
   NOTE(review): presumably the temporary float register, by analogy with
   TMP_REG1 - confirm against its users. */
146 #define TMP_FREG (0)
147
148 /* Size flags for emit_x86_instruction: bit flags OR-ed into its size
   argument together with the opcode byte count kept in the low bits
   (e.g. `1 | EX86_BYTE_ARG | EX86_NO_REXW`). */
149 #define EX86_BIN_INS 0x0010
150 #define EX86_SHIFT_INS 0x0020
151 #define EX86_REX 0x0040
152 #define EX86_NO_REXW 0x0080
153 #define EX86_BYTE_ARG 0x0100
154 #define EX86_HALF_ARG 0x0200
155 #define EX86_PREF_66 0x0400
156 #define EX86_PREF_F2 0x0800
157 #define EX86_PREF_F3 0x1000
158 #define EX86_SSE2_OP1 0x2000
159 #define EX86_SSE2_OP2 0x4000
160 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
161
162 /* --------------------------------------------------------------------- */
163 /* Instruction forms */
164 /* --------------------------------------------------------------------- */
165
/* Opcode constants, in alphabetical order.
   - Plain hexadecimal values are opcode bytes stored directly into the
     instruction stream.
   - Values written as `(n << 3)` are not opcodes on their own: the code
     ORs them into a byte that was already written for the instruction
     (e.g. `*inst |= MUL;`). The comment tag names the group they belong to
     (BINARY, SHIFT, GROUP_F7, GROUP_FF).
   - Values tagged GROUP_0F are written as the byte following GROUP_0F.
   BINARY_OPCODE() relies on every BINARY operation X also defining
   X_EAX_i32, X_r_rm and X_rm_r. */
166 #define ADD (/* BINARY */ 0 << 3)
167 #define ADD_EAX_i32 0x05
168 #define ADD_r_rm 0x03
169 #define ADD_rm_r 0x01
170 #define ADDSD_x_xm 0x58
171 #define ADC (/* BINARY */ 2 << 3)
172 #define ADC_EAX_i32 0x15
173 #define ADC_r_rm 0x13
174 #define ADC_rm_r 0x11
175 #define AND (/* BINARY */ 4 << 3)
176 #define AND_EAX_i32 0x25
177 #define AND_r_rm 0x23
178 #define AND_rm_r 0x21
179 #define ANDPD_x_xm 0x54
180 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
181 #define CALL_i32 0xe8
182 #define CALL_rm (/* GROUP_FF */ 2 << 3)
183 #define CDQ 0x99
184 #define CMOVE_r_rm (/* GROUP_0F */ 0x44)
185 #define CMP (/* BINARY */ 7 << 3)
186 #define CMP_EAX_i32 0x3d
187 #define CMP_r_rm 0x3b
188 #define CMP_rm_r 0x39
189 #define CVTPD2PS_x_xm 0x5a
190 #define CVTSI2SD_x_rm 0x2a
191 #define CVTTSD2SI_r_xm 0x2c
192 #define DIV (/* GROUP_F7 */ 6 << 3)
193 #define DIVSD_x_xm 0x5e
194 #define FSTPS 0xd9
195 #define FSTPD 0xdd
196 #define INT3 0xcc
197 #define IDIV (/* GROUP_F7 */ 7 << 3)
198 #define IMUL (/* GROUP_F7 */ 5 << 3)
199 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
200 #define IMUL_r_rm_i8 0x6b
201 #define IMUL_r_rm_i32 0x69
202 #define JE_i8 0x74
203 #define JNE_i8 0x75
204 #define JMP_i8 0xeb
205 #define JMP_i32 0xe9
206 #define JMP_rm (/* GROUP_FF */ 4 << 3)
207 #define LEA_r_m 0x8d
208 #define MOV_r_rm 0x8b
209 #define MOV_r_i32 0xb8
210 #define MOV_rm_r 0x89
211 #define MOV_rm_i32 0xc7
212 #define MOV_rm8_i8 0xc6
213 #define MOV_rm8_r8 0x88
214 #define MOVSD_x_xm 0x10
215 #define MOVSD_xm_x 0x11
216 #define MOVSXD_r_rm 0x63
217 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
218 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
219 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
220 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
221 #define MUL (/* GROUP_F7 */ 4 << 3)
222 #define MULSD_x_xm 0x59
223 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
224 #define NOP 0x90
225 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
226 #define OR (/* BINARY */ 1 << 3)
227 #define OR_r_rm 0x0b
228 #define OR_EAX_i32 0x0d
229 #define OR_rm_r 0x09
230 #define OR_rm8_r8 0x08
231 #define POP_r 0x58
232 #define POP_rm 0x8f
233 #define POPF 0x9d
234 #define PREFETCH 0x18
235 #define PUSH_i32 0x68
236 #define PUSH_r 0x50
237 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
238 #define PUSHF 0x9c
239 #define RET_near 0xc3
240 #define RET_i16 0xc2
241 #define SBB (/* BINARY */ 3 << 3)
242 #define SBB_EAX_i32 0x1d
243 #define SBB_r_rm 0x1b
244 #define SBB_rm_r 0x19
245 #define SAR (/* SHIFT */ 7 << 3)
246 #define SHL (/* SHIFT */ 4 << 3)
247 #define SHR (/* SHIFT */ 5 << 3)
248 #define SUB (/* BINARY */ 5 << 3)
249 #define SUB_EAX_i32 0x2d
250 #define SUB_r_rm 0x2b
251 #define SUB_rm_r 0x29
252 #define SUBSD_x_xm 0x5c
253 #define TEST_EAX_i32 0xa9
254 #define TEST_rm_r 0x85
255 #define UCOMISD_x_xm 0x2e
256 #define UNPCKLPD_x_xm 0x14
257 #define XCHG_EAX_r 0x90
258 #define XCHG_r_rm 0x87
259 #define XOR (/* BINARY */ 6 << 3)
260 #define XOR_EAX_i32 0x35
261 #define XOR_r_rm 0x33
262 #define XOR_rm_r 0x31
263 #define XORPD_x_xm 0x57
264
/* First opcode byte of the instruction groups referenced by the tags above. */
265 #define GROUP_0F 0x0f
266 #define GROUP_F7 0xf7
267 #define GROUP_FF 0xff
268 #define GROUP_BINARY_81 0x81
269 #define GROUP_BINARY_83 0x83
270 #define GROUP_SHIFT_1 0xd1
271 #define GROUP_SHIFT_N 0xc1
272 #define GROUP_SHIFT_CL 0xd3
273
/* Values for the top two (mod) bits of a ModRM byte: register operand, and
   memory operand with an 8-bit displacement. */
274 #define MOD_REG 0xc0
275 #define MOD_DISP8 0x40
276
/* Stores the record length s through the local `inst` pointer, advances
   `inst`, and adds s to compiler->size. Expects `inst` and `compiler` to be
   in scope. */
277 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
278
/* Helpers that append fixed instruction bytes through the local `inst`
   pointer, advancing it. RET_I16 stores n as the low byte of the 16-bit
   immediate and 0 as its high byte. */
279 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
280 #define POP_REG(r) (*inst++ = (POP_r + (r)))
281 #define RET() (*inst++ = (RET_near))
282 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
283 /* r32, r/m32 */
284 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
285
286 /* Multithreading does not affect these static variables, since they store
287 built-in CPU features. Therefore it is harmless if different threads
288 overwrite them while detecting the CPU features at the same time.
   The initial value -1 means "not detected yet"; get_cpu_features()
   replaces it with 0 or 1. */
289 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
290 static sljit_s32 cpu_has_sse2 = -1;
291 #endif
292 static sljit_s32 cpu_has_cmov = -1;
293
294 #ifdef _WIN32_WCE
295 #include <cmnintrin.h>
296 #elif defined(_MSC_VER) && _MSC_VER >= 1400
297 #include <intrin.h>
298 #endif
299
300 /******************************************************/
301 /* Unaligned-store functions */
302 /******************************************************/
303
304 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
305 {
306 SLJIT_MEMCPY(addr, &value, sizeof(value));
307 }
308
309 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
310 {
311 SLJIT_MEMCPY(addr, &value, sizeof(value));
312 }
313
314 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
315 {
316 SLJIT_MEMCPY(addr, &value, sizeof(value));
317 }
318
319 /******************************************************/
320 /* Utility functions */
321 /******************************************************/
322
/* Executes CPUID with EAX = 1, reads the feature word returned in EDX and
   caches two of its bits: bit 26 in cpu_has_sse2 (only when SLJIT_DETECT_SSE2
   is enabled) and bit 15 in cpu_has_cmov. One of three implementations is
   selected by the compiler: the MSVC __cpuid intrinsic, GCC-style inline
   assembly, or MSVC-style `__asm` blocks. */
323 static void get_cpu_features(void)
324 {
325 sljit_u32 features;
326
327 #if defined(_MSC_VER) && _MSC_VER >= 1400
328
/* CPUInfo[3] receives EDX. */
329 int CPUInfo[4];
330 __cpuid(CPUInfo, 1);
331 features = (sljit_u32)CPUInfo[3];
332
333 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
334
335 /* AT&T syntax. */
336 __asm__ (
337 "movl $0x1, %%eax\n"
338 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
339 /* On x86-32, there is no red zone, so this
340 should work (no need for a local variable).
   ebx is saved and restored by hand here and therefore is not listed
   among the clobbered registers below. */
341 "push %%ebx\n"
342 #endif
343 "cpuid\n"
344 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
345 "pop %%ebx\n"
346 #endif
347 "movl %%edx, %0\n"
348 : "=g" (features)
349 :
350 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
351 : "%eax", "%ecx", "%edx"
352 #else
353 : "%rax", "%rbx", "%rcx", "%rdx"
354 #endif
355 );
356
357 #else /* _MSC_VER && _MSC_VER >= 1400 */
358
359 /* Intel syntax. */
360 __asm {
361 mov eax, 1
362 cpuid
363 mov features, edx
364 }
365
366 #endif /* _MSC_VER && _MSC_VER >= 1400 */
367
/* Extract the individual feature bits as 0 or 1. */
368 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
369 cpu_has_sse2 = (features >> 26) & 0x1;
370 #endif
371 cpu_has_cmov = (features >> 15) & 0x1;
372 }
373
374 static sljit_u8 get_jump_code(sljit_s32 type)
375 {
376 switch (type) {
377 case SLJIT_EQUAL:
378 case SLJIT_EQUAL_F64:
379 return 0x84 /* je */;
380
381 case SLJIT_NOT_EQUAL:
382 case SLJIT_NOT_EQUAL_F64:
383 return 0x85 /* jne */;
384
385 case SLJIT_LESS:
386 case SLJIT_LESS_F64:
387 return 0x82 /* jc */;
388
389 case SLJIT_GREATER_EQUAL:
390 case SLJIT_GREATER_EQUAL_F64:
391 return 0x83 /* jae */;
392
393 case SLJIT_GREATER:
394 case SLJIT_GREATER_F64:
395 return 0x87 /* jnbe */;
396
397 case SLJIT_LESS_EQUAL:
398 case SLJIT_LESS_EQUAL_F64:
399 return 0x86 /* jbe */;
400
401 case SLJIT_SIG_LESS:
402 return 0x8c /* jl */;
403
404 case SLJIT_SIG_GREATER_EQUAL:
405 return 0x8d /* jnl */;
406
407 case SLJIT_SIG_GREATER:
408 return 0x8f /* jnle */;
409
410 case SLJIT_SIG_LESS_EQUAL:
411 return 0x8e /* jle */;
412
413 case SLJIT_OVERFLOW:
414 case SLJIT_MUL_OVERFLOW:
415 return 0x80 /* jo */;
416
417 case SLJIT_NOT_OVERFLOW:
418 case SLJIT_MUL_NOT_OVERFLOW:
419 return 0x81 /* jno */;
420
421 case SLJIT_UNORDERED_F64:
422 return 0x8a /* jp */;
423
424 case SLJIT_ORDERED_F64:
425 return 0x8b /* jpo */;
426 }
427 return 0;
428 }
429
/* Forward declarations of helpers that are used by the functions below before
   their definitions appear. The signatures differ per architecture: the
   x86-32 far-jump helper also takes the executable offset, while
   generate_put_label_code is only declared for the other configuration. */
430 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
431 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
432 #else
433 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
434 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
435 #endif
436
437 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
438 {
439 sljit_s32 type = jump->flags >> TYPE_SHIFT;
440 sljit_s32 short_jump;
441 sljit_uw label_addr;
442
443 if (jump->flags & JUMP_LABEL)
444 label_addr = (sljit_uw)(code + jump->u.label->size);
445 else
446 label_addr = jump->u.target - executable_offset;
447
448 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
449
450 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
451 if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
452 return generate_far_jump_code(jump, code_ptr);
453 #endif
454
455 if (type == SLJIT_JUMP) {
456 if (short_jump)
457 *code_ptr++ = JMP_i8;
458 else
459 *code_ptr++ = JMP_i32;
460 jump->addr++;
461 }
462 else if (type >= SLJIT_FAST_CALL) {
463 short_jump = 0;
464 *code_ptr++ = CALL_i32;
465 jump->addr++;
466 }
467 else if (short_jump) {
468 *code_ptr++ = get_jump_code(type) - 0x10;
469 jump->addr++;
470 }
471 else {
472 *code_ptr++ = GROUP_0F;
473 *code_ptr++ = get_jump_code(type);
474 jump->addr += 2;
475 }
476
477 if (short_jump) {
478 jump->flags |= PATCH_MB;
479 code_ptr += sizeof(sljit_s8);
480 } else {
481 jump->flags |= PATCH_MW;
482 code_ptr += sizeof(sljit_s32);
483 }
484
485 return code_ptr;
486 }
487
/* Second code generation pass: allocates executable memory of
   compiler->size bytes, copies the buffered instruction records into it
   while resolving labels, jumps, constants and put_labels, and finally
   patches the jump displacements and put_label addresses.

   On success compiler->error is set to SLJIT_ERR_COMPILED,
   compiler->executable_offset and compiler->executable_size are filled in,
   and the returned pointer is code + executable_offset.
   NOTE(review): the failure paths are hidden inside CHECK_ERROR_PTR,
   CHECK_PTR and PTR_FAIL_WITH_EXEC_IF; presumably they return NULL -
   confirm. */
488 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
489 {
490 struct sljit_memory_fragment *buf;
491 sljit_u8 *code;
492 sljit_u8 *code_ptr;
493 sljit_u8 *buf_ptr;
494 sljit_u8 *buf_end;
495 sljit_u8 len;
496 sljit_sw executable_offset;
497 sljit_sw jump_addr;
498
499 struct sljit_label *label;
500 struct sljit_jump *jump;
501 struct sljit_const *const_;
502 struct sljit_put_label *put_label;
503
504 CHECK_ERROR_PTR();
505 CHECK_PTR(check_sljit_generate_code(compiler));
506 reverse_buf(compiler);
507
508 /* Second code generation pass. */
509 code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
510 PTR_FAIL_WITH_EXEC_IF(code);
511 buf = compiler->buf;
512
513 code_ptr = code;
514 label = compiler->labels;
515 jump = compiler->jumps;
516 const_ = compiler->consts;
517 put_label = compiler->put_labels;
518 executable_offset = SLJIT_EXEC_OFFSET(code);
519
/* Walk every memory fragment. Each record starts with a length byte:
   a non-zero length is followed by that many ready-made code bytes;
   a zero length is followed by a one-byte tag
   (0 = label, 1 = jump, 2 = const, anything else must be 3 = put_label).
   The label/jump/const/put_label lists are consumed in step with the tags. */
520 do {
521 buf_ptr = buf->memory;
522 buf_end = buf_ptr + buf->used_size;
523 do {
524 len = *buf_ptr++;
525 if (len > 0) {
526 /* The code is already generated. */
527 SLJIT_MEMCPY(code_ptr, buf_ptr, len);
528 code_ptr += len;
529 buf_ptr += len;
530 }
531 else {
532 switch (*buf_ptr) {
533 case 0:
/* Label: record its executable address and its offset from `code`. */
534 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
535 label->size = code_ptr - code;
536 label = label->next;
537 break;
538 case 1:
/* Jump: rewritable jumps always use the far form, the others try the
   near form first. */
539 jump->addr = (sljit_uw)code_ptr;
540 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
541 code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
542 else {
543 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
544 code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
545 #else
546 code_ptr = generate_far_jump_code(jump, code_ptr);
547 #endif
548 }
549 jump = jump->next;
550 break;
551 case 2:
/* Constant: its address is the machine word that ends at code_ptr. */
552 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
553 const_ = const_->next;
554 break;
555 default:
556 SLJIT_ASSERT(*buf_ptr == 3);
557 SLJIT_ASSERT(put_label->label);
558 put_label->addr = (sljit_uw)code_ptr;
559 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
560 code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
561 #endif
562 put_label = put_label->next;
563 break;
564 }
/* Skip the tag byte. */
565 buf_ptr++;
566 }
567 } while (buf_ptr < buf_end);
568 SLJIT_ASSERT(buf_ptr == buf_end);
569 buf = buf->next;
570 } while (buf);
571
/* Every list must have been consumed and the code must fit the allocation. */
572 SLJIT_ASSERT(!label);
573 SLJIT_ASSERT(!jump);
574 SLJIT_ASSERT(!const_);
575 SLJIT_ASSERT(!put_label);
576 SLJIT_ASSERT(code_ptr <= code + compiler->size);
577
/* Patch the jumps. jump->addr now addresses the displacement/address field:
   PATCH_MB = 8-bit relative, PATCH_MW = 32-bit relative (machine word sized
   store on x86-32), PATCH_MD = absolute label address (x86-64 only).
   Relative values are measured from the end of the field at its executable
   address (jump_addr).
   NOTE(review): the PATCH_MB branch reads jump->u.label without testing
   JUMP_LABEL, unlike the PATCH_MW branch - confirm that short jumps can
   only ever target labels. */
578 jump = compiler->jumps;
579 while (jump) {
580 jump_addr = jump->addr + executable_offset;
581
582 if (jump->flags & PATCH_MB) {
583 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
584 *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
585 } else if (jump->flags & PATCH_MW) {
586 if (jump->flags & JUMP_LABEL) {
587 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
588 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
589 #else
590 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
591 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
592 #endif
593 }
594 else {
595 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
596 sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
597 #else
598 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
599 sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
600 #endif
601 }
602 }
603 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
604 else if (jump->flags & PATCH_MD)
605 sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
606 #endif
607
608 jump = jump->next;
609 }
610
/* Patch the put_labels: the label address is stored in the bytes that end
   at put_label->addr - a full machine word on x86-32 or with PATCH_MD,
   otherwise a 32-bit value. */
611 put_label = compiler->put_labels;
612 while (put_label) {
613 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
614 sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
615 #else
616 if (put_label->flags & PATCH_MD) {
617 SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
618 sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
619 }
620 else {
621 SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
622 sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
623 }
624 #endif
625
626 put_label = put_label->next;
627 }
628
629 compiler->error = SLJIT_ERR_COMPILED;
630 compiler->executable_offset = executable_offset;
631 compiler->executable_size = code_ptr - code;
632 return (void*)(code + executable_offset);
633 }
634
635 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
636 {
637 switch (feature_type) {
638 case SLJIT_HAS_FPU:
639 #ifdef SLJIT_IS_FPU_AVAILABLE
640 return SLJIT_IS_FPU_AVAILABLE;
641 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
642 if (cpu_has_sse2 == -1)
643 get_cpu_features();
644 return cpu_has_sse2;
645 #else /* SLJIT_DETECT_SSE2 */
646 return 1;
647 #endif /* SLJIT_DETECT_SSE2 */
648
649 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
650 case SLJIT_HAS_VIRTUAL_REGISTERS:
651 return 1;
652 #endif
653
654 case SLJIT_HAS_CLZ:
655 case SLJIT_HAS_CMOV:
656 if (cpu_has_cmov == -1)
657 get_cpu_features();
658 return cpu_has_cmov;
659
660 case SLJIT_HAS_SSE2:
661 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
662 if (cpu_has_sse2 == -1)
663 get_cpu_features();
664 return cpu_has_sse2;
665 #else
666 return 1;
667 #endif
668
669 default:
670 return 0;
671 }
672 }
673
674 /* --------------------------------------------------------------------- */
675 /* Operators */
676 /* --------------------------------------------------------------------- */
677
/* Packs the four encodings of a binary operation into one 32-bit value by
   token pasting: X_EAX_i32 in bits 24-31, X_r_rm in bits 16-23, X_rm_r in
   bits 8-15 and the group value X in bits 0-7. */
678 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
679
/* Forward declarations. They precede the inclusion of the
   architecture-specific source file below, so that file can see them too.
   NOTE(review): "cum"/"non_cum" presumably stand for commutative and
   non-commutative operations - confirm at the definitions. */
680 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
681 sljit_u32 op_types,
682 sljit_s32 dst, sljit_sw dstw,
683 sljit_s32 src1, sljit_sw src1w,
684 sljit_s32 src2, sljit_sw src2w);
685
686 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
687 sljit_u32 op_types,
688 sljit_s32 dst, sljit_sw dstw,
689 sljit_s32 src1, sljit_sw src1w,
690 sljit_s32 src2, sljit_sw src2w);
691
692 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
693 sljit_s32 dst, sljit_sw dstw,
694 sljit_s32 src, sljit_sw srcw);
695
/* Calls emit_mov() and passes its result to FAIL_IF. The expansion already
   ends with a semicolon, so `EMIT_MOV(...);` yields an extra empty
   statement; keep that in mind when using it as the unbraced body of an
   if/else. */
696 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
697 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
698
699 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
700 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
701
702 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
703 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
704
705 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
706 #include "sljitNativeX86_32.c"
707 #else
708 #include "sljitNativeX86_64.c"
709 #endif
710
711 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
712 sljit_s32 dst, sljit_sw dstw,
713 sljit_s32 src, sljit_sw srcw)
714 {
715 sljit_u8* inst;
716
717 SLJIT_ASSERT(dst != SLJIT_UNUSED);
718
719 if (FAST_IS_REG(src)) {
720 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
721 FAIL_IF(!inst);
722 *inst = MOV_rm_r;
723 return SLJIT_SUCCESS;
724 }
725 if (src & SLJIT_IMM) {
726 if (FAST_IS_REG(dst)) {
727 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
728 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
729 #else
730 if (!compiler->mode32) {
731 if (NOT_HALFWORD(srcw))
732 return emit_load_imm64(compiler, dst, srcw);
733 }
734 else
735 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
736 #endif
737 }
738 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
739 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
740 /* Immediate to memory move. Only SLJIT_MOV operation copies
741 an immediate directly into memory so TMP_REG1 can be used. */
742 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
743 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
744 FAIL_IF(!inst);
745 *inst = MOV_rm_r;
746 return SLJIT_SUCCESS;
747 }
748 #endif
749 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
750 FAIL_IF(!inst);
751 *inst = MOV_rm_i32;
752 return SLJIT_SUCCESS;
753 }
754 if (FAST_IS_REG(dst)) {
755 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
756 FAIL_IF(!inst);
757 *inst = MOV_r_rm;
758 return SLJIT_SUCCESS;
759 }
760
761 /* Memory to memory move. Only SLJIT_MOV operation copies
762 data from memory to memory so TMP_REG1 can be used. */
763 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
764 FAIL_IF(!inst);
765 *inst = MOV_r_rm;
766 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
767 FAIL_IF(!inst);
768 *inst = MOV_rm_r;
769 return SLJIT_SUCCESS;
770 }
771
/* Emits an operation that has no explicit operands, selected by
   GET_OPCODE(op):
   - SLJIT_BREAKPOINT: a single INT3 byte;
   - SLJIT_NOP: a single NOP byte;
   - SLJIT_LMUL_UW/SW, SLJIT_DIVMOD_UW/SW, SLJIT_DIV_UW/SW: one instruction
     of the 0xF7 group (MUL, IMUL, DIV or IDIV) plus the register shuffling
     needed around it.
   Any other opcode emits nothing. On x86-64, SLJIT_I32_OP in op selects
   32-bit operation (compiler->mode32).
   Returns SLJIT_SUCCESS, or leaves early through CHECK_ERROR, CHECK or
   FAIL_IF. */
772 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
773 {
774 sljit_u8 *inst;
775 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
776 sljit_s32 size;
777 #endif
778
779 CHECK_ERROR();
780 CHECK(check_sljit_emit_op0(compiler, op));
781
782 switch (GET_OPCODE(op)) {
783 case SLJIT_BREAKPOINT:
784 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
785 FAIL_IF(!inst);
786 INC_SIZE(1);
787 *inst = INT3;
788 break;
789 case SLJIT_NOP:
790 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
791 FAIL_IF(!inst);
792 INC_SIZE(1);
793 *inst = NOP;
794 break;
795 case SLJIT_LMUL_UW:
796 case SLJIT_LMUL_SW:
797 case SLJIT_DIVMOD_UW:
798 case SLJIT_DIVMOD_SW:
799 case SLJIT_DIV_UW:
800 case SLJIT_DIV_SW:
801 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* The code below relies on this register layout: SLJIT_R0 is rax (0); on
   _WIN64 SLJIT_R1 is rdx (2) and TMP_REG1 needs a REX prefix, otherwise
   TMP_REG1 is rdx (2) and SLJIT_R1 is a low register. */
802 #ifdef _WIN64
803 SLJIT_ASSERT(
804 reg_map[SLJIT_R0] == 0
805 && reg_map[SLJIT_R1] == 2
806 && reg_map[TMP_REG1] > 7);
807 #else
808 SLJIT_ASSERT(
809 reg_map[SLJIT_R0] == 0
810 && reg_map[SLJIT_R1] < 7
811 && reg_map[TMP_REG1] == 2);
812 #endif
813 compiler->mode32 = op & SLJIT_I32_OP;
814 #endif
/* Guarantees that `(op | 0x2) == SLJIT_DIV_xx` below matches both the
   DIVMOD and the DIV variant of an operation. */
815 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
816
817 op = GET_OPCODE(op);
/* Unsigned division: clear the register with machine index 2 (edx/rdx) by
   XOR-ing it with itself. On x86-32 and _WIN64 that register is addressed
   as SLJIT_R1, so SLJIT_R1 is first copied into TMP_REG1; elsewhere it is
   TMP_REG1 itself. */
818 if ((op | 0x2) == SLJIT_DIV_UW) {
819 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
820 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
821 inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
822 #else
823 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
824 #endif
825 FAIL_IF(!inst);
826 *inst = XOR_r_rm;
827 }
828
/* Signed division: copy SLJIT_R1 into TMP_REG1 where needed (same
   configurations as above), then emit CDQ - with a REX_W prefix for 64-bit
   operation - to sign-extend the dividend into edx/rdx. */
829 if ((op | 0x2) == SLJIT_DIV_SW) {
830 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
831 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
832 #endif
833
834 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
835 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
836 FAIL_IF(!inst);
837 INC_SIZE(1);
838 *inst = CDQ;
839 #else
840 if (compiler->mode32) {
841 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
842 FAIL_IF(!inst);
843 INC_SIZE(1);
844 *inst = CDQ;
845 } else {
846 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
847 FAIL_IF(!inst);
848 INC_SIZE(2);
849 *inst++ = REX_W;
850 *inst = CDQ;
851 }
852 #endif
853 }
854
/* Emit the 0xF7 group instruction itself: [REX prefix] F7 ModRM. The ModRM
   byte names a register operand: on x86-32 and _WIN64 it is TMP_REG1 for
   the division opcodes (op >= SLJIT_DIVMOD_UW) and SLJIT_R1 otherwise; in
   the remaining configuration it is always SLJIT_R1. The operation bits are
   OR-ed into the ModRM byte by the switch that follows. */
855 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
856 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
857 FAIL_IF(!inst);
858 INC_SIZE(2);
859 *inst++ = GROUP_F7;
860 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
861 #else
/* Three bytes when a REX prefix is needed: for 64-bit operation, and on
   _WIN64 also whenever TMP_REG1 is the operand. */
862 #ifdef _WIN64
863 size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
864 #else
865 size = (!compiler->mode32) ? 3 : 2;
866 #endif
867 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
868 FAIL_IF(!inst);
869 INC_SIZE(size);
870 #ifdef _WIN64
871 if (!compiler->mode32)
872 *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
873 else if (op >= SLJIT_DIVMOD_UW)
874 *inst++ = REX_B;
875 *inst++ = GROUP_F7;
876 *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
877 #else
878 if (!compiler->mode32)
879 *inst++ = REX_W;
880 *inst++ = GROUP_F7;
881 *inst = MOD_REG | reg_map[SLJIT_R1];
882 #endif
883 #endif
884 switch (op) {
885 case SLJIT_LMUL_UW:
886 *inst |= MUL;
887 break;
888 case SLJIT_LMUL_SW:
889 *inst |= IMUL;
890 break;
891 case SLJIT_DIVMOD_UW:
892 case SLJIT_DIV_UW:
893 *inst |= DIV;
894 break;
895 case SLJIT_DIVMOD_SW:
896 case SLJIT_DIV_SW:
897 *inst |= IDIV;
898 break;
899 }
/* Final register fix-up. Where TMP_REG1 is rdx (x86-64 without _WIN64), the
   LMUL and DIVMOD operations copy it into SLJIT_R1. In the other
   configurations the plain DIV operations copy the value saved in TMP_REG1
   back into SLJIT_R1. */
900 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
901 if (op <= SLJIT_DIVMOD_SW)
902 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
903 #else
904 if (op >= SLJIT_DIV_UW)
905 EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
906 #endif
907 break;
908 }
909
910 return SLJIT_SUCCESS;
911 }
912
/* Appends the single byte `prefix` to the instruction buffer as a record of
   its own. Expects `inst` and `compiler` to be in scope and, because of
   FAIL_IF, may return from the enclosing function. */
913 #define ENCODE_PREFIX(prefix) \
914 do { \
915 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
916 FAIL_IF(!inst); \
917 INC_SIZE(1); \
918 *inst = (prefix); \
919 } while (0)
920
/* Emits a byte-sized move from (src, srcw) to (dst, dstw).

   sign  non-zero selects sign extension (MOVSX), zero selects zero
         extension (MOVZX) when the value is widened into a register.

   An immediate source is loaded with a full-width immediate move when dst is
   a register, or stored with MOV_rm8_i8 when dst is memory. Otherwise the
   byte is widened into a register and, for a memory destination, written
   with MOV_rm8_r8. On x86-32 only registers whose reg_map value is below 4
   are used as byte operands, which is what the extra branches handle.
   Returns SLJIT_SUCCESS or leaves early through FAIL_IF. */
921 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
922 sljit_s32 dst, sljit_sw dstw,
923 sljit_s32 src, sljit_sw srcw)
924 {
925 sljit_u8* inst;
926 sljit_s32 dst_r;
927 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
928 sljit_s32 work_r;
929 #endif
930
931 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
932 compiler->mode32 = 0;
933 #endif
934
935 if (src & SLJIT_IMM) {
936 if (FAST_IS_REG(dst)) {
937 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
938 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
939 #else
940 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
941 FAIL_IF(!inst);
942 *inst = MOV_rm_i32;
943 return SLJIT_SUCCESS;
944 #endif
945 }
946 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
947 FAIL_IF(!inst);
948 *inst = MOV_rm8_i8;
949 return SLJIT_SUCCESS;
950 }
951
/* Register that will hold the byte: dst itself when it is a register,
   TMP_REG1 when dst is memory. */
952 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
953
/* Register to memory: the source register is stored directly, except on
   x86-32 when it is not usable as a byte operand and is therefore copied
   into TMP_REG1 first. No extension instruction is emitted on this path. */
954 if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
955 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
956 if (reg_map[src] >= 4) {
957 SLJIT_ASSERT(dst_r == TMP_REG1);
958 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
959 } else
960 dst_r = src;
961 #else
962 dst_r = src;
963 #endif
964 }
965 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
966 else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
967 /* src, dst are registers. */
968 SLJIT_ASSERT(SLOW_IS_REG(dst));
/* The source cannot be a byte operand. If dst can, copy the full register
   into dst and extend dst from its own low byte; otherwise copy and extend
   with a shift pair (signed) or an AND with 0xff (unsigned). */
969 if (reg_map[dst] < 4) {
970 if (dst != src)
971 EMIT_MOV(compiler, dst, 0, src, 0);
972 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
973 FAIL_IF(!inst);
974 *inst++ = GROUP_0F;
975 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
976 }
977 else {
978 if (dst != src)
979 EMIT_MOV(compiler, dst, 0, src, 0);
980 if (sign) {
981 /* shl reg, 24 */
982 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
983 FAIL_IF(!inst);
984 *inst |= SHL;
985 /* sar reg, 24 */
986 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
987 FAIL_IF(!inst);
988 *inst |= SAR;
989 }
990 else {
991 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
992 FAIL_IF(!inst);
993 *(inst + 1) |= AND;
994 }
995 }
996 return SLJIT_SUCCESS;
997 }
998 #endif
999 else {
1000 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
1001 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1002 FAIL_IF(!inst);
1003 *inst++ = GROUP_0F;
1004 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
1005 }
1006
/* Memory destination: store the low byte of dst_r. */
1007 if (dst & SLJIT_MEM) {
1008 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1009 if (dst_r == TMP_REG1) {
1010 /* Find a non-used register, whose reg_map[src] < 4. */
/* The candidates are SLJIT_R0, SLJIT_R1 and SLJIT_R2; pick one that is
   neither the base nor the index register of the dst address. */
1011 if ((dst & REG_MASK) == SLJIT_R0) {
1012 if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
1013 work_r = SLJIT_R2;
1014 else
1015 work_r = SLJIT_R1;
1016 }
1017 else {
1018 if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1019 work_r = SLJIT_R0;
1020 else if ((dst & REG_MASK) == SLJIT_R1)
1021 work_r = SLJIT_R2;
1022 else
1023 work_r = SLJIT_R1;
1024 }
1025
/* Swap the value into work_r (one-byte XCHG form when work_r is
   SLJIT_R0), store its low byte, then swap back so work_r keeps its
   original content. */
1026 if (work_r == SLJIT_R0) {
1027 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
1028 }
1029 else {
1030 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1031 FAIL_IF(!inst);
1032 *inst = XCHG_r_rm;
1033 }
1034
1035 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
1036 FAIL_IF(!inst);
1037 *inst = MOV_rm8_r8;
1038
1039 if (work_r == SLJIT_R0) {
1040 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
1041 }
1042 else {
1043 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1044 FAIL_IF(!inst);
1045 *inst = XCHG_r_rm;
1046 }
1047 }
1048 else {
1049 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1050 FAIL_IF(!inst);
1051 *inst = MOV_rm8_r8;
1052 }
1053 #else
1054 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1055 FAIL_IF(!inst);
1056 *inst = MOV_rm8_r8;
1057 #endif
1058 }
1059
1060 return SLJIT_SUCCESS;
1061 }
1062
1063 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1064 sljit_s32 src, sljit_sw srcw)
1065 {
1066 sljit_u8* inst;
1067
1068 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1069 compiler->mode32 = 1;
1070 #endif
1071
1072 inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1073 FAIL_IF(!inst);
1074 *inst++ = GROUP_0F;
1075 *inst++ = PREFETCH;
1076
1077 if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
1078 *inst |= (3 << 3);
1079 else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
1080 *inst |= (2 << 3);
1081 else
1082 *inst |= (1 << 3);
1083
1084 return SLJIT_SUCCESS;
1085 }
1086
1087 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1088 sljit_s32 dst, sljit_sw dstw,
1089 sljit_s32 src, sljit_sw srcw)
1090 {
1091 sljit_u8* inst;
1092 sljit_s32 dst_r;
1093
1094 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1095 compiler->mode32 = 0;
1096 #endif
1097
1098 if (src & SLJIT_IMM) {
1099 if (FAST_IS_REG(dst)) {
1100 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1101 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1102 #else
1103 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1104 FAIL_IF(!inst);
1105 *inst = MOV_rm_i32;
1106 return SLJIT_SUCCESS;
1107 #endif
1108 }
1109 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1110 FAIL_IF(!inst);
1111 *inst = MOV_rm_i32;
1112 return SLJIT_SUCCESS;
1113 }
1114
1115 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1116
1117 if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1118 dst_r = src;
1119 else {
1120 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1121 FAIL_IF(!inst);
1122 *inst++ = GROUP_0F;
1123 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1124 }
1125
1126 if (dst & SLJIT_MEM) {
1127 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1128 FAIL_IF(!inst);
1129 *inst = MOV_rm_r;
1130 }
1131
1132 return SLJIT_SUCCESS;
1133 }
1134
1135 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1136 sljit_s32 dst, sljit_sw dstw,
1137 sljit_s32 src, sljit_sw srcw)
1138 {
1139 sljit_u8* inst;
1140
1141 if (dst == src && dstw == srcw) {
1142 /* Same input and output */
1143 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1144 FAIL_IF(!inst);
1145 *inst++ = GROUP_F7;
1146 *inst |= opcode;
1147 return SLJIT_SUCCESS;
1148 }
1149
1150 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
1151 dst = TMP_REG1;
1152
1153 if (FAST_IS_REG(dst)) {
1154 EMIT_MOV(compiler, dst, 0, src, srcw);
1155 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1156 FAIL_IF(!inst);
1157 *inst++ = GROUP_F7;
1158 *inst |= opcode;
1159 return SLJIT_SUCCESS;
1160 }
1161
1162 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1163 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1164 FAIL_IF(!inst);
1165 *inst++ = GROUP_F7;
1166 *inst |= opcode;
1167 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1168 return SLJIT_SUCCESS;
1169 }
1170
1171 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1172 sljit_s32 dst, sljit_sw dstw,
1173 sljit_s32 src, sljit_sw srcw)
1174 {
1175 sljit_u8* inst;
1176
1177 if (dst == SLJIT_UNUSED)
1178 dst = TMP_REG1;
1179
1180 if (FAST_IS_REG(dst)) {
1181 EMIT_MOV(compiler, dst, 0, src, srcw);
1182 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1183 FAIL_IF(!inst);
1184 *inst++ = GROUP_F7;
1185 *inst |= NOT_rm;
1186 inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1187 FAIL_IF(!inst);
1188 *inst = OR_r_rm;
1189 return SLJIT_SUCCESS;
1190 }
1191
1192 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1193 inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1194 FAIL_IF(!inst);
1195 *inst++ = GROUP_F7;
1196 *inst |= NOT_rm;
1197 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1198 FAIL_IF(!inst);
1199 *inst = OR_r_rm;
1200 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1201 return SLJIT_SUCCESS;
1202 }
1203
1204 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1205 static const sljit_sw emit_clz_arg = 32 + 31;
1206 #endif
1207
1208 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1209 sljit_s32 dst, sljit_sw dstw,
1210 sljit_s32 src, sljit_sw srcw)
1211 {
1212 sljit_u8* inst;
1213 sljit_s32 dst_r;
1214
1215 SLJIT_UNUSED_ARG(op_flags);
1216
1217 if (cpu_has_cmov == -1)
1218 get_cpu_features();
1219
1220 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1221
1222 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1223 FAIL_IF(!inst);
1224 *inst++ = GROUP_0F;
1225 *inst = BSR_r_rm;
1226
1227 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1228 if (cpu_has_cmov) {
1229 if (dst_r != TMP_REG1) {
1230 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
1231 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1232 }
1233 else
1234 inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
1235
1236 FAIL_IF(!inst);
1237 *inst++ = GROUP_0F;
1238 *inst = CMOVE_r_rm;
1239 }
1240 else
1241 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
1242
1243 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1244 #else
1245 if (cpu_has_cmov) {
1246 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
1247
1248 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1249 FAIL_IF(!inst);
1250 *inst++ = GROUP_0F;
1251 *inst = CMOVE_r_rm;
1252 }
1253 else
1254 FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
1255
1256 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
1257 #endif
1258
1259 FAIL_IF(!inst);
1260 *(inst + 1) |= XOR;
1261
1262 if (dst & SLJIT_MEM)
1263 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1264 return SLJIT_SUCCESS;
1265 }
1266
1267 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1268 sljit_s32 dst, sljit_sw dstw,
1269 sljit_s32 src, sljit_sw srcw)
1270 {
1271 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1272 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1273 sljit_s32 dst_is_ereg = 0;
1274 #endif
1275
1276 CHECK_ERROR();
1277 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1278 ADJUST_LOCAL_OFFSET(dst, dstw);
1279 ADJUST_LOCAL_OFFSET(src, srcw);
1280
1281 CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1282 CHECK_EXTRA_REGS(src, srcw, (void)0);
1283 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1284 compiler->mode32 = op_flags & SLJIT_I32_OP;
1285 #endif
1286
1287 if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
1288 if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
1289 return emit_prefetch(compiler, op, src, srcw);
1290 return SLJIT_SUCCESS;
1291 }
1292
1293 op = GET_OPCODE(op);
1294
1295 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1296 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1297 compiler->mode32 = 0;
1298 #endif
1299
1300 if (FAST_IS_REG(src) && src == dst) {
1301 if (!TYPE_CAST_NEEDED(op))
1302 return SLJIT_SUCCESS;
1303 }
1304
1305 if (op_flags & SLJIT_I32_OP) {
1306 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1307 if (src & SLJIT_MEM) {
1308 if (op == SLJIT_MOV_S32)
1309 op = SLJIT_MOV_U32;
1310 }
1311 else if (src & SLJIT_IMM) {
1312 if (op == SLJIT_MOV_U32)
1313 op = SLJIT_MOV_S32;
1314 }
1315 #endif
1316 }
1317
1318 if (src & SLJIT_IMM) {
1319 switch (op) {
1320 case SLJIT_MOV_U8:
1321 srcw = (sljit_u8)srcw;
1322 break;
1323 case SLJIT_MOV_S8:
1324 srcw = (sljit_s8)srcw;
1325 break;
1326 case SLJIT_MOV_U16:
1327 srcw = (sljit_u16)srcw;
1328 break;
1329 case SLJIT_MOV_S16:
1330 srcw = (sljit_s16)srcw;
1331 break;
1332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1333 case SLJIT_MOV_U32:
1334 srcw = (sljit_u32)srcw;
1335 break;
1336 case SLJIT_MOV_S32:
1337 srcw = (sljit_s32)srcw;
1338 break;
1339 #endif
1340 }
1341 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1342 if (SLJIT_UNLIKELY(dst_is_ereg))
1343 return emit_mov(compiler, dst, dstw, src, srcw);
1344 #endif
1345 }
1346
1347 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1348 if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1349 SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1350 dst = TMP_REG1;
1351 }
1352 #endif
1353
1354 switch (op) {
1355 case SLJIT_MOV:
1356 case SLJIT_MOV_P:
1357 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1358 case SLJIT_MOV_U32:
1359 case SLJIT_MOV_S32:
1360 #endif
1361 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1362 break;
1363 case SLJIT_MOV_U8:
1364 FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1365 break;
1366 case SLJIT_MOV_S8:
1367 FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1368 break;
1369 case SLJIT_MOV_U16:
1370 FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1371 break;
1372 case SLJIT_MOV_S16:
1373 FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1374 break;
1375 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1376 case SLJIT_MOV_U32:
1377 FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1378 break;
1379 case SLJIT_MOV_S32:
1380 FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1381 break;
1382 #endif
1383 }
1384
1385 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1386 if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1387 return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1388 #endif
1389 return SLJIT_SUCCESS;
1390 }
1391
1392 switch (op) {
1393 case SLJIT_NOT:
1394 if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1395 return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1396 return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1397
1398 case SLJIT_NEG:
1399 return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1400
1401 case SLJIT_CLZ:
1402 return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1403 }
1404
1405 return SLJIT_SUCCESS;
1406 }
1407
/* Helper macros for binary operations with an immediate operand.
   Both expect the locals "compiler" and "inst" to be in scope and leave
   the enclosing function through FAIL_IF on failure. They expand to more
   than one statement, so they must only be used inside a braced block.

   BINARY_IMM(op_imm, op_mr, immw, arg, argw)
     Applies the operation to arg/argw with the immediate immw. op_imm is
     merged into the byte following the opcode of the immediate form. On
     x86-64, an immediate that is neither a halfword nor used in 32-bit mode
     is first loaded into a temporary register (TMP_REG2 when arg is
     TMP_REG1, TMP_REG1 otherwise) and the op_mr form is used instead.

   BINARY_EAX_IMM(op_eax_imm, immw)
     Emits the opcode op_eax_imm followed by the immediate immw (on x86-64
     preceded by REX_W unless 32-bit mode is active). The callers in this
     file use it when the first operand is SLJIT_R0. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
		inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#endif
1437
1438 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1439 sljit_u32 op_types,
1440 sljit_s32 dst, sljit_sw dstw,
1441 sljit_s32 src1, sljit_sw src1w,
1442 sljit_s32 src2, sljit_sw src2w)
1443 {
1444 sljit_u8* inst;
1445 sljit_u8 op_eax_imm = (op_types >> 24);
1446 sljit_u8 op_rm = (op_types >> 16) & 0xff;
1447 sljit_u8 op_mr = (op_types >> 8) & 0xff;
1448 sljit_u8 op_imm = op_types & 0xff;
1449
1450 if (dst == SLJIT_UNUSED) {
1451 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1452 if (src2 & SLJIT_IMM) {
1453 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1454 }
1455 else {
1456 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1457 FAIL_IF(!inst);
1458 *inst = op_rm;
1459 }
1460 return SLJIT_SUCCESS;
1461 }
1462
1463 if (dst == src1 && dstw == src1w) {
1464 if (src2 & SLJIT_IMM) {
1465 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1466 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1467 #else
1468 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1469 #endif
1470 BINARY_EAX_IMM(op_eax_imm, src2w);
1471 }
1472 else {
1473 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1474 }
1475 }
1476 else if (FAST_IS_REG(dst)) {
1477 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1478 FAIL_IF(!inst);
1479 *inst = op_rm;
1480 }
1481 else if (FAST_IS_REG(src2)) {
1482 /* Special exception for sljit_emit_op_flags. */
1483 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1484 FAIL_IF(!inst);
1485 *inst = op_mr;
1486 }
1487 else {
1488 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1489 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1490 FAIL_IF(!inst);
1491 *inst = op_mr;
1492 }
1493 return SLJIT_SUCCESS;
1494 }
1495
1496 /* Only for cumulative operations. */
1497 if (dst == src2 && dstw == src2w) {
1498 if (src1 & SLJIT_IMM) {
1499 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1500 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1501 #else
1502 if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1503 #endif
1504 BINARY_EAX_IMM(op_eax_imm, src1w);
1505 }
1506 else {
1507 BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1508 }
1509 }
1510 else if (FAST_IS_REG(dst)) {
1511 inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1512 FAIL_IF(!inst);
1513 *inst = op_rm;
1514 }
1515 else if (FAST_IS_REG(src1)) {
1516 inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1517 FAIL_IF(!inst);
1518 *inst = op_mr;
1519 }
1520 else {
1521 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1522 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1523 FAIL_IF(!inst);
1524 *inst = op_mr;
1525 }
1526 return SLJIT_SUCCESS;
1527 }
1528
1529 /* General version. */
1530 if (FAST_IS_REG(dst)) {
1531 EMIT_MOV(compiler, dst, 0, src1, src1w);
1532 if (src2 & SLJIT_IMM) {
1533 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1534 }
1535 else {
1536 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1537 FAIL_IF(!inst);
1538 *inst = op_rm;
1539 }
1540 }
1541 else {
1542 /* This version requires less memory writing. */
1543 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1544 if (src2 & SLJIT_IMM) {
1545 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1546 }
1547 else {
1548 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1549 FAIL_IF(!inst);
1550 *inst = op_rm;
1551 }
1552 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1553 }
1554
1555 return SLJIT_SUCCESS;
1556 }
1557
1558 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1559 sljit_u32 op_types,
1560 sljit_s32 dst, sljit_sw dstw,
1561 sljit_s32 src1, sljit_sw src1w,
1562 sljit_s32 src2, sljit_sw src2w)
1563 {
1564 sljit_u8* inst;
1565 sljit_u8 op_eax_imm = (op_types >> 24);
1566 sljit_u8 op_rm = (op_types >> 16) & 0xff;
1567 sljit_u8 op_mr = (op_types >> 8) & 0xff;
1568 sljit_u8 op_imm = op_types & 0xff;
1569
1570 if (dst == SLJIT_UNUSED) {
1571 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1572 if (src2 & SLJIT_IMM) {
1573 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1574 }
1575 else {
1576 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1577 FAIL_IF(!inst);
1578 *inst = op_rm;
1579 }
1580 return SLJIT_SUCCESS;
1581 }
1582
1583 if (dst == src1 && dstw == src1w) {
1584 if (src2 & SLJIT_IMM) {
1585 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1586 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1587 #else
1588 if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1589 #endif
1590 BINARY_EAX_IMM(op_eax_imm, src2w);
1591 }
1592 else {
1593 BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1594 }
1595 }
1596 else if (FAST_IS_REG(dst)) {
1597 inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1598 FAIL_IF(!inst);
1599 *inst = op_rm;
1600 }
1601 else if (FAST_IS_REG(src2)) {
1602 inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1603 FAIL_IF(!inst);
1604 *inst = op_mr;
1605 }
1606 else {
1607 EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1608 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1609 FAIL_IF(!inst);
1610 *inst = op_mr;
1611 }
1612 return SLJIT_SUCCESS;
1613 }
1614
1615 /* General version. */
1616 if (FAST_IS_REG(dst) && dst != src2) {
1617 EMIT_MOV(compiler, dst, 0, src1, src1w);
1618 if (src2 & SLJIT_IMM) {
1619 BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1620 }
1621 else {
1622 inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1623 FAIL_IF(!inst);
1624 *inst = op_rm;
1625 }
1626 }
1627 else {
1628 /* This version requires less memory writing. */
1629 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1630 if (src2 & SLJIT_IMM) {
1631 BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1632 }
1633 else {
1634 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1635 FAIL_IF(!inst);
1636 *inst = op_rm;
1637 }
1638 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1639 }
1640
1641 return SLJIT_SUCCESS;
1642 }
1643
1644 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1645 sljit_s32 dst, sljit_sw dstw,
1646 sljit_s32 src1, sljit_sw src1w,
1647 sljit_s32 src2, sljit_sw src2w)
1648 {
1649 sljit_u8* inst;
1650 sljit_s32 dst_r;
1651
1652 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1653
1654 /* Register destination. */
1655 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1656 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1657 FAIL_IF(!inst);
1658 *inst++ = GROUP_0F;
1659 *inst = IMUL_r_rm;
1660 }
1661 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1662 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1663 FAIL_IF(!inst);
1664 *inst++ = GROUP_0F;
1665 *inst = IMUL_r_rm;
1666 }
1667 else if (src1 & SLJIT_IMM) {
1668 if (src2 & SLJIT_IMM) {
1669 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1670 src2 = dst_r;
1671 src2w = 0;
1672 }
1673
1674 if (src1w <= 127 && src1w >= -128) {
1675 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1676 FAIL_IF(!inst);
1677 *inst = IMUL_r_rm_i8;
1678 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1679 FAIL_IF(!inst);
1680 INC_SIZE(1);
1681 *inst = (sljit_s8)src1w;
1682 }
1683 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1684 else {
1685 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1686 FAIL_IF(!inst);
1687 *inst = IMUL_r_rm_i32;
1688 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1689 FAIL_IF(!inst);
1690 INC_SIZE(4);
1691 sljit_unaligned_store_sw(inst, src1w);
1692 }
1693 #else
1694 else if (IS_HALFWORD(src1w)) {
1695 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1696 FAIL_IF(!inst);
1697 *inst = IMUL_r_rm_i32;
1698 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1699 FAIL_IF(!inst);
1700 INC_SIZE(4);
1701 sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1702 }
1703 else {
1704 if (dst_r != src2)
1705 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1706 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1707 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1708 FAIL_IF(!inst);
1709 *inst++ = GROUP_0F;
1710 *inst = IMUL_r_rm;
1711 }
1712 #endif
1713 }
1714 else if (src2 & SLJIT_IMM) {
1715 /* Note: src1 is NOT immediate. */
1716
1717 if (src2w <= 127 && src2w >= -128) {
1718 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1719 FAIL_IF(!inst);
1720 *inst = IMUL_r_rm_i8;
1721 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1722 FAIL_IF(!inst);
1723 INC_SIZE(1);
1724 *inst = (sljit_s8)src2w;
1725 }
1726 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1727 else {
1728 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1729 FAIL_IF(!inst);
1730 *inst = IMUL_r_rm_i32;
1731 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1732 FAIL_IF(!inst);
1733 INC_SIZE(4);
1734 sljit_unaligned_store_sw(inst, src2w);
1735 }
1736 #else
1737 else if (IS_HALFWORD(src2w)) {
1738 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1739 FAIL_IF(!inst);
1740 *inst = IMUL_r_rm_i32;
1741 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1742 FAIL_IF(!inst);
1743 INC_SIZE(4);
1744 sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1745 }
1746 else {
1747 if (dst_r != src1)
1748 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1749 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1750 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1751 FAIL_IF(!inst);
1752 *inst++ = GROUP_0F;
1753 *inst = IMUL_r_rm;
1754 }
1755 #endif
1756 }
1757 else {
1758 /* Neither argument is immediate. */
1759 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1760 dst_r = TMP_REG1;
1761 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1762 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1763 FAIL_IF(!inst);
1764 *inst++ = GROUP_0F;
1765 *inst = IMUL_r_rm;
1766 }
1767
1768 if (dst & SLJIT_MEM)
1769 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1770
1771 return SLJIT_SUCCESS;
1772 }
1773
1774 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1775 sljit_s32 dst, sljit_sw dstw,
1776 sljit_s32 src1, sljit_sw src1w,
1777 sljit_s32 src2, sljit_sw src2w)
1778 {
1779 sljit_u8* inst;
1780 sljit_s32 dst_r, done = 0;
1781
1782 /* These cases better be left to handled by normal way. */
1783 if (dst == src1 && dstw == src1w)
1784 return SLJIT_ERR_UNSUPPORTED;
1785 if (dst == src2 && dstw == src2w)
1786 return SLJIT_ERR_UNSUPPORTED;
1787
1788 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1789
1790 if (FAST_IS_REG(src1)) {
1791 if (FAST_IS_REG(src2)) {
1792 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1793 FAIL_IF(!inst);
1794 *inst = LEA_r_m;
1795 done = 1;
1796 }
1797 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1798 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1799 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1800 #else
1801 if (src2 & SLJIT_IMM) {
1802 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1803 #endif
1804 FAIL_IF(!inst);
1805 *inst = LEA_r_m;
1806 done = 1;
1807 }
1808 }
1809 else if (FAST_IS_REG(src2)) {
1810 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1811 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1812 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1813 #else
1814 if (src1 & SLJIT_IMM) {
1815 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1816 #endif
1817 FAIL_IF(!inst);
1818 *inst = LEA_r_m;
1819 done = 1;
1820 }
1821 }
1822
1823 if (done) {
1824 if (dst_r == TMP_REG1)
1825 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1826 return SLJIT_SUCCESS;
1827 }
1828 return SLJIT_ERR_UNSUPPORTED;
1829 }
1830
1831 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1832 sljit_s32 src1, sljit_sw src1w,
1833 sljit_s32 src2, sljit_sw src2w)
1834 {
1835 sljit_u8* inst;
1836
1837 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1838 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1839 #else
1840 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1841 #endif
1842 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1843 return SLJIT_SUCCESS;
1844 }
1845
1846 if (FAST_IS_REG(src1)) {
1847 if (src2 & SLJIT_IMM) {
1848 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1849 }
1850 else {
1851 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1852 FAIL_IF(!inst);
1853 *inst = CMP_r_rm;
1854 }
1855 return SLJIT_SUCCESS;
1856 }
1857
1858 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1859 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1860 FAIL_IF(!inst);
1861 *inst = CMP_rm_r;
1862 return SLJIT_SUCCESS;
1863 }
1864
1865 if (src2 & SLJIT_IMM) {
1866 if (src1 & SLJIT_IMM) {
1867 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1868 src1 = TMP_REG1;
1869 src1w = 0;
1870 }
1871 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1872 }
1873 else {
1874 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1875 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1876 FAIL_IF(!inst);
1877 *inst = CMP_r_rm;
1878 }
1879 return SLJIT_SUCCESS;
1880 }
1881
1882 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1883 sljit_s32 src1, sljit_sw src1w,
1884 sljit_s32 src2, sljit_sw src2w)
1885 {
1886 sljit_u8* inst;
1887
1888 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1889 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1890 #else
1891 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1892 #endif
1893 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1894 return SLJIT_SUCCESS;
1895 }
1896
1897 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1898 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1899 #else
1900 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1901 #endif
1902 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1903 return SLJIT_SUCCESS;
1904 }
1905
1906 if (!(src1 & SLJIT_IMM)) {
1907 if (src2 & SLJIT_IMM) {
1908 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1909 if (IS_HALFWORD(src2w) || compiler->mode32) {
1910 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1911 FAIL_IF(!inst);
1912 *inst = GROUP_F7;
1913 }
1914 else {
1915 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
1916 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
1917 FAIL_IF(!inst);
1918 *inst = TEST_rm_r;
1919 }
1920 #else
1921 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1922 FAIL_IF(!inst);
1923 *inst = GROUP_F7;
1924 #endif
1925 return SLJIT_SUCCESS;
1926 }
1927 else if (FAST_IS_REG(src1)) {
1928 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1929 FAIL_IF(!inst);
1930 *inst = TEST_rm_r;
1931 return SLJIT_SUCCESS;
1932 }
1933 }
1934
1935 if (!(src2 & SLJIT_IMM)) {
1936 if (src1 & SLJIT_IMM) {
1937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1938 if (IS_HALFWORD(src1w) || compiler->mode32) {
1939 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1940 FAIL_IF(!inst);
1941 *inst = GROUP_F7;
1942 }
1943 else {
1944 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
1945 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1946 FAIL_IF(!inst);
1947 *inst = TEST_rm_r;
1948 }
1949 #else
1950 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1951 FAIL_IF(!inst);
1952 *inst = GROUP_F7;
1953 #endif
1954 return SLJIT_SUCCESS;
1955 }
1956 else if (FAST_IS_REG(src2)) {
1957 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1958 FAIL_IF(!inst);
1959 *inst = TEST_rm_r;
1960 return SLJIT_SUCCESS;
1961 }
1962 }
1963
1964 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1965 if (src2 & SLJIT_IMM) {
1966 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1967 if (IS_HALFWORD(src2w) || compiler->mode32) {
1968 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1969 FAIL_IF(!inst);
1970 *inst = GROUP_F7;
1971 }
1972 else {
1973 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1974 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1975 FAIL_IF(!inst);
1976 *inst = TEST_rm_r;
1977 }
1978 #else
1979 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1980 FAIL_IF(!inst);
1981 *inst = GROUP_F7;
1982 #endif
1983 }
1984 else {
1985 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1986 FAIL_IF(!inst);
1987 *inst = TEST_rm_r;
1988 }
1989 return SLJIT_SUCCESS;
1990 }
1991
1992 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
1993 sljit_u8 mode,
1994 sljit_s32 dst, sljit_sw dstw,
1995 sljit_s32 src1, sljit_sw src1w,
1996 sljit_s32 src2, sljit_sw src2w)
1997 {
1998 sljit_u8* inst;
1999
2000 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2001 if (dst == src1 && dstw == src1w) {
2002 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2003 FAIL_IF(!inst);
2004 *inst |= mode;
2005 return SLJIT_SUCCESS;
2006 }
2007 if (dst == SLJIT_UNUSED) {
2008 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2009 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2010 FAIL_IF(!inst);
2011 *inst |= mode;
2012 return SLJIT_SUCCESS;
2013 }
2014 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2015 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2016 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2017 FAIL_IF(!inst);
2018 *inst |= mode;
2019 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2020 return SLJIT_SUCCESS;
2021 }
2022 if (FAST_IS_REG(dst)) {
2023 EMIT_MOV(compiler, dst, 0, src1, src1w);
2024 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2025 FAIL_IF(!inst);
2026 *inst |= mode;
2027 return SLJIT_SUCCESS;
2028 }
2029
2030 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2031 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2032 FAIL_IF(!inst);
2033 *inst |= mode;
2034 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2035 return SLJIT_SUCCESS;
2036 }
2037
2038 if (dst == SLJIT_PREF_SHIFT_REG) {
2039 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2040 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2041 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2042 FAIL_IF(!inst);
2043 *inst |= mode;
2044 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2045 }
2046 else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2047 if (src1 != dst)
2048 EMIT_MOV(compiler, dst, 0, src1, src1w);
2049 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2050 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2051 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2052 FAIL_IF(!inst);
2053 *inst |= mode;
2054 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2055 }
2056 else {
2057 /* This case is complex since ecx itself may be used for
2058 addressing, and this case must be supported as well. */
2059 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2060 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2061 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2062 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2063 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2064 FAIL_IF(!inst);
2065 *inst |= mode;
2066 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2067 #else
2068 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2069 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2070 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2071 FAIL_IF(!inst);
2072 *inst |= mode;
2073 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2074 #endif
2075 if (dst != SLJIT_UNUSED)
2076 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2077 }
2078
2079 return SLJIT_SUCCESS;
2080 }
2081
2082 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2083 sljit_u8 mode, sljit_s32 set_flags,
2084 sljit_s32 dst, sljit_sw dstw,
2085 sljit_s32 src1, sljit_sw src1w,
2086 sljit_s32 src2, sljit_sw src2w)
2087 {
2088 /* The CPU does not set flags if the shift count is 0. */
2089 if (src2 & SLJIT_IMM) {
2090 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2091 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2092 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2093 #else
2094 if ((src2w & 0x1f) != 0)
2095 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2096 #endif
2097 if (!set_flags)
2098 return emit_mov(compiler, dst, dstw, src1, src1w);
2099 /* OR dst, src, 0 */
2100 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2101 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2102 }
2103
2104 if (!set_flags)
2105 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2106
2107 if (!FAST_IS_REG(dst))
2108 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2109
2110 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2111
2112 if (FAST_IS_REG(dst))
2113 return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
2114 return SLJIT_SUCCESS;
2115 }
2116
2117 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2118 sljit_s32 dst, sljit_sw dstw,
2119 sljit_s32 src1, sljit_sw src1w,
2120 sljit_s32 src2, sljit_sw src2w)
2121 {
2122 CHECK_ERROR();
2123 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2124 ADJUST_LOCAL_OFFSET(dst, dstw);
2125 ADJUST_LOCAL_OFFSET(src1, src1w);
2126 ADJUST_LOCAL_OFFSET(src2, src2w);
2127
2128 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2129 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2130 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2131 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2132 compiler->mode32 = op & SLJIT_I32_OP;
2133 #endif
2134
2135 if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
2136 return SLJIT_SUCCESS;
2137
2138 switch (GET_OPCODE(op)) {
2139 case SLJIT_ADD:
2140 if (!HAS_FLAGS(op)) {
2141 if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2142 return compiler->error;
2143 }
2144 return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2145 dst, dstw, src1, src1w, src2, src2w);
2146 case SLJIT_ADDC:
2147 return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2148 dst, dstw, src1, src1w, src2, src2w);
2149 case SLJIT_SUB:
2150 if (!HAS_FLAGS(op)) {
2151 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2152 return compiler->error;
2153 }
2154
2155 if (dst == SLJIT_UNUSED)
2156 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2157 return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2158 dst, dstw, src1, src1w, src2, src2w);
2159 case SLJIT_SUBC:
2160 return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2161 dst, dstw, src1, src1w, src2, src2w);
2162 case SLJIT_MUL:
2163 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2164 case SLJIT_AND:
2165 if (dst == SLJIT_UNUSED)
2166 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2167 return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2168 dst, dstw, src1, src1w, src2, src2w);
2169 case SLJIT_OR:
2170 return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2171 dst, dstw, src1, src1w, src2, src2w);
2172 case SLJIT_XOR:
2173 return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2174 dst, dstw, src1, src1w, src2, src2w);
2175 case SLJIT_SHL:
2176 return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2177 dst, dstw, src1, src1w, src2, src2w);
2178 case SLJIT_LSHR:
2179 return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2180 dst, dstw, src1, src1w, src2, src2w);
2181 case SLJIT_ASHR:
2182 return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2183 dst, dstw, src1, src1w, src2, src2w);
2184 }
2185
2186 return SLJIT_SUCCESS;
2187 }
2188
2189 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2190 {
2191 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2192 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2193 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2194 return -1;
2195 #endif
2196 return reg_map[reg];
2197 }
2198
2199 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2200 {
2201 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2202 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2203 return reg;
2204 #else
2205 return freg_map[reg];
2206 #endif
2207 }
2208
2209 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2210 void *instruction, sljit_s32 size)
2211 {
2212 sljit_u8 *inst;
2213
2214 CHECK_ERROR();
2215 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2216
2217 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2218 FAIL_IF(!inst);
2219 INC_SIZE(size);
2220 SLJIT_MEMCPY(inst, instruction, size);
2221 return SLJIT_SUCCESS;
2222 }
2223
2224 /* --------------------------------------------------------------------- */
2225 /* Floating point operators */
2226 /* --------------------------------------------------------------------- */
2227
2228 /* Alignment(3) + 4 * 16 bytes. */
2229 static sljit_s32 sse2_data[3 + (4 * 4)];
2230 static sljit_s32 *sse2_buffer;
2231
2232 static void init_compiler(void)
2233 {
2234 /* Align to 16 bytes. */
2235 sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2236
2237 /* Single precision constants (each constant is 16 byte long). */
2238 sse2_buffer[0] = 0x80000000;
2239 sse2_buffer[4] = 0x7fffffff;
2240 /* Double precision constants (each constant is 16 byte long). */
2241 sse2_buffer[8] = 0;
2242 sse2_buffer[9] = 0x80000000;
2243 sse2_buffer[12] = 0xffffffff;
2244 sse2_buffer[13] = 0x7fffffff;
2245 }
2246
2247 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2248 sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2249 {
2250 sljit_u8 *inst;
2251
2252 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2253 FAIL_IF(!inst);
2254 *inst++ = GROUP_0F;
2255 *inst = opcode;
2256 return SLJIT_SUCCESS;
2257 }
2258
2259 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2260 sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2261 {
2262 sljit_u8 *inst;
2263
2264 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2265 FAIL_IF(!inst);
2266 *inst++ = GROUP_0F;
2267 *inst = opcode;
2268 return SLJIT_SUCCESS;
2269 }
2270
2271 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2272 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2273 {
2274 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2275 }
2276
2277 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2278 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2279 {
2280 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2281 }
2282
2283 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2284 sljit_s32 dst, sljit_sw dstw,
2285 sljit_s32 src, sljit_sw srcw)
2286 {
2287 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2288 sljit_u8 *inst;
2289
2290 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2291 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2292 compiler->mode32 = 0;
2293 #endif
2294
2295 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2296 FAIL_IF(!inst);
2297 *inst++ = GROUP_0F;
2298 *inst = CVTTSD2SI_r_xm;
2299
2300 if (dst & SLJIT_MEM)
2301 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2302 return SLJIT_SUCCESS;
2303 }
2304
2305 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2306 sljit_s32 dst, sljit_sw dstw,
2307 sljit_s32 src, sljit_sw srcw)
2308 {
2309 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2310 sljit_u8 *inst;
2311
2312 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2313 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2314 compiler->mode32 = 0;
2315 #endif
2316
2317 if (src & SLJIT_IMM) {
2318 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2319 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2320 srcw = (sljit_s32)srcw;
2321 #endif
2322 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2323 src = TMP_REG1;
2324 srcw = 0;
2325 }
2326
2327 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2328 FAIL_IF(!inst);
2329 *inst++ = GROUP_0F;
2330 *inst = CVTSI2SD_x_rm;
2331
2332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2333 compiler->mode32 = 1;
2334 #endif
2335 if (dst_r == TMP_FREG)
2336 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2337 return SLJIT_SUCCESS;
2338 }
2339
2340 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2341 sljit_s32 src1, sljit_sw src1w,
2342 sljit_s32 src2, sljit_sw src2w)
2343 {
2344 if (!FAST_IS_REG(src1)) {
2345 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2346 src1 = TMP_FREG;
2347 }
2348
2349 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2350 }
2351
2352 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2353 sljit_s32 dst, sljit_sw dstw,
2354 sljit_s32 src, sljit_sw srcw)
2355 {
2356 sljit_s32 dst_r;
2357
2358 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2359 compiler->mode32 = 1;
2360 #endif
2361
2362 CHECK_ERROR();
2363 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2364
2365 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2366 if (FAST_IS_REG(dst))
2367 return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2368 if (FAST_IS_REG(src))
2369 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2370 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2371 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2372 }
2373
2374 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2375 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2376 if (FAST_IS_REG(src)) {
2377 /* We overwrite the high bits of source. From SLJIT point of view,
2378 this is not an issue.
2379 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2380 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2381 }
2382 else {
2383 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2384 src = TMP_FREG;
2385 }
2386
2387 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2388 if (dst_r == TMP_FREG)
2389 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2390 return SLJIT_SUCCESS;
2391 }
2392
2393 if (FAST_IS_REG(dst)) {
2394 dst_r = dst;
2395 if (dst != src)
2396 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2397 }
2398 else {
2399 dst_r = TMP_FREG;
2400 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2401 }
2402
2403 switch (GET_OPCODE(op)) {
2404 case SLJIT_NEG_F64:
2405 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2406 break;
2407
2408 case SLJIT_ABS_F64:
2409 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2410 break;
2411 }
2412
2413 if (dst_r == TMP_FREG)
2414 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2415 return SLJIT_SUCCESS;
2416 }
2417
2418 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2419 sljit_s32 dst, sljit_sw dstw,
2420 sljit_s32 src1, sljit_sw src1w,
2421 sljit_s32 src2, sljit_sw src2w)
2422 {
2423 sljit_s32 dst_r;
2424
2425 CHECK_ERROR();
2426 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2427 ADJUST_LOCAL_OFFSET(dst, dstw);
2428 ADJUST_LOCAL_OFFSET(src1, src1w);
2429 ADJUST_LOCAL_OFFSET(src2, src2w);
2430
2431 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2432 compiler->mode32 = 1;
2433 #endif
2434
2435 if (FAST_IS_REG(dst)) {
2436 dst_r = dst;
2437 if (dst == src1)
2438 ; /* Do nothing here. */
2439 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2440 /* Swap arguments. */
2441 src2 = src1;
2442 src2w = src1w;
2443 }
2444 else if (dst != src2)
2445 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2446 else {
2447 dst_r = TMP_FREG;
2448 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2449 }
2450 }
2451 else {
2452 dst_r = TMP_FREG;
2453 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2454 }
2455
2456 switch (GET_OPCODE(op)) {
2457 case SLJIT_ADD_F64:
2458 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2459 break;
2460
2461 case SLJIT_SUB_F64:
2462 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2463 break;
2464
2465 case SLJIT_MUL_F64:
2466 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2467 break;
2468
2469 case SLJIT_DIV_F64:
2470 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2471 break;
2472 }
2473
2474 if (dst_r == TMP_FREG)
2475 return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2476 return SLJIT_SUCCESS;
2477 }
2478
2479 /* --------------------------------------------------------------------- */
2480 /* Conditional instructions */
2481 /* --------------------------------------------------------------------- */
2482
2483 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2484 {
2485 sljit_u8 *inst;
2486 struct sljit_label *label;
2487
2488 CHECK_ERROR_PTR();
2489 CHECK_PTR(check_sljit_emit_label(compiler));
2490
2491 if (compiler->last_label && compiler->last_label->size == compiler->size)
2492 return compiler->last_label;
2493
2494 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2495 PTR_FAIL_IF(!label);
2496 set_label(label, compiler);
2497
2498 inst = (sljit_u8*)ensure_buf(compiler, 2);
2499 PTR_FAIL_IF(!inst);
2500
2501 *inst++ = 0;
2502 *inst++ = 0;
2503
2504 return label;
2505 }
2506
2507 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2508 {
2509 sljit_u8 *inst;
2510 struct sljit_jump *jump;
2511
2512 CHECK_ERROR_PTR();
2513 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2514
2515 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2516 PTR_FAIL_IF_NULL(jump);
2517 set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
2518
2519 /* Worst case size. */
2520 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2521 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2522 #else
2523 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2524 #endif
2525
2526 inst = (sljit_u8*)ensure_buf(compiler, 2);
2527 PTR_FAIL_IF_NULL(inst);
2528
2529 *inst++ = 0;
2530 *inst++ = 1;
2531 return jump;
2532 }
2533
2534 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2535 {
2536 sljit_u8 *inst;
2537 struct sljit_jump *jump;
2538
2539 CHECK_ERROR();
2540 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2541 ADJUST_LOCAL_OFFSET(src, srcw);
2542
2543 CHECK_EXTRA_REGS(src, srcw, (void)0);
2544
2545 if (src == SLJIT_IMM) {
2546 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2547 FAIL_IF_NULL(jump);
2548 set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
2549 jump->u.target = srcw;
2550
2551 /* Worst case size. */
2552 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2553 compiler->size += 5;
2554 #else
2555 compiler->size += 10 + 3;
2556 #endif
2557
2558 inst = (sljit_u8*)ensure_buf(compiler, 2);
2559 FAIL_IF_NULL(inst);
2560
2561 *inst++ = 0;
2562 *inst++ = 1;
2563 }
2564 else {
2565 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2566 /* REX_W is not necessary (src is not immediate). */
2567 compiler->mode32 = 1;
2568 #endif
2569 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2570 FAIL_IF(!inst);
2571 *inst++ = GROUP_FF;
2572 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2573 }
2574 return SLJIT_SUCCESS;
2575 }
2576
/* Turns the condition selected by type into a 0 / 1 value.
   When GET_OPCODE(op) is below SLJIT_ADD the value is written to dst.
   Otherwise it is combined with dst: some SLJIT_OR cases without flags are
   emitted inline, everything else is forwarded to sljit_emit_op2 as
   "dst = dst <op> TMP_REG1". Most instruction bytes are written by hand.
   Returns SLJIT_SUCCESS or an error code. */
2577 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2578 sljit_s32 dst, sljit_sw dstw,
2579 sljit_s32 type)
2580 {
2581 sljit_u8 *inst;
2582 sljit_u8 cond_set = 0;
2583 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2584 sljit_s32 reg;
2585 #endif
2586 /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
/* The saved values are the ones handed to sljit_emit_op2 below, which
   performs these adjustments on its own arguments. */
2587 sljit_s32 dst_save = dst;
2588 sljit_sw dstw_save = dstw;
2589 
2590 CHECK_ERROR();
2591 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2592 
2593 ADJUST_LOCAL_OFFSET(dst, dstw);
2594 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2595 
2596 type &= 0xff;
2597 /* setcc = jcc + 0x10. */
2598 cond_set = get_jump_code(type) + 0x10;
2599 
2600 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Inline case: OR without flags into a register. The condition is set in
   the low byte of TMP_REG1 and then ORed into dst with an 8 bit OR. */
2601 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
2602 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2603 FAIL_IF(!inst);
2604 INC_SIZE(4 + 3);
2605 /* Set low register to conditional flag. */
2606 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2607 *inst++ = GROUP_0F;
2608 *inst++ = cond_set;
2609 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2610 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2611 *inst++ = OR_rm8_r8;
2612 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2613 return SLJIT_SUCCESS;
2614 }
2615 
/* The value is produced directly in dst when op is below SLJIT_ADD and dst
   is a register, in TMP_REG1 otherwise. */
2616 reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2617 
2618 inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2619 FAIL_IF(!inst);
2620 INC_SIZE(4 + 4);
2621 /* Set low register to conditional flag. */
2622 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2623 *inst++ = GROUP_0F;
2624 *inst++ = cond_set;
2625 *inst++ = MOD_REG | reg_lmap[reg];
2626 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2627 /* The movzx instruction does not affect flags. */
2628 *inst++ = GROUP_0F;
2629 *inst++ = MOVZX_r_rm8;
2630 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2631 
/* The value was produced directly in dst: done. */
2632 if (reg != TMP_REG1)
2633 return SLJIT_SUCCESS;
2634 
/* Opcodes below SLJIT_ADD: copy the value from TMP_REG1 to dst. */
2635 if (GET_OPCODE(op) < SLJIT_ADD) {
2636 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2637 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2638 }
2639 
/* NOTE(review): skip_checks presumably disables argument checking for the
   nested sljit_emit_op2 call - confirm against the checker code. */
2640 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2641 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2642 compiler->skip_checks = 1;
2643 #endif
2644 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2645 
2646 #else
2647 /* The SLJIT_CONFIG_X86_32 code path starts here. */
2648 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2649 if (reg_map[dst] <= 4) {
2650 /* Low byte is accessible. */
2651 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2652 FAIL_IF(!inst);
2653 INC_SIZE(3 + 3);
2654 /* Set low byte to conditional flag. */
2655 *inst++ = GROUP_0F;
2656 *inst++ = cond_set;
2657 *inst++ = MOD_REG | reg_map[dst];
2658 
2659 *inst++ = GROUP_0F;
2660 *inst++ = MOVZX_r_rm8;
2661 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2662 return SLJIT_SUCCESS;
2663 }
2664 
2665 /* Low byte is not accessible. */
/* The value -1 is treated as "CPU features not queried yet". */
2666 if (cpu_has_cmov == -1)
2667 get_cpu_features();
2668 
/* With cmov: dst is set to 0 and conditionally replaced by the 1 held
   in TMP_REG1. */
2669 if (cpu_has_cmov) {
2670 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2671 /* a xor reg, reg operation would overwrite the flags. */
2672 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2673 
2674 inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2675 FAIL_IF(!inst);
2676 INC_SIZE(3);
2677 
2678 *inst++ = GROUP_0F;
2679 /* cmovcc = setcc - 0x50. */
2680 *inst++ = cond_set - 0x50;
2681 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2682 return SLJIT_SUCCESS;
2683 }
2684 
/* Without cmov: eax is exchanged with TMP_REG1, the condition is set in
   al and zero extended into dst, then the exchange is undone. */
2685 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2686 FAIL_IF(!inst);
2687 INC_SIZE(1 + 3 + 3 + 1);
2688 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2689 /* Set al to conditional flag. */
2690 *inst++ = GROUP_0F;
2691 *inst++ = cond_set;
2692 *inst++ = MOD_REG | 0 /* eax */;
2693 
2694 *inst++ = GROUP_0F;
2695 *inst++ = MOVZX_r_rm8;
2696 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2697 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2698 return SLJIT_SUCCESS;
2699 }
2700 
/* Inline case: OR without flags into a register which passes the
   reg_map[dst] <= 4 test. */
2701 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
2702 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2703 
/* dst is not SLJIT_R0: eax is exchanged with TMP_REG1 and al receives
   the condition, which is then ORed into the low byte of dst. */
2704 if (dst != SLJIT_R0) {
2705 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2706 FAIL_IF(!inst);
2707 INC_SIZE(1 + 3 + 2 + 1);
2708 /* Set low register to conditional flag. */
2709 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2710 *inst++ = GROUP_0F;
2711 *inst++ = cond_set;
2712 *inst++ = MOD_REG | 0 /* eax */;
2713 *inst++ = OR_rm8_r8;
2714 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2715 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2716 }
/* dst is SLJIT_R0 (eax): ecx is exchanged with TMP_REG1 instead and cl
   receives the condition, which is then ORed into al. */
2717 else {
2718 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2719 FAIL_IF(!inst);
2720 INC_SIZE(2 + 3 + 2 + 2);
2721 /* Set low register to conditional flag. */
2722 *inst++ = XCHG_r_rm;
2723 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2724 *inst++ = GROUP_0F;
2725 *inst++ = cond_set;
2726 *inst++ = MOD_REG | 1 /* ecx */;
2727 *inst++ = OR_rm8_r8;
2728 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2729 *inst++ = XCHG_r_rm;
2730 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2731 }
2732 return SLJIT_SUCCESS;
2733 }
2734 
2735 /* Set TMP_REG1 to the bit. */
/* eax and TMP_REG1 are exchanged around the setcc / movzx pair, so the
   zero extended value ends up in TMP_REG1. */
2736 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2737 FAIL_IF(!inst);
2738 INC_SIZE(1 + 3 + 3 + 1);
2739 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2740 /* Set al to conditional flag. */
2741 *inst++ = GROUP_0F;
2742 *inst++ = cond_set;
2743 *inst++ = MOD_REG | 0 /* eax */;
2744 
2745 *inst++ = GROUP_0F;
2746 *inst++ = MOVZX_r_rm8;
2747 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2748 
2749 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2750 
/* Opcodes below SLJIT_ADD: copy the value from TMP_REG1 to dst. */
2751 if (GET_OPCODE(op) < SLJIT_ADD)
2752 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2753 
/* NOTE(review): skip_checks presumably disables argument checking for the
   nested sljit_emit_op2 call - confirm against the checker code. */
2754 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2755 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2756 compiler->skip_checks = 1;
2757 #endif
2758 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2759 #endif /* SLJIT_CONFIG_X86_64 */
2760 }
2761
2762 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2763 sljit_s32 dst_reg,
2764 sljit_s32 src, sljit_sw srcw)
2765 {
2766 sljit_u8* inst;
2767
2768 CHECK_ERROR();
2769 CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2770
2771 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2772 dst_reg &= ~SLJIT_I32_OP;
2773
2774 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
2775 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2776 #else
2777 if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
2778 return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2779 #endif
2780
2781 /* ADJUST_LOCAL_OFFSET is not needed. */
2782 CHECK_EXTRA_REGS(src, srcw, (void)0);
2783
2784 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2785 compiler->mode32 = dst_reg & SLJIT_I32_OP;
2786 dst_reg &= ~SLJIT_I32_OP;
2787 #endif
2788
2789 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2790 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2791 src = TMP_REG1;
2792 srcw = 0;
2793 }
2794
2795 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
2796 FAIL_IF(!inst);
2797 *inst++ = GROUP_0F;
2798 *inst = get_jump_code(type & 0xff) - 0x40;
2799 return SLJIT_SUCCESS;
2800 }
2801
2802 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2803 {
2804 CHECK_ERROR();
2805 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2806 ADJUST_LOCAL_OFFSET(dst, dstw);
2807
2808 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2809
2810 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2811 compiler->mode32 = 0;
2812 #endif
2813
2814 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2815
2816 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2817 if (NOT_HALFWORD(offset)) {
2818 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2819 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2820 SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2821 return compiler->error;
2822 #else
2823 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2824 #endif
2825 }
2826 #endif
2827
2828 if (offset != 0)
2829 return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2830 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2831 }
2832
2833 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2834 {
2835 sljit_u8 *inst;
2836 struct sljit_const *const_;
2837 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2838 sljit_s32 reg;
2839 #endif
2840
2841 CHECK_ERROR_PTR();
2842 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2843 ADJUST_LOCAL_OFFSET(dst, dstw);
2844
2845 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2846
2847 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2848 PTR_FAIL_IF(!const_);
2849 set_const(const_, compiler);
2850
2851 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2852 compiler->mode32 = 0;
2853 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
2854
2855 if (emit_load_imm64(compiler, reg, init_value))
2856 return NULL;
2857 #else
2858 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2859 return NULL;
2860 #endif
2861
2862 inst = (sljit_u8*)ensure_buf(compiler, 2);
2863 PTR_FAIL_IF(!inst);
2864
2865 *inst++ = 0;
2866 *inst++ = 2;
2867
2868 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2869 if (dst & SLJIT_MEM)
2870 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2871 return NULL;
2872 #endif
2873
2874 return const_;
2875 }
2876
2877 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
2878 {
2879 struct sljit_put_label *put_label;
2880 sljit_u8 *inst;
2881 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2882 sljit_s32 reg;
2883 sljit_uw start_size;
2884 #endif
2885
2886 CHECK_ERROR_PTR();
2887 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
2888 ADJUST_LOCAL_OFFSET(dst, dstw);
2889
2890 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2891
2892 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
2893 PTR_FAIL_IF(!put_label);
2894 set_put_label(put_label, compiler, 0);
2895
2896 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2897 compiler->mode32 = 0;
2898 reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
2899
2900 if (emit_load_imm64(compiler, reg, 0))
2901 return NULL;
2902 #else
2903 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
2904 return NULL;
2905 #endif
2906
2907 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2908 if (dst & SLJIT_MEM) {
2909 start_size = compiler->size;
2910 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2911 return NULL;
2912 put_label->flags = compiler->size - start_size;
2913 }
2914 #endif
2915
2916 inst = (sljit_u8*)ensure_buf(compiler, 2);
2917 PTR_FAIL_IF(!inst);
2918
2919 *inst++ = 0;
2920 *inst++ = 3;
2921
2922 return put_label;
2923 }
2924
2925 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2926 {
2927 SLJIT_UNUSED_ARG(executable_offset);
2928 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2929 sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
2930 #else
2931 sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
2932 #endif
2933 }
2934
2935 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
2936 {
2937 SLJIT_UNUSED_ARG(executable_offset);
2938 sljit_unaligned_store_sw((void*)addr, new_constant);
2939 }

  ViewVC Help
Powered by ViewVC 1.1.5