/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1243 - (show annotations)
Tue Jan 29 08:11:36 2013 UTC (6 years, 11 months ago) by zherczeg
File MIME type: text/plain
File size: 83082 byte(s)
JIT compiler update.
1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
28 {
29 return "x86" SLJIT_CPUINFO;
30 }
31
32 /*
33 32b register indexes:
34 0 - EAX
35 1 - ECX
36 2 - EDX
37 3 - EBX
38 4 - none
39 5 - EBP
40 6 - ESI
41 7 - EDI
42 */
43
44 /*
45 64b register indexes:
46 0 - RAX
47 1 - RCX
48 2 - RDX
49 3 - RBX
50 4 - none
51 5 - RBP
52 6 - RSI
53 7 - RDI
54 8 - R8 - From now on REX prefix is required
55 9 - R9
56 10 - R10
57 11 - R11
58 12 - R12
59 13 - R13
60 14 - R14
61 15 - R15
62 */
63
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
65
66 /* Last register + 1. */
67 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
68
/* Abstract SLJIT register index -> x86 machine register encoding
   (see the 32b register index table above). Index 0 is unused and
   zero entries mark SLJIT registers without a direct machine register. */
69 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
70 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
71 };
72
/* On x86-32 the extra scratch/saved registers do not fit in machine
   registers, so rewrite (p, w) to address their stack slots relative to
   SLJIT_LOCALS_REG, then execute `do`. */
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
75 w = compiler->scratches_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_sw); \
76 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
77 do; \
78 } \
79 else if (p >= SLJIT_SAVED_EREG1 && p <= SLJIT_SAVED_EREG2) { \
80 w = compiler->saveds_start + (p - SLJIT_SAVED_EREG1) * sizeof(sljit_sw); \
81 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
82 do; \
83 }
84
85 #else /* SLJIT_CONFIG_X86_32 */
86
87 /* Last register + 1. */
88 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
89 #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
90 #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
91
92 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
93 Note: avoid using r12 and r13 for memory addressing;
94 therefore r12 is better for SAVED_EREG than SAVED_REG. */
95 #ifndef _WIN64
96 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
97 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
98 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
99 };
100 /* low-map. reg_map & 0x7. */
101 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
102 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
103 };
104 #else
105 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
106 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
107 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 15, 4, 10, 8, 9
108 };
109 /* low-map. reg_map & 0x7. */
110 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
111 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 7, 4, 2, 0, 1
112 };
113 #endif
114
/* REX prefix bits: W selects 64-bit operand size; R/X/B extend the ModRM
   reg, SIB index, and base/rm fields to reach r8-r15. */
115 #define REX_W 0x48
116 #define REX_R 0x44
117 #define REX_X 0x42
118 #define REX_B 0x41
119 #define REX 0x40
120
/* Whether a value fits in a sign-extended 32-bit immediate. */
121 #define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
122 #define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll)
123
/* On x86-64 every SLJIT register has a machine register; no redirection. */
124 #define CHECK_EXTRA_REGS(p, w, do)
125
126 #endif /* SLJIT_CONFIG_X86_32 */
127
128 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
129 #define TMP_FREG (0)
130 #endif
131
132 /* Size flags for emit_x86_instruction: */
133 #define EX86_BIN_INS 0x0010
134 #define EX86_SHIFT_INS 0x0020
135 #define EX86_REX 0x0040
136 #define EX86_NO_REXW 0x0080
137 #define EX86_BYTE_ARG 0x0100
138 #define EX86_HALF_ARG 0x0200
139 #define EX86_PREF_66 0x0400
140
141 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
142 #define EX86_SSE2 0x0800
143 #define EX86_PREF_F2 0x1000
144 #define EX86_PREF_F3 0x2000
145 #endif
146
147 /* --------------------------------------------------------------------- */
148 /* Instruction forms */
149 /* --------------------------------------------------------------------- */
150
/* x86 opcode bytes, named OPERATION_destination_source. Constants written
   as (GROUP_xx n << 3) are not opcodes: they are the 3-bit /r extension
   OR'd into the ModRM byte of the named opcode group. */
151 #define ADD (/* BINARY */ 0 << 3)
152 #define ADD_EAX_i32 0x05
153 #define ADD_r_rm 0x03
154 #define ADD_rm_r 0x01
155 #define ADDSD_x_xm 0x58
156 #define ADC (/* BINARY */ 2 << 3)
157 #define ADC_EAX_i32 0x15
158 #define ADC_r_rm 0x13
159 #define ADC_rm_r 0x11
160 #define AND (/* BINARY */ 4 << 3)
161 #define AND_EAX_i32 0x25
162 #define AND_r_rm 0x23
163 #define AND_rm_r 0x21
164 #define ANDPD_x_xm 0x54
165 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
166 #define CALL_i32 0xe8
167 #define CALL_rm (/* GROUP_FF */ 2 << 3)
168 #define CDQ 0x99
169 #define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
170 #define CMP (/* BINARY */ 7 << 3)
171 #define CMP_EAX_i32 0x3d
172 #define CMP_r_rm 0x3b
173 #define CMP_rm_r 0x39
174 #define DIV (/* GROUP_F7 */ 6 << 3)
175 #define DIVSD_x_xm 0x5e
176 #define INT3 0xcc
177 #define IDIV (/* GROUP_F7 */ 7 << 3)
178 #define IMUL (/* GROUP_F7 */ 5 << 3)
179 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
180 #define IMUL_r_rm_i8 0x6b
181 #define IMUL_r_rm_i32 0x69
182 #define JE_i8 0x74
183 #define JMP_i8 0xeb
184 #define JMP_i32 0xe9
185 #define JMP_rm (/* GROUP_FF */ 4 << 3)
186 #define LEA_r_m 0x8d
187 #define MOV_r_rm 0x8b
188 #define MOV_r_i32 0xb8
189 #define MOV_rm_r 0x89
190 #define MOV_rm_i32 0xc7
191 #define MOV_rm8_i8 0xc6
192 #define MOV_rm8_r8 0x88
193 #define MOVSD_x_xm 0x10
194 #define MOVSD_xm_x 0x11
195 #define MOVSXD_r_rm 0x63
196 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
197 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
198 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
199 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
200 #define MUL (/* GROUP_F7 */ 4 << 3)
201 #define MULSD_x_xm 0x59
202 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
203 #define NOP 0x90
204 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
205 #define OR (/* BINARY */ 1 << 3)
206 #define OR_r_rm 0x0b
207 #define OR_EAX_i32 0x0d
208 #define OR_rm_r 0x09
209 #define OR_rm8_r8 0x08
210 #define POP_r 0x58
211 #define POP_rm 0x8f
212 #define POPF 0x9d
213 #define PUSH_i32 0x68
214 #define PUSH_r 0x50
215 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
216 #define PUSHF 0x9c
217 #define RET_near 0xc3
218 #define RET_i16 0xc2
219 #define SBB (/* BINARY */ 3 << 3)
220 #define SBB_EAX_i32 0x1d
221 #define SBB_r_rm 0x1b
222 #define SBB_rm_r 0x19
223 #define SAR (/* SHIFT */ 7 << 3)
224 #define SHL (/* SHIFT */ 4 << 3)
225 #define SHR (/* SHIFT */ 5 << 3)
226 #define SUB (/* BINARY */ 5 << 3)
227 #define SUB_EAX_i32 0x2d
228 #define SUB_r_rm 0x2b
229 #define SUB_rm_r 0x29
230 #define SUBSD_x_xm 0x5c
231 #define TEST_EAX_i32 0xa9
232 #define TEST_rm_r 0x85
233 #define UCOMISD_x_xm 0x2e
234 #define XCHG_EAX_r 0x90
235 #define XCHG_r_rm 0x87
236 #define XOR (/* BINARY */ 6 << 3)
237 #define XOR_EAX_i32 0x35
238 #define XOR_r_rm 0x33
239 #define XOR_rm_r 0x31
240 #define XORPD_x_xm 0x57
241
/* Opcode-group first bytes; the /r field of the following ModRM byte
   (the shifted constants above) selects the concrete operation. */
242 #define GROUP_0F 0x0f
243 #define GROUP_F7 0xf7
244 #define GROUP_FF 0xff
245 #define GROUP_BINARY_81 0x81
246 #define GROUP_BINARY_83 0x83
247 #define GROUP_SHIFT_1 0xd1
248 #define GROUP_SHIFT_N 0xc1
249 #define GROUP_SHIFT_CL 0xd3
250
/* ModRM mod-field values: register-direct (11) and disp8 (01) forms,
   already shifted into bits 7:6. */
251 #define MOD_REG 0xc0
252 #define MOD_DISP8 0x40
253
/* Each buffer record starts with its length byte (consumed again by the
   second pass in sljit_generate_code); INC_SIZE writes that byte and
   grows the total generated-code size. */
254 #define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))
255
256 #define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
257 #define POP_REG(r) (*inst++ = (POP_r + (r)))
258 #define RET() (*inst++ = (RET_near))
259 #define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
260 /* r32, r/m32 */
261 #define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
262
263 /* Multithreading does not affect these static variables, since they store
264 built-in CPU features. Therefore they can be overwritten by different threads
265 if they detect the CPU features at the same time. */
266 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
267 static sljit_si cpu_has_sse2 = -1;
268 #endif
/* -1 = not detected yet; set to 0/1 by get_cpu_features(). */
269 static sljit_si cpu_has_cmov = -1;
270
271 #if defined(_MSC_VER) && _MSC_VER >= 1400
272 #include <intrin.h>
273 #endif
274
/* Runs CPUID leaf 1 and caches the EDX feature bits in the statics above:
   bit 26 -> SSE2, bit 15 -> CMOV. Three implementations: MSVC intrinsic,
   GCC-style inline assembly, and MSVC inline assembly for old compilers. */
275 static void get_cpu_features(void)
276 {
277 sljit_ui features;
278
279 #if defined(_MSC_VER) && _MSC_VER >= 1400
280
281 int CPUInfo[4];
282 __cpuid(CPUInfo, 1);
283 features = (sljit_ui)CPUInfo[3];
284
285 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
286
287 /* AT&T syntax. */
288 __asm__ (
289 "movl $0x1, %%eax\n"
290 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
291 /* On x86-32, there is no red zone, so this
292 should work (no need for a local variable).
   EBX is saved/restored manually instead of being listed as clobbered;
   NOTE(review): presumably because it may hold the PIC base register. */
293 "push %%ebx\n"
294 #endif
295 "cpuid\n"
296 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
297 "pop %%ebx\n"
298 #endif
299 "movl %%edx, %0\n"
300 : "=g" (features)
301 :
302 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
303 : "%eax", "%ecx", "%edx"
304 #else
305 : "%rax", "%rbx", "%rcx", "%rdx"
306 #endif
307 );
308
309 #else /* _MSC_VER && _MSC_VER >= 1400 */
310
311 /* Intel syntax. */
312 __asm {
313 mov eax, 1
314 cpuid
315 mov features, edx
316 }
317
318 #endif /* _MSC_VER && _MSC_VER >= 1400 */
319
320 #if (defined SLJIT_SSE2 && SLJIT_SSE2) && (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
321 cpu_has_sse2 = (features >> 26) & 0x1;
322 #endif
323 cpu_has_cmov = (features >> 15) & 0x1;
324 }
325
326 static sljit_ub get_jump_code(sljit_si type)
327 {
328 switch (type) {
329 case SLJIT_C_EQUAL:
330 case SLJIT_C_FLOAT_EQUAL:
331 return 0x84 /* je */;
332
333 case SLJIT_C_NOT_EQUAL:
334 case SLJIT_C_FLOAT_NOT_EQUAL:
335 return 0x85 /* jne */;
336
337 case SLJIT_C_LESS:
338 case SLJIT_C_FLOAT_LESS:
339 return 0x82 /* jc */;
340
341 case SLJIT_C_GREATER_EQUAL:
342 case SLJIT_C_FLOAT_GREATER_EQUAL:
343 return 0x83 /* jae */;
344
345 case SLJIT_C_GREATER:
346 case SLJIT_C_FLOAT_GREATER:
347 return 0x87 /* jnbe */;
348
349 case SLJIT_C_LESS_EQUAL:
350 case SLJIT_C_FLOAT_LESS_EQUAL:
351 return 0x86 /* jbe */;
352
353 case SLJIT_C_SIG_LESS:
354 return 0x8c /* jl */;
355
356 case SLJIT_C_SIG_GREATER_EQUAL:
357 return 0x8d /* jnl */;
358
359 case SLJIT_C_SIG_GREATER:
360 return 0x8f /* jnle */;
361
362 case SLJIT_C_SIG_LESS_EQUAL:
363 return 0x8e /* jle */;
364
365 case SLJIT_C_OVERFLOW:
366 case SLJIT_C_MUL_OVERFLOW:
367 return 0x80 /* jo */;
368
369 case SLJIT_C_NOT_OVERFLOW:
370 case SLJIT_C_MUL_NOT_OVERFLOW:
371 return 0x81 /* jno */;
372
373 case SLJIT_C_FLOAT_UNORDERED:
374 return 0x8a /* jp */;
375
376 case SLJIT_C_FLOAT_ORDERED:
377 return 0x8b /* jpo */;
378 }
379 return 0;
380 }
381
/* Forward declarations; the definitions live in the 32/64-bit specific
   sources included below. */
382 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);
383
384 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
385 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
386 #endif
387
/* Emits the opcode bytes of a jump or call at code_ptr, picking the
   shortest encoding: the 2-byte rel8 form when the target is within
   -128..127 bytes, otherwise the rel32 form; on x86-64, targets beyond
   +/-2GB fall back to generate_far_jump_code. The displacement itself is
   not written here: the jump is flagged PATCH_MB/PATCH_MW and jump->addr
   is advanced to the displacement position so the patch pass of
   sljit_generate_code can fill it in. Returns the updated code_ptr. */
388 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
389 {
390 sljit_si short_jump;
391 sljit_uw label_addr;
392
393 if (jump->flags & JUMP_LABEL)
394 label_addr = (sljit_uw)(code + jump->u.label->size);
395 else
396 label_addr = jump->u.target;
/* Range test is relative to jump->addr + 2, the end of the short form. */
397 short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
398
399 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
400 if ((sljit_sw)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_sw)(label_addr - (jump->addr + 1)) < -0x80000000ll)
401 return generate_far_jump_code(jump, code_ptr, type);
402 #endif
403
404 if (type == SLJIT_JUMP) {
405 if (short_jump)
406 *code_ptr++ = JMP_i8;
407 else
408 *code_ptr++ = JMP_i32;
409 jump->addr++;
410 }
411 else if (type >= SLJIT_FAST_CALL) {
/* Calls have no short form. */
412 short_jump = 0;
413 *code_ptr++ = CALL_i32;
414 jump->addr++;
415 }
416 else if (short_jump) {
/* 0x7x (short jcc) is the 0x8x near-jcc opcode minus 0x10. */
417 *code_ptr++ = get_jump_code(type) - 0x10;
418 jump->addr++;
419 }
420 else {
421 *code_ptr++ = GROUP_0F;
422 *code_ptr++ = get_jump_code(type);
423 jump->addr += 2;
424 }
425
/* Reserve space for the displacement and record which width to patch. */
426 if (short_jump) {
427 jump->flags |= PATCH_MB;
428 code_ptr += sizeof(sljit_sb);
429 } else {
430 jump->flags |= PATCH_MW;
431 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
432 code_ptr += sizeof(sljit_sw);
433 #else
434 code_ptr += sizeof(sljit_si);
435 #endif
436 }
437
438 return code_ptr;
439 }
440
/* Second code-generation pass: copies the records accumulated in the
   compiler's memory buffers into newly allocated executable memory and
   resolves jumps, labels and constants.
   Record format (first byte is a length):
     len > 0  -> len literal machine-code bytes follow;
     len == 0 -> the next byte is a tag: 0 = label, 1 = const,
                 2/3 = fixed call/jump to an absolute address that follows,
                 >= 4 = jump of type (tag - 4).
   Returns a pointer to the executable code, or NULL on allocation error. */
441 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
442 {
443 struct sljit_memory_fragment *buf;
444 sljit_ub *code;
445 sljit_ub *code_ptr;
446 sljit_ub *buf_ptr;
447 sljit_ub *buf_end;
448 sljit_ub len;
449
450 struct sljit_label *label;
451 struct sljit_jump *jump;
452 struct sljit_const *const_;
453
454 CHECK_ERROR_PTR();
455 check_sljit_generate_code(compiler);
456 reverse_buf(compiler);
457
458 /* Second code generation pass. */
459 code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
460 PTR_FAIL_WITH_EXEC_IF(code);
461 buf = compiler->buf;
462
463 code_ptr = code;
464 label = compiler->labels;
465 jump = compiler->jumps;
466 const_ = compiler->consts;
467 do {
468 buf_ptr = buf->memory;
469 buf_end = buf_ptr + buf->used_size;
470 do {
471 len = *buf_ptr++;
472 if (len > 0) {
473 /* The code is already generated. */
474 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
475 code_ptr += len;
476 buf_ptr += len;
477 }
478 else {
/* Zero length: the next byte is a tag (see record format above). */
479 if (*buf_ptr >= 4) {
480 jump->addr = (sljit_uw)code_ptr;
481 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
482 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
483 else
484 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
485 jump = jump->next;
486 }
487 else if (*buf_ptr == 0) {
488 label->addr = (sljit_uw)code_ptr;
489 label->size = code_ptr - code;
490 label = label->next;
491 }
492 else if (*buf_ptr == 1) {
/* The constant's immediate was already copied as literal bytes;
   record where its last word sits so it can be rewritten later. */
493 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
494 const_ = const_->next;
495 }
496 else {
/* Tag 2/3: fixed call/jump to the absolute address stored after the tag. */
497 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
498 *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
499 buf_ptr++;
500 *(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
501 code_ptr += sizeof(sljit_sw);
502 buf_ptr += sizeof(sljit_sw) - 1;
503 #else
504 code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
505 buf_ptr += sizeof(sljit_sw);
506 #endif
507 }
508 buf_ptr++;
509 }
510 } while (buf_ptr < buf_end);
511 SLJIT_ASSERT(buf_ptr == buf_end);
512 buf = buf->next;
513 } while (buf);
514
515 SLJIT_ASSERT(!label);
516 SLJIT_ASSERT(!jump);
517 SLJIT_ASSERT(!const_);
518
/* Patch pass: now that every label address is known, fill in the jump
   displacements reserved by generate_near/far_jump_code. */
519 jump = compiler->jumps;
520 while (jump) {
521 if (jump->flags & PATCH_MB) {
522 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
523 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
524 } else if (jump->flags & PATCH_MW) {
525 if (jump->flags & JUMP_LABEL) {
526 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
527 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
528 #else
529 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll);
530 *(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
531 #endif
532 }
533 else {
534 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
535 *(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
536 #else
537 SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= -0x80000000ll && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= 0x7fffffffll);
538 *(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
539 #endif
540 }
541 }
542 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* PATCH_MD: 64-bit absolute label address (far jump form). */
543 else if (jump->flags & PATCH_MD)
544 *(sljit_sw*)jump->addr = jump->u.label->addr;
545 #endif
546
547 jump = jump->next;
548 }
549
550 /* Maybe we waste some space because of short jumps. */
551 SLJIT_ASSERT(code_ptr <= code + compiler->size);
552 compiler->error = SLJIT_ERR_COMPILED;
553 compiler->executable_size = code_ptr - code;
554 return (void*)code;
555 }
556
557 /* --------------------------------------------------------------------- */
558 /* Operators */
559 /* --------------------------------------------------------------------- */
560
/* Forward declarations for the operator helpers defined later in this
   file and used by the included 32/64-bit specific sources. */
561 static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
562 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
563 sljit_si dst, sljit_sw dstw,
564 sljit_si src1, sljit_sw src1w,
565 sljit_si src2, sljit_sw src2w);
566
567 static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
568 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
569 sljit_si dst, sljit_sw dstw,
570 sljit_si src1, sljit_sw src1w,
571 sljit_si src2, sljit_sw src2w);
572
573 static sljit_si emit_mov(struct sljit_compiler *compiler,
574 sljit_si dst, sljit_sw dstw,
575 sljit_si src, sljit_sw srcw);
576
/* Saves the status flags on the stack without disturbing them: LEA (which,
   unlike ADD, leaves the flags intact) moves ESP/RSP up one word, then
   PUSHF stores the flags there, so the net stack-pointer change is zero.
   NOTE(review): the word at the stack top is presumably reserved for this
   slot by the prologue — confirm against sljitNativeX86_32/64.c. */
577 static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
578 {
579 sljit_ub *inst;
580
581 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
582 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
583 FAIL_IF(!inst);
584 INC_SIZE(5);
585 #else
586 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
587 FAIL_IF(!inst);
588 INC_SIZE(6);
589 *inst++ = REX_W;
590 #endif
591 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
/* 0x64 0x24 = ModRM + SIB bytes encoding [esp/rsp + disp8]. */
592 *inst++ = 0x64;
593 *inst++ = 0x24;
594 *inst++ = (sljit_ub)sizeof(sljit_sw);
595 *inst++ = PUSHF;
596 compiler->flags_saved = 1;
597 return SLJIT_SUCCESS;
598 }
599
/* Restores the flags stored by emit_save_flags: POPF reloads them from the
   stack, then LEA moves ESP/RSP back down one word (again without touching
   the just-restored flags). keep_flags is stored back into
   compiler->flags_saved so callers can keep the saved state alive. */
601 static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
602 {
603 sljit_ub *inst;
604
605 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
606 inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
607 FAIL_IF(!inst);
608 INC_SIZE(5);
609 *inst++ = POPF;
610 #else
611 inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
612 FAIL_IF(!inst);
613 INC_SIZE(6);
614 *inst++ = POPF;
615 *inst++ = REX_W;
616 #endif
617 *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
/* 0x64 0x24 = ModRM + SIB bytes encoding [esp/rsp + disp8]. */
618 *inst++ = 0x64;
619 *inst++ = 0x24;
620 *inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
621 compiler->flags_saved = keep_flags;
622 return SLJIT_SUCCESS;
623 }
623
624 #ifdef _WIN32
625 #include <malloc.h>
626
627 static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
628 {
629 /* Workaround for calling the internal _chkstk() function on Windows.
630 This function touches all 4k pages that belong to the requested stack
631 space, whose size is passed in local_size. This is necessary on Windows,
632 where the stack can only grow in 4k steps. It just burns CPU cycles if
633 the stack is already large enough; but since that cannot be known in
634 advance, it must always be called. I think this is a bad design in
635 general, even if it has some reasons. */
636 *(sljit_si*)alloca(local_size) = 0;
637 }
638
639 #endif
640
641 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
642 #include "sljitNativeX86_32.c"
643 #else
644 #include "sljitNativeX86_64.c"
645 #endif
646
/* Emits a word-sized move from src/srcw to dst/dstw; either side may be a
   register or memory operand, and src may also be an immediate. A
   SLJIT_UNUSED destination still loads a memory source into TMP_REGISTER
   (NOTE(review): presumably to preserve the memory access itself —
   confirm). Memory-to-memory moves go through TMP_REGISTER, and 64-bit
   immediates that do not fit a sign-extended 32-bit field go through
   emit_load_imm64 / TMP_REG2. */
647 static sljit_si emit_mov(struct sljit_compiler *compiler,
648 sljit_si dst, sljit_sw dstw,
649 sljit_si src, sljit_sw srcw)
650 {
651 sljit_ub* inst;
652
653 if (dst == SLJIT_UNUSED) {
654 /* No destination, doesn't need to setup flags. */
655 if (src & SLJIT_MEM) {
656 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
657 FAIL_IF(!inst);
658 *inst = MOV_r_rm;
659 }
660 return SLJIT_SUCCESS;
661 }
/* Register source: store register into dst (register or memory). */
662 if (src <= TMP_REGISTER) {
663 inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
664 FAIL_IF(!inst);
665 *inst = MOV_rm_r;
666 return SLJIT_SUCCESS;
667 }
668 if (src & SLJIT_IMM) {
669 if (dst <= TMP_REGISTER) {
670 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
671 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
672 #else
/* 64-bit mode: wide immediates need the full 64-bit load; halfword
   immediates fall through to the generic MOV_rm_i32 path below. */
673 if (!compiler->mode32) {
674 if (NOT_HALFWORD(srcw))
675 return emit_load_imm64(compiler, dst, srcw);
676 }
677 else
678 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
679 #endif
680 }
681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* Wide immediate into memory: materialize it in TMP_REG2 first. */
682 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
683 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
684 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
685 FAIL_IF(!inst);
686 *inst = MOV_rm_r;
687 return SLJIT_SUCCESS;
688 }
689 #endif
690 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
691 FAIL_IF(!inst);
692 *inst = MOV_rm_i32;
693 return SLJIT_SUCCESS;
694 }
/* Memory source into a register destination. */
695 if (dst <= TMP_REGISTER) {
696 inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
697 FAIL_IF(!inst);
698 *inst = MOV_r_rm;
699 return SLJIT_SUCCESS;
700 }
701
702 /* Memory to memory move. Requires two instruction. */
703 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
704 FAIL_IF(!inst);
705 *inst = MOV_r_rm;
706 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
707 FAIL_IF(!inst);
708 *inst = MOV_rm_r;
709 return SLJIT_SUCCESS;
710 }
711
/* Convenience wrapper: propagate any emit_mov failure to the caller. */
712 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
713 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
714
/* Emits a zero-operand operation: SLJIT_BREAKPOINT (INT3), SLJIT_NOP, or
   one of the widening multiply/divide operations, which use the x86
   fixed EAX/EDX register pair. SCRATCH_REG1/SCRATCH_REG2 serve as the
   implicit operands/results; the compile-time asserts below pin the
   register mapping this encoding depends on. */
715 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
716 {
717 sljit_ub *inst;
718 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
719 sljit_si size;
720 #endif
721
722 CHECK_ERROR();
723 check_sljit_emit_op0(compiler, op);
724
725 switch (GET_OPCODE(op)) {
726 case SLJIT_BREAKPOINT:
727 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
728 FAIL_IF(!inst);
729 INC_SIZE(1);
730 *inst = INT3;
731 break;
732 case SLJIT_NOP:
733 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
734 FAIL_IF(!inst);
735 INC_SIZE(1);
736 *inst = NOP;
737 break;
738 case SLJIT_UMUL:
739 case SLJIT_SMUL:
740 case SLJIT_UDIV:
741 case SLJIT_SDIV:
/* mul/div clobber the flags, so any saved-flags state is invalidated. */
742 compiler->flags_saved = 0;
743 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
744 #ifdef _WIN64
745 SLJIT_COMPILE_ASSERT(
746 reg_map[SLJIT_SCRATCH_REG1] == 0
747 && reg_map[SLJIT_SCRATCH_REG2] == 2
748 && reg_map[TMP_REGISTER] > 7,
749 invalid_register_assignment_for_div_mul);
750 #else
751 SLJIT_COMPILE_ASSERT(
752 reg_map[SLJIT_SCRATCH_REG1] == 0
753 && reg_map[SLJIT_SCRATCH_REG2] < 7
754 && reg_map[TMP_REGISTER] == 2,
755 invalid_register_assignment_for_div_mul);
756 #endif
757 compiler->mode32 = op & SLJIT_INT_OP;
758 #endif
759
760 op = GET_OPCODE(op);
/* Unsigned divide: save SCRATCH_REG2 where needed and zero the high
   half of the dividend (EDX/RDX) with a self-XOR. */
761 if (op == SLJIT_UDIV) {
762 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
763 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0);
764 inst = emit_x86_instruction(compiler, 1, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0);
765 #else
766 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
767 #endif
768 FAIL_IF(!inst);
769 *inst = XOR_r_rm;
770 }
771
/* Signed divide: sign-extend the dividend with CDQ (or REX.W CDQ =
   CQO in 64-bit mode). */
772 if (op == SLJIT_SDIV) {
773 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
774 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_SCRATCH_REG2, 0);
775 #endif
776
777 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
778 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
779 FAIL_IF(!inst);
780 INC_SIZE(1);
781 *inst = CDQ;
782 #else
783 if (compiler->mode32) {
784 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
785 FAIL_IF(!inst);
786 INC_SIZE(1);
787 *inst = CDQ;
788 } else {
789 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
790 FAIL_IF(!inst);
791 INC_SIZE(2);
792 *inst++ = REX_W;
793 *inst = CDQ;
794 }
795 #endif
796 }
797
/* Emit the F7-group instruction; the concrete operation (MUL/IMUL/
   DIV/IDIV) is OR'd into the ModRM byte in the switch below. */
798 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
799 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
800 FAIL_IF(!inst);
801 INC_SIZE(2);
802 *inst++ = GROUP_F7;
803 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_SCRATCH_REG2]);
804 #else
805 #ifdef _WIN64
806 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
807 #else
808 size = (!compiler->mode32) ? 3 : 2;
809 #endif
810 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
811 FAIL_IF(!inst);
812 INC_SIZE(size);
813 #ifdef _WIN64
814 if (!compiler->mode32)
815 *inst++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
816 else if (op >= SLJIT_UDIV)
817 *inst++ = REX_B;
818 *inst++ = GROUP_F7;
819 *inst = MOD_REG | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_SCRATCH_REG2]);
820 #else
821 if (!compiler->mode32)
822 *inst++ = REX_W;
823 *inst++ = GROUP_F7;
824 *inst = MOD_REG | reg_map[SLJIT_SCRATCH_REG2];
825 #endif
826 #endif
827 switch (op) {
828 case SLJIT_UMUL:
829 *inst |= MUL;
830 break;
831 case SLJIT_SMUL:
832 *inst |= IMUL;
833 break;
834 case SLJIT_UDIV:
835 *inst |= DIV;
836 break;
837 case SLJIT_SDIV:
838 *inst |= IDIV;
839 break;
840 }
841 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
/* Copy RDX (= TMP_REGISTER here, see the assert above) back into
   SCRATCH_REG2, which holds the high result / remainder. */
842 EMIT_MOV(compiler, SLJIT_SCRATCH_REG2, 0, TMP_REGISTER, 0);
843 #endif
844 break;
845 }
846
847 return SLJIT_SUCCESS;
848 }
849
/* Emits a single one-byte instruction or prefix as its own buffer record. */
850 #define ENCODE_PREFIX(prefix) \
851 do { \
852 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
853 FAIL_IF(!inst); \
854 INC_SIZE(1); \
855 *inst = (prefix); \
856 } while (0)
857
/* Emits a byte-sized move with zero or sign extension (selected by `sign`).
   On x86-32 only registers with reg_map < 4 (EAX..EBX) have byte-accessible
   low parts, so other registers are either extended in place with
   shift/mask tricks or shuffled through a byte-capable work register
   (swapped in with XCHG) before storing to memory. */
858 static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
859 sljit_si dst, sljit_sw dstw,
860 sljit_si src, sljit_sw srcw)
861 {
862 sljit_ub* inst;
863 sljit_si dst_r;
864 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
865 sljit_si work_r;
866 #endif
867
868 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
869 compiler->mode32 = 0;
870 #endif
871
872 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
873 return SLJIT_SUCCESS; /* Empty instruction. */
874
875 if (src & SLJIT_IMM) {
876 if (dst <= TMP_REGISTER) {
877 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
878 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
879 #else
880 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
881 FAIL_IF(!inst);
882 *inst = MOV_rm_i32;
883 return SLJIT_SUCCESS;
884 #endif
885 }
886 inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
887 FAIL_IF(!inst);
888 *inst = MOV_rm8_i8;
889 return SLJIT_SUCCESS;
890 }
891
892 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
893
/* Register source going to memory: no extension needed, only a byte
   store; pick a byte-capable register to store from. */
894 if ((dst & SLJIT_MEM) && src <= TMP_REGISTER) {
895 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
896 if (reg_map[src] >= 4) {
897 SLJIT_ASSERT(dst_r == TMP_REGISTER);
898 EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
899 } else
900 dst_r = src;
901 #else
902 dst_r = src;
903 #endif
904 }
905 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
906 else if (src <= TMP_REGISTER && reg_map[src] >= 4) {
907 /* src, dst are registers. */
908 SLJIT_ASSERT(dst >= SLJIT_SCRATCH_REG1 && dst <= TMP_REGISTER);
909 if (reg_map[dst] < 4) {
910 if (dst != src)
911 EMIT_MOV(compiler, dst, 0, src, 0);
912 inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
913 FAIL_IF(!inst);
914 *inst++ = GROUP_0F;
915 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
916 }
917 else {
/* Neither register has a byte form: extend in place, with
   shl+sar 24 for sign extension or an AND 0xff mask otherwise. */
918 if (dst != src)
919 EMIT_MOV(compiler, dst, 0, src, 0);
920 if (sign) {
921 /* shl reg, 24 */
922 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
923 FAIL_IF(!inst);
924 *inst |= SHL;
925 /* sar reg, 24 */
926 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
927 FAIL_IF(!inst);
928 *inst |= SAR;
929 }
930 else {
931 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
932 FAIL_IF(!inst);
933 *(inst + 1) |= AND;
934 }
935 }
936 return SLJIT_SUCCESS;
937 }
938 #endif
939 else {
940 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
941 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
942 FAIL_IF(!inst);
943 *inst++ = GROUP_0F;
944 *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
945 }
946
947 if (dst & SLJIT_MEM) {
948 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
949 if (dst_r == TMP_REGISTER) {
950 /* Find a non-used register, whose reg_map[src] < 4.
   The base register of dst is in its low nibble and the index
   register in the next nibble, so both must be avoided. */
951 if ((dst & 0xf) == SLJIT_SCRATCH_REG1) {
952 if ((dst & 0xf0) == (SLJIT_SCRATCH_REG2 << 4))
953 work_r = SLJIT_SCRATCH_REG3;
954 else
955 work_r = SLJIT_SCRATCH_REG2;
956 }
957 else {
958 if ((dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4))
959 work_r = SLJIT_SCRATCH_REG1;
960 else if ((dst & 0xf) == SLJIT_SCRATCH_REG2)
961 work_r = SLJIT_SCRATCH_REG3;
962 else
963 work_r = SLJIT_SCRATCH_REG2;
964 }
965
/* Swap the value into work_r (one-byte XCHG EAX form when possible),
   do the byte store, then swap back to restore work_r. */
966 if (work_r == SLJIT_SCRATCH_REG1) {
967 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]);
968 }
969 else {
970 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
971 FAIL_IF(!inst);
972 *inst = XCHG_r_rm;
973 }
974
975 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
976 FAIL_IF(!inst);
977 *inst = MOV_rm8_r8;
978
979 if (work_r == SLJIT_SCRATCH_REG1) {
980 ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REGISTER]);
981 }
982 else {
983 inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
984 FAIL_IF(!inst);
985 *inst = XCHG_r_rm;
986 }
987 }
988 else {
989 inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
990 FAIL_IF(!inst);
991 *inst = MOV_rm8_r8;
992 }
993 #else
/* x86-64: any register is byte-addressable with a REX prefix. */
994 inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
995 FAIL_IF(!inst);
996 *inst = MOV_rm8_r8;
997 #endif
998 }
999
1000 return SLJIT_SUCCESS;
1001 }
1002
/* Emits a 16-bit move with zero or sign extension (selected by `sign`).
   Loads use MOVSX/MOVZX r, r/m16; stores use the 66-prefixed (operand
   size override) MOV forms. Goes through TMP_REGISTER when the source is
   in memory and the destination is too. */
1003 static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
1004 sljit_si dst, sljit_sw dstw,
1005 sljit_si src, sljit_sw srcw)
1006 {
1007 sljit_ub* inst;
1008 sljit_si dst_r;
1009
1010 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1011 compiler->mode32 = 0;
1012 #endif
1013
1014 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
1015 return SLJIT_SUCCESS; /* Empty instruction. */
1016
1017 if (src & SLJIT_IMM) {
1018 if (dst <= TMP_REGISTER) {
1019 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1020 return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1021 #else
1022 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1023 FAIL_IF(!inst);
1024 *inst = MOV_rm_i32;
1025 return SLJIT_SUCCESS;
1026 #endif
1027 }
1028 inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1029 FAIL_IF(!inst);
1030 *inst = MOV_rm_i32;
1031 return SLJIT_SUCCESS;
1032 }
1033
1034 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
1035
/* Register to memory: no extension needed, store src directly below. */
1036 if ((dst & SLJIT_MEM) && src <= TMP_REGISTER)
1037 dst_r = src;
1038 else {
1039 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1040 FAIL_IF(!inst);
1041 *inst++ = GROUP_0F;
1042 *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1043 }
1044
1045 if (dst & SLJIT_MEM) {
1046 inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1047 FAIL_IF(!inst);
1048 *inst = MOV_rm_r;
1049 }
1050
1051 return SLJIT_SUCCESS;
1052 }
1053
/* Emits a one-operand group F7 instruction (NOT / NEG selected by the
   'opcode' ModRM reg-field extension). The operation is performed in
   place when possible, otherwise routed through TMP_REGISTER. */
static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: operate on TMP_REGISTER only (flag effects remain). */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst <= TMP_REGISTER) {
		/* Register destination: copy first, then operate in place. */
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	/* Memory destination different from the source: compute in
	   TMP_REGISTER, then store. */
	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	return SLJIT_SUCCESS;
}
1092
/* Emits NOT followed by "OR reg, reg". The x86 NOT instruction does not
   affect the flags, so the self-OR is appended to make the status flags
   reflect the result (needed when SLJIT_SET_E is requested). */
static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: only the flag update matters. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (dst <= TMP_REGISTER) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	/* Memory destination: compute in TMP_REGISTER, then store. */
	EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	return SLJIT_SUCCESS;
}
1132
/* Emits a count-leading-zeros sequence: CLZ(x) = (bits - 1) - BSR(x).
   BSR leaves its destination undefined when the source is zero, so the
   destination is preloaded with a sentinel (bits + bits - 1) that XORs
   down to 'bits' in that case; the BSR result replaces it via CMOVNE
   (or a conditional-jump emulation when CMOV is unavailable). */
static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		/* (~src) >> (bits - 1) is zero exactly when CLZ would be zero,
		   and SHR sets ZF from the result. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* BSR has no immediate form; materialize the constant first. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_SCRATCH_REG1 && (dst & 0xf0) != (SLJIT_SCRATCH_REG1 << 4))
			dst_r = SLJIT_SCRATCH_REG1;
		else if ((dst & 0xf) != SLJIT_SCRATCH_REG2 && (dst & 0xf0) != (SLJIT_SCRATCH_REG2 << 4))
			dst_r = SLJIT_SCRATCH_REG2;
		else
			dst_r = SLJIT_SCRATCH_REG3;
		/* Park the borrowed register's old value in dst; the final XCHG
		   below simultaneously stores the result and restores it. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		/* Keep the sentinel only when BSR reported a zero source (ZF set). */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
		/* No CMOV: skip a hand-encoded "MOV dst_r, TMP_REGISTER" with JE. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REGISTER];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REGISTER] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REGISTER];
#endif
	}

	/* XOR with (bits - 1) converts the BSR index (or sentinel) to CLZ. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* Stores the result and restores the borrowed register at once. */
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1241
/* Single-operand entry point: dispatches MOV variants (with optional
   pre/post address update for MOVU forms) and the NOT / NEG / CLZ
   unary operations. */
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
	/* No extra (stack-mapped) registers on x86-64. */
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			/* Same-register int move without a type cast is a no-op. */
			if (src <= TMP_REGISTER && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			/* Normalize signed/unsigned int moves to the cheaper
			   encoding (zero-extending 32-bit load, sign-extending
			   immediate store). */
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			/* MOVU: same as MOV plus an address register update. */
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			/* Pre-truncate immediates to the operand width. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* MOVU pre-update of the source address register via LEA. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Extra-register destination that needs a cast or a memory
		   source: route through TMP_REGISTER, stored back below. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* MOVU post-update of the destination address register via LEA. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}
1410
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emits "op arg, immw". On x86-64 a 64-bit immediate that does not fit
   in a sign-extended 32-bit field must be materialized in TMP_REG2 and
   applied with the register form (op_mr). */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

/* Short "op eax, imm32" form (no ModRM byte); REX.W selects 64 bit. */
#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

/* 32-bit variants: every immediate fits, no fallback path needed. */
#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
1440
/* Emits a commutative (cumulative) two-operand ALU operation
   (ADD / ADC / OR / AND / XOR). The four opcode arguments select the
   reg,r/m form, the r/m,reg form, the immediate group extension and
   the short eax,imm form respectively. Commutativity allows the
   dst == src2 case to be handled in place. */
static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* The short eax,imm form saves a byte only for immediates
			   outside the signed 8-bit range. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst <= TMP_REGISTER) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst <= TMP_REGISTER) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (src1 <= TMP_REGISTER) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst <= TMP_REGISTER) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1556
/* Emits a non-commutative two-operand ALU operation (SUB / SBB).
   Same opcode-selection scheme as emit_cum_binary, but the
   dst == src2 in-place shortcut is not applicable because the operand
   order matters. */
static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
			/* The short eax,imm form saves a byte only for immediates
			   outside the signed 8-bit range. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_SCRATCH_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst <= TMP_REGISTER) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (src2 <= TMP_REGISTER) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	/* dst == src2 must be excluded: src1 would overwrite src2 first. */
	if (dst <= TMP_REGISTER && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1638
1639 static sljit_si emit_mul(struct sljit_compiler *compiler,
1640 sljit_si dst, sljit_sw dstw,
1641 sljit_si src1, sljit_sw src1w,
1642 sljit_si src2, sljit_sw src2w)
1643 {
1644 sljit_ub* inst;
1645 sljit_si dst_r;
1646
1647 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
1648
1649 /* Register destination. */
1650 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1651 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1652 FAIL_IF(!inst);
1653 *inst++ = GROUP_0F;
1654 *inst = IMUL_r_rm;
1655 }
1656 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1657 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1658 FAIL_IF(!inst);
1659 *inst++ = GROUP_0F;
1660 *inst = IMUL_r_rm;
1661 }
1662 else if (src1 & SLJIT_IMM) {
1663 if (src2 & SLJIT_IMM) {
1664 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1665 src2 = dst_r;
1666 src2w = 0;
1667 }
1668
1669 if (src1w <= 127 && src1w >= -128) {
1670 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1671 FAIL_IF(!inst);
1672 *inst = IMUL_r_rm_i8;
1673 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1674 FAIL_IF(!inst);
1675 INC_SIZE(1);
1676 *inst = (sljit_sb)src1w;
1677 }
1678 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1679 else {
1680 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1681 FAIL_IF(!inst);
1682 *inst = IMUL_r_rm_i32;
1683 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1684 FAIL_IF(!inst);
1685 INC_SIZE(4);
1686 *(sljit_sw*)inst = src1w;
1687 }
1688 #else
1689 else if (IS_HALFWORD(src1w)) {
1690 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1691 FAIL_IF(!inst);
1692 *inst = IMUL_r_rm_i32;
1693 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1694 FAIL_IF(!inst);
1695 INC_SIZE(4);
1696 *(sljit_si*)inst = (sljit_si)src1w;
1697 }
1698 else {
1699 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1700 if (dst_r != src2)
1701 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1702 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1703 FAIL_IF(!inst);
1704 *inst++ = GROUP_0F;
1705 *inst = IMUL_r_rm;
1706 }
1707 #endif
1708 }
1709 else if (src2 & SLJIT_IMM) {
1710 /* Note: src1 is NOT immediate. */
1711
1712 if (src2w <= 127 && src2w >= -128) {
1713 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1714 FAIL_IF(!inst);
1715 *inst = IMUL_r_rm_i8;
1716 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1717 FAIL_IF(!inst);
1718 INC_SIZE(1);
1719 *inst = (sljit_sb)src2w;
1720 }
1721 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1722 else {
1723 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1724 FAIL_IF(!inst);
1725 *inst = IMUL_r_rm_i32;
1726 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1727 FAIL_IF(!inst);
1728 INC_SIZE(4);
1729 *(sljit_sw*)inst = src2w;
1730 }
1731 #else
1732 else if (IS_HALFWORD(src2w)) {
1733 inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1734 FAIL_IF(!inst);
1735 *inst = IMUL_r_rm_i32;
1736 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1737 FAIL_IF(!inst);
1738 INC_SIZE(4);
1739 *(sljit_si*)inst = (sljit_si)src2w;
1740 }
1741 else {
1742 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1743 if (dst_r != src1)
1744 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1745 inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1746 FAIL_IF(!inst);
1747 *inst++ = GROUP_0F;
1748 *inst = IMUL_r_rm;
1749 }
1750 #endif
1751 }
1752 else {
1753 /* Neither argument is immediate. */
1754 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1755 dst_r = TMP_REGISTER;
1756 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1757 inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1758 FAIL_IF(!inst);
1759 *inst++ = GROUP_0F;
1760 *inst = IMUL_r_rm;
1761 }
1762
1763 if (dst_r == TMP_REGISTER)
1764 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1765
1766 return SLJIT_SUCCESS;
1767 }
1768
1769 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1770 sljit_si dst, sljit_sw dstw,
1771 sljit_si src1, sljit_sw src1w,
1772 sljit_si src2, sljit_sw src2w)
1773 {
1774 sljit_ub* inst;
1775 sljit_si dst_r, done = 0;
1776
1777 /* These cases better be left to handled by normal way. */
1778 if (!keep_flags) {
1779 if (dst == src1 && dstw == src1w)
1780 return SLJIT_ERR_UNSUPPORTED;
1781 if (dst == src2 && dstw == src2w)
1782 return SLJIT_ERR_UNSUPPORTED;
1783 }
1784
1785 dst_r = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
1786
1787 if (src1 <= TMP_REGISTER) {
1788 if (src2 <= TMP_REGISTER || src2 == TMP_REGISTER) {
1789 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1790 FAIL_IF(!inst);
1791 *inst = LEA_r_m;
1792 done = 1;
1793 }
1794 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1795 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1796 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1797 #else
1798 if (src2 & SLJIT_IMM) {
1799 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1800 #endif
1801 FAIL_IF(!inst);
1802 *inst = LEA_r_m;
1803 done = 1;
1804 }
1805 }
1806 else if (src2 <= TMP_REGISTER) {
1807 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1808 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1809 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1810 #else
1811 if (src1 & SLJIT_IMM) {
1812 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1813 #endif
1814 FAIL_IF(!inst);
1815 *inst = LEA_r_m;
1816 done = 1;
1817 }
1818 }
1819
1820 if (done) {
1821 if (dst_r == TMP_REGISTER)
1822 return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
1823 return SLJIT_SUCCESS;
1824 }
1825 return SLJIT_ERR_UNSUPPORTED;
1826 }
1827
/* Emits a CMP (flag-setting subtraction whose result is discarded).
   Prefers the short "CMP eax, imm32" form, then the register forms,
   and falls back to routing src1 through TMP_REGISTER. */
static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	/* Short eax,imm form; only worthwhile outside the imm8 range. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 <= TMP_REGISTER) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 <= TMP_REGISTER && !(src1 & SLJIT_IMM)) {
		/* Reversed form: CMP r/m, reg keeps the operand order. */
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* Both immediate: materialize src1 first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}
1878
1879 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1880 sljit_si src1, sljit_sw src1w,
1881 sljit_si src2, sljit_sw src2w)
1882 {
1883 sljit_ub* inst;
1884
1885 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1886 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1887 #else
1888 if (src1 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1889 #endif
1890 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1891 return SLJIT_SUCCESS;
1892 }
1893
1894 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1895 if (src2 == SLJIT_SCRATCH_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1896 #else
1897 if (src2 == SLJIT_SCRATCH_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1898 #endif
1899 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1900 return SLJIT_SUCCESS;
1901 }
1902
1903 if (src1 <= TMP_REGISTER) {
1904 if (src2 & SLJIT_IMM) {
1905 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1906 if (IS_HALFWORD(src2w) || compiler->mode32) {
1907 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1908 FAIL_IF(!inst);
1909 *inst = GROUP_F7;
1910 }
1911 else {
1912 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1913 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1914 FAIL_IF(!inst);
1915 *inst = TEST_rm_r;
1916 }
1917 #else
1918 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1919 FAIL_IF(!inst);
1920 *inst = GROUP_F7;
1921 #endif
1922 }
1923 else {
1924 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1925 FAIL_IF(!inst);
1926 *inst = TEST_rm_r;
1927 }
1928 return SLJIT_SUCCESS;
1929 }
1930
1931 if (src2 <= TMP_REGISTER) {
1932 if (src1 & SLJIT_IMM) {
1933 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1934 if (IS_HALFWORD(src1w) || compiler->mode32) {
1935 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1936 FAIL_IF(!inst);
1937 *inst = GROUP_F7;
1938 }
1939 else {
1940 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1941 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1942 FAIL_IF(!inst);
1943 *inst = TEST_rm_r;
1944 }
1945 #else
1946 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1947 FAIL_IF(!inst);
1948 *inst = GROUP_F7;
1949 #endif
1950 }
1951 else {
1952 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1953 FAIL_IF(!inst);
1954 *inst = TEST_rm_r;
1955 }
1956 return SLJIT_SUCCESS;
1957 }
1958
1959 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1960 if (src2 & SLJIT_IMM) {
1961 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1962 if (IS_HALFWORD(src2w) || compiler->mode32) {
1963 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1964 FAIL_IF(!inst);
1965 *inst = GROUP_F7;
1966 }
1967 else {
1968 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1969 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1970 FAIL_IF(!inst);
1971 *inst = TEST_rm_r;
1972 }
1973 #else
1974 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1975 FAIL_IF(!inst);
1976 *inst = GROUP_F7;
1977 #endif
1978 }
1979 else {
1980 inst = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1981 FAIL_IF(!inst);
1982 *inst = TEST_rm_r;
1983 }
1984 return SLJIT_SUCCESS;
1985 }
1986
/* Emits a shift/rotate ('mode' selects the group-2 operation). x86
   variable shifts require the count in cl (SLJIT_PREF_SHIFT_REG), so
   the non-immediate cases must shuffle operands around ecx while
   preserving its original value. */
static sljit_si emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	/* Easy cases: the count is an immediate or already in cl. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* ecx is both count and destination: shift a copy, then
			   move the result into ecx. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst <= TMP_REGISTER) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* Destination is ecx: shift in TMP_REGISTER, load the count
		   into ecx, then overwrite ecx with the result. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst <= TMP_REGISTER && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Register destination not involved with ecx: save ecx in
		   TMP_REGISTER, shift in place, restore ecx. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
2075
2076 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2077 sljit_ub mode, sljit_si set_flags,
2078 sljit_si dst, sljit_sw dstw,
2079 sljit_si src1, sljit_sw src1w,
2080 sljit_si src2, sljit_sw src2w)
2081 {
2082 /* The CPU does not set flags if the shift count is 0. */
2083 if (src2 & SLJIT_IMM) {
2084 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2085 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2086 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2087 #else
2088 if ((src2w & 0x1f) != 0)
2089 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2090 #endif
2091 if (!set_flags)
2092 return emit_mov(compiler, dst, dstw, src1, src1w);
2093 /* OR dst, src, 0 */
2094 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2095 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2096 }
2097
2098 if (!set_flags)
2099 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2100
2101 if (!(dst <= TMP_REGISTER))
2102 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2103
2104 FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
2105
2106 if (dst <= TMP_REGISTER)
2107 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2108 return SLJIT_SUCCESS;
2109 }
2110
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* Entry point for two source operand operations: dst = src1 op src2.
	   Validates the arguments, normalizes local (stack) addressing and
	   virtual registers, then dispatches to the specialized emitters
	   while maintaining the lazily saved EFLAGS state. */
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* SLJIT_INT_OP selects 32 bit operand size on x86-64. */
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	/* Opcodes from SLJIT_MUL upward share this flag handling;
	   ADD/SUB/ADDC/SUBC handle flags in their own cases below. */
	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* A flag-less add may be encoded as LEA, which leaves
			   EFLAGS untouched. */
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* Subtracting an immediate can also use LEA by adding
			   the negated constant. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* A subtraction with unused destination is a compare. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* An AND with unused destination is a bit test (TEST). */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
2205
2206 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2207 {
2208 check_sljit_get_register_index(reg);
2209 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2210 if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
2211 || reg == SLJIT_SAVED_EREG1 || reg == SLJIT_SAVED_EREG2)
2212 return -1;
2213 #endif
2214 return reg_map[reg];
2215 }
2216
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	/* Copy a caller supplied, pre-encoded machine instruction of
	   1..15 bytes (the x86 instruction length limit) verbatim into
	   the code stream. */
	sljit_ub *inst;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	/* NOTE(review): INC_SIZE is a macro defined elsewhere that appears
	   to rely on the local variable name 'inst' — do not rename it. */
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}
2232
2233 /* --------------------------------------------------------------------- */
2234 /* Floating point operators */
2235 /* --------------------------------------------------------------------- */
2236
2237 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2238
/* Backing storage for the SSE2 constant masks: up to 3 words of
   alignment slack + two 16 byte vectors per precision.
   sse2_buffer is the 16 byte aligned view, set up by init_compiler(). */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;
2242
2243 static void init_compiler(void)
2244 {
2245 sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2246 /* Single precision constants. */
2247 sse2_buffer[0] = 0x80000000;
2248 sse2_buffer[4] = 0x7fffffff;
2249 /* Double precision constants. */
2250 sse2_buffer[8] = 0;
2251 sse2_buffer[9] = 0x80000000;
2252 sse2_buffer[12] = 0xffffffff;
2253 sse2_buffer[13] = 0x7fffffff;
2254 }
2255
2256 #endif
2257
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
	/* Report whether floating point code generation is usable.
	   FP support requires SSE2: probed at run time when
	   SLJIT_DETECT_SSE2 is enabled, otherwise assumed present;
	   builds without SLJIT_SSE2 never support it. */
#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1) /* -1 means "not probed yet". */
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
#else /* SLJIT_SSE2 */
	return 0;
#endif
}
2272
2273 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2274
2275 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2276 sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2277 {
2278 sljit_ub *inst;
2279
2280 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2281 FAIL_IF(!inst);
2282 *inst++ = GROUP_0F;
2283 *inst = opcode;
2284 return SLJIT_SUCCESS;
2285 }
2286
2287 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2288 sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2289 {
2290 sljit_ub *inst;
2291
2292 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2293 FAIL_IF(!inst);
2294 *inst++ = GROUP_0F;
2295 *inst = opcode;
2296 return SLJIT_SUCCESS;
2297 }
2298
static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	/* Load a scalar (MOVSS/MOVSD xmm, xmm/mem) into xmm register dst. */
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}
2304
static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	/* Store a scalar (MOVSS/MOVSD xmm/mem, xmm) from xmm register src. */
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}
2310
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	/* Single source floating point operations: CMPD (compare), MOVD
	   (move), NEGD (negate), ABSD (absolute value). SLJIT_SINGLE_OP
	   selects single instead of double precision. */
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Scalar SSE2 instructions need no REX.W. */
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_CMPD) {
		compiler->flags_saved = 0;
		/* UCOMIS* takes its first operand in a register, so a memory
		   "dst" has to be loaded into TMP_FREG first. */
		if (dst <= SLJIT_FLOAT_REG6)
			dst_r = dst;
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), dst_r, src, srcw);
	}

	if (op == SLJIT_MOVD) {
		/* Plain move; memory-to-memory goes through TMP_FREG. */
		if (dst <= SLJIT_FLOAT_REG6)
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (src <= SLJIT_FLOAT_REG6)
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	/* NEGD / ABSD: bring the source into the register that will be
	   modified in place (dst itself, or TMP_FREG when dst is memory). */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG6) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEGD:
		/* Flip the sign bit with the mask built by init_compiler. */
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABSD:
		/* Clear the sign bit with the mask built by init_compiler. */
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2368
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* Two source floating point arithmetic (ADDD/SUBD/MULD/DIVD).
	   SSE2 scalar arithmetic is destructive (dst_r op= src2), so src1
	   is first moved into dst_r; TMP_FREG is used when dst is memory
	   or would be clobbered before being read. */
	sljit_si dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Scalar SSE2 instructions need no REX.W. */
	compiler->mode32 = 1;
#endif

	if (dst <= SLJIT_FLOAT_REG6) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADDD || op == SLJIT_MULD)) {
			/* Swap arguments; add and mul are commutative. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			/* dst == src2 for a non-commutative op: compute into
			   TMP_FREG so src2 is not destroyed prematurely. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADDD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUBD:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MULD:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIVD:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2426
2427 #else
2428
2429 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2430 sljit_si dst, sljit_sw dstw,
2431 sljit_si src, sljit_sw srcw)
2432 {
2433 CHECK_ERROR();
2434 /* Should cause an assertion fail. */
2435 check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2436 compiler->error = SLJIT_ERR_UNSUPPORTED;
2437 return SLJIT_ERR_UNSUPPORTED;
2438 }
2439
2440 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2441 sljit_si dst, sljit_sw dstw,
2442 sljit_si src1, sljit_sw src1w,
2443 sljit_si src2, sljit_sw src2w)
2444 {
2445 CHECK_ERROR();
2446 /* Should cause an assertion fail. */
2447 check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2448 compiler->error = SLJIT_ERR_UNSUPPORTED;
2449 return SLJIT_ERR_UNSUPPORTED;
2450 }
2451
2452 #endif
2453
2454 /* --------------------------------------------------------------------- */
2455 /* Conditional instructions */
2456 /* --------------------------------------------------------------------- */
2457
2458 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2459 {
2460 sljit_ub *inst;
2461 struct sljit_label *label;
2462
2463 CHECK_ERROR_PTR();
2464 check_sljit_emit_label(compiler);
2465
2466 /* We should restore the flags before the label,
2467 since other taken jumps has their own flags as well. */
2468 if (SLJIT_UNLIKELY(compiler->flags_saved))
2469 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2470
2471 if (compiler->last_label && compiler->last_label->size == compiler->size)
2472 return compiler->last_label;
2473
2474 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2475 PTR_FAIL_IF(!label);
2476 set_label(label, compiler);
2477
2478 inst = (sljit_ub*)ensure_buf(compiler, 2);
2479 PTR_FAIL_IF(!inst);
2480
2481 *inst++ = 0;
2482 *inst++ = 0;
2483
2484 return label;
2485 }
2486
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	/* Record a (conditional) jump or call whose target is resolved
	   later. Only a two byte record enters the instruction stream
	   now; the branch itself is encoded during code generation. */
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Jumps (conditional ones test EFLAGS) must see the real
		   flag values; call types above SLJIT_JUMP do not. */
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls with arguments need the argument setup code first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	/* Two byte record: zero length byte plus the jump type tag. */
	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}
2523
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	/* Indirect jump or call: the target comes from src, which can be
	   a register, a memory operand or an immediate address. */
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Jump types need the real flags; calls do not. */
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
		/* Argument setup may clobber the location holding the
		   target, so move the target out of the way first. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		/* NOTE(review): stack relative targets shift by one word for
		   CALL3 — presumably call_with_args pushes a word here;
		   verify against call_with_args. */
		if (src == SLJIT_MEM1(SLJIT_LOCALS_REG) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_SCRATCH_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Immediate target: record it like sljit_emit_jump does and
		   encode the branch during code generation. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		/* Zero length byte plus the jump type tag. */
		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* Group FF indirect jmp/call with a register or memory
		   operand; the reg field selects call vs jmp. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}
2592
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	/* Materialize a condition flag as a 0/1 value in dst, either as a
	   plain move or combined with dst via an arithmetic op (e.g.
	   SLJIT_OR). Uses SETcc, working around the x86-32 restriction
	   that not every register has an addressable low byte. */
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	/* The tested flags may be the lazily saved ones. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Fast path: SETcc into TMP_REGISTER's low byte, then OR it
	   directly into dst (dst == src, only the OR result needed). */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REGISTER] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REGISTER];
		*inst++ = REX | (reg_map[TMP_REGISTER] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REGISTER] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	/* General path: SETcc into a byte register, then zero extend it
	   to the full register width with MOVZX. */
	reg = (op == SLJIT_MOV && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REGISTER)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		/* Plain move of the 0/1 value to the destination. */
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	/* Combine the flag value with dst using the requested op. */
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && dst <= TMP_REGISTER) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			/* Conditionally move a preloaded 1 over a preloaded 0. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REGISTER];
			return SLJIT_SUCCESS;
		}

		/* No CMOV: borrow EAX via XCHG so SETcc has a byte register. */
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		return SLJIT_SUCCESS;
	}

	/* OR fast path (dst == src, byte accessible, only OR result
	   flags requested). */
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && dst <= TMP_REGISTER && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_SCRATCH_REG1] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_SCRATCH_REG1) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
		}
		else {
			/* dst is EAX itself: go through ECX instead, using the
			   two byte XCHG form. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REGISTER];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REGISTER to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REGISTER];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
	compiler->skip_checks = 1;
#endif
	/* Use the destination saved before CHECK_EXTRA_REGS. */
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REGISTER, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}
2776
SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	/* Compute dst = SLJIT_LOCALS_REG + offset (the address of a stack
	   local), preserving EFLAGS by using LEA / MOV. */
	CHECK_ERROR();
	check_sljit_get_local_base(compiler, dst, dstw, offset);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Addresses are computed in full 64 bit width. */
	compiler->mode32 = 0;
#endif

	/* Translate the offset the same way local accesses are. */
	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_LOCALS_REG), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		/* Offsets that do not fit in 32 bits must be loaded into a
		   register before the LEA. */
		FAIL_IF(emit_load_imm64(compiler, TMP_REGISTER, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, TMP_REGISTER, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_LOCALS_REG, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_LOCALS_REG, 0);
}
2807
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	/* Emit a load of init_value into dst whose immediate can later be
	   patched at run time with sljit_set_const. Returns the const
	   record, or NULL on failure. */
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Use a full width load so any sljit_sw value can be patched in
	   later. */
	compiler->mode32 = 0;
	reg = (dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Two byte record: zero length byte plus tag 1 (const marker). */
	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Store the loaded value to the final (memory) destination. */
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2854
2855 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2856 {
2857 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2858 *(sljit_sw*)addr = new_addr - (addr + 4);
2859 #else
2860 *(sljit_uw*)addr = new_addr;
2861 #endif
2862 }
2863
2864 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2865 {
2866 *(sljit_sw*)addr = new_constant;
2867 }

  ViewVC Help
Powered by ViewVC 1.1.5