/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 867 - (show annotations)
Fri Jan 13 13:34:07 2012 UTC (7 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 79903 byte(s)
JIT compiler update
1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 {
29 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
30 return "x86-32";
31 #else
32 return "x86-64";
33 #endif
34 }
35
36 /*
37 32b register indexes:
38 0 - EAX
39 1 - ECX
40 2 - EDX
41 3 - EBX
42 4 - none
43 5 - EBP
44 6 - ESI
45 7 - EDI
46 */
47
48 /*
49 64b register indexes:
50 0 - RAX
51 1 - RCX
52 2 - RDX
53 3 - RBX
54 4 - none
55 5 - RBP
56 6 - RSI
57 7 - RDI
58 8 - R8 - From now on REX prefix is required
59 9 - R9
60 10 - R10
61 11 - R11
62 12 - R12
63 13 - R13
64 14 - R14
65 15 - R15
66 */
67
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)

/* Maps SLJIT abstract register numbers to the hardware register indexes
   listed in the 32b table above. Zero entries either select EAX or mark
   registers not encoded through this table (extra registers live in the
   stack frame, see CHECK_EXTRA_REGS). */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
  0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};

/* On x86-32 the extra (EREG) registers are spilled to the local stack
   area; rewrite such operands into SLJIT_MEM1(SLJIT_LOCALS_REG) accesses
   and run `do' when a rewrite happened. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_GENERAL_EREG1 && p <= SLJIT_GENERAL_EREG2) { \
		w = compiler->generals_start + (p - SLJIT_GENERAL_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for GENERAL_EREG than GENERAL_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
  0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1
};
#endif

/* REX prefix bytes. REX_W selects 64 bit operand size; R/X/B extend the
   ModRM reg, SIB index and ModRM rm / SIB base fields respectively. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* 32 bit "halfword" of the 64 bit machine word. */
typedef unsigned int sljit_uhw;
typedef int sljit_hw;

/* Whether a 64 bit immediate fits in a sign-extended 32 bit field. */
#define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
#define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)

/* No extra registers to remap on x86-64. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Scratch floating point register (one past the last usable one). */
#define TMP_FREG	(SLJIT_FLOAT_REG4 + 1)
#endif

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_PREF_F2		0x0800
#define EX86_SSE2		0x1000
#endif

/* Record the length byte of the next instruction in the buffer and add it
   to the total code size (buf/code must point into an ensure_buf area). */
#define INC_SIZE(s)		(*buf++ = (s), compiler->size += (s))
#define INC_CSIZE(s)		(*code++ = (s), compiler->size += (s))

/* Single byte push/pop/ret encodings. RETN emits ret imm16 with the
   immediate stored little endian (high byte always 0 here). */
#define PUSH_REG(r)		(*buf++ = (0x50 + (r)))
#define POP_REG(r)		(*buf++ = (0x58 + (r)))
#define RET()			(*buf++ = (0xc3))
#define RETN(n)			(*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)	(*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
162
163 static sljit_ub get_jump_code(int type)
164 {
165 switch (type) {
166 case SLJIT_C_EQUAL:
167 case SLJIT_C_FLOAT_EQUAL:
168 return 0x84;
169
170 case SLJIT_C_NOT_EQUAL:
171 case SLJIT_C_FLOAT_NOT_EQUAL:
172 return 0x85;
173
174 case SLJIT_C_LESS:
175 case SLJIT_C_FLOAT_LESS:
176 return 0x82;
177
178 case SLJIT_C_GREATER_EQUAL:
179 case SLJIT_C_FLOAT_GREATER_EQUAL:
180 return 0x83;
181
182 case SLJIT_C_GREATER:
183 case SLJIT_C_FLOAT_GREATER:
184 return 0x87;
185
186 case SLJIT_C_LESS_EQUAL:
187 case SLJIT_C_FLOAT_LESS_EQUAL:
188 return 0x86;
189
190 case SLJIT_C_SIG_LESS:
191 return 0x8c;
192
193 case SLJIT_C_SIG_GREATER_EQUAL:
194 return 0x8d;
195
196 case SLJIT_C_SIG_GREATER:
197 return 0x8f;
198
199 case SLJIT_C_SIG_LESS_EQUAL:
200 return 0x8e;
201
202 case SLJIT_C_OVERFLOW:
203 case SLJIT_C_MUL_OVERFLOW:
204 return 0x80;
205
206 case SLJIT_C_NOT_OVERFLOW:
207 case SLJIT_C_MUL_NOT_OVERFLOW:
208 return 0x81;
209
210 case SLJIT_C_FLOAT_NAN:
211 return 0x8a;
212
213 case SLJIT_C_FLOAT_NOT_NAN:
214 return 0x8b;
215 }
216 return 0;
217 }
218
/* Defined in the architecture specific include below. Used for rewritable
   jumps and (on x86-64) for targets outside the +/-2GB rel32 range. */
static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
#endif
224
/* Emits a PC-relative jump or call. Chooses between the short (rel8) and
   near (rel32) forms, writes the opcode byte(s), advances jump->addr past
   them, reserves space for the displacement and tags the jump with
   PATCH_MB / PATCH_MW so sljit_generate_code can patch the value later. */
static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
{
	int short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	/* +2 = size of the short form (opcode byte + rel8). */
	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Targets outside the sign-extended 32 bit range cannot use rel32. */
	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = 0xeb; /* jmp rel8 */
		else
			*code_ptr++ = 0xe9; /* jmp rel32 */
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* Calls have no short form. */
		short_jump = 0;
		*code_ptr++ = 0xe8; /* call rel32 */
		jump->addr++;
	}
	else if (short_jump) {
		/* 0x7? is the single byte jcc rel8 counterpart of 0x0f 0x8?. */
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = 0x0f; /* jcc rel32 (two byte opcode) */
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	/* Reserve the displacement field; filled in by the patch loop. */
	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_b);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_w);
#else
		code_ptr += sizeof(sljit_hw);
#endif
	}

	return code_ptr;
}
277
/* Second pass of code generation: copies the instruction records collected
   in the compiler's memory fragments into a freshly allocated executable
   buffer, resolving labels, jumps and constants on the way, then patches
   all recorded jump displacements. Returns the executable code or NULL. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			/* Each record starts with a length byte: non-zero means
			   `len' bytes of finished machine code follow; zero means
			   a marker byte follows (0 = label, 1 = const,
			   2/3 = fixed call/jmp, >= 4 = jump of type (value - 4)). */
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					/* The constant's immediate was already copied; its
					   address is just behind the current position. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
					const_ = const_->next;
				}
				else {
					/* Fixed (absolute target) call or jump. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
					buf_ptr++;
					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
					code_ptr += sizeof(sljit_w);
					buf_ptr += sizeof(sljit_w) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_w);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	/* All labels, jumps and constants must have been consumed. */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	/* Patch pass: fill in the displacement fields reserved by
	   generate_near_jump_code / generate_far_jump_code. */
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			/* Absolute 64 bit address. */
			*(sljit_w*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = compiler->size;
	return (void*)code;
}
393
394 /* --------------------------------------------------------------------- */
395 /* Operators */
396 /* --------------------------------------------------------------------- */
397
/* Forward declarations of the binary/mov emitters defined further below.
   The cum(mutative) variant may swap its operands; op_rm/op_mr/op_imm/
   op_eax_imm are the opcode bytes for the possible operand forms. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w);

static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w);

static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw);
413
/* Saves the status flags without disturbing them afterwards: pushf pushes
   the flags word, then lea (which does not affect flags) moves the stack
   pointer back up, leaving the saved flags in the word just below the
   stack pointer where emit_restore_flags can find them. */
static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
	*buf++ = 0x9c; /* pushfd */
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x9c; /* pushfq */
	*buf++ = 0x48; /* REX.W for the 64 bit lea below */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
	*buf++ = 0x64; /* ModRM: disp8 + SIB */
	*buf++ = 0x24; /* SIB: base = esp/rsp */
	*buf++ = sizeof(sljit_w);
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
437
/* Restores the flags saved by emit_save_flags: lea moves the stack pointer
   back down onto the saved flags word, then popf reloads it. keep_flags
   becomes the new value of compiler->flags_saved. */
static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x48; /* REX.W for the 64 bit lea below */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
	*buf++ = 0x64; /* ModRM: disp8 + SIB */
	*buf++ = 0x24; /* SIB: base = esp/rsp */
	*buf++ = (sljit_ub)-(int)sizeof(sljit_w);
	*buf++ = 0x9d; /* popfd / popfq */
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
460
#ifdef _WIN32
#include <malloc.h>

/* On Windows the stack must be grown page by page (the guard page
   mechanism); alloca makes the compiler emit the _chkstk probe that
   touches every page of the requested area. */
static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
{
	/* Workaround for calling _chkstk. */
	alloca(local_size);
}
#endif
470
471 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
472 #include "sljitNativeX86_32.c"
473 #else
474 #include "sljitNativeX86_64.c"
475 #endif
476
/* Emits a full machine-word move between any combination of register,
   memory and immediate operands. Memory-to-memory moves go through
   TMP_REGISTER. Opcodes: 0x8b = mov r, r/m; 0x89 = mov r/m, r;
   0xc7 = mov r/m, imm32; 0xb8+r = mov r, imm. */
static int emit_mov(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* No destination, doesn't need to setup flags. */
		if (src & SLJIT_MEM) {
			/* Still perform the load (the access itself may matter). */
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8b;
		}
		return SLJIT_SUCCESS;
	}
	if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
		/* Register source: mov r/m, r. */
		code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				/* Only a 64 bit immediate needs the long form;
				   halfword values fall through to mov r/m, imm32. */
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Immediate too wide for imm32: load it into TMP_REG2
			   first, then store the register. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x89;
			return SLJIT_SUCCESS;
		}
#endif
		code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7;
		return SLJIT_SUCCESS;
	}
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
		/* Register destination: mov r, r/m. */
		code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!code);
		*code = 0x8b;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instruction. */
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code = 0x8b;
	code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
	FAIL_IF(!code);
	*code = 0x89;
	return SLJIT_SUCCESS;
}
541
/* Convenience wrapper: emit a move and return on failure. Note the
   trailing semicolon is part of the expansion — do not use this macro as
   the sole body of an unbraced if/else. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
544
/* Emits a zero operand operation: breakpoint (int3), nop, or one of the
   widening multiply/divide ops which implicitly use (r/e)ax and (r/e)dx.
   The mul/div operand register is encoded into the 0xf7 group-3
   instruction: /4 mul, /5 imul, /6 div, /7 idiv. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
	sljit_ub *buf;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0xcc; /* int3 */
		break;
	case SLJIT_NOP:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0x90; /* nop */
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* These ops rely on the rax/rdx mapping checked below. */
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] == 2
			&& reg_map[TMP_REGISTER] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] < 7
			&& reg_map[TMP_REGISTER] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
			/* Zero the high half of the dividend (xor reg, reg). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
			buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
#else
			buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
#endif
			FAIL_IF(!buf);
			*buf = 0x33; /* xor r, r/m */
		}

		if (op == SLJIT_SDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			/* Save the second operand: edx/rdx is clobbered by cdq/cqo. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
#endif

			/* CDQ instruction */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!buf);
			INC_SIZE(1);
			*buf = 0x99; /* cdq */
#else
			if (compiler->mode32) {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!buf);
				INC_SIZE(1);
				*buf = 0x99; /* cdq */
			} else {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!buf);
				INC_SIZE(2);
				*buf++ = REX_W;
				*buf = 0x99; /* cqo */
			}
#endif
		}

		/* Emit the 0xf7 instruction; the /digit is OR-ed in below. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*buf++ = REX_B;
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
#else
		if (!compiler->mode32)
			*buf++ = REX_W;
		*buf++ = 0xf7;
		*buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
#endif
#endif
		switch (op) {
		case SLJIT_UMUL:
			*buf |= 4 << 3; /* mul */
			break;
		case SLJIT_SMUL:
			*buf |= 5 << 3; /* imul */
			break;
		case SLJIT_UDIV:
			*buf |= 6 << 3; /* div */
			break;
		case SLJIT_SDIV:
			*buf |= 7 << 3; /* idiv */
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* Move the high word / remainder (rdx = TMP_REGISTER here)
		   into the second temporary register. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
680
/* Emits a single raw byte into the instruction stream (used for one byte
   prefixes or one byte xchg-with-eax encodings, 0x90 + reg). */
#define ENCODE_PREFIX(prefix) \
	do { \
		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!code); \
		INC_CSIZE(1); \
		*code = (prefix); \
	} while (0)
688
/* Emits a byte move with zero (sign == 0) or sign (sign != 0) extension.
   Tricky on x86-32 because only EAX/ECX/EDX/EBX (reg_map < 4) have byte
   addressable low halves; sources/destinations mapped to other registers
   are handled with shift/and tricks or are routed through a byte capable
   work register swapped in via xchg. */
static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		/* mov r/m8, imm8 */
		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc6;
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* Source has no byte addressable half; copy it first. */
			SLJIT_ASSERT(dst_r == TMP_REGISTER);
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
		if (reg_map[dst] < 4) {
			/* dst is byte addressable: movsx/movzx dst8 into dst. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!code);
			*code++ = 0x0f;
			*code = sign ? 0xbe : 0xb6;
		}
		else {
			/* Neither register is byte addressable: extend the low
			   byte with shifts (signed) or a mask (unsigned). */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				*code |= 0x4 << 3;
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				/* shr/sar reg, 24 */
				*code |= 0x7 << 3;
			}
			else {
				/* and dst, 0xff */
				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
				FAIL_IF(!code);
				*(code + 1) |= 0x4 << 3;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		/* movsx / movzx r, r/m8 */
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbe : 0xb6;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REGISTER) {
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}
			else {
				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
					work_r = SLJIT_TEMPORARY_REG1;
				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}

			/* Swap the value into the byte addressable work register
			   (0x90 + r is the one byte xchg eax, r form). */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg */
			}

			/* mov r/m8, r8 */
			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88;

			/* Swap back to restore the work register. */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg */
			}
		}
		else {
			/* mov r/m8, r8 */
			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88;
		}
#else
		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x88;
#endif
	}

	return SLJIT_SUCCESS;
}
831
/* Emits a 16 bit move with zero (sign == 0) or sign (sign != 0) extension.
   Opcodes: 0x0f 0xbf/0xb7 = movsx/movzx r, r/m16; stores use the 0x66
   operand size prefix (EX86_PREF_66) with the normal mov forms. */
static int emit_mov_half(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		/* mov r/m16, imm16 (with 0x66 prefix) */
		code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc7;
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
		/* Register to memory: the register can be stored directly. */
		dst_r = src;
	else {
		/* movsx / movzx r, r/m16 */
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbf : 0xb7;
	}

	if (dst & SLJIT_MEM) {
		/* mov r/m16, r16 (with 0x66 prefix) */
		code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x89;
	}

	return SLJIT_SUCCESS;
}
879
880 static int emit_unary(struct sljit_compiler *compiler, int un_index,
881 int dst, sljit_w dstw,
882 int src, sljit_w srcw)
883 {
884 sljit_ub* code;
885
886 if (dst == SLJIT_UNUSED) {
887 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
888 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
889 FAIL_IF(!code);
890 *code++ = 0xf7;
891 *code |= (un_index) << 3;
892 return SLJIT_SUCCESS;
893 }
894 if (dst == src && dstw == srcw) {
895 /* Same input and output */
896 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
897 FAIL_IF(!code);
898 *code++ = 0xf7;
899 *code |= (un_index) << 3;
900 return SLJIT_SUCCESS;
901 }
902 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
903 EMIT_MOV(compiler, dst, 0, src, srcw);
904 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
905 FAIL_IF(!code);
906 *code++ = 0xf7;
907 *code |= (un_index) << 3;
908 return SLJIT_SUCCESS;
909 }
910 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
911 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
912 FAIL_IF(!code);
913 *code++ = 0xf7;
914 *code |= (un_index) << 3;
915 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
916 return SLJIT_SUCCESS;
917 }
918
919 static int emit_not_with_flags(struct sljit_compiler *compiler,
920 int dst, sljit_w dstw,
921 int src, sljit_w srcw)
922 {
923 sljit_ub* code;
924
925 if (dst == SLJIT_UNUSED) {
926 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
927 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
928 FAIL_IF(!code);
929 *code++ = 0xf7;
930 *code |= 0x2 << 3;
931 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
932 FAIL_IF(!code);
933 *code = 0x0b;
934 return SLJIT_SUCCESS;
935 }
936 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
937 EMIT_MOV(compiler, dst, 0, src, srcw);
938 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
939 FAIL_IF(!code);
940 *code++ = 0xf7;
941 *code |= 0x2 << 3;
942 code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
943 FAIL_IF(!code);
944 *code = 0x0b;
945 return SLJIT_SUCCESS;
946 }
947 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
948 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
949 FAIL_IF(!code);
950 *code++ = 0xf7;
951 *code |= 0x2 << 3;
952 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
953 FAIL_IF(!code);
954 *code = 0x0b;
955 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
956 return SLJIT_SUCCESS;
957 }
958
/* Emits a count-leading-zeros sequence using BSR (0x0f 0xbd), which finds
   the highest set bit: dst_r is preloaded with 2*W-1, CMOVNE (0x0f 0x45)
   overwrites it with the BSR result when the source was non-zero, and the
   final XOR with W-1 turns the bit index into the zero count (and maps
   the zero-input sentinel 2*W-1 to W). */
static int emit_clz(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3; /* not */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!code);
		*code |= 0x5 << 3; /* shr */
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* BSR cannot take an immediate operand. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	/* bsr TMP_REGISTER, src */
	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0xbd;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
			dst_r = SLJIT_TEMPORARY_REG1;
		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
			dst_r = SLJIT_TEMPORARY_REG2;
		else
			dst_r = SLJIT_TEMPORARY_REG3;
		/* Save dst_r's current value into dst; the xchg at the end
		   both stores the result and restores dst_r from there. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
	/* The sentinel load must be full width even for int ops. */
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	/* cmovne dst_r, TMP_REGISTER */
	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0x45;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!code);
	*(code + 1) |= 0x6 << 3; /* xor */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* xchg: stores the result and restores the borrowed register. */
		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x87;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1040
/* Emit a unary/move operation (MOV*, MOVU*, NOT, NEG, CLZ).
   Moves are dispatched to the size-specific emit_mov* helpers; the MOVU
   variants additionally update the base register of a memory operand
   (pre/post update addressing) with an LEA instruction. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* On x86-32 the "extra" virtual registers live in memory; these flags
	   record whether dst/src were rewritten to stack slots by CHECK_EXTRA_REGS. */
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	/* x86-64 has enough machine registers; no extra-register handling needed. */
#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Moves always operate on the full machine word unless a sized
		   variant was requested; the sized helpers handle truncation. */
		compiler->mode32 = 0;
#endif

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			/* MOVU_x == MOV_x + 7; strip the "update" property and remember it. */
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Pre-truncate immediates to the requested size so the
			   emitted constant matches the memory-operand semantics. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* Immediate into an extra (memory-backed) register is a
			   plain word-size store; no helper needed. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* MOVU pre-update of the source base register: LEA base, [addr]. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;	/* LEA */
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Sized moves (or mem->mem moves) cannot target a memory-backed
		   extra register directly; go through TMP_REGISTER instead. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the TMP_REGISTER detour back to the extra register's slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* MOVU post-update of the destination base register. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;	/* LEA */
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		/* NOT does not set flags; use the XOR-based variant when E flag is requested. */
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#undef src_is_ereg
#endif
}
1188
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emit a group-1 ALU operation with an immediate operand.
   _op_imm_ is the /r extension for the 0x81 imm form (already shifted
   into the reg field of the ModRM byte, hence the "|=" on code + 1);
   _op_mr_ is the reg->mem opcode used when the immediate does not fit
   into 32 bits and must be materialized in TMP_REG2 first. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

/* Shorter encoding: ALU op with EAX/RAX and a 32-bit immediate. */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

/* x86-32: every immediate fits in 32 bits, so no TMP_REG2 fallback. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

/* Shorter encoding: ALU op with EAX and a 32-bit immediate. */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
1218
/* Emit a commutative (cumulative) two-operand ALU operation (ADD, ADC,
   AND, OR, XOR). The four opcode bytes select the reg<-r/m form (op_rm),
   the r/m<-reg form (op_mr), the ModRM /r extension for the immediate
   form (op_imm) and the short EAX-immediate form (op_eax_imm).
   Because the operation is commutative, dst==src2 can be handled in
   place just like dst==src1. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; execute for the flag effects only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* Operate on dst in place. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Shorter EAX,imm32 encoding; not worth it for imm8. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* mem op mem: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* Mirror of the dst==src1 case with the operands swapped. */
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* dst is a register: build the result there directly. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1334
/* Emit a non-commutative two-operand ALU operation (SUB, SBB).
   Same opcode-byte scheme as emit_cum_binary, but there is no in-place
   dst==src2 shortcut, and the general path must avoid clobbering src2
   before it is read (hence the dst != src2 check). */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; execute for the flag effects only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* Operate on dst in place. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Shorter EAX,imm32 encoding; not worth it for imm8. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* mem op mem: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		/* dst is a register and distinct from src2: build in place. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1416
1417 static int emit_mul(struct sljit_compiler *compiler,
1418 int dst, sljit_w dstw,
1419 int src1, sljit_w src1w,
1420 int src2, sljit_w src2w)
1421 {
1422 sljit_ub* code;
1423 int dst_r;
1424
1425 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1426
1427 /* Register destination. */
1428 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1429 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1430 FAIL_IF(!code);
1431 *code++ = 0x0f;
1432 *code = 0xaf;
1433 }
1434 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1435 code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1436 FAIL_IF(!code);
1437 *code++ = 0x0f;
1438 *code = 0xaf;
1439 }
1440 else if (src1 & SLJIT_IMM) {
1441 if (src2 & SLJIT_IMM) {
1442 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1443 src2 = dst_r;
1444 src2w = 0;
1445 }
1446
1447 if (src1w <= 127 && src1w >= -128) {
1448 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1449 FAIL_IF(!code);
1450 *code = 0x6b;
1451 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1452 FAIL_IF(!code);
1453 INC_CSIZE(1);
1454 *code = (sljit_b)src1w;
1455 }
1456 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1457 else {
1458 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1459 FAIL_IF(!code);
1460 *code = 0x69;
1461 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1462 FAIL_IF(!code);
1463 INC_CSIZE(4);
1464 *(sljit_w*)code = src1w;
1465 }
1466 #else
1467 else if (IS_HALFWORD(src1w)) {
1468 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1469 FAIL_IF(!code);
1470 *code = 0x69;
1471 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1472 FAIL_IF(!code);
1473 INC_CSIZE(4);
1474 *(sljit_hw*)code = (sljit_hw)src1w;
1475 }
1476 else {
1477 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1478 if (dst_r != src2)
1479 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1480 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1481 FAIL_IF(!code);
1482 *code++ = 0x0f;
1483 *code = 0xaf;
1484 }
1485 #endif
1486 }
1487 else if (src2 & SLJIT_IMM) {
1488 /* Note: src1 is NOT immediate. */
1489
1490 if (src2w <= 127 && src2w >= -128) {
1491 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1492 FAIL_IF(!code);
1493 *code = 0x6b;
1494 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1495 FAIL_IF(!code);
1496 INC_CSIZE(1);
1497 *code = (sljit_b)src2w;
1498 }
1499 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1500 else {
1501 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1502 FAIL_IF(!code);
1503 *code = 0x69;
1504 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1505 FAIL_IF(!code);
1506 INC_CSIZE(4);
1507 *(sljit_w*)code = src2w;
1508 }
1509 #else
1510 else if (IS_HALFWORD(src2w)) {
1511 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1512 FAIL_IF(!code);
1513 *code = 0x69;
1514 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1515 FAIL_IF(!code);
1516 INC_CSIZE(4);
1517 *(sljit_hw*)code = (sljit_hw)src2w;
1518 }
1519 else {
1520 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1521 if (dst_r != src1)
1522 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1523 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1524 FAIL_IF(!code);
1525 *code++ = 0x0f;
1526 *code = 0xaf;
1527 }
1528 #endif
1529 }
1530 else {
1531 /* Neither argument is immediate. */
1532 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1533 dst_r = TMP_REGISTER;
1534 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1535 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1536 FAIL_IF(!code);
1537 *code++ = 0x0f;
1538 *code = 0xaf;
1539 }
1540
1541 if (dst_r == TMP_REGISTER)
1542 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1543
1544 return SLJIT_SUCCESS;
1545 }
1546
/* Try to emit an addition as a single LEA (reg+reg or reg+imm forms).
   Returns SLJIT_ERR_UNSUPPORTED when the operand combination has no
   profitable LEA encoding, in which case the caller falls back to the
   normal ADD path. LEA does not touch the flags, which is why it is only
   used when the caller does not need them. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			/* It is not possible to be both SLJIT_LOCALS_REG. */
			if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
				/* LEA dst, [src1 + src2]. */
				code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
				FAIL_IF(!code);
				*code = 0x8d;
				done = 1;
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Displacements are limited to 32 bits in the encoding. */
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			/* LEA dst, [src1 + imm]. */
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			/* LEA dst, [src2 + imm]. */
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		/* Flush the result when dst was not a machine register. */
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
1606
/* Emit a CMP (flags-only subtraction). Chooses among the short
   EAX,imm32 form (3D), the imm form (group 1, /7), reg<-r/m (3B) and
   r/m<-reg (39) encodings depending on the operand kinds. */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		/* CMP EAX, imm32 — shortest encoding; imm8 goes the normal route. */
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			/* CMP r, imm (group 1, /7). */
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			/* CMP r, r/m (3B). */
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		/* CMP r/m, r (39). */
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* imm vs imm: stage src1 in TMP_REGISTER first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* mem vs mem: stage src1 in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
1657
1658 static int emit_test_binary(struct sljit_compiler *compiler,
1659 int src1, sljit_w src1w,
1660 int src2, sljit_w src2w)
1661 {
1662 sljit_ub* code;
1663
1664 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1665 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1666 #else
1667 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1668 #endif
1669 BINARY_EAX_IMM(0xa9, src2w);
1670 return SLJIT_SUCCESS;
1671 }
1672
1673 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1674 if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1675 #else
1676 if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1677 #endif
1678 BINARY_EAX_IMM(0xa9, src1w);
1679 return SLJIT_SUCCESS;
1680 }
1681
1682 if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1683 if (src2 & SLJIT_IMM) {
1684 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1685 if (IS_HALFWORD(src2w) || compiler->mode32) {
1686 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1687 FAIL_IF(!code);
1688 *code = 0xf7;
1689 }
1690 else {
1691 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1692 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1693 FAIL_IF(!code);
1694 *code = 0x85;
1695 }
1696 #else
1697 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1698 FAIL_IF(!code);
1699 *code = 0xf7;
1700 #endif
1701 }
1702 else {
1703 code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1704 FAIL_IF(!code);
1705 *code = 0x85;
1706 }
1707 return SLJIT_SUCCESS;
1708 }
1709
1710 if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1711 if (src1 & SLJIT_IMM) {
1712 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1713 if (IS_HALFWORD(src1w) || compiler->mode32) {
1714 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1715 FAIL_IF(!code);
1716 *code = 0xf7;
1717 }
1718 else {
1719 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1720 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1721 FAIL_IF(!code);
1722 *code = 0x85;
1723 }
1724 #else
1725 code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1726 FAIL_IF(!code);
1727 *code = 0xf7;
1728 #endif
1729 }
1730 else {
1731 code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1732 FAIL_IF(!code);
1733 *code = 0x85;
1734 }
1735 return SLJIT_SUCCESS;
1736 }
1737
1738 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1739 if (src2 & SLJIT_IMM) {
1740 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1741 if (IS_HALFWORD(src2w) || compiler->mode32) {
1742 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1743 FAIL_IF(!code);
1744 *code = 0xf7;
1745 }
1746 else {
1747 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1748 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1749 FAIL_IF(!code);
1750 *code = 0x85;
1751 }
1752 #else
1753 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1754 FAIL_IF(!code);
1755 *code = 0xf7;
1756 #endif
1757 }
1758 else {
1759 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1760 FAIL_IF(!code);
1761 *code = 0x85;
1762 }
1763 return SLJIT_SUCCESS;
1764 }
1765
/* Emit a shift/rotate (group 2). 'mode' is the /r extension ORed into
   the ModRM byte (e.g. SHL=/4, SHR=/5, SAR=/7). A variable shift count
   must be in CL (SLJIT_PREF_SHIFT_REG), so the non-immediate paths
   shuffle operands to free up ecx, saving and restoring its old value. */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		/* Count is an immediate or already in CL: no register shuffling. */
		if (dst == src1 && dstw == src1w) {
			/* Shift dst in place. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Result discarded; shift for the flag effects only. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* dst and count are both ecx: shift in TMP_REGISTER, then move back. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* dst is a memory operand: shift in TMP_REGISTER, then store. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* dst is ecx itself: compute in TMP_REGISTER with the count in ecx. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* dst is a safe register: save old ecx in TMP_REGISTER around the shift. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1855
/* Wrapper around emit_shift that guarantees the flags reflect the
   result even when the hardware would leave them untouched. */
static int emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, int set_flags,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Hardware masks the count to 6 bits (5 in 32-bit mode). */
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		/* Effective count is 0: the shift is a plain move... */
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* ...but flags were requested, so OR with 0 to set them. */
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	/* Variable count may be 0 at run time: force the flags with an
	   explicit compare against 0 (before the shift when dst is in
	   memory, after it when dst is a register). */
	if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));

	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}
1890
/* Emit a binary operation. Dispatches to the ALU helpers with the four
   x86 opcode bytes for each operation (reg<-r/m, r/m<-reg, immediate /r
   extension, EAX-immediate short form), and manages the saved-flags
   state demanded by SLJIT_KEEP_FLAGS and the carry-consuming ops. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* Flag-free add can often be a single LEA. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* ADC encodings. */
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* Flag-free subtract of an immediate can be LEA with -imm. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			/* Subtract without a destination is just a compare. */
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* SBB encodings. */
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			/* AND without a destination is just a TEST. */
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1982
1983 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1984 {
1985 check_sljit_get_register_index(reg);
1986 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1987 if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1988 || reg == SLJIT_GENERAL_EREG1 || reg == SLJIT_GENERAL_EREG2)
1989 return -1;
1990 #endif
1991 return reg_map[reg];
1992 }
1993
/* Copies "size" bytes of caller supplied machine code verbatim into the
   instruction stream. "size" must be in 1..15 (enforced by the assert
   below), matching the single record size limit of the buffer format. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, int size)
{
	sljit_ub *buf;

	CHECK_ERROR();
	check_sljit_emit_op_custom(compiler, instruction, size);
	SLJIT_ASSERT(size > 0 && size < 16);

	/* 1 extra byte for the record's size marker written by INC_SIZE. */
	buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!buf);
	INC_SIZE(size);
	SLJIT_MEMMOVE(buf, instruction, size);
	return SLJIT_SUCCESS;
}
2009
2010 /* --------------------------------------------------------------------- */
2011 /* Floating point operators */
2012 /* --------------------------------------------------------------------- */
2013
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
/* Runtime SSE2 detection result, filled in by init_compiler(). */
static int sse2_available = 0;
#endif

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
static sljit_i sse2_data[3 + 4 + 4];
static sljit_i *sse2_buffer;

/* One-time backend initialization.

   Aligns sse2_buffer to a 16 byte boundary inside sse2_data and fills
   it with two 16 byte constants used by sljit_emit_fop1:
     - words 0..3: sign-bit mask (0x80000000 in the high word of the
       low double) XOR-ed in by SLJIT_FNEG (opcode 0x57),
     - words 4..7: everything-but-sign mask AND-ed in by SLJIT_FABS
       (opcode 0x54).
   With SLJIT_SSE2_AUTO it also probes CPUID leaf 1 and records the
   SSE2 feature bit (EDX bit 26) in sse2_available.

   Fix: the function was declared with empty parentheses, which in C
   means "unspecified parameters" (K&R style); use an explicit (void)
   prototype instead. */
static void init_compiler(void)
{
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
	int features = 0;
#endif

	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
	sse2_buffer[0] = 0;
	sse2_buffer[1] = 0x80000000;
	sse2_buffer[4] = 0xffffffff;
	sse2_buffer[5] = 0x7fffffff;

#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
#ifdef __GNUC__
	/* AT&T syntax. EBX is saved and restored by hand rather than
	   listed as a clobber — presumably because it can be the PIC
	   register on x86-32; verify before changing. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
#error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
#endif
	/* CPUID.1:EDX bit 26 reports SSE2 support. */
	sse2_available = (features >> 26) & 0x1;
#endif
}

#endif
2066
2067 SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
2068 {
2069 /* Always available. */
2070 return 1;
2071 }
2072
2073 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2074
2075 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2076 int xmm1, int xmm2, sljit_w xmm2w)
2077 {
2078 sljit_ub *buf;
2079
2080 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2081 FAIL_IF(!buf);
2082 *buf++ = 0x0f;
2083 *buf = opcode;
2084 return SLJIT_SUCCESS;
2085 }
2086
2087 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2088 int xmm1, int xmm2, sljit_w xmm2w)
2089 {
2090 sljit_ub *buf;
2091
2092 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2093 FAIL_IF(!buf);
2094 *buf++ = 0x0f;
2095 *buf = opcode;
2096 return SLJIT_SUCCESS;
2097 }
2098
/* Loads a double from src/srcw into xmm register dst
   (F2 0F 10: MOVSD xmm, xmm/m64). */
static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
	int dst, int src, sljit_w srcw)
{
	return emit_sse2(compiler, 0x10, dst, src, srcw);
}
2104
/* Stores xmm register src to dst/dstw (F2 0F 11: MOVSD xmm/m64, xmm).
   Note the swapped operand order passed to emit_sse2. */
static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
	int dst, sljit_w dstw, int src)
{
	return emit_sse2(compiler, 0x11, src, dst, dstw);
}
2110
/* SSE2 implementation of the single-operand floating point ops
   (FCMP, FMOV, FNEG, FABS). Compiled as the public sljit_emit_fop1
   when SSE2 is selected at build time, or as a static helper that the
   runtime dispatcher calls when SLJIT_SSE2_AUTO is enabled. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		/* The compare clobbers EFLAGS, so any saved flags are stale. */
		compiler->flags_saved = 0;
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			/* First operand must be a register: load it into TMP_FREG. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		/* 66 0F 2E: UCOMISD dst_r, src — sets EFLAGS. */
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	if (op == SLJIT_FMOV) {
		/* Plain move: pick the direction that needs no temporary,
		   fall back to a load+store through TMP_FREG for mem->mem. */
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	/* FNEG/FABS operate in place on a register: get the operand into
	   dst (if it is a register) or TMP_FREG. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* 66 0F 57: XORPD with the sign-bit mask in sse2_buffer. */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* 66 0F 54: ANDPD with the abs mask at sse2_buffer + 4. */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	/* Spill the temporary back to the memory destination if used. */
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2172
/* SSE2 implementation of the two-operand floating point ops
   (FADD, FSUB, FMUL, FDIV). Same dual public/static naming scheme as
   the fop1 variant above. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	/* SSE2 arithmetic is destructive (dst_r op= src2), so arrange for
	   src1 to already be in dst_r, or fall back to TMP_FREG. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
		else {
			/* dst == src2 and the op is not commutative: compute in
			   TMP_FREG to avoid clobbering src2. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
	}

	switch (op) {
	case SLJIT_FADD:
		/* F2 0F 58: ADDSD. */
		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
		break;

	case SLJIT_FSUB:
		/* F2 0F 5C: SUBSD. */
		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
		break;

	case SLJIT_FMUL:
		/* F2 0F 59: MULSD. */
		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
		break;

	case SLJIT_FDIV:
		/* F2 0F 5E: DIVSD. */
		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2234
2235 #endif
2236
2237 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2238
2239 static int emit_fld(struct sljit_compiler *compiler,
2240 int src, sljit_w srcw)
2241 {
2242 sljit_ub *buf;
2243
2244 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2245 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2246 FAIL_IF(!buf);
2247 INC_SIZE(2);
2248 *buf++ = 0xd9;
2249 *buf = 0xc0 + src - 1;
2250 return SLJIT_SUCCESS;
2251 }
2252
2253 buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2254 FAIL_IF(!buf);
2255 *buf = 0xdd;
2256 return SLJIT_SUCCESS;
2257 }
2258
/* Emits one x87 instruction with either a stack-register or an m64fp
   operand form:
     register: st_arg, st_arg2 + src  (two fixed bytes),
     memory:   m64fp_arg opcode with the /m64fp_arg2 extension OR-ed
               into the modrm byte built by emit_x86_instruction. */
static int emit_fop(struct sljit_compiler *compiler,
	sljit_ub st_arg, sljit_ub st_arg2,
	sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
	int src, sljit_w srcw)
{
	sljit_ub *buf;

	if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = st_arg;
		*buf = st_arg2 + src;
		return SLJIT_SUCCESS;
	}

	buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
	FAIL_IF(!buf);
	*buf++ = m64fp_arg;
	/* OR the /digit extension into the reg field of the modrm byte. */
	*buf |= m64fp_arg2;
	return SLJIT_SUCCESS;
}
2281
2282 static int emit_fop_regs(struct sljit_compiler *compiler,
2283 sljit_ub st_arg, sljit_ub st_arg2,
2284 int src)
2285 {
2286 sljit_ub *buf;
2287
2288 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2289 FAIL_IF(!buf);
2290 INC_SIZE(2);
2291 *buf++ = st_arg;
2292 *buf = st_arg2 + src;
2293 return SLJIT_SUCCESS;
2294 }
2295
/* x87 FPU implementation of the single-operand floating point ops.
   Public sljit_emit_fop1 when SSE2 is disabled at build time, static
   fallback for the runtime dispatcher when SLJIT_SSE2_AUTO is set. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_ub *buf;
#endif

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* x86-32: FLD dst; FCOMP src; then move the FPU status word
		   into EFLAGS via FNSTSW AX + SAHF. */
		FAIL_IF(emit_fld(compiler, dst, dstw));
		FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));

		/* Copy flags. EAX is preserved around the FNSTSW/SAHF pair. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!buf);
		INC_SIZE(3);
		*buf++ = 0xdf; /* FNSTSW AX (with the 0xe0 byte below). */
		*buf++ = 0xe0;
		/* Note: lahf is not supported on all x86-64 architectures. */
		*buf++ = 0x9e; /* SAHF. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
#else
		/* x86-64: use FUCOMIP (DF E8+i) which writes EFLAGS directly. */
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
			FAIL_IF(emit_fld(compiler, dst, dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
		} else {
			FAIL_IF(emit_fld(compiler, src, srcw));
			/* dst's register number shifts by one because the previous
			   FLD pushed a value onto the x87 stack. */
			FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
			/* DD D8: FSTP st(0) — pop the extra stack entry. */
			FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
		}
#endif
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src, srcw));

	switch (op) {
	case SLJIT_FNEG:
		/* D9 E0: FCHS. */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
		break;
	case SLJIT_FABS:
		/* D9 E1: FABS. */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
		break;
	}

	/* FSTP to dst: DD D8+i for registers, DD /3 for memory. */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2360
/* x87 FPU implementation of the two-operand floating point ops.
   Three cases, chosen to minimize stack traffic:
     1. dst == src1 (register): FLD src2, then DE-group op storing into
        st(src1) and popping.
     2. dst == src2 (register): FLD src1, then DE-group op with the
        reversed encoding (FSUBR/FDIVR variants) storing into st(src2).
     3. General: FLD src1, D8/DC-group op with src2, FSTP to dst. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
		FAIL_IF(emit_fld(compiler, src2, src2w));

		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
			break;
		}
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src1, src1w));

	if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
		/* st(0) holds src1 here, so SUB/DIV use the reversed forms
		   (0xe0/0xf0 instead of 0xe8/0xf8) to keep operand order. */
		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
			break;
		}
		return SLJIT_SUCCESS;
	}

	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
		break;
	case SLJIT_FSUB:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
		break;
	case SLJIT_FMUL:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
		break;
	case SLJIT_FDIV:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
		break;
	}

	/* Pop the result into dst (FSTP). */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2436 #endif
2437
2438 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2439
2440 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2441 int dst, sljit_w dstw,
2442 int src, sljit_w srcw)
2443 {
2444 if (sse2_available)
2445 return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2446 else
2447 return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2448 }
2449
2450 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2451 int dst, sljit_w dstw,
2452 int src1, sljit_w src1w,
2453 int src2, sljit_w src2w)
2454 {
2455 if (sse2_available)
2456 return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2457 else
2458 return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2459 }
2460
2461 #endif
2462
2463 /* --------------------------------------------------------------------- */
2464 /* Conditional instructions */
2465 /* --------------------------------------------------------------------- */
2466
/* Creates a label at the current position in the instruction stream.
   Reuses the previous label if nothing was emitted since it. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *buf;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps has their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	/* Emit a zero-length record (size byte 0, type byte 0) that marks
	   the label's position for code generation. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 0;

	return label;
}
2495
/* Emits a (possibly conditional) jump or call with a yet-unknown
   target; returns the jump record used to patch it later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
	sljit_ub *buf;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Unconditional jumps/calls do not read the flags; restore
		   them first. Conditional jumps consume the current flags. */
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls with arguments need the argument setup code first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	/* Emit the jump record (size byte 0, then the encoded type) which
	   code generation resolves into the real instruction. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(buf);

	*buf++ = 0;
	*buf++ = type + 4;
	return jump;
}
2532
/* Emits an indirect jump or call through src/srcw (register, memory,
   or immediate target). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
		/* Argument setup may clobber the register or stack slot that
		   holds the target address; move the target out of the way
		   first where needed. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		/* Fastcall passes arguments in registers; REG3 is used for
		   argument passing, so rescue the target from it. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		/* A stack-relative memory operand must be adjusted for the
		   pushes done by call_with_args (or loaded out beforehand). */
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w);
		}
#else
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
		}
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		/* Win64 ABI: REG3 carries an argument; rescue the target. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Immediate target: record it as a patchable jump. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* FF /2 = CALL r/m, FF /4 = JMP r/m. */
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0xff;
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
2614
/* Materializes the given condition as a 0/1 value using SETcc followed
   by a zero-extending MOVZX, then either moves it to dst (op ==
   SLJIT_MOV) or combines it with dst via sljit_emit_op2. The cond_set
   byte is the second opcode byte of the SETcc encoding (0F 9x). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	sljit_ub *buf;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, 0));

	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94; /* sete */
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95; /* setne */
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92; /* setb (unsigned) */
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93; /* setae (unsigned) */
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97; /* seta (unsigned) */
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96; /* setbe (unsigned) */
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c; /* setl (signed) */
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d; /* setge (signed) */
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f; /* setg (signed) */
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e; /* setle (signed) */
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90; /* seto */
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91; /* setno */
		break;

	case SLJIT_C_FLOAT_NAN:
		cond_set = 0x9a; /* setp */
		break;

	case SLJIT_C_FLOAT_NOT_NAN:
		cond_set = 0x9b; /* setnp */
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* x86-64: every register has a byte form, so SETcc reg8 followed
	   by MOVZX reg, reg8 can target any register directly. */
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* MOVZX (0F B6) zero-extends the byte result. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			/* Combine the 0/1 value with dst (e.g. OR/AND/XOR). */
			return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
		}
	}
#else
	/* x86-32: only EAX..EDX (here: the first three temporaries) have
	   byte forms, so other destinations go through AL with EAX saved
	   in TMP_REGISTER around the sequence. */
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_GENERAL_REG1 && dst <= SLJIT_NO_REGISTERS)
				*buf = 0xC0 | (reg_map[dst] << 3);
			else {
				*buf = 0xC0;
				/* Memory destination: store zero-extended EAX. */
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			/* Restore the saved EAX. */
			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			/* 90+r: XCHG EAX, TMP_REGISTER — restores EAX while moving
			   the result into TMP_REGISTER in one byte. */
			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
2802
/* Emits a load of init_value into dst and records its position so the
   constant can be patched later via sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* x86-64: the patchable form is a 64 bit immediate load into a
	   register; memory destinations get the value via TMP_REGISTER. */
	compiler->mode32 = 0;
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	/* x86-32: a MOV with a 32 bit immediate is directly patchable. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Record marker (size byte 0, type byte 1) identifying the
	   constant's location for later patching. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2848
/* Patches a rewritable jump in generated code. addr points at the
   jump's address field: a 32 bit displacement relative to the end of
   the field on x86-32, an absolute 64 bit address on x86-64. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* rel32 is measured from the byte after the 4 byte field. */
	*(sljit_w*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}
2857
/* Overwrites the immediate of a constant emitted by sljit_emit_const;
   addr is the address returned by sljit_get_const_addr. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
{
	*(sljit_w*)addr = new_constant;
}

  ViewVC Help
Powered by ViewVC 1.1.5