/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 860 - (show annotations)
Mon Jan 9 20:12:58 2012 UTC (7 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 80134 byte(s)
rename PCRE_SCHAR16 to PCRE_UCHAR16 and JIT compiler update
1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 {
29 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
30 return "x86-32";
31 #else
32 return "x86-64";
33 #endif
34 }
35
36 /*
37 32b register indexes:
38 0 - EAX
39 1 - ECX
40 2 - EDX
41 3 - EBX
42 4 - none
43 5 - EBP
44 6 - ESI
45 7 - EDI
46 */
47
48 /*
49 64b register indexes:
50 0 - RAX
51 1 - RCX
52 2 - RDX
53 3 - RBX
54 4 - none
55 5 - RBP
56 6 - RSI
57 7 - RDI
58 8 - R8 - From now on REX prefix is required
59 9 - R9
60 10 - R10
61 11 - R11
62 12 - R12
63 13 - R13
64 14 - R14
65 15 - R15
66 */
67
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)

/* Maps abstract SLJIT register numbers to x86-32 hardware register
   indexes (see the 32b index table above). Zero entries past index 0
   are registers that live in the stack frame instead (see below). */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
	0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
};

/* On x86-32 the extra (EREG) registers are kept in the stack frame:
   rewrite operand p (and its offset w) into a
   SLJIT_MEM1(SLJIT_LOCALS_REG) access at the register's slot, and run
   `do' (typically a flag assignment) when the rewrite happens. */
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
		w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	} \
	else if (p >= SLJIT_GENERAL_EREG1 && p <= SLJIT_GENERAL_EREG2) { \
		w = compiler->generals_start + (p - SLJIT_GENERAL_EREG1) * sizeof(sljit_w); \
		p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REGISTER	(SLJIT_NO_REGISTERS + 1)
#define TMP_REG2	(SLJIT_NO_REGISTERS + 2)
#define TMP_REG3	(SLJIT_NO_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
   Note: avoid to use r12 and r13 for memory addressing
   therefore r12 is better for GENERAL_EREG than GENERAL_REG. */
#ifndef _WIN64
/* System V AMD64 ABI.
   1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7: the 3 bit register field that goes into the
   ModR/M byte; the high bit is supplied by a REX prefix. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
};
#else
/* Windows x64 ABI.
   1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
	0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1
};
#endif

/* REX prefix bits: W selects 64 bit operand size; R, X and B extend the
   ModR/M reg field, the SIB index and the r/m (or opcode reg) field. */
#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40

/* 32 bit ("halfword") types on the 64 bit target. */
typedef unsigned int sljit_uhw;
typedef int sljit_hw;

/* Whether x fits in a sign-extended 32 bit immediate. */
#define IS_HALFWORD(x)		((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
#define NOT_HALFWORD(x)		((x) > 0x7fffffffll || (x) < -0x80000000ll)

/* No stack-cached extra registers on x86-64: all map to real registers. */
#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
/* Scratch SSE2 register used by the floating point code paths. */
#define TMP_FREG	(SLJIT_FLOAT_REG4 + 1)
#endif
138
/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010	/* binary ALU instruction (uses imm group encodings) */
#define EX86_SHIFT_INS		0x0020	/* shift/rotate instruction */
#define EX86_REX		0x0040	/* force a REX prefix */
#define EX86_NO_REXW		0x0080	/* suppress REX.W even in 64 bit mode */
#define EX86_BYTE_ARG		0x0100	/* immediate operand is a single byte */
#define EX86_HALF_ARG		0x0200	/* immediate operand is 16 bits */
#define EX86_PREF_66		0x0400	/* emit a 0x66 operand size prefix */

#if (defined SLJIT_SSE2 && SLJIT_SSE2)
#define EX86_PREF_F2		0x0800	/* emit a 0xf2 prefix (scalar double ops) */
#define EX86_SSE2		0x1000	/* operands are SSE2 registers */
#endif

/* Append a record length byte through buf/code and account for the
   instruction bytes that follow in compiler->size. */
#define INC_SIZE(s)			(*buf++ = (s), compiler->size += (s))
#define INC_CSIZE(s)			(*code++ = (s), compiler->size += (s))

/* Single byte push/pop (opcode 0x50/0x58 + register) and return forms. */
#define PUSH_REG(r)			(*buf++ = (0x50 + (r)))
#define POP_REG(r)			(*buf++ = (0x58 + (r)))
#define RET()				(*buf++ = (0xc3))
#define RETN(n)				(*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
162
/* Maps an SLJIT condition code to the second opcode byte of the long form
   conditional jump (0x0f 0x8?). The short (rel8) form is obtained
   elsewhere by subtracting 0x10. Returns 0 for types with no condition
   code. Float conditions reuse the unsigned/parity codes (NaN is
   signalled through the parity flag by the float compare sequence). */
static sljit_ub get_jump_code(int type)
{
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		return 0x84; /* je */

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		return 0x85; /* jne */

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		return 0x82; /* jb (unsigned) */

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		return 0x83; /* jae */

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		return 0x87; /* ja */

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		return 0x86; /* jbe */

	case SLJIT_C_SIG_LESS:
		return 0x8c; /* jl (signed) */

	case SLJIT_C_SIG_GREATER_EQUAL:
		return 0x8d; /* jge */

	case SLJIT_C_SIG_GREATER:
		return 0x8f; /* jg */

	case SLJIT_C_SIG_LESS_EQUAL:
		return 0x8e; /* jle */

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		return 0x80; /* jo */

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		return 0x81; /* jno */

	case SLJIT_C_FLOAT_NAN:
		return 0x8a; /* jp */

	case SLJIT_C_FLOAT_NOT_NAN:
		return 0x8b; /* jnp */
	}
	return 0;
}
218
219 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
220
221 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
222 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
223 #endif
224
/* Emits the opcode byte(s) of a jump or call whose displacement fits the
   near (rel8/rel32) forms, records how the displacement must be patched
   in the final pass (PATCH_MB for rel8, PATCH_MW for rel32) and reserves
   room for it. jump->addr is advanced past the opcode so it ends up
   pointing at the displacement field. On 64 bit targets the function
   falls back to generate_far_jump_code when rel32 cannot reach. */
static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
{
	int short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		/* Label addresses are not final yet; label->size holds the
		   label's offset from the start of the generated code. */
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	/* rel8 range test, measured from the end of a 2 byte instruction. */
	short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = 0xeb; /* jmp rel8 */
		else
			*code_ptr++ = 0xe9; /* jmp rel32 */
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* Calls have no rel8 form. */
		short_jump = 0;
		*code_ptr++ = 0xe8; /* call rel32 */
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10; /* jcc rel8 */
		jump->addr++;
	}
	else {
		*code_ptr++ = 0x0f; /* jcc rel32 (two byte opcode) */
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	/* Reserve the displacement field and tell the patch pass its size. */
	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_b);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_w);
#else
		code_ptr += sizeof(sljit_hw);
#endif
	}

	return code_ptr;
}
277
/* Second pass of code generation: concatenates the byte runs recorded in
   the compiler's memory fragments into one executable buffer, resolving
   labels, jumps and constants along the way, then patches every jump
   displacement once all addresses are known. Returns the executable
   code, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	check_sljit_generate_code(compiler);
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			/* Each record starts with a length byte: > 0 means that many
			   literal instruction bytes follow; 0 introduces a special
			   record whose kind is in the next byte. */
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					/* Jump record: (kind - 4) is the sljit jump type. */
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					/* Label record: fix the final address and offset. */
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					/* Constant record: the constant word was emitted just
					   before this point. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
					const_ = const_->next;
				}
				else {
					/* Fixed-target call (kind 2) or jump (kind 3); the
					   absolute target word follows the kind byte. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
					buf_ptr++;
					*(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
					code_ptr += sizeof(sljit_w);
					buf_ptr += sizeof(sljit_w) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_w);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	/* Every recorded label/jump/const must have been consumed. */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	/* Patch pass: all addresses are final now. */
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			/* 8 bit (short jump) displacement. */
			SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
		} else if (jump->flags & PATCH_MW) {
			/* 32 bit relative displacement (machine word on x86-32). */
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
#else
				SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
				*(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			/* 64 bit absolute label address (presumably the immediate of
			   a 64 bit load emitted by the far jump path -- see
			   sljitNativeX86_64.c). */
			*(sljit_w*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	/* NOTE(review): this reports the allocated size, not the possibly
	   smaller code_ptr - code; later sljit revisions changed this. */
	compiler->executable_size = compiler->size;
	return (void*)code;
}
393
394 /* --------------------------------------------------------------------- */
395 /* Operators */
396 /* --------------------------------------------------------------------- */
397
398 static int emit_cum_binary(struct sljit_compiler *compiler,
399 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
400 int dst, sljit_w dstw,
401 int src1, sljit_w src1w,
402 int src2, sljit_w src2w);
403
404 static int emit_non_cum_binary(struct sljit_compiler *compiler,
405 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
406 int dst, sljit_w dstw,
407 int src1, sljit_w src1w,
408 int src2, sljit_w src2w);
409
410 static int emit_mov(struct sljit_compiler *compiler,
411 int dst, sljit_w dstw,
412 int src, sljit_w srcw);
413
/* Saves the status flags without permanently growing the stack: pushfd/q
   pushes the flags word, then lea (which does not modify the flags, as
   add/sub would) moves esp/rsp back up, leaving the saved word just
   below the stack pointer where emit_restore_flags can find it. */
static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
	*buf++ = 0x9c; /* pushfd */
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x9c; /* pushfq */
	*buf++ = 0x48; /* REX.W for the lea below */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = sizeof(sljit_w);
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
437
/* Restores the flags saved by emit_save_flags: lea moves esp/rsp back
   down onto the saved word (again without clobbering the flags), then
   popfd/q reloads it. keep_flags records whether the saved copy is
   still considered valid afterwards. */
static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
{
	sljit_ub *buf;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!buf);
	INC_SIZE(5);
#else
	buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!buf);
	INC_SIZE(6);
	*buf++ = 0x48; /* REX.W for the lea below */
#endif
	*buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
	*buf++ = 0x64;
	*buf++ = 0x24;
	*buf++ = (sljit_ub)-(int)sizeof(sljit_w);
	*buf++ = 0x9d; /* popfd / popfq */
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}
460
#ifdef _WIN32
#include <malloc.h>

/* Touches the stack so Windows commits enough of it before the JIT-ed
   code runs. alloca is used because it makes the C compiler emit the
   _chkstk stack probe for large sizes (presumably probing one page at a
   time -- see the MSVC _chkstk documentation). */
static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
{
	/* Workaround for calling _chkstk. */
	alloca(local_size);
}
#endif
470
471 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
472 #include "sljitNativeX86_32.c"
473 #else
474 #include "sljitNativeX86_64.c"
475 #endif
476
477 static int emit_mov(struct sljit_compiler *compiler,
478 int dst, sljit_w dstw,
479 int src, sljit_w srcw)
480 {
481 sljit_ub* code;
482
483 if (dst == SLJIT_UNUSED) {
484 /* No destination, doesn't need to setup flags. */
485 if (src & SLJIT_MEM) {
486 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
487 FAIL_IF(!code);
488 *code = 0x8b;
489 }
490 return SLJIT_SUCCESS;
491 }
492 if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
493 code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
494 FAIL_IF(!code);
495 *code = 0x89;
496 return SLJIT_SUCCESS;
497 }
498 if (src & SLJIT_IMM) {
499 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
500 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
501 return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
502 #else
503 if (!compiler->mode32) {
504 if (NOT_HALFWORD(srcw))
505 return emit_load_imm64(compiler, dst, srcw);
506 }
507 else
508 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
509 #endif
510 }
511 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
512 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
513 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
514 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
515 FAIL_IF(!code);
516 *code = 0x89;
517 return SLJIT_SUCCESS;
518 }
519 #endif
520 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
521 FAIL_IF(!code);
522 *code = 0xc7;
523 return SLJIT_SUCCESS;
524 }
525 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
526 code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
527 FAIL_IF(!code);
528 *code = 0x8b;
529 return SLJIT_SUCCESS;
530 }
531
532 /* Memory to memory move. Requires two instruction. */
533 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
534 FAIL_IF(!code);
535 *code = 0x8b;
536 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
537 FAIL_IF(!code);
538 *code = 0x89;
539 return SLJIT_SUCCESS;
540 }
541
/* Convenience wrapper around emit_mov with error propagation. */
#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
544
/* Emits a zero-operand operation: breakpoint (int3), nop, or one of the
   widening multiply / divide operations, which are built on the x86
   f7 group instructions that implicitly use the EAX/EDX (RAX/RDX)
   register pair. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
{
	sljit_ub *buf;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int size;
#endif

	CHECK_ERROR();
	check_sljit_emit_op0(compiler, op);

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0xcc; /* int3 */
		break;
	case SLJIT_NOP:
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!buf);
		INC_SIZE(1);
		*buf = 0x90; /* nop */
		break;
	case SLJIT_UMUL:
	case SLJIT_SMUL:
	case SLJIT_UDIV:
	case SLJIT_SDIV:
		/* mul/div clobber the flags: drop any saved copy. */
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* The code below relies on REG1 being rax and on which of
		   REG2/TMP_REGISTER maps to rdx on each ABI. */
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] == 2
			&& reg_map[TMP_REGISTER] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_TEMPORARY_REG1] == 0
			&& reg_map[SLJIT_TEMPORARY_REG2] < 7
			&& reg_map[TMP_REGISTER] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_UDIV) {
			/* Unsigned divide: save the divisor if it lives in edx/rdx,
			   then zero the high half of the dividend (xor reg, reg). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
			buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
#else
			buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
#endif
			FAIL_IF(!buf);
			*buf = 0x33; /* xor */
		}

		if (op == SLJIT_SDIV) {
			/* Signed divide: sign extend the dividend into edx/rdx. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG1, 0);
#else
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
#endif

			/* sar edx, 31 (rdx, 63): replicate the sign bit. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);
			*buf++ = 0xc1;
			*buf++ = 0xfa;
			*buf = 0x1f;
#else
			if (compiler->mode32) {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
				FAIL_IF(!buf);
				INC_SIZE(3);
				*buf++ = 0xc1;
				*buf++ = 0xfa;
				*buf = 0x1f;
			} else {
				buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
				FAIL_IF(!buf);
				INC_SIZE(4);
				*buf++ = REX_W;
				*buf++ = 0xc1;
				*buf++ = 0xfa;
				*buf = 0x3f;
			}
#endif
		}

		/* The f7 group instruction itself. Its operand is the saved
		   divisor (TMP_REGISTER) for divisions, REG2 for multiplies. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!buf);
		INC_SIZE(2);
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!buf);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
		else if (op >= SLJIT_UDIV)
			*buf++ = REX_B; /* TMP_REGISTER is r8..r15 on WIN64 */
		*buf++ = 0xf7;
		*buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
#else
		if (!compiler->mode32)
			*buf++ = REX_W;
		*buf++ = 0xf7;
		*buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
#endif
#endif
		/* Select the f7 sub-opcode: /4 mul, /5 imul, /6 div, /7 idiv. */
		switch (op) {
		case SLJIT_UMUL:
			*buf |= 4 << 3;
			break;
		case SLJIT_SMUL:
			*buf |= 5 << 3;
			break;
		case SLJIT_UDIV:
			*buf |= 6 << 3;
			break;
		case SLJIT_SDIV:
			*buf |= 7 << 3;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		/* TMP_REGISTER is rdx here (asserted above): expose the high
		   result / remainder through REG2. */
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}
688
/* Emits a single raw byte (prefix or one byte opcode) as its own
   one byte buffer record. */
#define ENCODE_PREFIX(prefix) \
	do { \
		code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!code); \
		INC_CSIZE(1); \
		*code = (prefix); \
	} while (0)
696
/* Emits an 8 bit move with zero (sign == 0) or sign extension. On x86-32
   only registers with reg_map < 4 (eax/ecx/edx/ebx) have an addressable
   low byte, which forces the register shuffling below; x86-64 can reach
   any low byte with a REX prefix. */
static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	int work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
			/* Immediate to register: plain full width load (the caller
			   already truncated/extended srcw). */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
#else
			return emit_load_imm64(compiler, dst, srcw);
#endif
		}
		/* mov r/m8, imm8. */
		code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!code);
		*code = 0xc6;
		return SLJIT_SUCCESS;
	}

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;

	if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* The source register has no low byte form; go through
			   TMP_REGISTER (stored via the xchg dance below). */
			SLJIT_ASSERT(dst_r == TMP_REGISTER);
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			/* movzx/movsx dst, low byte of dst. */
			code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!code);
			*code++ = 0x0f;
			*code = sign ? 0xbe : 0xb6;
		}
		else {
			/* Neither register has a low byte form: extend with
			   shifts or a mask instead. */
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				*code |= 0x4 << 3;
				code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!code);
				/* shr/sar reg, 24 */
				*code |= 0x7 << 3;
			}
			else {
				/* and dst, 0xff */
				code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
				FAIL_IF(!code);
				*(code + 1) |= 0x4 << 3;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0x0f;
		*code = sign ? 0xbe : 0xb6; /* movsx / movzx */
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REGISTER) {
			/* Find a non-used register, whose reg_map[src] < 4. */
			if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
				if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}
			else {
				if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
					work_r = SLJIT_TEMPORARY_REG1;
				else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
					work_r = SLJIT_TEMPORARY_REG3;
				else
					work_r = SLJIT_TEMPORARY_REG2;
			}

			/* Swap the value into work_r, store its low byte, then swap
			   it back. 0x90 + reg is the one byte xchg eax, reg form,
			   usable when work_r is REG1 (eax). */
			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87; /* xchg */
			}

			code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88; /* mov r/m8, r8 */

			if (work_r == SLJIT_TEMPORARY_REG1) {
				ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
			}
			else {
				code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!code);
				*code = 0x87;
			}
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x88;
		}
#else
		code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x88;
#endif
	}

	return SLJIT_SUCCESS;
}
839
840 static int emit_mov_half(struct sljit_compiler *compiler, int sign,
841 int dst, sljit_w dstw,
842 int src, sljit_w srcw)
843 {
844 sljit_ub* code;
845 int dst_r;
846
847 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
848 compiler->mode32 = 0;
849 #endif
850
851 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
852 return SLJIT_SUCCESS; /* Empty instruction. */
853
854 if (src & SLJIT_IMM) {
855 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
856 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
857 return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
858 #else
859 return emit_load_imm64(compiler, dst, srcw);
860 #endif
861 }
862 code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
863 FAIL_IF(!code);
864 *code = 0xc7;
865 return SLJIT_SUCCESS;
866 }
867
868 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
869
870 if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
871 dst_r = src;
872 else {
873 code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
874 FAIL_IF(!code);
875 *code++ = 0x0f;
876 *code = sign ? 0xbf : 0xb7;
877 }
878
879 if (dst & SLJIT_MEM) {
880 code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
881 FAIL_IF(!code);
882 *code = 0x89;
883 }
884
885 return SLJIT_SUCCESS;
886 }
887
888 static int emit_unary(struct sljit_compiler *compiler, int un_index,
889 int dst, sljit_w dstw,
890 int src, sljit_w srcw)
891 {
892 sljit_ub* code;
893
894 if (dst == SLJIT_UNUSED) {
895 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
896 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
897 FAIL_IF(!code);
898 *code++ = 0xf7;
899 *code |= (un_index) << 3;
900 return SLJIT_SUCCESS;
901 }
902 if (dst == src && dstw == srcw) {
903 /* Same input and output */
904 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
905 FAIL_IF(!code);
906 *code++ = 0xf7;
907 *code |= (un_index) << 3;
908 return SLJIT_SUCCESS;
909 }
910 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
911 EMIT_MOV(compiler, dst, 0, src, srcw);
912 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
913 FAIL_IF(!code);
914 *code++ = 0xf7;
915 *code |= (un_index) << 3;
916 return SLJIT_SUCCESS;
917 }
918 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
919 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
920 FAIL_IF(!code);
921 *code++ = 0xf7;
922 *code |= (un_index) << 3;
923 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
924 return SLJIT_SUCCESS;
925 }
926
927 static int emit_not_with_flags(struct sljit_compiler *compiler,
928 int dst, sljit_w dstw,
929 int src, sljit_w srcw)
930 {
931 sljit_ub* code;
932
933 if (dst == SLJIT_UNUSED) {
934 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
935 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
936 FAIL_IF(!code);
937 *code++ = 0xf7;
938 *code |= 0x2 << 3;
939 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
940 FAIL_IF(!code);
941 *code = 0x0b;
942 return SLJIT_SUCCESS;
943 }
944 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
945 EMIT_MOV(compiler, dst, 0, src, srcw);
946 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
947 FAIL_IF(!code);
948 *code++ = 0xf7;
949 *code |= 0x2 << 3;
950 code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
951 FAIL_IF(!code);
952 *code = 0x0b;
953 return SLJIT_SUCCESS;
954 }
955 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
956 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
957 FAIL_IF(!code);
958 *code++ = 0xf7;
959 *code |= 0x2 << 3;
960 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
961 FAIL_IF(!code);
962 *code = 0x0b;
963 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
964 return SLJIT_SUCCESS;
965 }
966
/* Counts leading zeros using bsr (0x0f 0xbd), which yields the index of
   the highest set bit and leaves its destination undefined (with ZF set)
   for a zero input. A cmovne keeps a preloaded fallback constant in that
   case, and a final xor with 31 (63) converts the bit index into the
   leading zero count (mapping the fallback to 32 resp. 64). */
static int emit_clz(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int dst_r;

	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code++ = 0xf7;
		*code |= 0x2 << 3; /* f7 /2: not */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
#else
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
#endif
		FAIL_IF(!code);
		*code |= 0x5 << 3; /* /5: shr */
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* bsr cannot take an immediate operand. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
		src = TMP_REGISTER;
		srcw = 0;
	}

	/* bsr tmp, src. */
	code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0xbd;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
			dst_r = SLJIT_TEMPORARY_REG1;
		else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
			dst_r = SLJIT_TEMPORARY_REG2;
		else
			dst_r = SLJIT_TEMPORARY_REG3;
		/* Park dst_r's current value in the destination slot; the xchg
		   at the end both stores the result and restores dst_r. */
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	/* Fallback for a zero input: 32 + 31, which the final xor turns
	   into 32. */
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

	/* cmovne dst_r, tmp: take the bsr result unless the input was 0. */
	code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
	FAIL_IF(!code);
	*code++ = 0x0f;
	*code = 0x45;

	/* xor dst_r, 31 (63): bit index -> leading zero count. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!code);
	*(code + 1) |= 0x6 << 3; /* imm group /6: xor */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* xchg: store the count and restore the borrowed register. */
		code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!code);
		*code = 0x87;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}
1048
/* Emit a single-operand operation: the MOV family (with optional size
   extension/truncation and the MOVU "update addressing" variants) plus
   NOT, NEG and CLZ. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* On x86-32 the "extra" virtual registers are kept in memory; these
	   flags record whether dst/src were remapped by CHECK_EXTRA_REGS. */
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	/* No extra registers on x86-64; keep the flag a compile-time zero. */
#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Moves always use the full register width here; the size-specific
		   helpers below handle the narrowing/extension themselves. */
		compiler->mode32 = 0;
#endif

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			/* MOVU_*: same move as MOV_*, but the base address register
			   is updated as well (handled by the lea emissions below). */
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Normalize the immediate to the operand size up front. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* Immediate into an extra (memory-backed) register: a plain
			   word move suffices, since srcw is already normalized. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Pre-update form: fold the displacement/index into the base
		   register with lea (0x8d) before performing the load. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* An extra-register destination that needs conversion (or a
		   mem->mem move) is first computed in TMP_REGISTER and stored
		   to its stack slot afterwards (see below). */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the TMP_REGISTER detour back into the extra register's
		   stack slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* Post-update form: advance the base register with lea after the
		   store. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		/* NOT itself does not affect the flags; when the E flag is
		   requested a different sequence must be generated. */
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#undef src_is_ereg
#endif
}
1196
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emit "ALU op arg, immw".  On x86-64 the group-1 immediate encoding only
   takes a sign-extended 32-bit immediate; a larger value is first loaded
   into TMP_REG2 and the register->reg/mem (_op_mr_) form is used instead.
   _op_imm_ is the /digit value ORed into the ModRM reg field of the
   immediate form. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

/* Short encoding "ALU op eax/rax, imm32" (with REX.W outside mode32). */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

/* x86-32: any immediate fits, so the group-1 immediate form always works. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

/* Short encoding "ALU op eax, imm32". */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
1226
/* Emit a commutative two-operand ALU operation (used for ADD/ADC/AND/OR/XOR).
   op_rm:       opcode of the "reg, reg/mem" form
   op_mr:       opcode of the "reg/mem, reg" form
   op_imm:      reg-field /digit of the group-1 immediate form
   op_eax_imm:  short-form opcode taking eax/rax and an imm32
   Picks the cheapest encoding depending on where dst/src1/src2 live. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result is discarded: compute it in TMP_REGISTER purely for the
		   flag effects. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: op dst, src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* eax destination and the immediate would not fit in an
				   imm8 anyway: use the one-byte-shorter eax form. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Both in memory: stage src2 through TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* In-place with swapped operands (valid because op is commutative). */
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: dst = src1, then op dst, src2. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1342
/* Emit a non-commutative two-operand ALU operation (used for SUB/SBB).
   Same opcode-parameter convention as emit_cum_binary, but the operands
   cannot be swapped, so there is no "dst == src2" shortcut and the
   general path must avoid clobbering src2 when it aliases dst. */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded: compute in TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: op dst, src2. */
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				/* Shorter eax/rax, imm32 encoding. */
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Both in memory: stage src2 through TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		/* Register destination that does not alias src2. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1424
1425 static int emit_mul(struct sljit_compiler *compiler,
1426 int dst, sljit_w dstw,
1427 int src1, sljit_w src1w,
1428 int src2, sljit_w src2w)
1429 {
1430 sljit_ub* code;
1431 int dst_r;
1432
1433 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1434
1435 /* Register destination. */
1436 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1437 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1438 FAIL_IF(!code);
1439 *code++ = 0x0f;
1440 *code = 0xaf;
1441 }
1442 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1443 code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1444 FAIL_IF(!code);
1445 *code++ = 0x0f;
1446 *code = 0xaf;
1447 }
1448 else if (src1 & SLJIT_IMM) {
1449 if (src2 & SLJIT_IMM) {
1450 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1451 src2 = dst_r;
1452 src2w = 0;
1453 }
1454
1455 if (src1w <= 127 && src1w >= -128) {
1456 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1457 FAIL_IF(!code);
1458 *code = 0x6b;
1459 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1460 FAIL_IF(!code);
1461 INC_CSIZE(1);
1462 *code = (sljit_b)src1w;
1463 }
1464 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1465 else {
1466 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1467 FAIL_IF(!code);
1468 *code = 0x69;
1469 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1470 FAIL_IF(!code);
1471 INC_CSIZE(4);
1472 *(sljit_w*)code = src1w;
1473 }
1474 #else
1475 else if (IS_HALFWORD(src1w)) {
1476 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1477 FAIL_IF(!code);
1478 *code = 0x69;
1479 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1480 FAIL_IF(!code);
1481 INC_CSIZE(4);
1482 *(sljit_hw*)code = (sljit_hw)src1w;
1483 }
1484 else {
1485 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1486 if (dst_r != src2)
1487 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1488 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1489 FAIL_IF(!code);
1490 *code++ = 0x0f;
1491 *code = 0xaf;
1492 }
1493 #endif
1494 }
1495 else if (src2 & SLJIT_IMM) {
1496 /* Note: src1 is NOT immediate. */
1497
1498 if (src2w <= 127 && src2w >= -128) {
1499 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1500 FAIL_IF(!code);
1501 *code = 0x6b;
1502 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1503 FAIL_IF(!code);
1504 INC_CSIZE(1);
1505 *code = (sljit_b)src2w;
1506 }
1507 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1508 else {
1509 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1510 FAIL_IF(!code);
1511 *code = 0x69;
1512 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1513 FAIL_IF(!code);
1514 INC_CSIZE(4);
1515 *(sljit_w*)code = src2w;
1516 }
1517 #else
1518 else if (IS_HALFWORD(src2w)) {
1519 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1520 FAIL_IF(!code);
1521 *code = 0x69;
1522 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1523 FAIL_IF(!code);
1524 INC_CSIZE(4);
1525 *(sljit_hw*)code = (sljit_hw)src2w;
1526 }
1527 else {
1528 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1529 if (dst_r != src1)
1530 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1531 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1532 FAIL_IF(!code);
1533 *code++ = 0x0f;
1534 *code = 0xaf;
1535 }
1536 #endif
1537 }
1538 else {
1539 /* Neither argument is immediate. */
1540 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1541 dst_r = TMP_REGISTER;
1542 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1543 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1544 FAIL_IF(!code);
1545 *code++ = 0x0f;
1546 *code = 0xaf;
1547 }
1548
1549 if (dst_r == TMP_REGISTER)
1550 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1551
1552 return SLJIT_SUCCESS;
1553 }
1554
/* Try to implement an addition with a single lea (0x8d), which computes
   the sum without touching the flags.  Returns SLJIT_ERR_UNSUPPORTED
   when no lea form applies, so the caller can fall back to a normal add. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			/* It is not possible to be both SLJIT_LOCALS_REG. */
			if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
				/* lea dst_r, [src1 + src2] */
				code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
				FAIL_IF(!code);
				*code = 0x8d;
				done = 1;
			}
		}
		/* lea dst_r, [src1 + imm] — on x86-64 only when the displacement
		   fits in 32 bits. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
		/* Mirror case: lea dst_r, [src2 + imm]. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		/* Memory destination: flush TMP_REGISTER. */
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
1614
/* Emit a compare (sets the flags only, no destination):
   0x3d = cmp eax/rax, imm32; 0x39 = cmp r/m, reg; 0x3b = cmp reg, r/m;
   group-1 /7 = cmp r/m, imm. */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* eax compared against an immediate that would not fit in imm8:
	   use the short eax form. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			/* cmp src1, src2 */
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		/* cmp src1(mem), src2 */
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		/* Immediate vs immediate: stage src1 through TMP_REGISTER. */
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* Both in memory: stage src1 through TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
1665
1666 static int emit_test_binary(struct sljit_compiler *compiler,
1667 int src1, sljit_w src1w,
1668 int src2, sljit_w src2w)
1669 {
1670 sljit_ub* code;
1671
1672 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1673 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1674 #else
1675 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1676 #endif
1677 BINARY_EAX_IMM(0xa9, src2w);
1678 return SLJIT_SUCCESS;
1679 }
1680
1681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1682 if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1683 #else
1684 if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1685 #endif
1686 BINARY_EAX_IMM(0xa9, src1w);
1687 return SLJIT_SUCCESS;
1688 }
1689
1690 if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1691 if (src2 & SLJIT_IMM) {
1692 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1693 if (IS_HALFWORD(src2w) || compiler->mode32) {
1694 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1695 FAIL_IF(!code);
1696 *code = 0xf7;
1697 }
1698 else {
1699 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1700 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1701 FAIL_IF(!code);
1702 *code = 0x85;
1703 }
1704 #else
1705 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1706 FAIL_IF(!code);
1707 *code = 0xf7;
1708 #endif
1709 }
1710 else {
1711 code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1712 FAIL_IF(!code);
1713 *code = 0x85;
1714 }
1715 return SLJIT_SUCCESS;
1716 }
1717
1718 if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1719 if (src1 & SLJIT_IMM) {
1720 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1721 if (IS_HALFWORD(src1w) || compiler->mode32) {
1722 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1723 FAIL_IF(!code);
1724 *code = 0xf7;
1725 }
1726 else {
1727 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1728 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1729 FAIL_IF(!code);
1730 *code = 0x85;
1731 }
1732 #else
1733 code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1734 FAIL_IF(!code);
1735 *code = 0xf7;
1736 #endif
1737 }
1738 else {
1739 code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1740 FAIL_IF(!code);
1741 *code = 0x85;
1742 }
1743 return SLJIT_SUCCESS;
1744 }
1745
1746 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1747 if (src2 & SLJIT_IMM) {
1748 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1749 if (IS_HALFWORD(src2w) || compiler->mode32) {
1750 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1751 FAIL_IF(!code);
1752 *code = 0xf7;
1753 }
1754 else {
1755 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1756 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1757 FAIL_IF(!code);
1758 *code = 0x85;
1759 }
1760 #else
1761 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1762 FAIL_IF(!code);
1763 *code = 0xf7;
1764 #endif
1765 }
1766 else {
1767 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1768 FAIL_IF(!code);
1769 *code = 0x85;
1770 }
1771 return SLJIT_SUCCESS;
1772 }
1773
/* Emit a shift/rotate; mode is the /digit of the group-2 shift opcode.
   x86 variable shifts require the count in cl, so the hard part is
   shuffling values when dst/src2 conflict with SLJIT_PREF_SHIFT_REG (ecx). */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		/* Count is an immediate or already in ecx: no shuffling needed. */
		if (dst == src1 && dstw == src1w) {
			/* In-place shift. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Flags only: shift a copy in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* dst is ecx and ecx also holds the count: shift in
			   TMP_REGISTER, then move the result into ecx. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			/* Register destination: copy then shift in place. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* Memory destination: shift in TMP_REGISTER, then store. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	/* Variable count that is not already in ecx. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* dst is ecx: compute in TMP_REGISTER with the count in ecx,
		   then move the result into ecx. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Register destination independent of src2: save ecx in
		   TMP_REGISTER around the shift. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may used for
		   addressing, and we must ensure to work even in that case. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1863
1864 static int emit_shift_with_flags(struct sljit_compiler *compiler,
1865 sljit_ub mode, int set_flags,
1866 int dst, sljit_w dstw,
1867 int src1, sljit_w src1w,
1868 int src2, sljit_w src2w)
1869 {
1870 /* The CPU does not set flags if the shift count is 0. */
1871 if (src2 & SLJIT_IMM) {
1872 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1873 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
1874 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1875 #else
1876 if ((src2w & 0x1f) != 0)
1877 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1878 #endif
1879 if (!set_flags)
1880 return emit_mov(compiler, dst, dstw, src1, src1w);
1881 /* OR dst, src, 0 */
1882 return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
1883 dst, dstw, src1, src1w, SLJIT_IMM, 0);
1884 }
1885
1886 if (!set_flags)
1887 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
1888
1889 if (!(dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS))
1890 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
1891
1892 FAIL_IF(emit_shift(compiler,mode, dst, dstw, src1, src1w, src2, src2w));
1893
1894 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS)
1895 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
1896 return SLJIT_SUCCESS;
1897 }
1898
/* Emit a two-operand operation: dispatches to the specialized emitters
   with the x86 opcode bytes for each ALU operation
   (op_rm, op_mr, op_imm /digit, op_eax_imm — see emit_cum_binary). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);

	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		/* When no flags are needed, try the flag-preserving lea form first. */
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* add: 0x03 / 0x01 / imm group /0 / 0x05. */
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* adc: 0x13 / 0x11 / imm group /2 / 0x15. */
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		/* Subtracting a constant can also be a (flag-preserving) lea. */
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		/* Unused result: a compare produces the same flags more cheaply. */
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		/* sub: 0x2b / 0x29 / imm group /5 / 0x2d. */
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		/* sbb: 0x1b / 0x19 / imm group /3 / 0x1d. */
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		/* Unused result: test produces the same flags more cheaply. */
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		/* and: 0x23 / 0x21 / imm group /4 / 0x25. */
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		/* or: 0x0b / 0x09 / imm group /1 / 0x0d. */
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		/* xor: 0x33 / 0x31 / imm group /6 / 0x35. */
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		/* shl: group-2 /4. */
		return emit_shift_with_flags(compiler, 0x4 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		/* shr: group-2 /5. */
		return emit_shift_with_flags(compiler, 0x5 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		/* sar: group-2 /7. */
		return emit_shift_with_flags(compiler, 0x7 << 3, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1990
1991 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1992 {
1993 check_sljit_get_register_index(reg);
1994 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1995 if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1996 || reg == SLJIT_GENERAL_EREG1 || reg == SLJIT_GENERAL_EREG2)
1997 return -1;
1998 #endif
1999 return reg_map[reg];
2000 }
2001
2002 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
2003 void *instruction, int size)
2004 {
2005 sljit_ub *buf;
2006
2007 CHECK_ERROR();
2008 check_sljit_emit_op_custom(compiler, instruction, size);
2009 SLJIT_ASSERT(size > 0 && size < 16);
2010
2011 buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
2012 FAIL_IF(!buf);
2013 INC_SIZE(size);
2014 SLJIT_MEMMOVE(buf, instruction, size);
2015 return SLJIT_SUCCESS;
2016 }
2017
2018 /* --------------------------------------------------------------------- */
2019 /* Floating point operators */
2020 /* --------------------------------------------------------------------- */
2021
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
/* Set to 1 by init_compiler() when cpuid reports SSE2 support. */
static int sse2_available = 0;
#endif

#if (defined SLJIT_SSE2 && SLJIT_SSE2)

/* Alignment + 2 * 16 bytes. */
/* Backing storage for two 16-byte constant vectors (sign-bit mask and
   abs-value mask); sse2_buffer is set by init_compiler() to the first
   16-byte-aligned address inside this array. */
static sljit_i sse2_data[3 + 4 + 4];
static sljit_i *sse2_buffer;
2031
/* One-time initialization: builds the aligned SSE2 constant vectors and,
   when SLJIT_SSE2_AUTO is enabled, probes cpuid for SSE2 support. */
static void init_compiler()
{
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
	int features = 0;
#endif

	/* Round sse2_data up to the next 16-byte boundary. */
	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* First vector: only the sign bit of a double set
	   (used by SLJIT_FNEG via xorpd). */
	sse2_buffer[0] = 0;
	sse2_buffer[1] = 0x80000000;
	/* Second vector: all bits except the sign bit set
	   (used by SLJIT_FABS via andpd). */
	sse2_buffer[4] = 0xffffffff;
	sse2_buffer[5] = 0x7fffffff;

#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
#ifdef __GNUC__
	/* AT&T syntax. */
	/* cpuid leaf 1; ebx is saved/restored because it may hold the
	   PIC base register under gcc. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
#error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
#endif
	/* EDX bit 26 of cpuid leaf 1 is the SSE2 feature flag. */
	sse2_available = (features >> 26) & 0x1;
#endif
}
2072
2073 #endif
2074
/* Floating point support is unconditional on x86: either the x87 FPU
   or SSE2 code path is always compiled in. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
{
	/* Always available. */
	return 1;
}
2080
2081 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2082
2083 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2084 int xmm1, int xmm2, sljit_w xmm2w)
2085 {
2086 sljit_ub *buf;
2087
2088 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2089 FAIL_IF(!buf);
2090 *buf++ = 0x0f;
2091 *buf = opcode;
2092 return SLJIT_SUCCESS;
2093 }
2094
2095 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2096 int xmm1, int xmm2, sljit_w xmm2w)
2097 {
2098 sljit_ub *buf;
2099
2100 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2101 FAIL_IF(!buf);
2102 *buf++ = 0x0f;
2103 *buf = opcode;
2104 return SLJIT_SUCCESS;
2105 }
2106
/* Loads a double into an xmm register: movsd xmm, xmm/m64 (F2 0F 10). */
static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
	int dst, int src, sljit_w srcw)
{
	return emit_sse2(compiler, 0x10, dst, src, srcw);
}
2112
/* Stores a double from an xmm register: movsd xmm/m64, xmm (F2 0F 11). */
static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
	int dst, sljit_w dstw, int src)
{
	return emit_sse2(compiler, 0x11, src, dst, dstw);
}
2118
/* Single-operand floating point ops (SSE2 path): FCMP / FMOV / FNEG / FABS.
   When SLJIT_SSE2_AUTO is set this is a static helper dispatched from
   sljit_emit_fop1(); otherwise it IS the public entry point. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		/* The compare clobbers the saved flags state. */
		compiler->flags_saved = 0;
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			/* First operand must be in a register for ucomisd. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		/* 66 0F 2E: ucomisd dst_r, src - sets EFLAGS. */
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	if (op == SLJIT_FMOV) {
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		/* Memory-to-memory move goes through the scratch register. */
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	/* FNEG / FABS: compute into dst if it is a register, else into TMP_FREG. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* xorpd with the sign-bit mask prepared by init_compiler(). */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* andpd with the all-but-sign-bit mask. */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2180
/* Two-operand floating point ops (SSE2 path): FADD / FSUB / FMUL / FDIV.
   SSE2 arithmetic is destructive (dst op= src2), so src1 is first placed
   into the destination register (or TMP_FREG) where needed. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
			/* Swap arguments. */
			/* Only legal for commutative ops; dst already holds src2,
			   so apply src1 to it instead. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
		else {
			/* dst == src2 and the op is non-commutative: work in
			   TMP_FREG to keep src2 intact. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
	}

	switch (op) {
	case SLJIT_FADD:
		/* addsd (F2 0F 58). */
		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
		break;

	case SLJIT_FSUB:
		/* subsd (F2 0F 5C). */
		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
		break;

	case SLJIT_FMUL:
		/* mulsd (F2 0F 59). */
		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
		break;

	case SLJIT_FDIV:
		/* divsd (F2 0F 5E). */
		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2242
2243 #endif
2244
2245 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2246
2247 static int emit_fld(struct sljit_compiler *compiler,
2248 int src, sljit_w srcw)
2249 {
2250 sljit_ub *buf;
2251
2252 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2253 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2254 FAIL_IF(!buf);
2255 INC_SIZE(2);
2256 *buf++ = 0xd9;
2257 *buf = 0xc0 + src - 1;
2258 return SLJIT_SUCCESS;
2259 }
2260
2261 buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2262 FAIL_IF(!buf);
2263 *buf = 0xdd;
2264 return SLJIT_SUCCESS;
2265 }
2266
2267 static int emit_fop(struct sljit_compiler *compiler,
2268 sljit_ub st_arg, sljit_ub st_arg2,
2269 sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
2270 int src, sljit_w srcw)
2271 {
2272 sljit_ub *buf;
2273
2274 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2275 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2276 FAIL_IF(!buf);
2277 INC_SIZE(2);
2278 *buf++ = st_arg;
2279 *buf = st_arg2 + src;
2280 return SLJIT_SUCCESS;
2281 }
2282
2283 buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2284 FAIL_IF(!buf);
2285 *buf++ = m64fp_arg;
2286 *buf |= m64fp_arg2;
2287 return SLJIT_SUCCESS;
2288 }
2289
2290 static int emit_fop_regs(struct sljit_compiler *compiler,
2291 sljit_ub st_arg, sljit_ub st_arg2,
2292 int src)
2293 {
2294 sljit_ub *buf;
2295
2296 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2297 FAIL_IF(!buf);
2298 INC_SIZE(2);
2299 *buf++ = st_arg;
2300 *buf = st_arg2 + src;
2301 return SLJIT_SUCCESS;
2302 }
2303
/* Single-operand floating point ops (legacy x87 FPU path).
   When SLJIT_SSE2_AUTO is set this is a static helper dispatched from
   sljit_emit_fop1(); otherwise it IS the public entry point. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_ub *buf;
#endif

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
#if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* x86-32: fcomp + fnstsw ax + sahf to get the result into EFLAGS. */
		FAIL_IF(emit_fld(compiler, dst, dstw));
		FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));

		/* Copy flags. */
		/* EAX is clobbered by fnstsw/sahf, so preserve it in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
		buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!buf);
		INC_SIZE(3);
		*buf++ = 0xdf;
		*buf++ = 0xe0;
		/* Note: lahf is not supported on all x86-64 architectures. */
		*buf++ = 0x9e;
		EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
#else
		/* x86-64: fucomip (DF E8+i) writes EFLAGS directly, avoiding sahf. */
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
			FAIL_IF(emit_fld(compiler, dst, dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
		} else {
			/* src is in memory: load it too; the +1 compensates for the
			   stack slot the first fld pushed when dst is a float register. */
			FAIL_IF(emit_fld(compiler, src, srcw));
			FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
			FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
			/* fstp st(0): pop the remaining temporary. */
			FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
		}
#endif
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src, srcw));

	switch (op) {
	case SLJIT_FNEG:
		/* fchs (D9 E0). */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
		break;
	case SLJIT_FABS:
		/* fabs (D9 E1). */
		FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
		break;
	}

	/* Pop the result into dst: fstp st(i) or fstp m64fp (DD /3). */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2368
/* Two-operand floating point ops (legacy x87 FPU path). The three branches
   below pick an operand ordering that avoids extra FPU stack traffic when
   dst aliases src1 or src2; the reversed opcode variants (fsubr/fdivr
   families) keep the arithmetic order src1 OP src2 in every case. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	/* dst == src1 (register): push src2 and combine into st(src1) with a
	   popping op (DE family), leaving the result in place. */
	if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
		FAIL_IF(emit_fld(compiler, src2, src2w));

		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));
			break;
		}
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src1, src1w));

	/* dst == src2 (register): src1 is now on top; use the reversed forms
	   so the result st(src2) still equals src1 OP src2. */
	if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));
			break;
		}
		return SLJIT_SUCCESS;
	}

	/* General case: src1 on top of the stack, apply src2 (register or
	   memory via the D8/DC m64fp encodings), then pop into dst. */
	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
		break;
	case SLJIT_FSUB:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
		break;
	case SLJIT_FMUL:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
		break;
	case SLJIT_FDIV:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
		break;
	}

	/* fstp into dst (register form or DD /3 m64fp form). */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2444 #endif
2445
2446 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2447
2448 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2449 int dst, sljit_w dstw,
2450 int src, sljit_w srcw)
2451 {
2452 if (sse2_available)
2453 return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2454 else
2455 return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2456 }
2457
2458 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2459 int dst, sljit_w dstw,
2460 int src1, sljit_w src1w,
2461 int src2, sljit_w src2w)
2462 {
2463 if (sse2_available)
2464 return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2465 else
2466 return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2467 }
2468
2469 #endif
2470
2471 /* --------------------------------------------------------------------- */
2472 /* Conditional instructions */
2473 /* --------------------------------------------------------------------- */
2474
/* Creates a label at the current position in the instruction stream.
   Returns the label object, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *buf;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	check_sljit_emit_label(compiler);

	/* We should restore the flags before the label,
	   since other taken jumps has their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	/* Reuse the previous label if no code was emitted since it. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	/* Append a zero-length marker record (0, 0) to the instruction
	   buffer; it is resolved during final code generation. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 0;

	return label;
}
2503
/* Emits a (conditional) jump or call whose target is set later.
   Returns the jump object, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
	sljit_ub *buf;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Conditional jumps need the current flags; only unconditional
		   jumps/calls may restore the saved ones first. */
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls with arguments must marshal them first. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	/* Append a marker record (0, type + 4); the actual jump bytes are
	   produced during final code generation. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(buf);

	*buf++ = 0;
	*buf++ = type + 4;
	return jump;
}
2540
/* Emits an indirect jump or call through a register, memory operand,
   or immediate target. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
		/* Argument marshalling below may clobber src's register or move
		   the stack; rescue src into TMP_REGISTER / adjust srcw first. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		/* A locals-relative load must account for the stack push done
		   for the third argument (fastcall passes two in registers). */
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w);
		}
#else
		/* cdecl: all arguments are pushed, shifting locals-relative
		   offsets by one word per argument. */
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
		}
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		/* On Win64, TEMPORARY_REG3 is an argument register; save it. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Immediate target: record a patchable jump like sljit_emit_jump. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* FF /2 = call r/m, FF /4 = jmp r/m. */
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0xff;
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
2622
/* Materializes a condition flag as a 0/1 value in dst. When op is
   SLJIT_MOV the value is stored directly; otherwise the 0/1 value is
   combined into dst via sljit_emit_op2 (e.g. SLJIT_OR). Uses the
   setcc (0F 90+cc) and movzx (0F B6) encodings. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	sljit_ub *buf;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, 0));

	/* Map the sljit condition to the setcc opcode byte (0x90 + cc). */
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94; /* sete */
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95; /* setne */
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92; /* setb */
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93; /* setae */
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97; /* seta */
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96; /* setbe */
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c; /* setl */
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d; /* setge */
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f; /* setg */
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e; /* setle */
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90; /* seto */
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91; /* setno */
		break;

	case SLJIT_C_FLOAT_NAN:
		cond_set = 0x9a; /* setp */
		break;

	case SLJIT_C_FLOAT_NOT_NAN:
		cond_set = 0x9b; /* setnp */
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* x86-64: every register has a byte form (with REX), so setcc +
	   movzx can target dst (or TMP_REGISTER) directly. */
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* movzx reg, reg8 to widen the 0/1 byte. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			/* Combine the 0/1 value into dst with the requested op. */
			return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
		}
	}
#else
	/* x86-32: only EAX..EDX have byte forms, so the code below routes
	   other destinations through EAX (saved in TMP_REGISTER). */
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			/* movzx dst, dst8. */
			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			/* Preserve EAX, build the value in AL, then move it out. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_GENERAL_REG1 && dst <= SLJIT_NO_REGISTERS)
				*buf = 0xC0 | (reg_map[dst] << 3);
			else {
				*buf = 0xC0;
				/* dst is in memory: store widened EAX into it. */
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			/* Restore the original EAX. */
			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* Keep dst's old value in TMP_REGISTER for the op2 below. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			/* xchg eax, TMP_REGISTER (90+r): swap the 0/1 value with
			   the saved EAX in one byte. */
			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
2810
/* Emits a load of a patchable constant into dst and returns the constant
   object whose address can later be updated via sljit_set_const(). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* x86-64: the patchable form is mov reg, imm64, which needs a
	   register destination; spill through TMP_REGISTER otherwise. */
	compiler->mode32 = 0;
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	/* x86-32: mov r/m32, imm32 can target memory directly. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Append a marker record (0, 1) so code generation can record the
	   address of the embedded immediate. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2856
/* Rewrites the target of a rewritable jump in generated code.
   addr points at the jump's embedded address field. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* x86-32 jumps use a rel32 displacement relative to the byte
	   following the 4-byte field. */
	*(sljit_w*)addr = new_addr - (addr + 4);
#else
	/* x86-64 jumps load an absolute 64-bit address. */
	*(sljit_uw*)addr = new_addr;
#endif
}
2865
/* Rewrites the immediate of a constant emitted by sljit_emit_const().
   addr points at the embedded machine-word immediate. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
{
	*(sljit_w*)addr = new_constant;
}

  ViewVC Help
Powered by ViewVC 1.1.5