/[pcre]/code/trunk/sljit/sljitNativeX86_common.c
ViewVC logotype

Contents of /code/trunk/sljit/sljitNativeX86_common.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 847 - (show annotations)
Tue Jan 3 17:49:03 2012 UTC (3 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 78831 byte(s)
Error occurred while calculating annotation data.
fix signed/unsigned half load mismatches and JIT compiler update
1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name()
28 {
29 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
30 return "x86-32";
31 #else
32 return "x86-64";
33 #endif
34 }
35
36 /*
37 32b register indexes:
38 0 - EAX
39 1 - ECX
40 2 - EDX
41 3 - EBX
42 4 - none
43 5 - EBP
44 6 - ESI
45 7 - EDI
46 */
47
48 /*
49 64b register indexes:
50 0 - RAX
51 1 - RCX
52 2 - RDX
53 3 - RBX
54 4 - none
55 5 - RBP
56 6 - RSI
57 7 - RDI
58 8 - R8 - From now on REX prefix is required
59 9 - R9
60 10 - R10
61 11 - R11
62 12 - R12
63 13 - R13
64 14 - R14
65 15 - R15
66 */
67
/* Per-target configuration: register maps translating abstract SLJIT register
   indexes to x86 encodings, REX prefix bits, and flags/macros used by the
   shared instruction emitter (emit_x86_instruction). */
68 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
69
70 /* Last register + 1. */
71 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
72
73 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 2] = {
74 0, 0, 2, 1, 0, 0, 3, 6, 7, 0, 0, 4, 5
75 };
76
/* The "extra" registers do not exist on x86-32; they are spilled to the local
   stack area, so (p, w) is rewritten into a [LOCALS_REG + offset] operand. */
77 #define CHECK_EXTRA_REGS(p, w, do) \
78 if (p >= SLJIT_TEMPORARY_EREG1 && p <= SLJIT_TEMPORARY_EREG2) { \
79 w = compiler->temporaries_start + (p - SLJIT_TEMPORARY_EREG1) * sizeof(sljit_w); \
80 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
81 do; \
82 } \
83 else if (p >= SLJIT_GENERAL_EREG1 && p <= SLJIT_GENERAL_EREG2) { \
84 w = compiler->generals_start + (p - SLJIT_GENERAL_EREG1) * sizeof(sljit_w); \
85 p = SLJIT_MEM1(SLJIT_LOCALS_REG); \
86 do; \
87 }
88
89 #else /* SLJIT_CONFIG_X86_32 */
90
91 /* Last register + 1. */
92 #define TMP_REGISTER (SLJIT_NO_REGISTERS + 1)
93 #define TMP_REG2 (SLJIT_NO_REGISTERS + 2)
94 #define TMP_REG3 (SLJIT_NO_REGISTERS + 3)
95
96 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
97 Note: avoid using r12 and r13 for memory addressing;
98 therefore r12 is better for GENERAL_EREG than GENERAL_REG. */
99 #ifndef _WIN64
100 /* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
101 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
102 0, 0, 6, 1, 8, 11, 3, 15, 14, 13, 12, 4, 2, 7, 9
103 };
104 /* low-map. reg_map & 0x7. */
105 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
106 0, 0, 6, 1, 0, 3, 3, 7, 6, 5, 4, 4, 2, 7, 1
107 };
108 #else
109 /* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
110 static SLJIT_CONST sljit_ub reg_map[SLJIT_NO_REGISTERS + 4] = {
111 0, 0, 2, 1, 11, 13, 3, 6, 7, 14, 12, 15, 10, 8, 9
112 };
113 /* low-map. reg_map & 0x7. */
114 static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NO_REGISTERS + 4] = {
115 0, 0, 2, 1, 3, 5, 3, 6, 7, 6, 4, 7, 2, 0, 1
116 };
117 #endif
118
/* REX prefix byte and its W (64-bit operand), R/X/B (register-extension) bits. */
119 #define REX_W 0x48
120 #define REX_R 0x44
121 #define REX_X 0x42
122 #define REX_B 0x41
123 #define REX 0x40
124
/* 32-bit "halfword" types: rel32 / imm32 fields inside 64-bit code. */
125 typedef unsigned int sljit_uhw;
126 typedef int sljit_hw;
127
128 #define IS_HALFWORD(x) ((x) <= 0x7fffffffll && (x) >= -0x80000000ll)
129 #define NOT_HALFWORD(x) ((x) > 0x7fffffffll || (x) < -0x80000000ll)
130
/* On x86-64 every extra register is a real register; nothing to rewrite. */
131 #define CHECK_EXTRA_REGS(p, w, do)
132
133 #endif /* SLJIT_CONFIG_X86_32 */
134
135 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
136 #define TMP_FREG (SLJIT_FLOAT_REG4 + 1)
137 #endif
138
139 /* Size flags for emit_x86_instruction: */
140 #define EX86_BIN_INS 0x0010
141 #define EX86_SHIFT_INS 0x0020
142 #define EX86_REX 0x0040
143 #define EX86_NO_REXW 0x0080
144 #define EX86_BYTE_ARG 0x0100
145 #define EX86_HALF_ARG 0x0200
146 #define EX86_PREF_66 0x0400
147
148 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
149 #define EX86_PREF_F2 0x0800
150 #define EX86_SSE2 0x1000
151 #endif
152
/* Each record in the code buffer starts with a length byte; INC_SIZE/INC_CSIZE
   write that byte and account for it in compiler->size (consumed later by
   sljit_generate_code). */
153 #define INC_SIZE(s) (*buf++ = (s), compiler->size += (s))
154 #define INC_CSIZE(s) (*code++ = (s), compiler->size += (s))
155
156 #define PUSH_REG(r) (*buf++ = (0x50 + (r)))
157 #define POP_REG(r) (*buf++ = (0x58 + (r)))
158 #define RET() (*buf++ = (0xc3))
159 #define RETN(n) (*buf++ = (0xc2), *buf++ = n, *buf++ = 0)
160 /* r32, r/m32 */
161 #define MOV_RM(mod, reg, rm) (*buf++ = (0x8b), *buf++ = (mod) << 6 | (reg) << 3 | (rm))
162
163 static sljit_ub get_jump_code(int type)
164 {
165 switch (type) {
166 case SLJIT_C_EQUAL:
167 case SLJIT_C_FLOAT_EQUAL:
168 return 0x84;
169
170 case SLJIT_C_NOT_EQUAL:
171 case SLJIT_C_FLOAT_NOT_EQUAL:
172 return 0x85;
173
174 case SLJIT_C_LESS:
175 case SLJIT_C_FLOAT_LESS:
176 return 0x82;
177
178 case SLJIT_C_GREATER_EQUAL:
179 case SLJIT_C_FLOAT_GREATER_EQUAL:
180 return 0x83;
181
182 case SLJIT_C_GREATER:
183 case SLJIT_C_FLOAT_GREATER:
184 return 0x87;
185
186 case SLJIT_C_LESS_EQUAL:
187 case SLJIT_C_FLOAT_LESS_EQUAL:
188 return 0x86;
189
190 case SLJIT_C_SIG_LESS:
191 return 0x8c;
192
193 case SLJIT_C_SIG_GREATER_EQUAL:
194 return 0x8d;
195
196 case SLJIT_C_SIG_GREATER:
197 return 0x8f;
198
199 case SLJIT_C_SIG_LESS_EQUAL:
200 return 0x8e;
201
202 case SLJIT_C_OVERFLOW:
203 case SLJIT_C_MUL_OVERFLOW:
204 return 0x80;
205
206 case SLJIT_C_NOT_OVERFLOW:
207 case SLJIT_C_MUL_NOT_OVERFLOW:
208 return 0x81;
209
210 case SLJIT_C_FLOAT_NAN:
211 return 0x8a;
212
213 case SLJIT_C_FLOAT_NOT_NAN:
214 return 0x8b;
215 }
216 return 0;
217 }
218
219 static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, int type);
220
221 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
222 static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_w addr, int type);
223 #endif
224
/* Emit the opcode byte(s) for a jump/call whose target may fit a short (rel8)
   or near (rel32) displacement.  The displacement itself is NOT written here:
   jump->addr is set to point at it and PATCH_MB/PATCH_MW is flagged so the
   patch loop in sljit_generate_code fills it in after all sizes are known. */
225 static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, int type)
226 {
227 int short_jump;
228 sljit_uw label_addr;
229
230 if (jump->flags & JUMP_LABEL)
231 label_addr = (sljit_uw)(code + jump->u.label->size); /* label not resolved yet; size is its offset */
232 else
233 label_addr = jump->u.target;
234 short_jump = (sljit_w)(label_addr - (jump->addr + 2)) >= -128 && (sljit_w)(label_addr - (jump->addr + 2)) <= 127;
235
236 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* rel32 cannot span more than +/-2GB; fall back to an absolute 64-bit jump. */
237 if ((sljit_w)(label_addr - (jump->addr + 1)) > 0x7fffffffll || (sljit_w)(label_addr - (jump->addr + 1)) < -0x80000000ll)
238 return generate_far_jump_code(jump, code_ptr, type);
239 #endif
240
241 if (type == SLJIT_JUMP) {
242 if (short_jump)
243 *code_ptr++ = 0xeb; /* jmp rel8 */
244 else
245 *code_ptr++ = 0xe9; /* jmp rel32 */
246 jump->addr++;
247 }
248 else if (type >= SLJIT_FAST_CALL) {
249 short_jump = 0; /* call has no rel8 form */
250 *code_ptr++ = 0xe8; /* call rel32 */
251 jump->addr++;
252 }
253 else if (short_jump) {
254 *code_ptr++ = get_jump_code(type) - 0x10; /* jcc rel8 = long form - 0x10 */
255 jump->addr++;
256 }
257 else {
258 *code_ptr++ = 0x0f; /* two-byte jcc rel32 */
259 *code_ptr++ = get_jump_code(type);
260 jump->addr += 2;
261 }
262
263 if (short_jump) {
264 jump->flags |= PATCH_MB; /* patch one displacement byte later */
265 code_ptr += sizeof(sljit_b);
266 } else {
267 jump->flags |= PATCH_MW; /* patch a machine-word/rel32 displacement later */
268 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
269 code_ptr += sizeof(sljit_w);
270 #else
271 code_ptr += sizeof(sljit_hw);
272 #endif
273 }
274
275 return code_ptr;
276 }
277
/* Second (final) code generation pass.  The intermediate buffers contain
   length-prefixed records: a positive length byte is followed by that many
   already-encoded machine-code bytes; a zero length byte is followed by a tag
   (0 = label, 1 = const, 2/3 = fixed call/jump, >= 4 = jump of type tag-4).
   After laying everything out, a patch loop resolves all jump displacements.
   Returns the executable code buffer, or NULL on allocation failure. */
278 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
279 {
280 struct sljit_memory_fragment *buf;
281 sljit_ub *code;
282 sljit_ub *code_ptr;
283 sljit_ub *buf_ptr;
284 sljit_ub *buf_end;
285 sljit_ub len;
286
287 struct sljit_label *label;
288 struct sljit_jump *jump;
289 struct sljit_const *const_;
290
291 CHECK_ERROR_PTR();
292 check_sljit_generate_code(compiler);
293 reverse_buf(compiler);
294
295 /* Second code generation pass. */
296 code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
297 PTR_FAIL_WITH_EXEC_IF(code);
298 buf = compiler->buf;
299
/* Labels, jumps and consts were recorded in program order, so a single
   cursor into each list stays in sync with the records in the buffers. */
300 code_ptr = code;
301 label = compiler->labels;
302 jump = compiler->jumps;
303 const_ = compiler->consts;
304 do {
305 buf_ptr = buf->memory;
306 buf_end = buf_ptr + buf->used_size;
307 do {
308 len = *buf_ptr++;
309 if (len > 0) {
310 /* The code is already generated. */
311 SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
312 code_ptr += len;
313 buf_ptr += len;
314 }
315 else {
316 if (*buf_ptr >= 4) {
317 jump->addr = (sljit_uw)code_ptr;
318 if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
319 code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
320 else
321 code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
322 jump = jump->next;
323 }
324 else if (*buf_ptr == 0) {
325 label->addr = (sljit_uw)code_ptr;
326 label->size = code_ptr - code;
327 label = label->next;
328 }
329 else if (*buf_ptr == 1) {
/* The constant's immediate was emitted just before this record. */
330 const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_w);
331 const_ = const_->next;
332 }
333 else {
334 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
335 *code_ptr++ = (*buf_ptr == 2) ? 0xe8 /* call */ : 0xe9 /* jmp */;
336 buf_ptr++;
337 *(sljit_w*)code_ptr = *(sljit_w*)buf_ptr - ((sljit_w)code_ptr + sizeof(sljit_w));
338 code_ptr += sizeof(sljit_w);
339 buf_ptr += sizeof(sljit_w) - 1;
340 #else
341 code_ptr = generate_fixed_jump(code_ptr, *(sljit_w*)(buf_ptr + 1), *buf_ptr);
342 buf_ptr += sizeof(sljit_w);
343 #endif
344 }
345 buf_ptr++;
346 }
347 } while (buf_ptr < buf_end);
348 SLJIT_ASSERT(buf_ptr == buf_end);
349 buf = buf->next;
350 } while (buf);
351
352 SLJIT_ASSERT(!label);
353 SLJIT_ASSERT(!jump);
354 SLJIT_ASSERT(!const_);
355
/* Patch pass: every jump's displacement field (left open above) is now
   resolved against the final label addresses. */
356 jump = compiler->jumps;
357 while (jump) {
358 if (jump->flags & PATCH_MB) {
359 SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) >= -128 && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_b))) <= 127);
360 *(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_b)));
361 } else if (jump->flags & PATCH_MW) {
362 if (jump->flags & JUMP_LABEL) {
363 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
364 *(sljit_w*)jump->addr = (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_w)));
365 #else
366 SLJIT_ASSERT((sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
367 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.label->addr - (jump->addr + sizeof(sljit_hw)));
368 #endif
369 }
370 else {
371 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
372 *(sljit_w*)jump->addr = (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_w)));
373 #else
374 SLJIT_ASSERT((sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) >= -0x80000000ll && (sljit_w)(jump->u.target - (jump->addr + sizeof(sljit_hw))) <= 0x7fffffffll);
375 *(sljit_hw*)jump->addr = (sljit_hw)(jump->u.target - (jump->addr + sizeof(sljit_hw)));
376 #endif
377 }
378 }
379 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
380 else if (jump->flags & PATCH_MD)
381 *(sljit_w*)jump->addr = jump->u.label->addr; /* absolute 64-bit address */
382 #endif
383
384 jump = jump->next;
385 }
386
387 /* Maybe we waste some space because of short jumps. */
388 SLJIT_ASSERT(code_ptr <= code + compiler->size);
389 compiler->error = SLJIT_ERR_COMPILED;
390 compiler->executable_size = compiler->size;
391 return (void*)code;
392 }
393
394 /* --------------------------------------------------------------------- */
395 /* Operators */
396 /* --------------------------------------------------------------------- */
397
398 static int emit_cum_binary(struct sljit_compiler *compiler,
399 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
400 int dst, sljit_w dstw,
401 int src1, sljit_w src1w,
402 int src2, sljit_w src2w);
403
404 static int emit_non_cum_binary(struct sljit_compiler *compiler,
405 sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
406 int dst, sljit_w dstw,
407 int src1, sljit_w src1w,
408 int src2, sljit_w src2w);
409
410 static int emit_mov(struct sljit_compiler *compiler,
411 int dst, sljit_w dstw,
412 int src, sljit_w srcw);
413
/* Save the CPU flags across subsequent instructions: pushfd/pushfq stores
   EFLAGS on the stack, then lea (which does not modify flags, unlike add)
   moves the stack pointer back up, leaving the saved flags word just below
   ESP/RSP where emit_restore_flags can reach it.
   NOTE(review): this assumes the word below the stack pointer stays intact
   until emit_restore_flags runs (no intervening push/call). */
414 static SLJIT_INLINE int emit_save_flags(struct sljit_compiler *compiler)
415 {
416 sljit_ub *buf;
417
418 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
419 buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
420 FAIL_IF(!buf);
421 INC_SIZE(5);
422 *buf++ = 0x9c; /* pushfd */
423 #else
424 buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
425 FAIL_IF(!buf);
426 INC_SIZE(6);
427 *buf++ = 0x9c; /* pushfq */
428 *buf++ = 0x48; /* REX.W prefix for the 64-bit lea below */
429 #endif
430 *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp + sizeof(sljit_w)] */
431 *buf++ = 0x64;
432 *buf++ = 0x24;
433 *buf++ = sizeof(sljit_w);
434 compiler->flags_saved = 1;
435 return SLJIT_SUCCESS;
436 }
437
/* Counterpart of emit_save_flags: lea moves the stack pointer back down to
   the saved flags word (lea leaves flags untouched), then popfd/popfq reloads
   EFLAGS.  keep_flags becomes the new flags_saved state, so a caller can keep
   the saved copy live for further use. */
438 static SLJIT_INLINE int emit_restore_flags(struct sljit_compiler *compiler, int keep_flags)
439 {
440 sljit_ub *buf;
441
442 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
443 buf = (sljit_ub*)ensure_buf(compiler, 1 + 5);
444 FAIL_IF(!buf);
445 INC_SIZE(5);
446 #else
447 buf = (sljit_ub*)ensure_buf(compiler, 1 + 6);
448 FAIL_IF(!buf);
449 INC_SIZE(6);
450 *buf++ = 0x48; /* REX.W prefix for the 64-bit lea below */
451 #endif
452 *buf++ = 0x8d; /* lea esp/rsp, [esp/rsp - sizeof(sljit_w)] */
453 *buf++ = 0x64;
454 *buf++ = 0x24;
455 *buf++ = (sljit_ub)-(int)sizeof(sljit_w);
456 *buf++ = 0x9d; /* popfd / popfq */
457 compiler->flags_saved = keep_flags;
458 return SLJIT_SUCCESS;
459 }
460
/* On Windows the stack can only grow one guard page at a time, so a large
   local area must be "touched" page by page.  alloca() makes the compiler
   emit its _chkstk probe, which performs exactly that. */
461 #ifdef _WIN32
462 #include <malloc.h>
463
464 static void SLJIT_CALL sljit_touch_stack(sljit_w local_size)
465 {
466 /* Workaround for calling _chkstk. */
467 alloca(local_size);
468 }
469 #endif
470
471 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
472 #include "sljitNativeX86_32.c"
473 #else
474 #include "sljitNativeX86_64.c"
475 #endif
476
/* Emit a full-width move between any two operands (register, memory or
   immediate).  Opcode map: 0x8b = mov r, r/m; 0x89 = mov r/m, r;
   0xb8+reg = mov r, imm; 0xc7 = mov r/m, imm32.  Memory-to-memory moves
   go through TMP_REGISTER, which this helper is allowed to clobber. */
477 static int emit_mov(struct sljit_compiler *compiler,
478 int dst, sljit_w dstw,
479 int src, sljit_w srcw)
480 {
481 sljit_ub* code;
482
483 if (dst == SLJIT_UNUSED) {
484 /* No destination, doesn't need to setup flags. */
485 if (src & SLJIT_MEM) {
/* Still perform the load: the memory access (and any fault) must happen. */
486 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
487 FAIL_IF(!code);
488 *code = 0x8b;
489 }
490 return SLJIT_SUCCESS;
491 }
492 if (src >= SLJIT_TEMPORARY_REG1 && src <= TMP_REGISTER) {
493 code = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
494 FAIL_IF(!code);
495 *code = 0x89;
496 return SLJIT_SUCCESS;
497 }
498 if (src & SLJIT_IMM) {
499 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
500 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
501 return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
502 #else
/* 64-bit mode: immediates wider than 32 bits need the movabs form. */
503 if (!compiler->mode32) {
504 if (NOT_HALFWORD(srcw))
505 return emit_load_imm64(compiler, dst, srcw);
506 }
507 else
508 return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, 0xb8 + reg_lmap[dst], srcw);
509 #endif
510 }
511 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* mov r/m64, imm only takes a sign-extended imm32; stage wide immediates
   in TMP_REG2 first. */
512 if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
513 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
514 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
515 FAIL_IF(!code);
516 *code = 0x89;
517 return SLJIT_SUCCESS;
518 }
519 #endif
520 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
521 FAIL_IF(!code);
522 *code = 0xc7;
523 return SLJIT_SUCCESS;
524 }
525 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
526 code = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
527 FAIL_IF(!code);
528 *code = 0x8b;
529 return SLJIT_SUCCESS;
530 }
531
532 /* Memory to memory move. Requires two instruction. */
533 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src, srcw);
534 FAIL_IF(!code);
535 *code = 0x8b;
536 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
537 FAIL_IF(!code);
538 *code = 0x89;
539 return SLJIT_SUCCESS;
540 }
541
/* Convenience wrapper: propagate any emit_mov failure to the caller. */
542 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
543 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
544
/* Emit a zero-operand operation: breakpoint (int3), nop, or one of the
   mul/div ops that use the fixed eax/edx register pair.  For mul/div the
   0xf7 group-3 opcode is emitted with sub-opcode /4 mul, /5 imul, /6 div,
   /7 idiv; the operand register is chosen per-ABI so that eax/edx hold the
   implicit arguments. */
545 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op0(struct sljit_compiler *compiler, int op)
546 {
547 sljit_ub *buf;
548 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
549 int size;
550 #endif
551
552 CHECK_ERROR();
553 check_sljit_emit_op0(compiler, op);
554
555 switch (GET_OPCODE(op)) {
556 case SLJIT_BREAKPOINT:
557 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
558 FAIL_IF(!buf);
559 INC_SIZE(1);
560 *buf = 0xcc; /* int3 */
561 break;
562 case SLJIT_NOP:
563 buf = (sljit_ub*)ensure_buf(compiler, 1 + 1);
564 FAIL_IF(!buf);
565 INC_SIZE(1);
566 *buf = 0x90; /* nop */
567 break;
568 case SLJIT_UMUL:
569 case SLJIT_SMUL:
570 case SLJIT_UDIV:
571 case SLJIT_SDIV:
572 compiler->flags_saved = 0;
573 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
/* These ops hard-code eax/edx; verify the register map still matches. */
574 #ifdef _WIN64
575 SLJIT_COMPILE_ASSERT(
576 reg_map[SLJIT_TEMPORARY_REG1] == 0
577 && reg_map[SLJIT_TEMPORARY_REG2] == 2
578 && reg_map[TMP_REGISTER] > 7,
579 invalid_register_assignment_for_div_mul);
580 #else
581 SLJIT_COMPILE_ASSERT(
582 reg_map[SLJIT_TEMPORARY_REG1] == 0
583 && reg_map[SLJIT_TEMPORARY_REG2] < 7
584 && reg_map[TMP_REGISTER] == 2,
585 invalid_register_assignment_for_div_mul);
586 #endif
587 compiler->mode32 = op & SLJIT_INT_OP;
588 #endif
589
590 op = GET_OPCODE(op);
591 if (op == SLJIT_UDIV) {
/* Unsigned divide: zero the high half of the dividend (xor edx, edx). */
592 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
593 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
594 buf = emit_x86_instruction(compiler, 1, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0);
595 #else
596 buf = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
597 #endif
598 FAIL_IF(!buf);
599 *buf = 0x33; /* xor r, r/m */
600 }
601
602 if (op == SLJIT_SDIV) {
603 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
604 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG2, 0);
605 EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG1, 0);
606 #else
607 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
608 #endif
609
/* Signed divide: sign-extend the dividend into edx/rdx with
   sar edx, 31 (0xc1 /7 imm8) or sar rdx, 63 under REX.W. */
610 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
611 buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
612 FAIL_IF(!buf);
613 INC_SIZE(3);
614 *buf++ = 0xc1;
615 *buf++ = 0xfa;
616 *buf = 0x1f;
617 #else
618 if (compiler->mode32) {
619 buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
620 FAIL_IF(!buf);
621 INC_SIZE(3);
622 *buf++ = 0xc1;
623 *buf++ = 0xfa;
624 *buf = 0x1f;
625 } else {
626 buf = (sljit_ub*)ensure_buf(compiler, 1 + 4);
627 FAIL_IF(!buf);
628 INC_SIZE(4);
629 *buf++ = REX_W;
630 *buf++ = 0xc1;
631 *buf++ = 0xfa;
632 *buf = 0x3f;
633 }
634 #endif
635 }
636
637 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
638 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
639 FAIL_IF(!buf);
640 INC_SIZE(2);
641 *buf++ = 0xf7; /* group-3 opcode; sub-opcode OR-ed in below */
642 *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_map[TMP_REGISTER] : reg_map[SLJIT_TEMPORARY_REG2]);
643 #else
644 #ifdef _WIN64
645 size = (!compiler->mode32 || op >= SLJIT_UDIV) ? 3 : 2;
646 #else
647 size = (!compiler->mode32) ? 3 : 2;
648 #endif
649 buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
650 FAIL_IF(!buf);
651 INC_SIZE(size);
652 #ifdef _WIN64
653 if (!compiler->mode32)
654 *buf++ = REX_W | ((op >= SLJIT_UDIV) ? REX_B : 0);
655 else if (op >= SLJIT_UDIV)
656 *buf++ = REX_B;
657 *buf++ = 0xf7;
658 *buf = 0xc0 | ((op >= SLJIT_UDIV) ? reg_lmap[TMP_REGISTER] : reg_lmap[SLJIT_TEMPORARY_REG2]);
659 #else
660 if (!compiler->mode32)
661 *buf++ = REX_W;
662 *buf++ = 0xf7;
663 *buf = 0xc0 | reg_map[SLJIT_TEMPORARY_REG2];
664 #endif
665 #endif
/* Select the group-3 sub-opcode: /4 mul, /5 imul, /6 div, /7 idiv. */
666 switch (op) {
667 case SLJIT_UMUL:
668 *buf |= 4 << 3;
669 break;
670 case SLJIT_SMUL:
671 *buf |= 5 << 3;
672 break;
673 case SLJIT_UDIV:
674 *buf |= 6 << 3;
675 break;
676 case SLJIT_SDIV:
677 *buf |= 7 << 3;
678 break;
679 }
680 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
681 EMIT_MOV(compiler, SLJIT_TEMPORARY_REG2, 0, TMP_REGISTER, 0);
682 #endif
683 break;
684 }
685
686 return SLJIT_SUCCESS;
687 }
688
/* Emit a single raw prefix/opcode byte into the instruction stream. */
689 #define ENCODE_PREFIX(prefix) \
690 do { \
691 code = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
692 FAIL_IF(!code); \
693 INC_CSIZE(1); \
694 *code = (prefix); \
695 } while (0)
696
/* Emit a byte-sized move with zero (sign == 0) or sign (sign != 0) extension.
   Opcode map: 0xc6 = mov r/m8, imm8; 0x0f b6/be = movzx/movsx r, r/m8;
   0x88 = mov r/m8, r8; 0x87 = xchg; 0x90+r = xchg eax, r.
   On x86-32 only eax/ecx/edx/ebx have byte forms (reg_map < 4), which forces
   the shift/mask and register-juggling workarounds below. */
697 static int emit_mov_byte(struct sljit_compiler *compiler, int sign,
698 int dst, sljit_w dstw,
699 int src, sljit_w srcw)
700 {
701 sljit_ub* code;
702 int dst_r;
703 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
704 int work_r;
705 #endif
706
707 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
708 compiler->mode32 = 0;
709 #endif
710
711 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
712 return SLJIT_SUCCESS; /* Empty instruction. */
713
714 if (src & SLJIT_IMM) {
715 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
716 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
717 return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
718 #else
719 return emit_load_imm64(compiler, dst, srcw);
720 #endif
721 }
722 code = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
723 FAIL_IF(!code);
724 *code = 0xc6;
725 return SLJIT_SUCCESS;
726 }
727
728 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
729
730 if ((dst & SLJIT_MEM) && src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS) {
731 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Registers with reg_map >= 4 have no byte form; stage through TMP. */
732 if (reg_map[src] >= 4) {
733 SLJIT_ASSERT(dst_r == TMP_REGISTER);
734 EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
735 } else
736 dst_r = src;
737 #else
738 dst_r = src;
739 #endif
740 }
741 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
742 else if (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS && reg_map[src] >= 4) {
743 /* src, dst are registers. */
744 SLJIT_ASSERT(dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER);
745 if (reg_map[dst] < 4) {
746 if (dst != src)
747 EMIT_MOV(compiler, dst, 0, src, 0);
748 code = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
749 FAIL_IF(!code);
750 *code++ = 0x0f;
751 *code = sign ? 0xbe : 0xb6;
752 }
753 else {
/* dst has no byte form either: extend in place with shifts or a mask. */
754 if (dst != src)
755 EMIT_MOV(compiler, dst, 0, src, 0);
756 if (sign) {
757 /* shl reg, 24 */
758 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
759 FAIL_IF(!code);
760 *code |= 0x4 << 3;
761 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
762 FAIL_IF(!code);
763 /* shr/sar reg, 24 */
764 *code |= 0x7 << 3;
765 }
766 else {
767 /* and dst, 0xff */
768 code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 255, dst, 0);
769 FAIL_IF(!code);
770 *(code + 1) |= 0x4 << 3;
771 }
772 }
773 return SLJIT_SUCCESS;
774 }
775 #endif
776 else {
777 /* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
778 code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
779 FAIL_IF(!code);
780 *code++ = 0x0f;
781 *code = sign ? 0xbe : 0xb6;
782 }
783
784 if (dst & SLJIT_MEM) {
785 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
786 if (dst_r == TMP_REGISTER) {
787 /* Find a non-used register, whose reg_map[src] < 4. */
788 if ((dst & 0xf) == SLJIT_TEMPORARY_REG1) {
789 if ((dst & 0xf0) == (SLJIT_TEMPORARY_REG2 << 4))
790 work_r = SLJIT_TEMPORARY_REG3;
791 else
792 work_r = SLJIT_TEMPORARY_REG2;
793 }
794 else {
795 if ((dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
796 work_r = SLJIT_TEMPORARY_REG1;
797 else if ((dst & 0xf) == SLJIT_TEMPORARY_REG2)
798 work_r = SLJIT_TEMPORARY_REG3;
799 else
800 work_r = SLJIT_TEMPORARY_REG2;
801 }
802
/* Swap the value into work_r (xchg), store its low byte, swap back.
   0x90 + reg is the one-byte xchg-with-eax form. */
803 if (work_r == SLJIT_TEMPORARY_REG1) {
804 ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
805 }
806 else {
807 code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
808 FAIL_IF(!code);
809 *code = 0x87;
810 }
811
812 code = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
813 FAIL_IF(!code);
814 *code = 0x88;
815
816 if (work_r == SLJIT_TEMPORARY_REG1) {
817 ENCODE_PREFIX(0x90 + reg_map[TMP_REGISTER]);
818 }
819 else {
820 code = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
821 FAIL_IF(!code);
822 *code = 0x87;
823 }
824 }
825 else {
826 code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
827 FAIL_IF(!code);
828 *code = 0x88;
829 }
830 #else
831 code = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
832 FAIL_IF(!code);
833 *code = 0x88;
834 #endif
835 }
836
837 return SLJIT_SUCCESS;
838 }
839
/* Emit a 16-bit move with zero (sign == 0) or sign (sign != 0) extension.
   Opcode map: 0xc7 with 0x66 prefix = mov r/m16, imm16;
   0x0f b7/bf = movzx/movsx r, r/m16; 0x89 with 0x66 prefix = mov r/m16, r16.
   Unlike the byte case, every register has a 16-bit form, so no x86-32
   register juggling is needed. */
840 static int emit_mov_half(struct sljit_compiler *compiler, int sign,
841 int dst, sljit_w dstw,
842 int src, sljit_w srcw)
843 {
844 sljit_ub* code;
845 int dst_r;
846
847 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
848 compiler->mode32 = 0;
849 #endif
850
851 if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
852 return SLJIT_SUCCESS; /* Empty instruction. */
853
854 if (src & SLJIT_IMM) {
855 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) {
856 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
857 return emit_do_imm(compiler, 0xb8 + reg_map[dst], srcw);
858 #else
859 return emit_load_imm64(compiler, dst, srcw);
860 #endif
861 }
862 code = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
863 FAIL_IF(!code);
864 *code = 0xc7;
865 return SLJIT_SUCCESS;
866 }
867
868 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REGISTER;
869
870 if ((dst & SLJIT_MEM) && (src >= SLJIT_TEMPORARY_REG1 && src <= SLJIT_NO_REGISTERS))
871 dst_r = src; /* register-to-memory: store src's low 16 bits directly */
872 else {
873 code = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
874 FAIL_IF(!code);
875 *code++ = 0x0f;
876 *code = sign ? 0xbf : 0xb7;
877 }
878
879 if (dst & SLJIT_MEM) {
880 code = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
881 FAIL_IF(!code);
882 *code = 0x89;
883 }
884
885 return SLJIT_SUCCESS;
886 }
887
/* Emit an in-place unary operation from the 0xf7 group-3 family; un_index is
   the sub-opcode placed in the ModRM reg field (2 = not, 3 = neg).  Operates
   directly on dst when possible, otherwise stages through TMP_REGISTER. */
888 static int emit_unary(struct sljit_compiler *compiler, int un_index,
889 int dst, sljit_w dstw,
890 int src, sljit_w srcw)
891 {
892 sljit_ub* code;
893
894 if (dst == SLJIT_UNUSED) {
/* Result discarded; executed only for the flag side effects. */
895 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
896 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
897 FAIL_IF(!code);
898 *code++ = 0xf7;
899 *code |= (un_index) << 3;
900 return SLJIT_SUCCESS;
901 }
902 if (dst == src && dstw == srcw) {
903 /* Same input and output */
904 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
905 FAIL_IF(!code);
906 *code++ = 0xf7;
907 *code |= (un_index) << 3;
908 return SLJIT_SUCCESS;
909 }
910 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
911 EMIT_MOV(compiler, dst, 0, src, srcw);
912 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
913 FAIL_IF(!code);
914 *code++ = 0xf7;
915 *code |= (un_index) << 3;
916 return SLJIT_SUCCESS;
917 }
/* dst is memory: compute in TMP_REGISTER, then store. */
918 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
919 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
920 FAIL_IF(!code);
921 *code++ = 0xf7;
922 *code |= (un_index) << 3;
923 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
924 return SLJIT_SUCCESS;
925 }
926
/* Emit a bitwise NOT that also leaves meaningful condition flags.  x86 "not"
   (0xf7 /2) does not set flags, so it is followed by "or reg, reg" (0x0b),
   which sets the flags from the result without changing it. */
927 static int emit_not_with_flags(struct sljit_compiler *compiler,
928 int dst, sljit_w dstw,
929 int src, sljit_w srcw)
930 {
931 sljit_ub* code;
932
933 if (dst == SLJIT_UNUSED) {
934 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
935 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
936 FAIL_IF(!code);
937 *code++ = 0xf7;
938 *code |= 0x2 << 3; /* /2 = not */
939 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
940 FAIL_IF(!code);
941 *code = 0x0b; /* or reg, reg: sets flags */
942 return SLJIT_SUCCESS;
943 }
944 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
945 EMIT_MOV(compiler, dst, 0, src, srcw);
946 code = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
947 FAIL_IF(!code);
948 *code++ = 0xf7;
949 *code |= 0x2 << 3;
950 code = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
951 FAIL_IF(!code);
952 *code = 0x0b;
953 return SLJIT_SUCCESS;
954 }
/* dst is memory: compute in TMP_REGISTER, then store. */
955 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
956 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
957 FAIL_IF(!code);
958 *code++ = 0xf7;
959 *code |= 0x2 << 3;
960 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, TMP_REGISTER, 0);
961 FAIL_IF(!code);
962 *code = 0x0b;
963 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
964 return SLJIT_SUCCESS;
965 }
966
/* Emit a count-leading-zeros sequence.  Core idea: bsr (0x0f 0xbd) yields the
   index of the highest set bit (and leaves the destination unchanged, setting
   ZF, when the source is zero), so the destination is preloaded with a
   sentinel, cmovne (0x0f 0x45) conditionally overwrites it with the bsr
   result, and a final xor with width-1 (BIN_INS /6 = xor) converts the bit
   index into a leading-zero count (sentinel ^ (width-1) == width for the
   all-zero input). */
967 static int emit_clz(struct sljit_compiler *compiler, int op,
968 int dst, sljit_w dstw,
969 int src, sljit_w srcw)
970 {
971 sljit_ub* code;
972 int dst_r;
973
974 if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
975 /* Just set the zero flag. */
/* not (0xf7 /2) then shr by width-1 (/5) reproduces the flag outcome of
   the full sequence without computing the result. */
976 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
977 code = emit_x86_instruction(compiler, 1, 0, 0, TMP_REGISTER, 0);
978 FAIL_IF(!code);
979 *code++ = 0xf7;
980 *code |= 0x2 << 3;
981 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
982 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REGISTER, 0);
983 #else
984 code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, TMP_REGISTER, 0);
985 #endif
986 FAIL_IF(!code);
987 *code |= 0x5 << 3; /* /5 = shr */
988 return SLJIT_SUCCESS;
989 }
990
991 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
992 EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
993 src = TMP_REGISTER;
994 srcw = 0;
995 }
996
997 code = emit_x86_instruction(compiler, 2, TMP_REGISTER, 0, src, srcw);
998 FAIL_IF(!code);
999 *code++ = 0x0f;
1000 *code = 0xbd; /* bsr TMP_REGISTER, src */
1001
1002 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1003 if (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER)
1004 dst_r = dst;
1005 else {
1006 /* Find an unused temporary register. */
1007 if ((dst & 0xf) != SLJIT_TEMPORARY_REG1 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG1 << 4))
1008 dst_r = SLJIT_TEMPORARY_REG1;
1009 else if ((dst & 0xf) != SLJIT_TEMPORARY_REG2 && (dst & 0xf0) != (SLJIT_TEMPORARY_REG2 << 4))
1010 dst_r = SLJIT_TEMPORARY_REG2;
1011 else
1012 dst_r = SLJIT_TEMPORARY_REG3;
/* Park dst_r's current value in the destination slot; the xchg at the
   end both stores the result and restores the borrowed register. */
1013 EMIT_MOV(compiler, dst, dstw, dst_r, 0);
1014 }
1015 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); /* sentinel: 63 ^ 31 == 32 */
1016 #else
1017 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= TMP_REGISTER) ? dst : TMP_REG2;
1018 compiler->mode32 = 0;
1019 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
1020 compiler->mode32 = op & SLJIT_INT_OP;
1021 #endif
1022
1023 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REGISTER, 0);
1024 FAIL_IF(!code);
1025 *code++ = 0x0f;
1026 *code = 0x45; /* cmovne dst_r, TMP_REGISTER (only when src was non-zero) */
1027
1028 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1029 code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1030 #else
1031 code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
1032 #endif
1033 FAIL_IF(!code);
1034 *(code + 1) |= 0x6 << 3; /* /6 = xor */
1035
1036 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1037 if (dst & SLJIT_MEM) {
1038 code = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1039 FAIL_IF(!code);
1040 *code = 0x87; /* xchg: store result, restore borrowed register */
1041 }
1042 #else
1043 if (dst & SLJIT_MEM)
1044 EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
1045 #endif
1046 return SLJIT_SUCCESS;
1047 }
1048
/* Emits a single-operand operation: every MOV variant (plain, sign/zero
   extending byte/half/int loads, and the MOVU pre/post-update forms),
   plus NOT, NEG and CLZ. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op1(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	sljit_ub* code;
	int update = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* On x86-32 the "extra" virtual registers live in memory; these flags
	   record whether CHECK_EXTRA_REGS remapped an operand to such a slot. */
	int dst_is_ereg = 0;
	int src_is_ereg = 0;
#else
	/* x86-64 has enough hardware registers; no extra-register handling. */
#define src_is_ereg 0
#endif

	CHECK_ERROR();
	check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);

	if (GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_MOVU_SI) {
		op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Moves always operate on full machine words unless a sized
		   variant is selected below. */
		compiler->mode32 = 0;
#endif

		/* The MOVU opcodes mirror the MOV opcodes at a fixed offset of 7;
		   reduce them to the plain form and remember the address update. */
		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 7 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 7;
		}

		if (src & SLJIT_IMM) {
			/* Normalize the immediate to the size and signedness of
			   the requested move. */
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (unsigned char)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (signed char)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (unsigned short)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (signed short)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (unsigned int)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (signed int)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* Immediate into an extra (memory-resident) register is a
			   plain memory store; no size handling needed. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		/* Pre-update addressing: fold the displacement into the base
		   register with an LEA before the load. */
		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & 0xf) && (srcw != 0 || (src & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, src & 0xf, 0, src, srcw);
			FAIL_IF(!code);
			*code = 0x8d;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* A sized/extending move cannot target a memory-resident extra
		   register directly; go through TMP_REGISTER instead. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_LOCALS_REG));
			dst = TMP_REGISTER;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32 int moves are word moves. */
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned char)srcw : srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed char)srcw : srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned short)srcw : srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed short)srcw : srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, (src & SLJIT_IMM) ? (unsigned int)srcw : srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, (src & SLJIT_IMM) ? (signed int)srcw : srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the TMP_REGISTER detour back into the extra register slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REGISTER)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), dstw, TMP_REGISTER, 0);
#endif

		/* Post-update addressing on the destination side. */
		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & 0xf) && (dstw != 0 || (dst & 0xf0) != 0)) {
			code = emit_x86_instruction(compiler, 1, dst & 0xf, 0, dst, dstw);
			FAIL_IF(!code);
			*code = 0x8d;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op)))
		compiler->flags_saved = 0;

	switch (GET_OPCODE(op)) {
	case SLJIT_NOT:
		/* NOT itself does not set flags; use the OR-based variant when
		   the E flag is requested. */
		if (SLJIT_UNLIKELY(op & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, 0x2, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, 0x3, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#undef src_is_ereg
#endif
}
1196
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

/* Emits "op r/m, imm". On x86-64 an immediate wider than a sign-extended
   32-bit value cannot be encoded directly, so it is first loaded into
   TMP_REG2 and the register form (_op_mr_) is emitted instead.
   _op_imm_ is the /digit (reg field) of the 0x81/0x83 immediate group. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!code); \
		*(code + 1) |= (_op_imm_); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!code); \
		*code = (_op_mr_); \
	}

/* Short "op eax, imm32" encoding; REX.W prefixed in 64-bit mode. */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (_op_eax_imm_), immw))

#else

/* Emits "op r/m, imm32"; on x86-32 any immediate fits directly. */
#define BINARY_IMM(_op_imm_, _op_mr_, immw, arg, argw) \
	code = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!code); \
	*(code + 1) |= (_op_imm_);

/* Short "op eax, imm32" encoding. */
#define BINARY_EAX_IMM(_op_eax_imm_, immw) \
	FAIL_IF(emit_do_imm(compiler, (_op_eax_imm_), immw))

#endif
1226
/* Emits a commutative two-operand instruction (ADD/ADC/OR/XOR/AND).
   op_rm: "reg, r/m" opcode; op_mr: "r/m, reg" opcode; op_imm: /digit of
   the immediate group; op_eax_imm: short "eax, imm32" opcode.
   Commutativity is exploited so that dst == src2 can also be computed
   in place. */
static int emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; compute into TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: dst op= src2. */
		if (src2 & SLJIT_IMM) {
			/* Prefer the shorter eax,imm32 form when it applies (the
			   imm8 form is already shorter for small constants). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= TMP_REGISTER) {
			/* Special exception for sljit_emit_cond_value. */
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Both operands in memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* In-place with operands swapped: dst op= src1. */
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
		/* Register destination: dst = src1; dst op= src2. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1342
/* Emits a non-commutative two-operand instruction (SUB/SBB). Opcode
   parameters as in emit_cum_binary. Unlike the cumulative variant there
   is no dst == src2 in-place path, and the register-destination shortcut
   requires dst != src2 so src2 is not clobbered before it is read. */
static int emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	if (dst == SLJIT_UNUSED) {
		/* Result discarded; compute into TMP_REGISTER for the flags only. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		/* In-place: dst op= src2. */
		if (src2 & SLJIT_IMM) {
			/* Prefer the shorter eax,imm32 form when it applies. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_TEMPORARY_REG1) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			code = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		else {
			/* Both operands in memory: stage src2 in TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src2, src2w);
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, dst, dstw);
			FAIL_IF(!code);
			*code = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if ((dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) && dst != src2) {
		/* Register destination: dst = src1; dst op= src2. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REGISTER, 0);
		}
		else {
			code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
			FAIL_IF(!code);
			*code = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1424
1425 static int emit_mul(struct sljit_compiler *compiler,
1426 int dst, sljit_w dstw,
1427 int src1, sljit_w src1w,
1428 int src2, sljit_w src2w)
1429 {
1430 sljit_ub* code;
1431 int dst_r;
1432
1433 dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;
1434
1435 /* Register destination. */
1436 if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1437 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1438 FAIL_IF(!code);
1439 *code++ = 0x0f;
1440 *code = 0xaf;
1441 }
1442 else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1443 code = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1444 FAIL_IF(!code);
1445 *code++ = 0x0f;
1446 *code = 0xaf;
1447 }
1448 else if (src1 & SLJIT_IMM) {
1449 if (src2 & SLJIT_IMM) {
1450 EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1451 src2 = dst_r;
1452 src2w = 0;
1453 }
1454
1455 if (src1w <= 127 && src1w >= -128) {
1456 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1457 FAIL_IF(!code);
1458 *code = 0x6b;
1459 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1460 FAIL_IF(!code);
1461 INC_CSIZE(1);
1462 *code = (sljit_b)src1w;
1463 }
1464 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1465 else {
1466 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1467 FAIL_IF(!code);
1468 *code = 0x69;
1469 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1470 FAIL_IF(!code);
1471 INC_CSIZE(4);
1472 *(sljit_w*)code = src1w;
1473 }
1474 #else
1475 else if (IS_HALFWORD(src1w)) {
1476 code = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1477 FAIL_IF(!code);
1478 *code = 0x69;
1479 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1480 FAIL_IF(!code);
1481 INC_CSIZE(4);
1482 *(sljit_hw*)code = (sljit_hw)src1w;
1483 }
1484 else {
1485 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1486 if (dst_r != src2)
1487 EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1488 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1489 FAIL_IF(!code);
1490 *code++ = 0x0f;
1491 *code = 0xaf;
1492 }
1493 #endif
1494 }
1495 else if (src2 & SLJIT_IMM) {
1496 /* Note: src1 is NOT immediate. */
1497
1498 if (src2w <= 127 && src2w >= -128) {
1499 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1500 FAIL_IF(!code);
1501 *code = 0x6b;
1502 code = (sljit_ub*)ensure_buf(compiler, 1 + 1);
1503 FAIL_IF(!code);
1504 INC_CSIZE(1);
1505 *code = (sljit_b)src2w;
1506 }
1507 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1508 else {
1509 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1510 FAIL_IF(!code);
1511 *code = 0x69;
1512 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1513 FAIL_IF(!code);
1514 INC_CSIZE(4);
1515 *(sljit_w*)code = src2w;
1516 }
1517 #else
1518 else if (IS_HALFWORD(src2w)) {
1519 code = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1520 FAIL_IF(!code);
1521 *code = 0x69;
1522 code = (sljit_ub*)ensure_buf(compiler, 1 + 4);
1523 FAIL_IF(!code);
1524 INC_CSIZE(4);
1525 *(sljit_hw*)code = (sljit_hw)src2w;
1526 }
1527 else {
1528 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
1529 if (dst_r != src1)
1530 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1531 code = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1532 FAIL_IF(!code);
1533 *code++ = 0x0f;
1534 *code = 0xaf;
1535 }
1536 #endif
1537 }
1538 else {
1539 /* Neither argument is immediate. */
1540 if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1541 dst_r = TMP_REGISTER;
1542 EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1543 code = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1544 FAIL_IF(!code);
1545 *code++ = 0x0f;
1546 *code = 0xaf;
1547 }
1548
1549 if (dst_r == TMP_REGISTER)
1550 EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
1551
1552 return SLJIT_SUCCESS;
1553 }
1554
/* Tries to emit an addition as a single LEA (which does not modify the
   flags). Supports reg+reg and reg+imm forms only; returns
   SLJIT_ERR_UNSUPPORTED when the operand combination cannot be encoded,
   in which case the caller falls back to a normal ADD. */
static int emit_lea_binary(struct sljit_compiler *compiler,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;
	int dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
			/* It is not possible to be both SLJIT_LOCALS_REG. */
			if (src1 != SLJIT_LOCALS_REG || src2 != SLJIT_LOCALS_REG) {
				/* lea dst_r, [src1 + src2]. */
				code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
				FAIL_IF(!code);
				*code = 0x8d;
				done = 1;
			}
		}
		/* lea dst_r, [src1 + imm]; on x86-64 the displacement must fit
		   in a sign-extended 32-bit field. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (int)src2w);
#else
		if (src2 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}
	else if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
		/* Mirrored case: lea dst_r, [src2 + imm]. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (int)src1w);
#else
		if (src1 & SLJIT_IMM) {
			code = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!code);
			*code = 0x8d;
			done = 1;
		}
	}

	if (done) {
		if (dst_r == TMP_REGISTER)
			return emit_mov(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
1614
/* Emits CMP src1, src2 (flags only, no destination). Used for SUB when
   the result is discarded. */
static int emit_cmp_binary(struct sljit_compiler *compiler,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Short "cmp eax, imm32" form for large immediates. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(0x3d, src2w);
		return SLJIT_SUCCESS;
	}

	if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
		if (src2 & SLJIT_IMM) {
			/* /7 = CMP in the immediate group; 0x39 = cmp r/m, reg. */
			BINARY_IMM(0x7 << 3, 0x39, src2w, src1, 0);
		}
		else {
			/* 0x3b = cmp reg, r/m. */
			code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!code);
			*code = 0x3b;
		}
		return SLJIT_SUCCESS;
	}

	if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS && !(src1 & SLJIT_IMM)) {
		/* src1 in memory, src2 a register: cmp r/m, reg. */
		code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!code);
		*code = 0x39;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			/* Both immediate: materialize src1 first. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			src1 = TMP_REGISTER;
			src1w = 0;
		}
		BINARY_IMM(0x7 << 3, 0x39, src2w, src1, src1w);
	}
	else {
		/* Both operands in memory: stage src1 in TMP_REGISTER. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
		FAIL_IF(!code);
		*code = 0x3b;
	}
	return SLJIT_SUCCESS;
}
1665
1666 static int emit_test_binary(struct sljit_compiler *compiler,
1667 int src1, sljit_w src1w,
1668 int src2, sljit_w src2w)
1669 {
1670 sljit_ub* code;
1671
1672 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1673 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1674 #else
1675 if (src1 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1676 #endif
1677 BINARY_EAX_IMM(0xa9, src2w);
1678 return SLJIT_SUCCESS;
1679 }
1680
1681 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1682 if (src2 == SLJIT_TEMPORARY_REG1 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1683 #else
1684 if (src2 == SLJIT_TEMPORARY_REG1 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1685 #endif
1686 BINARY_EAX_IMM(0xa9, src1w);
1687 return SLJIT_SUCCESS;
1688 }
1689
1690 if (src1 >= SLJIT_TEMPORARY_REG1 && src1 <= SLJIT_NO_REGISTERS) {
1691 if (src2 & SLJIT_IMM) {
1692 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1693 if (IS_HALFWORD(src2w) || compiler->mode32) {
1694 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1695 FAIL_IF(!code);
1696 *code = 0xf7;
1697 }
1698 else {
1699 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1700 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1701 FAIL_IF(!code);
1702 *code = 0x85;
1703 }
1704 #else
1705 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1706 FAIL_IF(!code);
1707 *code = 0xf7;
1708 #endif
1709 }
1710 else {
1711 code = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1712 FAIL_IF(!code);
1713 *code = 0x85;
1714 }
1715 return SLJIT_SUCCESS;
1716 }
1717
1718 if (src2 >= SLJIT_TEMPORARY_REG1 && src2 <= SLJIT_NO_REGISTERS) {
1719 if (src1 & SLJIT_IMM) {
1720 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1721 if (IS_HALFWORD(src1w) || compiler->mode32) {
1722 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1723 FAIL_IF(!code);
1724 *code = 0xf7;
1725 }
1726 else {
1727 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1728 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1729 FAIL_IF(!code);
1730 *code = 0x85;
1731 }
1732 #else
1733 code = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1734 FAIL_IF(!code);
1735 *code = 0xf7;
1736 #endif
1737 }
1738 else {
1739 code = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1740 FAIL_IF(!code);
1741 *code = 0x85;
1742 }
1743 return SLJIT_SUCCESS;
1744 }
1745
1746 EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
1747 if (src2 & SLJIT_IMM) {
1748 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1749 if (IS_HALFWORD(src2w) || compiler->mode32) {
1750 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1751 FAIL_IF(!code);
1752 *code = 0xf7;
1753 }
1754 else {
1755 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1756 code = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REGISTER, 0);
1757 FAIL_IF(!code);
1758 *code = 0x85;
1759 }
1760 #else
1761 code = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REGISTER, 0);
1762 FAIL_IF(!code);
1763 *code = 0xf7;
1764 #endif
1765 }
1766 else {
1767 code = emit_x86_instruction(compiler, 1, TMP_REGISTER, 0, src2, src2w);
1768 FAIL_IF(!code);
1769 *code = 0x85;
1770 }
1771 return SLJIT_SUCCESS;
1772 }
1773
/* Emits a shift operation. 'mode' is the /digit (reg field) of the shift
   group selecting SHL/SHR/SAR. A variable shift amount must be in ecx
   (SLJIT_PREF_SHIFT_REG), so most of this function is about juggling
   operands into and out of ecx without losing live values. */
static int emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	sljit_ub* code;

	/* Easy cases: the amount is an immediate or already in ecx. */
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			/* Shift in place. */
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			/* Flags only: compute into TMP_REGISTER. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* Destination is ecx while ecx also holds the amount:
			   shift in TMP_REGISTER, then move the result into ecx. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			FAIL_IF(!code);
			*code |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
			return SLJIT_SUCCESS;
		}
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) {
			/* Register destination: copy then shift. */
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!code);
			*code |= mode;
			return SLJIT_SUCCESS;
		}

		/* Memory destination: shift in TMP_REGISTER, store back. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		return SLJIT_SUCCESS;
	}

	/* The shift amount is not an immediate and not yet in ecx. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* Result goes to ecx: compute in TMP_REGISTER, load the amount
		   into ecx, shift, then move the result into ecx. */
		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		/* Register destination that does not clash with the amount:
		   save ecx in TMP_REGISTER around the shift. */
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!code);
		*code |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
	}
	else {
		/* This case is really difficult, since ecx can be used for
		   addressing as well, and we must ensure to work even in that case. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)), SLJIT_PREF_SHIFT_REG, 0);
#endif

		EMIT_MOV(compiler, TMP_REGISTER, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		code = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REGISTER, 0);
		FAIL_IF(!code);
		*code |= mode;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		/* [esp - 4] is reserved for eflags. */
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), -(int)(2 * sizeof(sljit_w)));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
	}

	return SLJIT_SUCCESS;
}
1865
/* Emits a two-operand operation, dispatching to the dedicated emitters.
   Also manages the lazily-saved arithmetic flags: ADDC/SUBC need the C
   flag restored, KEEP_FLAGS forces a save, and flag-setting ops
   invalidate any saved copy. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op2(struct sljit_compiler *compiler, int op,
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);

	/* MUL and the logical/shift ops never consume a saved C flag;
	   handle flag bookkeeping up front for them. */
	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			/* A flag-free ADD may be a single LEA. */
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, 0x03, 0x01, 0x0 << 3, 0x05,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, 0x13, 0x11, 0x2 << 3, 0x15,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			/* A flag-free SUB by immediate may be LEA with -imm. */
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, 0x2b, 0x29, 0x5 << 3, 0x2d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, 0x1b, 0x19, 0x3 << 3, 0x1d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, 0x23, 0x21, 0x4 << 3, 0x25,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, 0x0b, 0x09, 0x1 << 3, 0x0d,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, 0x33, 0x31, 0x6 << 3, 0x35,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift(compiler, 0x4 << 3,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift(compiler, 0x5 << 3,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift(compiler, 0x7 << 3,
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
1957
1958 SLJIT_API_FUNC_ATTRIBUTE int sljit_get_register_index(int reg)
1959 {
1960 check_sljit_get_register_index(reg);
1961 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1962 if (reg == SLJIT_TEMPORARY_EREG1 || reg == SLJIT_TEMPORARY_EREG2
1963 || reg == SLJIT_GENERAL_EREG1 || reg == SLJIT_GENERAL_EREG2)
1964 return -1;
1965 #endif
1966 return reg_map[reg];
1967 }
1968
1969 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_op_custom(struct sljit_compiler *compiler,
1970 void *instruction, int size)
1971 {
1972 sljit_ub *buf;
1973
1974 CHECK_ERROR();
1975 check_sljit_emit_op_custom(compiler, instruction, size);
1976 SLJIT_ASSERT(size > 0 && size < 16);
1977
1978 buf = (sljit_ub*)ensure_buf(compiler, 1 + size);
1979 FAIL_IF(!buf);
1980 INC_SIZE(size);
1981 SLJIT_MEMMOVE(buf, instruction, size);
1982 return SLJIT_SUCCESS;
1983 }
1984
1985 /* --------------------------------------------------------------------- */
1986 /* Floating point operators */
1987 /* --------------------------------------------------------------------- */
1988
1989 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
1990 static int sse2_available = 0;
1991 #endif
1992
1993 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
1994
1995 /* Alignment + 2 * 16 bytes. */
1996 static sljit_i sse2_data[3 + 4 + 4];
1997 static sljit_i *sse2_buffer;
1998
/* One-time initialization: aligns and fills the SSE2 constant buffer and,
   when SLJIT_SSE2_AUTO is enabled, detects SSE2 support via CPUID
   (EDX bit 26 of leaf 1). */
static void init_compiler()
{
#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
	int features = 0;
#endif

	/* Round sse2_data up to a 16 byte boundary for aligned SSE access. */
	sse2_buffer = (sljit_i*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Bit patterns 0x8000000000000000 and 0x7fffffffffffffff; presumably
	   sign-bit masks used by the floating point emitters (negate/abs) —
	   confirm against sljit_emit_fop1. */
	sse2_buffer[0] = 0;
	sse2_buffer[1] = 0x80000000;
	sse2_buffer[4] = 0xffffffff;
	sse2_buffer[5] = 0x7fffffff;

#if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
#ifdef __GNUC__
	/* AT&T syntax. */
	/* ebx is preserved because it may be the PIC register. */
	asm (
		"pushl %%ebx\n"
		"movl $0x1, %%eax\n"
		"cpuid\n"
		"popl %%ebx\n"
		"movl %%edx, %0\n"
		: "=g" (features)
		:
		: "%eax", "%ecx", "%edx"
	);
#elif defined(_MSC_VER) || defined(__BORLANDC__)
	/* Intel syntax. */
	__asm {
		mov eax, 1
		push ebx
		cpuid
		pop ebx
		mov features, edx
	}
#else
#error "SLJIT_SSE2_AUTO is not implemented for this C compiler"
#endif
	/* CPUID.1:EDX bit 26 indicates SSE2. */
	sse2_available = (features >> 26) & 0x1;
#endif
}
2039
2040 #endif
2041
/* Floating point support is unconditional on x86 (x87 is always
   present; SSE2 selection is handled separately), so this always
   reports available. */
SLJIT_API_FUNC_ATTRIBUTE int sljit_is_fpu_available(void)
{
	/* Always available. */
	return 1;
}
2047
2048 #if (defined SLJIT_SSE2 && SLJIT_SSE2)
2049
2050 static int emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2051 int xmm1, int xmm2, sljit_w xmm2w)
2052 {
2053 sljit_ub *buf;
2054
2055 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_F2 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2056 FAIL_IF(!buf);
2057 *buf++ = 0x0f;
2058 *buf = opcode;
2059 return SLJIT_SUCCESS;
2060 }
2061
2062 static int emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2063 int xmm1, int xmm2, sljit_w xmm2w)
2064 {
2065 sljit_ub *buf;
2066
2067 buf = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2068 FAIL_IF(!buf);
2069 *buf++ = 0x0f;
2070 *buf = opcode;
2071 return SLJIT_SUCCESS;
2072 }
2073
2074 static SLJIT_INLINE int emit_sse2_load(struct sljit_compiler *compiler,
2075 int dst, int src, sljit_w srcw)
2076 {
2077 return emit_sse2(compiler, 0x10, dst, src, srcw);
2078 }
2079
2080 static SLJIT_INLINE int emit_sse2_store(struct sljit_compiler *compiler,
2081 int dst, sljit_w dstw, int src)
2082 {
2083 return emit_sse2(compiler, 0x11, src, dst, dstw);
2084 }
2085
/* SSE2 implementation of the unary float ops (FCMP, FMOV, FNEG, FABS).
   Exported directly as sljit_emit_fop1 when SSE2 is selected at compile
   time, otherwise called through the runtime dispatcher. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop1(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src, sljit_w srcw)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	/* FCMP: 66 0F 2E (ucomisd) sets EFLAGS directly; nothing is stored. */
	if (GET_OPCODE(op) == SLJIT_FCMP) {
		compiler->flags_saved = 0;
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			dst_r = dst;
		else {
			/* The first compare operand must be an xmm register. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, dst_r, dst, dstw));
		}
		return emit_sse2_logic(compiler, 0x2e, dst_r, src, srcw);
	}

	/* FMOV: a single movsd in the cheapest direction. */
	if (op == SLJIT_FMOV) {
		if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4)
			return emit_sse2_load(compiler, dst, src, srcw);
		if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4)
			return emit_sse2_store(compiler, dst, dstw, src);
		/* Memory-to-memory: bounce through the scratch xmm register. */
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	}

	/* FNEG/FABS: get the source into dst (or the scratch register when
	   dst is in memory), then mask the sign bit in place. */
	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, dst_r, src, srcw));
	}

	switch (op) {
	case SLJIT_FNEG:
		/* 0x57 (xorpd) with the sign-bit mask built in init_compiler. */
		FAIL_IF(emit_sse2_logic(compiler, 0x57, dst_r, SLJIT_MEM0(), (sljit_w)sse2_buffer));
		break;

	case SLJIT_FABS:
		/* 0x54 (andpd) with the sign-clear mask at sse2_buffer + 4. */
		FAIL_IF(emit_sse2_logic(compiler, 0x54, dst_r, SLJIT_MEM0(), (sljit_w)(sse2_buffer + 4)));
		break;
	}

	/* Spill the scratch register when the destination is in memory. */
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2147
/* SSE2 implementation of the binary float ops (FADD/FSUB/FMUL/FDIV).
   SSE2 arithmetic is destructive (dst_r op= src2), so src1 must first
   be arranged into dst_r. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_sse2_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	int dst_r;

	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_FADD || op == SLJIT_FMUL)) {
			/* Swap arguments: legal because these ops are commutative. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, dst_r, src1, src1w));
		else {
			/* dst == src2 and op is not commutative: compute in the
			   scratch register and store afterwards. */
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, TMP_FREG, src1, src1w));
	}

	switch (op) {
	case SLJIT_FADD:
		/* F2 0F 58: addsd. */
		FAIL_IF(emit_sse2(compiler, 0x58, dst_r, src2, src2w));
		break;

	case SLJIT_FSUB:
		/* F2 0F 5C: subsd. */
		FAIL_IF(emit_sse2(compiler, 0x5c, dst_r, src2, src2w));
		break;

	case SLJIT_FMUL:
		/* F2 0F 59: mulsd. */
		FAIL_IF(emit_sse2(compiler, 0x59, dst_r, src2, src2w));
		break;

	case SLJIT_FDIV:
		/* F2 0F 5E: divsd. */
		FAIL_IF(emit_sse2(compiler, 0x5e, dst_r, src2, src2w));
		break;
	}

	/* Spill the scratch register when dst is not an xmm register. */
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2209
2210 #endif
2211
2212 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO) || !(defined SLJIT_SSE2 && SLJIT_SSE2)
2213
2214 static int emit_fld(struct sljit_compiler *compiler,
2215 int src, sljit_w srcw)
2216 {
2217 sljit_ub *buf;
2218
2219 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2220 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2221 FAIL_IF(!buf);
2222 INC_SIZE(2);
2223 *buf++ = 0xd9;
2224 *buf = 0xc0 + src - 1;
2225 return SLJIT_SUCCESS;
2226 }
2227
2228 buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2229 FAIL_IF(!buf);
2230 *buf = 0xdd;
2231 return SLJIT_SUCCESS;
2232 }
2233
2234 static int emit_fop(struct sljit_compiler *compiler,
2235 sljit_ub st_arg, sljit_ub st_arg2,
2236 sljit_ub m64fp_arg, sljit_ub m64fp_arg2,
2237 int src, sljit_w srcw)
2238 {
2239 sljit_ub *buf;
2240
2241 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2242 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2243 FAIL_IF(!buf);
2244 INC_SIZE(2);
2245 *buf++ = st_arg;
2246 *buf = st_arg2 + src;
2247 return SLJIT_SUCCESS;
2248 }
2249
2250 buf = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2251 FAIL_IF(!buf);
2252 *buf++ = m64fp_arg;
2253 *buf |= m64fp_arg2;
2254 return SLJIT_SUCCESS;
2255 }
2256
2257 static int emit_fop_regs(struct sljit_compiler *compiler,
2258 sljit_ub st_arg, sljit_ub st_arg2,
2259 int src)
2260 {
2261 sljit_ub *buf;
2262
2263 buf = (sljit_ub*)ensure_buf(compiler, 1 + 2);
2264 FAIL_IF(!buf);
2265 INC_SIZE(2);
2266 *buf++ = st_arg;
2267 *buf = st_arg2 + src;
2268 return SLJIT_SUCCESS;
2269 }
2270
2271 #if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2272 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2273 #else
2274 static int sljit_emit_fpu_fop1(struct sljit_compiler *compiler, int op,
2275 #endif
2276 int dst, sljit_w dstw,
2277 int src, sljit_w srcw)
2278 {
2279 #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2280 sljit_ub *buf;
2281 #endif
2282
2283 CHECK_ERROR();
2284 check_sljit_emit_fop1(compiler, op, dst, dstw, src, srcw);
2285
2286 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2287 compiler->mode32 = 1;
2288 #endif
2289
2290 if (GET_OPCODE(op) == SLJIT_FCMP) {
2291 compiler->flags_saved = 0;
2292 #if !(defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2293 FAIL_IF(emit_fld(compiler, dst, dstw));
2294 FAIL_IF(emit_fop(compiler, 0xd8, 0xd8, 0xdc, 0x3 << 3, src, srcw));
2295
2296 /* Copy flags. */
2297 EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);
2298 buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2299 FAIL_IF(!buf);
2300 INC_SIZE(3);
2301 *buf++ = 0xdf;
2302 *buf++ = 0xe0;
2303 /* Note: lahf is not supported on all x86-64 architectures. */
2304 *buf++ = 0x9e;
2305 EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
2306 #else
2307 if (src >= SLJIT_FLOAT_REG1 && src <= SLJIT_FLOAT_REG4) {
2308 FAIL_IF(emit_fld(compiler, dst, dstw));
2309 FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
2310 } else {
2311 FAIL_IF(emit_fld(compiler, src, srcw));
2312 FAIL_IF(emit_fld(compiler, dst + ((dst >= SLJIT_FLOAT_REG1 && dst <= SLJIT_FLOAT_REG4) ? 1 : 0), dstw));
2313 FAIL_IF(emit_fop_regs(compiler, 0xdf, 0xe8, src));
2314 FAIL_IF(emit_fop_regs(compiler, 0xdd, 0xd8, 0));
2315 }
2316 #endif
2317 return SLJIT_SUCCESS;
2318 }
2319
2320 FAIL_IF(emit_fld(compiler, src, srcw));
2321
2322 switch (op) {
2323 case SLJIT_FNEG:
2324 FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe0, 0));
2325 break;
2326 case SLJIT_FABS:
2327 FAIL_IF(emit_fop_regs(compiler, 0xd9, 0xe1, 0));
2328 break;
2329 }
2330
2331 FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));
2332
2333 return SLJIT_SUCCESS;
2334 }
2335
/* x87 FPU implementation of the binary float ops. The "p" (pop)
   instruction family (de ..+i) is used for the in-place forms so the
   result lands directly in the destination stack register. */
#if !(defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
#else
static int sljit_emit_fpu_fop2(struct sljit_compiler *compiler, int op,
#endif
	int dst, sljit_w dstw,
	int src1, sljit_w src1w,
	int src2, sljit_w src2w)
{
	CHECK_ERROR();
	check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	/* In-place form, dst == src1 (FP register): push src2 on top and
	   combine into src1's slot. st(0) holds src2 here, so the plain
	   fsubp/fdivp variants compute src1 <op> src2. */
	if (src1 >= SLJIT_FLOAT_REG1 && src1 <= SLJIT_FLOAT_REG4 && dst == src1) {
		FAIL_IF(emit_fld(compiler, src2, src2w));

		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src1));	/* faddp */
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe8, src1));	/* fsubp */
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src1));	/* fmulp */
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf8, src1));	/* fdivp */
			break;
		}
		return SLJIT_SUCCESS;
	}

	FAIL_IF(emit_fld(compiler, src1, src1w));

	/* In-place form, dst == src2 (FP register): src1 is now on top, so
	   the reversed variants (fsubrp/fdivrp) keep the operand order
	   src1 <op> src2 for the non-commutative ops. */
	if (src2 >= SLJIT_FLOAT_REG1 && src2 <= SLJIT_FLOAT_REG4 && dst == src2) {
		switch (op) {
		case SLJIT_FADD:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc0, src2));	/* faddp */
			break;
		case SLJIT_FSUB:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xe0, src2));	/* fsubrp */
			break;
		case SLJIT_FMUL:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xc8, src2));	/* fmulp */
			break;
		case SLJIT_FDIV:
			FAIL_IF(emit_fop_regs(compiler, 0xde, 0xf0, src2));	/* fdivrp */
			break;
		}
		return SLJIT_SUCCESS;
	}

	/* General case: st(0) = src1; apply src2 via the d8 register form
	   or the dc /digit m64fp form, then pop the result into dst. */
	switch (op) {
	case SLJIT_FADD:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc0, 0xdc, 0x0 << 3, src2, src2w));
		break;
	case SLJIT_FSUB:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xe0, 0xdc, 0x4 << 3, src2, src2w));
		break;
	case SLJIT_FMUL:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xc8, 0xdc, 0x1 << 3, src2, src2w));
		break;
	case SLJIT_FDIV:
		FAIL_IF(emit_fop(compiler, 0xd8, 0xf0, 0xdc, 0x6 << 3, src2, src2w));
		break;
	}

	/* fstp st(i) / fstp m64fp. */
	FAIL_IF(emit_fop(compiler, 0xdd, 0xd8, 0xdd, 0x3 << 3, dst, dstw));

	return SLJIT_SUCCESS;
}
2411 #endif
2412
2413 #if (defined SLJIT_SSE2_AUTO && SLJIT_SSE2_AUTO)
2414
2415 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop1(struct sljit_compiler *compiler, int op,
2416 int dst, sljit_w dstw,
2417 int src, sljit_w srcw)
2418 {
2419 if (sse2_available)
2420 return sljit_emit_sse2_fop1(compiler, op, dst, dstw, src, srcw);
2421 else
2422 return sljit_emit_fpu_fop1(compiler, op, dst, dstw, src, srcw);
2423 }
2424
2425 SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_fop2(struct sljit_compiler *compiler, int op,
2426 int dst, sljit_w dstw,
2427 int src1, sljit_w src1w,
2428 int src2, sljit_w src2w)
2429 {
2430 if (sse2_available)
2431 return sljit_emit_sse2_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2432 else
2433 return sljit_emit_fpu_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2434 }
2435
2436 #endif
2437
2438 /* --------------------------------------------------------------------- */
2439 /* Conditional instructions */
2440 /* --------------------------------------------------------------------- */
2441
2442 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2443 {
2444 sljit_ub *buf;
2445 struct sljit_label *label;
2446
2447 CHECK_ERROR_PTR();
2448 check_sljit_emit_label(compiler);
2449
2450 /* We should restore the flags before the label,
2451 since other taken jumps has their own flags as well. */
2452 if (SLJIT_UNLIKELY(compiler->flags_saved))
2453 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2454
2455 if (compiler->last_label && compiler->last_label->size == compiler->size)
2456 return compiler->last_label;
2457
2458 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2459 PTR_FAIL_IF(!label);
2460 set_label(label, compiler);
2461
2462 buf = (sljit_ub*)ensure_buf(compiler, 2);
2463 PTR_FAIL_IF(!buf);
2464
2465 *buf++ = 0;
2466 *buf++ = 0;
2467
2468 return label;
2469 }
2470
/* Creates a jump/call record whose target is filled in later; reserves
   worst-case space for the instruction and emits a record marker. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, int type)
{
	sljit_ub *buf;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	check_sljit_emit_jump(compiler, type);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Only conditional/plain jumps need the saved flags restored;
		   calls simply drop the cached state. */
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Calls with arguments marshal them before the call itself. */
	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	/* Two-byte record marker (0, type + 4) consumed by code generation. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(buf);

	*buf++ = 0;
	*buf++ = type + 4;
	return jump;
}
2507
/* Emits an indirect jump or call to the address given by src/srcw
   (immediate targets become patchable jump records instead). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_ijump(struct sljit_compiler *compiler, int type, int src, sljit_w srcw)
{
	sljit_ub *code;
	struct sljit_jump *jump;

	CHECK_ERROR();
	check_sljit_emit_ijump(compiler, type, src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		/* Only plain jumps need the flags restored. */
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		/* Rescue the target when it sits in SLJIT_TEMPORARY_REG3 —
		   presumably overwritten by call_with_args (fastcall argument
		   register); TODO confirm against call_with_args. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
		/* With fastcall, only the third argument is pushed, shifting
		   stack-relative (SLJIT_LOCALS_REG based) operands by one word. */
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG && type >= SLJIT_CALL3) {
			if (src & 0xf0) {
				/* Indexed form cannot be adjusted: load the target
				   before the arguments are pushed. */
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w);
		}
#else
		/* cdecl: every argument is pushed, displacing stack-relative
		   operands by one word per argument. */
		if ((src & SLJIT_MEM) && (src & 0xf) == SLJIT_LOCALS_REG) {
			if (src & 0xf0) {
				EMIT_MOV(compiler, TMP_REGISTER, 0, src, srcw);
				src = TMP_REGISTER;
			}
			else
				srcw += sizeof(sljit_w) * (type - SLJIT_CALL0);
		}
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		/* Win64: the target register may double as an argument
		   register; rescue it before marshalling. */
		if (src == SLJIT_TEMPORARY_REG3) {
			EMIT_MOV(compiler, TMP_REGISTER, 0, src, 0);
			src = TMP_REGISTER;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		/* Immediate target: create a patchable jump record. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		/* Two-byte record marker (0, type + 4). */
		code = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(code);

		*code++ = 0;
		*code++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* Indirect form: ff /2 (call) or ff /4 (jmp). */
		code = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!code);
		*code++ = 0xff;
		*code |= (type >= SLJIT_FAST_CALL) ? (2 << 3) : (4 << 3);
	}
	return SLJIT_SUCCESS;
}
2589
/* Materializes the current condition flags as a 0/1 value: with
   op == SLJIT_MOV the value is stored into dst, otherwise dst is
   combined with the value via sljit_emit_op2 (dst = dst <op> flag). */
SLJIT_API_FUNC_ATTRIBUTE int sljit_emit_cond_value(struct sljit_compiler *compiler, int op, int dst, sljit_w dstw, int type)
{
	sljit_ub *buf;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR();
	check_sljit_emit_cond_value(compiler, op, dst, dstw, type);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, 0));

	/* Translate the abstract condition into the setcc opcode byte
	   (instruction is 0x0f <cond_set>). */
	switch (type) {
	case SLJIT_C_EQUAL:
	case SLJIT_C_FLOAT_EQUAL:
		cond_set = 0x94;	/* sete */
		break;

	case SLJIT_C_NOT_EQUAL:
	case SLJIT_C_FLOAT_NOT_EQUAL:
		cond_set = 0x95;	/* setne */
		break;

	case SLJIT_C_LESS:
	case SLJIT_C_FLOAT_LESS:
		cond_set = 0x92;	/* setb */
		break;

	case SLJIT_C_GREATER_EQUAL:
	case SLJIT_C_FLOAT_GREATER_EQUAL:
		cond_set = 0x93;	/* setae */
		break;

	case SLJIT_C_GREATER:
	case SLJIT_C_FLOAT_GREATER:
		cond_set = 0x97;	/* seta */
		break;

	case SLJIT_C_LESS_EQUAL:
	case SLJIT_C_FLOAT_LESS_EQUAL:
		cond_set = 0x96;	/* setbe */
		break;

	case SLJIT_C_SIG_LESS:
		cond_set = 0x9c;	/* setl */
		break;

	case SLJIT_C_SIG_GREATER_EQUAL:
		cond_set = 0x9d;	/* setge */
		break;

	case SLJIT_C_SIG_GREATER:
		cond_set = 0x9f;	/* setg */
		break;

	case SLJIT_C_SIG_LESS_EQUAL:
		cond_set = 0x9e;	/* setle */
		break;

	case SLJIT_C_OVERFLOW:
	case SLJIT_C_MUL_OVERFLOW:
		cond_set = 0x90;	/* seto */
		break;

	case SLJIT_C_NOT_OVERFLOW:
	case SLJIT_C_MUL_NOT_OVERFLOW:
		cond_set = 0x91;	/* setno */
		break;

	case SLJIT_C_FLOAT_NAN:
		cond_set = 0x9a;	/* setp */
		break;

	case SLJIT_C_FLOAT_NOT_NAN:
		cond_set = 0x9b;	/* setnp */
		break;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Use the destination register directly when possible, otherwise
	   build the 0/1 value in TMP_REGISTER. */
	reg = (op == SLJIT_MOV && dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	buf = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!buf);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag (setcc reg8; a REX prefix
	   is always emitted so the byte form of the register is selected). */
	*buf++ = (reg_map[reg] <= 7) ? 0x40 : REX_B;
	*buf++ = 0x0f;
	*buf++ = cond_set;
	*buf++ = 0xC0 | reg_lmap[reg];
	/* movzx reg, reg8: zero-extend the 0/1 byte to the full register. */
	*buf++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*buf++ = 0x0f;
	*buf++ = 0xb6;
	*buf = 0xC0 | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg == TMP_REGISTER) {
		if (op == SLJIT_MOV) {
			compiler->mode32 = 0;
			EMIT_MOV(compiler, dst, dstw, TMP_REGISTER, 0);
		}
		else {
			/* dst = dst <op> flag. */
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
			compiler->skip_checks = 1;
#endif
			return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
		}
	}
#else
	/* x86-32: setcc needs a byte-addressable register, so destinations
	   without a byte form go through eax (SLJIT_TEMPORARY_REG1), which
	   is saved in TMP_REGISTER around the sequence. */
	if (op == SLJIT_MOV) {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];

			/* movzx dst, dst8. */
			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf = 0xC0 | (reg_map[dst] << 3) | reg_map[dst];
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			*buf++ = 0x0f;
			*buf++ = 0xb6;
			if (dst >= SLJIT_GENERAL_REG1 && dst <= SLJIT_NO_REGISTERS)
				*buf = 0xC0 | (reg_map[dst] << 3);	/* movzx dst, al */
			else {
				*buf = 0xC0;	/* movzx eax, al */
				/* dst is a memory operand here (extra registers were
				   rewritten by CHECK_EXTRA_REGS above). */
				EMIT_MOV(compiler, dst, dstw, SLJIT_TEMPORARY_REG1, 0);
			}

			/* Restore eax. */
			EMIT_MOV(compiler, SLJIT_TEMPORARY_REG1, 0, TMP_REGISTER, 0);
		}
	}
	else {
		if (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_TEMPORARY_REG3) {
			/* Save dst, then setcc its low byte; the upper 24 bits keep
			   their old value. NOTE(review): this assumes the binary op
			   used here tolerates those preserved upper bits (true for
			   OR-style accumulation) — confirm if new ops are routed
			   through this path. */
			EMIT_MOV(compiler, TMP_REGISTER, 0, dst, 0);
			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!buf);
			INC_SIZE(3);

			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0 | reg_map[dst];
		}
		else {
			EMIT_MOV(compiler, TMP_REGISTER, 0, SLJIT_TEMPORARY_REG1, 0);

			buf = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3 + 1);
			FAIL_IF(!buf);
			INC_SIZE(3 + 3 + 1);
			/* Set al to conditional flag. */
			*buf++ = 0x0f;
			*buf++ = cond_set;
			*buf++ = 0xC0;

			/* movzx eax, al. */
			*buf++ = 0x0f;
			*buf++ = 0xb6;
			*buf++ = 0xC0;

			/* xchg eax, TMP_REGISTER (one-byte 0x90+r form): restores
			   eax and leaves the 0/1 value in TMP_REGISTER. */
			*buf++ = 0x90 + reg_map[TMP_REGISTER];
		}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) || (defined SLJIT_DEBUG && SLJIT_DEBUG)
		compiler->skip_checks = 1;
#endif
		return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REGISTER, 0);
	}
#endif

	return SLJIT_SUCCESS;
}
2777
/* Emits a load of init_value into dst whose immediate field can later
   be rewritten through sljit_set_const. Returns the const record, or
   NULL on allocation/emission failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, int dst, sljit_w dstw, sljit_w init_value)
{
	sljit_ub *buf;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	int reg;
#endif

	CHECK_ERROR_PTR();
	check_sljit_emit_const(compiler, dst, dstw, init_value);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* x86-64: load the full 64-bit immediate into a register (memory
	   destinations are stored from the scratch register below). */
	compiler->mode32 = 0;
	reg = (dst >= SLJIT_TEMPORARY_REG1 && dst <= SLJIT_NO_REGISTERS) ? dst : TMP_REGISTER;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	/* x86-32: a mov with a 32-bit immediate covers every destination. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REGISTER;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Two-byte record marker (0, 1) so code generation can record the
	   constant's address. */
	buf = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!buf);

	*buf++ = 0;
	*buf++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Memory destination: spill the loaded value. */
	if (reg == TMP_REGISTER && dst != SLJIT_UNUSED)
		if (emit_mov(compiler, dst, dstw, TMP_REGISTER, 0))
			return NULL;
#endif

	return const_;
}
2823
/* Patches a previously emitted jump at `addr` to target `new_addr`. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* x86-32 jumps use a rel32 field: store the displacement measured
	   from the end of the 4-byte operand. */
	*(sljit_w*)addr = new_addr - (addr + 4);
#else
	/* x86-64 stores the absolute 64-bit target address. */
	*(sljit_uw*)addr = new_addr;
#endif
}
2832
2833 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_w new_constant)
2834 {
2835 *(sljit_w*)addr = new_constant;
2836 }

  ViewVC Help
Powered by ViewVC 1.1.5