Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or login using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 1451881 Details for
Bug 1591701
luajit on ppc64le
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
[patch]
patch8
ppc64le_f9b470d_ported.patch (text/plain), 24.99 KB, created by
Menanteau Guy
on 2018-06-15 10:21:04 UTC
(
hide
)
Description:
patch8
Filename:
MIME Type:
Creator:
Menanteau Guy
Created:
2018-06-15 10:21:04 UTC
Size:
24.99 KB
patch
obsolete
>From f9b470df94afcd7114bd855c972e783be1b89f99 Mon Sep 17 00:00:00 2001 >From: =?UTF-8?q?Marcin=20Ko=C5=9Bcielnicki?= <koriakin@0x04.net> >Date: Tue, 19 Jul 2016 18:12:18 +0200 >Subject: [PATCH] ppc64: Add FFI support. > >--- > dynasm/dasm_ppc.lua | 5 ++ > src/host/buildvm_asm.c | 16 ++--- > src/lj_arch.h | 1 - > src/lj_ccall.c | 166 +++++++++++++++++++++++++++++++++++++++++++- > src/lj_ccall.h | 13 ++++ > src/lj_ccallback.c | 68 ++++++++++++++++-- > src/lj_ctype.h | 2 +- > src/lj_def.h | 4 ++ > src/lj_target_ppc.h | 9 +++ > src/vm_ppc.dasc | 184 +++++++++++++++++++++++++++++++++++++------------ > 10 files changed, 406 insertions(+), 62 deletions(-) > >diff --git a/dynasm/dasm_ppc.lua b/dynasm/dasm_ppc.lua >index e2f704ec1..1b0f9add6 100644 >--- a/dynasm/dasm_ppc.lua >+++ b/dynasm/dasm_ppc.lua >@@ -257,9 +257,11 @@ map_op = { > addic_3 = "30000000RRI", > ["addic._3"] = "34000000RRI", > addi_3 = "38000000RR0I", >+ addil_3 = "38000000RR0J", > li_2 = "38000000RI", > la_2 = "38000000RD", > addis_3 = "3c000000RR0I", >+ addisl_3 = "3c000000RR0J", > lis_2 = "3c000000RI", > lus_2 = "3c000000RU", > bc_3 = "40000000AAK", >@@ -842,6 +844,9 @@ map_op = { > srdi_3 = op_alias("rldicl_4", function(p) > p[4] = p[3]; p[3] = "64-("..p[3]..")" > end), >+ ["srdi._3"] = op_alias("rldicl._4", function(p) >+ p[4] = p[3]; p[3] = "64-("..p[3]..")" >+ end), > clrldi_3 = op_alias("rldicl_4", function(p) > p[4] = p[3]; p[3] = "0" > end), >diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c >index 28419c077..5e5025e6d 100644 >--- a/src/host/buildvm_asm.c >+++ b/src/host/buildvm_asm.c >@@ -136,18 +136,14 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, > #else > #define TOCPREFIX "" > #endif >- if ((ins >> 26) == 16) { >+ if ((ins >> 26) == 14) { >+ fprintf(ctx->fp, "\taddi %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); >+ } else if ((ins >> 26) == 15) { >+ fprintf(ctx->fp, "\taddis %d,%d,%s\n", (ins >> 21) & 31, (ins >> 16) & 31, sym); >+ } else if ((ins >> 26) == 16) { > fprintf(ctx->fp, "\t%s %d, %d, " TOCPREFIX "%s\n", > (ins & 1) ? "bcl" : "bc", (ins >> 21) & 31, (ins >> 16) & 31, sym); > } else if ((ins >> 26) == 18) { >-#if LJ_ARCH_PPC64 >- const char *suffix = strchr(sym, '@'); >- if (suffix && suffix[1] == 'h') { >- fprintf(ctx->fp, "\taddis 11, 2, %s\n", sym); >- } else if (suffix && suffix[1] == 'l') { >- fprintf(ctx->fp, "\tld 12, %s\n", sym); >- } else >-#endif > fprintf(ctx->fp, "\t%s " TOCPREFIX "%s\n", (ins & 1) ? "bl" : "b", sym); > } else { > fprintf(stderr, >@@ -245,7 +241,7 @@ void emit_asm(BuildCtx *ctx) > int i, rel; > > fprintf(ctx->fp, "\t.file \"buildvm_%s.dasc\"\n", ctx->dasm_arch); >-#if LJ_ARCH_PPC64 >+#if LJ_ARCH_PPC_ELFV2 > fprintf(ctx->fp, "\t.abiversion 2\n"); > #endif > fprintf(ctx->fp, "\t.text\n"); >diff --git a/src/lj_arch.h b/src/lj_arch.h >index 159fd45d5..12080e22b 100644 >--- a/src/lj_arch.h >+++ b/src/lj_arch.h >@@ -269,7 +269,6 @@ > #elif LJ_ARCH_BITS == 64 > #define LJ_ARCH_PPC32ON64 1 > #define LJ_ARCH_NOJIT 1 /* NYI */ >-#define LJ_ARCH_NOFFI 1 /* NYI */ > #if _CALL_ELF == 2 > #define LJ_ARCH_PPC_ELFV2 1 > #else >diff --git a/src/lj_ccall.c b/src/lj_ccall.c >index b599be33a..ee34b52fa 100644 >--- a/src/lj_ccall.c >+++ b/src/lj_ccall.c >@@ -362,21 +362,97 @@ > #elif LJ_TARGET_PPC > /* -- PPC calling conventions --------------------------------------------- */ > >+#if LJ_ARCH_BITS == 64 >+ >+#if LJ_ARCH_PPC_ELFV2 >+ >+#define CCALL_HANDLE_STRUCTRET \ >+ if (sz > 16 && ccall_classify_fp(cts, ctr) <= 0) { \ >+ cc->retref = 1; /* Return by reference. */ \ >+ cc->gpr[ngpr++] = (GPRArg)dp; \ >+ } >+ >+#define CCALL_HANDLE_STRUCTRET2 \ >+ int isfp = ccall_classify_fp(cts, ctr); \ >+ int i; \ >+ if (isfp == FTYPE_FLOAT) { \ >+ for (i = 0; i < ctr->size / 4; i++) \ >+ ((float *)dp)[i] = cc->fpr[i]; \ >+ } else if (isfp == FTYPE_DOUBLE) { \ >+ for (i = 0; i < ctr->size / 8; i++) \ >+ ((double *)dp)[i] = cc->fpr[i]; \ >+ } else { \ >+ if (ctr->size < 8 && LJ_BE) { \ >+ sp += 8 - ctr->size; \ >+ } \ >+ memcpy(dp, sp, ctr->size); \ >+ } >+ >+#else >+ > #define CCALL_HANDLE_STRUCTRET \ > cc->retref = 1; /* Return all structs by reference. */ \ > cc->gpr[ngpr++] = (GPRArg)dp; > >+#endif >+ > #define CCALL_HANDLE_COMPLEXRET \ > /* Complex values are returned in 2 or 4 GPRs. */ \ > cc->retref = 0; > >+#define CCALL_HANDLE_STRUCTARG >+ > #define CCALL_HANDLE_COMPLEXRET2 \ >- memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ >+ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ >+ ((float *)dp)[0] = cc->fpr[0]; \ >+ ((float *)dp)[1] = cc->fpr[1]; \ >+ } else { /* Copy complex double from FPRs. */ \ >+ ((double *)dp)[0] = cc->fpr[0]; \ >+ ((double *)dp)[1] = cc->fpr[1]; \ >+ } >+ >+#define CCALL_HANDLE_COMPLEXARG \ >+ isfp = 1; \ >+ if (d->size == sizeof(float) * 2) { \ >+ d = ctype_get(cts, CTID_COMPLEX_DOUBLE); \ >+ isf32 = 1; \ >+ } >+ >+#define CCALL_HANDLE_REGARG \ >+ if (isfp && d->size == sizeof(float)) { \ >+ d = ctype_get(cts, CTID_DOUBLE); \ >+ isf32 = 1; \ >+ } \ >+ if (ngpr < maxgpr) { \ >+ dp = &cc->gpr[ngpr]; \ >+ ngpr += n; \ >+ if (ngpr > maxgpr) { \ >+ nsp += ngpr - 8; \ >+ ngpr = 8; \ >+ if (nsp > CCALL_MAXSTACK) { \ >+ goto err_nyi; \ >+ } \ >+ } \ >+ goto done; \ >+ } >+ >+#else >+ >+#define CCALL_HANDLE_STRUCTRET \ >+ cc->retref = 1; /* Return all structs by reference. */ \ >+ cc->gpr[ngpr++] = (GPRArg)dp; >+ >+#define CCALL_HANDLE_COMPLEXRET \ >+ /* Complex values are returned in 2 or 4 GPRs. */ \ >+ cc->retref = 0; > > #define CCALL_HANDLE_STRUCTARG \ > rp = cdataptr(lj_cdata_new(cts, did, sz)); \ > sz = CTSIZE_PTR; /* Pass all structs by reference. */ > >+#define CCALL_HANDLE_COMPLEXRET2 \ >+ memcpy(dp, sp, ctr->size); /* Copy complex from GPRs. */ >+ > #define CCALL_HANDLE_COMPLEXARG \ > /* Pass complex by value in 2 or 4 GPRs. */ > >@@ -403,6 +479,8 @@ > } \ > } > >+#endif >+ > #define CCALL_HANDLE_RET \ > if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ > ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ >@@ -794,6 +872,50 @@ static unsigned int ccall_classify_struct(CTState *cts, CType *ct) > > #endif > >+/* -- PowerPC64 ELFv2 ABI struct classification ------------------- */ >+ >+#if LJ_ARCH_PPC_ELFV2 >+ >+#define FTYPE_FLOAT 1 >+#define FTYPE_DOUBLE 2 >+ >+static unsigned int ccall_classify_fp(CTState *cts, CType *ct) { >+ if (ctype_isfp(ct->info)) { >+ if (ct->size == sizeof(float)) >+ return FTYPE_FLOAT; >+ else >+ return FTYPE_DOUBLE; >+ } else if (ctype_iscomplex(ct->info)) { >+ if (ct->size == sizeof(float) * 2) >+ return FTYPE_FLOAT; >+ else >+ return FTYPE_DOUBLE; >+ } else if (ctype_isstruct(ct->info)) { >+ int res = -1; >+ int sz = ct->size; >+ while (ct->sib) { >+ ct = ctype_get(cts, ct->sib); >+ if (ctype_isfield(ct->info)) { >+ int sub = ccall_classify_fp(cts, ctype_rawchild(cts, ct)); >+ if (res == -1) >+ res = sub; >+ if (sub != -1 && sub != res) >+ return 0; >+ } else if (ctype_isbitfield(ct->info) || >+ ctype_isxattrib(ct->info, CTA_SUBTYPE)) { >+ return 0; >+ } >+ } >+ if (res > 0 && sz > res * 4 * 8) >+ return 0; >+ return res; >+ } else { >+ return 0; >+ } >+} >+ >+#endif >+ > /* -- MIPS64 ABI struct classification ---------------------------- */ > > #if LJ_TARGET_MIPS64 >@@ -967,6 +1089,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, > CTSize sz; > MSize n, isfp = 0, isva = 0; > void *dp, *rp = NULL; >+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 >+ int isf32 = 0; >+#endif > > if (fid) { /* Get argument type from field. */ > CType *ctf = ctype_get(cts, fid); >@@ -1023,7 +1148,37 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, > *(void **)dp = rp; > dp = rp; > } >+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 && LJ_BE >+ if (ctype_isstruct(d->info) && sz < CTSIZE_PTR) { >+ dp = (char *)dp + (CTSIZE_PTR - sz); >+ } >+#endif > lj_cconv_ct_tv(cts, d, (uint8_t *)dp, o, CCF_ARG(narg)); >+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 >+ if (isfp) { >+ int i; >+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) >+ cc->fpr[nfpr++] = ((double *)dp)[i]; >+ } >+ if (isf32) { >+ int i; >+ for (i = 0; i < d->size / 8; i++) >+ ((float *)dp)[i*2] = ((double *)dp)[i]; >+ } >+#endif >+#if LJ_ARCH_PPC_ELFV2 >+ if (ctype_isstruct(d->info)) { >+ isfp = ccall_classify_fp(cts, d); >+ int i; >+ if (isfp == FTYPE_FLOAT) { >+ for (i = 0; i < d->size / 4 && nfpr < CCALL_NARG_FPR; i++) >+ cc->fpr[nfpr++] = ((float *)dp)[i]; >+ } else if (isfp == FTYPE_DOUBLE) { >+ for (i = 0; i < d->size / 8 && nfpr < CCALL_NARG_FPR; i++) >+ cc->fpr[nfpr++] = ((double *)dp)[i]; >+ } >+ } >+#endif > /* Extend passed integers to 32 bits at least. */ > if (ctype_isinteger_or_bool(d->info) && d->size < 4) { > if (d->info & CTF_UNSIGNED) >@@ -1033,6 +1188,15 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, > if (isfp && d->size == sizeof(float)) > ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ > #endif >+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 >+ if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info)) >+ && d->size <= 4) { >+ if (d->info & CTF_UNSIGNED) >+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; >+ else >+ *(int64_t *)dp = (int64_t)*(int32_t *)dp; >+ } >+#endif > #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) > if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) > #if LJ_TARGET_MIPS64 >diff --git a/src/lj_ccall.h b/src/lj_ccall.h >index d97227a6b..fcce77928 100644 >--- a/src/lj_ccall.h >+++ b/src/lj_ccall.h >@@ -86,10 +86,23 @@ typedef union FPRArg { > #elif LJ_TARGET_PPC > > #define CCALL_NARG_GPR 8 >+#if LJ_ARCH_BITS == 64 >+#define CCALL_NARG_FPR 13 >+#if LJ_ARCH_PPC_ELFV2 >+#define CCALL_NRET_GPR 2 >+#define CCALL_NRET_FPR 8 >+#define CCALL_SPS_EXTRA 14 >+#else >+#define CCALL_NRET_GPR 1 >+#define CCALL_NRET_FPR 2 >+#define CCALL_SPS_EXTRA 16 >+#endif >+#else > #define CCALL_NARG_FPR 8 > #define CCALL_NRET_GPR 4 /* For complex double. */ > #define CCALL_NRET_FPR 1 > #define CCALL_SPS_EXTRA 4 >+#endif > #define CCALL_SPS_FREE 0 > > typedef intptr_t GPRArg; >diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c >index 2ca6406c3..c2a55c02f 100644 >--- a/src/lj_ccallback.c >+++ b/src/lj_ccallback.c >@@ -61,8 +61,24 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) > > #elif LJ_TARGET_PPC > >+#if LJ_ARCH_PPC_OPD >+ >+#define CALLBACK_SLOT2OFS(slot) (24*(slot)) >+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/24) >+#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) >+ >+#elif LJ_ARCH_PPC_ELFV2 >+ >+#define CALLBACK_SLOT2OFS(slot) (4*(slot)) >+#define CALLBACK_OFS2SLOT(ofs) ((ofs)/4) >+#define CALLBACK_MAX_SLOT (CALLBACK_MCODE_SIZE/4 - 10) >+ >+#else >+ > #define CALLBACK_MCODE_HEAD 24 > >+#endif >+ > #elif LJ_TARGET_MIPS32 > > #define CALLBACK_MCODE_HEAD 20 >@@ -188,24 +204,59 @@ static void callback_mcode_init(global_State *g, uint32_t *page) > lua_assert(p - page <= CALLBACK_MCODE_SIZE); > } > #elif LJ_TARGET_PPC >+#if LJ_ARCH_PPC_OPD >+register void *vm_toc __asm__("r2"); >+static void callback_mcode_init(global_State *g, uint64_t *page) >+{ >+ uint64_t *p = page; >+ void *target = (void *)lj_vm_ffi_callback; >+ MSize slot; >+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { >+ *p++ = (uint64_t)target; >+ *p++ = (uint64_t)vm_toc; >+ *p++ = (uint64_t)g | ((uint64_t)slot << 47); >+ } >+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 8); >+} >+#else > static void callback_mcode_init(global_State *g, uint32_t *page) > { > uint32_t *p = page; > void *target = (void *)lj_vm_ffi_callback; > MSize slot; >+#if LJ_ARCH_PPC_ELFV2 >+ // Needs to be in sync with lj_vm_ffi_callback. >+ lua_assert(CALLBACK_MCODE_SIZE == 4096); >+ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { >+ *p = PPCI_B | (((page+CALLBACK_MAX_SLOT-p) & 0x00ffffffu) << 2); >+ p++; >+ } >+ *p++ = PPCI_LI | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 32) & 0xffff); >+ *p++ = PPCI_LI | PPCF_T(RID_R11) | ((((intptr_t)g) >> 32) & 0xffff); >+ *p++ = PPCI_RLDICR | PPCF_T(RID_SYS1) | PPCF_A(RID_SYS1) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ >+ *p++ = PPCI_RLDICR | PPCF_T(RID_R11) | PPCF_A(RID_R11) | PPCF_SH(32) | PPCF_M6(63-32); /* sldi */ >+ *p++ = PPCI_ORIS | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | ((((intptr_t)target) >> 16) & 0xffff); >+ *p++ = PPCI_ORIS | PPCF_A(RID_R11) | PPCF_T(RID_R11) | ((((intptr_t)g) >> 16) & 0xffff); >+ *p++ = PPCI_ORI | PPCF_A(RID_SYS1) | PPCF_T(RID_SYS1) | (((intptr_t)target) & 0xffff); >+ *p++ = PPCI_ORI | PPCF_A(RID_R11) | PPCF_T(RID_R11) | (((intptr_t)g) & 0xffff); >+ *p++ = PPCI_MTCTR | PPCF_T(RID_SYS1); >+ *p++ = PPCI_BCTR; >+#else > *p++ = PPCI_LIS | PPCF_T(RID_TMP) | (u32ptr(target) >> 16); >- *p++ = PPCI_LIS | PPCF_T(RID_R12) | (u32ptr(g) >> 16); >+ *p++ = PPCI_LIS | PPCF_T(RID_R11) | (u32ptr(g) >> 16); > *p++ = PPCI_ORI | PPCF_A(RID_TMP)|PPCF_T(RID_TMP) | (u32ptr(target) & 0xffff); >- *p++ = PPCI_ORI | PPCF_A(RID_R12)|PPCF_T(RID_R12) | (u32ptr(g) & 0xffff); >+ *p++ = PPCI_ORI | PPCF_A(RID_R11)|PPCF_T(RID_R11) | (u32ptr(g) & 0xffff); > *p++ = PPCI_MTCTR | PPCF_T(RID_TMP); > *p++ = PPCI_BCTR; > for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { >- *p++ = PPCI_LI | PPCF_T(RID_R11) | slot; >+ *p++ = PPCI_LI | PPCF_T(RID_R12) | slot; > *p = PPCI_B | (((page-p) & 0x00ffffffu) << 2); > p++; > } >- lua_assert(p - page <= CALLBACK_MCODE_SIZE); >+#endif >+ lua_assert(p - page <= CALLBACK_MCODE_SIZE / 4); > } >+#endif > #elif LJ_TARGET_MIPS > static void callback_mcode_init(global_State *g, uint32_t *page) > { >@@ -637,6 +688,15 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) > *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : > (int32_t)*(int16_t *)dp; > } >+#if LJ_TARGET_PPC && LJ_ARCH_BITS == 64 >+ if (ctr->size <= 4 && >+ (ctype_isinteger_or_bool(ctr->info) || ctype_isenum(ctr->info))) { >+ if (ctr->info & CTF_UNSIGNED) >+ *(uint64_t *)dp = (uint64_t)*(uint32_t *)dp; >+ else >+ *(int64_t *)dp = (int64_t)*(int32_t *)dp; >+ } >+#endif > #if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) > /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ > if (ctr->size <= 4 && >diff --git a/src/lj_ctype.h b/src/lj_ctype.h >index e9b426f0d..9bba59e6b 100644 >--- a/src/lj_ctype.h >+++ b/src/lj_ctype.h >@@ -153,7 +153,7 @@ typedef struct CType { > > /* Simplify target-specific configuration. Checked in lj_ccall.h. */ > #define CCALL_MAX_GPR 8 >-#define CCALL_MAX_FPR 8 >+#define CCALL_MAX_FPR 14 > > typedef LJ_ALIGN(8) union FPRCBArg { double d; float f[2]; } FPRCBArg; > >diff --git a/src/lj_def.h b/src/lj_def.h >index 9413399d8..a22eb0486 100644 >--- a/src/lj_def.h >+++ b/src/lj_def.h >@@ -71,7 +71,11 @@ typedef unsigned int uintptr_t; > #define LJ_MAX_IDXCHAIN 100 /* __index/__newindex chain limit. */ > #define LJ_STACK_EXTRA (5+2*LJ_FR2) /* Extra stack space (metamethods). */ > >+#if defined(__powerpc64__) && _CALL_ELF != 2 >+#define LJ_NUM_CBPAGE 4 /* Number of FFI callback pages. */ >+#else > #define LJ_NUM_CBPAGE 1 /* Number of FFI callback pages. */ >+#endif > > /* Minimum table/buffer sizes. */ > #define LJ_MIN_GLOBAL 6 /* Min. global table size (hbits). */ >diff --git a/src/lj_target_ppc.h b/src/lj_target_ppc.h >index bbf22390c..794d29e63 100644 >--- a/src/lj_target_ppc.h >+++ b/src/lj_target_ppc.h >@@ -131,6 +131,8 @@ static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p, uint32_t exitno) > #define PPCF_C(r) ((r) << 6) > #define PPCF_MB(n) ((n) << 6) > #define PPCF_ME(n) ((n) << 1) >+#define PPCF_SH(n) ((((n) & 31) << (11+1)) | (((n) & 32) >> (5-1))) >+#define PPCF_M6(n) ((((n) & 31) << (5+1)) | (((n) & 32) << (11-5))) > #define PPCF_Y 0x00200000 > #define PPCF_DOT 0x00000001 > >@@ -200,6 +202,13 @@ typedef enum PPCIns { > PPCI_RLWINM = 0x54000000, > PPCI_RLWIMI = 0x50000000, > >+ PPCI_RLDICL = 0x78000000, >+ PPCI_RLDICR = 0x78000004, >+ PPCI_RLDIC = 0x78000008, >+ PPCI_RLDIMI = 0x7800000c, >+ PPCI_RLDCL = 0x78000010, >+ PPCI_RLDCR = 0x78000012, >+ > PPCI_B = 0x48000000, > PPCI_BL = 0x48000001, > PPCI_BC = 0x40800000, >diff --git a/src/vm_ppc.dasc b/src/vm_ppc.dasc >index a9c5e6023..a5866e6b6 100644 >--- a/src/vm_ppc.dasc >+++ b/src/vm_ppc.dasc >@@ -33,16 +33,16 @@ > |.macro lpx, a, b, c; ldx a, b, c; .endmacro > |.macro lp, a, b; ld a, b; .endmacro > |.macro stp, a, b; std a, b; .endmacro >+|.macro stpx, a, b, c; stdx a, b, c; .endmacro > |.define decode_OPP, decode_OP8 >-|.if FFI >-|// Missing: Calling conventions, 64 bit regs, TOC. >-|.error lib_ffi not yet implemented for PPC64 >-|.endif >+|.define WORD_SIZE, 8 > |.else > |.macro lpx, a, b, c; lwzx a, b, c; .endmacro > |.macro lp, a, b; lwz a, b; .endmacro > |.macro stp, a, b; stw a, b; .endmacro >+|.macro stpx, a, b, c; stwx a, b, c; .endmacro > |.define decode_OPP, decode_OP4 >+|.define WORD_SIZE, 4 > |.endif > | > |// Convenience macros for TOC handling. >@@ -2525,9 +2525,9 @@ static void build_subroutines(BuildCtx *ctx) > | bgt >5 // Need to grow stack. > | mtctr FUNCREG > | bctrl // (lua_State *L) >+ | .toc lp TOCREG, SAVE_TOC > | // Either throws an error, or recovers and returns -1, 0 or nresults+1. > | lp BASE, L->base >- | .toc lp TOCREG, SAVE_TOC > | cmpwi CRET1, 0 > | slwi RD, CRET1, 3 > | la RA, -8(BASE) >@@ -2928,10 +2928,18 @@ static void build_subroutines(BuildCtx *ctx) > |->vm_cachesync: > |.if JIT or FFI > | // Compute start of first cache line and number of cache lines. >+ | .if GPR64 >+ | rldicr CARG1, CARG1, 0, 58 >+ | .else > | rlwinm CARG1, CARG1, 0, 0, 26 >+ | .endif > | sub CARG2, CARG2, CARG1 > | addi CARG2, CARG2, 31 >+ | .if GPR64 >+ | srdi. CARG2, CARG2, 5 >+ | .else > | rlwinm. CARG2, CARG2, 27, 5, 31 >+ | .endif > | beqlr > | mtctr CARG2 > | mr CARG3, CARG1 >@@ -2953,32 +2961,54 @@ static void build_subroutines(BuildCtx *ctx) > |//-- FFI helper functions ----------------------------------------------- > |//----------------------------------------------------------------------- > | >- |// Handler for callback functions. Callback slot number in r11, g in r12. >+ |// Handler for callback functions. Callback slot number in r12, g in r11. > |->vm_ffi_callback: > |.if FFI > |.type CTSTATE, CTState, PC >+ | .if OPD >+ | rldicl r12, r11, 17, 47 >+ | rldicl r11, r11, 0, 17 >+ | .endif >+ | .if ELFV2 >+ | rlwinm r12, r12, 30, 22, 31 >+ | addisl TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@ha >+ | addil TOCREG, TOCREG, extern .TOC.-lj_vm_ffi_callback@l >+ | .endif > | saveregs >- | lwz CTSTATE, GL:r12->ctype_state >- | addi DISPATCH, r12, GG_G2DISP >- | stw r11, CTSTATE->cb.slot >- | stw r3, CTSTATE->cb.gpr[0] >+ | lwz CTSTATE, GL:r11->ctype_state >+ | addi DISPATCH, r11, GG_G2DISP >+ | stw r12, CTSTATE->cb.slot >+ | stp r3, CTSTATE->cb.gpr[0] > | stfd f1, CTSTATE->cb.fpr[0] >- | stw r4, CTSTATE->cb.gpr[1] >+ | stp r4, CTSTATE->cb.gpr[1] > | stfd f2, CTSTATE->cb.fpr[1] >- | stw r5, CTSTATE->cb.gpr[2] >+ | stp r5, CTSTATE->cb.gpr[2] > | stfd f3, CTSTATE->cb.fpr[2] >- | stw r6, CTSTATE->cb.gpr[3] >+ | stp r6, CTSTATE->cb.gpr[3] > | stfd f4, CTSTATE->cb.fpr[3] >- | stw r7, CTSTATE->cb.gpr[4] >+ | stp r7, CTSTATE->cb.gpr[4] > | stfd f5, CTSTATE->cb.fpr[4] >- | stw r8, CTSTATE->cb.gpr[5] >+ | stp r8, CTSTATE->cb.gpr[5] > | stfd f6, CTSTATE->cb.fpr[5] >- | stw r9, CTSTATE->cb.gpr[6] >+ | stp r9, CTSTATE->cb.gpr[6] > | stfd f7, CTSTATE->cb.fpr[6] >- | stw r10, CTSTATE->cb.gpr[7] >+ | stp r10, CTSTATE->cb.gpr[7] > | stfd f8, CTSTATE->cb.fpr[7] >+ | .if GPR64 >+ | stfd f9, CTSTATE->cb.fpr[8] >+ | stfd f10, CTSTATE->cb.fpr[9] >+ | stfd f11, CTSTATE->cb.fpr[10] >+ | stfd f12, CTSTATE->cb.fpr[11] >+ | stfd f13, CTSTATE->cb.fpr[12] >+ | .endif >+ | .if ELFV2 >+ | addi TMP0, sp, CFRAME_SPACE+96 >+ | .elif GPR64 >+ | addi TMP0, sp, CFRAME_SPACE+112 >+ | .else > | addi TMP0, sp, CFRAME_SPACE+8 >- | stw TMP0, CTSTATE->cb.stack >+ | .endif >+ | stp TMP0, CTSTATE->cb.stack > | mr CARG1, CTSTATE > | stw CTSTATE, SAVE_PC // Any value outside of bytecode is ok. > | mr CARG2, sp >@@ -3019,9 +3049,21 @@ static void build_subroutines(BuildCtx *ctx) > | mr CARG1, CTSTATE > | mr CARG2, RA > | bl extern lj_ccallback_leave // (CTState *cts, TValue *o) >- | lwz CRET1, CTSTATE->cb.gpr[0] >+ | lp CRET1, CTSTATE->cb.gpr[0] > | lfd FARG1, CTSTATE->cb.fpr[0] >- | lwz CRET2, CTSTATE->cb.gpr[1] >+ | lp CRET2, CTSTATE->cb.gpr[1] >+ | .if GPR64 >+ | lfd FARG2, CTSTATE->cb.fpr[1] >+ | .else >+ | lp CARG3, CTSTATE->cb.gpr[2] >+ | lp CARG4, CTSTATE->cb.gpr[3] >+ | .endif >+ | .elfv2 lfd f3, CTSTATE->cb.fpr[2] >+ | .elfv2 lfd f4, CTSTATE->cb.fpr[3] >+ | .elfv2 lfd f5, CTSTATE->cb.fpr[4] >+ | .elfv2 lfd f6, CTSTATE->cb.fpr[5] >+ | .elfv2 lfd f7, CTSTATE->cb.fpr[6] >+ | .elfv2 lfd f8, CTSTATE->cb.fpr[7] > | b ->vm_leave_unw > |.endif > | >@@ -3034,23 +3076,46 @@ static void build_subroutines(BuildCtx *ctx) > | lbz CARG2, CCSTATE->nsp > | lbz CARG3, CCSTATE->nfpr > | neg TMP1, TMP1 >+ | .if GPR64 >+ | std TMP0, 16(sp) >+ | .else > | stw TMP0, 4(sp) >+ | .endif > | cmpwi cr1, CARG3, 0 > | mr TMP2, sp > | addic. CARG2, CARG2, -1 >+ | .if GPR64 >+ | stdux sp, sp, TMP1 >+ | .else > | stwux sp, sp, TMP1 >+ | .endif > | crnot 4*cr1+eq, 4*cr1+eq // For vararg calls. >- | stw r14, -4(TMP2) >- | stw CCSTATE, -8(TMP2) >+ | .if GPR64 >+ | std r14, -8(TMP2) >+ | std CCSTATE, -16(TMP2) >+ | .else >+ | stw r14, -4(TMP2) >+ | stw CCSTATE, -8(TMP2) >+ | .endif > | mr r14, TMP2 > | la TMP1, CCSTATE->stack >+ | .if GPR64 >+ | sldi CARG2, CARG2, 3 >+ | .else > | slwi CARG2, CARG2, 2 >+ | .endif > | blty >2 >- | la TMP2, 8(sp) >+ | .if ELFV2 >+ | la TMP2, 96(sp) >+ | .elif GPR64 >+ | la TMP2, 112(sp) >+ | .else >+ | la TMP2, 8(sp) >+ | .endif > |1: >- | lwzx TMP0, TMP1, CARG2 >- | stwx TMP0, TMP2, CARG2 >- | addic. CARG2, CARG2, -4 >+ | lpx TMP0, TMP1, CARG2 >+ | stpx TMP0, TMP2, CARG2 >+ | addic. CARG2, CARG2, -WORD_SIZE > | bge <1 > |2: > | bney cr1, >3 >@@ -3062,28 +3127,49 @@ static void build_subroutines(BuildCtx *ctx) > | lfd f6, CCSTATE->fpr[5] > | lfd f7, CCSTATE->fpr[6] > | lfd f8, CCSTATE->fpr[7] >+ | .if GPR64 >+ | lfd f9, CCSTATE->fpr[8] >+ | lfd f10, CCSTATE->fpr[9] >+ | lfd f11, CCSTATE->fpr[10] >+ | lfd f12, CCSTATE->fpr[11] >+ | lfd f13, CCSTATE->fpr[12] >+ | .endif > |3: >- | lp TMP0, CCSTATE->func >- | lwz CARG2, CCSTATE->gpr[1] >- | lwz CARG3, CCSTATE->gpr[2] >- | lwz CARG4, CCSTATE->gpr[3] >- | lwz CARG5, CCSTATE->gpr[4] >- | mtctr TMP0 >- | lwz r8, CCSTATE->gpr[5] >- | lwz r9, CCSTATE->gpr[6] >- | lwz r10, CCSTATE->gpr[7] >- | lwz CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. >+ | .toc std TOCREG, SAVE_TOC >+ | lp FUNCREG, CCSTATE->func >+ | lp CARG2, CCSTATE->gpr[1] >+ | lp CARG3, CCSTATE->gpr[2] >+ | .opd lp TOCREG, TOC_OFS(FUNCREG) >+ | .opdenv lp ENVREG, ENV_OFS(FUNCREG) >+ | .opd lp FUNCREG, 0(FUNCREG) >+ | lp CARG4, CCSTATE->gpr[3] >+ | lp CARG5, CCSTATE->gpr[4] >+ | mtctr FUNCREG >+ | lp r8, CCSTATE->gpr[5] >+ | lp r9, CCSTATE->gpr[6] >+ | lp r10, CCSTATE->gpr[7] >+ | lp CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. > | bctrl >- | lwz CCSTATE:TMP1, -8(r14) >- | lwz TMP2, -4(r14) >+ | .toc lp TOCREG, SAVE_TOC >+ | .if GPR64 >+ | ld CCSTATE:TMP1, -16(r14) >+ | ld TMP2, -8(r14) >+ | ld TMP0, 16(r14) >+ | .else >+ | lwz CCSTATE:TMP1, -8(r14) >+ | lwz TMP2, -4(r14) > | lwz TMP0, 4(r14) >- | stw CARG1, CCSTATE:TMP1->gpr[0] >+ | .endif >+ | stp CARG1, CCSTATE:TMP1->gpr[0] > | stfd FARG1, CCSTATE:TMP1->fpr[0] >- | stw CARG2, CCSTATE:TMP1->gpr[1] >+ | stp CARG2, CCSTATE:TMP1->gpr[1] >+ | .if GPR64 >+ | stfd FARG2, CCSTATE:TMP1->fpr[1] >+ | .endif > | mtlr TMP0 >- | stw CARG3, CCSTATE:TMP1->gpr[2] >+ | stp CARG3, CCSTATE:TMP1->gpr[2] > | mr sp, r14 >- | stw CARG4, CCSTATE:TMP1->gpr[3] >+ | stp CARG4, CCSTATE:TMP1->gpr[3] > | mr r14, TMP2 > | blr > |.endif >@@ -5333,9 +5419,9 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) > | .opdenv lp ENVREG, ENV_OFS(FUNCREG) > | st_vmstate > | bctrl // (lua_State *L [, lua_CFunction f]) >+ | .toc lp TOCREG, SAVE_TOC > | // Returns nresults. > | lp BASE, L->base >- | .toc lp TOCREG, SAVE_TOC > | slwi RD, CRET1, 3 > | lp TMP1, L->top > | li_vmstate INTERP >@@ -5436,8 +5522,12 @@ static void emit_asm_debug(BuildCtx *ctx) > "\t.long lj_vm_ffi_call\n" > #endif > "\t.long %d\n" >+#if LJ_ARCH_PPC32ON64 >+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" >+#else > "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" >- "\t.byte 0x8e\n\t.uleb128 2\n" >+#endif >+ "\t.byte 0x8e\n\t.uleb128 1\n" > "\t.byte 0xd\n\t.uleb128 0xe\n" > "\t.align 2\n" > ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); >@@ -5519,8 +5609,12 @@ static void emit_asm_debug(BuildCtx *ctx) > "\t.long lj_vm_ffi_call-.\n" > "\t.long %d\n" > "\t.uleb128 0\n" /* augmentation length */ >+#if LJ_ARCH_PPC32ON64 >+ "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -2\n" >+#else > "\t.byte 0x11\n\t.uleb128 65\n\t.sleb128 -1\n" >- "\t.byte 0x8e\n\t.uleb128 2\n" >+#endif >+ "\t.byte 0x8e\n\t.uleb128 1\n" > "\t.byte 0xd\n\t.uleb128 0xe\n" > "\t.align 2\n" > ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 1591701
:
1451874
|
1451875
|
1451876
|
1451877
|
1451878
|
1451879
|
1451880
|
1451881
|
1451882
|
1579425