diff --git a/contrib/compiler-rt/lib/asan/asan_interceptors.cc b/contrib/compiler-rt/lib/asan/asan_interceptors.cc index c6969c979a5..cb2214f966e 100644 --- a/contrib/compiler-rt/lib/asan/asan_interceptors.cc +++ b/contrib/compiler-rt/lib/asan/asan_interceptors.cc @@ -357,28 +357,22 @@ DEFINE_REAL_PTHREAD_FUNCTIONS #if SANITIZER_ANDROID INTERCEPTOR(void*, bsd_signal, int signum, void *handler) { - if (!IsHandledDeadlySignal(signum) || - common_flags()->allow_user_segv_handler) { + if (GetHandleSignalMode(signum) != kHandleSignalExclusive) return REAL(bsd_signal)(signum, handler); - } return 0; } #endif INTERCEPTOR(void*, signal, int signum, void *handler) { - if (!IsHandledDeadlySignal(signum) || - common_flags()->allow_user_segv_handler) { + if (GetHandleSignalMode(signum) != kHandleSignalExclusive) return REAL(signal)(signum, handler); - } return nullptr; } INTERCEPTOR(int, sigaction, int signum, const struct sigaction *act, struct sigaction *oldact) { - if (!IsHandledDeadlySignal(signum) || - common_flags()->allow_user_segv_handler) { + if (GetHandleSignalMode(signum) != kHandleSignalExclusive) return REAL(sigaction)(signum, act, oldact); - } return 0; } diff --git a/contrib/compiler-rt/lib/asan/asan_win.cc b/contrib/compiler-rt/lib/asan/asan_win.cc index 4ab535c42e5..26db32465da 100644 --- a/contrib/compiler-rt/lib/asan/asan_win.cc +++ b/contrib/compiler-rt/lib/asan/asan_win.cc @@ -80,7 +80,7 @@ static long WINAPI SEHHandler(EXCEPTION_POINTERS *info) { INTERCEPTOR_WINAPI(LPTOP_LEVEL_EXCEPTION_FILTER, SetUnhandledExceptionFilter, LPTOP_LEVEL_EXCEPTION_FILTER ExceptionFilter) { CHECK(REAL(SetUnhandledExceptionFilter)); - if (ExceptionFilter == &SEHHandler || common_flags()->allow_user_segv_handler) + if (ExceptionFilter == &SEHHandler) return REAL(SetUnhandledExceptionFilter)(ExceptionFilter); // We record the user provided exception handler to be called for all the // exceptions unhandled by asan. diff --git a/contrib/compiler-rt/lib/builtins/README.txt b/contrib/compiler-rt/lib/builtins/README.txt index cd7d1d5b1c1..e603dfa0535 100644 --- a/contrib/compiler-rt/lib/builtins/README.txt +++ b/contrib/compiler-rt/lib/builtins/README.txt @@ -57,8 +57,8 @@ si_int __popcountsi2(si_int a); // bit population si_int __popcountdi2(di_int a); // bit population si_int __popcountti2(ti_int a); // bit population -uint32_t __bswapsi2(uint32_t a); // a byteswapped, arm/mips only -uint64_t __bswapdi2(uint64_t a); // a byteswapped, arm/mips only +uint32_t __bswapsi2(uint32_t a); // a byteswapped +uint64_t __bswapdi2(uint64_t a); // a byteswapped // Integral arithmetic diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S index b67814d9f20..3e7a8b86b73 100644 --- a/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cdcmp.S @@ -48,7 +48,12 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) // NaN has been ruled out, so __aeabi_cdcmple can't trap bne __aeabi_cdcmple +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else msr CPSR_f, #APSR_C +#endif JMP(lr) #endif END_COMPILERRT_FUNCTION(__aeabi_cdcmpeq) @@ -95,17 +100,23 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cdcmple) lsls r0, r0, #31 pop {r0-r3, pc} #else + ITT(eq) moveq ip, #0 beq 1f ldm sp, {r0-r3} bl __aeabi_dcmpeq cmp r0, #1 + ITE(eq) moveq ip, #(APSR_C | APSR_Z) movne ip, #(APSR_C) 1: +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + msr APSR_nzcvq, ip +#else msr CPSR_f, ip +#endif pop {r0-r3} POP_PC() #endif diff --git a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S index e37aa3d06c4..1f304ffd964 100644 --- a/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S +++ b/contrib/compiler-rt/lib/builtins/arm/aeabi_cfcmp.S @@ -48,7 +48,12 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) // NaN has been ruled out, so __aeabi_cfcmple can't trap bne __aeabi_cfcmple +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + mov ip, #APSR_C + msr APSR_nzcvq, ip +#else msr CPSR_f, #APSR_C +#endif JMP(lr) #endif END_COMPILERRT_FUNCTION(__aeabi_cfcmpeq) @@ -95,17 +100,23 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_cfcmple) lsls r0, r0, #31 pop {r0-r3, pc} #else + ITT(eq) moveq ip, #0 beq 1f ldm sp, {r0-r3} bl __aeabi_fcmpeq cmp r0, #1 + ITE(eq) moveq ip, #(APSR_C | APSR_Z) movne ip, #(APSR_C) 1: +#if defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7EM__) + msr APSR_nzcvq, ip +#else msr CPSR_f, ip +#endif pop {r0-r3} POP_PC() #endif diff --git a/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S index 8fa0b2debc7..d5070657091 100644 --- a/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/eqdf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__eqdf2vfp) vcmp.f64 d6, d7 #endif vmrs apsr_nzcv, fpscr + ITE(eq) moveq r0, #1 // set result register to 1 if equal movne r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S index 3776bf4874c..fd72b2fdbde 100644 --- a/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/eqsf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__eqsf2vfp) vcmp.f32 s14, s15 #endif vmrs apsr_nzcv, fpscr + ITE(eq) moveq r0, #1 // set result register to 1 if equal movne r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S index 14899f00aab..364fc5b24cd 100644 --- a/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/gedf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__gedf2vfp) vcmp.f64 d6, d7 #endif vmrs apsr_nzcv, fpscr + ITE(ge) movge r0, #1 // set result register to 1 if greater than or equal movlt r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S index b49d04d1c23..346c3473ae4 100644 --- a/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/gesf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__gesf2vfp) vcmp.f32 s14, s15 #endif vmrs apsr_nzcv, fpscr + ITE(ge) movge r0, #1 // set result register to 1 if greater than or equal movlt r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S index 8166305e3af..3389c3ad973 100644 --- a/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/gtdf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__gtdf2vfp) vcmp.f64 d6, d7 #endif vmrs apsr_nzcv, fpscr + ITE(gt) movgt r0, #1 // set result register to 1 if equal movle r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S index d2d8a2380fc..afdba8b018e 100644 --- a/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/gtsf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2vfp) vcmp.f32 s14, s15 #endif vmrs apsr_nzcv, fpscr + ITE(gt) movgt r0, #1 // set result register to 1 if equal movle r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S index a9dab77c146..4bbe4c86837 100644 --- a/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/ledf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__ledf2vfp) vcmp.f64 d6, d7 #endif vmrs apsr_nzcv, fpscr + ITE(ls) movls r0, #1 // set result register to 1 if equal movhi r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S index 7e127f465cf..51232bd8ced 100644 --- a/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/lesf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__lesf2vfp) vcmp.f32 s14, s15 #endif vmrs apsr_nzcv, fpscr + ITE(ls) movls r0, #1 // set result register to 1 if equal movhi r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S index 8b6f8e4cc8a..8e2928c813d 100644 --- a/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/ltdf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__ltdf2vfp) vcmp.f64 d6, d7 #endif vmrs apsr_nzcv, fpscr + ITE(mi) movmi r0, #1 // set result register to 1 if equal movpl r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S index c4ff812b49a..59c00c6bab6 100644 --- a/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/ltsf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__ltsf2vfp) vcmp.f32 s14, s15 #endif vmrs apsr_nzcv, fpscr + ITE(mi) movmi r0, #1 // set result register to 1 if equal movpl r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S index 7d884e07204..aef72eb0097 100644 --- a/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/nedf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__nedf2vfp) vcmp.f64 d6, d7 #endif vmrs apsr_nzcv, fpscr + ITE(ne) movne r0, #1 // set result register to 0 if unequal moveq r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S index 97c764f6369..50d60f49300 100644 --- a/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/nesf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__nesf2vfp) vcmp.f32 s14, s15 #endif vmrs apsr_nzcv, fpscr + ITE(ne) movne r0, #1 // set result register to 1 if unequal moveq r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S index 85563754700..6625fa8a311 100644 --- a/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/unorddf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__unorddf2vfp) vcmp.f64 d6, d7 #endif vmrs apsr_nzcv, fpscr + ITE(vs) movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) movvc r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S index 2b16b4905c4..0b5da2ba3e1 100644 --- a/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S +++ b/contrib/compiler-rt/lib/builtins/arm/unordsf2vfp.S @@ -27,6 +27,7 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2vfp) vcmp.f32 s14, s15 #endif vmrs apsr_nzcv, fpscr + ITE(vs) movvs r0, #1 // set result register to 1 if "overflow" (any NaNs) movvc r0, #0 bx lr diff --git a/contrib/compiler-rt/lib/builtins/assembly.h b/contrib/compiler-rt/lib/builtins/assembly.h index 88271270a07..4893d483fd8 100644 --- a/contrib/compiler-rt/lib/builtins/assembly.h +++ b/contrib/compiler-rt/lib/builtins/assembly.h @@ -115,10 +115,12 @@ #if defined(USE_THUMB_2) #define IT(cond) it cond #define ITT(cond) itt cond +#define ITE(cond) ite cond #define WIDE(op) op.w #else #define IT(cond) #define ITT(cond) +#define ITE(cond) #define WIDE(op) op #endif #endif /* defined(__arm__) */ diff --git a/contrib/compiler-rt/lib/builtins/bswapdi2.c b/contrib/compiler-rt/lib/builtins/bswapdi2.c index 0af0b88a479..eb220007bb1 100644 --- a/contrib/compiler-rt/lib/builtins/bswapdi2.c +++ b/contrib/compiler-rt/lib/builtins/bswapdi2.c @@ -14,15 +14,14 @@ #include "int_lib.h" -COMPILER_RT_ABI uint64_t -__bswapdi2 (uint64_t u) -{ - return ((((u) & 0xff00000000000000ULL) >> 56) - | (((u) & 0x00ff000000000000ULL) >> 40) - | (((u) & 0x0000ff0000000000ULL) >> 24) - | (((u) & 0x000000ff00000000ULL) >> 8) - | (((u) & 0x00000000ff000000ULL) << 8) - | (((u) & 0x0000000000ff0000ULL) << 24) - | (((u) & 0x000000000000ff00ULL) << 40) - | (((u) & 0x00000000000000ffULL) << 56)); +COMPILER_RT_ABI uint64_t __bswapdi2(uint64_t u) { + return ( + (((u)&0xff00000000000000ULL) >> 56) | + (((u)&0x00ff000000000000ULL) >> 40) | + (((u)&0x0000ff0000000000ULL) >> 24) | + (((u)&0x000000ff00000000ULL) >> 8) | + (((u)&0x00000000ff000000ULL) << 8) | + (((u)&0x0000000000ff0000ULL) << 24) | + (((u)&0x000000000000ff00ULL) << 40) | + (((u)&0x00000000000000ffULL) << 56)); } diff --git a/contrib/compiler-rt/lib/builtins/bswapsi2.c b/contrib/compiler-rt/lib/builtins/bswapsi2.c index d30c65e55ce..5d941e69f7c 100644 --- a/contrib/compiler-rt/lib/builtins/bswapsi2.c +++ b/contrib/compiler-rt/lib/builtins/bswapsi2.c @@ -14,12 +14,10 @@ #include "int_lib.h" -COMPILER_RT_ABI uint32_t -__bswapsi2 (uint32_t u) -{ - - return ((((u) & 0xff000000) >> 24) - | (((u) & 0x00ff0000) >> 8) - | (((u) & 0x0000ff00) << 8) - | (((u) & 0x000000ff) << 24)); +COMPILER_RT_ABI uint32_t __bswapsi2(uint32_t u) { + return ( + (((u)&0xff000000) >> 24) | + (((u)&0x00ff0000) >> 8) | + (((u)&0x0000ff00) << 8) | + (((u)&0x000000ff) << 24)); } diff --git a/contrib/compiler-rt/lib/lsan/lsan_common.cc b/contrib/compiler-rt/lib/lsan/lsan_common.cc index a6b3453f5a0..a5ffc6835f5 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_common.cc @@ -265,19 +265,21 @@ static void ProcessThreads(SuspendedThreadsList const &suspended_threads, } if (flags()->use_tls) { - LOG_THREADS("TLS at %p-%p.\n", tls_begin, tls_end); - if (cache_begin == cache_end) { - ScanRangeForPointers(tls_begin, tls_end, frontier, "TLS", kReachable); - } else { - // Because LSan should not be loaded with dlopen(), we can assume - // that allocator cache will be part of static TLS image. - CHECK_LE(tls_begin, cache_begin); - CHECK_GE(tls_end, cache_end); - if (tls_begin < cache_begin) - ScanRangeForPointers(tls_begin, cache_begin, frontier, "TLS", - kReachable); - if (tls_end > cache_end) - ScanRangeForPointers(cache_end, tls_end, frontier, "TLS", kReachable); + if (tls_begin) { + LOG_THREADS("TLS at %p-%p.\n", tls_begin, tls_end); + // If the tls and cache ranges don't overlap, scan full tls range, + // otherwise, only scan the non-overlapping portions + if (cache_begin == cache_end || tls_end < cache_begin || + tls_begin > cache_end) { + ScanRangeForPointers(tls_begin, tls_end, frontier, "TLS", kReachable); + } else { + if (tls_begin < cache_begin) + ScanRangeForPointers(tls_begin, cache_begin, frontier, "TLS", + kReachable); + if (tls_end > cache_end) + ScanRangeForPointers(cache_end, tls_end, frontier, "TLS", + kReachable); + } } if (dtls && !DTLSInDestruction(dtls)) { for (uptr j = 0; j < dtls->dtv_size; ++j) { diff --git a/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc b/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc index ae10955439c..114dd8c9019 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc +++ b/contrib/compiler-rt/lib/lsan/lsan_common_mac.cc @@ -91,12 +91,7 @@ LoadedModule *GetLinker() { return nullptr; } // Required on Linux for initialization of TLS behavior, but should not be // required on Darwin. -void InitializePlatformSpecificModules() { - if (flags()->use_tls) { - Report("use_tls=1 is not supported on Darwin.\n"); - Die(); - } -} +void InitializePlatformSpecificModules() {} // Scans global variables for heap pointers. void ProcessGlobalRegions(Frontier *frontier) { diff --git a/contrib/compiler-rt/lib/lsan/lsan_flags.inc b/contrib/compiler-rt/lib/lsan/lsan_flags.inc index 8135bdcff01..e390e2ae5a1 100644 --- a/contrib/compiler-rt/lib/lsan/lsan_flags.inc +++ b/contrib/compiler-rt/lib/lsan/lsan_flags.inc @@ -30,7 +30,7 @@ LSAN_FLAG(bool, use_globals, true, "Root set: include global variables (.data and .bss)") LSAN_FLAG(bool, use_stacks, true, "Root set: include thread stacks") LSAN_FLAG(bool, use_registers, true, "Root set: include thread registers") -LSAN_FLAG(bool, use_tls, !SANITIZER_MAC, +LSAN_FLAG(bool, use_tls, true, "Root set: include TLS and thread-specific storage") LSAN_FLAG(bool, use_root_regions, true, "Root set: include regions added via __lsan_register_root_region().") diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h index 0f6f4f7f850..e13510ba33b 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_allocator_primary32.h @@ -319,5 +319,3 @@ class SizeClassAllocator32 { ByteMap possible_regions; SizeClassInfo size_class_info_array[kNumClasses]; }; - - diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common.h index 33e652e6c3d..a1c9c5a57a8 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_common.h @@ -380,7 +380,7 @@ void SetSoftRssLimitExceededCallback(void (*Callback)(bool exceeded)); // Functions related to signal handling. typedef void (*SignalHandlerType)(int, void *, void *); -bool IsHandledDeadlySignal(int signum); +HandleSignalMode GetHandleSignalMode(int signum); void InstallDeadlySignalHandlers(SignalHandlerType handler); const char *DescribeSignalOrException(int signo); // Alternative signal stack (POSIX-only). diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h index b6ae307fc93..4988fbb7a86 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flag_parser.h @@ -64,6 +64,11 @@ inline bool FlagHandler::Parse(const char *value) { *t_ = b ? kHandleSignalYes : kHandleSignalNo; return true; } + if (internal_strcmp(value, "2") == 0 || + internal_strcmp(value, "exclusive") == 0) { + *t_ = kHandleSignalExclusive; + return true; + } Printf("ERROR: Invalid value for signal handler option: '%s'\n", value); return false; } diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.h b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.h index f22593395ed..c2ec29d1656 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.h +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.h @@ -21,6 +21,7 @@ namespace __sanitizer { enum HandleSignalMode { kHandleSignalNo, kHandleSignalYes, + kHandleSignalExclusive, }; struct CommonFlags { diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc index c5aaf411fcc..12c126fa707 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_flags.inc @@ -75,12 +75,13 @@ COMMON_FLAG(bool, print_summary, true, "If false, disable printing error summaries in addition to error " "reports.") COMMON_FLAG(int, print_module_map, 0, - "OS X only. 0 = don't print, 1 = print only once before process " - "exits, 2 = print after each report.") + "OS X only (0 - don't print, 1 - print only once before process " + "exits, 2 - print after each report).") COMMON_FLAG(bool, check_printf, true, "Check printf arguments.") #define COMMON_FLAG_HANDLE_SIGNAL_HELP(signal) \ "Controls custom tool's " #signal " handler (0 - do not registers the " \ - "handler, 1 - register the handler). " + "handler, 1 - register the handler and allow user to set own, " \ + "2 - registers the handler and block user from changing it). " COMMON_FLAG(HandleSignalMode, handle_segv, kHandleSignalYes, COMMON_FLAG_HANDLE_SIGNAL_HELP(SIGSEGV)) COMMON_FLAG(HandleSignalMode, handle_sigbus, kHandleSignalYes, @@ -92,9 +93,6 @@ COMMON_FLAG(HandleSignalMode, handle_sigill, kHandleSignalNo, COMMON_FLAG(HandleSignalMode, handle_sigfpe, kHandleSignalYes, COMMON_FLAG_HANDLE_SIGNAL_HELP(SIGFPE)) #undef COMMON_FLAG_HANDLE_SIGNAL_HELP -COMMON_FLAG(bool, allow_user_segv_handler, false, - "If set, allows user to register a SEGV handler even if the tool " - "registers one.") COMMON_FLAG(bool, use_sigaltstack, true, "If set, uses alternate stack for signal handling.") COMMON_FLAG(bool, detect_deadlocks, false, diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc index b5712dd0dec..cec2f264cbc 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux.cc @@ -62,8 +62,6 @@ #if SANITIZER_FREEBSD #include #include -#include -#include #include extern "C" { // must be included after and on @@ -263,7 +261,7 @@ uptr internal_stat(const char *path, void *buf) { uptr internal_lstat(const char *path, void *buf) { #if SANITIZER_FREEBSD return internal_syscall(SYSCALL(fstatat), AT_FDCWD, (uptr)path, - (uptr)buf, AT_SYMLINK_NOFOLLOW); + (uptr)buf, AT_SYMLINK_NOFOLLOW); #elif SANITIZER_USES_CANONICAL_LINUX_SYSCALLS return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); @@ -551,7 +549,7 @@ void BlockingMutex::Lock() { void BlockingMutex::Unlock() { atomic_uint32_t *m = reinterpret_cast(&opaque_storage_); - u32 v = atomic_exchange(m, MtxUnlocked, memory_order_relaxed); + u32 v = atomic_exchange(m, MtxUnlocked, memory_order_release); CHECK_NE(v, MtxUnlocked); if (v == MtxSleeping) { #if SANITIZER_FREEBSD @@ -1398,7 +1396,7 @@ AndroidApiLevel AndroidGetApiLevel() { #endif -bool IsHandledDeadlySignal(int signum) { +HandleSignalMode GetHandleSignalMode(int signum) { switch (signum) { case SIGABRT: return common_flags()->handle_abort; @@ -1411,7 +1409,7 @@ bool IsHandledDeadlySignal(int signum) { case SIGBUS: return common_flags()->handle_sigbus; } - return false; + return kHandleSignalNo; } #if !SANITIZER_GO diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cc index c2b03b27e66..a6da82ecb1d 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_linux_s390.cc @@ -178,6 +178,13 @@ static bool FixedCVE_2016_2143() { // 4.4.6+ is OK. if (minor == 4 && patch >= 6) return true; + if (minor == 4 && patch == 0 && ptr[0] == '-' && + internal_strstr(buf.version, "Ubuntu")) { + // Check Ubuntu 16.04 + int r1 = internal_simple_strtoll(ptr+1, &ptr, 10); + if (r1 >= 13) // 4.4.0-13 or later + return true; + } // Otherwise, OK if 4.5+. return minor >= 5; } else { diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc index f1a6bf91635..f1b9e5fda5c 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_mac.cc @@ -370,6 +370,27 @@ uptr GetTlsSize() { void InitTlsSize() { } +uptr TlsBaseAddr() { + uptr segbase = 0; +#if defined(__x86_64__) + asm("movq %%gs:0,%0" : "=r"(segbase)); +#elif defined(__i386__) + asm("movl %%gs:0,%0" : "=r"(segbase)); +#endif + return segbase; +} + +// The size of the tls on darwin does not appear to be well documented, +// however the vm memory map suggests that it is 1024 uptrs in size, +// with a size of 0x2000 bytes on x86_64 and 0x1000 bytes on i386. +uptr TlsSize() { +#if defined(__x86_64__) || defined(__i386__) + return 1024 * sizeof(uptr); +#else + return 0; +#endif +} + void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, uptr *tls_addr, uptr *tls_size) { #if !SANITIZER_GO @@ -377,8 +398,8 @@ void GetThreadStackAndTls(bool main, uptr *stk_addr, uptr *stk_size, GetThreadStackTopAndBottom(main, &stack_top, &stack_bottom); *stk_addr = stack_bottom; *stk_size = stack_top - stack_bottom; - *tls_addr = 0; - *tls_size = 0; + *tls_addr = TlsBaseAddr(); + *tls_size = TlsSize(); #else *stk_addr = 0; *stk_size = 0; @@ -393,10 +414,10 @@ void ListOfModules::init() { memory_mapping.DumpListOfModules(&modules_); } -bool IsHandledDeadlySignal(int signum) { +HandleSignalMode GetHandleSignalMode(int signum) { // Handling fatal signals on watchOS and tvOS devices is disallowed. if ((SANITIZER_WATCHOS || SANITIZER_TVOS) && !(SANITIZER_IOSSIM)) - return false; + return kHandleSignalNo; switch (signum) { case SIGABRT: return common_flags()->handle_abort; @@ -409,7 +430,7 @@ bool IsHandledDeadlySignal(int signum) { case SIGBUS: return common_flags()->handle_sigbus; } - return false; + return kHandleSignalNo; } MacosVersion cached_macos_version = MACOS_VERSION_UNINITIALIZED; diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc index 8d688f3778b..791ff4481ca 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_posix_libcdep.cc @@ -134,7 +134,8 @@ void SleepForMillis(int millis) { void Abort() { #if !SANITIZER_GO // If we are handling SIGABRT, unhandle it first. - if (IsHandledDeadlySignal(SIGABRT)) { + // TODO(vitalybuka): Check if handler belongs to sanitizer. + if (GetHandleSignalMode(SIGABRT) != kHandleSignalNo) { struct sigaction sigact; internal_memset(&sigact, 0, sizeof(sigact)); sigact.sa_sigaction = (sa_sigaction_t)SIG_DFL; @@ -188,8 +189,26 @@ void UnsetAlternateSignalStack() { static void MaybeInstallSigaction(int signum, SignalHandlerType handler) { - if (!IsHandledDeadlySignal(signum)) - return; + switch (GetHandleSignalMode(signum)) { + case kHandleSignalNo: + return; + case kHandleSignalYes: { + struct sigaction sigact; + internal_memset(&sigact, 0, sizeof(sigact)); + CHECK_EQ(0, internal_sigaction(signum, nullptr, &sigact)); + if (sigact.sa_flags & SA_SIGINFO) { + if (sigact.sa_sigaction) return; + } else { + if (sigact.sa_handler != SIG_DFL && sigact.sa_handler != SIG_IGN && + sigact.sa_handler != SIG_ERR) + return; + } + break; + } + case kHandleSignalExclusive: + break; + } + struct sigaction sigact; internal_memset(&sigact, 0, sizeof(sigact)); sigact.sa_sigaction = (sa_sigaction_t)handler; diff --git a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_win.cc b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_win.cc index d0a5c078cd6..c912e8fa2aa 100644 --- a/contrib/compiler-rt/lib/sanitizer_common/sanitizer_win.cc +++ b/contrib/compiler-rt/lib/sanitizer_common/sanitizer_win.cc @@ -832,9 +832,9 @@ void InstallDeadlySignalHandlers(SignalHandlerType handler) { // FIXME: Decide what to do on Windows. } -bool IsHandledDeadlySignal(int signum) { +HandleSignalMode GetHandleSignalMode(int signum) { // FIXME: Decide what to do on Windows. - return false; + return kHandleSignalNo; } // Check based on flags if we should handle this exception. diff --git a/contrib/compiler-rt/lib/scudo/scudo_tls_linux.cpp b/contrib/compiler-rt/lib/scudo/scudo_tls_linux.cpp index 5a9cc998bcc..1e38233f339 100644 --- a/contrib/compiler-rt/lib/scudo/scudo_tls_linux.cpp +++ b/contrib/compiler-rt/lib/scudo/scudo_tls_linux.cpp @@ -18,7 +18,6 @@ #include "scudo_tls.h" -#include #include namespace __scudo { @@ -32,15 +31,17 @@ __attribute__((tls_model("initial-exec"))) THREADLOCAL ScudoThreadContext ThreadLocalContext; static void teardownThread(void *Ptr) { - uptr Iteration = reinterpret_cast(Ptr); + uptr I = reinterpret_cast(Ptr); // The glibc POSIX thread-local-storage deallocation routine calls user // provided destructors in a loop of PTHREAD_DESTRUCTOR_ITERATIONS. // We want to be called last since other destructors might call free and the // like, so we wait until PTHREAD_DESTRUCTOR_ITERATIONS before draining the // quarantine and swallowing the cache. - if (Iteration < PTHREAD_DESTRUCTOR_ITERATIONS) { - pthread_setspecific(PThreadKey, reinterpret_cast(Iteration + 1)); - return; + if (I > 1) { + // If pthread_setspecific fails, we will go ahead with the teardown. + if (LIKELY(pthread_setspecific(PThreadKey, + reinterpret_cast(I - 1)) == 0)) + return; } ThreadLocalContext.commitBack(); ScudoThreadState = ThreadTornDown; @@ -53,8 +54,9 @@ static void initOnce() { } void initThread() { - pthread_once(&GlobalInitialized, initOnce); - pthread_setspecific(PThreadKey, reinterpret_cast(1)); + CHECK_EQ(pthread_once(&GlobalInitialized, initOnce), 0); + CHECK_EQ(pthread_setspecific(PThreadKey, reinterpret_cast( + GetPthreadDestructorIterations())), 0); ThreadLocalContext.init(); ScudoThreadState = ThreadInitialized; } diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform.h b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform.h index 1dd9d91d4c9..60d9b9d8c45 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform.h +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform.h @@ -816,6 +816,7 @@ void FlushShadowMemory(); void WriteMemoryProfile(char *buf, uptr buf_size, uptr nthread, uptr nlive); int ExtractResolvFDs(void *state, int *fds, int nfd); int ExtractRecvmsgFDs(void *msg, int *fds, int nfd); +void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size); int call_pthread_cancel_with_cleanup(int(*fn)(void *c, void *m, void *abstime), void *c, void *m, void *abstime, diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc index 2d488cadd4f..d05c0e701e7 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_linux.cc @@ -320,6 +320,20 @@ int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) { return res; } +void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) { + // Check that the thr object is in tls; + const uptr thr_beg = (uptr)thr; + const uptr thr_end = (uptr)thr + sizeof(*thr); + CHECK_GE(thr_beg, tls_addr); + CHECK_LE(thr_beg, tls_addr + tls_size); + CHECK_GE(thr_end, tls_addr); + CHECK_LE(thr_end, tls_addr + tls_size); + // Since the thr object is huge, skip it. + MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, thr_beg - tls_addr); + MemoryRangeImitateWrite(thr, /*pc=*/2, thr_end, + tls_addr + tls_size - thr_end); +} + // Note: this function runs with async signals enabled, // so it must not touch any tsan state. int call_pthread_cancel_with_cleanup(int(*fn)(void *c, void *m, diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cc index b8d3d5528bb..a82bcd01bbf 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_platform_mac.cc @@ -75,12 +75,18 @@ static void *SignalSafeGetOrAllocate(uptr *dst, uptr size) { static uptr main_thread_identity = 0; ALIGNED(64) static char main_thread_state[sizeof(ThreadState)]; +ThreadState **cur_thread_location() { + ThreadState **thread_identity = (ThreadState **)pthread_self(); + return ((uptr)thread_identity == main_thread_identity) ? nullptr + : thread_identity; +} + ThreadState *cur_thread() { - uptr thread_identity = (uptr)pthread_self(); - if (thread_identity == main_thread_identity || main_thread_identity == 0) { + ThreadState **thr_state_loc = cur_thread_location(); + if (thr_state_loc == nullptr || main_thread_identity == 0) { return (ThreadState *)&main_thread_state; } - ThreadState **fake_tls = (ThreadState **)MemToShadow(thread_identity); + ThreadState **fake_tls = (ThreadState **)MemToShadow((uptr)thr_state_loc); ThreadState *thr = (ThreadState *)SignalSafeGetOrAllocate( (uptr *)fake_tls, sizeof(ThreadState)); return thr; @@ -90,13 +96,13 @@ ThreadState *cur_thread() { // munmap first and then clear `fake_tls`; if we receive a signal in between, // handler will try to access the unmapped ThreadState. void cur_thread_finalize() { - uptr thread_identity = (uptr)pthread_self(); - if (thread_identity == main_thread_identity) { + ThreadState **thr_state_loc = cur_thread_location(); + if (thr_state_loc == nullptr) { // Calling dispatch_main() or xpc_main() actually invokes pthread_exit to // exit the main thread. Let's keep the main thread's ThreadState. return; } - ThreadState **fake_tls = (ThreadState **)MemToShadow(thread_identity); + ThreadState **fake_tls = (ThreadState **)MemToShadow((uptr)thr_state_loc); internal_munmap(*fake_tls, sizeof(ThreadState)); *fake_tls = nullptr; } @@ -239,6 +245,29 @@ void InitializePlatform() { #endif } +#if !SANITIZER_GO +void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) { + // The pointer to the ThreadState object is stored in the shadow memory + // of the tls. + uptr tls_end = tls_addr + tls_size; + ThreadState **thr_state_loc = cur_thread_location(); + if (thr_state_loc == nullptr) { + MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, tls_size); + } else { + uptr thr_state_start = (uptr)thr_state_loc; + uptr thr_state_end = thr_state_start + sizeof(uptr); + CHECK_GE(thr_state_start, tls_addr); + CHECK_LE(thr_state_start, tls_addr + tls_size); + CHECK_GE(thr_state_end, tls_addr); + CHECK_LE(thr_state_end, tls_addr + tls_size); + MemoryRangeImitateWrite(thr, /*pc=*/2, tls_addr, + thr_state_start - tls_addr); + MemoryRangeImitateWrite(thr, /*pc=*/2, thr_state_end, + tls_end - thr_state_end); + } +} +#endif + #if !SANITIZER_GO // Note: this function runs with async signals enabled, // so it must not touch any tsan state. diff --git a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc index 6a0943c4958..edb60980c76 100644 --- a/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc +++ b/contrib/compiler-rt/lib/tsan/rtl/tsan_rtl_thread.cc @@ -248,19 +248,7 @@ void ThreadStart(ThreadState *thr, int tid, tid_t os_id, bool workerthread) { if (stk_addr && stk_size) MemoryRangeImitateWrite(thr, /*pc=*/ 1, stk_addr, stk_size); - if (tls_addr && tls_size) { - // Check that the thr object is in tls; - const uptr thr_beg = (uptr)thr; - const uptr thr_end = (uptr)thr + sizeof(*thr); - CHECK_GE(thr_beg, tls_addr); - CHECK_LE(thr_beg, tls_addr + tls_size); - CHECK_GE(thr_end, tls_addr); - CHECK_LE(thr_end, tls_addr + tls_size); - // Since the thr object is huge, skip it. - MemoryRangeImitateWrite(thr, /*pc=*/ 2, tls_addr, thr_beg - tls_addr); - MemoryRangeImitateWrite(thr, /*pc=*/ 2, - thr_end, tls_addr + tls_size - thr_end); - } + if (tls_addr && tls_size) ImitateTlsWrite(thr, tls_addr, tls_size); } #endif diff --git a/contrib/libc++/include/__config b/contrib/libc++/include/__config index 4ad700e234c..32e542bd1fd 100644 --- a/contrib/libc++/include/__config +++ b/contrib/libc++/include/__config @@ -1126,6 +1126,10 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( # define _LIBCPP_HAS_NO_IS_AGGREGATE #endif +#if !defined(__cpp_coroutines) || __cpp_coroutines < 201703L +# define _LIBCPP_HAS_NO_COROUTINES +#endif + #endif // __cplusplus // Decide whether to use availability macros. diff --git a/contrib/libc++/include/__threading_support b/contrib/libc++/include/__threading_support index 385fff32b35..afd7cb50649 100644 --- a/contrib/libc++/include/__threading_support +++ b/contrib/libc++/include/__threading_support @@ -27,7 +27,7 @@ # include # include #elif defined(_LIBCPP_HAS_THREAD_API_WIN32) -#include +#include #include #include #include <__undef_min_max> diff --git a/contrib/libc++/include/algorithm b/contrib/libc++/include/algorithm index 08ca23ff616..7ea4474a1bb 100644 --- a/contrib/libc++/include/algorithm +++ b/contrib/libc++/include/algorithm @@ -35,6 +35,9 @@ template Function for_each(InputIterator first, InputIterator last, Function f); +template + InputIterator for_each_n(InputIterator first, Size n, Function f); // C++17 + template InputIterator find(InputIterator first, InputIterator last, const T& value); @@ -961,6 +964,26 @@ for_each(_InputIterator __first, _InputIterator __last, _Function __f) return __f; } +#if _LIBCPP_STD_VER > 14 +// for_each_n + +template +inline _LIBCPP_INLINE_VISIBILITY +_InputIterator +for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) +{ + typedef decltype(__convert_to_integral(__orig_n)) _IntegralSize; + _IntegralSize __n = __orig_n; + while (__n > 0) + { + __f(*__first); + ++__first; + --__n; + } + return __first; +} +#endif + // find template diff --git a/contrib/libc++/include/experimental/__config b/contrib/libc++/include/experimental/__config index 9a7bbe85d8d..37f88c166dd 100644 --- a/contrib/libc++/include/experimental/__config +++ b/contrib/libc++/include/experimental/__config @@ -44,6 +44,13 @@ #define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_FILESYSTEM \ } } _LIBCPP_END_NAMESPACE_EXPERIMENTAL +#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_COROUTINES \ + _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace coroutines_v1 { + +#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL_COROUTINES \ + } _LIBCPP_END_NAMESPACE_EXPERIMENTAL + +#define _VSTD_CORO _VSTD_EXPERIMENTAL::coroutines_v1 #define _VSTD_FS ::std::experimental::filesystem::v1 diff --git a/contrib/libc++/include/experimental/coroutine b/contrib/libc++/include/experimental/coroutine new file mode 100644 index 00000000000..d2a03ae8a8c --- /dev/null +++ b/contrib/libc++/include/experimental/coroutine @@ -0,0 +1,270 @@ +// -*- C++ -*- +//===----------------------------- coroutine -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_EXPERIMENTAL_COROUTINE +#define _LIBCPP_EXPERIMENTAL_COROUTINE + +/** + experimental/coroutine synopsis + +// C++next + +namespace std { +namespace experimental { +inline namespace coroutines_v1 { + + // 18.11.1 coroutine traits +template +class coroutine_traits; +// 18.11.2 coroutine handle +template +class coroutine_handle; +// 18.11.2.7 comparison operators: +bool operator==(coroutine_handle<> x, coroutine_handle<> y) _NOEXCEPT; +bool operator!=(coroutine_handle<> x, coroutine_handle<> y) _NOEXCEPT; +bool operator<(coroutine_handle<> x, coroutine_handle<> y) _NOEXCEPT; +bool operator<=(coroutine_handle<> x, coroutine_handle<> y) _NOEXCEPT; +bool operator>=(coroutine_handle<> x, coroutine_handle<> y) _NOEXCEPT; +bool operator>(coroutine_handle<> x, coroutine_handle<> y) _NOEXCEPT; +// 18.11.3 trivial awaitables +struct suspend_never; +struct suspend_always; +// 18.11.2.8 hash support: +template struct hash; +template struct hash>; + +} // namespace coroutines_v1 +} // namespace experimental +} // namespace std + + */ + +#include +#include +#include +#include +#include // for hash +#include +#include +#include <__debug> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +#ifdef _LIBCPP_HAS_NO_COROUTINES +# if defined(_LIBCPP_WARNING) + _LIBCPP_WARNING(" cannot be used with this compiler") +# else +# warning cannot be used with this compiler +# endif +#endif + +#ifndef _LIBCPP_HAS_NO_COROUTINES + +_LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL_COROUTINES + +template +struct __coroutine_traits_sfinae {}; + +template +struct __coroutine_traits_sfinae< + _Tp, typename __void_t::type> +{ + using promise_type = typename _Tp::promise_type; +}; + +template +struct _LIBCPP_TEMPLATE_VIS coroutine_traits + : public __coroutine_traits_sfinae<_Ret> +{ +}; + +template +class _LIBCPP_TEMPLATE_VIS coroutine_handle; + +template <> +class _LIBCPP_TEMPLATE_VIS coroutine_handle { +public: + _LIBCPP_ALWAYS_INLINE + _LIBCPP_CONSTEXPR coroutine_handle() _NOEXCEPT : __handle_(nullptr) {} + + _LIBCPP_ALWAYS_INLINE + _LIBCPP_CONSTEXPR coroutine_handle(nullptr_t) _NOEXCEPT : __handle_(nullptr) {} + + _LIBCPP_ALWAYS_INLINE + coroutine_handle& operator=(nullptr_t) _NOEXCEPT { + __handle_ = nullptr; + return *this; + } + + _LIBCPP_ALWAYS_INLINE + _LIBCPP_CONSTEXPR void* address() const _NOEXCEPT { return __handle_; } + + _LIBCPP_ALWAYS_INLINE + _LIBCPP_CONSTEXPR explicit operator bool() const _NOEXCEPT { return __handle_; } + + _LIBCPP_ALWAYS_INLINE + void operator()() { resume(); } + + _LIBCPP_ALWAYS_INLINE + void resume() { + _LIBCPP_ASSERT(__is_suspended(), + "resume() can only be called on suspended coroutines"); + _LIBCPP_ASSERT(!done(), + "resume() has undefined behavior when the coroutine is done"); + __builtin_coro_resume(__handle_); + } + + _LIBCPP_ALWAYS_INLINE + void destroy() { + _LIBCPP_ASSERT(__is_suspended(), + "destroy() can only be called on suspended coroutines"); + __builtin_coro_destroy(__handle_); + } + + _LIBCPP_ALWAYS_INLINE + bool done() const { + _LIBCPP_ASSERT(__is_suspended(), + "done() can only be called on suspended coroutines"); + return __builtin_coro_done(__handle_); + } + +public: + _LIBCPP_ALWAYS_INLINE + static coroutine_handle from_address(void* __addr) _NOEXCEPT { + coroutine_handle __tmp; + __tmp.__handle_ = __addr; + return __tmp; + } + +private: + bool __is_suspended() const _NOEXCEPT { + // FIXME actually implement a check for if the coro is suspended. + return __handle_; + } + + template friend class coroutine_handle; + void* __handle_; +}; + +// 18.11.2.7 comparison operators: +inline _LIBCPP_ALWAYS_INLINE +bool operator==(coroutine_handle<> __x, coroutine_handle<> __y) _NOEXCEPT { + return __x.address() == __y.address(); +} +inline _LIBCPP_ALWAYS_INLINE +bool operator!=(coroutine_handle<> __x, coroutine_handle<> __y) _NOEXCEPT { + return !(__x == __y); +} +inline _LIBCPP_ALWAYS_INLINE +bool operator<(coroutine_handle<> __x, coroutine_handle<> __y) _NOEXCEPT { + return less()(__x.address(), __y.address()); +} +inline _LIBCPP_ALWAYS_INLINE +bool operator>(coroutine_handle<> __x, coroutine_handle<> __y) _NOEXCEPT { + return __y < __x; +} +inline _LIBCPP_ALWAYS_INLINE +bool operator<=(coroutine_handle<> __x, coroutine_handle<> __y) _NOEXCEPT { + return !(__x > __y); +} +inline _LIBCPP_ALWAYS_INLINE +bool operator>=(coroutine_handle<> __x, coroutine_handle<> __y) _NOEXCEPT { + return !(__x < __y); +} + +template +class _LIBCPP_TEMPLATE_VIS coroutine_handle : public coroutine_handle<> { + using _Base = coroutine_handle<>; +public: +#ifndef _LIBCPP_CXX03_LANG + // 18.11.2.1 construct/reset + using coroutine_handle<>::coroutine_handle; +#else + _LIBCPP_ALWAYS_INLINE coroutine_handle() _NOEXCEPT : _Base() {} + _LIBCPP_ALWAYS_INLINE coroutine_handle(nullptr_t) _NOEXCEPT : _Base(nullptr) {} +#endif + _LIBCPP_INLINE_VISIBILITY + coroutine_handle& operator=(nullptr_t) _NOEXCEPT { + _Base::operator=(nullptr); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + _Promise& promise() const { + return *reinterpret_cast<_Promise*>( + __builtin_coro_promise(this->__handle_, __alignof(_Promise), false)); + } + +public: + _LIBCPP_ALWAYS_INLINE + static coroutine_handle from_address(void* __addr) _NOEXCEPT { + coroutine_handle __tmp; + __tmp.__handle_ = __addr; + return __tmp; + } + + // NOTE: this overload isn't required by the standard but is needed so + // the deleted _Promise* overload doesn't make from_address(nullptr) + // ambiguous. + // FIXME: should from_address work with nullptr? + _LIBCPP_ALWAYS_INLINE + static coroutine_handle from_address(nullptr_t) _NOEXCEPT { + return {}; + } + + // from_address cannot be used with the coroutines promise type. + static coroutine_handle from_address(_Promise*) = delete; + + _LIBCPP_ALWAYS_INLINE + static coroutine_handle from_promise(_Promise& __promise) _NOEXCEPT { + coroutine_handle __tmp; + __tmp.__handle_ = __builtin_coro_promise(_VSTD::addressof(__promise), + __alignof(_Promise), true); + return __tmp; + } +}; + +struct _LIBCPP_TYPE_VIS suspend_never { + _LIBCPP_ALWAYS_INLINE + bool await_ready() const _NOEXCEPT { return true; } + _LIBCPP_ALWAYS_INLINE + void await_suspend(coroutine_handle<>) const _NOEXCEPT {} + _LIBCPP_ALWAYS_INLINE + void await_resume() const _NOEXCEPT {} +}; + +struct _LIBCPP_TYPE_VIS suspend_always { + _LIBCPP_ALWAYS_INLINE + bool await_ready() const _NOEXCEPT { return false; } + _LIBCPP_ALWAYS_INLINE + void await_suspend(coroutine_handle<>) const _NOEXCEPT {} + _LIBCPP_ALWAYS_INLINE + void await_resume() const _NOEXCEPT {} +}; + +_LIBCPP_END_NAMESPACE_EXPERIMENTAL_COROUTINES + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct hash<_VSTD_CORO::coroutine_handle<_Tp> > { + using __arg_type = _VSTD_CORO::coroutine_handle<_Tp>; + _LIBCPP_INLINE_VISIBILITY + size_t operator()(__arg_type const& __v) const _NOEXCEPT + {return hash()(__v.address());} +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // !defined(_LIBCPP_HAS_NO_COROUTINES) + +#endif /* _LIBCPP_EXPERIMENTAL_COROUTINE */ diff --git a/contrib/libc++/include/iterator b/contrib/libc++/include/iterator index 4aa44746dc9..d163ab1b091 100644 --- a/contrib/libc++/include/iterator +++ b/contrib/libc++/include/iterator @@ -990,7 +990,6 @@ public: _LIBCPP_INLINE_VISIBILITY char_type operator*() const {return static_cast(__sbuf_->sgetc());} - _LIBCPP_INLINE_VISIBILITY char_type* operator->() const {return nullptr;} _LIBCPP_INLINE_VISIBILITY istreambuf_iterator& operator++() { __sbuf_->sbumpc(); diff --git a/contrib/libc++/include/memory b/contrib/libc++/include/memory index 4201c92dd72..31dc73d8b14 100644 --- a/contrib/libc++/include/memory +++ b/contrib/libc++/include/memory @@ -2251,6 +2251,8 @@ void swap(__compressed_pair<_T1, _T2>& __x, __compressed_pair<_T1, _T2>& __y) template struct _LIBCPP_TEMPLATE_VIS default_delete { + static_assert(!is_function<_Tp>::value, + "default_delete cannot be instantiated for function types"); #ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY constexpr default_delete() noexcept = default; #else @@ -3653,6 +3655,18 @@ __shared_ptr_emplace<_Tp, _Alloc>::__on_zero_shared_weak() _NOEXCEPT __a.deallocate(_PTraits::pointer_to(*this), 1); } +struct __shared_ptr_dummy_rebind_allocator_type; +template <> +class _LIBCPP_TEMPLATE_VIS allocator<__shared_ptr_dummy_rebind_allocator_type> +{ +public: + template + struct rebind + { + typedef allocator<_Other> other; + }; +}; + template class _LIBCPP_TEMPLATE_VIS enable_shared_from_this; template @@ -3921,6 +3935,17 @@ public: #endif // _LIBCPP_HAS_NO_VARIADICS private: + template ::value> + struct __shared_ptr_default_allocator + { + typedef allocator<_Yp> type; + }; + + template + struct __shared_ptr_default_allocator<_Yp, true> + { + typedef allocator<__shared_ptr_dummy_rebind_allocator_type> type; + }; template _LIBCPP_INLINE_VISIBILITY @@ -3939,8 +3964,7 @@ private: } } - _LIBCPP_INLINE_VISIBILITY - void __enable_weak_this(const volatile void*, const volatile void*) _NOEXCEPT {} + _LIBCPP_INLINE_VISIBILITY void __enable_weak_this(...) _NOEXCEPT {} template friend class _LIBCPP_TEMPLATE_VIS shared_ptr; template friend class _LIBCPP_TEMPLATE_VIS weak_ptr; @@ -3972,8 +3996,9 @@ shared_ptr<_Tp>::shared_ptr(_Yp* __p, : __ptr_(__p) { unique_ptr<_Yp> __hold(__p); - typedef __shared_ptr_pointer<_Yp*, default_delete<_Yp>, allocator<_Yp> > _CntrlBlk; - __cntrl_ = new _CntrlBlk(__p, default_delete<_Yp>(), allocator<_Yp>()); + typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; + typedef __shared_ptr_pointer<_Yp*, default_delete<_Yp>, _AllocT > _CntrlBlk; + __cntrl_ = new _CntrlBlk(__p, default_delete<_Yp>(), _AllocT()); __hold.release(); __enable_weak_this(__p, __p); } @@ -3988,8 +4013,9 @@ shared_ptr<_Tp>::shared_ptr(_Yp* __p, _Dp __d, try { #endif // _LIBCPP_NO_EXCEPTIONS - typedef __shared_ptr_pointer<_Yp*, _Dp, allocator<_Yp> > _CntrlBlk; - __cntrl_ = new _CntrlBlk(__p, __d, allocator<_Yp>()); + typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; + typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk; + __cntrl_ = new _CntrlBlk(__p, __d, _AllocT()); __enable_weak_this(__p, __p); #ifndef _LIBCPP_NO_EXCEPTIONS } @@ -4010,8 +4036,9 @@ shared_ptr<_Tp>::shared_ptr(nullptr_t __p, _Dp __d) try { #endif // _LIBCPP_NO_EXCEPTIONS - typedef __shared_ptr_pointer > _CntrlBlk; - __cntrl_ = new _CntrlBlk(__p, __d, allocator<_Tp>()); + typedef typename __shared_ptr_default_allocator<_Tp>::type _AllocT; + typedef __shared_ptr_pointer _CntrlBlk; + __cntrl_ = new _CntrlBlk(__p, __d, _AllocT()); #ifndef _LIBCPP_NO_EXCEPTIONS } catch (...) @@ -4179,8 +4206,9 @@ shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp> __r, else #endif { - typedef __shared_ptr_pointer<_Yp*, _Dp, allocator<_Yp> > _CntrlBlk; - __cntrl_ = new _CntrlBlk(__r.get(), __r.get_deleter(), allocator<_Yp>()); + typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; + typedef __shared_ptr_pointer<_Yp*, _Dp, _AllocT > _CntrlBlk; + __cntrl_ = new _CntrlBlk(__r.get(), __r.get_deleter(), _AllocT()); __enable_weak_this(__r.get(), __r.get()); } __r.release(); @@ -4208,10 +4236,11 @@ shared_ptr<_Tp>::shared_ptr(unique_ptr<_Yp, _Dp> __r, else #endif { + typedef typename __shared_ptr_default_allocator<_Yp>::type _AllocT; typedef __shared_ptr_pointer<_Yp*, reference_wrapper::type>, - allocator<_Yp> > _CntrlBlk; - __cntrl_ = new _CntrlBlk(__r.get(), ref(__r.get_deleter()), allocator<_Yp>()); + _AllocT > _CntrlBlk; + __cntrl_ = new _CntrlBlk(__r.get(), ref(__r.get_deleter()), _AllocT()); __enable_weak_this(__r.get(), __r.get()); } __r.release(); diff --git a/contrib/libc++/include/module.modulemap b/contrib/libc++/include/module.modulemap index c354cae1e5f..462d4234ae6 100644 --- a/contrib/libc++/include/module.modulemap +++ b/contrib/libc++/include/module.modulemap @@ -500,6 +500,10 @@ module std [system] { module chrono { header "experimental/chrono" export * + } + module coroutine { + header "experimental/coroutine" + export * } module deque { header "experimental/deque" diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h index 3a4a37017d6..07626982d28 100644 --- a/contrib/llvm/include/llvm/ADT/Triple.h +++ b/contrib/llvm/include/llvm/ADT/Triple.h @@ -59,6 +59,7 @@ public: mips64, // MIPS64: mips64 mips64el, // MIPS64EL: mips64el msp430, // MSP430: msp430 + nios2, // NIOSII: nios2 ppc, // PPC: powerpc ppc64, // PPC64: powerpc64, ppu ppc64le, // PPC64LE: powerpc64le diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h index bf73e099a2b..ca48b548351 100644 --- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -70,174 +70,173 @@ struct SimplifyQuery { Copy.CxtI = I; return Copy; } - }; +}; - // NOTE: the explicit multiple argument versions of these functions are - // deprecated. - // Please use the SimplifyQuery versions in new code. +// NOTE: the explicit multiple argument versions of these functions are +// deprecated. +// Please use the SimplifyQuery versions in new code. - /// Given operands for an Add, fold the result or return null. - Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, +/// Given operands for an Add, fold the result or return null. +Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, const SimplifyQuery &Q); - /// Given operands for a Sub, fold the result or return null. - Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, - const SimplifyQuery &Q); +/// Given operands for a Sub, fold the result or return null. +Value *SimplifySubInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, + const SimplifyQuery &Q); - /// Given operands for an FAdd, fold the result or return null. - Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, +/// Given operands for an FAdd, fold the result or return null. +Value *SimplifyFAddInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for an FSub, fold the result or return null. +Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for an FMul, fold the result or return null. +Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for a Mul, fold the result or return null. +Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an SDiv, fold the result or return null. +Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for a UDiv, fold the result or return null. +Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an FDiv, fold the result or return null. +Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for an SRem, fold the result or return null. +Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for a URem, fold the result or return null. +Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an FRem, fold the result or return null. +Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, + const SimplifyQuery &Q); + +/// Given operands for a Shl, fold the result or return null. +Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const SimplifyQuery &Q); + +/// Given operands for a LShr, fold the result or return null. +Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); + +/// Given operands for a AShr, fold the result or return nulll. +Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, + const SimplifyQuery &Q); + +/// Given operands for an And, fold the result or return null. +Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an Or, fold the result or return null. +Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an Xor, fold the result or return null. +Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); + +/// Given operands for an ICmpInst, fold the result or return null. +Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, + const SimplifyQuery &Q); + +/// Given operands for an FCmpInst, fold the result or return null. +Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); + +/// Given operands for a SelectInst, fold the result or return null. +Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, const SimplifyQuery &Q); - /// Given operands for an FSub, fold the result or return null. - Value *SimplifyFSubInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operands for a GetElementPtrInst, fold the result or return null. +Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, + const SimplifyQuery &Q); - /// Given operands for an FMul, fold the result or return null. - Value *SimplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); +/// Given operands for an InsertValueInst, fold the result or return null. +Value *SimplifyInsertValueInst(Value *Agg, Value *Val, ArrayRef Idxs, + const SimplifyQuery &Q); - /// Given operands for a Mul, fold the result or return null. - Value *SimplifyMulInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); +/// Given operands for an ExtractValueInst, fold the result or return null. +Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, + const SimplifyQuery &Q); - /// Given operands for an SDiv, fold the result or return null. - Value *SimplifySDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - - /// Given operands for a UDiv, fold the result or return null. - Value *SimplifyUDivInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - - /// Given operands for an FDiv, fold the result or return null. - Value *SimplifyFDivInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); - - /// Given operands for an SRem, fold the result or return null. - Value *SimplifySRemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - - /// Given operands for a URem, fold the result or return null. - Value *SimplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - - /// Given operands for an FRem, fold the result or return null. - Value *SimplifyFRemInst(Value *LHS, Value *RHS, FastMathFlags FMF, - const SimplifyQuery &Q); - - /// Given operands for a Shl, fold the result or return null. - Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, - const SimplifyQuery &Q); - - /// Given operands for a LShr, fold the result or return null. - Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, - const SimplifyQuery &Q); - - /// Given operands for a AShr, fold the result or return nulll. - Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, - const SimplifyQuery &Q); - - /// Given operands for an And, fold the result or return null. - Value *SimplifyAndInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - - /// Given operands for an Or, fold the result or return null. - Value *SimplifyOrInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - - /// Given operands for an Xor, fold the result or return null. - Value *SimplifyXorInst(Value *LHS, Value *RHS, const SimplifyQuery &Q); - - /// Given operands for an ICmpInst, fold the result or return null. - Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const SimplifyQuery &Q); - - /// Given operands for an FCmpInst, fold the result or return null. - Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); - - /// Given operands for a SelectInst, fold the result or return null. - Value *SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal, - const SimplifyQuery &Q); - - /// Given operands for a GetElementPtrInst, fold the result or return null. - Value *SimplifyGEPInst(Type *SrcTy, ArrayRef Ops, - const SimplifyQuery &Q); - - /// Given operands for an InsertValueInst, fold the result or return null. - Value *SimplifyInsertValueInst(Value *Agg, Value *Val, - ArrayRef Idxs, - const SimplifyQuery &Q); - - /// Given operands for an ExtractValueInst, fold the result or return null. - Value *SimplifyExtractValueInst(Value *Agg, ArrayRef Idxs, +/// Given operands for an ExtractElementInst, fold the result or return null. +Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const SimplifyQuery &Q); - /// Given operands for an ExtractElementInst, fold the result or return null. - Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, - const SimplifyQuery &Q); +/// Given operands for a CastInst, fold the result or return null. +Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, + const SimplifyQuery &Q); - /// Given operands for a CastInst, fold the result or return null. - Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, - const SimplifyQuery &Q); +/// Given operands for a ShuffleVectorInst, fold the result or return null. +Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, + Type *RetTy, const SimplifyQuery &Q); - /// Given operands for a ShuffleVectorInst, fold the result or return null. - Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, - Type *RetTy, const SimplifyQuery &Q); +//=== Helper functions for higher up the class hierarchy. - //=== Helper functions for higher up the class hierarchy. - - - /// Given operands for a CmpInst, fold the result or return null. - Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, - const SimplifyQuery &Q); - - /// Given operands for a BinaryOperator, fold the result or return null. - Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, +/// Given operands for a CmpInst, fold the result or return null. +Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, const SimplifyQuery &Q); - /// Given operands for an FP BinaryOperator, fold the result or return null. - /// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the - /// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. - Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, - FastMathFlags FMF, const SimplifyQuery &Q); +/// Given operands for a BinaryOperator, fold the result or return null. +Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, + const SimplifyQuery &Q); - /// Given a function and iterators over arguments, fold the result or return - /// null. - Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, - User::op_iterator ArgEnd, const SimplifyQuery &Q); +/// Given operands for an FP BinaryOperator, fold the result or return null. +/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the +/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp. +Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS, + FastMathFlags FMF, const SimplifyQuery &Q); - /// Given a function and set of arguments, fold the result or return null. - Value *SimplifyCall(Value *V, ArrayRef Args, const SimplifyQuery &Q); +/// Given a function and iterators over arguments, fold the result or return +/// null. +Value *SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const SimplifyQuery &Q); - /// See if we can compute a simplified version of this instruction. If not, - /// return null. - Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, - OptimizationRemarkEmitter *ORE = nullptr); +/// Given a function and set of arguments, fold the result or return null. +Value *SimplifyCall(Value *V, ArrayRef Args, const SimplifyQuery &Q); - /// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. - /// - /// This first performs a normal RAUW of I with SimpleV. It then recursively - /// attempts to simplify those users updated by the operation. The 'I' - /// instruction must not be equal to the simplified value 'SimpleV'. - /// - /// The function returns true if any simplifications were performed. - bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); +/// See if we can compute a simplified version of this instruction. If not, +/// return null. +Value *SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, + OptimizationRemarkEmitter *ORE = nullptr); - /// Recursively attempt to simplify an instruction. - /// - /// This routine uses SimplifyInstruction to simplify 'I', and if successful - /// replaces uses of 'I' with the simplified value. It then recurses on each - /// of the users impacted. It returns true if any simplifications were - /// performed. - bool recursivelySimplifyInstruction(Instruction *I, - const TargetLibraryInfo *TLI = nullptr, - const DominatorTree *DT = nullptr, - AssumptionCache *AC = nullptr); - // These helper functions return a SimplifyQuery structure that contains as - // many of the optional analysis we use as are currently valid. This is the - // strongly preferred way of constructing SimplifyQuery in passes. - const SimplifyQuery getBestSimplifyQuery(Pass &, Function &); - template - const SimplifyQuery getBestSimplifyQuery(AnalysisManager &, - Function &); - const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, - const DataLayout &); +/// Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively. +/// +/// This first performs a normal RAUW of I with SimpleV. It then recursively +/// attempts to simplify those users updated by the operation. The 'I' +/// instruction must not be equal to the simplified value 'SimpleV'. +/// +/// The function returns true if any simplifications were performed. +bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); + +/// Recursively attempt to simplify an instruction. +/// +/// This routine uses SimplifyInstruction to simplify 'I', and if successful +/// replaces uses of 'I' with the simplified value. It then recurses on each +/// of the users impacted. It returns true if any simplifications were +/// performed. +bool recursivelySimplifyInstruction(Instruction *I, + const TargetLibraryInfo *TLI = nullptr, + const DominatorTree *DT = nullptr, + AssumptionCache *AC = nullptr); + +// These helper functions return a SimplifyQuery structure that contains as +// many of the optional analysis we use as are currently valid. This is the +// strongly preferred way of constructing SimplifyQuery in passes. +const SimplifyQuery getBestSimplifyQuery(Pass &, Function &); +template +const SimplifyQuery getBestSimplifyQuery(AnalysisManager &, + Function &); +const SimplifyQuery getBestSimplifyQuery(LoopStandardAnalysisResults &, + const DataLayout &); } // end namespace llvm #endif diff --git a/contrib/llvm/include/llvm/Analysis/LoopPass.h b/contrib/llvm/include/llvm/Analysis/LoopPass.h index 496ae189e57..75e7688bbdc 100644 --- a/contrib/llvm/include/llvm/Analysis/LoopPass.h +++ b/contrib/llvm/include/llvm/Analysis/LoopPass.h @@ -126,9 +126,8 @@ public: } public: - // Add a new loop into the loop queue as a child of the given parent, or at - // the top level if \c ParentLoop is null. - Loop &addLoop(Loop *ParentLoop); + // Add a new loop into the loop queue. + void addLoop(Loop &L); //===--------------------------------------------------------------------===// /// SimpleAnalysis - Provides simple interface to update analysis info diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h index ac54bd4cfff..4a6fc245c22 100644 --- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1533,6 +1533,12 @@ public: /// specified loop. bool isLoopInvariant(const SCEV *S, const Loop *L); + /// Determine if the SCEV can be evaluated at loop's entry. It is true if it + /// doesn't depend on a SCEVUnknown of an instruction which is dominated by + /// the header of loop L. + bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L, DominatorTree &DT, + LoopInfo &LI); + /// Return true if the given SCEV changes value in a known way in the /// specified loop. This property being true implies that the value is /// variant in the loop AND that we can emit an expression to compute the diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h index 0a0af384c3e..6cbe3a1f515 100644 --- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -396,6 +396,9 @@ public: bool isLegalMaskedScatter(Type *DataType) const; bool isLegalMaskedGather(Type *DataType) const; + /// Return true if target doesn't mind addresses in vectors. + bool prefersVectorizedAddressing() const; + /// \brief Return the cost of the scaling factor used in the addressing /// mode represented by AM for this target, for a load/store /// of the specified type. @@ -807,6 +810,7 @@ public: virtual bool isLegalMaskedLoad(Type *DataType) = 0; virtual bool isLegalMaskedScatter(Type *DataType) = 0; virtual bool isLegalMaskedGather(Type *DataType) = 0; + virtual bool prefersVectorizedAddressing() = 0; virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0; @@ -1000,6 +1004,9 @@ public: bool isLegalMaskedGather(Type *DataType) override { return Impl.isLegalMaskedGather(DataType); } + bool prefersVectorizedAddressing() override { + return Impl.prefersVectorizedAddressing(); + } int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) override { diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 550e84ad90c..ad1a7cb748f 100644 --- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -237,6 +237,8 @@ public: bool isLegalMaskedGather(Type *DataType) { return false; } + bool prefersVectorizedAddressing() { return true; } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { // Guess that all legal addressing mode are free. diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h index cf24062e46f..b1ee76159c4 100644 --- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h +++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h @@ -60,7 +60,8 @@ template class ArrayRef; KnownBits computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, - const DominatorTree *DT = nullptr); + const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr); /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h index 180c0b57924..c898667f147 100644 --- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -34,6 +34,7 @@ namespace llvm { class AsmPrinterHandler; +class BasicBlock; class BlockAddress; class Constant; class ConstantArray; @@ -43,6 +44,7 @@ class DIEAbbrev; class DwarfDebug; class GCMetadataPrinter; class GlobalIndirectSymbol; +class GlobalObject; class GlobalValue; class GlobalVariable; class GCStrategy; @@ -65,6 +67,8 @@ class MCSubtargetInfo; class MCSymbol; class MCTargetOptions; class MDNode; +class Module; +class raw_ostream; class TargetLoweringObjectFile; class TargetMachine; @@ -109,7 +113,7 @@ public: /// Map global GOT equivalent MCSymbols to GlobalVariables and keep track of /// its number of uses by other globals. - typedef std::pair GOTEquivUsePair; + using GOTEquivUsePair = std::pair; MapVector GlobalGOTEquivs; /// Enable print [latency:throughput] in output diff --git a/contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h b/contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h index ac18eac8a1c..1f9c96b18e1 100644 --- a/contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h +++ b/contrib/llvm/include/llvm/CodeGen/AtomicExpandUtils.h @@ -1,4 +1,4 @@ -//===-- AtomicExpandUtils.h - Utilities for expanding atomic instructions -===// +//===- AtomicExpandUtils.h - Utilities for expanding atomic instructions --===// // // The LLVM Compiler Infrastructure // @@ -7,19 +7,24 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_ATOMICEXPANDUTILS_H +#define LLVM_CODEGEN_ATOMICEXPANDUTILS_H + #include "llvm/ADT/STLExtras.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/Support/AtomicOrdering.h" namespace llvm { -class Value; -class AtomicRMWInst; +class AtomicRMWInst; +class Value; /// Parameters (see the expansion example below): /// (the builder, %addr, %loaded, %new_val, ordering, /// /* OUT */ %success, /* OUT */ %new_loaded) -typedef function_ref &, Value *, Value *, Value *, - AtomicOrdering, Value *&, Value *&)> CreateCmpXchgInstFun; +using CreateCmpXchgInstFun = + function_ref &, Value *, Value *, Value *, AtomicOrdering, + Value *&, Value *&)>; /// \brief Expand an atomic RMW instruction into a loop utilizing /// cmpxchg. You'll want to make sure your target machine likes cmpxchg @@ -42,7 +47,8 @@ typedef function_ref &, Value *, Value *, Value *, /// loop: /// %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] /// %new = some_op iN %loaded, %incr -/// ; This is what -atomic-expand will produce using this function on i686 targets: +/// ; This is what -atomic-expand will produce using this function on i686 +/// targets: /// %pair = cmpxchg iN* %addr, iN %loaded, iN %new_val /// %new_loaded = extractvalue { iN, i1 } %pair, 0 /// %success = extractvalue { iN, i1 } %pair, 1 @@ -52,6 +58,8 @@ typedef function_ref &, Value *, Value *, Value *, /// [...] /// /// Returns true if the containing function was modified. -bool -expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); -} +bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun Factory); + +} // end namespace llvm + +#endif // LLVM_CODEGEN_ATOMICEXPANDUTILS_H diff --git a/contrib/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm/include/llvm/CodeGen/DIE.h index a40147336fe..4be44e62fa9 100644 --- a/contrib/llvm/include/llvm/CodeGen/DIE.h +++ b/contrib/llvm/include/llvm/CodeGen/DIE.h @@ -1,4 +1,4 @@ -//===--- lib/CodeGen/DIE.h - DWARF Info Entries -----------------*- C++ -*-===// +//===- lib/CodeGen/DIE.h - DWARF Info Entries -------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -31,6 +31,7 @@ #include #include #include +#include #include namespace llvm { @@ -53,11 +54,11 @@ class DIEAbbrevData { dwarf::Form Form; /// Dwarf attribute value for DW_FORM_implicit_const - int64_t Value; + int64_t Value = 0; public: DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) - : Attribute(A), Form(F), Value(0) {} + : Attribute(A), Form(F) {} DIEAbbrevData(dwarf::Attribute A, int64_t V) : Attribute(A), Form(dwarf::DW_FORM_implicit_const), Value(V) {} @@ -136,13 +137,14 @@ class DIEAbbrevSet { /// storage container. BumpPtrAllocator &Alloc; /// \brief FoldingSet that uniques the abbreviations. - llvm::FoldingSet AbbreviationsSet; + FoldingSet AbbreviationsSet; /// A list of all the unique abbreviations in use. std::vector Abbreviations; public: DIEAbbrevSet(BumpPtrAllocator &A) : Alloc(A) {} ~DIEAbbrevSet(); + /// Generate the abbreviation declaration for a DIE and return a pointer to /// the generated abbreviation. /// @@ -289,13 +291,11 @@ public: /// A pointer to another debug information entry. An instance of this class can /// also be used as a proxy for a debug information entry not yet defined /// (ie. types.) -class DIE; class DIEEntry { DIE *Entry; - DIEEntry() = delete; - public: + DIEEntry() = delete; explicit DIEEntry(DIE &E) : Entry(&E) {} DIE &getEntry() const { return *Entry; } @@ -348,10 +348,10 @@ private: /// /// All values that aren't standard layout (or are larger than 8 bytes) /// should be stored by reference instead of by value. - typedef AlignedCharArrayUnion - ValTy; + using ValTy = AlignedCharArrayUnion; + static_assert(sizeof(ValTy) <= sizeof(uint64_t) || sizeof(ValTy) <= sizeof(void *), "Expected all large types to be stored via pointer"); @@ -486,10 +486,12 @@ struct IntrusiveBackListNode { }; struct IntrusiveBackListBase { - typedef IntrusiveBackListNode Node; + using Node = IntrusiveBackListNode; + Node *Last = nullptr; bool empty() const { return !Last; } + void push_back(Node &N) { assert(N.Next.getPointer() == &N && "Expected unlinked node"); assert(N.Next.getInt() == true && "Expected unlinked node"); @@ -505,6 +507,7 @@ struct IntrusiveBackListBase { template class IntrusiveBackList : IntrusiveBackListBase { public: using IntrusiveBackListBase::empty; + void push_back(T &N) { IntrusiveBackListBase::push_back(N); } T &back() { return *static_cast(Last); } const T &back() const { return *static_cast(Last); } @@ -513,6 +516,7 @@ public: class iterator : public iterator_facade_base { friend class const_iterator; + Node *N = nullptr; public: @@ -585,10 +589,12 @@ public: class DIEValueList { struct Node : IntrusiveBackListNode { DIEValue V; + explicit Node(DIEValue V) : V(V) {} }; - typedef IntrusiveBackList ListTy; + using ListTy = IntrusiveBackList; + ListTy List; public: @@ -597,9 +603,10 @@ public: : public iterator_adaptor_base { friend class const_value_iterator; - typedef iterator_adaptor_base iterator_adaptor; + + using iterator_adaptor = + iterator_adaptor_base; public: value_iterator() = default; @@ -612,9 +619,9 @@ public: class const_value_iterator : public iterator_adaptor_base< const_value_iterator, ListTy::const_iterator, std::forward_iterator_tag, const DIEValue> { - typedef iterator_adaptor_base iterator_adaptor; + using iterator_adaptor = + iterator_adaptor_base; public: const_value_iterator() = default; @@ -627,8 +634,8 @@ public: const DIEValue &operator*() const { return wrapped()->V; } }; - typedef iterator_range value_range; - typedef iterator_range const_value_range; + using value_range = iterator_range; + using const_value_range = iterator_range; value_iterator addValue(BumpPtrAllocator &Alloc, const DIEValue &V) { List.push_back(*new (Alloc) Node(V)); @@ -657,15 +664,15 @@ class DIE : IntrusiveBackListNode, public DIEValueList { friend class DIEUnit; /// Dwarf unit relative offset. - unsigned Offset; + unsigned Offset = 0; /// Size of instance + children. - unsigned Size; + unsigned Size = 0; unsigned AbbrevNumber = ~0u; /// Dwarf tag code. dwarf::Tag Tag = (dwarf::Tag)0; /// Set to true to force a DIE to emit an abbreviation that says it has /// children even when it doesn't. This is used for unit testing purposes. - bool ForceChildren; + bool ForceChildren = false; /// Children DIEs. IntrusiveBackList Children; @@ -673,20 +680,19 @@ class DIE : IntrusiveBackListNode, public DIEValueList { /// DIEUnit which contains this DIE as its unit DIE. PointerUnion Owner; - DIE() = delete; - explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag), - ForceChildren(false) {} + explicit DIE(dwarf::Tag Tag) : Tag(Tag) {} public: + DIE() = delete; + DIE(const DIE &RHS) = delete; + DIE(DIE &&RHS) = delete; + DIE &operator=(const DIE &RHS) = delete; + DIE &operator=(const DIE &&RHS) = delete; + static DIE *get(BumpPtrAllocator &Alloc, dwarf::Tag Tag) { return new (Alloc) DIE(Tag); } - DIE(const DIE &RHS) = delete; - DIE(DIE &&RHS) = delete; - void operator=(const DIE &RHS) = delete; - void operator=(const DIE &&RHS) = delete; - // Accessors. unsigned getAbbrevNumber() const { return AbbrevNumber; } dwarf::Tag getTag() const { return Tag; } @@ -696,10 +702,10 @@ public: bool hasChildren() const { return ForceChildren || !Children.empty(); } void setForceChildren(bool B) { ForceChildren = B; } - typedef IntrusiveBackList::iterator child_iterator; - typedef IntrusiveBackList::const_iterator const_child_iterator; - typedef iterator_range child_range; - typedef iterator_range const_child_range; + using child_iterator = IntrusiveBackList::iterator; + using const_child_iterator = IntrusiveBackList::const_iterator; + using child_range = iterator_range; + using const_child_range = iterator_range; child_range children() { return make_range(Children.begin(), Children.end()); @@ -838,10 +844,10 @@ struct BasicDIEUnit final : DIEUnit { /// DIELoc - Represents an expression location. // class DIELoc : public DIEValueList { - mutable unsigned Size; // Size in bytes excluding size header. + mutable unsigned Size = 0; // Size in bytes excluding size header. public: - DIELoc() : Size(0) {} + DIELoc() = default; /// ComputeSize - Calculate the size of the location expression. /// @@ -872,10 +878,10 @@ public: /// DIEBlock - Represents a block of values. // class DIEBlock : public DIEValueList { - mutable unsigned Size; // Size in bytes excluding size header. + mutable unsigned Size = 0; // Size in bytes excluding size header. public: - DIEBlock() : Size(0) {} + DIEBlock() = default; /// ComputeSize - Calculate the size of the location expression. /// diff --git a/contrib/llvm/include/llvm/CodeGen/FaultMaps.h b/contrib/llvm/include/llvm/CodeGen/FaultMaps.h index 0f0005b83c5..98ff526dfe9 100644 --- a/contrib/llvm/include/llvm/CodeGen/FaultMaps.h +++ b/contrib/llvm/include/llvm/CodeGen/FaultMaps.h @@ -56,7 +56,7 @@ private: HandlerOffsetExpr(HandlerOffset) {} }; - typedef std::vector FunctionFaultInfos; + using FunctionFaultInfos = std::vector; // We'd like to keep a stable iteration order for FunctionInfos to help // FileCheck based testing. @@ -78,20 +78,17 @@ private: /// generated by the version of LLVM that includes it. No guarantees are made /// with respect to forward or backward compatibility. class FaultMapParser { - typedef uint8_t FaultMapVersionType; - static const size_t FaultMapVersionOffset = 0; + using FaultMapVersionType = uint8_t; + using Reserved0Type = uint8_t; + using Reserved1Type = uint16_t; + using NumFunctionsType = uint32_t; - typedef uint8_t Reserved0Type; + static const size_t FaultMapVersionOffset = 0; static const size_t Reserved0Offset = FaultMapVersionOffset + sizeof(FaultMapVersionType); - - typedef uint16_t Reserved1Type; static const size_t Reserved1Offset = Reserved0Offset + sizeof(Reserved0Type); - - typedef uint32_t NumFunctionsType; static const size_t NumFunctionsOffset = Reserved1Offset + sizeof(Reserved1Type); - static const size_t FunctionInfosOffset = NumFunctionsOffset + sizeof(NumFunctionsType); @@ -105,14 +102,13 @@ class FaultMapParser { public: class FunctionFaultInfoAccessor { - typedef uint32_t FaultKindType; - static const size_t FaultKindOffset = 0; + using FaultKindType = uint32_t; + using FaultingPCOffsetType = uint32_t; + using HandlerPCOffsetType = uint32_t; - typedef uint32_t FaultingPCOffsetType; + static const size_t FaultKindOffset = 0; static const size_t FaultingPCOffsetOffset = FaultKindOffset + sizeof(FaultKindType); - - typedef uint32_t HandlerPCOffsetType; static const size_t HandlerPCOffsetOffset = FaultingPCOffsetOffset + sizeof(FaultingPCOffsetType); @@ -140,20 +136,17 @@ public: }; class FunctionInfoAccessor { - typedef uint64_t FunctionAddrType; - static const size_t FunctionAddrOffset = 0; + using FunctionAddrType = uint64_t; + using NumFaultingPCsType = uint32_t; + using ReservedType = uint32_t; - typedef uint32_t NumFaultingPCsType; + static const size_t FunctionAddrOffset = 0; static const size_t NumFaultingPCsOffset = FunctionAddrOffset + sizeof(FunctionAddrType); - - typedef uint32_t ReservedType; static const size_t ReservedOffset = NumFaultingPCsOffset + sizeof(NumFaultingPCsType); - static const size_t FunctionFaultInfosOffset = ReservedOffset + sizeof(ReservedType); - static const size_t FunctionInfoHeaderSize = FunctionFaultInfosOffset; const uint8_t *P = nullptr; diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h new file mode 100644 index 00000000000..0a46eb9e784 --- /dev/null +++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -0,0 +1,78 @@ +//== llvm/CodeGen/GlobalISel/Localizer.h - Localizer -------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file This file describes the interface of the Localizer pass. +/// This pass moves/duplicates constant-like instructions close to their uses. +/// Its primarily goal is to workaround the deficiencies of the fast register +/// allocator. +/// With GlobalISel constants are all materialized in the entry block of +/// a function. However, the fast allocator cannot rematerialize constants and +/// has a lot more live-ranges to deal with and will most likely end up +/// spilling a lot. +/// By pushing the constants close to their use, we only create small +/// live-ranges. +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H +#define LLVM_CODEGEN_GLOBALISEL_LOCALIZER_H + +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +namespace llvm { +// Forward declarations. +class MachineRegisterInfo; + +/// This pass implements the localization mechanism described at the +/// top of this file. One specificity of the implementation is that +/// it will materialize one and only one instance of a constant per +/// basic block, thus enabling reuse of that constant within that block. +/// Moreover, it only materializes constants in blocks where they +/// are used. PHI uses are considered happening at the end of the +/// related predecessor. +class Localizer : public MachineFunctionPass { +public: + static char ID; + +private: + /// MRI contains all the register class/bank information that this + /// pass uses and updates. + MachineRegisterInfo *MRI; + + /// Check whether or not \p MI needs to be moved close to its uses. + static bool shouldLocalize(const MachineInstr &MI); + + /// Check if \p MOUse is used in the same basic block as \p Def. + /// If the use is in the same block, we say it is local. + /// When the use is not local, \p InsertMBB will contain the basic + /// block when to insert \p Def to have a local use. + static bool isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, + MachineBasicBlock *&InsertMBB); + + /// Initialize the field members using \p MF. + void init(MachineFunction &MF); + +public: + Localizer(); + + StringRef getPassName() const override { return "Localizer"; } + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties() + .set(MachineFunctionProperties::Property::IsSSA) + .set(MachineFunctionProperties::Property::Legalized) + .set(MachineFunctionProperties::Property::RegBankSelected); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +} // End namespace llvm. + +#endif diff --git a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h index f2a9a9f73ca..2300a106c35 100644 --- a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -264,6 +264,14 @@ namespace ISD { /// optimized. STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM, + /// Constrained versions of libm-equivalent floating point intrinsics. + /// These will be lowered to the equivalent non-constrained pseudo-op + /// (or expanded to the equivalent library call) before final selection. + /// They are used to limit optimizations while the DAG is being optimized. + STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS, + STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2, + STRICT_FRINT, STRICT_FNEARBYINT, + /// FMA - Perform a * b + c with no intermediate rounding step. FMA, diff --git a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h index b792cba4b78..40cd146f88f 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/LiveInterval.h - Interval representation ---*- C++ -*-===// +//===- llvm/CodeGen/LiveInterval.h - Interval representation ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -21,22 +21,30 @@ #ifndef LLVM_CODEGEN_LIVEINTERVAL_H #define LLVM_CODEGEN_LIVEINTERVAL_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/Allocator.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/MathExtras.h" +#include #include -#include +#include +#include +#include #include +#include +#include namespace llvm { + class CoalescerPair; class LiveIntervals; - class MachineInstr; class MachineRegisterInfo; - class TargetRegisterInfo; class raw_ostream; - template class SmallPtrSet; /// VNInfo - Value Number Information. /// This class holds information about a machine level values, including @@ -44,7 +52,7 @@ namespace llvm { /// class VNInfo { public: - typedef BumpPtrAllocator Allocator; + using Allocator = BumpPtrAllocator; /// The ID number of this value. unsigned id; @@ -53,14 +61,10 @@ namespace llvm { SlotIndex def; /// VNInfo constructor. - VNInfo(unsigned i, SlotIndex d) - : id(i), def(d) - { } + VNInfo(unsigned i, SlotIndex d) : id(i), def(d) {} /// VNInfo constructor, copies values from orig, except for the value number. - VNInfo(unsigned i, const VNInfo &orig) - : id(i), def(orig.def) - { } + VNInfo(unsigned i, const VNInfo &orig) : id(i), def(orig.def) {} /// Copy from the parameter into this VNInfo. void copyFrom(VNInfo &src) { @@ -152,16 +156,16 @@ namespace llvm { /// segment with a new value number is used. class LiveRange { public: - /// This represents a simple continuous liveness interval for a value. /// The start point is inclusive, the end point exclusive. These intervals /// are rendered as [start,end). struct Segment { SlotIndex start; // Start point of the interval (inclusive) SlotIndex end; // End point of the interval (exclusive) - VNInfo *valno; // identifier for the value contained in this segment. + VNInfo *valno = nullptr; // identifier for the value contained in this + // segment. - Segment() : valno(nullptr) {} + Segment() = default; Segment(SlotIndex S, SlotIndex E, VNInfo *V) : start(S), end(E), valno(V) { @@ -189,8 +193,8 @@ namespace llvm { void dump() const; }; - typedef SmallVector Segments; - typedef SmallVector VNInfoList; + using Segments = SmallVector; + using VNInfoList = SmallVector; Segments segments; // the liveness segments VNInfoList valnos; // value#'s @@ -198,22 +202,24 @@ namespace llvm { // The segment set is used temporarily to accelerate initial computation // of live ranges of physical registers in computeRegUnitRange. // After that the set is flushed to the segment vector and deleted. - typedef std::set SegmentSet; + using SegmentSet = std::set; std::unique_ptr segmentSet; - typedef Segments::iterator iterator; + using iterator = Segments::iterator; + using const_iterator = Segments::const_iterator; + iterator begin() { return segments.begin(); } iterator end() { return segments.end(); } - typedef Segments::const_iterator const_iterator; const_iterator begin() const { return segments.begin(); } const_iterator end() const { return segments.end(); } - typedef VNInfoList::iterator vni_iterator; + using vni_iterator = VNInfoList::iterator; + using const_vni_iterator = VNInfoList::const_iterator; + vni_iterator vni_begin() { return valnos.begin(); } vni_iterator vni_end() { return valnos.end(); } - typedef VNInfoList::const_iterator const_vni_iterator; const_vni_iterator vni_begin() const { return valnos.begin(); } const_vni_iterator vni_end() const { return valnos.end(); } @@ -631,40 +637,37 @@ namespace llvm { /// or stack slot. class LiveInterval : public LiveRange { public: - typedef LiveRange super; + using super = LiveRange; /// A live range for subregisters. The LaneMask specifies which parts of the /// super register are covered by the interval. /// (@sa TargetRegisterInfo::getSubRegIndexLaneMask()). class SubRange : public LiveRange { public: - SubRange *Next; + SubRange *Next = nullptr; LaneBitmask LaneMask; /// Constructs a new SubRange object. - SubRange(LaneBitmask LaneMask) - : Next(nullptr), LaneMask(LaneMask) { - } + SubRange(LaneBitmask LaneMask) : LaneMask(LaneMask) {} /// Constructs a new SubRange object by copying liveness from @p Other. SubRange(LaneBitmask LaneMask, const LiveRange &Other, BumpPtrAllocator &Allocator) - : LiveRange(Other, Allocator), Next(nullptr), LaneMask(LaneMask) { - } + : LiveRange(Other, Allocator), LaneMask(LaneMask) {} void print(raw_ostream &OS) const; void dump() const; }; private: - SubRange *SubRanges; ///< Single linked list of subregister live ranges. + SubRange *SubRanges = nullptr; ///< Single linked list of subregister live + /// ranges. public: const unsigned reg; // the register or stack slot of this interval. float weight; // weight of this interval - LiveInterval(unsigned Reg, float Weight) - : SubRanges(nullptr), reg(Reg), weight(Weight) {} + LiveInterval(unsigned Reg, float Weight) : reg(Reg), weight(Weight) {} ~LiveInterval() { clearSubRanges(); @@ -673,8 +676,10 @@ namespace llvm { template class SingleLinkedListIterator { T *P; + public: SingleLinkedListIterator(T *P) : P(P) {} + SingleLinkedListIterator &operator++() { P = P->Next; return *this; @@ -698,7 +703,9 @@ namespace llvm { } }; - typedef SingleLinkedListIterator subrange_iterator; + using subrange_iterator = SingleLinkedListIterator; + using const_subrange_iterator = SingleLinkedListIterator; + subrange_iterator subrange_begin() { return subrange_iterator(SubRanges); } @@ -706,7 +713,6 @@ namespace llvm { return subrange_iterator(nullptr); } - typedef SingleLinkedListIterator const_subrange_iterator; const_subrange_iterator subrange_begin() const { return const_subrange_iterator(SubRanges); } @@ -759,12 +765,12 @@ namespace llvm { /// isSpillable - Can this interval be spilled? bool isSpillable() const { - return weight != llvm::huge_valf; + return weight != huge_valf; } /// markNotSpillable - Mark interval as not spillable void markNotSpillable() { - weight = llvm::huge_valf; + weight = huge_valf; } /// For a given lane mask @p LaneMask, compute indexes at which the @@ -931,5 +937,7 @@ namespace llvm { void Distribute(LiveInterval &LI, LiveInterval *LIV[], MachineRegisterInfo &MRI); }; -} -#endif + +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEINTERVAL_H diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h index 181cb375de8..820e8836248 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -1,4 +1,4 @@ -//===-- LiveIntervalAnalysis.h - Live Interval Analysis ---------*- C++ -*-===// +//===- LiveIntervalAnalysis.h - Live Interval Analysis ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -20,6 +20,7 @@ #ifndef LLVM_CODEGEN_LIVEINTERVALANALYSIS_H #define LLVM_CODEGEN_LIVEINTERVALANALYSIS_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -27,27 +28,29 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/Allocator.h" +#include "llvm/MC/LaneBitmask.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetRegisterInfo.h" -#include +#include +#include +#include namespace llvm { extern cl::opt UseSegmentSetForPhysRegs; - class BitVector; - class BlockFrequency; - class LiveRangeCalc; - class LiveVariables; - class MachineDominatorTree; - class MachineLoopInfo; - class TargetRegisterInfo; - class MachineRegisterInfo; - class TargetInstrInfo; - class TargetRegisterClass; - class VirtRegMap; - class MachineBlockFrequencyInfo; +class BitVector; +class LiveRangeCalc; +class MachineBlockFrequencyInfo; +class MachineDominatorTree; +class MachineFunction; +class MachineInstr; +class MachineRegisterInfo; +class raw_ostream; +class TargetInstrInfo; +class VirtRegMap; class LiveIntervals : public MachineFunctionPass { MachineFunction* MF; @@ -56,8 +59,8 @@ extern cl::opt UseSegmentSetForPhysRegs; const TargetInstrInfo* TII; AliasAnalysis *AA; SlotIndexes* Indexes; - MachineDominatorTree *DomTree; - LiveRangeCalc *LRCalc; + MachineDominatorTree *DomTree = nullptr; + LiveRangeCalc *LRCalc = nullptr; /// Special pool allocator for VNInfo's (LiveInterval val#). VNInfo::Allocator VNInfoAllocator; @@ -95,6 +98,7 @@ extern cl::opt UseSegmentSetForPhysRegs; public: static char ID; + LiveIntervals(); ~LiveIntervals() override; @@ -466,6 +470,7 @@ extern cl::opt UseSegmentSetForPhysRegs; class HMEditor; }; -} // End llvm namespace -#endif +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVEINTERVALANALYSIS_H diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervalUnion.h index 57e3deb038a..b922e543c85 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveIntervalUnion.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervalUnion.h @@ -26,12 +26,14 @@ namespace llvm { +class raw_ostream; class TargetRegisterInfo; #ifndef NDEBUG // forward declaration template class SparseBitVector; -typedef SparseBitVector<128> LiveVirtRegBitSet; + +using LiveVirtRegBitSet = SparseBitVector<128>; #endif /// Union of live intervals that are strong candidates for coalescing into a @@ -42,19 +44,19 @@ class LiveIntervalUnion { // A set of live virtual register segments that supports fast insertion, // intersection, and removal. // Mapping SlotIndex intervals to virtual register numbers. - typedef IntervalMap LiveSegments; + using LiveSegments = IntervalMap; public: // SegmentIter can advance to the next segment ordered by starting position // which may belong to a different live virtual register. We also must be able // to reach the current segment's containing virtual register. - typedef LiveSegments::iterator SegmentIter; + using SegmentIter = LiveSegments::iterator; /// Const version of SegmentIter. - typedef LiveSegments::const_iterator ConstSegmentIter; + using ConstSegmentIter = LiveSegments::const_iterator; // LiveIntervalUnions share an external allocator. - typedef LiveSegments::Allocator Allocator; + using Allocator = LiveSegments::Allocator; private: unsigned Tag = 0; // unique tag for current contents. @@ -76,7 +78,7 @@ public: SlotIndex startIndex() const { return Segments.start(); } // Provide public access to the underlying map to allow overlap iteration. - typedef LiveSegments Map; + using Map = LiveSegments; const Map &getMap() const { return Segments; } /// getTag - Return an opaque tag representing the current state of the union. diff --git a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h index 9e04c467fad..f9c741dd75b 100644 --- a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h +++ b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -7,23 +7,24 @@ // //===----------------------------------------------------------------------===// // -// This file implements the LivePhysRegs utility for tracking liveness of -// physical registers. This can be used for ad-hoc liveness tracking after -// register allocation. You can start with the live-ins/live-outs at the -// beginning/end of a block and update the information while walking the -// instructions inside the block. This implementation tracks the liveness on a -// sub-register granularity. -// -// We assume that the high bits of a physical super-register are not preserved -// unless the instruction has an implicit-use operand reading the super- -// register. -// -// X86 Example: -// %YMM0 = ... -// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) -// -// %YMM0 = ... -// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) +/// \file +/// This file implements the LivePhysRegs utility for tracking liveness of +/// physical registers. This can be used for ad-hoc liveness tracking after +/// register allocation. You can start with the live-ins/live-outs at the +/// beginning/end of a block and update the information while walking the +/// instructions inside the block. This implementation tracks the liveness on a +/// sub-register granularity. +/// +/// We assume that the high bits of a physical super-register are not preserved +/// unless the instruction has an implicit-use operand reading the super- +/// register. +/// +/// X86 Example: +/// %YMM0 = ... +/// %XMM0 = ... (Kills %XMM0, all %XMM0s sub-registers, and %YMM0) +/// +/// %YMM0 = ... +/// %XMM0 = ..., %YMM0 (%YMM0 and all its sub-registers are alive) //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_LIVEPHYSREGS_H @@ -39,40 +40,42 @@ namespace llvm { class MachineInstr; +class MachineOperand; +class MachineRegisterInfo; +class raw_ostream; -/// \brief A set of live physical registers with functions to track liveness +/// \brief A set of physical registers with utility functions to track liveness /// when walking backward/forward through a basic block. class LivePhysRegs { const TargetRegisterInfo *TRI = nullptr; SparseSet LiveRegs; +public: + /// Constructs an unitialized set. init() needs to be called to initialize it. + LivePhysRegs() = default; + + /// Constructs and initializes an empty set. + LivePhysRegs(const TargetRegisterInfo &TRI) : TRI(&TRI) { + LiveRegs.setUniverse(TRI.getNumRegs()); + } + LivePhysRegs(const LivePhysRegs&) = delete; LivePhysRegs &operator=(const LivePhysRegs&) = delete; -public: - /// \brief Constructs a new empty LivePhysRegs set. - LivePhysRegs() = default; - - /// \brief Constructs and initialize an empty LivePhysRegs set. - LivePhysRegs(const TargetRegisterInfo *TRI) : TRI(TRI) { - assert(TRI && "Invalid TargetRegisterInfo pointer."); - LiveRegs.setUniverse(TRI->getNumRegs()); - } - - /// \brief Clear and initialize the LivePhysRegs set. + /// (re-)initializes and clears the set. void init(const TargetRegisterInfo &TRI) { this->TRI = &TRI; LiveRegs.clear(); LiveRegs.setUniverse(TRI.getNumRegs()); } - /// \brief Clears the LivePhysRegs set. + /// Clears the set. void clear() { LiveRegs.clear(); } - /// \brief Returns true if the set is empty. + /// Returns true if the set is empty. bool empty() const { return LiveRegs.empty(); } - /// \brief Adds a physical register and all its sub-registers to the set. + /// Adds a physical register and all its sub-registers to the set. void addReg(unsigned Reg) { assert(TRI && "LivePhysRegs is not initialized."); assert(Reg <= TRI->getNumRegs() && "Expected a physical register."); @@ -90,12 +93,13 @@ public: LiveRegs.erase(*R); } - /// \brief Removes physical registers clobbered by the regmask operand @p MO. + /// Removes physical registers clobbered by the regmask operand \p MO. void removeRegsInMask(const MachineOperand &MO, - SmallVectorImpl> *Clobbers); + SmallVectorImpl> *Clobbers = + nullptr); - /// \brief Returns true if register @p Reg is contained in the set. This also - /// works if only the super register of @p Reg has been defined, because + /// \brief Returns true if register \p Reg is contained in the set. This also + /// works if only the super register of \p Reg has been defined, because /// addReg() always adds all sub-registers to the set as well. /// Note: Returns false if just some sub registers are live, use available() /// when searching a free register. @@ -104,48 +108,48 @@ public: /// Returns true if register \p Reg and no aliasing register is in the set. bool available(const MachineRegisterInfo &MRI, unsigned Reg) const; - /// \brief Simulates liveness when stepping backwards over an - /// instruction(bundle): Remove Defs, add uses. This is the recommended way of - /// calculating liveness. + /// Simulates liveness when stepping backwards over an instruction(bundle). + /// Remove Defs, add uses. This is the recommended way of calculating + /// liveness. void stepBackward(const MachineInstr &MI); - /// \brief Simulates liveness when stepping forward over an - /// instruction(bundle): Remove killed-uses, add defs. This is the not - /// recommended way, because it depends on accurate kill flags. If possible - /// use stepBackward() instead of this function. - /// The clobbers set will be the list of registers either defined or clobbered - /// by a regmask. The operand will identify whether this is a regmask or - /// register operand. + /// Simulates liveness when stepping forward over an instruction(bundle). + /// Remove killed-uses, add defs. This is the not recommended way, because it + /// depends on accurate kill flags. If possible use stepBackward() instead of + /// this function. The clobbers set will be the list of registers either + /// defined or clobbered by a regmask. The operand will identify whether this + /// is a regmask or register operand. void stepForward(const MachineInstr &MI, SmallVectorImpl> &Clobbers); - /// Adds all live-in registers of basic block @p MBB. + /// Adds all live-in registers of basic block \p MBB. /// Live in registers are the registers in the blocks live-in list and the /// pristine registers. void addLiveIns(const MachineBasicBlock &MBB); - /// Adds all live-out registers of basic block @p MBB. + /// Adds all live-out registers of basic block \p MBB. /// Live out registers are the union of the live-in registers of the successor /// blocks and pristine registers. Live out registers of the end block are the /// callee saved registers. void addLiveOuts(const MachineBasicBlock &MBB); - /// Like addLiveOuts() but does not add pristine registers/callee saved + /// Adds all live-out registers of basic block \p MBB but skips pristine /// registers. void addLiveOutsNoPristines(const MachineBasicBlock &MBB); - typedef SparseSet::const_iterator const_iterator; + using const_iterator = SparseSet::const_iterator; + const_iterator begin() const { return LiveRegs.begin(); } const_iterator end() const { return LiveRegs.end(); } - /// \brief Prints the currently live registers to @p OS. + /// Prints the currently live registers to \p OS. void print(raw_ostream &OS) const; - /// \brief Dumps the currently live registers to the debug output. + /// Dumps the currently live registers to the debug output. void dump() const; private: - /// Adds live-in registers from basic block @p MBB, taking associated + /// \brief Adds live-in registers from basic block \p MBB, taking associated /// lane masks into consideration. void addBlockLiveIns(const MachineBasicBlock &MBB); }; @@ -155,11 +159,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) { return OS; } -/// Compute the live-in list for \p MBB assuming all of its successors live-in -/// lists are up-to-date. Uses the given LivePhysReg instance \p LiveRegs; This -/// is just here to avoid repeated heap allocations when calling this multiple -/// times in a pass. -void computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, +/// \brief Computes the live-in list for \p MBB assuming all of its successors +/// live-in lists are up-to-date. Uses the given LivePhysReg instance \p +/// LiveRegs; This is just here to avoid repeated heap allocations when calling +/// this multiple times in a pass. +void computeLiveIns(LivePhysRegs &LiveRegs, const MachineRegisterInfo &MRI, MachineBasicBlock &MBB); } // end namespace llvm diff --git a/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h index 4250777682b..362d9854a27 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveRangeEdit.h @@ -1,4 +1,4 @@ -//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===// +//===- LiveRangeEdit.h - Basic tools for split and spill --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,19 +19,28 @@ #define LLVM_CODEGEN_LIVERANGEEDIT_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include namespace llvm { class LiveIntervals; class MachineBlockFrequencyInfo; +class MachineInstr; class MachineLoopInfo; +class MachineOperand; +class TargetInstrInfo; +class TargetRegisterInfo; class VirtRegMap; class LiveRangeEdit : private MachineRegisterInfo::Delegate { @@ -39,7 +48,10 @@ public: /// Callback methods for LiveRangeEdit owners. class Delegate { virtual void anchor(); + public: + virtual ~Delegate() = default; + /// Called immediately before erasing a dead machine instruction. virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} @@ -53,8 +65,6 @@ public: /// Called after cloning a virtual register. /// This is used for new registers representing connected components of Old. virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} - - virtual ~Delegate() {} }; private: @@ -70,7 +80,7 @@ private: const unsigned FirstNew; /// ScannedRemattable - true when remattable values have been identified. - bool ScannedRemattable; + bool ScannedRemattable = false; /// DeadRemats - The saved instructions which have already been dead after /// rematerialization but not deleted yet -- to be done in postOptimization. @@ -78,11 +88,11 @@ private: /// Remattable - Values defined by remattable instructions as identified by /// tii.isTriviallyReMaterializable(). - SmallPtrSet Remattable; + SmallPtrSet Remattable; /// Rematted - Values that were actually rematted, and so need to have their /// live range trimmed or entirely removed. - SmallPtrSet Rematted; + SmallPtrSet Rematted; /// scanRemattable - Identify the Parent values that may rematerialize. void scanRemattable(AliasAnalysis *aa); @@ -94,11 +104,11 @@ private: /// foldAsLoad - If LI has a single use and a single def that can be folded as /// a load, eliminate the register by folding the def into the use. - bool foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead); + bool foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead); + + using ToShrinkSet = SetVector, + SmallPtrSet>; - typedef SetVector, - SmallPtrSet > ToShrinkSet; /// Helper for eliminateDeadDefs. void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink, AliasAnalysis *AA); @@ -129,26 +139,26 @@ public: SmallPtrSet *deadRemats = nullptr) : Parent(parent), NewRegs(newRegs), MRI(MF.getRegInfo()), LIS(lis), VRM(vrm), TII(*MF.getSubtarget().getInstrInfo()), TheDelegate(delegate), - FirstNew(newRegs.size()), ScannedRemattable(false), - DeadRemats(deadRemats) { + FirstNew(newRegs.size()), DeadRemats(deadRemats) { MRI.setDelegate(this); } ~LiveRangeEdit() override { MRI.resetDelegate(this); } LiveInterval &getParent() const { - assert(Parent && "No parent LiveInterval"); - return *Parent; + assert(Parent && "No parent LiveInterval"); + return *Parent; } + unsigned getReg() const { return getParent().reg; } /// Iterator for accessing the new registers added by this edit. - typedef SmallVectorImpl::const_iterator iterator; - iterator begin() const { return NewRegs.begin()+FirstNew; } + using iterator = SmallVectorImpl::const_iterator; + iterator begin() const { return NewRegs.begin() + FirstNew; } iterator end() const { return NewRegs.end(); } - unsigned size() const { return NewRegs.size()-FirstNew; } + unsigned size() const { return NewRegs.size() - FirstNew; } bool empty() const { return size() == 0; } - unsigned get(unsigned idx) const { return NewRegs[idx+FirstNew]; } + unsigned get(unsigned idx) const { return NewRegs[idx + FirstNew]; } /// pop_back - It allows LiveRangeEdit users to drop new registers. /// The context is when an original def instruction of a register is @@ -176,26 +186,25 @@ public: return createEmptyIntervalFrom(getReg()); } - unsigned create() { - return createFrom(getReg()); - } + unsigned create() { return createFrom(getReg()); } /// anyRematerializable - Return true if any parent values may be /// rematerializable. /// This function must be called before any rematerialization is attempted. - bool anyRematerializable(AliasAnalysis*); + bool anyRematerializable(AliasAnalysis *); /// checkRematerializable - Manually add VNI to the list of rematerializable /// values if DefMI may be rematerializable. bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, - AliasAnalysis*); + AliasAnalysis *); /// Remat - Information needed to rematerialize at a specific location. struct Remat { - VNInfo *ParentVNI; // parent_'s value at the remat location. - MachineInstr *OrigMI; // Instruction defining OrigVNI. It contains the - // real expr for remat. - explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(nullptr) {} + VNInfo *ParentVNI; // parent_'s value at the remat location. + MachineInstr *OrigMI = nullptr; // Instruction defining OrigVNI. It contains + // the real expr for remat. + + explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI) {} }; /// canRematerializeAt - Determine if ParentVNI can be rematerialized at @@ -209,10 +218,8 @@ public: /// liveness is not updated. /// Return the SlotIndex of the new instruction. SlotIndex rematerializeAt(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, - const Remat &RM, - const TargetRegisterInfo&, + MachineBasicBlock::iterator MI, unsigned DestReg, + const Remat &RM, const TargetRegisterInfo &, bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing @@ -248,11 +255,10 @@ public: /// calculateRegClassAndHint - Recompute register class and hint for each new /// register. - void calculateRegClassAndHint(MachineFunction&, - const MachineLoopInfo&, - const MachineBlockFrequencyInfo&); + void calculateRegClassAndHint(MachineFunction &, const MachineLoopInfo &, + const MachineBlockFrequencyInfo &); }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_LIVERANGEEDIT_H diff --git a/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h index 3ffbe3d775b..c90ae7b184f 100644 --- a/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h +++ b/contrib/llvm/include/llvm/CodeGen/LiveStackAnalysis.h @@ -1,4 +1,4 @@ -//===-- LiveStackAnalysis.h - Live Stack Slot Analysis ----------*- C++ -*-===// +//===- LiveStackAnalysis.h - Live Stack Slot Analysis -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,13 +18,16 @@ #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Pass.h" +#include #include #include namespace llvm { +class TargetRegisterClass; +class TargetRegisterInfo; + class LiveStacks : public MachineFunctionPass { const TargetRegisterInfo *TRI; @@ -33,8 +36,7 @@ class LiveStacks : public MachineFunctionPass { VNInfo::Allocator VNInfoAllocator; /// S2IMap - Stack slot indices to live interval mapping. - /// - typedef std::unordered_map SS2IntervalMap; + using SS2IntervalMap = std::unordered_map; SS2IntervalMap S2IMap; /// S2RCMap - Stack slot indices to register class mapping. @@ -42,12 +44,14 @@ class LiveStacks : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid + LiveStacks() : MachineFunctionPass(ID) { initializeLiveStacksPass(*PassRegistry::getPassRegistry()); } - typedef SS2IntervalMap::iterator iterator; - typedef SS2IntervalMap::const_iterator const_iterator; + using iterator = SS2IntervalMap::iterator; + using const_iterator = SS2IntervalMap::const_iterator; + const_iterator begin() const { return S2IMap.begin(); } const_iterator end() const { return S2IMap.end(); } iterator begin() { return S2IMap.begin(); } @@ -93,6 +97,7 @@ public: /// print - Implement the dump method. void print(raw_ostream &O, const Module * = nullptr) const override; }; -} -#endif /* LLVM_CODEGEN_LIVESTACK_ANALYSIS_H */ +} // end namespace llvm + +#endif // LLVM_CODEGEN_LIVESTACK_ANALYSIS_H diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 18d40564856..8da48c379d0 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -1,4 +1,4 @@ -//===-- llvm/CodeGen/MachineBasicBlock.h ------------------------*- C++ -*-===// +//===- llvm/CodeGen/MachineBasicBlock.h -------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -15,41 +15,50 @@ #define LLVM_CODEGEN_MACHINEBASICBLOCK_H #include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/simple_ilist.h" #include "llvm/CodeGen/MachineInstrBundleIterator.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DebugLoc.h" #include "llvm/Support/BranchProbability.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Support/DataTypes.h" +#include +#include #include +#include +#include +#include namespace llvm { -class Pass; class BasicBlock; class MachineFunction; class MCSymbol; -class MIPrinter; +class ModuleSlotTracker; +class Pass; class SlotIndexes; class StringRef; class raw_ostream; -class MachineBranchProbabilityInfo; +class TargetRegisterClass; +class TargetRegisterInfo; template <> struct ilist_traits { private: friend class MachineBasicBlock; // Set by the owning MachineBasicBlock. + MachineBasicBlock *Parent; - typedef simple_ilist>::iterator - instr_iterator; + using instr_iterator = + simple_ilist>::iterator; public: void addNodeToList(MachineInstr *N); void removeNodeFromList(MachineInstr *N); void transferNodesFromList(ilist_traits &OldList, instr_iterator First, instr_iterator Last); - void deleteNode(MachineInstr *MI); }; @@ -69,7 +78,8 @@ public: }; private: - typedef ilist> Instructions; + using Instructions = ilist>; + Instructions Insts; const BasicBlock *BB; int Number; @@ -83,12 +93,12 @@ private: /// same order as Successors, or it is empty if we don't use it (disable /// optimization). std::vector Probs; - typedef std::vector::iterator probability_iterator; - typedef std::vector::const_iterator - const_probability_iterator; + using probability_iterator = std::vector::iterator; + using const_probability_iterator = + std::vector::const_iterator; /// Keep track of the physical registers that are livein of the basicblock. - typedef std::vector LiveInVector; + using LiveInVector = std::vector; LiveInVector LiveIns; /// Alignment of the basic block. Zero if the basic block does not need to be @@ -113,7 +123,7 @@ private: mutable MCSymbol *CachedMCSymbol = nullptr; // Intrusive list support - MachineBasicBlock() {} + MachineBasicBlock() = default; explicit MachineBasicBlock(MachineFunction &MF, const BasicBlock *BB); @@ -145,16 +155,16 @@ public: const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } - typedef Instructions::iterator instr_iterator; - typedef Instructions::const_iterator const_instr_iterator; - typedef Instructions::reverse_iterator reverse_instr_iterator; - typedef Instructions::const_reverse_iterator const_reverse_instr_iterator; + using instr_iterator = Instructions::iterator; + using const_instr_iterator = Instructions::const_iterator; + using reverse_instr_iterator = Instructions::reverse_iterator; + using const_reverse_instr_iterator = Instructions::const_reverse_iterator; - typedef MachineInstrBundleIterator iterator; - typedef MachineInstrBundleIterator const_iterator; - typedef MachineInstrBundleIterator reverse_iterator; - typedef MachineInstrBundleIterator - const_reverse_iterator; + using iterator = MachineInstrBundleIterator; + using const_iterator = MachineInstrBundleIterator; + using reverse_iterator = MachineInstrBundleIterator; + using const_reverse_iterator = + MachineInstrBundleIterator; unsigned size() const { return (unsigned)Insts.size(); } bool empty() const { return Insts.empty(); } @@ -178,8 +188,8 @@ public: reverse_instr_iterator instr_rend () { return Insts.rend(); } const_reverse_instr_iterator instr_rend () const { return Insts.rend(); } - typedef iterator_range instr_range; - typedef iterator_range const_instr_range; + using instr_range = iterator_range; + using const_instr_range = iterator_range; instr_range instrs() { return instr_range(instr_begin(), instr_end()); } const_instr_range instrs() const { return const_instr_range(instr_begin(), instr_end()); @@ -213,18 +223,18 @@ public: } // Machine-CFG iterators - typedef std::vector::iterator pred_iterator; - typedef std::vector::const_iterator const_pred_iterator; - typedef std::vector::iterator succ_iterator; - typedef std::vector::const_iterator const_succ_iterator; - typedef std::vector::reverse_iterator - pred_reverse_iterator; - typedef std::vector::const_reverse_iterator - const_pred_reverse_iterator; - typedef std::vector::reverse_iterator - succ_reverse_iterator; - typedef std::vector::const_reverse_iterator - const_succ_reverse_iterator; + using pred_iterator = std::vector::iterator; + using const_pred_iterator = std::vector::const_iterator; + using succ_iterator = std::vector::iterator; + using const_succ_iterator = std::vector::const_iterator; + using pred_reverse_iterator = + std::vector::reverse_iterator; + using const_pred_reverse_iterator = + std::vector::const_reverse_iterator; + using succ_reverse_iterator = + std::vector::reverse_iterator; + using const_succ_reverse_iterator = + std::vector::const_reverse_iterator; pred_iterator pred_begin() { return Predecessors.begin(); } const_pred_iterator pred_begin() const { return Predecessors.begin(); } pred_iterator pred_end() { return Predecessors.end(); } @@ -307,7 +317,7 @@ public: // Iteration support for live in sets. These sets are kept in sorted // order by their register number. - typedef LiveInVector::const_iterator livein_iterator; + using livein_iterator = LiveInVector::const_iterator; #ifndef NDEBUG /// Unlike livein_begin, this method does not check that the liveness /// information is accurate. Still for debug purposes it may be useful @@ -455,7 +465,6 @@ public: /// other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; - /// Return the fallthrough block if the block can implicitly /// transfer control to the block after it by falling off the end of /// it. This should return null if it can reach the block after @@ -695,7 +704,7 @@ public: LivenessQueryResult computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, const_iterator Before, - unsigned Neighborhood=10) const; + unsigned Neighborhood = 10) const; // Debugging methods. void dump() const; @@ -714,7 +723,6 @@ public: /// Return the MCSymbol for this basic block. MCSymbol *getSymbol() const; - private: /// Return probability iterator corresponding to the I successor iterator. probability_iterator getProbabilityIterator(succ_iterator I); @@ -764,8 +772,8 @@ struct MBB2NumberFunctor : // template <> struct GraphTraits { - typedef MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::succ_iterator ChildIteratorType; + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::succ_iterator; static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } @@ -773,8 +781,8 @@ template <> struct GraphTraits { }; template <> struct GraphTraits { - typedef const MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_succ_iterator; static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; } static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } @@ -787,28 +795,30 @@ template <> struct GraphTraits { // to be when traversing the predecessor edges of a MBB // instead of the successor edges. // -template <> struct GraphTraits > { - typedef MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::pred_iterator ChildIteratorType; +template <> struct GraphTraits> { + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); } static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } }; -template <> struct GraphTraits > { - typedef const MachineBasicBlock *NodeRef; - typedef MachineBasicBlock::const_pred_iterator ChildIteratorType; +template <> struct GraphTraits> { + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_pred_iterator; + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); } static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); } }; - - /// MachineInstrSpan provides an interface to get an iteration range /// containing the instruction it was initialized with, along with all /// those instructions inserted prior to or following that instruction @@ -816,6 +826,7 @@ template <> struct GraphTraits > { class MachineInstrSpan { MachineBasicBlock &MBB; MachineBasicBlock::iterator I, B, E; + public: MachineInstrSpan(MachineBasicBlock::iterator I) : MBB(*I->getParent()), @@ -854,6 +865,6 @@ inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) { return It; } -} // End llvm namespace +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEBASICBLOCK_H diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h index cd1c204981e..cba79c818a7 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h @@ -1,4 +1,4 @@ -//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*-----===// +//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -17,26 +17,28 @@ #include "llvm/ADT/Optional.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Support/BlockFrequency.h" -#include +#include +#include namespace llvm { +template class BlockFrequencyInfoImpl; class MachineBasicBlock; class MachineBranchProbabilityInfo; +class MachineFunction; class MachineLoopInfo; -template class BlockFrequencyInfoImpl; +class raw_ostream; /// MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation /// to estimate machine basic block frequencies. class MachineBlockFrequencyInfo : public MachineFunctionPass { - typedef BlockFrequencyInfoImpl ImplType; + using ImplType = BlockFrequencyInfoImpl; std::unique_ptr MBFI; public: static char ID; MachineBlockFrequencyInfo(); - ~MachineBlockFrequencyInfo() override; void getAnalysisUsage(AnalysisUsage &AU) const override; @@ -74,9 +76,8 @@ public: const MachineBasicBlock *MBB) const; uint64_t getEntryFreq() const; - }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEBLOCKFREQUENCYINFO_H diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h b/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h index 4131194a0c0..370ffbe4862 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineDominanceFrontier.h @@ -11,23 +11,28 @@ #define LLVM_CODEGEN_MACHINEDOMINANCEFRONTIER_H #include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" - +#include "llvm/Support/GenericDomTree.h" +#include namespace llvm { class MachineDominanceFrontier : public MachineFunctionPass { ForwardDominanceFrontierBase Base; -public: - typedef DominatorTreeBase DomTreeT; - typedef DomTreeNodeBase DomTreeNodeT; - typedef DominanceFrontierBase::DomSetType DomSetType; - typedef DominanceFrontierBase::iterator iterator; - typedef DominanceFrontierBase::const_iterator const_iterator; - void operator=(const MachineDominanceFrontier &) = delete; +public: + using DomTreeT = DominatorTreeBase; + using DomTreeNodeT = DomTreeNodeBase; + using DomSetType = DominanceFrontierBase::DomSetType; + using iterator = DominanceFrontierBase::iterator; + using const_iterator = + DominanceFrontierBase::const_iterator; + MachineDominanceFrontier(const MachineDominanceFrontier &) = delete; + MachineDominanceFrontier & + operator=(const MachineDominanceFrontier &) = delete; static char ID; @@ -104,6 +109,6 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override; }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEDOMINANCEFRONTIER_H diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h index 30b6cfdd1c3..74a7c3ea04a 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h @@ -1,4 +1,4 @@ -//=- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation --*- C++ -*-==// +//==- llvm/CodeGen/MachineDominators.h - Machine Dom Calculation -*- C++ -*-==// // // The LLVM Compiler Infrastructure // @@ -16,12 +16,15 @@ #define LLVM_CODEGEN_MACHINEDOMINATORS_H #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/Support/GenericDomTree.h" #include "llvm/Support/GenericDomTreeConstruction.h" +#include #include +#include namespace llvm { @@ -33,7 +36,7 @@ inline void DominatorTreeBase::addRoot(MachineBasicBlock* MBB extern template class DomTreeNodeBase; extern template class DominatorTreeBase; -typedef DomTreeNodeBase MachineDomTreeNode; +using MachineDomTreeNode = DomTreeNodeBase; //===------------------------------------- /// DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to @@ -52,6 +55,7 @@ class MachineDominatorTree : public MachineFunctionPass { /// The splitting of a critical edge is local and thus, it is possible /// to apply several of those changes at the same time. mutable SmallVector CriticalEdgesToSplit; + /// \brief Remember all the basic blocks that are inserted during /// edge splitting. /// Invariant: NewBBs == all the basic blocks contained in the NewBB @@ -259,8 +263,8 @@ public: template struct MachineDomTreeGraphTraitsBase { - typedef Node *NodeRef; - typedef ChildIterator ChildIteratorType; + using NodeRef = Node *; + using ChildIteratorType = ChildIterator; static NodeRef getEntryNode(NodeRef N) { return N; } static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } @@ -287,6 +291,6 @@ template <> struct GraphTraits } }; -} +} // end namespace llvm -#endif +#endif // LLVM_CODEGEN_MACHINEDOMINATORS_H diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h index e7e728c1be2..8d040beff7a 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h @@ -826,20 +826,12 @@ public: getOperand(0).getSubReg() == getOperand(1).getSubReg(); } - /// Return true if this is a transient instruction that is - /// either very likely to be eliminated during register allocation (such as - /// copy-like instructions), or if this instruction doesn't have an - /// execution-time cost. - bool isTransient() const { - switch(getOpcode()) { - default: return false; - // Copy-like instructions are usually eliminated during register allocation. - case TargetOpcode::PHI: - case TargetOpcode::COPY: - case TargetOpcode::INSERT_SUBREG: - case TargetOpcode::SUBREG_TO_REG: - case TargetOpcode::REG_SEQUENCE: - // Pseudo-instructions that don't produce any real output. + /// Return true if this instruction doesn't produce any output in the form of + /// executable instructions. + bool isMetaInstruction() const { + switch (getOpcode()) { + default: + return false; case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: case TargetOpcode::CFI_INSTRUCTION: @@ -850,6 +842,23 @@ public: } } + /// Return true if this is a transient instruction that is either very likely + /// to be eliminated during register allocation (such as copy-like + /// instructions), or if this instruction doesn't have an execution-time cost. + bool isTransient() const { + switch (getOpcode()) { + default: + return isMetaInstruction(); + // Copy-like instructions are usually eliminated during register allocation. + case TargetOpcode::PHI: + case TargetOpcode::COPY: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + return true; + } + } + /// Return the number of instructions inside the MI bundle, excluding the /// bundle header. /// diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index 6e5c6473ff4..1026654da3d 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -642,6 +642,11 @@ public: /// void setRegBank(unsigned Reg, const RegisterBank &RegBank); + void setRegClassOrRegBank(unsigned Reg, + const RegClassOrRegBank &RCOrRB){ + VRegInfo[Reg].first = RCOrRB; + } + /// constrainRegClass - Constrain the register class of the specified virtual /// register to be a common subclass of RC and the current register class, /// but only if the new class has at least MinNumRegs registers. Return the diff --git a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h index e92bb7f7496..d991e4c216d 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h @@ -26,7 +26,7 @@ namespace llvm { /// Machine Value Type. Every type that is supported natively by some /// processor targeted by LLVM occurs here. This means that any legal value /// type can be represented by an MVT. -class MVT { + class MVT { public: enum SimpleValueType : uint8_t { // Simple value types that aren't explicitly part of this enumeration diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h index 99afd8c5c9a..97aa2aace82 100644 --- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h +++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h @@ -52,14 +52,14 @@ class TargetRegisterInfo; /// These are the different kinds of scheduling dependencies. enum Kind { Data, ///< Regular data dependence (aka true-dependence). - Anti, ///< A register anti-dependedence (aka WAR). + Anti, ///< A register anti-dependence (aka WAR). Output, ///< A register output-dependence (aka WAW). Order ///< Any other ordering dependency. }; // Strong dependencies must be respected by the scheduler. Artificial // dependencies may be removed only if they are redundant with another - // strong depedence. + // strong dependence. // // Weak dependencies may be violated by the scheduling strategy, but only if // the strategy can prove it is correct to do so. @@ -342,7 +342,7 @@ class TargetRegisterInfo; /// BoundaryNodes can have DAG edges, including Data edges, but they do not /// correspond to schedulable entities (e.g. instructions) and do not have a /// valid ID. Consequently, always check for boundary nodes before accessing - /// an assoicative data structure keyed on node ID. + /// an associative data structure keyed on node ID. bool isBoundaryNode() const { return NodeNum == BoundaryID; } /// Assigns the representative SDNode for this SUnit. This may be used diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h index 21e1740aa6b..f5f5bfd45e7 100644 --- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -18,6 +18,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SparseMultiSet.h" #include "llvm/ADT/SparseSet.h" +#include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSchedule.h" #include "llvm/Support/Compiler.h" @@ -224,7 +225,7 @@ namespace llvm { MachineInstr *FirstDbgValue; /// Set of live physical registers for updating kill flags. - BitVector LiveRegs; + LivePhysRegs LiveRegs; public: explicit ScheduleDAGInstrs(MachineFunction &mf, @@ -311,7 +312,7 @@ namespace llvm { std::string getDAGName() const override; /// Fixes register kill flags that scheduling has made invalid. - void fixupKills(MachineBasicBlock *MBB); + void fixupKills(MachineBasicBlock &MBB); protected: void initSUnits(); diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h index d761661f763..493122b1570 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1070,6 +1070,11 @@ public: SDNode *MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef Ops); + /// Mutate the specified strict FP node to its non-strict equivalent, + /// unlinking the node from its chain and dropping the metadata arguments. + /// The node must be a strict FP node. + SDNode *mutateStrictFPToFP(SDNode *Node); + /// These are used for target selectors to create a new node /// with specified return type(s), MachineInstr opcode, and operands. /// diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 35ddcf80c91..973c5aac528 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -612,6 +612,32 @@ public: SDNodeBits.IsMemIntrinsic; } + /// Test if this node is a strict floating point pseudo-op. + bool isStrictFPOpcode() { + switch (NodeType) { + default: + return false; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FREM: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + return true; + } + } + /// Test if this node has a post-isel opcode, directly /// corresponding to a MachineInstr opcode. bool isMachineOpcode() const { return NodeType < 0; } diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h index 71ea82b6a9a..68ad0998220 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h @@ -14,6 +14,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/DebugInfo/CodeView/CodeViewError.h" #include "llvm/DebugInfo/CodeView/RecordSerialization.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/BinaryStreamRef.h" #include "llvm/Support/Endian.h" @@ -50,6 +51,13 @@ public: Optional Hash; }; +template struct RemappedRecord { + explicit RemappedRecord(const CVRecord &R) : OriginalRecord(R) {} + + CVRecord OriginalRecord; + SmallVector, 8> Mappings; +}; + } // end namespace codeview template diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h index 4bc8fbefd5d..70ccc867cd3 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h @@ -46,6 +46,7 @@ Error visitMemberRecordStream(ArrayRef FieldList, TypeVisitorCallbacks &Callbacks); Error visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source = VDS_BytesPresent, TypeServerHandler *TS = nullptr); Error visitTypeStream(CVTypeRange Types, TypeVisitorCallbacks &Callbacks, TypeServerHandler *TS = nullptr); diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h index 2142d4a2dec..a9c5cf42fc5 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h @@ -40,6 +40,17 @@ class TypeDeserializer : public TypeVisitorCallbacks { public: TypeDeserializer() = default; + template static Error deserializeAs(CVType &CVT, T &Record) { + MappingInfo I(CVT.content()); + if (auto EC = I.Mapping.visitTypeBegin(CVT)) + return EC; + if (auto EC = I.Mapping.visitKnownRecord(CVT, Record)) + return EC; + if (auto EC = I.Mapping.visitTypeEnd(CVT)) + return EC; + return Error::success(); + } + Error visitTypeBegin(CVType &Record) override { assert(!Mapping && "Already in a type mapping!"); Mapping = llvm::make_unique(Record.content()); diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h new file mode 100644 index 00000000000..82ceb503831 --- /dev/null +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndexDiscovery.h @@ -0,0 +1,33 @@ +//===- TypeIndexDiscovery.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEINDEXDISCOVERY_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/CodeView/TypeRecord.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace codeview { +enum class TiRefKind { TypeRef, IndexRef }; +struct TiReference { + TiRefKind Kind; + uint32_t Offset; + uint32_t Count; +}; + +void discoverTypeIndices(ArrayRef RecordData, + SmallVectorImpl &Refs); +void discoverTypeIndices(const CVType &Type, + SmallVectorImpl &Refs); +} +} + +#endif diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h index 1f10872c876..92745ebfcde 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -35,6 +35,7 @@ using support::ulittle16_t; using support::ulittle32_t; typedef CVRecord CVType; +typedef RemappedRecord RemappedType; struct CVMemberRecord { TypeLeafKind Kind; @@ -278,15 +279,9 @@ public: Attrs(calcAttrs(PK, PM, PO, Size)) {} PointerRecord(TypeIndex ReferentType, PointerKind PK, PointerMode PM, - PointerOptions PO, uint8_t Size, - const MemberPointerInfo &Member) + PointerOptions PO, uint8_t Size, const MemberPointerInfo &MPI) : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), - Attrs(calcAttrs(PK, PM, PO, Size)), MemberInfo(Member) {} - - PointerRecord(TypeIndex ReferentType, uint32_t Attrs, - const MemberPointerInfo &Member) - : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType), - Attrs(Attrs), MemberInfo(Member) {} + Attrs(calcAttrs(PK, PM, PO, Size)), MemberInfo(MPI) {} TypeIndex getReferentType() const { return ReferentType; } diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h index 6dad9824713..435c43f7edc 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h @@ -17,7 +17,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -26,6 +25,8 @@ namespace llvm { namespace codeview { +class TypeHasher; + class TypeSerializer : public TypeVisitorCallbacks { struct SubRecord { SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {} @@ -45,14 +46,13 @@ class TypeSerializer : public TypeVisitorCallbacks { } }; - typedef SmallVector, 2> RecordList; + typedef SmallVector, 2> MutableRecordList; static constexpr uint8_t ContinuationLength = 8; BumpPtrAllocator &RecordStorage; RecordSegment CurrentSegment; - RecordList FieldListSegments; + MutableRecordList FieldListSegments; - TypeIndex LastTypeIndex; Optional TypeKind; Optional MemberKind; std::vector RecordBuffer; @@ -60,28 +60,35 @@ class TypeSerializer : public TypeVisitorCallbacks { BinaryStreamWriter Writer; TypeRecordMapping Mapping; - RecordList SeenRecords; - StringMap HashedRecords; + /// Private type record hashing implementation details are handled here. + std::unique_ptr Hasher; + + /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). + SmallVector, 2> SeenRecords; + + /// Temporary storage that we use to copy a record's data while re-writing + /// its type indices. + SmallVector RemapStorage; + + TypeIndex nextTypeIndex() const; bool isInFieldList() const; - TypeIndex calcNextTypeIndex() const; - TypeIndex incrementTypeIndex(); MutableArrayRef getCurrentSubRecordData(); MutableArrayRef getCurrentRecordData(); Error writeRecordPrefix(TypeLeafKind Kind); - TypeIndex insertRecordBytesPrivate(MutableArrayRef Record); - TypeIndex insertRecordBytesWithCopy(CVType &Record, - MutableArrayRef Data); Expected> addPadding(MutableArrayRef Record); public: - explicit TypeSerializer(BumpPtrAllocator &Storage); + explicit TypeSerializer(BumpPtrAllocator &Storage, bool Hash = true); + ~TypeSerializer(); - ArrayRef> records() const; - TypeIndex getLastTypeIndex() const; - TypeIndex insertRecordBytes(MutableArrayRef Record); + void reset(); + + ArrayRef> records() const; + TypeIndex insertRecordBytes(ArrayRef &Record); + TypeIndex insertRecord(const RemappedType &Record); Expected visitTypeEndGetIndex(CVType &Record); Error visitTypeBegin(CVType &Record) override; diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h index 65bcf9812e6..3ad2b4e9c92 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeStreamMerger.h @@ -22,12 +22,75 @@ class TypeIndex; class TypeServerHandler; class TypeTableBuilder; -/// Merges one type stream into another. Returns true on success. -Error mergeTypeStreams(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, +/// \brief Merge one set of type records into another. This method assumes +/// that all records are type records, and there are no Id records present. +/// +/// \param Dest The table to store the re-written type records into. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// type stream, that contains the index of the corresponding type record +/// in the destination stream. +/// +/// \param Handler (optional) If non-null, an interface that gets invoked +/// to handle type server records. +/// +/// \param Types The collection of types to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. +Error mergeTypeRecords(TypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, TypeServerHandler *Handler, const CVTypeArray &Types); +/// \brief Merge one set of id records into another. This method assumes +/// that all records are id records, and there are no Type records present. +/// However, since Id records can refer back to Type records, this method +/// assumes that the referenced type records have also been merged into +/// another type stream (for example using the above method), and accepts +/// the mapping from source to dest for that stream so that it can re-write +/// the type record mappings accordingly. +/// +/// \param Dest The table to store the re-written id records into. +/// +/// \param Types The mapping to use for the type records that these id +/// records refer to. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// id stream, that contains the index of the corresponding id record +/// in the destination stream. +/// +/// \param Ids The collection of id records to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. +Error mergeIdRecords(TypeTableBuilder &Dest, ArrayRef Types, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Ids); + +/// \brief Merge a unified set of type and id records, splitting them into +/// separate output streams. +/// +/// \param DestIds The table to store the re-written id records into. +/// +/// \param DestTypes the table to store the re-written type records into. +/// +/// \param SourceToDest A vector, indexed by the TypeIndex in the source +/// id stream, that contains the index of the corresponding id record +/// in the destination stream. +/// +/// \param Handler (optional) If non-null, an interface that gets invoked +/// to handle type server records. +/// +/// \param IdsAndTypes The collection of id records to merge in. +/// +/// \returns Error::success() if the operation succeeded, otherwise an +/// appropriate error code. +Error mergeTypeAndIdRecords(TypeTableBuilder &DestIds, + TypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, + TypeServerHandler *Handler, + const CVTypeArray &IdsAndTypes); + } // end namespace codeview } // end namespace llvm diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h index 102bee4b080..7bdc9ecb20c 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -64,10 +64,14 @@ public: return *ExpectedIndex; } - TypeIndex writeSerializedRecord(MutableArrayRef Record) { + TypeIndex writeSerializedRecord(ArrayRef Record) { return Serializer.insertRecordBytes(Record); } + TypeIndex writeSerializedRecord(const RemappedType &Record) { + return Serializer.insertRecord(Record); + } + template void ForEachRecord(TFunc Func) { uint32_t Index = TypeIndex::FirstNonSimpleIndex; @@ -77,23 +81,24 @@ public: } } - ArrayRef> records() const { - return Serializer.records(); - } + ArrayRef> records() const { return Serializer.records(); } }; class FieldListRecordBuilder { TypeTableBuilder &TypeTable; + BumpPtrAllocator Allocator; TypeSerializer TempSerializer; CVType Type; public: explicit FieldListRecordBuilder(TypeTableBuilder &TypeTable) - : TypeTable(TypeTable), TempSerializer(TypeTable.getAllocator()) { + : TypeTable(TypeTable), TempSerializer(Allocator, false) { Type.Type = TypeLeafKind::LF_FIELDLIST; } void begin() { + TempSerializer.reset(); + if (auto EC = TempSerializer.visitTypeBegin(Type)) consumeError(std::move(EC)); } @@ -109,23 +114,19 @@ public: consumeError(std::move(EC)); } - TypeIndex end() { + TypeIndex end(bool Write) { + TypeIndex Index; if (auto EC = TempSerializer.visitTypeEnd(Type)) { consumeError(std::move(EC)); return TypeIndex(); } - TypeIndex Index; - for (auto Record : TempSerializer.records()) { - Index = TypeTable.writeSerializedRecord(Record); + if (Write) { + for (auto Record : TempSerializer.records()) + Index = TypeTable.writeSerializedRecord(Record); } - return Index; - } - /// Stop building the record. - void reset() { - if (auto EC = TempSerializer.visitTypeEnd(Type)) - consumeError(std::move(EC)); + return Index; } }; diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h index 7de562a19a7..42b62ba2b6c 100644 --- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h +++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableCollection.h @@ -18,7 +18,7 @@ namespace codeview { class TypeTableCollection : public TypeCollection { public: - explicit TypeTableCollection(ArrayRef> Records); + explicit TypeTableCollection(ArrayRef> Records); Optional getFirst() override; Optional getNext(TypeIndex Prev) override; @@ -33,7 +33,7 @@ private: bool hasCapacityFor(TypeIndex Index) const; void ensureTypeExists(TypeIndex Index); - ArrayRef> Records; + ArrayRef> Records; TypeDatabase Database; }; } diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h index d3a63edf10f..7fa68f3f231 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h @@ -46,7 +46,8 @@ class raw_ostream; /// Reads a value from data extractor and applies a relocation to the result if /// one exists for the given offset. uint64_t getRelocatedValue(const DataExtractor &Data, uint32_t Size, - uint32_t *Off, const RelocAddrMap *Relocs); + uint32_t *Off, const RelocAddrMap *Relocs, + uint64_t *SecNdx = nullptr); /// DWARFContext /// This data structure is the top level entity that deals with dwarf debug @@ -71,6 +72,14 @@ class DWARFContext : public DIContext { std::unique_ptr AbbrevDWO; std::unique_ptr LocDWO; + struct DWOFile { + object::OwningBinary File; + std::unique_ptr Context; + }; + StringMap> DWOFiles; + std::weak_ptr DWP; + bool CheckedForDWP = false; + /// Read compile units from the debug_info section (if necessary) /// and store them in CUs. void parseCompileUnits(); @@ -165,6 +174,8 @@ public: return DWOCUs[index].get(); } + DWARFCompileUnit *getDWOCompileUnitForHash(uint64_t Hash); + /// Get a DIE given an exact offset. DWARFDie getDIEForOffset(uint32_t Offset); @@ -206,6 +217,7 @@ public: DIInliningInfo getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override; + virtual StringRef getFileName() const = 0; virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const = 0; virtual const DWARFSection &getInfoSection() = 0; @@ -248,6 +260,8 @@ public: return version == 2 || version == 3 || version == 4 || version == 5; } + std::shared_ptr getDWOContext(StringRef AbsolutePath); + private: /// Return the compile unit that includes an offset (relative to .debug_info). DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset); @@ -263,6 +277,7 @@ private: class DWARFContextInMemory : public DWARFContext { virtual void anchor(); + StringRef FileName; bool IsLittleEndian; uint8_t AddressSize; DWARFSection InfoSection; @@ -316,6 +331,7 @@ public: uint8_t AddrSize, bool isLittleEndian = sys::IsLittleEndianHost); + StringRef getFileName() const override { return FileName; } bool isLittleEndian() const override { return IsLittleEndian; } uint8_t getAddressSize() const override { return AddressSize; } const DWARFSection &getInfoSection() override { return InfoSection; } diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index 95ec1be62a7..b436711ae6e 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -25,6 +25,7 @@ class raw_ostream; struct DWARFAddressRange { uint64_t LowPC; uint64_t HighPC; + uint64_t SectionIndex; }; /// DWARFAddressRangesVector - represents a set of absolute address ranges. @@ -44,6 +45,8 @@ public: /// address past the end of the address range. The ending address must /// be greater than or equal to the beginning address. uint64_t EndAddress; + /// A section index this range belongs to. + uint64_t SectionIndex; /// The end of any given range list is marked by an end of list entry, /// which consists of a 0 for the beginning address offset diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h index ca94a90fabf..fa41b9e293c 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h @@ -195,7 +195,8 @@ public: /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU. /// Returns true if both attributes are present. - bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const; + bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, + uint64_t &SectionIndex) const; /// Get the address ranges for this DIE. /// diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index a30e0be9c3c..3a781dde892 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -47,6 +47,7 @@ private: const char *cstr; }; const uint8_t *data = nullptr; + uint64_t SectionIndex; /// Section index for reference forms. }; dwarf::Form Form; /// Form for this value. @@ -58,6 +59,7 @@ public: dwarf::Form getForm() const { return Form; } uint64_t getRawUValue() const { return Value.uval; } + uint64_t getSectionIndex() const { return Value.SectionIndex; } void setForm(dwarf::Form F) { Form = F; } void setUValue(uint64_t V) { Value.uval = V; } void setSValue(int64_t V) { Value.sval = V; } diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h index fabacc0abce..f143de33473 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFRelocMap.h @@ -16,7 +16,10 @@ namespace llvm { +/// RelocAddrEntry contains relocated value and section index. +/// Section index is -1LL if relocation points to absolute symbol. struct RelocAddrEntry { + uint64_t SectionIndex; uint64_t Value; }; diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h index ae7fd24ce5b..d0f7bd0d623 100644 --- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h +++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h @@ -143,17 +143,7 @@ class DWARFUnit { typedef iterator_range::iterator> die_iterator_range; - class DWOHolder { - object::OwningBinary DWOFile; - std::unique_ptr DWOContext; - DWARFUnit *DWOU = nullptr; - - public: - DWOHolder(StringRef DWOPath, uint64_t DWOId); - - DWARFUnit *getUnit() const { return DWOU; } - }; - std::unique_ptr DWO; + std::shared_ptr DWO; const DWARFUnitIndex::Entry *IndexEntry; diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h index c91f6f725c8..d68f5f70c83 100644 --- a/contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h +++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h @@ -43,8 +43,8 @@ class MappedBlockStream : public BinaryStream { friend class WritableMappedBlockStream; public: static std::unique_ptr - createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, BinaryStreamRef MsfData); + createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, + BinaryStreamRef MsfData); static std::unique_ptr createIndexedStream(const MSFLayout &Layout, BinaryStreamRef MsfData, @@ -74,12 +74,11 @@ public: void invalidateCache(); uint32_t getBlockSize() const { return BlockSize; } - uint32_t getNumBlocks() const { return NumBlocks; } + uint32_t getNumBlocks() const { return StreamLayout.Blocks.size(); } uint32_t getStreamLength() const { return StreamLayout.Length; } protected: - MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &StreamLayout, + MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, BinaryStreamRef MsfData); private: @@ -91,7 +90,6 @@ private: ArrayRef &Buffer); const uint32_t BlockSize; - const uint32_t NumBlocks; const MSFStreamLayout StreamLayout; BinaryStreamRef MsfData; @@ -103,8 +101,8 @@ private: class WritableMappedBlockStream : public WritableBinaryStream { public: static std::unique_ptr - createStream(uint32_t BlockSize, uint32_t NumBlocks, - const MSFStreamLayout &Layout, WritableBinaryStreamRef MsfData); + createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, + WritableBinaryStreamRef MsfData); static std::unique_ptr createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData, @@ -139,7 +137,7 @@ public: uint32_t getStreamLength() const { return ReadInterface.getStreamLength(); } protected: - WritableMappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, + WritableMappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout, WritableBinaryStreamRef MsfData); diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h index bcac182e214..e116f314ac0 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/DbiStreamBuilder.h @@ -82,6 +82,7 @@ private: Error finalize(); uint32_t calculateModiSubstreamSize() const; + uint32_t calculateNamesOffset() const; uint32_t calculateSectionContribsStreamSize() const; uint32_t calculateSectionMapStreamSize() const; uint32_t calculateFileInfoSubstreamSize() const; diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h index bfd38b6c80e..196ba4d6ffb 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/PDBTypeServerHandler.h @@ -11,8 +11,7 @@ #define LLVM_DEBUGINFO_PDB_PDBTYPESERVERHANDLER_H #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeServerHandler.h" #include "llvm/DebugInfo/PDB/Native/NativeSession.h" @@ -39,7 +38,7 @@ private: bool RevisitAlways; std::unique_ptr Session; - SmallVector, 4> SearchPaths; + StringSet<> SearchPaths; }; } } diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h index c5549983ed4..17fba9991c2 100644 --- a/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h +++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Native/TpiStream.h @@ -21,6 +21,9 @@ #include "llvm/Support/Error.h" namespace llvm { +namespace codeview { +class LazyRandomTypeCollection; +} namespace msf { class MappedBlockStream; } @@ -53,12 +56,16 @@ public: codeview::CVTypeRange types(bool *HadError) const; const codeview::CVTypeArray &typeArray() const { return TypeRecords; } + codeview::LazyRandomTypeCollection &typeCollection() { return *Types; } + Error commit(); private: const PDBFile &Pdb; std::unique_ptr Stream; + std::unique_ptr Types; + codeview::CVTypeArray TypeRecords; std::unique_ptr HashStream; diff --git a/contrib/llvm/include/llvm/IR/Attributes.h b/contrib/llvm/include/llvm/IR/Attributes.h index d4a896c0186..ace309ed95a 100644 --- a/contrib/llvm/include/llvm/IR/Attributes.h +++ b/contrib/llvm/include/llvm/IR/Attributes.h @@ -322,7 +322,7 @@ template <> struct DenseMapInfo { /// the AttributeList object. The function attributes are at index /// `AttributeList::FunctionIndex', the return value is at index /// `AttributeList::ReturnIndex', and the attributes for the parameters start at -/// index `1'. +/// index `AttributeList::FirstArgIndex'. class AttributeList { public: enum AttrIndex : unsigned { @@ -347,8 +347,8 @@ public: /// \brief Create an AttributeList with the specified parameters in it. static AttributeList get(LLVMContext &C, ArrayRef> Attrs); - static AttributeList - get(LLVMContext &C, ArrayRef> Attrs); + static AttributeList get(LLVMContext &C, + ArrayRef> Attrs); /// \brief Create an AttributeList from attribute sets for a function, its /// return value, and all of its arguments. @@ -356,13 +356,11 @@ public: AttributeSet RetAttrs, ArrayRef ArgAttrs); - static AttributeList - getImpl(LLVMContext &C, - ArrayRef> Attrs); - private: explicit AttributeList(AttributeListImpl *LI) : pImpl(LI) {} + static AttributeList getImpl(LLVMContext &C, ArrayRef AttrSets); + public: AttributeList() = default; @@ -521,39 +519,31 @@ public: /// \brief Return the attributes at the index as a string. std::string getAsString(unsigned Index, bool InAttrGrp = false) const; - using iterator = ArrayRef::iterator; + //===--------------------------------------------------------------------===// + // AttributeList Introspection + //===--------------------------------------------------------------------===// - iterator begin(unsigned Slot) const; - iterator end(unsigned Slot) const; + typedef const AttributeSet *iterator; + iterator begin() const; + iterator end() const; + + unsigned getNumAttrSets() const; + + /// Use these to iterate over the valid attribute indices. + unsigned index_begin() const { return AttributeList::FunctionIndex; } + unsigned index_end() const { return getNumAttrSets() - 1; } /// operator==/!= - Provide equality predicates. bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; } bool operator!=(const AttributeList &RHS) const { return pImpl != RHS.pImpl; } - //===--------------------------------------------------------------------===// - // AttributeList Introspection - //===--------------------------------------------------------------------===// - /// \brief Return a raw pointer that uniquely identifies this attribute list. void *getRawPointer() const { return pImpl; } /// \brief Return true if there are no attributes. - bool isEmpty() const { - return getNumSlots() == 0; - } - - /// \brief Return the number of slots used in this attribute list. This is - /// the number of arguments that have an attribute set on them (including the - /// function itself). - unsigned getNumSlots() const; - - /// \brief Return the index for the given slot. - unsigned getSlotIndex(unsigned Slot) const; - - /// \brief Return the attributes at the given slot. - AttributeSet getSlotAttributes(unsigned Slot) const; + bool isEmpty() const { return pImpl == nullptr; } void dump() const; }; diff --git a/contrib/llvm/include/llvm/IR/BasicBlock.h b/contrib/llvm/include/llvm/IR/BasicBlock.h index c917b1f2cad..235cb57cfd0 100644 --- a/contrib/llvm/include/llvm/IR/BasicBlock.h +++ b/contrib/llvm/include/llvm/IR/BasicBlock.h @@ -33,6 +33,7 @@ class Function; class LandingPadInst; class LLVMContext; class Module; +class PHINode; class TerminatorInst; class ValueSymbolTable; @@ -261,6 +262,50 @@ public: inline const Instruction &back() const { return InstList.back(); } inline Instruction &back() { return InstList.back(); } + /// Iterator to walk just the phi nodes in the basic block. + template + class phi_iterator_impl + : public iterator_facade_base, + std::forward_iterator_tag, PHINodeT> { + friend BasicBlock; + + PHINodeT *PN; + + phi_iterator_impl(PHINodeT *PN) : PN(PN) {} + + public: + // Allow default construction to build variables, but this doesn't build + // a useful iterator. + phi_iterator_impl() = default; + + // Allow conversion between instantiations where valid. + template + phi_iterator_impl(const phi_iterator_impl &Arg) + : PN(Arg.PN) {} + + bool operator==(const phi_iterator_impl &Arg) const { return PN == Arg.PN; } + + PHINodeT &operator*() const { return *PN; } + + using phi_iterator_impl::iterator_facade_base::operator++; + phi_iterator_impl &operator++() { + assert(PN && "Cannot increment the end iterator!"); + PN = dyn_cast(std::next(BBIteratorT(PN))); + return *this; + } + }; + typedef phi_iterator_impl<> phi_iterator; + typedef phi_iterator_impl + const_phi_iterator; + + /// Returns a range that iterates over the phis in the basic block. + /// + /// Note that this cannot be used with basic blocks that have no terminator. + iterator_range phis() const { + return const_cast(this)->phis(); + } + iterator_range phis(); + /// \brief Return the underlying instruction list container. /// /// Currently you need to access the underlying instruction list container diff --git a/contrib/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm/include/llvm/IR/IntrinsicInst.h index 05e3315cbab..2ae98d9e35b 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicInst.h +++ b/contrib/llvm/include/llvm/IR/IntrinsicInst.h @@ -171,6 +171,7 @@ namespace llvm { ebStrict }; + bool isUnaryOp() const; RoundingMode getRoundingMode() const; ExceptionBehavior getExceptionBehavior() const; @@ -182,6 +183,18 @@ namespace llvm { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: return true; default: return false; } diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td index 19f6045568f..291d16fb0d9 100644 --- a/contrib/llvm/include/llvm/IR/Intrinsics.td +++ b/contrib/llvm/include/llvm/IR/Intrinsics.td @@ -489,8 +489,64 @@ let IntrProperties = [IntrInaccessibleMemOnly] in { LLVMMatchType<0>, llvm_metadata_ty, llvm_metadata_ty ]>; + + // These intrinsics are sensitive to the rounding mode so we need constrained + // versions of each of them. When strict rounding and exception control are + // not required the non-constrained versions of these intrinsics should be + // used. + def int_experimental_constrained_sqrt : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_powi : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_i32_ty, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_sin : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_cos : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_pow : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log10: Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_log2 : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_exp : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_exp2 : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_rint : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; + def int_experimental_constrained_nearbyint : Intrinsic<[ llvm_anyfloat_ty ], + [ LLVMMatchType<0>, + llvm_metadata_ty, + llvm_metadata_ty ]>; } -// FIXME: Add intrinsic for fcmp, fptrunc, fpext, fptoui and fptosi. +// FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi. +// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round? //===------------------------- Expect Intrinsics --------------------------===// diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index d7413fe9e56..e1928546607 100644 --- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -566,6 +566,16 @@ def int_amdgcn_s_getreg : [IntrReadMem, IntrSpeculatable] >; +// int_amdgcn_s_getpc is provided to allow a specific style of position +// independent code to determine the high part of its address when it is +// known (through convention) that the code and any data of interest does +// not cross a 4Gb address boundary. Use for any other purpose may not +// produce the desired results as optimizations may cause code movement, +// especially as we explicitly use IntrNoMem to allow optimizations. +def int_amdgcn_s_getpc : + GCCBuiltin<"__builtin_amdgcn_s_getpc">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>; + // __builtin_amdgcn_interp_mov , , , // param values: 0 = P10, 1 = P20, 2 = P0 def int_amdgcn_interp_mov : diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h index 92f701e01ff..3c753260190 100644 --- a/contrib/llvm/include/llvm/IR/Metadata.h +++ b/contrib/llvm/include/llvm/IR/Metadata.h @@ -1223,6 +1223,7 @@ public: // FIXME: Fix callers and remove condition on N. unsigned size() const { return N ? N->getNumOperands() : 0u; } + bool empty() const { return N ? N->getNumOperands() == 0 : true; } T *operator[](unsigned I) const { return cast_or_null(N->getOperand(I)); } // FIXME: Fix callers and remove condition on N. diff --git a/contrib/llvm/include/llvm/IR/Module.h b/contrib/llvm/include/llvm/IR/Module.h index 3024d9e27a2..5e1f680c5b3 100644 --- a/contrib/llvm/include/llvm/IR/Module.h +++ b/contrib/llvm/include/llvm/IR/Module.h @@ -139,9 +139,12 @@ public: /// during the append operation. AppendUnique = 6, + /// Takes the max of the two values, which are required to be integers. + Max = 7, + // Markers: ModFlagBehaviorFirstVal = Error, - ModFlagBehaviorLastVal = AppendUnique + ModFlagBehaviorLastVal = Max }; /// Checks if Metadata represents a valid ModFlagBehavior, and stores the diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h index 3df5244a0bd..3ca21c15577 100644 --- a/contrib/llvm/include/llvm/InitializePasses.h +++ b/contrib/llvm/include/llvm/InitializePasses.h @@ -144,6 +144,7 @@ void initializeGCMachineCodeAnalysisPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); void initializeGCOVProfilerLegacyPassPass(PassRegistry&); void initializeGVNHoistLegacyPassPass(PassRegistry&); +void initializeGVNSinkLegacyPassPass(PassRegistry&); void initializeGVNLegacyPassPass(PassRegistry&); void initializeGlobalDCELegacyPassPass(PassRegistry&); void initializeGlobalMergePass(PassRegistry&); @@ -193,6 +194,7 @@ void initializeLiveVariablesPass(PassRegistry&); void initializeLoadCombinePass(PassRegistry&); void initializeLoadStoreVectorizerPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); +void initializeLocalizerPass(PassRegistry&); void initializeLocalStackSlotPassPass(PassRegistry&); void initializeLoopAccessLegacyAnalysisPass(PassRegistry&); void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&); diff --git a/contrib/llvm/include/llvm/LTO/Config.h b/contrib/llvm/include/llvm/LTO/Config.h index ede6637dfa4..5ba8492db8f 100644 --- a/contrib/llvm/include/llvm/LTO/Config.h +++ b/contrib/llvm/include/llvm/LTO/Config.h @@ -39,7 +39,7 @@ struct Config { std::string CPU; TargetOptions Options; std::vector MAttrs; - Reloc::Model RelocModel = Reloc::PIC_; + Optional RelocModel = Reloc::PIC_; CodeModel::Model CodeModel = CodeModel::Default; CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default; TargetMachine::CodeGenFileType CGFileType = TargetMachine::CGFT_ObjectFile; diff --git a/contrib/llvm/include/llvm/Object/Binary.h b/contrib/llvm/include/llvm/Object/Binary.h index cf5d93ee9ed..3f5a233c1ee 100644 --- a/contrib/llvm/include/llvm/Object/Binary.h +++ b/contrib/llvm/include/llvm/Object/Binary.h @@ -95,9 +95,7 @@ public: return TypeID > ID_StartObjects && TypeID < ID_EndObjects; } - bool isSymbolic() const { - return isIR() || isObject(); - } + bool isSymbolic() const { return isIR() || isObject() || isCOFFImportFile(); } bool isArchive() const { return TypeID == ID_Archive; diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h index 8b9b4973717..dafd1a43cb5 100644 --- a/contrib/llvm/include/llvm/Object/COFF.h +++ b/contrib/llvm/include/llvm/Object/COFF.h @@ -782,6 +782,7 @@ protected: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h index d8b58b8079f..ef2abd8c52c 100644 --- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h +++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h @@ -235,6 +235,7 @@ protected: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; @@ -645,6 +646,17 @@ uint64_t ELFObjectFile::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec)->sh_addr; } +template +uint64_t ELFObjectFile::getSectionIndex(DataRefImpl Sec) const { + auto SectionsOrErr = EF.sections(); + handleAllErrors(std::move(SectionsOrErr.takeError()), + [](const ErrorInfoBase &) { + llvm_unreachable("unable to get section index"); + }); + const Elf_Shdr *First = SectionsOrErr->begin(); + return getSection(Sec) - First; +} + template uint64_t ELFObjectFile::getSectionSize(DataRefImpl Sec) const { return getSection(Sec)->sh_size; diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h index 29553558f72..a4356d5977b 100644 --- a/contrib/llvm/include/llvm/Object/MachO.h +++ b/contrib/llvm/include/llvm/Object/MachO.h @@ -290,6 +290,7 @@ public: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; diff --git a/contrib/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm/include/llvm/Object/ObjectFile.h index 9a7bc618ffd..ea6a9049bc1 100644 --- a/contrib/llvm/include/llvm/Object/ObjectFile.h +++ b/contrib/llvm/include/llvm/Object/ObjectFile.h @@ -95,6 +95,7 @@ public: std::error_code getName(StringRef &Result) const; uint64_t getAddress() const; + uint64_t getIndex() const; uint64_t getSize() const; std::error_code getContents(StringRef &Result) const; @@ -222,6 +223,7 @@ protected: virtual std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const = 0; virtual uint64_t getSectionAddress(DataRefImpl Sec) const = 0; + virtual uint64_t getSectionIndex(DataRefImpl Sec) const = 0; virtual uint64_t getSectionSize(DataRefImpl Sec) const = 0; virtual std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const = 0; @@ -393,6 +395,10 @@ inline uint64_t SectionRef::getAddress() const { return OwningObject->getSectionAddress(SectionPimpl); } +inline uint64_t SectionRef::getIndex() const { + return OwningObject->getSectionIndex(SectionPimpl); +} + inline uint64_t SectionRef::getSize() const { return OwningObject->getSectionSize(SectionPimpl); } diff --git a/contrib/llvm/include/llvm/Object/RelocVisitor.h b/contrib/llvm/include/llvm/Object/RelocVisitor.h index 86579b7c3e3..348179860f3 100644 --- a/contrib/llvm/include/llvm/Object/RelocVisitor.h +++ b/contrib/llvm/include/llvm/Object/RelocVisitor.h @@ -40,13 +40,13 @@ public: // TODO: Should handle multiple applied relocations via either passing in the // previously computed value or just count paired relocations as a single // visit. - uint64_t visit(uint32_t RelocType, RelocationRef R, uint64_t Value = 0) { + uint64_t visit(uint32_t Rel, RelocationRef R, uint64_t Value = 0) { if (isa(ObjToVisit)) - return visitELF(RelocType, R, Value); + return visitELF(Rel, R, Value); if (isa(ObjToVisit)) - return visitCOFF(RelocType, R, Value); + return visitCOFF(Rel, R, Value); if (isa(ObjToVisit)) - return visitMachO(RelocType, R, Value); + return visitMachO(Rel, R, Value); HasError = true; return 0; @@ -58,214 +58,60 @@ private: const ObjectFile &ObjToVisit; bool HasError = false; - uint64_t visitELF(uint32_t RelocType, RelocationRef R, uint64_t Value) { + uint64_t visitELF(uint32_t Rel, RelocationRef R, uint64_t Value) { if (ObjToVisit.getBytesInAddress() == 8) { // 64-bit object file switch (ObjToVisit.getArch()) { case Triple::x86_64: - switch (RelocType) { - case ELF::R_X86_64_NONE: - return visitELF_X86_64_NONE(R); - case ELF::R_X86_64_64: - return visitELF_X86_64_64(R, Value); - case ELF::R_X86_64_PC32: - return visitELF_X86_64_PC32(R, Value); - case ELF::R_X86_64_32: - return visitELF_X86_64_32(R, Value); - case ELF::R_X86_64_32S: - return visitELF_X86_64_32S(R, Value); - default: - HasError = true; - return 0; - } + return visitX86_64(Rel, R, Value); case Triple::aarch64: case Triple::aarch64_be: - switch (RelocType) { - case ELF::R_AARCH64_ABS32: - return visitELF_AARCH64_ABS32(R, Value); - case ELF::R_AARCH64_ABS64: - return visitELF_AARCH64_ABS64(R, Value); - default: - HasError = true; - return 0; - } + return visitAarch64(Rel, R, Value); case Triple::bpfel: case Triple::bpfeb: - switch (RelocType) { - case ELF::R_BPF_64_64: - return visitELF_BPF_64_64(R, Value); - case ELF::R_BPF_64_32: - return visitELF_BPF_64_32(R, Value); - default: - HasError = true; - return 0; - } + return visitBpf(Rel, R, Value); case Triple::mips64el: case Triple::mips64: - switch (RelocType) { - case ELF::R_MIPS_32: - return visitELF_MIPS64_32(R, Value); - case ELF::R_MIPS_64: - return visitELF_MIPS64_64(R, Value); - default: - HasError = true; - return 0; - } + return visitMips64(Rel, R, Value); case Triple::ppc64le: case Triple::ppc64: - switch (RelocType) { - case ELF::R_PPC64_ADDR32: - return visitELF_PPC64_ADDR32(R, Value); - case ELF::R_PPC64_ADDR64: - return visitELF_PPC64_ADDR64(R, Value); - default: - HasError = true; - return 0; - } + return visitPPC64(Rel, R, Value); case Triple::systemz: - switch (RelocType) { - case ELF::R_390_32: - return visitELF_390_32(R, Value); - case ELF::R_390_64: - return visitELF_390_64(R, Value); - default: - HasError = true; - return 0; - } + return visitSystemz(Rel, R, Value); case Triple::sparcv9: - switch (RelocType) { - case ELF::R_SPARC_32: - case ELF::R_SPARC_UA32: - return visitELF_SPARCV9_32(R, Value); - case ELF::R_SPARC_64: - case ELF::R_SPARC_UA64: - return visitELF_SPARCV9_64(R, Value); - default: - HasError = true; - return 0; - } + return visitSparc64(Rel, R, Value); case Triple::amdgcn: - switch (RelocType) { - case ELF::R_AMDGPU_ABS32: - return visitELF_AMDGPU_ABS32(R, Value); - case ELF::R_AMDGPU_ABS64: - return visitELF_AMDGPU_ABS64(R, Value); - default: - HasError = true; - return 0; - } + return visitAmdgpu(Rel, R, Value); default: HasError = true; return 0; } - } else if (ObjToVisit.getBytesInAddress() == 4) { // 32-bit object file - switch (ObjToVisit.getArch()) { - case Triple::x86: - switch (RelocType) { - case ELF::R_386_NONE: - return visitELF_386_NONE(R); - case ELF::R_386_32: - return visitELF_386_32(R, Value); - case ELF::R_386_PC32: - return visitELF_386_PC32(R, Value); - default: - HasError = true; - return 0; - } - case Triple::ppc: - switch (RelocType) { - case ELF::R_PPC_ADDR32: - return visitELF_PPC_ADDR32(R, Value); - default: - HasError = true; - return 0; - } - case Triple::arm: - case Triple::armeb: - switch (RelocType) { - default: - HasError = true; - return 0; - case ELF::R_ARM_ABS32: - return visitELF_ARM_ABS32(R, Value); - } - case Triple::lanai: - switch (RelocType) { - case ELF::R_LANAI_32: - return visitELF_Lanai_32(R, Value); - default: - HasError = true; - return 0; - } - case Triple::mipsel: - case Triple::mips: - switch (RelocType) { - case ELF::R_MIPS_32: - return visitELF_MIPS_32(R, Value); - default: - HasError = true; - return 0; - } - case Triple::sparc: - switch (RelocType) { - case ELF::R_SPARC_32: - case ELF::R_SPARC_UA32: - return visitELF_SPARC_32(R, Value); - default: - HasError = true; - return 0; - } - case Triple::hexagon: - switch (RelocType) { - case ELF::R_HEX_32: - return visitELF_HEX_32(R, Value); - default: - HasError = true; - return 0; - } - default: - HasError = true; - return 0; - } - } else { - report_fatal_error("Invalid word size in object file"); } - } - uint64_t visitCOFF(uint32_t RelocType, RelocationRef R, uint64_t Value) { + // 32-bit object file + assert(ObjToVisit.getBytesInAddress() == 4 && + "Invalid word size in object file"); + switch (ObjToVisit.getArch()) { case Triple::x86: - switch (RelocType) { - case COFF::IMAGE_REL_I386_SECREL: - return visitCOFF_I386_SECREL(R, Value); - case COFF::IMAGE_REL_I386_DIR32: - return visitCOFF_I386_DIR32(R, Value); - } - break; - case Triple::x86_64: - switch (RelocType) { - case COFF::IMAGE_REL_AMD64_SECREL: - return visitCOFF_AMD64_SECREL(R, Value); - case COFF::IMAGE_REL_AMD64_ADDR64: - return visitCOFF_AMD64_ADDR64(R, Value); - } - break; + return visitX86(Rel, R, Value); + case Triple::ppc: + return visitPPC32(Rel, R, Value); + case Triple::arm: + case Triple::armeb: + return visitARM(Rel, R, Value); + case Triple::lanai: + return visitLanai(Rel, R, Value); + case Triple::mipsel: + case Triple::mips: + return visitMips32(Rel, R, Value); + case Triple::sparc: + return visitSparc32(Rel, R, Value); + case Triple::hexagon: + return visitHexagon(Rel, R, Value); + default: + HasError = true; + return 0; } - HasError = true; - return 0; - } - - uint64_t visitMachO(uint32_t RelocType, RelocationRef R, uint64_t Value) { - switch (ObjToVisit.getArch()) { - default: break; - case Triple::x86_64: - switch (RelocType) { - default: break; - case MachO::X86_64_RELOC_UNSIGNED: - return visitMACHO_X86_64_UNSIGNED(R, Value); - } - } - HasError = true; - return 0; } int64_t getELFAddend(RelocationRef R) { @@ -275,176 +121,193 @@ private: return *AddendOrErr; } - /// Operations - - /// 386-ELF - uint64_t visitELF_386_NONE(RelocationRef R) { + uint64_t visitX86_64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_X86_64_NONE: + return 0; + case ELF::R_X86_64_64: + return Value + getELFAddend(R); + case ELF::R_X86_64_PC32: + return Value + getELFAddend(R) - R.getOffset(); + case ELF::R_X86_64_32: + case ELF::R_X86_64_32S: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + } + HasError = true; return 0; } - // Ideally the Addend here will be the addend in the data for - // the relocation. It's not actually the case for Rel relocations. - uint64_t visitELF_386_32(RelocationRef R, uint64_t Value) { - return Value; - } - - uint64_t visitELF_386_PC32(RelocationRef R, uint64_t Value) { - return Value - R.getOffset(); - } - - /// X86-64 ELF - uint64_t visitELF_X86_64_NONE(RelocationRef R) { + uint64_t visitAarch64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_AARCH64_ABS32: { + int64_t Res = Value + getELFAddend(R); + if (Res < INT32_MIN || Res > UINT32_MAX) + HasError = true; + return static_cast(Res); + } + case ELF::R_AARCH64_ABS64: + return Value + getELFAddend(R); + } + HasError = true; return 0; } - uint64_t visitELF_X86_64_64(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R); + uint64_t visitBpf(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_BPF_64_32: + return Value & 0xFFFFFFFF; + case ELF::R_BPF_64_64: + return Value; + } + HasError = true; + return 0; } - uint64_t visitELF_X86_64_PC32(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R) - R.getOffset(); + uint64_t visitMips64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_MIPS_32: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_MIPS_64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - uint64_t visitELF_X86_64_32(RelocationRef R, uint64_t Value) { - return (Value + getELFAddend(R)) & 0xFFFFFFFF; + uint64_t visitPPC64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_PPC64_ADDR32: + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_PPC64_ADDR64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - uint64_t visitELF_X86_64_32S(RelocationRef R, uint64_t Value) { - return (Value + getELFAddend(R)) & 0xFFFFFFFF; + uint64_t visitSystemz(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_390_32: { + int64_t Res = Value + getELFAddend(R); + if (Res < INT32_MIN || Res > UINT32_MAX) + HasError = true; + return static_cast(Res); + } + case ELF::R_390_64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - /// BPF ELF - uint64_t visitELF_BPF_64_32(RelocationRef R, uint64_t Value) { - return Value & 0xFFFFFFFF; + uint64_t visitSparc64(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_SPARC_32: + case ELF::R_SPARC_64: + case ELF::R_SPARC_UA32: + case ELF::R_SPARC_UA64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - uint64_t visitELF_BPF_64_64(RelocationRef R, uint64_t Value) { - return Value; + uint64_t visitAmdgpu(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_AMDGPU_ABS32: + case ELF::R_AMDGPU_ABS64: + return Value + getELFAddend(R); + } + HasError = true; + return 0; } - /// PPC64 ELF - uint64_t visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) { - return (Value + getELFAddend(R)) & 0xFFFFFFFF; + uint64_t visitX86(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (Rel) { + case ELF::R_386_NONE: + return 0; + case ELF::R_386_32: + return Value; + case ELF::R_386_PC32: + return Value - R.getOffset(); + } + HasError = true; + return 0; } - uint64_t visitELF_PPC64_ADDR64(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R); + uint64_t visitPPC32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_PPC_ADDR32) + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + HasError = true; + return 0; } - /// PPC32 ELF - uint64_t visitELF_PPC_ADDR32(RelocationRef R, uint64_t Value) { - return (Value + getELFAddend(R)) & 0xFFFFFFFF; + uint64_t visitARM(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_ARM_ABS32) { + if ((int64_t)Value < INT32_MIN || (int64_t)Value > UINT32_MAX) + HasError = true; + return static_cast(Value); + } + HasError = true; + return 0; } - /// Lanai ELF - uint64_t visitELF_Lanai_32(RelocationRef R, uint64_t Value) { - return (Value + getELFAddend(R)) & 0xFFFFFFFF; + uint64_t visitLanai(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_LANAI_32) + return (Value + getELFAddend(R)) & 0xFFFFFFFF; + HasError = true; + return 0; } - /// MIPS ELF - uint64_t visitELF_MIPS_32(RelocationRef R, uint64_t Value) { - return Value & 0xFFFFFFFF; + uint64_t visitMips32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_MIPS_32) + return Value & 0xFFFFFFFF; + HasError = true; + return 0; } - /// MIPS64 ELF - uint64_t visitELF_MIPS64_32(RelocationRef R, uint64_t Value) { - return (Value + getELFAddend(R)) & 0xFFFFFFFF; + uint64_t visitSparc32(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_SPARC_32 || Rel == ELF::R_SPARC_UA32) + return Value + getELFAddend(R); + HasError = true; + return 0; } - uint64_t visitELF_MIPS64_64(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R); + uint64_t visitHexagon(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (Rel == ELF::R_HEX_32) + return Value + getELFAddend(R); + HasError = true; + return 0; } - // AArch64 ELF - uint64_t visitELF_AARCH64_ABS32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int64_t Res = Value + Addend; - - // Overflow check allows for both signed and unsigned interpretation. - if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return static_cast(Res); + uint64_t visitCOFF(uint32_t Rel, RelocationRef R, uint64_t Value) { + switch (ObjToVisit.getArch()) { + case Triple::x86: + switch (Rel) { + case COFF::IMAGE_REL_I386_SECREL: + case COFF::IMAGE_REL_I386_DIR32: + return static_cast(Value); + } + break; + case Triple::x86_64: + switch (Rel) { + case COFF::IMAGE_REL_AMD64_SECREL: + return static_cast(Value); + case COFF::IMAGE_REL_AMD64_ADDR64: + return Value; + } + break; + } + HasError = true; + return 0; } - uint64_t visitELF_AARCH64_ABS64(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R); - } - - // SystemZ ELF - uint64_t visitELF_390_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - int64_t Res = Value + Addend; - - // Overflow check allows for both signed and unsigned interpretation. - if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return static_cast(Res); - } - - uint64_t visitELF_390_64(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R); - } - - uint64_t visitELF_SPARC_32(RelocationRef R, uint32_t Value) { - return Value + getELFAddend(R); - } - - uint64_t visitELF_SPARCV9_32(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R); - } - - uint64_t visitELF_SPARCV9_64(RelocationRef R, uint64_t Value) { - return Value + getELFAddend(R); - } - - uint64_t visitELF_ARM_ABS32(RelocationRef R, uint64_t Value) { - int64_t Res = Value; - - // Overflow check allows for both signed and unsigned interpretation. - if (Res < INT32_MIN || Res > UINT32_MAX) - HasError = true; - - return static_cast(Res); - } - - uint64_t visitELF_HEX_32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return Value + Addend; - } - - uint64_t visitELF_AMDGPU_ABS32(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return Value + Addend; - } - - uint64_t visitELF_AMDGPU_ABS64(RelocationRef R, uint64_t Value) { - int64_t Addend = getELFAddend(R); - return Value + Addend; - } - - /// I386 COFF - uint64_t visitCOFF_I386_SECREL(RelocationRef R, uint64_t Value) { - return static_cast(Value); - } - - uint64_t visitCOFF_I386_DIR32(RelocationRef R, uint64_t Value) { - return static_cast(Value); - } - - /// AMD64 COFF - uint64_t visitCOFF_AMD64_SECREL(RelocationRef R, uint64_t Value) { - return static_cast(Value); - } - - uint64_t visitCOFF_AMD64_ADDR64(RelocationRef R, uint64_t Value) { - return Value; - } - - // X86_64 MachO - uint64_t visitMACHO_X86_64_UNSIGNED(RelocationRef R, uint64_t Value) { - return Value; + uint64_t visitMachO(uint32_t Rel, RelocationRef R, uint64_t Value) { + if (ObjToVisit.getArch() == Triple::x86_64 && + Rel == MachO::X86_64_RELOC_UNSIGNED) + return Value; + HasError = true; + return 0; } }; diff --git a/contrib/llvm/include/llvm/Object/Wasm.h b/contrib/llvm/include/llvm/Object/Wasm.h index d200d4a148e..de54a4928cc 100644 --- a/contrib/llvm/include/llvm/Object/Wasm.h +++ b/contrib/llvm/include/llvm/Object/Wasm.h @@ -119,6 +119,7 @@ public: std::error_code getSectionName(DataRefImpl Sec, StringRef &Res) const override; uint64_t getSectionAddress(DataRefImpl Sec) const override; + uint64_t getSectionIndex(DataRefImpl Sec) const override; uint64_t getSectionSize(DataRefImpl Sec) const override; std::error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const override; diff --git a/contrib/llvm/include/llvm/Option/OptTable.h b/contrib/llvm/include/llvm/Option/OptTable.h index 390e52774fe..8a323a255ca 100644 --- a/contrib/llvm/include/llvm/Option/OptTable.h +++ b/contrib/llvm/include/llvm/Option/OptTable.h @@ -113,6 +113,14 @@ public: return getInfo(id).MetaVar; } + /// Find flags from OptTable which starts with Cur. + /// + /// \param [in] Cur - String prefix that all returned flags need + // to start with. + /// + /// \return The vector of flags which start with Cur. + std::vector findByPrefix(StringRef Cur) const; + /// \brief Parse a single argument; returning the new argument and /// updating Index. /// diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h index 1b07c33746e..0dbb2cf9f26 100644 --- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h +++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h @@ -212,12 +212,12 @@ StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, /// third field is the uncompressed strings; otherwise it is the /// compressed string. When the string compression is off, the /// second field will have value zero. -Error collectPGOFuncNameStrings(const std::vector &NameStrs, +Error collectPGOFuncNameStrings(ArrayRef NameStrs, bool doCompression, std::string &Result); /// Produce \c Result string with the same format described above. The input /// is vector of PGO function name variables that are referenced. -Error collectPGOFuncNameStrings(const std::vector &NameVars, +Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression = true); /// \c NameStrings is a string composed of one of more sub-strings encoded in @@ -967,7 +967,7 @@ struct Header { } // end namespace RawInstrProf // Parse MemOP Size range option. -void getMemOPSizeRangeFromOption(std::string Str, int64_t &RangeStart, +void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); } // end namespace llvm diff --git a/contrib/llvm/include/llvm/TableGen/Record.h b/contrib/llvm/include/llvm/TableGen/Record.h index fef5bf30456..d14a56cb87e 100644 --- a/contrib/llvm/include/llvm/TableGen/Record.h +++ b/contrib/llvm/include/llvm/TableGen/Record.h @@ -671,7 +671,7 @@ public: /// [AL, AH, CL] - Represent a list of defs /// class ListInit final : public TypedInit, public FoldingSetNode, - public TrailingObjects { + public TrailingObjects { unsigned NumValues; public: @@ -1137,17 +1137,19 @@ public: /// to have at least one value then a (possibly empty) list of arguments. Each /// argument can have a name associated with it. /// -class DagInit : public TypedInit, public FoldingSetNode { +class DagInit final : public TypedInit, public FoldingSetNode, + public TrailingObjects { Init *Val; StringInit *ValName; - SmallVector Args; - SmallVector ArgNames; + unsigned NumArgs; + unsigned NumArgNames; - DagInit(Init *V, StringInit *VN, ArrayRef ArgRange, - ArrayRef NameRange) + DagInit(Init *V, StringInit *VN, unsigned NumArgs, unsigned NumArgNames) : TypedInit(IK_DagInit, DagRecTy::get()), Val(V), ValName(VN), - Args(ArgRange.begin(), ArgRange.end()), - ArgNames(NameRange.begin(), NameRange.end()) {} + NumArgs(NumArgs), NumArgNames(NumArgNames) {} + + friend TrailingObjects; + size_t numTrailingObjects(OverloadToken) const { return NumArgs; } public: DagInit(const DagInit &Other) = delete; @@ -1173,20 +1175,24 @@ public: return ValName ? ValName->getValue() : StringRef(); } - unsigned getNumArgs() const { return Args.size(); } + unsigned getNumArgs() const { return NumArgs; } Init *getArg(unsigned Num) const { - assert(Num < Args.size() && "Arg number out of range!"); - return Args[Num]; + assert(Num < NumArgs && "Arg number out of range!"); + return getTrailingObjects()[Num]; } StringInit *getArgName(unsigned Num) const { - assert(Num < ArgNames.size() && "Arg number out of range!"); - return ArgNames[Num]; + assert(Num < NumArgNames && "Arg number out of range!"); + return getTrailingObjects()[Num]; } StringRef getArgNameStr(unsigned Num) const { StringInit *Init = getArgName(Num); return Init ? Init->getValue() : StringRef(); } + ArrayRef getArgNames() const { + return makeArrayRef(getTrailingObjects(), NumArgNames); + } + Init *resolveReferences(Record &R, const RecordVal *RV) const override; std::string getAsString() const override; @@ -1194,20 +1200,20 @@ public: typedef SmallVectorImpl::const_iterator const_arg_iterator; typedef SmallVectorImpl::const_iterator const_name_iterator; - inline const_arg_iterator arg_begin() const { return Args.begin(); } - inline const_arg_iterator arg_end () const { return Args.end(); } + inline const_arg_iterator arg_begin() const { return getTrailingObjects(); } + inline const_arg_iterator arg_end () const { return arg_begin() + NumArgs; } inline iterator_range args() const { return llvm::make_range(arg_begin(), arg_end()); } - inline size_t arg_size () const { return Args.size(); } - inline bool arg_empty() const { return Args.empty(); } + inline size_t arg_size () const { return NumArgs; } + inline bool arg_empty() const { return NumArgs == 0; } - inline const_name_iterator name_begin() const { return ArgNames.begin(); } - inline const_name_iterator name_end () const { return ArgNames.end(); } + inline const_name_iterator name_begin() const { return getTrailingObjects(); } + inline const_name_iterator name_end () const { return name_begin() + NumArgNames; } - inline size_t name_size () const { return ArgNames.size(); } - inline bool name_empty() const { return ArgNames.empty(); } + inline size_t name_size () const { return NumArgNames; } + inline bool name_empty() const { return NumArgNames == 0; } Init *getBit(unsigned Bit) const override { llvm_unreachable("Illegal bit reference off dag"); diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h index 1ca32d4c358..17182b958ec 100644 --- a/contrib/llvm/include/llvm/Target/TargetLowering.h +++ b/contrib/llvm/include/llvm/Target/TargetLowering.h @@ -405,7 +405,9 @@ public: } /// Returns if it's reasonable to merge stores to MemVT size. - virtual bool canMergeStoresTo(EVT MemVT) const { return true; } + virtual bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const { + return true; + } /// \brief Return true if it is cheap to speculate a call to intrinsic cttz. virtual bool isCheapToSpeculateCttz() const { @@ -736,7 +738,7 @@ public: if (VT.isExtended()) return Expand; // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. - if (Op > array_lengthof(OpActions[0])) return Custom; + if (Op >= array_lengthof(OpActions[0])) return Custom; return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } diff --git a/contrib/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm/include/llvm/Transforms/Scalar.h index ba0a3ee1287..856c288a071 100644 --- a/contrib/llvm/include/llvm/Transforms/Scalar.h +++ b/contrib/llvm/include/llvm/Transforms/Scalar.h @@ -354,6 +354,13 @@ FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false); // FunctionPass *createGVNHoistPass(); +//===----------------------------------------------------------------------===// +// +// GVNSink - This pass uses an "inverted" value numbering to decide the +// similarity of expressions and sinks similar expressions into successors. +// +FunctionPass *createGVNSinkPass(); + //===----------------------------------------------------------------------===// // // MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h index 8f05e8cdb23..3f97789cabb 100644 --- a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -68,6 +68,24 @@ public: class ValueTable { DenseMap valueNumbering; DenseMap expressionNumbering; + + // Expressions is the vector of Expression. ExprIdx is the mapping from + // value number to the index of Expression in Expressions. We use it + // instead of a DenseMap because filling such mapping is faster than + // filling a DenseMap and the compile time is a little better. + uint32_t nextExprNumber; + std::vector Expressions; + std::vector ExprIdx; + // Value number to PHINode mapping. Used for phi-translate in scalarpre. + DenseMap NumberingPhi; + // Cache for phi-translate in scalarpre. + typedef DenseMap, uint32_t> + PhiTranslateMap; + PhiTranslateMap PhiTranslateTable; + // Map the block to reversed postorder traversal number. It is used to + // find back edge easily. + DenseMap BlockRPONumber; + AliasAnalysis *AA; MemoryDependenceResults *MD; DominatorTree *DT; @@ -79,6 +97,10 @@ public: Value *LHS, Value *RHS); Expression createExtractvalueExpr(ExtractValueInst *EI); uint32_t lookupOrAddCall(CallInst *C); + uint32_t phiTranslateImpl(const BasicBlock *BB, const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn); + std::pair assignExpNewValueNum(Expression &exp); + bool areAllValsInBB(uint32_t num, const BasicBlock *BB, GVN &Gvn); public: ValueTable(); @@ -87,9 +109,12 @@ public: ~ValueTable(); uint32_t lookupOrAdd(Value *V); - uint32_t lookup(Value *V) const; + uint32_t lookup(Value *V, bool Verify = true) const; uint32_t lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Pred, Value *LHS, Value *RHS); + uint32_t phiTranslate(const BasicBlock *BB, const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn); + void assignBlockRPONumber(Function &F); bool exists(Value *V) const; void add(Value *V, uint32_t num); void clear(); @@ -238,7 +263,12 @@ struct GVNHoistPass : PassInfoMixin { /// \brief Run the pass over the function. PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; - +/// \brief Uses an "inverted" value numbering to decide the similarity of +/// expressions and sinks similar expressions into successors. +struct GVNSinkPass : PassInfoMixin { + /// \brief Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; } #endif diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h index b5a5f4c2704..8942111307f 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h @@ -356,6 +356,10 @@ void combineMetadata(Instruction *K, const Instruction *J, ArrayRef Kn /// Unknown metadata is removed. void combineMetadataForCSE(Instruction *K, const Instruction *J); +// Replace each use of 'From' with 'To', if that use does not belong to basic +// block where 'From' is defined. Returns the number of replacements made. +unsigned replaceNonLocalUsesWith(Instruction *From, Value *To); + /// Replace each use of 'From' with 'To' if that use is dominated by /// the given edge. Returns the number of replacements made. unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, @@ -406,6 +410,14 @@ bool recognizeBSwapOrBitReverseIdiom( void maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI, const TargetLibraryInfo *TLI); +//===----------------------------------------------------------------------===// +// Transform predicates +// + +/// Given an instruction, is it legal to set operand OpIdx to a non-constant +/// value? +bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx); + } // End llvm namespace #endif diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 0ca712bbfe7..79517ec6a3a 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -687,11 +687,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1, // bits. if (Opc == Instruction::And) { - unsigned BitWidth = DL.getTypeSizeInBits(Op0->getType()->getScalarType()); - KnownBits Known0(BitWidth); - KnownBits Known1(BitWidth); - computeKnownBits(Op0, Known0, DL); - computeKnownBits(Op1, Known1, DL); + KnownBits Known0 = computeKnownBits(Op0, DL); + KnownBits Known1 = computeKnownBits(Op1, DL); if ((Known1.One | Known0.Zero).isAllOnesValue()) { // All the bits of Op0 that the 'and' could be masking are already zero. return Op0; diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index 2e72d5aa826..122442bafb1 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -688,9 +688,7 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, if (isNUW) return Op0; - unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); if (Known.Zero.isMaxSignedValue()) { // Op1 is either 0 or the minimum signed value. If the sub is NSW, then // Op1 must be 0 because negating the minimum signed value is undefined. @@ -1309,15 +1307,13 @@ static Value *SimplifyShift(Instruction::BinaryOps Opcode, Value *Op0, // If any bits in the shift amount make that value greater than or equal to // the number of bits in the type, the shift is undefined. - unsigned BitWidth = Op1->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(Op1, Known, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); - if (Known.One.getLimitedValue() >= BitWidth) + KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT); + if (Known.One.getLimitedValue() >= Known.getBitWidth()) return UndefValue::get(Op0->getType()); // If all valid bits in the shift amount are known zero, the first operand is // unchanged. - unsigned NumValidShiftBits = Log2_32_Ceil(BitWidth); + unsigned NumValidShiftBits = Log2_32_Ceil(Known.getBitWidth()); if (Known.countMinTrailingZeros() >= NumValidShiftBits) return Op0; @@ -1343,9 +1339,7 @@ static Value *SimplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0, // The low bit cannot be shifted out of an exact shift if it is set. if (isExact) { - unsigned BitWidth = Op0->getType()->getScalarSizeInBits(); - KnownBits Op0Known(BitWidth); - computeKnownBits(Op0, Op0Known, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + KnownBits Op0Known = computeKnownBits(Op0, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); if (Op0Known.One[0]) return Op0; } @@ -1428,6 +1422,8 @@ Value *llvm::SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, return ::SimplifyAShrInst(Op0, Op1, isExact, Q, RecursionLimit); } +/// Commuted variants are assumed to be handled by calling this function again +/// with the parameters swapped. static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp, ICmpInst *UnsignedICmp, bool IsAnd) { Value *X, *Y; @@ -1560,20 +1556,8 @@ static Value *simplifyAndOrOfICmpsWithConstants(ICmpInst *Cmp0, ICmpInst *Cmp1, return nullptr; } -/// Commuted variants are assumed to be handled by calling this function again -/// with the parameters swapped. -static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) - return X; - - if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) - return X; - - if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) - return X; - +static Value *simplifyAndOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { // (icmp (add V, C0), C1) & (icmp V, C0) - Type *ITy = Op0->getType(); ICmpInst::Predicate Pred0, Pred1; const APInt *C0, *C1; Value *V; @@ -1587,6 +1571,7 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { if (AddInst->getOperand(1) != Op1->getOperand(1)) return nullptr; + Type *ITy = Op0->getType(); bool isNSW = AddInst->hasNoSignedWrap(); bool isNUW = AddInst->hasNoUnsignedWrap(); @@ -1617,18 +1602,29 @@ static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -/// Commuted variants are assumed to be handled by calling this function again -/// with the parameters swapped. -static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { - if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) +static Value *simplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true)) + return X; + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/true)) return X; - if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) + if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1)) + return X; + if (Value *X = simplifyAndOfICmpsWithSameOperands(Op1, Op0)) return X; - if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) + if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, true)) return X; + if (Value *X = simplifyAndOfICmpsWithAdd(Op0, Op1)) + return X; + if (Value *X = simplifyAndOfICmpsWithAdd(Op1, Op0)) + return X; + + return nullptr; +} + +static Value *simplifyOrOfICmpsWithAdd(ICmpInst *Op0, ICmpInst *Op1) { // (icmp (add V, C0), C1) | (icmp V, C0) ICmpInst::Predicate Pred0, Pred1; const APInt *C0, *C1; @@ -1674,19 +1670,24 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { return nullptr; } -static Value *simplifyPossiblyCastedAndOrOfICmps(ICmpInst *Cmp0, ICmpInst *Cmp1, - bool IsAnd, CastInst *Cast) { - Value *V = - IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1); - if (!V) - return nullptr; - if (!Cast) - return V; +static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) { + if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false)) + return X; + if (Value *X = simplifyUnsignedRangeCheck(Op1, Op0, /*IsAnd=*/false)) + return X; - // If we looked through casts, we can only handle a constant simplification - // because we are not allowed to create a cast instruction here. - if (auto *C = dyn_cast(V)) - return ConstantExpr::getCast(Cast->getOpcode(), C, Cast->getType()); + if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1)) + return X; + if (Value *X = simplifyOrOfICmpsWithSameOperands(Op1, Op0)) + return X; + + if (Value *X = simplifyAndOrOfICmpsWithConstants(Op0, Op1, false)) + return X; + + if (Value *X = simplifyOrOfICmpsWithAdd(Op0, Op1)) + return X; + if (Value *X = simplifyOrOfICmpsWithAdd(Op1, Op0)) + return X; return nullptr; } @@ -1706,11 +1707,18 @@ static Value *simplifyAndOrOfICmps(Value *Op0, Value *Op1, bool IsAnd) { if (!Cmp0 || !Cmp1) return nullptr; - if (Value *V = simplifyPossiblyCastedAndOrOfICmps(Cmp0, Cmp1, IsAnd, Cast0)) - return V; - if (Value *V = simplifyPossiblyCastedAndOrOfICmps(Cmp1, Cmp0, IsAnd, Cast0)) + Value *V = + IsAnd ? simplifyAndOfICmps(Cmp0, Cmp1) : simplifyOrOfICmps(Cmp0, Cmp1); + if (!V) + return nullptr; + if (!Cast0) return V; + // If we looked through casts, we can only handle a constant simplification + // because we are not allowed to create a cast instruction here. + if (auto *C = dyn_cast(V)) + return ConstantExpr::getCast(Cast0->getOpcode(), C, Cast0->getType()); + return nullptr; } @@ -1927,37 +1935,27 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q, MaxRecurse)) return V; - // (A & C)|(B & D) - Value *C = nullptr, *D = nullptr; - if (match(Op0, m_And(m_Value(A), m_Value(C))) && - match(Op1, m_And(m_Value(B), m_Value(D)))) { - ConstantInt *C1 = dyn_cast(C); - ConstantInt *C2 = dyn_cast(D); - if (C1 && C2 && (C1->getValue() == ~C2->getValue())) { + // (A & C1)|(B & C2) + const APInt *C1, *C2; + if (match(Op0, m_And(m_Value(A), m_APInt(C1))) && + match(Op1, m_And(m_Value(B), m_APInt(C2)))) { + if (*C1 == ~*C2) { // (A & C1)|(B & C2) // If we have: ((V + N) & C1) | (V & C2) // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 // replace with V+N. - Value *V1, *V2; - if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { + Value *N; + if (C2->isMask() && // C2 == 0+1+ + match(A, m_c_Add(m_Specific(B), m_Value(N)))) { // Add commutes, try both ways. - if (V1 == B && - MaskedValueIsZero(V2, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return A; - if (V2 == B && - MaskedValueIsZero(V1, C2->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (MaskedValueIsZero(N, *C2, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return A; } // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue() + 1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { + if (C1->isMask() && + match(B, m_c_Add(m_Specific(A), m_Value(N)))) { // Add commutes, try both ways. - if (V1 == A && - MaskedValueIsZero(V2, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) - return B; - if (V2 == A && - MaskedValueIsZero(V1, C1->getValue(), Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) + if (MaskedValueIsZero(N, *C1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT)) return B; } } @@ -3372,9 +3370,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, if (ICmpInst::isEquality(Pred)) { const APInt *RHSVal; if (match(RHS, m_APInt(RHSVal))) { - unsigned BitWidth = RHSVal->getBitWidth(); - KnownBits LHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); + KnownBits LHSKnown = computeKnownBits(LHS, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT); if (LHSKnown.Zero.intersects(*RHSVal) || !LHSKnown.One.isSubsetOf(*RHSVal)) return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy) @@ -3539,6 +3535,10 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, if (V == Op) return RepOp; + // We cannot replace a constant, and shouldn't even try. + if (isa(Op)) + return nullptr; + auto *I = dyn_cast(V); if (!I) return nullptr; @@ -4444,19 +4444,21 @@ static Value *SimplifyIntrinsic(Function *F, IterTy ArgBegin, IterTy ArgEnd, case Intrinsic::uadd_with_overflow: case Intrinsic::sadd_with_overflow: { // X + undef -> undef - if (isa(RHS)) + if (isa(LHS) || isa(RHS)) return UndefValue::get(ReturnType); return nullptr; } case Intrinsic::umul_with_overflow: case Intrinsic::smul_with_overflow: { + // 0 * X -> { 0, false } // X * 0 -> { 0, false } - if (match(RHS, m_Zero())) + if (match(LHS, m_Zero()) || match(RHS, m_Zero())) return Constant::getNullValue(ReturnType); + // undef * X -> { 0, false } // X * undef -> { 0, false } - if (match(RHS, m_Undef())) + if (match(LHS, m_Undef()) || match(RHS, m_Undef())) return Constant::getNullValue(ReturnType); return nullptr; @@ -4680,9 +4682,7 @@ Value *llvm::SimplifyInstruction(Instruction *I, const SimplifyQuery &SQ, // In general, it is possible for computeKnownBits to determine all bits in a // value even when the operands are not all constants. if (!Result && I->getType()->isIntOrIntVectorTy()) { - unsigned BitWidth = I->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(I, Known, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); + KnownBits Known = computeKnownBits(I, Q.DL, /*Depth*/ 0, Q.AC, I, Q.DT, ORE); if (Known.isConstant()) Result = ConstantInt::get(I->getType(), Known.getConstant()); } diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 471ccb62970..e6391792bc2 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -534,9 +534,7 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, VectorType *VecTy = dyn_cast(V->getType()); if (!VecTy) { - unsigned BitWidth = V->getType()->getIntegerBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(V, Known, DL, 0, AC, dyn_cast(V), DT); + KnownBits Known = computeKnownBits(V, DL, 0, AC, dyn_cast(V), DT); return Known.isZero(); } @@ -550,14 +548,12 @@ static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, // For a vector, KnownZero will only be true if all values are zero, so check // this per component - unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { Constant *Elem = C->getAggregateElement(I); if (isa(Elem)) return true; - KnownBits Known(BitWidth); - computeKnownBits(Elem, Known, DL); + KnownBits Known = computeKnownBits(Elem, DL); if (Known.isZero()) return true; } diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 0b5f6266e37..e988f6444a5 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -73,30 +73,23 @@ LPPassManager::LPPassManager() CurrentLoop = nullptr; } -// Inset loop into loop nest (LoopInfo) and loop queue (LQ). -Loop &LPPassManager::addLoop(Loop *ParentLoop) { - // Create a new loop. LI will take ownership. - Loop *L = new Loop(); - - // Insert into the loop nest and the loop queue. - if (!ParentLoop) { +// Insert loop into loop nest (LoopInfo) and loop queue (LQ). +void LPPassManager::addLoop(Loop &L) { + if (!L.getParentLoop()) { // This is the top level loop. - LI->addTopLevelLoop(L); - LQ.push_front(L); - return *L; + LQ.push_front(&L); + return; } - ParentLoop->addChildLoop(L); // Insert L into the loop queue after the parent loop. for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) { - if (*I == L->getParentLoop()) { + if (*I == L.getParentLoop()) { // deque does not support insert after. ++I; - LQ.insert(I, 1, L); - break; + LQ.insert(I, 1, &L); + return; } } - return *L; } /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 78ded8141c0..d280fda0a16 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2178,6 +2178,63 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, return Flags; } +bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L, + DominatorTree &DT, LoopInfo &LI) { + if (!isLoopInvariant(S, L)) + return false; + // If a value depends on a SCEVUnknown which is defined after the loop, we + // conservatively assume that we cannot calculate it at the loop's entry. + struct FindDominatedSCEVUnknown { + bool Found = false; + const Loop *L; + DominatorTree &DT; + LoopInfo &LI; + + FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI) + : L(L), DT(DT), LI(LI) {} + + bool checkSCEVUnknown(const SCEVUnknown *SU) { + if (auto *I = dyn_cast(SU->getValue())) { + if (DT.dominates(L->getHeader(), I->getParent())) + Found = true; + else + assert(DT.dominates(I->getParent(), L->getHeader()) && + "No dominance relationship between SCEV and loop?"); + } + return false; + } + + bool follow(const SCEV *S) { + switch (static_cast(S->getSCEVType())) { + case scConstant: + return false; + case scAddRecExpr: + case scTruncate: + case scZeroExtend: + case scSignExtend: + case scAddExpr: + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: + case scUDivExpr: + return true; + case scUnknown: + return checkSCEVUnknown(cast(S)); + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + return false; + } + + bool isDone() { return Found; } + }; + + FindDominatedSCEVUnknown FSU(L, DT, LI); + SCEVTraversal ST(FSU); + ST.visitAll(S); + return !FSU.Found; +} + /// Get a canonical add expression, or something simpler if possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, SCEV::NoWrapFlags Flags, @@ -2459,7 +2516,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -2734,7 +2791,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, const SCEVAddRecExpr *AddRec = cast(Ops[Idx]); const Loop *AddRecLoop = AddRec->getLoop(); for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (isLoopInvariant(Ops[i], AddRecLoop)) { + if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) { LIOps.push_back(Ops[i]); Ops.erase(Ops.begin()+i); --i; --e; @@ -4648,10 +4705,7 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { if (const SCEVUnknown *U = dyn_cast(S)) { // For a SCEVUnknown, ask ValueTracking. - unsigned BitWidth = getTypeSizeInBits(U->getType()); - KnownBits Known(BitWidth); - computeKnownBits(U->getValue(), Known, getDataLayout(), 0, &AC, - nullptr, &DT); + KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT); return Known.countMinTrailingZeros(); } @@ -4831,8 +4885,7 @@ ScalarEvolution::getRange(const SCEV *S, const DataLayout &DL = getDataLayout(); if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { // For a SCEVUnknown, ask ValueTracking. - KnownBits Known(BitWidth); - computeKnownBits(U->getValue(), Known, DL, 0, &AC, nullptr, &DT); + KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); if (Known.One != ~Known.Zero + 1) ConservativeResult = ConservativeResult.intersectWith(ConstantRange(Known.One, @@ -9537,8 +9590,11 @@ struct SCEVCollectAddRecMultiplies { bool HasAddRec = false; SmallVector Operands; for (auto Op : Mul->operands()) { - if (isa(Op)) { + const SCEVUnknown *Unknown = dyn_cast(Op); + if (Unknown && !isa(Unknown->getValue())) { Operands.push_back(Op); + } else if (Unknown) { + HasAddRec = true; } else { bool ContainsAddRec; SCEVHasAddRec ContiansAddRec(ContainsAddRec); diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index 86cbd79aa84..f9b9df2bc70 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -1305,12 +1305,17 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Expand the core addrec. If we need post-loop scaling, force it to // expand to an integer type to avoid the need for additional casting. Type *ExpandTy = PostLoopScale ? IntTy : STy; + // We can't use a pointer type for the addrec if the pointer type is + // non-integral. + Type *AddRecPHIExpandTy = + DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy; + // In some cases, we decide to reuse an existing phi node but need to truncate // it and/or invert the step. Type *TruncTy = nullptr; bool InvertStep = false; - PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy, - TruncTy, InvertStep); + PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy, + IntTy, TruncTy, InvertStep); // Accommodate post-inc mode, if necessary. Value *Result; @@ -1383,8 +1388,15 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { // Re-apply any non-loop-dominating offset. if (PostLoopOffset) { if (PointerType *PTy = dyn_cast(ExpandTy)) { - const SCEV *const OffsetArray[1] = { PostLoopOffset }; - Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result); + if (Result->getType()->isIntegerTy()) { + Value *Base = expandCodeFor(PostLoopOffset, ExpandTy); + const SCEV *const OffsetArray[1] = {SE.getUnknown(Result)}; + Result = expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Base); + } else { + const SCEV *const OffsetArray[1] = {PostLoopOffset}; + Result = + expandAddToGEP(OffsetArray, OffsetArray + 1, PTy, IntTy, Result); + } } else { Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateAdd(Result, diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 8a5d1047366..7a8d4f3be24 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -149,6 +149,10 @@ bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { return TTIImpl->isLegalMaskedGather(DataType); } +bool TargetTransformInfo::prefersVectorizedAddressing() const { + return TTIImpl->prefersVectorizedAddressing(); +} + int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 8e6c1096eec..bd79cd56a18 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -149,8 +149,10 @@ static KnownBits computeKnownBits(const Value *V, unsigned Depth, KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth, AssumptionCache *AC, const Instruction *CxtI, - const DominatorTree *DT) { - return ::computeKnownBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT)); + const DominatorTree *DT, + OptimizationRemarkEmitter *ORE) { + return ::computeKnownBits(V, Depth, + Query(DL, AC, safeCxtI(V, CxtI), DT, ORE)); } bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1f8b50342c2..c1d81ac203a 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -660,10 +660,12 @@ void ModuleBitcodeWriter::writeAttributeTable() { SmallVector Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - const AttributeList &A = Attrs[i]; - for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) - Record.push_back( - VE.getAttributeGroupID({A.getSlotIndex(i), A.getSlotAttributes(i)})); + AttributeList AL = Attrs[i]; + for (unsigned i = AL.index_begin(), e = AL.index_end(); i != e; ++i) { + AttributeSet AS = AL.getAttributes(i); + if (AS.hasAttributes()) + Record.push_back(VE.getAttributeGroupID({i, AS})); + } Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); @@ -3413,30 +3415,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() { // Create value IDs for undefined references. forEachSummary([&](GVInfo I) { - if (auto *VS = dyn_cast(I.second)) { - for (auto &RI : VS->refs()) - assignValueId(RI.getGUID()); - return; - } - - auto *FS = dyn_cast(I.second); - if (!FS) - return; - for (auto &RI : FS->refs()) + for (auto &RI : I.second->refs()) assignValueId(RI.getGUID()); - - for (auto &EI : FS->calls()) { - GlobalValue::GUID GUID = EI.first.getGUID(); - if (!hasValueId(GUID)) { - // For SamplePGO, the indirect call targets for local functions will - // have its original name annotated in profile. We try to find the - // corresponding PGOFuncName as the GUID. - GUID = Index.getGUIDFromOriginalID(GUID); - if (GUID == 0 || !hasValueId(GUID)) - continue; - } - assignValueId(GUID); - } }); for (const auto &GVI : valueIds()) { diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index fd76400331d..bb626baabd1 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -902,8 +902,11 @@ void ValueEnumerator::EnumerateAttributes(AttributeList PAL) { } // Do lookups for all attribute groups. - for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { - IndexAndAttrSet Pair = {PAL.getSlotIndex(i), PAL.getSlotAttributes(i)}; + for (unsigned i = PAL.index_begin(), e = PAL.index_end(); i != e; ++i) { + AttributeSet AS = PAL.getAttributes(i); + if (!AS.hasAttributes()) + continue; + IndexAndAttrSet Pair = {i, AS}; unsigned &Entry = AttributeGroupMap[Pair]; if (Entry == 0) { AttributeGroups.push_back(Pair); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7ddb86d80bf..d72cf592298 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -628,12 +628,15 @@ void AsmPrinter::EmitDebugThreadLocal(const MCExpr *Value, /// EmitFunctionHeader - This method emits the header for the current /// function. void AsmPrinter::EmitFunctionHeader() { + const Function *F = MF->getFunction(); + + if (isVerbose()) + OutStreamer->GetCommentOS() << "-- Begin function " << F->getName() << '\n'; + // Print out constants referenced by the function EmitConstantPool(); // Print the 'header' of function. - const Function *F = MF->getFunction(); - OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); @@ -1107,6 +1110,9 @@ void AsmPrinter::EmitFunctionBody() { HI.Handler->endFunction(MF); } + if (isVerbose()) + OutStreamer->GetCommentOS() << "-- End function\n"; + OutStreamer->AddBlankLine(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index 1b39e46ee46..114aea391a8 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -1025,11 +1025,11 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) { bool EmptyPrologue = true; for (const auto &MBB : *MF) { for (const auto &MI : MBB) { - if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) { PrologEndLoc = MI.getDebugLoc(); break; - } else if (!MI.isDebugValue()) { + } else if (!MI.isMetaInstruction()) { EmptyPrologue = false; } } @@ -1562,7 +1562,7 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) { EnumeratorCount++; } } - FTI = FLRB.end(); + FTI = FLRB.end(true); } std::string FullName = getFullyQualifiedName(Ty); @@ -1869,7 +1869,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) { MemberCount++; } - TypeIndex FieldTI = FLBR.end(); + TypeIndex FieldTI = FLBR.end(true); return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount, !Info.NestedClasses.empty()); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp index 8e3b88d0af0..201030f0ac5 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -116,65 +116,17 @@ void DIEHash::addParentContext(const DIE &Parent) { // Collect all of the attributes for a particular DIE in single structure. void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { -#define COLLECT_ATTR(NAME) \ - case dwarf::NAME: \ - Attrs.NAME = V; \ - break for (const auto &V : Die.values()) { DEBUG(dbgs() << "Attribute: " << dwarf::AttributeString(V.getAttribute()) << " added.\n"); switch (V.getAttribute()) { - COLLECT_ATTR(DW_AT_name); - COLLECT_ATTR(DW_AT_accessibility); - COLLECT_ATTR(DW_AT_address_class); - COLLECT_ATTR(DW_AT_allocated); - COLLECT_ATTR(DW_AT_artificial); - COLLECT_ATTR(DW_AT_associated); - COLLECT_ATTR(DW_AT_binary_scale); - COLLECT_ATTR(DW_AT_bit_offset); - COLLECT_ATTR(DW_AT_bit_size); - COLLECT_ATTR(DW_AT_bit_stride); - COLLECT_ATTR(DW_AT_byte_size); - COLLECT_ATTR(DW_AT_byte_stride); - COLLECT_ATTR(DW_AT_const_expr); - COLLECT_ATTR(DW_AT_const_value); - COLLECT_ATTR(DW_AT_containing_type); - COLLECT_ATTR(DW_AT_count); - COLLECT_ATTR(DW_AT_data_bit_offset); - COLLECT_ATTR(DW_AT_data_location); - COLLECT_ATTR(DW_AT_data_member_location); - COLLECT_ATTR(DW_AT_decimal_scale); - COLLECT_ATTR(DW_AT_decimal_sign); - COLLECT_ATTR(DW_AT_default_value); - COLLECT_ATTR(DW_AT_digit_count); - COLLECT_ATTR(DW_AT_discr); - COLLECT_ATTR(DW_AT_discr_list); - COLLECT_ATTR(DW_AT_discr_value); - COLLECT_ATTR(DW_AT_encoding); - COLLECT_ATTR(DW_AT_enum_class); - COLLECT_ATTR(DW_AT_endianity); - COLLECT_ATTR(DW_AT_explicit); - COLLECT_ATTR(DW_AT_is_optional); - COLLECT_ATTR(DW_AT_location); - COLLECT_ATTR(DW_AT_lower_bound); - COLLECT_ATTR(DW_AT_mutable); - COLLECT_ATTR(DW_AT_ordering); - COLLECT_ATTR(DW_AT_picture_string); - COLLECT_ATTR(DW_AT_prototyped); - COLLECT_ATTR(DW_AT_small); - COLLECT_ATTR(DW_AT_segment); - COLLECT_ATTR(DW_AT_string_length); - COLLECT_ATTR(DW_AT_threads_scaled); - COLLECT_ATTR(DW_AT_upper_bound); - COLLECT_ATTR(DW_AT_use_location); - COLLECT_ATTR(DW_AT_use_UTF8); - COLLECT_ATTR(DW_AT_variable_parameter); - COLLECT_ATTR(DW_AT_virtuality); - COLLECT_ATTR(DW_AT_visibility); - COLLECT_ATTR(DW_AT_vtable_elem_location); - COLLECT_ATTR(DW_AT_type); +#define HANDLE_DIE_HASH_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME = V; \ + break; +#include "DIEHashAttributes.def" default: break; } @@ -366,62 +318,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { // Go through the attributes from \param Attrs in the order specified in 7.27.4 // and hash them. void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { -#define ADD_ATTR(ATTR) \ +#define HANDLE_DIE_HASH_ATTR(NAME) \ { \ - if (ATTR) \ - hashAttribute(ATTR, Tag); \ + if (Attrs.NAME) \ + hashAttribute(Attrs.NAME, Tag); \ } - - ADD_ATTR(Attrs.DW_AT_name); - ADD_ATTR(Attrs.DW_AT_accessibility); - ADD_ATTR(Attrs.DW_AT_address_class); - ADD_ATTR(Attrs.DW_AT_allocated); - ADD_ATTR(Attrs.DW_AT_artificial); - ADD_ATTR(Attrs.DW_AT_associated); - ADD_ATTR(Attrs.DW_AT_binary_scale); - ADD_ATTR(Attrs.DW_AT_bit_offset); - ADD_ATTR(Attrs.DW_AT_bit_size); - ADD_ATTR(Attrs.DW_AT_bit_stride); - ADD_ATTR(Attrs.DW_AT_byte_size); - ADD_ATTR(Attrs.DW_AT_byte_stride); - ADD_ATTR(Attrs.DW_AT_const_expr); - ADD_ATTR(Attrs.DW_AT_const_value); - ADD_ATTR(Attrs.DW_AT_containing_type); - ADD_ATTR(Attrs.DW_AT_count); - ADD_ATTR(Attrs.DW_AT_data_bit_offset); - ADD_ATTR(Attrs.DW_AT_data_location); - ADD_ATTR(Attrs.DW_AT_data_member_location); - ADD_ATTR(Attrs.DW_AT_decimal_scale); - ADD_ATTR(Attrs.DW_AT_decimal_sign); - ADD_ATTR(Attrs.DW_AT_default_value); - ADD_ATTR(Attrs.DW_AT_digit_count); - ADD_ATTR(Attrs.DW_AT_discr); - ADD_ATTR(Attrs.DW_AT_discr_list); - ADD_ATTR(Attrs.DW_AT_discr_value); - ADD_ATTR(Attrs.DW_AT_encoding); - ADD_ATTR(Attrs.DW_AT_enum_class); - ADD_ATTR(Attrs.DW_AT_endianity); - ADD_ATTR(Attrs.DW_AT_explicit); - ADD_ATTR(Attrs.DW_AT_is_optional); - ADD_ATTR(Attrs.DW_AT_location); - ADD_ATTR(Attrs.DW_AT_lower_bound); - ADD_ATTR(Attrs.DW_AT_mutable); - ADD_ATTR(Attrs.DW_AT_ordering); - ADD_ATTR(Attrs.DW_AT_picture_string); - ADD_ATTR(Attrs.DW_AT_prototyped); - ADD_ATTR(Attrs.DW_AT_small); - ADD_ATTR(Attrs.DW_AT_segment); - ADD_ATTR(Attrs.DW_AT_string_length); - ADD_ATTR(Attrs.DW_AT_threads_scaled); - ADD_ATTR(Attrs.DW_AT_upper_bound); - ADD_ATTR(Attrs.DW_AT_use_location); - ADD_ATTR(Attrs.DW_AT_use_UTF8); - ADD_ATTR(Attrs.DW_AT_variable_parameter); - ADD_ATTR(Attrs.DW_AT_virtuality); - ADD_ATTR(Attrs.DW_AT_visibility); - ADD_ATTR(Attrs.DW_AT_vtable_elem_location); - ADD_ATTR(Attrs.DW_AT_type); - +#include "DIEHashAttributes.def" // FIXME: Add the extended attributes. } @@ -478,10 +380,12 @@ void DIEHash::computeHash(const DIE &Die) { /// DWARF4 standard. It is an md5 hash of the flattened description of the DIE /// with the inclusion of the full CU and all top level CU entities. // TODO: Initialize the type chain at 0 instead of 1 for CU signatures. -uint64_t DIEHash::computeCUSignature(const DIE &Die) { +uint64_t DIEHash::computeCUSignature(StringRef DWOName, const DIE &Die) { Numbering.clear(); Numbering[&Die] = 1; + if (!DWOName.empty()) + Hash.update(DWOName); // Hash the DIE. computeHash(Die); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h index 996cd7ef3d2..29337ae38a9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -28,64 +28,15 @@ class CompileUnit; class DIEHash { // Collection of all attributes used in hashing a particular DIE. struct DIEAttrs { - DIEValue DW_AT_name; - DIEValue DW_AT_accessibility; - DIEValue DW_AT_address_class; - DIEValue DW_AT_allocated; - DIEValue DW_AT_artificial; - DIEValue DW_AT_associated; - DIEValue DW_AT_binary_scale; - DIEValue DW_AT_bit_offset; - DIEValue DW_AT_bit_size; - DIEValue DW_AT_bit_stride; - DIEValue DW_AT_byte_size; - DIEValue DW_AT_byte_stride; - DIEValue DW_AT_const_expr; - DIEValue DW_AT_const_value; - DIEValue DW_AT_containing_type; - DIEValue DW_AT_count; - DIEValue DW_AT_data_bit_offset; - DIEValue DW_AT_data_location; - DIEValue DW_AT_data_member_location; - DIEValue DW_AT_decimal_scale; - DIEValue DW_AT_decimal_sign; - DIEValue DW_AT_default_value; - DIEValue DW_AT_digit_count; - DIEValue DW_AT_discr; - DIEValue DW_AT_discr_list; - DIEValue DW_AT_discr_value; - DIEValue DW_AT_encoding; - DIEValue DW_AT_enum_class; - DIEValue DW_AT_endianity; - DIEValue DW_AT_explicit; - DIEValue DW_AT_is_optional; - DIEValue DW_AT_location; - DIEValue DW_AT_lower_bound; - DIEValue DW_AT_mutable; - DIEValue DW_AT_ordering; - DIEValue DW_AT_picture_string; - DIEValue DW_AT_prototyped; - DIEValue DW_AT_small; - DIEValue DW_AT_segment; - DIEValue DW_AT_string_length; - DIEValue DW_AT_threads_scaled; - DIEValue DW_AT_upper_bound; - DIEValue DW_AT_use_location; - DIEValue DW_AT_use_UTF8; - DIEValue DW_AT_variable_parameter; - DIEValue DW_AT_virtuality; - DIEValue DW_AT_visibility; - DIEValue DW_AT_vtable_elem_location; - DIEValue DW_AT_type; - - // Insert any additional ones here... +#define HANDLE_DIE_HASH_ATTR(NAME) DIEValue NAME; +#include "DIEHashAttributes.def" }; public: DIEHash(AsmPrinter *A = nullptr) : AP(A) {} /// \brief Computes the CU signature. - uint64_t computeCUSignature(const DIE &Die); + uint64_t computeCUSignature(StringRef DWOName, const DIE &Die); /// \brief Computes the type signature. uint64_t computeTypeSignature(const DIE &Die); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def new file mode 100644 index 00000000000..28a02390fcc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHashAttributes.def @@ -0,0 +1,55 @@ +#ifndef HANDLE_DIE_HASH_ATTR +#error "Missing macro definition of HANDLE_DIE_HASH_ATTR" +#endif + +HANDLE_DIE_HASH_ATTR(DW_AT_name) +HANDLE_DIE_HASH_ATTR(DW_AT_accessibility) +HANDLE_DIE_HASH_ATTR(DW_AT_address_class) +HANDLE_DIE_HASH_ATTR(DW_AT_allocated) +HANDLE_DIE_HASH_ATTR(DW_AT_artificial) +HANDLE_DIE_HASH_ATTR(DW_AT_associated) +HANDLE_DIE_HASH_ATTR(DW_AT_binary_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_size) +HANDLE_DIE_HASH_ATTR(DW_AT_bit_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_size) +HANDLE_DIE_HASH_ATTR(DW_AT_byte_stride) +HANDLE_DIE_HASH_ATTR(DW_AT_const_expr) +HANDLE_DIE_HASH_ATTR(DW_AT_const_value) +HANDLE_DIE_HASH_ATTR(DW_AT_containing_type) +HANDLE_DIE_HASH_ATTR(DW_AT_count) +HANDLE_DIE_HASH_ATTR(DW_AT_data_bit_offset) +HANDLE_DIE_HASH_ATTR(DW_AT_data_location) +HANDLE_DIE_HASH_ATTR(DW_AT_data_member_location) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_scale) +HANDLE_DIE_HASH_ATTR(DW_AT_decimal_sign) +HANDLE_DIE_HASH_ATTR(DW_AT_default_value) +HANDLE_DIE_HASH_ATTR(DW_AT_digit_count) +HANDLE_DIE_HASH_ATTR(DW_AT_discr) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_list) +HANDLE_DIE_HASH_ATTR(DW_AT_discr_value) +HANDLE_DIE_HASH_ATTR(DW_AT_encoding) +HANDLE_DIE_HASH_ATTR(DW_AT_enum_class) +HANDLE_DIE_HASH_ATTR(DW_AT_endianity) +HANDLE_DIE_HASH_ATTR(DW_AT_explicit) +HANDLE_DIE_HASH_ATTR(DW_AT_is_optional) +HANDLE_DIE_HASH_ATTR(DW_AT_location) +HANDLE_DIE_HASH_ATTR(DW_AT_lower_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_mutable) +HANDLE_DIE_HASH_ATTR(DW_AT_ordering) +HANDLE_DIE_HASH_ATTR(DW_AT_picture_string) +HANDLE_DIE_HASH_ATTR(DW_AT_prototyped) +HANDLE_DIE_HASH_ATTR(DW_AT_small) +HANDLE_DIE_HASH_ATTR(DW_AT_segment) +HANDLE_DIE_HASH_ATTR(DW_AT_string_length) +HANDLE_DIE_HASH_ATTR(DW_AT_threads_scaled) +HANDLE_DIE_HASH_ATTR(DW_AT_upper_bound) +HANDLE_DIE_HASH_ATTR(DW_AT_use_location) +HANDLE_DIE_HASH_ATTR(DW_AT_use_UTF8) +HANDLE_DIE_HASH_ATTR(DW_AT_variable_parameter) +HANDLE_DIE_HASH_ATTR(DW_AT_virtuality) +HANDLE_DIE_HASH_ATTR(DW_AT_visibility) +HANDLE_DIE_HASH_ATTR(DW_AT_vtable_elem_location) +HANDLE_DIE_HASH_ATTR(DW_AT_type) + +#undef HANDLE_DIE_HASH_ATTR diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp index 826162ad47c..0971c594220 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp @@ -115,7 +115,8 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) { return getBaseTypeSize(BaseType); } -bool hasDebugInfo(const MachineModuleInfo *MMI, const MachineFunction *MF) { +static bool hasDebugInfo(const MachineModuleInfo *MMI, + const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return false; auto *SP = MF->getFunction()->getSubprogram(); @@ -223,9 +224,9 @@ void DebugHandlerBase::endInstruction() { return; assert(CurMI != nullptr); - // Don't create a new label after DBG_VALUE instructions. - // They don't generate code. - if (!CurMI->isDebugValue()) { + // Don't create a new label after DBG_VALUE and other instructions that don't + // generate code. + if (!CurMI->isMetaInstruction()) { PrevLabel = nullptr; PrevInstBB = CurMI->getParent(); } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index e172712cf88..04073b3aed6 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -760,7 +760,7 @@ void DwarfCompileUnit::emitHeader(bool UseOffsets) { /// addGlobalName - Add a new global name to the compile unit. void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Name.str(); GlobalNames[FullName] = &Die; @@ -768,7 +768,7 @@ void DwarfCompileUnit::addGlobalName(StringRef Name, const DIE &Die, void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Name.str(); // Insert, allowing the entry to remain as-is if it's already present @@ -781,7 +781,7 @@ void DwarfCompileUnit::addGlobalNameForTypeUnit(StringRef Name, /// Add a new global type to the unit. void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); GlobalTypes[FullName] = &Die; @@ -789,7 +789,7 @@ void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty, const DIScope *Context) { - if (includeMinimalInlineScopes()) + if (!DD->hasDwarfPubSections(includeMinimalInlineScopes())) return; std::string FullName = getParentContextString(Context) + Ty->getName().str(); // Insert, allowing the entry to remain as-is if it's already present diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 77e9e671529..b8f57472f17 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -77,8 +77,6 @@ class DwarfCompileUnit final : public DwarfUnit { bool isDwoUnit() const override; - bool includeMinimalInlineScopes() const; - DenseMap &getAbstractSPDies() { if (isDwoUnit() && !DD->shareAcrossDWOCUs()) return AbstractSPDies; @@ -101,6 +99,8 @@ public: return Skeleton; } + bool includeMinimalInlineScopes() const; + void initStmtList(); /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3410b98d777..bf27516e1cc 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -252,12 +252,6 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) // Handle split DWARF. HasSplitDwarf = !Asm->TM.Options.MCOptions.SplitDwarfFile.empty(); - // Pubnames/pubtypes on by default for GDB. - if (DwarfPubSections == Default) - HasDwarfPubSections = tuneForGDB(); - else - HasDwarfPubSections = DwarfPubSections == Enable; - // SCE defaults to linkage names only for abstract subprograms. if (DwarfLinkageNames == DefaultLinkageNames) UseAllLinkageNames = !tuneForSCE(); @@ -380,19 +374,35 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU, // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram // was inlined from another compile unit. - auto &CU = *CUMap.lookup(SP->getUnit()); - if (auto *SkelCU = CU.getSkeleton()) { - (shareAcrossDWOCUs() ? CU : SrcCU) - .constructAbstractSubprogramScopeDIE(Scope); - if (CU.getCUNode()->getSplitDebugInlining()) - SkelCU->constructAbstractSubprogramScopeDIE(Scope); - } else { - CU.constructAbstractSubprogramScopeDIE(Scope); + if (useSplitDwarf() && !shareAcrossDWOCUs() && !SP->getUnit()->getSplitDebugInlining()) + // Avoid building the original CU if it won't be used + SrcCU.constructAbstractSubprogramScopeDIE(Scope); + else { + auto &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + if (auto *SkelCU = CU.getSkeleton()) { + (shareAcrossDWOCUs() ? CU : SrcCU) + .constructAbstractSubprogramScopeDIE(Scope); + if (CU.getCUNode()->getSplitDebugInlining()) + SkelCU->constructAbstractSubprogramScopeDIE(Scope); + } else + CU.constructAbstractSubprogramScopeDIE(Scope); } } -void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { - if (!GenerateGnuPubSections) +bool DwarfDebug::hasDwarfPubSections(bool includeMinimalInlineScopes) const { + // Opting in to GNU Pubnames/types overrides the default to ensure these are + // generated for things like Gold's gdb_index generation. + if (GenerateGnuPubSections) + return true; + + if (DwarfPubSections == Default) + return tuneForGDB() && !includeMinimalInlineScopes; + + return DwarfPubSections == Enable; +} + +void DwarfDebug::addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const { + if (!hasDwarfPubSections(U.includeMinimalInlineScopes())) return; U.addFlag(D, dwarf::DW_AT_GNU_pubnames); @@ -401,7 +411,9 @@ void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { // Create new DwarfCompileUnit for the given metadata node with tag // DW_TAG_compile_unit. DwarfCompileUnit & -DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { +DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) { + if (auto *CU = CUMap.lookup(DIUnit)) + return *CU; StringRef FN = DIUnit->getFilename(); CompilationDir = DIUnit->getDirectory(); @@ -534,7 +546,12 @@ void DwarfDebug::beginModule() { } for (DICompileUnit *CUNode : M->debug_compile_units()) { - DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); + if (CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() && + CUNode->getGlobalVariables().empty() && + CUNode->getImportedEntities().empty() && CUNode->getMacros().empty()) + continue; + + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(CUNode); for (auto *IE : CUNode->getImportedEntities()) CU.addImportedEntity(IE); @@ -581,11 +598,12 @@ void DwarfDebug::finishVariableDefinitions() { } void DwarfDebug::finishSubprogramDefinitions() { - for (const DISubprogram *SP : ProcessedSPNodes) - if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug) - forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) { - CU.finishSubprogramDefinition(SP); - }); + for (const DISubprogram *SP : ProcessedSPNodes) { + assert(SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug); + forBothCUs( + getOrCreateDwarfCompileUnit(SP->getUnit()), + [&](DwarfCompileUnit &CU) { CU.finishSubprogramDefinition(SP); }); + } } void DwarfDebug::finalizeModuleInfo() { @@ -595,6 +613,13 @@ void DwarfDebug::finalizeModuleInfo() { finishVariableDefinitions(); + // Include the DWO file name in the hash if there's more than one CU. + // This handles ThinLTO's situation where imported CUs may very easily be + // duplicate with the same CU partially imported into another ThinLTO unit. + StringRef DWOName; + if (CUMap.size() > 1) + DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile; + // Handle anything that needs to be done on a per-unit basis after // all other generation. for (const auto &P : CUMap) { @@ -609,7 +634,8 @@ void DwarfDebug::finalizeModuleInfo() { auto *SkCU = TheCU.getSkeleton(); if (useSplitDwarf()) { // Emit a unique identifier for this CU. - uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie()); + uint64_t ID = + DIEHash(Asm).computeCUSignature(DWOName, TheCU.getUnitDie()); TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, ID); SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, @@ -718,7 +744,9 @@ void DwarfDebug::endModule() { } // Emit the pubnames and pubtypes sections if requested. - if (HasDwarfPubSections) { + // The condition is optimistically correct - any CU not using GMLT (& + // implicit/default pubnames state) might still have pubnames. + if (hasDwarfPubSections(/* gmlt */ false)) { emitDebugPubNames(GenerateGnuPubSections); emitDebugPubTypes(GenerateGnuPubSections); } @@ -1028,8 +1056,12 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { DebugHandlerBase::beginInstruction(MI); assert(CurMI); + const auto *SP = MI->getParent()->getParent()->getFunction()->getSubprogram(); + if (!SP || SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) + return; + // Check if source location changes, but ignore DBG_VALUE and CFI locations. - if (MI->isDebugValue() || MI->isCFIInstruction()) + if (MI->isMetaInstruction()) return; const DebugLoc &DL = MI->getDebugLoc(); // When we emit a line-0 record, we don't update PrevInstLoc; so look at @@ -1111,7 +1143,7 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { // the beginning of the function body. for (const auto &MBB : *MF) for (const auto &MI : MBB) - if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) return MI.getDebugLoc(); return DebugLoc(); @@ -1122,40 +1154,28 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) { CurFn = MF; - if (LScopes.empty()) + auto *SP = MF->getFunction()->getSubprogram(); + assert(LScopes.empty() || SP == LScopes.getCurrentFunctionScope()->getScopeNode()); + if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) return; + DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit()); + // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function // belongs to so that we add to the correct per-cu line table in the // non-asm case. - LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); - // FnScope->getScopeNode() and DI->second should represent the same function, - // though they may not be the same MDNode due to inline functions merged in - // LTO where the debug info metadata still differs (either due to distinct - // written differences - two versions of a linkonce_odr function - // written/copied into two separate files, or some sub-optimal metadata that - // isn't structurally identical (see: file path/name info from clang, which - // includes the directory of the cpp file being built, even when the file name - // is absolute (such as an <> lookup header))) - auto *SP = cast(FnScope->getScopeNode()); - DwarfCompileUnit *TheCU = CUMap.lookup(SP->getUnit()); - if (!TheCU) { - assert(SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug && - "DICompileUnit missing from llvm.dbg.cu?"); - return; - } if (Asm->OutStreamer->hasRawTextSupport()) // Use a single line table if we are generating assembly. Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); else - Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID()); // Record beginning of function. PrologEndLoc = findPrologueEndLoc(MF); - if (DILocation *L = PrologEndLoc) { + if (PrologEndLoc) { // We'd like to list the prologue as "not statements" but GDB behaves // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - auto *SP = L->getInlinedAtScope()->getSubprogram(); + auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram(); recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); } } @@ -1395,7 +1415,7 @@ void DwarfDebug::emitDebugPubSection( const auto &Globals = (TheU->*Accessor)(); - if (Globals.empty()) + if (!hasDwarfPubSections(TheU->includeMinimalInlineScopes())) continue; if (auto *Skeleton = TheU->getSkeleton()) @@ -1544,6 +1564,9 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { // Emit locations into the debug loc section. void DwarfDebug::emitDebugLoc() { + if (DebugLocs.getLists().empty()) + return; + // Start the dwarf loc section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); @@ -1755,6 +1778,9 @@ void DwarfDebug::emitDebugARanges() { /// Emit address ranges into a debug ranges section. void DwarfDebug::emitDebugRanges() { + if (CUMap.empty()) + return; + // Start the dwarf ranges section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); @@ -1834,6 +1860,9 @@ void DwarfDebug::emitMacroFile(DIMacroFile &F, DwarfCompileUnit &U) { /// Emit macros into a debug macinfo section. void DwarfDebug::emitDebugMacinfo() { + if (CUMap.empty()) + return; + // Start the dwarf macinfo section. Asm->OutStreamer->SwitchSection( Asm->getObjFileLowering().getDwarfMacinfoSection()); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index b9c5aa9ffb2..ebfba4cfc27 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -246,9 +246,6 @@ class DwarfDebug : public DebugHandlerBase { std::pair, const DICompositeType *>, 1> TypeUnitsUnderConstruction; - /// Whether to emit the pubnames/pubtypes sections. - bool HasDwarfPubSections; - /// Whether to use the GNU TLS opcode (instead of the standard opcode). bool UseGNUTLSOpcode; @@ -415,11 +412,11 @@ class DwarfDebug : public DebugHandlerBase { /// Flags to let the linker know we have emitted new style pubnames. Only /// emit it here if we don't have a skeleton CU for split dwarf. - void addGnuPubAttributes(DwarfUnit &U, DIE &D) const; + void addGnuPubAttributes(DwarfCompileUnit &U, DIE &D) const; /// Create new DwarfCompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit); + DwarfCompileUnit &getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit); /// Construct imported_module or imported_declaration DIE. void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, @@ -556,6 +553,8 @@ public: /// A helper function to check whether the DIE for a given Scope is /// going to be null. bool isLexicalScopeDIENull(LexicalScope *Scope); + + bool hasDwarfPubSections(bool includeMinimalInlineScopes) const; }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp index 984973cf3a3..344136b1f19 100644 --- a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -96,7 +96,7 @@ namespace { char AtomicExpand::ID = 0; char &llvm::AtomicExpandID = AtomicExpand::ID; -INITIALIZE_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions", +INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false, false) FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); } diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index a67e194356d..d3fced436b6 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -24,8 +24,6 @@ #include using namespace llvm; -#define DEBUG_TYPE "basictti" - // This flag is used by the template base class for BasicTTIImpl, and here to // provide a definition. cl::opt diff --git a/contrib/llvm/lib/CodeGen/BranchCoalescing.cpp b/contrib/llvm/lib/CodeGen/BranchCoalescing.cpp index efdf300df85..2c41b597843 100644 --- a/contrib/llvm/lib/CodeGen/BranchCoalescing.cpp +++ b/contrib/llvm/lib/CodeGen/BranchCoalescing.cpp @@ -27,7 +27,7 @@ using namespace llvm; -#define DEBUG_TYPE "coal-branch" +#define DEBUG_TYPE "branch-coalescing" static cl::opt EnableBranchCoalescing("enable-branch-coalesce", cl::Hidden, @@ -193,11 +193,11 @@ public: char BranchCoalescing::ID = 0; char &llvm::BranchCoalescingID = BranchCoalescing::ID; -INITIALIZE_PASS_BEGIN(BranchCoalescing, "branch-coalescing", +INITIALIZE_PASS_BEGIN(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_END(BranchCoalescing, "branch-coalescing", "Branch Coalescing", +INITIALIZE_PASS_END(BranchCoalescing, DEBUG_TYPE, "Branch Coalescing", false, false) BranchCoalescing::CoalescingCandidateInfo::CoalescingCandidateInfo() diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index b63d9f4a435..03ceac10bee 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -44,7 +44,7 @@ #include using namespace llvm; -#define DEBUG_TYPE "branchfolding" +#define DEBUG_TYPE "branch-folder" STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); @@ -89,7 +89,7 @@ namespace { char BranchFolderPass::ID = 0; char &llvm::BranchFolderPassID = BranchFolderPass::ID; -INITIALIZE_PASS(BranchFolderPass, "branch-folder", +INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE, "Control Flow Optimizer", false, false) bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { @@ -153,13 +153,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, TriedMerging.clear(); + MachineRegisterInfo &MRI = MF.getRegInfo(); AfterBlockPlacement = AfterPlacement; TII = tii; TRI = tri; MMI = mmi; MLI = mli; + this->MRI = &MRI; - MachineRegisterInfo &MRI = MF.getRegInfo(); UpdateLiveIns = MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF); if (!UpdateLiveIns) MRI.invalidateLiveness(); @@ -351,7 +352,7 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, if (UpdateLiveIns) { NewDest->clearLiveIns(); - computeLiveIns(LiveRegs, *TRI, *NewDest); + computeLiveIns(LiveRegs, *MRI, *NewDest); } ++NumTailMerge; @@ -388,7 +389,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); if (UpdateLiveIns) - computeLiveIns(LiveRegs, *TRI, *NewMBB); + computeLiveIns(LiveRegs, *MRI, *NewMBB); // Add the new block to the funclet. const auto &FuncletI = FuncletMembership.find(&CurMBB); diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index 4852721eea1..92681137e4c 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -108,6 +108,7 @@ namespace llvm { bool UpdateLiveIns; unsigned MinCommonTailLength; const TargetInstrInfo *TII; + const MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; MachineModuleInfo *MMI; MachineLoopInfo *MLI; diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp index 7af13694166..e3de61c7816 100644 --- a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp +++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp @@ -259,7 +259,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI, // Need to fix live-in lists if we track liveness. if (TRI->trackLivenessAfterRegAlloc(*MF)) - computeLiveIns(LiveRegs, *TRI, *NewBB); + computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB); ++NumSplit; @@ -345,6 +345,10 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) { // Do it here since if there's no split, no update is needed. MBB->replaceSuccessor(FBB, &NewBB); NewBB.addSuccessor(FBB); + + // Need to fix live-in lists if we track liveness. + if (TRI->trackLivenessAfterRegAlloc(*MF)) + computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB); } // We now have an appropriate fall-through block in place (either naturally or diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index 3a1a3020a8d..4e85708efaf 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -257,10 +257,10 @@ class TypePromotionTransaction; } char CodeGenPrepare::ID = 0; -INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare", +INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare", +INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE, "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index 7ac2e544543..265dda16bfa 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -23,7 +23,7 @@ using namespace llvm; -#define DEBUG_TYPE "codegen-dce" +#define DEBUG_TYPE "dead-mi-elimination" STATISTIC(NumDeletes, "Number of dead instructions deleted"); @@ -54,7 +54,7 @@ namespace { char DeadMachineInstructionElim::ID = 0; char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; -INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", +INITIALIZE_PASS(DeadMachineInstructionElim, DEBUG_TYPE, "Remove dead machine instructions", false, false) bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp index 6f4ea1912cf..ab9a0592e01 100644 --- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp +++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp @@ -132,8 +132,7 @@ private: char DetectDeadLanes::ID = 0; char &llvm::DetectDeadLanesID = DetectDeadLanes::ID; -INITIALIZE_PASS(DetectDeadLanes, "detect-dead-lanes", "Detect Dead Lanes", - false, false) +INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false) /// Returns true if \p MI will get lowered to a series of COPY instructions. /// We call this a COPY-like instruction. diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index 1ef4d866065..06ae5cd72c8 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -71,12 +71,12 @@ namespace { } // end anonymous namespace char DwarfEHPrepare::ID = 0; -INITIALIZE_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", +INITIALIZE_PASS_BEGIN(DwarfEHPrepare, DEBUG_TYPE, "Prepare DWARF exceptions", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_END(DwarfEHPrepare, "dwarfehprepare", +INITIALIZE_PASS_END(DwarfEHPrepare, DEBUG_TYPE, "Prepare DWARF exceptions", false, false) FunctionPass *llvm::createDwarfEHPass() { return new DwarfEHPrepare(); } diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp index 72917279645..402afe75b14 100644 --- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -616,13 +616,13 @@ private: char EarlyIfConverter::ID = 0; char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; -INITIALIZE_PASS_BEGIN(EarlyIfConverter, - "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE, + "Early If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(EarlyIfConverter, - "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE, + "Early If Converter", false, false) void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp index 0ec79c2e69f..88d422a0f54 100644 --- a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp @@ -41,7 +41,7 @@ namespace { char ExpandISelPseudos::ID = 0; char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; -INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", +INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE, "Expand ISel Pseudo-instructions", false, false) bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index e860906043d..27cd639b2a4 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -58,7 +58,7 @@ private: char ExpandPostRA::ID = 0; char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; -INITIALIZE_PASS(ExpandPostRA, "postrapseudos", +INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE, "Post-RA pseudo instruction expansion pass", false, false) /// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp index d61afad4db5..0bdd5e64a7f 100644 --- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -37,7 +37,7 @@ public: char FuncletLayout::ID = 0; char &llvm::FuncletLayoutID = FuncletLayout::ID; -INITIALIZE_PASS(FuncletLayout, "funclet-layout", +INITIALIZE_PASS(FuncletLayout, DEBUG_TYPE, "Contiguously Lay Out Funclets", false, false) bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp index fcd2722f1c2..29d1209bb02 100644 --- a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp @@ -26,6 +26,7 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) { void llvm::initializeGlobalISel(PassRegistry &Registry) { initializeIRTranslatorPass(Registry); initializeLegalizerPass(Registry); + initializeLocalizerPass(Registry); initializeRegBankSelectPass(Registry); initializeInstructionSelectPass(Registry); } diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp new file mode 100644 index 00000000000..bdca732b4e3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -0,0 +1,125 @@ +//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file implements the Localizer class. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/Localizer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "localizer" + +using namespace llvm; + +char Localizer::ID = 0; +INITIALIZE_PASS(Localizer, DEBUG_TYPE, + "Move/duplicate certain instructions close to their use", false, + false); + +Localizer::Localizer() : MachineFunctionPass(ID) { + initializeLocalizerPass(*PassRegistry::getPassRegistry()); +} + +void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); } + +bool Localizer::shouldLocalize(const MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + // Constants-like instructions should be close to their users. + // We don't want long live-ranges for them. + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_FRAME_INDEX: + return true; + } +} + +bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def, + MachineBasicBlock *&InsertMBB) { + MachineInstr &MIUse = *MOUse.getParent(); + InsertMBB = MIUse.getParent(); + if (MIUse.isPHI()) + InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB(); + return InsertMBB == Def.getParent(); +} + +bool Localizer::runOnMachineFunction(MachineFunction &MF) { + // If the ISel pipeline failed, do not bother running that pass. + if (MF.getProperties().hasProperty( + MachineFunctionProperties::Property::FailedISel)) + return false; + + DEBUG(dbgs() << "Localize instructions for: " << MF.getName() << '\n'); + + init(MF); + + bool Changed = false; + // Keep track of the instructions we localized. + // We won't need to process them if we see them later in the CFG. + SmallPtrSet LocalizedInstrs; + DenseMap, unsigned> MBBWithLocalDef; + // TODO: Do bottom up traversal. + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI)) + continue; + DEBUG(dbgs() << "Should localize: " << MI); + assert(MI.getDesc().getNumDefs() == 1 && + "More than one definition not supported yet"); + unsigned Reg = MI.getOperand(0).getReg(); + // Check if all the users of MI are local. + // We are going to invalidation the list of use operands, so we + // can't use range iterator. + for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end(); + MOIt != MOItEnd;) { + MachineOperand &MOUse = *MOIt++; + // Check if the use is already local. + MachineBasicBlock *InsertMBB; + DEBUG(MachineInstr &MIUse = *MOUse.getParent(); + dbgs() << "Checking use: " << MIUse + << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n'); + if (isLocalUse(MOUse, MI, InsertMBB)) + continue; + DEBUG(dbgs() << "Fixing non-local use\n"); + Changed = true; + auto MBBAndReg = std::make_pair(InsertMBB, Reg); + auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg); + if (NewVRegIt == MBBWithLocalDef.end()) { + // Create the localized instruction. + MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI); + LocalizedInstrs.insert(LocalizedMI); + // Move it at the right place. + MachineInstr &MIUse = *MOUse.getParent(); + if (MIUse.getParent() == InsertMBB) + InsertMBB->insert(MIUse, LocalizedMI); + else + InsertMBB->insert(InsertMBB->getFirstNonPHI(), LocalizedMI); + + // Set a new register for the definition. + unsigned NewReg = + MRI->createGenericVirtualRegister(MRI->getType(Reg)); + MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); + LocalizedMI->getOperand(0).setReg(NewReg); + NewVRegIt = + MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first; + DEBUG(dbgs() << "Inserted: " << *LocalizedMI); + } + DEBUG(dbgs() << "Update use with: " << PrintReg(NewVRegIt->second) + << '\n'); + // Update the user reg. + MOUse.setReg(NewVRegIt->second); + } + } + } + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp index 1ea53493994..23812a2a234 100644 --- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -192,10 +192,7 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", - false, false) -INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", - false, false) +INITIALIZE_PASS(GlobalMerge, DEBUG_TYPE, "Merge global variables", false, false) bool GlobalMerge::doMerge(SmallVectorImpl &Globals, Module &M, bool isConst, unsigned AddrSpace) const { diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 628d599a3cc..1c33f3b6800 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -39,7 +39,7 @@ using namespace llvm; -#define DEBUG_TYPE "ifcvt" +#define DEBUG_TYPE "if-converter" // Hidden options for help debugging. static cl::opt IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); @@ -316,9 +316,9 @@ namespace { char &llvm::IfConverterID = IfConverter::ID; -INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) -INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false) bool IfConverter::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF))) @@ -1588,32 +1588,22 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB); } - // To be able to insert code freely at the end of BBI we sometimes remove - // the branch from BBI to NextMBB temporarily. Remember if this happened. - bool RemovedBranchToNextMBB = false; if (CvtMBB.pred_size() > 1) { BBI.NonPredSize -= TII->removeBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); - // Keep the CFG updated. + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(&CvtMBB, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB); PredicateBlock(*CvtBBI, CvtMBB.end(), Cond); - // Remove the branch from the entry of the triangle to NextBB to be able to - // do the merge below. Keep the CFG updated, but remember we removed the - // branch since we do want to execute NextMBB, either by introducing a - // branch to it again, or merging it into the entry block. - // How it's handled is decided further down. - BBI.NonPredSize -= TII->removeBranch(*BBI.BB); - BBI.BB->removeSuccessor(&NextMBB, true); - RemovedBranchToNextMBB = true; - // Now merge the entry of the triangle with the true block. + BBI.NonPredSize -= TII->removeBranch(*BBI.BB); MergeBlocks(BBI, *CvtBBI, false); } @@ -1651,19 +1641,12 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // block. By not merging them, we make it possible to iteratively // ifcvt the blocks. if (!HasEarlyExit && - // We might have removed BBI from NextMBB's predecessor list above but - // we want it to be there, so consider that too. - (NextMBB.pred_size() == (RemovedBranchToNextMBB ? 0 : 1)) && - !NextBBI->HasFallThrough && + NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough && !NextMBB.hasAddressTaken()) { - // We will merge NextBBI into BBI, and thus remove the current - // fallthrough from BBI into CvtBBI. - BBI.BB->removeSuccessor(&CvtMBB, true); MergeBlocks(BBI, *NextBBI); FalseBBDead = true; } else { InsertUncondBranch(*BBI.BB, NextMBB, TII); - BBI.BB->addSuccessor(&NextMBB); BBI.HasFallThrough = false; } // Mixed predicated and unpredicated code. This cannot be iteratively @@ -1671,6 +1654,8 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { IterIfcvt = false; } + RemoveExtraEdges(BBI); + // Update block info. BB can be iteratively if-converted. if (!IterIfcvt) BBI.IsDone = true; diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp index 920c2a372a9..24e289dd4f1 100644 --- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp +++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -674,8 +674,8 @@ void ImplicitNullChecks::rewriteNullChecks( char ImplicitNullChecks::ID = 0; char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID; -INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks", +INITIALIZE_PASS_BEGIN(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks", +INITIALIZE_PASS_END(ImplicitNullChecks, DEBUG_TYPE, "Implicit null checks", false, false) diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp index bb29db301a9..ee4929c9148 100644 --- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp +++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp @@ -107,13 +107,11 @@ private: } // end anonymous namespace. char InterleavedAccess::ID = 0; -INITIALIZE_PASS_BEGIN( - InterleavedAccess, "interleaved-access", +INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_END( - InterleavedAccess, "interleaved-access", +INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE, "Lower interleaved memory accesses to target specific intrinsics", false, false) diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index 275d84e2c18..40ee7ea785f 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -86,8 +86,9 @@ void LexicalScopes::extractLexicalScopes( continue; } - // Ignore DBG_VALUE. It does not contribute to any instruction in output. - if (MInsn.isDebugValue()) + // Ignore DBG_VALUE and similar instruction that do not contribute to any + // instruction in the output. + if (MInsn.isMetaInstruction()) continue; if (RangeBeginMI) { diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp index f956974b1aa..b5e705f6455 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -43,7 +43,7 @@ using namespace llvm; -#define DEBUG_TYPE "live-debug-values" +#define DEBUG_TYPE "livedebugvalues" STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); @@ -283,7 +283,7 @@ public: char LiveDebugValues::ID = 0; char &llvm::LiveDebugValuesID = LiveDebugValues::ID; -INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis", +INITIALIZE_PASS(LiveDebugValues, DEBUG_TYPE, "Live DEBUG_VALUE analysis", false, false) /// Default construct and initialize the pass. diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index bcf7c8e99c7..bbd783367c9 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -45,7 +45,7 @@ using namespace llvm; -#define DEBUG_TYPE "livedebug" +#define DEBUG_TYPE "livedebugvars" static cl::opt EnableLDV("live-debug-variables", cl::init(true), @@ -54,11 +54,11 @@ EnableLDV("live-debug-variables", cl::init(true), STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted"); char LiveDebugVariables::ID = 0; -INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", +INITIALIZE_PASS_BEGIN(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars", +INITIALIZE_PASS_END(LiveDebugVariables, DEBUG_TYPE, "Debug Variable Analysis", false, false) void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index 3f5b8e19d1f..0c05dbeacba 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -1,4 +1,4 @@ -//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +//===- LiveIntervalAnalysis.cpp - Live Interval Analysis ------------------===// // // The LLVM Compiler Infrastructure // @@ -14,28 +14,45 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "LiveRangeCalc.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/VirtRegMap.h" -#include "llvm/IR/Value.h" +#include "llvm/MC/LaneBitmask.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include -#include +#include +#include +#include +#include +#include + using namespace llvm; #define DEBUG_TYPE "regalloc" @@ -59,11 +76,13 @@ static bool EnablePrecomputePhysRegs = false; #endif // NDEBUG namespace llvm { + cl::opt UseSegmentSetForPhysRegs( "use-segment-set-for-physregs", cl::Hidden, cl::init(true), cl::desc( "Use segment set for the computation of the live ranges of physregs.")); -} + +} // end namespace llvm void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -78,8 +97,7 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -LiveIntervals::LiveIntervals() : MachineFunctionPass(ID), - DomTree(nullptr), LRCalc(nullptr) { +LiveIntervals::LiveIntervals() : MachineFunctionPass(ID) { initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); } @@ -168,12 +186,10 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? - llvm::huge_valf : 0.0F; + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F; return new LiveInterval(reg, Weight); } - /// Compute the live interval of a virtual register, based on defs and uses. void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); @@ -337,7 +353,7 @@ static void createSegmentsForValues(LiveRange &LR, } } -typedef SmallVector, 16> ShrinkToUsesWorkList; +using ShrinkToUsesWorkList = SmallVector, 16>; static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, ShrinkToUsesWorkList &WorkList, @@ -593,7 +609,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, // Find all blocks that are reachable from KillMBB without leaving VNI's live // range. It is possible that KillMBB itself is reachable, so start a DFS // from each successor. - typedef df_iterator_default_set VisitedTy; + using VisitedTy = df_iterator_default_set; VisitedTy Visited; for (MachineBasicBlock *Succ : KillMBB->successors()) { for (df_ext_iterator @@ -822,7 +838,6 @@ LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr &startInst) { return S; } - //===----------------------------------------------------------------------===// // Register mask functions //===----------------------------------------------------------------------===// @@ -855,7 +870,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, return false; bool Found = false; - for (;;) { + while (true) { assert(*SlotI >= LiveI->start); // Loop over all slots overlapping this segment. while (*SlotI < LiveI->end) { diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp index 9f7d7cf5484..0dc1079b2ad 100644 --- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -53,7 +53,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) { continue; removeReg(Reg); } else if (O->isRegMask()) - removeRegsInMask(*O, nullptr); + removeRegsInMask(*O); } // Add uses to the set. @@ -142,66 +142,85 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI, /// Add live-in registers of basic block \p MBB to \p LiveRegs. void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) { for (const auto &LI : MBB.liveins()) { - MCSubRegIndexIterator S(LI.PhysReg, TRI); - if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) { - addReg(LI.PhysReg); + unsigned Reg = LI.PhysReg; + LaneBitmask Mask = LI.LaneMask; + MCSubRegIndexIterator S(Reg, TRI); + assert(Mask.any() && "Invalid livein mask"); + if (Mask.all() || !S.isValid()) { + addReg(Reg); continue; } for (; S.isValid(); ++S) { unsigned SI = S.getSubRegIndex(); - if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any()) + if ((Mask & TRI->getSubRegIndexLaneMask(SI)).any()) addReg(S.getSubReg()); } } } -/// Add pristine registers to the given \p LiveRegs. This function removes -/// actually saved callee save registers when \p InPrologueEpilogue is false. -static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, - const MachineFrameInfo &MFI, - const TargetRegisterInfo &TRI) { +/// Adds all callee saved registers to \p LiveRegs. +static void addCalleeSavedRegs(LivePhysRegs &LiveRegs, + const MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; - ++CSR) + for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR) LiveRegs.addReg(*CSR); +} + +/// Adds pristine registers to the given \p LiveRegs. Pristine registers are +/// callee saved registers that are unused in the function. +static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + /// Add all callee saved regs, then remove the ones that are saved+restored. + addCalleeSavedRegs(LiveRegs, MF); + /// Remove the ones that are not saved/restored; they are pristine. for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) LiveRegs.removeReg(Info.getReg()); } void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) { - // To get the live-outs we simply merge the live-ins of all successors. - for (const MachineBasicBlock *Succ : MBB.successors()) - addBlockLiveIns(*Succ); + if (!MBB.succ_empty()) { + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB.successors()) + addBlockLiveIns(*Succ); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers that are saved and + // restored (somewhere); This does not include callee saved registers that + // are unused and hence not saved and restored; they are called pristine. + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) { + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + addReg(Info.getReg()); + } + } } void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) { - const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) { - if (MBB.isReturnBlock()) { - // The return block has no successors whose live-ins we could merge - // below. So instead we add the callee saved registers manually. - const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (const MCPhysReg *I = MRI.getCalleeSavedRegs(); *I; ++I) - addReg(*I); - } else { - addPristines(*this, MF, MFI, *TRI); - } + if (!MBB.succ_empty()) { + const MachineFunction &MF = *MBB.getParent(); + addPristines(*this, MF); + addLiveOutsNoPristines(MBB); + } else if (MBB.isReturnBlock()) { + // For the return block: Add all callee saved registers. + const MachineFunction &MF = *MBB.getParent(); + const MachineFrameInfo &MFI = MF.getFrameInfo(); + if (MFI.isCalleeSavedInfoValid()) + addCalleeSavedRegs(*this, MF); } - - addLiveOutsNoPristines(MBB); } void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) { const MachineFunction &MF = *MBB.getParent(); - const MachineFrameInfo &MFI = MF.getFrameInfo(); - if (MFI.isCalleeSavedInfoValid()) - addPristines(*this, MF, MFI, *TRI); + addPristines(*this, MF); addBlockLiveIns(MBB); } -void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, +void llvm::computeLiveIns(LivePhysRegs &LiveRegs, + const MachineRegisterInfo &MRI, MachineBasicBlock &MBB) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); assert(MBB.livein_empty()); LiveRegs.init(TRI); LiveRegs.addLiveOutsNoPristines(MBB); @@ -209,10 +228,12 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI, LiveRegs.stepBackward(MI); for (unsigned Reg : LiveRegs) { + if (MRI.isReserved(Reg)) + continue; // Skip the register if we are about to add one of its super registers. bool ContainsSuperReg = false; for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) { - if (LiveRegs.contains(*SReg)) { + if (LiveRegs.contains(*SReg) && !MRI.isReserved(*SReg)) { ContainsSuperReg = true; break; } diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp index dbf1f96102d..b51f8b0aa6b 100644 --- a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -25,10 +25,10 @@ using namespace llvm; #define DEBUG_TYPE "livestacks" char LiveStacks::ID = 0; -INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", +INITIALIZE_PASS_BEGIN(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_END(LiveStacks, "livestacks", +INITIALIZE_PASS_END(LiveStacks, DEBUG_TYPE, "Live Stack Slot Analysis", false, false) char &llvm::LiveStacksID = LiveStacks::ID; diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp index e189fb0dd89..17cab0ae910 100644 --- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -103,10 +103,10 @@ namespace { char LocalStackSlotPass::ID = 0; char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; -INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc", +INITIALIZE_PASS_BEGIN(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", +INITIALIZE_PASS_END(LocalStackSlotPass, DEBUG_TYPE, "Local Stack Slot Allocation", false, false) diff --git a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp index 5fb5b747f47..0fc48d4e0b6 100644 --- a/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp +++ b/contrib/llvm/lib/CodeGen/LowerEmuTLS.cpp @@ -53,7 +53,7 @@ private: char LowerEmuTLS::ID = 0; -INITIALIZE_PASS(LowerEmuTLS, "loweremutls", +INITIALIZE_PASS(LowerEmuTLS, DEBUG_TYPE, "Add __emutls_[vt]. variables for emultated TLS model", false, false) diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 9c7367b4c78..4d1ec11df46 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -26,7 +26,7 @@ using namespace llvm; -#define DEBUG_TYPE "block-freq" +#define DEBUG_TYPE "machine-block-freq" static cl::opt ViewMachineBlockFreqPropagationDAG( @@ -149,11 +149,11 @@ struct DOTGraphTraits } // end namespace llvm -INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", +INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", +INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE, "Machine Block Frequency Analysis", true, true) char MachineBlockFrequencyInfo::ID = 0; diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index adfca9a4623..c1ca8e8e83b 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -499,13 +499,13 @@ public: char MachineBlockPlacement::ID = 0; char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", +INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 0766f465456..34f6bbd59e9 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -108,12 +108,12 @@ namespace { char MachineCSE::ID = 0; char &llvm::MachineCSEID = MachineCSE::ID; -INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE, + "Machine Common Subexpression Elimination", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineCSE, "machine-cse", - "Machine Common Subexpression Elimination", false, false) +INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE, + "Machine Common Subexpression Elimination", false, false) /// The source register of a COPY machine instruction can be propagated to all /// its users, and this propagation could increase the probability of finding @@ -180,8 +180,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, I = skipDebugInstructionsForward(I, E); if (I == E) - // Reached end of block, register is obviously dead. - return true; + // Reached end of block, we don't know if register is dead or not. + return false; bool SeenDef = false; for (const MachineOperand &MO : I->operands()) { diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp index 50e453e4067..c176de16b59 100644 --- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -86,11 +86,11 @@ private: char MachineCombiner::ID = 0; char &llvm::MachineCombinerID = MachineCombiner::ID; -INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", +INITIALIZE_PASS_BEGIN(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) -INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", +INITIALIZE_PASS_END(MachineCombiner, DEBUG_TYPE, "Machine InstCombiner", false, false) void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index 7312dc5e94b..f83b5481e0a 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -27,7 +27,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "codegen-cp" +#define DEBUG_TYPE "machine-cp" STATISTIC(NumDeletes, "Number of dead copies deleted"); @@ -79,7 +79,7 @@ namespace { char MachineCopyPropagation::ID = 0; char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; -INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", +INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE, "Machine Copy Propagation Pass", false, false) /// Remove any entry in \p Map where the register is a subregister or equal to diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index 7eb991744f0..95c62d820b0 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -38,7 +38,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "machine-licm" +#define DEBUG_TYPE "machinelicm" static cl::opt AvoidSpeculation("avoid-speculation", @@ -237,13 +237,13 @@ namespace { char MachineLICM::ID = 0; char &llvm::MachineLICMID = MachineLICM::ID; -INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_BEGIN(MachineLICM, DEBUG_TYPE, + "Machine Loop Invariant Code Motion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineLICM, "machinelicm", - "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_END(MachineLICM, DEBUG_TYPE, + "Machine Loop Invariant Code Motion", false, false) /// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { diff --git a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp index 581a8ad8114..9ea3c00a2fc 100644 --- a/contrib/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/contrib/llvm/lib/CodeGen/MachineOutliner.cpp @@ -901,7 +901,7 @@ namespace llvm { ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); } } -INITIALIZE_PASS(MachineOutliner, "machine-outliner", +INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) void MachineOutliner::pruneOverlaps(std::vector &CandidateList, diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp index d06c38cf4ed..8f5ac8b3fc4 100644 --- a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -715,13 +715,13 @@ char MachinePipeliner::ID = 0; int MachinePipeliner::NumTries = 0; #endif char &llvm::MachinePipelinerID = MachinePipeliner::ID; -INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner", +INITIALIZE_PASS_BEGIN(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachinePipeliner, "pipeliner", +INITIALIZE_PASS_END(MachinePipeliner, DEBUG_TYPE, "Modulo Software Pipelining", false, false) /// The "main" function for implementing Swing Modulo Scheduling. diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index 41e161f71e5..edc3783afa2 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -69,7 +69,7 @@ using namespace llvm; -#define DEBUG_TYPE "misched" +#define DEBUG_TYPE "machine-scheduler" namespace llvm { @@ -191,13 +191,13 @@ char MachineScheduler::ID = 0; char &llvm::MachineSchedulerID = MachineScheduler::ID; -INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", +INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", +INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) MachineScheduler::MachineScheduler() @@ -532,7 +532,7 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, // thumb2 size reduction is currently an exception, so the PostMIScheduler // needs to do this. if (FixKillFlags) - Scheduler.fixupKills(&*MBB); + Scheduler.fixupKills(*MBB); } Scheduler.finalizeSchedule(); } @@ -3233,6 +3233,12 @@ void PostGenericScheduler::tryCandidate(SchedCandidate &Cand, Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) return; + // Keep clustered nodes together. + if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), + Cand.SU == DAG->getNextClusterSucc(), + TryCand, Cand, Cluster)) + return; + // Avoid critical resource consumption and balance the schedule. if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, TryCand, Cand, ResourceReduce)) diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp index 5f87b68123f..7c34e71a0cc 100644 --- a/contrib/llvm/lib/CodeGen/MachineSink.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -173,14 +173,14 @@ namespace { char MachineSinking::ID = 0; char &llvm::MachineSinkingID = MachineSinking::ID; -INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", - "Machine code sinking", false, false) +INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE, + "Machine code sinking", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(MachineSinking, "machine-sink", - "Machine code sinking", false, false) +INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE, + "Machine code sinking", false, false) bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB) { diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp index 998a9645e68..01391a1a0e5 100644 --- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -44,12 +44,12 @@ using namespace llvm; char MachineTraceMetrics::ID = 0; char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; -INITIALIZE_PASS_BEGIN(MachineTraceMetrics, - "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE, + "Machine Trace Metrics", false, true) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineTraceMetrics, - "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE, + "Machine Trace Metrics", false, true) MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) { std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr); diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index b53b002f55a..265f93c363c 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -87,7 +87,6 @@ namespace { RegSet regsLive; RegVector regsDefined, regsDead, regsKilled; RegMaskVector regMasks; - RegSet regsLiveInButUnused; SlotIndex lastIndex; @@ -419,7 +418,6 @@ unsigned MachineVerifier::verify(MachineFunction &MF) { regsDead.clear(); regsKilled.clear(); regMasks.clear(); - regsLiveInButUnused.clear(); MBBInfoMap.clear(); return foundErrors; @@ -756,7 +754,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { regsLive.insert(*SubRegs); } } - regsLiveInButUnused = regsLive; const MachineFrameInfo &MFI = MF->getFrameInfo(); BitVector PR = MFI.getPristineRegs(*MF); @@ -1268,8 +1265,6 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Both use and def operands can read a register. if (MO->readsReg()) { - regsLiveInButUnused.erase(Reg); - if (MO->isKill()) addRegWithSubRegs(regsKilled, Reg); diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp index 2a8531f337a..76ad668104b 100644 --- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -23,7 +23,7 @@ #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; -#define DEBUG_TYPE "phi-opt" +#define DEBUG_TYPE "opt-phis" STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); @@ -59,7 +59,7 @@ namespace { char OptimizePHIs::ID = 0; char &llvm::OptimizePHIsID = OptimizePHIs::ID; -INITIALIZE_PASS(OptimizePHIs, "opt-phis", +INITIALIZE_PASS(OptimizePHIs, DEBUG_TYPE, "Optimize machine instruction PHIs", false, false) bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index db2264b2439..9c898fa40d7 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -112,11 +112,11 @@ STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; char& llvm::PHIEliminationID = PHIElimination::ID; -INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", +INITIALIZE_PASS_BEGIN(PHIElimination, DEBUG_TYPE, "Eliminate PHI nodes for register allocation", false, false) INITIALIZE_PASS_DEPENDENCY(LiveVariables) -INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", +INITIALIZE_PASS_END(PHIElimination, DEBUG_TYPE, "Eliminate PHI nodes for register allocation", false, false) void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 61dccdde8f1..f2249f9e37e 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -200,7 +200,7 @@ namespace { char &llvm::PostRASchedulerID = PostRAScheduler::ID; -INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", +INITIALIZE_PASS(PostRAScheduler, DEBUG_TYPE, "Post RA top-down list latency scheduler", false, false) SchedulePostRATDList::SchedulePostRATDList( @@ -367,7 +367,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { Scheduler.finishBlock(); // Update register kills - Scheduler.fixupKills(&MBB); + Scheduler.fixupKills(MBB); } return true; diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index d27ea2f5186..0118580a626 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -20,7 +20,7 @@ using namespace llvm; -#define DEBUG_TYPE "processimplicitdefs" +#define DEBUG_TYPE "processimpdefs" namespace { /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def @@ -51,9 +51,7 @@ public: char ProcessImplicitDefs::ID = 0; char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; -INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", - "Process Implicit Definitions", false, false) -INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", +INITIALIZE_PASS(ProcessImplicitDefs, DEBUG_TYPE, "Process Implicit Definitions", false, false) void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index aaa253fde49..a9813e534c5 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -45,7 +45,7 @@ using namespace llvm; -#define DEBUG_TYPE "pei" +#define DEBUG_TYPE "prologepilog" typedef SmallVector MBBVector; static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS, @@ -129,12 +129,12 @@ WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), cl::desc("Warn for stack size bigger than the given" " number")); -INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, +INITIALIZE_PASS_BEGIN(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(PEI, "prologepilog", +INITIALIZE_PASS_END(PEI, DEBUG_TYPE, "Prologue/Epilogue Insertion & Frame Finalization", false, false) @@ -450,12 +450,13 @@ static void updateLiveness(MachineFunction &MF) { const std::vector &CSI = MFI.getCalleeSavedInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { for (MachineBasicBlock *MBB : Visited) { MCPhysReg Reg = CSI[i].getReg(); // Add the callee-saved register as live-in. // It's killed at the spill. - if (!MBB->isLiveIn(Reg)) + if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg)) MBB->addLiveIn(Reg); } } diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp index 2f7ee8bf414..cc32e43968b 100644 --- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp +++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp @@ -112,11 +112,11 @@ char RenameIndependentSubregs::ID; char &llvm::RenameIndependentSubregsID = RenameIndependentSubregs::ID; -INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, "rename-independent-subregs", +INITIALIZE_PASS_BEGIN(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(RenameIndependentSubregs, "rename-independent-subregs", +INITIALIZE_PASS_END(RenameIndependentSubregs, DEBUG_TYPE, "Rename Independent Subregisters", false, false) bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const { diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp index 2771fdbd737..8584a9b7c89 100644 --- a/contrib/llvm/lib/CodeGen/SafeStack.cpp +++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp @@ -52,7 +52,7 @@ using namespace llvm; using namespace llvm::safestack; -#define DEBUG_TYPE "safestack" +#define DEBUG_TYPE "safe-stack" namespace llvm { @@ -820,10 +820,10 @@ public: } // anonymous namespace char SafeStackLegacyPass::ID = 0; -INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, "safe-stack", +INITIALIZE_PASS_BEGIN(SafeStackLegacyPass, DEBUG_TYPE, "Safe Stack instrumentation pass", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) -INITIALIZE_PASS_END(SafeStackLegacyPass, "safe-stack", +INITIALIZE_PASS_END(SafeStackLegacyPass, DEBUG_TYPE, "Safe Stack instrumentation pass", false, false) FunctionPass *llvm::createSafeStackPass() { return new SafeStackLegacyPass(); } diff --git a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp index dab5b91f50a..07b43a82ca9 100644 --- a/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp +++ b/contrib/llvm/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp @@ -49,12 +49,8 @@ private: } // namespace char ScalarizeMaskedMemIntrin::ID = 0; -INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin", - "Scalarize unsupported masked memory intrinsics", false, - false) -INITIALIZE_PASS_END(ScalarizeMaskedMemIntrin, "scalarize-masked-mem-intrin", - "Scalarize unsupported masked memory intrinsics", false, - false) +INITIALIZE_PASS(ScalarizeMaskedMemIntrin, DEBUG_TYPE, + "Scalarize unsupported masked memory intrinsics", false, false) FunctionPass *llvm::createScalarizeMaskedMemIntrinPass() { return new ScalarizeMaskedMemIntrin(); diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index 18823b74c47..8035ea80364 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1057,179 +1057,71 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores, loads.dump()); } -void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { - // Start with no live registers. - LiveRegs.reset(); +static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, + MachineInstr &MI, bool addToLiveRegs) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; - // Examine the live-in regs of all successors. - for (const MachineBasicBlock *Succ : BB->successors()) { - for (const auto &LI : Succ->liveins()) { - // Repeat, for reg and all subregs. - for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } + // Things that are available after the instruction are killed by it. + bool IsKill = LiveRegs.available(MRI, Reg); + MO.setIsKill(IsKill); + if (IsKill && addToLiveRegs) + LiveRegs.addReg(Reg); } } -/// \brief If we change a kill flag on the bundle instruction implicit register -/// operands, then we also need to propagate that to any instructions inside -/// the bundle which had the same kill state. -static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, - bool NewKillState, - const TargetRegisterInfo *TRI) { - if (MI->getOpcode() != TargetOpcode::BUNDLE) - return; +void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB.getNumber() << '\n'); - // Walk backwards from the last instruction in the bundle to the first. - // Once we set a kill flag on an instruction, we bail out, as otherwise we - // might set it on too many operands. We will clear as many flags as we - // can though. - MachineBasicBlock::instr_iterator Begin = MI->getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); - while (Begin != End) { - if (NewKillState) { - if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) - return; - } else - (--End)->clearRegisterKills(Reg, TRI); - } -} - -void ScheduleDAGInstrs::toggleKillFlag(MachineInstr &MI, MachineOperand &MO) { - if (MO.isDebug()) - return; - - // Setting kill flag... - if (!MO.isKill()) { - MO.setIsKill(true); - toggleBundleKillFlag(&MI, MO.getReg(), true, TRI); - return; - } - - // If MO itself is live, clear the kill flag... - if (LiveRegs.test(MO.getReg())) { - MO.setIsKill(false); - toggleBundleKillFlag(&MI, MO.getReg(), false, TRI); - return; - } - - // If any subreg of MO is live, then create an imp-def for that - // subreg and keep MO marked as killed. - MO.setIsKill(false); - toggleBundleKillFlag(&MI, MO.getReg(), false, TRI); - bool AllDead = true; - const unsigned SuperReg = MO.getReg(); - MachineInstrBuilder MIB(MF, &MI); - for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - MIB.addReg(*SubRegs, RegState::ImplicitDefine); - AllDead = false; - } - } - - if(AllDead) { - MO.setIsKill(true); - toggleBundleKillFlag(&MI, MO.getReg(), true, TRI); - } -} - -void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { - // FIXME: Reuse the LivePhysRegs utility for this. - DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); - - LiveRegs.resize(TRI->getNumRegs()); - BitVector killedRegs(TRI->getNumRegs()); - - startBlockForKills(MBB); + LiveRegs.init(*TRI); + LiveRegs.addLiveOuts(MBB); // Examine block from end to start... - unsigned Count = MBB->size(); - for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); - I != E; --Count) { - MachineInstr &MI = *--I; + for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) { if (MI.isDebugValue()) continue; // Update liveness. Registers that are defed but not used in this // instruction are now dead. Mark register and all subregs as they // are completely defined. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (MO.isRegMask()) - LiveRegs.clearBitsNotInMask(MO.getRegMask()); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI.isRegTiedToUseOperand(i)) continue; - - // Repeat for reg and all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.reset(*SubRegs); + for (ConstMIBundleOperands O(MI); O.isValid(); ++O) { + const MachineOperand &MO = *O; + if (MO.isReg()) { + if (!MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + LiveRegs.removeReg(Reg); + } else if (MO.isRegMask()) { + LiveRegs.removeRegsInMask(MO); + } } - // Examine all used registers and set/clear kill flag. When a - // register is used multiple times we only set the kill flag on - // the first use. Don't set kill flags on undef operands. - killedRegs.reset(); - - // toggleKillFlag can append new operands (implicit defs), so using - // a range-based loop is not safe. The new operands will be appended - // at the end of the operand list and they don't need to be visited, - // so iterating until the currently last operand is ok. - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - bool kill = false; - if (!killedRegs.test(Reg)) { - kill = true; - // A register is not killed if any subregs are live... - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { - if (LiveRegs.test(*SubRegs)) { - kill = false; - break; - } - } - - // If subreg is not live, then register is killed if it became - // live in this instruction - if (kill) - kill = !LiveRegs.test(Reg); + // If there is a bundle header fix it up first. + if (!MI.isBundled()) { + toggleKills(MRI, LiveRegs, MI, true); + } else { + MachineBasicBlock::instr_iterator First = MI.getIterator(); + if (MI.isBundle()) { + toggleKills(MRI, LiveRegs, MI, false); + ++First; } - - if (MO.isKill() != kill) { - DEBUG(dbgs() << "Fixing " << MO << " in "); - toggleKillFlag(MI, MO); - DEBUG(MI.dump()); - DEBUG({ - if (MI.getOpcode() == TargetOpcode::BUNDLE) { - MachineBasicBlock::instr_iterator Begin = MI.getIterator(); - MachineBasicBlock::instr_iterator End = getBundleEnd(Begin); - while (++Begin != End) - DEBUG(Begin->dump()); - } - }); - } - - killedRegs.set(Reg); - } - - // Mark any used register (that is not using undef) and subregs as - // now live... - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; - unsigned Reg = MO.getReg(); - if ((Reg == 0) || MRI.isReserved(Reg)) continue; - - for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); - SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); + // Some targets make the (questionable) assumtion that the instructions + // inside the bundle are ordered and consequently only the last use of + // a register inside the bundle can kill it. + MachineBasicBlock::instr_iterator I = std::next(First); + while (I->isBundledWithSucc()) + ++I; + do { + if (!I->isDebugValue()) + toggleKills(MRI, LiveRegs, *I, true); + --I; + } while(I != First); } } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 5d450e7e078..23a302f3e56 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12349,9 +12349,9 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( SDValue Val = St->getValue(); StoreInt <<= ElementSizeBytes * 8; if (ConstantSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getAPIntValue().zext(SizeInBits); + StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); + StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); } else { llvm_unreachable("Invalid constant element type"); } @@ -12617,16 +12617,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); bool IsFast = false; if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { LastLegalType = i + 1; // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == + } else if (!LegalTypes && + TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { @@ -12642,7 +12645,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { !NoVectors) { // Find a legal type for the vector store. EVT Ty = EVT::getVectorVT(Context, MemVT, i + 1); - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(Ty) && + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12700,7 +12703,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; - if (TLI.isTypeLegal(Ty) && + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) @@ -12810,6 +12813,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12823,6 +12827,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -12834,7 +12839,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { else if (TLI.getTypeAction(Context, StoreTy) == TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + if (!LegalTypes && + TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, @@ -14455,6 +14462,145 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +/// If we are extracting a subvector produced by a wide binary operator with at +/// at least one operand that was the result of a vector concatenation, then try +/// to use the narrow vector operands directly to avoid the concatenation and +/// extraction. +static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share + // some of these bailouts with other transforms. + + // The extract index must be a constant, so we can map it to a concat operand. + auto *ExtractIndex = dyn_cast(Extract->getOperand(1)); + if (!ExtractIndex) + return SDValue(); + + // Only handle the case where we are doubling and then halving. A larger ratio + // may require more than two narrow binops to replace the wide binop. + EVT VT = Extract->getValueType(0); + unsigned NumElems = VT.getVectorNumElements(); + assert((ExtractIndex->getZExtValue() % NumElems) == 0 && + "Extract index is not a multiple of the vector length."); + if (Extract->getOperand(0).getValueSizeInBits() != VT.getSizeInBits() * 2) + return SDValue(); + + // We are looking for an optionally bitcasted wide vector binary operator + // feeding an extract subvector. + SDValue BinOp = Extract->getOperand(0); + if (BinOp.getOpcode() == ISD::BITCAST) + BinOp = BinOp.getOperand(0); + + // TODO: The motivating case for this transform is an x86 AVX1 target. That + // target has temptingly almost legal versions of bitwise logic ops in 256-bit + // flavors, but no other 256-bit integer support. This could be extended to + // handle any binop, but that may require fixing/adding other folds to avoid + // codegen regressions. + unsigned BOpcode = BinOp.getOpcode(); + if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR) + return SDValue(); + + // The binop must be a vector type, so we can chop it in half. + EVT WideBVT = BinOp.getValueType(); + if (!WideBVT.isVector()) + return SDValue(); + + // Bail out if the target does not support a narrower version of the binop. + EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), + WideBVT.getVectorNumElements() / 2); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) + return SDValue(); + + // Peek through bitcasts of the binary operator operands if needed. + SDValue LHS = BinOp.getOperand(0); + if (LHS.getOpcode() == ISD::BITCAST) + LHS = LHS.getOperand(0); + + SDValue RHS = BinOp.getOperand(1); + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); + + // We need at least one concatenation operation of a binop operand to make + // this transform worthwhile. The concat must double the input vector sizes. + // TODO: Should we also handle INSERT_SUBVECTOR patterns? + bool ConcatL = + LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2; + bool ConcatR = + RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2; + if (!ConcatL && !ConcatR) + return SDValue(); + + // If one of the binop operands was not the result of a concat, we must + // extract a half-sized operand for our new narrow binop. We can't just reuse + // the original extract index operand because we may have bitcasted. + unsigned ConcatOpNum = ExtractIndex->getZExtValue() / NumElems; + unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); + EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); + SDLoc DL(Extract); + + // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN + // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N) + // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN + SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum)) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(0), + DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); + + SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum)) + : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, + BinOp.getOperand(1), + DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT)); + + SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y); + return DAG.getBitcast(VT, NarrowBinOp); +} + +/// If we are extracting a subvector from a wide vector load, convert to a +/// narrow load to eliminate the extraction: +/// (extract_subvector (load wide vector)) --> (load narrow vector) +static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { + // TODO: Add support for big-endian. The offset calculation must be adjusted. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + + // TODO: The one-use check is overly conservative. Check the cost of the + // extract instead or remove that condition entirely. + auto *Ld = dyn_cast(Extract->getOperand(0)); + auto *ExtIdx = dyn_cast(Extract->getOperand(1)); + if (!Ld || !Ld->hasOneUse() || Ld->isVolatile() || !ExtIdx) + return SDValue(); + + // The narrow load will be offset from the base address of the old load if + // we are extracting from something besides index 0 (little-endian). + EVT VT = Extract->getValueType(0); + SDLoc DL(Extract); + SDValue BaseAddr = Ld->getOperand(1); + unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); + + // TODO: Use "BaseIndexOffset" to make this more effective. + SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); + MachineFunction &MF = DAG.getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset, + VT.getStoreSize()); + SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); + + // The new load must have the same position as the old load in terms of memory + // dependency. Create a TokenFactor for Ld and NewLd and update uses of Ld's + // output chain to use that TokenFactor. + // TODO: This code is based on a similar sequence in x86 lowering. It should + // be moved to a helper function, so it can be shared and reused. + if (Ld->hasAnyUseOfValue(1)) { + SDValue OldChain = SDValue(Ld, 1); + SDValue NewChain = SDValue(NewLd.getNode(), 1); + SDValue TokenFactor = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + OldChain, NewChain); + DAG.ReplaceAllUsesOfValueWith(OldChain, TokenFactor); + DAG.UpdateNodeOperands(TokenFactor.getNode(), OldChain, NewChain); + } + + return NewLd; +} + SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); @@ -14463,6 +14609,10 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (V.isUndef()) return DAG.getUNDEF(NVT); + if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT)) + if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG)) + return NarrowLoad; + // Combine: // (extract_subvec (concat V1, V2, ...), i) // Into: @@ -14510,6 +14660,9 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } } + if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) + return NarrowBOp; + return SDValue(); } @@ -14745,10 +14898,10 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. -SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, - SelectionDAG &DAG, - const TargetLowering &TLI, - bool LegalOperations) { +static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -14795,7 +14948,8 @@ SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, // destination type. This is often generated during legalization. // If the source node itself was a '*_extend_vector_inreg' node then we should // then be able to remove it. -SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, SelectionDAG &DAG) { +static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9a47a914df9..d0a8b34c69c 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -899,6 +899,39 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } +static TargetLowering::LegalizeAction +getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { + unsigned EqOpc; + switch (Opcode) { + default: llvm_unreachable("Unexpected FP pseudo-opcode"); + case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; + case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; + case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; + case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; + case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; + case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; + case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; + case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; + case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; + case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; + } + + auto Action = TLI.getOperationAction(EqOpc, VT); + + // We don't currently handle Custom or Promote for strict FP pseudo-ops. + // For now, we just expand for those cases. + if (Action != TargetLowering::Legal) + Action = TargetLowering::Expand; + + // ISD::FPOWI returns 'Legal' even though it should be expanded. + if (Opcode == ISD::STRICT_FPOWI && Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + + return Action; +} + /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); @@ -1043,6 +1076,25 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; + case ISD::STRICT_FSQRT: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + // These pseudo-ops get legalized as if they were their non-strict + // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT + // is also legal, but if ISD::FSQRT requires expansion then so does + // ISD::STRICT_FSQRT. + Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), + Node->getValueType(0)); + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { @@ -2032,6 +2084,9 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -3907,16 +3962,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::FMAX_PPCF128)); break; case ISD::FSQRT: + case ISD::STRICT_FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: + case ISD::STRICT_FSIN: Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128)); break; case ISD::FCOS: + case ISD::STRICT_FCOS: Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128)); @@ -3926,26 +3984,31 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandSinCosLibCall(Node, Results); break; case ISD::FLOG: + case ISD::STRICT_FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: + case ISD::STRICT_FLOG2: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: + case ISD::STRICT_FLOG10: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: + case ISD::STRICT_FEXP: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: + case ISD::STRICT_FEXP2: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128)); @@ -3966,11 +4029,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::CEIL_PPCF128)); break; case ISD::FRINT: + case ISD::STRICT_FRINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128)); break; case ISD::FNEARBYINT: + case ISD::STRICT_FNEARBYINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, @@ -3985,11 +4050,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::ROUND_PPCF128)); break; case ISD::FPOWI: + case ISD::STRICT_FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128)); break; case ISD::FPOW: + case ISD::STRICT_FPOW: Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 16c1f78f1b3..177898e1e95 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4779,23 +4779,23 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, DAG.getMachineFunction()); if (VT == MVT::Other) { - if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(DstAS) || - TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) { - VT = TLI.getPointerTy(DAG.getDataLayout(), DstAS); - } else { - switch (DstAlign & 7) { - case 0: VT = MVT::i64; break; - case 4: VT = MVT::i32; break; - case 2: VT = MVT::i16; break; - default: VT = MVT::i8; break; - } - } + // Use the largest integer type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + VT = MVT::i64; + while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && + !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + assert(VT.isInteger()); + // Find the largest legal integer type. MVT LVT = MVT::i64; while (!TLI.isTypeLegal(LVT)) LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); assert(LVT.isInteger()); + // If the type we've chosen is larger than the largest legal integer type + // then use that instead. if (VT.bitsGT(LVT)) VT = LVT; } @@ -6542,6 +6542,63 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, return N; } +SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { + unsigned OrigOpc = Node->getOpcode(); + unsigned NewOpc; + bool IsUnary = false; + switch (OrigOpc) { + default: + llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); + case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; + case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; + case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; + case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; + case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; + case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break; + case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; + case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; + case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break; + case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break; + case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break; + case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break; + case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break; + case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break; + case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break; + case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break; + case ISD::STRICT_FNEARBYINT: + NewOpc = ISD::FNEARBYINT; + IsUnary = true; + break; + } + + // We're taking this node out of the chain, so we need to re-link things. + SDValue InputChain = Node->getOperand(0); + SDValue OutputChain = SDValue(Node, 1); + ReplaceAllUsesOfValueWith(OutputChain, InputChain); + + SDVTList VTs = getVTList(Node->getOperand(1).getValueType()); + SDNode *Res = nullptr; + if (IsUnary) + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); + else + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), + Node->getOperand(2) }); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } else { + ReplaceAllUsesWith(Node, Res); + RemoveDeadNode(Node); + } + + return Res; +} + /// getMachineNode - These are used for target selectors to create a new node /// with specified return type(s), MachineInstr opcode, and operands. diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 57d340c41c3..b895da21a7f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4736,24 +4736,15 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, DIExpression *Expr, int64_t Offset, const DebugLoc &dl, unsigned DbgSDNodeOrder) { - SDDbgValue *SDV; - auto *FISDN = dyn_cast(N.getNode()); - if (FISDN && Expr->startsWithDeref()) { + if (auto *FISDN = dyn_cast(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe // stack slot locations as such instead of as indirectly addressed // locations. - ArrayRef TrailingElements(Expr->elements_begin() + 1, - Expr->elements_end()); - DIExpression *DerefedDIExpr = - DIExpression::get(*DAG.getContext(), TrailingElements); - int FI = FISDN->getIndex(); - SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl, - DbgSDNodeOrder); - } else { - SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, - Offset, dl, DbgSDNodeOrder); + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl, + DbgSDNodeOrder); } - return SDV; + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, + Offset, dl, DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -5254,7 +5245,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: - visitConstrainedFPIntrinsic(I, Intrinsic); + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + visitConstrainedFPIntrinsic(cast(I)); return nullptr; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -5752,11 +5755,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } } -void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, - unsigned Intrinsic) { +void SelectionDAGBuilder::visitConstrainedFPIntrinsic( + const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); unsigned Opcode; - switch (Intrinsic) { + switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. case Intrinsic::experimental_constrained_fadd: Opcode = ISD::STRICT_FADD; @@ -5773,23 +5776,64 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(const CallInst &I, case Intrinsic::experimental_constrained_frem: Opcode = ISD::STRICT_FREM; break; + case Intrinsic::experimental_constrained_sqrt: + Opcode = ISD::STRICT_FSQRT; + break; + case Intrinsic::experimental_constrained_pow: + Opcode = ISD::STRICT_FPOW; + break; + case Intrinsic::experimental_constrained_powi: + Opcode = ISD::STRICT_FPOWI; + break; + case Intrinsic::experimental_constrained_sin: + Opcode = ISD::STRICT_FSIN; + break; + case Intrinsic::experimental_constrained_cos: + Opcode = ISD::STRICT_FCOS; + break; + case Intrinsic::experimental_constrained_exp: + Opcode = ISD::STRICT_FEXP; + break; + case Intrinsic::experimental_constrained_exp2: + Opcode = ISD::STRICT_FEXP2; + break; + case Intrinsic::experimental_constrained_log: + Opcode = ISD::STRICT_FLOG; + break; + case Intrinsic::experimental_constrained_log10: + Opcode = ISD::STRICT_FLOG10; + break; + case Intrinsic::experimental_constrained_log2: + Opcode = ISD::STRICT_FLOG2; + break; + case Intrinsic::experimental_constrained_rint: + Opcode = ISD::STRICT_FRINT; + break; + case Intrinsic::experimental_constrained_nearbyint: + Opcode = ISD::STRICT_FNEARBYINT; + break; } const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = getRoot(); - SDValue Ops[3] = { Chain, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)) }; SmallVector ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); ValueVTs.push_back(MVT::Other); // Out chain SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result = DAG.getNode(Opcode, sdl, VTs, Ops); + SDValue Result; + if (FPI.isUnaryOp()) + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)) }); + else + Result = DAG.getNode(Opcode, sdl, VTs, + { Chain, getValue(FPI.getArgOperand(0)), + getValue(FPI.getArgOperand(1)) }); assert(Result.getNode()->getNumValues() == 2); SDValue OutChain = Result.getValue(1); DAG.setRoot(OutChain); SDValue FPResult = Result.getValue(0); - setValue(&I, FPResult); + setValue(&FPI, FPResult); } std::pair diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index bdaee858da6..77e131fa551 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -895,7 +895,7 @@ private: void visitInlineAsm(ImmutableCallSite CS); const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); - void visitConstrainedFPIntrinsic(const CallInst &I, unsigned Intrinsic); + void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 5e0feccb6b4..687b882c5e4 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -905,50 +905,6 @@ public: } // end anonymous namespace -static bool isStrictFPOp(SDNode *Node, unsigned &NewOpc) { - unsigned OrigOpc = Node->getOpcode(); - switch (OrigOpc) { - case ISD::STRICT_FADD: NewOpc = ISD::FADD; return true; - case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; return true; - case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; return true; - case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; return true; - case ISD::STRICT_FREM: NewOpc = ISD::FREM; return true; - default: return false; - } -} - -SDNode* SelectionDAGISel::MutateStrictFPToFP(SDNode *Node, unsigned NewOpc) { - assert(((Node->getOpcode() == ISD::STRICT_FADD && NewOpc == ISD::FADD) || - (Node->getOpcode() == ISD::STRICT_FSUB && NewOpc == ISD::FSUB) || - (Node->getOpcode() == ISD::STRICT_FMUL && NewOpc == ISD::FMUL) || - (Node->getOpcode() == ISD::STRICT_FDIV && NewOpc == ISD::FDIV) || - (Node->getOpcode() == ISD::STRICT_FREM && NewOpc == ISD::FREM)) && - "Unexpected StrictFP opcode!"); - - // We're taking this node out of the chain, so we need to re-link things. - SDValue InputChain = Node->getOperand(0); - SDValue OutputChain = SDValue(Node, 1); - CurDAG->ReplaceAllUsesOfValueWith(OutputChain, InputChain); - - SDVTList VTs = CurDAG->getVTList(Node->getOperand(1).getValueType()); - SDValue Ops[2] = { Node->getOperand(1), Node->getOperand(2) }; - SDNode *Res = CurDAG->MorphNodeTo(Node, NewOpc, VTs, Ops); - - // MorphNodeTo can operate in two ways: if an existing node with the - // specified operands exists, it can just return it. Otherwise, it - // updates the node in place to have the requested operands. - if (Res == Node) { - // If we updated the node in place, reset the node ID. To the isel, - // this should be just like a newly allocated machine node. - Res->setNodeId(-1); - } else { - CurDAG->ReplaceAllUsesWith(Node, Res); - CurDAG->RemoveDeadNode(Node); - } - - return Res; -} - void SelectionDAGISel::DoInstructionSelection() { DEBUG(dbgs() << "===== Instruction selection begins: BB#" << FuncInfo->MBB->getNumber() @@ -992,15 +948,12 @@ void SelectionDAGISel::DoInstructionSelection() { // If the current node is a strict FP pseudo-op, the isStrictFPOp() // function will provide the corresponding normal FP opcode to which the // node should be mutated. - unsigned NormalFPOpc = ISD::UNDEF; - bool IsStrictFPOp = isStrictFPOp(Node, NormalFPOpc); - if (IsStrictFPOp) - Node = MutateStrictFPToFP(Node, NormalFPOpc); + // + // FIXME: The backends need a way to handle FP constraints. + if (Node->isStrictFPOpcode()) + Node = CurDAG->mutateStrictFPToFP(Node); Select(Node); - - // FIXME: Add code here to attach an implicit def and use of - // target-specific FP environment registers. } CurDAG->setRoot(Dummy.getValue()); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index befbd80d796..0dffffee997 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -603,11 +603,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask, Known2, TLO, Depth+1)) return true; - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. @@ -633,11 +633,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::OR: if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask, Known2, TLO, Depth+1)) return true; - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. @@ -660,10 +660,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::XOR: { if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1)) return true; - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. @@ -725,8 +725,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return true; if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(Op, NewMask, TLO)) @@ -741,8 +741,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return true; if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); - assert((Known2.Zero & Known2.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(Op, NewMask, TLO)) @@ -907,7 +907,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Compute the new bits that are at the top now. if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); @@ -947,7 +947,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); @@ -1029,7 +1029,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // If the sign bit of the input is known set or clear, then we know the // top bits of the result. @@ -1084,7 +1084,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); Known.Zero |= NewBits; break; @@ -1134,7 +1134,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); break; } @@ -1193,7 +1193,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case ISD::AssertZext: { @@ -1205,7 +1205,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, Known, TLO, Depth+1)) return true; - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero |= ~InMask; break; diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp index ff7d205c1f4..6750fde5763 100644 --- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -27,7 +27,7 @@ using namespace llvm; -#define DEBUG_TYPE "shadowstackgclowering" +#define DEBUG_TYPE "shadow-stack-gc-lowering" namespace { @@ -66,10 +66,10 @@ private: }; } -INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering", +INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) -INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering", +INITIALIZE_PASS_END(ShadowStackGCLowering, DEBUG_TYPE, "Shadow Stack GC Lowering", false, false) FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp index 2638702da15..aa75f5e2caa 100644 --- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -210,13 +210,12 @@ public: char ShrinkWrap::ID = 0; char &llvm::ShrinkWrapID = ShrinkWrap::ID; -INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, - false) +INITIALIZE_PASS_BEGIN(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) +INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false) bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const { diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index e9eff4d0acb..09e9c3bb335 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -74,7 +74,7 @@ private: } // end anonymous namespace char SjLjEHPrepare::ID = 0; -INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions", +INITIALIZE_PASS(SjLjEHPrepare, DEBUG_TYPE, "Prepare SjLj exceptions", false, false) // Public Interface To the SjLjEHPrepare pass. diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp index bc2a1d09056..3656832a7f1 100644 --- a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -19,7 +19,7 @@ using namespace llvm; #define DEBUG_TYPE "slotindexes" char SlotIndexes::ID = 0; -INITIALIZE_PASS(SlotIndexes, "slotindexes", +INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE, "Slot index numbering", false, false) STATISTIC(NumLocalRenum, "Number of local renumberings"); diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index 43cbf4add0f..0abe1c47da5 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -40,14 +40,14 @@ using namespace llvm; -#define DEBUG_TYPE "spillplacement" +#define DEBUG_TYPE "spill-code-placement" char SpillPlacement::ID = 0; -INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", +INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundles) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement", +INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) char &llvm::SpillPlacementID = SpillPlacement::ID; diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index 86a16187fcb..acb3676fdd7 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -53,7 +53,7 @@ using namespace llvm; -#define DEBUG_TYPE "stackcoloring" +#define DEBUG_TYPE "stack-coloring" static cl::opt DisableColoring("no-stack-coloring", @@ -371,12 +371,12 @@ private: char StackColoring::ID = 0; char &llvm::StackColoringID = StackColoring::ID; -INITIALIZE_PASS_BEGIN(StackColoring, - "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_BEGIN(StackColoring, DEBUG_TYPE, + "Merge disjoint stack slots", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(StackProtector) -INITIALIZE_PASS_END(StackColoring, - "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_END(StackColoring, DEBUG_TYPE, + "Merge disjoint stack slots", false, false) void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index 5da77264261..ca8bde2d114 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -58,10 +58,10 @@ static cl::opt EnableSelectionDAGSP("enable-selectiondag-sp", cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_PASS_BEGIN(StackProtector, "stack-protector", +INITIALIZE_PASS_BEGIN(StackProtector, DEBUG_TYPE, "Insert stack protectors", false, true) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) -INITIALIZE_PASS_END(StackProtector, "stack-protector", +INITIALIZE_PASS_END(StackProtector, DEBUG_TYPE, "Insert stack protectors", false, true) FunctionPass *llvm::createStackProtectorPass() { return new StackProtector(); } diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index 234b2043a6a..d1758ecbd79 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -32,7 +32,7 @@ #include using namespace llvm; -#define DEBUG_TYPE "stackslotcoloring" +#define DEBUG_TYPE "stack-slot-coloring" static cl::opt DisableSharing("no-stack-slot-sharing", @@ -116,12 +116,12 @@ namespace { char StackSlotColoring::ID = 0; char &llvm::StackSlotColoringID = StackSlotColoring::ID; -INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", +INITIALIZE_PASS_BEGIN(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(LiveStacks) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", +INITIALIZE_PASS_END(StackSlotColoring, DEBUG_TYPE, "Stack Slot Coloring", false, false) namespace { diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index e2377d89497..ad0b0437365 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -40,8 +40,7 @@ char TailDuplicatePass::ID = 0; char &llvm::TailDuplicateID = TailDuplicatePass::ID; -INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", false, - false) +INITIALIZE_PASS(TailDuplicatePass, DEBUG_TYPE, "Tail Duplication", false, false) bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(*MF.getFunction())) diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp index d2414200e9d..d40f7af431a 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp @@ -749,7 +749,7 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, if (PredBB->succ_size() > 1) return false; - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) return false; @@ -832,7 +832,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, appendCopies(PredBB, CopyInfos, Copies); // Simplify - MachineBasicBlock *PredTBB, *PredFBB; + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; SmallVector PredCond; TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp index 7392c832714..552a89f76ca 100644 --- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -52,7 +52,7 @@ using namespace llvm; -#define DEBUG_TYPE "twoaddrinstr" +#define DEBUG_TYPE "twoaddressinstruction" STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); @@ -171,10 +171,10 @@ public: } // end anonymous namespace char TwoAddressInstructionPass::ID = 0; -INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", +INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp index a632b40c20f..4e7542bf31e 100644 --- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -94,7 +94,7 @@ private: } // end anonymous namespace char WinEHPrepare::ID = 0; -INITIALIZE_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions", +INITIALIZE_PASS(WinEHPrepare, DEBUG_TYPE, "Prepare Windows exceptions", false, false) FunctionPass *llvm::createWinEHPass() { return new WinEHPrepare(); } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp index f95c3e79388..705b548141b 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp @@ -45,24 +45,9 @@ static Error visitKnownMember(CVMemberRecord &Record, } static Expected deserializeTypeServerRecord(CVType &Record) { - class StealTypeServerVisitor : public TypeVisitorCallbacks { - public: - explicit StealTypeServerVisitor(TypeServer2Record &TR) : TR(TR) {} - - Error visitKnownRecord(CVType &CVR, TypeServer2Record &Record) override { - TR = Record; - return Error::success(); - } - - private: - TypeServer2Record &TR; - }; - TypeServer2Record R(TypeRecordKind::TypeServer2); - StealTypeServerVisitor Thief(R); - if (auto EC = visitTypeRecord(Record, Thief)) + if (auto EC = TypeDeserializer::deserializeAs(Record, R)) return std::move(EC); - return R; } @@ -308,8 +293,9 @@ Error llvm::codeview::visitTypeRecord(CVType &Record, Error llvm::codeview::visitTypeStream(const CVTypeArray &Types, TypeVisitorCallbacks &Callbacks, + VisitorDataSource Source, TypeServerHandler *TS) { - VisitHelper V(Callbacks, VDS_BytesPresent); + VisitHelper V(Callbacks, Source); if (TS) V.Visitor.addTypeServerHandler(*TS); return V.Visitor.visitTypeStream(Types); diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp new file mode 100644 index 00000000000..11e2e215303 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeIndexDiscovery.cpp @@ -0,0 +1,371 @@ +//===- TypeIndexDiscovery.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::codeview; + +static inline MethodKind getMethodKind(uint16_t Attrs) { + Attrs &= uint16_t(MethodOptions::MethodKindMask); + Attrs >>= 2; + return MethodKind(Attrs); +} + +static inline bool isIntroVirtual(uint16_t Attrs) { + MethodKind MK = getMethodKind(Attrs); + return MK == MethodKind::IntroducingVirtual || + MK == MethodKind::PureIntroducingVirtual; +} + +static inline PointerMode getPointerMode(uint32_t Attrs) { + return static_cast((Attrs >> PointerRecord::PointerModeShift) & + PointerRecord::PointerModeMask); +} + +static inline bool isMemberPointer(uint32_t Attrs) { + PointerMode Mode = getPointerMode(Attrs); + return Mode == PointerMode::PointerToDataMember || + Mode == PointerMode::PointerToDataMember; +} + +static inline uint32_t getEncodedIntegerLength(ArrayRef Data) { + uint16_t N = support::endian::read16le(Data.data()); + if (N < LF_NUMERIC) + return 2; + + assert(N <= LF_UQUADWORD); + + constexpr uint32_t Sizes[] = { + 1, // LF_CHAR + 2, // LF_SHORT + 2, // LF_USHORT + 4, // LF_LONG + 4, // LF_ULONG + 4, // LF_REAL32 + 8, // LF_REAL64 + 10, // LF_REAL80 + 16, // LF_REAL128 + 8, // LF_QUADWORD + 8, // LF_UQUADWORD + }; + + return Sizes[N - LF_NUMERIC]; +} + +static inline uint32_t getCStringLength(ArrayRef Data) { + const char *S = reinterpret_cast(Data.data()); + return strlen(S) + 1; +} + +static void handleMethodOverloadList(ArrayRef Content, + SmallVectorImpl &Refs) { + uint32_t Offset = 0; + + while (!Content.empty()) { + // Array of: + // 0: Attrs + // 2: Padding + // 4: TypeIndex + // if (isIntroVirtual()) + // 8: VFTableOffset + + // At least 8 bytes are guaranteed. 4 extra bytes come iff function is an + // intro virtual. + uint32_t Len = 8; + + uint16_t Attrs = support::endian::read16le(Content.data()); + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + + if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) + Len += 4; + Offset += Len; + Content = Content.drop_front(Len); + } +} + +static uint32_t handleBaseClass(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Encoded Integer + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getEncodedIntegerLength(Data.drop_front(8)); +} + +static uint32_t handleEnumerator(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: Encoded Integer + // : Name + uint32_t Size = 4 + getEncodedIntegerLength(Data.drop_front(4)); + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleDataMember(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Encoded Integer + // : Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + uint32_t Size = 8 + getEncodedIntegerLength(Data.drop_front(8)); + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleOverloadedMethod(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleOneMethod(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Attributes + // 4: Type + // if (isIntroVirtual) + // 8: VFTableOffset + // : Name + uint32_t Size = 8; + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + + uint16_t Attrs = support::endian::read16le(Data.drop_front(2).data()); + if (LLVM_UNLIKELY(isIntroVirtual(Attrs))) + Size += 4; + + return Size + getCStringLength(Data.drop_front(Size)); +} + +static uint32_t handleNestedType(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleStaticDataMember(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + // 8: Name + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8 + getCStringLength(Data.drop_front(8)); +} + +static uint32_t handleVirtualBaseClass(ArrayRef Data, uint32_t Offset, + bool IsIndirect, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Attrs + // 4: TypeIndex + // 8: TypeIndex + // 12: Encoded Integer + // : Encoded Integer + uint32_t Size = 12; + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 2}); + Size += getEncodedIntegerLength(Data.drop_front(Size)); + Size += getEncodedIntegerLength(Data.drop_front(Size)); + return Size; +} + +static uint32_t handleVFPtr(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8; +} + +static uint32_t handleListContinuation(ArrayRef Data, uint32_t Offset, + SmallVectorImpl &Refs) { + // 0: Kind + // 2: Padding + // 4: TypeIndex + Refs.push_back({TiRefKind::TypeRef, Offset + 4, 1}); + return 8; +} + +static void handleFieldList(ArrayRef Content, + SmallVectorImpl &Refs) { + uint32_t Offset = 0; + uint32_t ThisLen = 0; + while (!Content.empty()) { + TypeLeafKind Kind = + static_cast(support::endian::read16le(Content.data())); + switch (Kind) { + case LF_BCLASS: + ThisLen = handleBaseClass(Content, Offset, Refs); + break; + case LF_ENUMERATE: + ThisLen = handleEnumerator(Content, Offset, Refs); + break; + case LF_MEMBER: + ThisLen = handleDataMember(Content, Offset, Refs); + break; + case LF_METHOD: + ThisLen = handleOverloadedMethod(Content, Offset, Refs); + break; + case LF_ONEMETHOD: + ThisLen = handleOneMethod(Content, Offset, Refs); + break; + case LF_NESTTYPE: + ThisLen = handleNestedType(Content, Offset, Refs); + break; + case LF_STMEMBER: + ThisLen = handleStaticDataMember(Content, Offset, Refs); + break; + case LF_VBCLASS: + case LF_IVBCLASS: + ThisLen = + handleVirtualBaseClass(Content, Offset, Kind == LF_VBCLASS, Refs); + break; + case LF_VFUNCTAB: + ThisLen = handleVFPtr(Content, Offset, Refs); + break; + case LF_INDEX: + ThisLen = handleListContinuation(Content, Offset, Refs); + break; + default: + return; + } + Content = Content.drop_front(ThisLen); + Offset += ThisLen; + if (!Content.empty()) { + uint8_t Pad = Content.front(); + if (Pad >= LF_PAD0) { + uint32_t Skip = Pad & 0x0F; + Content = Content.drop_front(Skip); + Offset += Skip; + } + } + } +} + +static void handlePointer(ArrayRef Content, + SmallVectorImpl &Refs) { + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + + uint32_t Attrs = support::endian::read32le(Content.drop_front(4).data()); + if (isMemberPointer(Attrs)) + Refs.push_back({TiRefKind::TypeRef, 8, 1}); +} + +static void discoverTypeIndices(ArrayRef Content, TypeLeafKind Kind, + SmallVectorImpl &Refs) { + uint32_t Count; + // FIXME: In the future it would be nice if we could avoid hardcoding these + // values. One idea is to define some structures representing these types + // that would allow the use of offsetof(). + switch (Kind) { + case TypeLeafKind::LF_FUNC_ID: + Refs.push_back({TiRefKind::IndexRef, 0, 1}); + Refs.push_back({TiRefKind::TypeRef, 4, 1}); + break; + case TypeLeafKind::LF_MFUNC_ID: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_STRING_ID: + Refs.push_back({TiRefKind::IndexRef, 0, 1}); + break; + case TypeLeafKind::LF_SUBSTR_LIST: + Count = support::endian::read32le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::IndexRef, 4, Count}); + break; + case TypeLeafKind::LF_BUILDINFO: + Count = support::endian::read16le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::IndexRef, 2, Count}); + break; + case TypeLeafKind::LF_UDT_SRC_LINE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + Refs.push_back({TiRefKind::IndexRef, 4, 1}); + break; + case TypeLeafKind::LF_UDT_MOD_SRC_LINE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_MODIFIER: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_PROCEDURE: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + Refs.push_back({TiRefKind::TypeRef, 8, 1}); + break; + case TypeLeafKind::LF_MFUNCTION: + Refs.push_back({TiRefKind::TypeRef, 0, 3}); + Refs.push_back({TiRefKind::TypeRef, 16, 1}); + break; + case TypeLeafKind::LF_ARGLIST: + Count = support::endian::read32le(Content.data()); + if (Count > 0) + Refs.push_back({TiRefKind::TypeRef, 4, Count}); + break; + case TypeLeafKind::LF_ARRAY: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_CLASS: + case TypeLeafKind::LF_STRUCTURE: + case TypeLeafKind::LF_INTERFACE: + Refs.push_back({TiRefKind::TypeRef, 4, 3}); + break; + case TypeLeafKind::LF_UNION: + Refs.push_back({TiRefKind::TypeRef, 4, 1}); + break; + case TypeLeafKind::LF_ENUM: + Refs.push_back({TiRefKind::TypeRef, 4, 2}); + break; + case TypeLeafKind::LF_BITFIELD: + Refs.push_back({TiRefKind::TypeRef, 0, 1}); + break; + case TypeLeafKind::LF_VFTABLE: + Refs.push_back({TiRefKind::TypeRef, 0, 2}); + break; + case TypeLeafKind::LF_VTSHAPE: + break; + case TypeLeafKind::LF_METHODLIST: + handleMethodOverloadList(Content, Refs); + break; + case TypeLeafKind::LF_FIELDLIST: + handleFieldList(Content, Refs); + break; + case TypeLeafKind::LF_POINTER: + handlePointer(Content, Refs); + break; + default: + break; + } +} + +void llvm::codeview::discoverTypeIndices(const CVType &Type, + SmallVectorImpl &Refs) { + ::discoverTypeIndices(Type.content(), Type.kind(), Refs); +} + +void llvm::codeview::discoverTypeIndices(ArrayRef RecordData, + SmallVectorImpl &Refs) { + const RecordPrefix *P = + reinterpret_cast(RecordData.data()); + TypeLeafKind K = static_cast(uint16_t(P->RecordKind)); + ::discoverTypeIndices(RecordData.drop_front(sizeof(RecordPrefix)), K, Refs); +} diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp index 3b061e67e05..93c1198e36c 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/TypeSerializer.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/BinaryStreamWriter.h" #include @@ -16,23 +17,111 @@ using namespace llvm; using namespace llvm::codeview; +namespace { +struct HashedType { + uint64_t Hash; + const uint8_t *Data; + unsigned Size; // FIXME: Go to uint16_t? + TypeIndex Index; +}; + +/// Wrapper around a poitner to a HashedType. Hash and equality operations are +/// based on data in the pointee. +struct HashedTypePtr { + HashedTypePtr() = default; + HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {} + HashedType *Ptr = nullptr; +}; +} // namespace + +namespace llvm { +template <> struct DenseMapInfo { + static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); } + static inline HashedTypePtr getTombstoneKey() { + return HashedTypePtr(reinterpret_cast(1)); + } + static unsigned getHashValue(HashedTypePtr Val) { + assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr); + return Val.Ptr->Hash; + } + static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) { + HashedType *LHS = LHSP.Ptr; + HashedType *RHS = RHSP.Ptr; + if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr) + return LHS == RHS; + if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size) + return false; + return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0; + } +}; +} + +/// Private implementation so that we don't leak our DenseMap instantiations to +/// users. +class llvm::codeview::TypeHasher { +private: + /// Storage for type record provided by the caller. Records will outlive the + /// hasher object, so they should be allocated here. + BumpPtrAllocator &RecordStorage; + + /// Storage for hash keys. These only need to live as long as the hashing + /// operation. + BumpPtrAllocator KeyStorage; + + /// Hash table. We really want a DenseMap, TypeIndex> here, + /// but DenseMap is inefficient when the keys are long (like type records) + /// because it recomputes the hash value of every key when it grows. This + /// value type stores the hash out of line in KeyStorage, so that table + /// entries are small and easy to rehash. + DenseSet HashedRecords; + +public: + TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {} + + void reset() { HashedRecords.clear(); } + + /// Takes the bytes of type record, inserts them into the hash table, saves + /// them, and returns a pointer to an identical stable type record along with + /// its type index in the destination stream. + TypeIndex getOrCreateRecord(ArrayRef &Record, TypeIndex TI); +}; + +TypeIndex TypeHasher::getOrCreateRecord(ArrayRef &Record, + TypeIndex TI) { + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + + // Compute the hash up front so we can store it in the key. + HashedType TempHashedType = {hash_value(Record), Record.data(), + unsigned(Record.size()), TI}; + auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType)); + HashedType *&Hashed = Result.first->Ptr; + + if (Result.second) { + // This was a new type record. We need stable storage for both the key and + // the record. The record should outlive the hashing operation. + Hashed = KeyStorage.Allocate(); + *Hashed = TempHashedType; + + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Hashed->Data = Stable; + assert(Hashed->Size == Record.size()); + } + + // Update the caller's copy of Record to point a stable copy. + Record = ArrayRef(Hashed->Data, Hashed->Size); + return Hashed->Index; +} + +TypeIndex TypeSerializer::nextTypeIndex() const { + return TypeIndex::fromArrayIndex(SeenRecords.size()); +} + bool TypeSerializer::isInFieldList() const { return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; } -TypeIndex TypeSerializer::calcNextTypeIndex() const { - if (LastTypeIndex.isNoneType()) - return TypeIndex(TypeIndex::FirstNonSimpleIndex); - else - return TypeIndex(LastTypeIndex.getIndex() + 1); -} - -TypeIndex TypeSerializer::incrementTypeIndex() { - TypeIndex Previous = LastTypeIndex; - LastTypeIndex = calcNextTypeIndex(); - return Previous; -} - MutableArrayRef TypeSerializer::getCurrentSubRecordData() { assert(isInFieldList()); return getCurrentRecordData().drop_front(CurrentSegment.length()); @@ -51,46 +140,6 @@ Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) { return Error::success(); } -TypeIndex -TypeSerializer::insertRecordBytesPrivate(MutableArrayRef Record) { - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast(Record.data()), Record.size()); - - TypeIndex NextTypeIndex = calcNextTypeIndex(); - auto Result = HashedRecords.try_emplace(S, NextTypeIndex); - if (Result.second) { - LastTypeIndex = NextTypeIndex; - SeenRecords.push_back(Record); - } - return Result.first->getValue(); -} - -TypeIndex -TypeSerializer::insertRecordBytesWithCopy(CVType &Record, - MutableArrayRef Data) { - assert(Data.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast(Data.data()), Data.size()); - - // Do a two state lookup / insert so that we don't have to allocate unless - // we're going - // to do an insert. This is a big memory savings. - auto Iter = HashedRecords.find(S); - if (Iter != HashedRecords.end()) - return Iter->second; - - LastTypeIndex = calcNextTypeIndex(); - uint8_t *Copy = RecordStorage.Allocate(Data.size()); - ::memcpy(Copy, Data.data(), Data.size()); - Data = MutableArrayRef(Copy, Data.size()); - S = StringRef(reinterpret_cast(Data.data()), Data.size()); - HashedRecords.insert(std::make_pair(S, LastTypeIndex)); - SeenRecords.push_back(Data); - Record.RecordData = Data; - return LastTypeIndex; -} - Expected> TypeSerializer::addPadding(MutableArrayRef Record) { uint32_t Align = Record.size() % 4; @@ -108,27 +157,79 @@ TypeSerializer::addPadding(MutableArrayRef Record) { return MutableArrayRef(Record.data(), Record.size() + N); } -TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage) - : RecordStorage(Storage), LastTypeIndex(), - RecordBuffer(MaxRecordLength * 2), +TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage, bool Hash) + : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2), Stream(RecordBuffer, llvm::support::little), Writer(Stream), Mapping(Writer) { // RecordBuffer needs to be able to hold enough data so that if we are 1 // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes, // we won't overflow. + if (Hash) + Hasher = make_unique(Storage); } -ArrayRef> TypeSerializer::records() const { +TypeSerializer::~TypeSerializer() = default; + +ArrayRef> TypeSerializer::records() const { return SeenRecords; } -TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; } +void TypeSerializer::reset() { + if (Hasher) + Hasher->reset(); + Writer.setOffset(0); + CurrentSegment = RecordSegment(); + FieldListSegments.clear(); + TypeKind.reset(); + MemberKind.reset(); + SeenRecords.clear(); +} -TypeIndex TypeSerializer::insertRecordBytes(MutableArrayRef Record) { +TypeIndex TypeSerializer::insertRecordBytes(ArrayRef &Record) { assert(!TypeKind.hasValue() && "Already in a type mapping!"); assert(Writer.getOffset() == 0 && "Stream has data already!"); - return insertRecordBytesPrivate(Record); + if (Hasher) { + TypeIndex ActualTI = Hasher->getOrCreateRecord(Record, nextTypeIndex()); + if (nextTypeIndex() == ActualTI) + SeenRecords.push_back(Record); + return ActualTI; + } + + TypeIndex NewTI = nextTypeIndex(); + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Record = ArrayRef(Stable, Record.size()); + SeenRecords.push_back(Record); + return NewTI; +} + +TypeIndex TypeSerializer::insertRecord(const RemappedType &Record) { + assert(!TypeKind.hasValue() && "Already in a type mapping!"); + assert(Writer.getOffset() == 0 && "Stream has data already!"); + + TypeIndex TI; + ArrayRef OriginalData = Record.OriginalRecord.RecordData; + if (Record.Mappings.empty()) { + // This record did not remap any type indices. Just write it. + return insertRecordBytes(OriginalData); + } + + // At least one type index was remapped. Before we can hash it we have to + // copy the full record bytes, re-write each type index, then hash the copy. + // We do this in temporary storage since only the DenseMap can decide whether + // this record already exists, and if it does we don't want the memory to + // stick around. + RemapStorage.resize(OriginalData.size()); + ::memcpy(&RemapStorage[0], OriginalData.data(), OriginalData.size()); + uint8_t *ContentBegin = RemapStorage.data() + sizeof(RecordPrefix); + for (const auto &M : Record.Mappings) { + // First 4 bytes of every record are the record prefix, but the mapping + // offset is relative to the content which starts after. + *(TypeIndex *)(ContentBegin + M.first) = M.second; + } + auto RemapRef = makeArrayRef(RemapStorage); + return insertRecordBytes(RemapRef); } Error TypeSerializer::visitTypeBegin(CVType &Record) { @@ -163,8 +264,13 @@ Expected TypeSerializer::visitTypeEndGetIndex(CVType &Record) { Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t); Record.Type = *TypeKind; - TypeIndex InsertedTypeIndex = - insertRecordBytesWithCopy(Record, ThisRecordData); + Record.RecordData = ThisRecordData; + + // insertRecordBytes assumes we're not in a mapping, so do this first. + TypeKind.reset(); + Writer.setOffset(0); + + TypeIndex InsertedTypeIndex = insertRecordBytes(Record.RecordData); // Write out each additional segment in reverse order, and update each // record's continuation index to point to the previous one. @@ -174,11 +280,9 @@ Expected TypeSerializer::visitTypeEndGetIndex(CVType &Record) { reinterpret_cast(CIBytes.data()); assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder"); *CI = InsertedTypeIndex.getIndex(); - InsertedTypeIndex = insertRecordBytesPrivate(X); + InsertedTypeIndex = insertRecordBytes(X); } - TypeKind.reset(); - Writer.setOffset(0); FieldListSegments.clear(); CurrentSegment.SubRecords.clear(); diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp index 46747f8eab9..71a0966df03 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -11,7 +11,9 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/DebugInfo/CodeView/CVTypeVisitor.h" +#include "llvm/DebugInfo/CodeView/TypeDeserializer.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/CodeView/TypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h" @@ -57,37 +59,56 @@ namespace { /// looking at the record kind. class TypeStreamMerger : public TypeVisitorCallbacks { public: - TypeStreamMerger(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, - SmallVectorImpl &SourceToDest, - TypeServerHandler *Handler) - : DestIdStream(DestIdStream), DestTypeStream(DestTypeStream), - FieldListBuilder(DestTypeStream), Handler(Handler), - IndexMap(SourceToDest) {} + explicit TypeStreamMerger(SmallVectorImpl &SourceToDest, + TypeServerHandler *Handler) + : Handler(Handler), IndexMap(SourceToDest) { + SourceToDest.clear(); + } static const TypeIndex Untranslated; -/// TypeVisitorCallbacks overrides. -#define TYPE_RECORD(EnumName, EnumVal, Name) \ - Error visitKnownRecord(CVType &CVR, Name##Record &Record) override; -#define MEMBER_RECORD(EnumName, EnumVal, Name) \ - Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override; -#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) -#include "llvm/DebugInfo/CodeView/TypeRecords.def" - - Error visitUnknownType(CVType &Record) override; - Error visitTypeBegin(CVType &Record) override; Error visitTypeEnd(CVType &Record) override; - Error visitMemberEnd(CVMemberRecord &Record) override; - Error mergeStream(const CVTypeArray &Types); + Error mergeTypesAndIds(TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes); + Error mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids); + Error mergeTypeRecords(TypeTableBuilder &Dest, const CVTypeArray &Types); private: + Error doit(const CVTypeArray &Types); + void addMapping(TypeIndex Idx); - bool remapIndex(TypeIndex &Idx); + bool remapTypeIndex(TypeIndex &Idx); + bool remapItemIndex(TypeIndex &Idx); + + bool remapIndices(RemappedType &Record, ArrayRef Refs) { + auto OriginalData = Record.OriginalRecord.content(); + bool Success = true; + for (auto &Ref : Refs) { + uint32_t Offset = Ref.Offset; + ArrayRef Bytes = + OriginalData.slice(Ref.Offset, sizeof(TypeIndex)); + ArrayRef TIs(reinterpret_cast(Bytes.data()), + Ref.Count); + for (auto TI : TIs) { + TypeIndex NewTI = TI; + bool ThisSuccess = (Ref.Kind == TiRefKind::IndexRef) + ? remapItemIndex(NewTI) + : remapTypeIndex(NewTI); + if (ThisSuccess && NewTI != TI) + Record.Mappings.emplace_back(Offset, NewTI); + Offset += sizeof(TypeIndex); + Success &= ThisSuccess; + } + } + return Success; + } + + bool remapIndex(TypeIndex &Idx, ArrayRef Map); size_t slotForIndex(TypeIndex Idx) const { assert(!Idx.isSimple() && "simple type indices have no slots"); @@ -98,50 +119,46 @@ private: return llvm::make_error(cv_error_code::corrupt_record); } - template - Error writeRecord(RecordType &R, bool RemapSuccess) { + Error writeRecord(TypeTableBuilder &Dest, const RemappedType &Record, + bool RemapSuccess) { TypeIndex DestIdx = Untranslated; if (RemapSuccess) - DestIdx = DestTypeStream.writeKnownType(R); + DestIdx = Dest.writeSerializedRecord(Record); addMapping(DestIdx); return Error::success(); } - template - Error writeIdRecord(RecordType &R, bool RemapSuccess) { - TypeIndex DestIdx = Untranslated; - if (RemapSuccess) - DestIdx = DestIdStream.writeKnownType(R); + Error writeTypeRecord(const CVType &Record) { + TypeIndex DestIdx = + DestTypeStream->writeSerializedRecord(Record.RecordData); addMapping(DestIdx); return Error::success(); } - template - Error writeMember(RecordType &R, bool RemapSuccess) { - if (RemapSuccess) - FieldListBuilder.writeMemberType(R); - else - HadUntranslatedMember = true; - return Error::success(); + Error writeTypeRecord(const RemappedType &Record, bool RemapSuccess) { + return writeRecord(*DestTypeStream, Record, RemapSuccess); + } + + Error writeIdRecord(const RemappedType &Record, bool RemapSuccess) { + return writeRecord(*DestIdStream, Record, RemapSuccess); } Optional LastError; bool IsSecondPass = false; - bool HadUntranslatedMember = false; - unsigned NumBadIndices = 0; - BumpPtrAllocator Allocator; - - TypeTableBuilder &DestIdStream; - TypeTableBuilder &DestTypeStream; - FieldListRecordBuilder FieldListBuilder; - TypeServerHandler *Handler; - TypeIndex CurIndex{TypeIndex::FirstNonSimpleIndex}; + TypeTableBuilder *DestIdStream = nullptr; + TypeTableBuilder *DestTypeStream = nullptr; + TypeServerHandler *Handler = nullptr; + + // If we're only mapping id records, this array contains the mapping for + // type records. + ArrayRef TypeLookup; + /// Map from source type index to destination type index. Indexed by source /// type index minus 0x1000. SmallVectorImpl &IndexMap; @@ -151,22 +168,34 @@ private: const TypeIndex TypeStreamMerger::Untranslated(SimpleTypeKind::NotTranslated); -Error TypeStreamMerger::visitTypeBegin(CVRecord &Rec) { +Error TypeStreamMerger::visitTypeBegin(CVType &Rec) { + RemappedType R(Rec); + SmallVector Refs; + discoverTypeIndices(Rec.RecordData, Refs); + bool Success = remapIndices(R, Refs); + switch (Rec.kind()) { + case TypeLeafKind::LF_FUNC_ID: + case TypeLeafKind::LF_MFUNC_ID: + case TypeLeafKind::LF_STRING_ID: + case TypeLeafKind::LF_SUBSTR_LIST: + case TypeLeafKind::LF_BUILDINFO: + case TypeLeafKind::LF_UDT_SRC_LINE: + case TypeLeafKind::LF_UDT_MOD_SRC_LINE: + return writeIdRecord(R, Success); + default: + return writeTypeRecord(R, Success); + } return Error::success(); } -Error TypeStreamMerger::visitTypeEnd(CVRecord &Rec) { - CurIndex = TypeIndex(CurIndex.getIndex() + 1); +Error TypeStreamMerger::visitTypeEnd(CVType &Rec) { + ++CurIndex; if (!IsSecondPass) assert(IndexMap.size() == slotForIndex(CurIndex) && "visitKnownRecord should add one index map entry"); return Error::success(); } -Error TypeStreamMerger::visitMemberEnd(CVMemberRecord &Rec) { - return Error::success(); -} - void TypeStreamMerger::addMapping(TypeIndex Idx) { if (!IsSecondPass) { assert(IndexMap.size() == slotForIndex(CurIndex) && @@ -178,7 +207,7 @@ void TypeStreamMerger::addMapping(TypeIndex Idx) { } } -bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { +bool TypeStreamMerger::remapIndex(TypeIndex &Idx, ArrayRef Map) { // Simple types are unchanged. if (Idx.isSimple()) return true; @@ -187,14 +216,14 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { // successfully. If it refers to a type later in the stream or a record we // had to defer, defer it until later pass. unsigned MapPos = slotForIndex(Idx); - if (MapPos < IndexMap.size() && IndexMap[MapPos] != Untranslated) { - Idx = IndexMap[MapPos]; + if (MapPos < Map.size() && Map[MapPos] != Untranslated) { + Idx = Map[MapPos]; return true; } // If this is the second pass and this index isn't in the map, then it points // outside the current type stream, and this is a corrupt record. - if (IsSecondPass && MapPos >= IndexMap.size()) { + if (IsSecondPass && MapPos >= Map.size()) { // FIXME: Print a more useful error. We can give the current record and the // index that we think its pointing to. LastError = joinErrors(std::move(*LastError), errorCorruptRecord()); @@ -208,241 +237,61 @@ bool TypeStreamMerger::remapIndex(TypeIndex &Idx) { return false; } -//----------------------------------------------------------------------------// -// Item records -//----------------------------------------------------------------------------// +bool TypeStreamMerger::remapTypeIndex(TypeIndex &Idx) { + // If we're mapping a pure index stream, then IndexMap only contains mappings + // from OldIdStream -> NewIdStream, in which case we will need to use the + // special mapping from OldTypeStream -> NewTypeStream which was computed + // externally. Regardless, we use this special map if and only if we are + // doing an id-only mapping. + if (DestTypeStream == nullptr) + return remapIndex(Idx, TypeLookup); -Error TypeStreamMerger::visitKnownRecord(CVType &, FuncIdRecord &R) { - bool Success = true; - Success &= remapIndex(R.ParentScope); - Success &= remapIndex(R.FunctionType); - return writeIdRecord(R, Success); + assert(TypeLookup.empty()); + return remapIndex(Idx, IndexMap); } -Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFuncIdRecord &R) { - bool Success = true; - Success &= remapIndex(R.ClassType); - Success &= remapIndex(R.FunctionType); - return writeIdRecord(R, Success); +bool TypeStreamMerger::remapItemIndex(TypeIndex &Idx) { + assert(DestIdStream); + return remapIndex(Idx, IndexMap); } -Error TypeStreamMerger::visitKnownRecord(CVType &, StringIdRecord &R) { - return writeIdRecord(R, remapIndex(R.Id)); +Error TypeStreamMerger::mergeTypeRecords(TypeTableBuilder &Dest, + const CVTypeArray &Types) { + DestTypeStream = &Dest; + + return doit(Types); } -Error TypeStreamMerger::visitKnownRecord(CVType &, StringListRecord &R) { - bool Success = true; - for (TypeIndex &Str : R.StringIndices) - Success &= remapIndex(Str); - return writeIdRecord(R, Success); +Error TypeStreamMerger::mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + const CVTypeArray &Ids) { + DestIdStream = &Dest; + TypeLookup = TypeSourceToDest; + + return doit(Ids); } -Error TypeStreamMerger::visitKnownRecord(CVType &, BuildInfoRecord &R) { - bool Success = true; - for (TypeIndex &Arg : R.ArgIndices) - Success &= remapIndex(Arg); - return writeIdRecord(R, Success); +Error TypeStreamMerger::mergeTypesAndIds(TypeTableBuilder &DestIds, + TypeTableBuilder &DestTypes, + const CVTypeArray &IdsAndTypes) { + DestIdStream = &DestIds; + DestTypeStream = &DestTypes; + + return doit(IdsAndTypes); } -Error TypeStreamMerger::visitKnownRecord(CVType &, UdtSourceLineRecord &R) { - bool Success = true; - Success &= remapIndex(R.UDT); - Success &= remapIndex(R.SourceFile); - // FIXME: Translate UdtSourceLineRecord into UdtModSourceLineRecords in the - // IPI stream. - return writeIdRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UdtModSourceLineRecord &R) { - bool Success = true; - Success &= remapIndex(R.UDT); - Success &= remapIndex(R.SourceFile); - return writeIdRecord(R, Success); -} - -//----------------------------------------------------------------------------// -// Type records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownRecord(CVType &, ModifierRecord &R) { - return writeRecord(R, remapIndex(R.ModifiedType)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ProcedureRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReturnType); - Success &= remapIndex(R.ArgumentList); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, MemberFunctionRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReturnType); - Success &= remapIndex(R.ClassType); - Success &= remapIndex(R.ThisType); - Success &= remapIndex(R.ArgumentList); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &Type, ArgListRecord &R) { - bool Success = true; - for (TypeIndex &Arg : R.ArgIndices) - Success &= remapIndex(Arg); - if (auto EC = writeRecord(R, Success)) - return EC; - return Error::success(); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, PointerRecord &R) { - bool Success = true; - Success &= remapIndex(R.ReferentType); - if (R.isPointerToMember()) - Success &= remapIndex(R.MemberInfo->ContainingType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ArrayRecord &R) { - bool Success = true; - Success &= remapIndex(R.ElementType); - Success &= remapIndex(R.IndexType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, ClassRecord &R) { - bool Success = true; - Success &= remapIndex(R.FieldList); - Success &= remapIndex(R.DerivationList); - Success &= remapIndex(R.VTableShape); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, UnionRecord &R) { - return writeRecord(R, remapIndex(R.FieldList)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, EnumRecord &R) { - bool Success = true; - Success &= remapIndex(R.FieldList); - Success &= remapIndex(R.UnderlyingType); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, BitFieldRecord &R) { - return writeRecord(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableShapeRecord &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, TypeServer2Record &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, LabelRecord &R) { - return writeRecord(R, true); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, VFTableRecord &R) { - bool Success = true; - Success &= remapIndex(R.CompleteClass); - Success &= remapIndex(R.OverriddenVFTable); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, - MethodOverloadListRecord &R) { - bool Success = true; - for (OneMethodRecord &Meth : R.Methods) - Success &= remapIndex(Meth.Type); - return writeRecord(R, Success); -} - -Error TypeStreamMerger::visitKnownRecord(CVType &, FieldListRecord &R) { - // Visit the members inside the field list. - HadUntranslatedMember = false; - FieldListBuilder.begin(); - if (auto EC = codeview::visitMemberRecordStream(R.Data, *this)) - return EC; - - // Write the record if we translated all field list members. - TypeIndex DestIdx = Untranslated; - if (!HadUntranslatedMember) - DestIdx = FieldListBuilder.end(); - else - FieldListBuilder.reset(); - addMapping(DestIdx); - - return Error::success(); -} - -//----------------------------------------------------------------------------// -// Member records -//----------------------------------------------------------------------------// - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - NestedTypeRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, OneMethodRecord &R) { - bool Success = true; - Success &= remapIndex(R.Type); - return writeMember(R, Success); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - OverloadedMethodRecord &R) { - return writeMember(R, remapIndex(R.MethodList)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - DataMemberRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - StaticDataMemberRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - EnumeratorRecord &R) { - return writeMember(R, true); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, VFPtrRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, BaseClassRecord &R) { - return writeMember(R, remapIndex(R.Type)); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - VirtualBaseClassRecord &R) { - bool Success = true; - Success &= remapIndex(R.BaseType); - Success &= remapIndex(R.VBPtrType); - return writeMember(R, Success); -} - -Error TypeStreamMerger::visitKnownMember(CVMemberRecord &, - ListContinuationRecord &R) { - return writeMember(R, remapIndex(R.ContinuationIndex)); -} - -Error TypeStreamMerger::visitUnknownType(CVType &Rec) { - // We failed to translate a type. Translate this index as "not translated". - addMapping(TypeIndex(SimpleTypeKind::NotTranslated)); - return errorCorruptRecord(); -} - -Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { - assert(IndexMap.empty()); +Error TypeStreamMerger::doit(const CVTypeArray &Types) { LastError = Error::success(); - if (auto EC = codeview::visitTypeStream(Types, *this, Handler)) + // We don't want to deserialize records. I guess this flag is poorly named, + // but it really means "Don't deserialize records before switching on the + // concrete type. + // FIXME: We can probably get even more speed here if we don't use the visitor + // pipeline here, but instead write the switch ourselves. I don't think it + // would buy us much since it's already pretty fast, but it's probably worth + // a few cycles. + if (auto EC = + codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler)) return EC; // If we found bad indices but no other errors, try doing another pass and see @@ -458,7 +307,8 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { NumBadIndices = 0; CurIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); - if (auto EC = codeview::visitTypeStream(Types, *this, Handler)) + if (auto EC = + codeview::visitTypeStream(Types, *this, VDS_BytesExternal, Handler)) return EC; assert(NumBadIndices <= BadIndicesRemaining && @@ -469,18 +319,32 @@ Error TypeStreamMerger::mergeStream(const CVTypeArray &Types) { } } - IndexMap.clear(); - Error Ret = std::move(*LastError); LastError.reset(); return Ret; } -Error llvm::codeview::mergeTypeStreams(TypeTableBuilder &DestIdStream, - TypeTableBuilder &DestTypeStream, +Error llvm::codeview::mergeTypeRecords(TypeTableBuilder &Dest, SmallVectorImpl &SourceToDest, TypeServerHandler *Handler, - const CVTypeArray &Types) { - return TypeStreamMerger(DestIdStream, DestTypeStream, SourceToDest, Handler) - .mergeStream(Types); + const CVTypeArray &Types) { + TypeStreamMerger M(SourceToDest, Handler); + return M.mergeTypeRecords(Dest, Types); +} + +Error llvm::codeview::mergeIdRecords(TypeTableBuilder &Dest, + ArrayRef TypeSourceToDest, + SmallVectorImpl &SourceToDest, + const CVTypeArray &Ids) { + TypeStreamMerger M(SourceToDest, nullptr); + return M.mergeIdRecords(Dest, TypeSourceToDest, Ids); +} + +Error llvm::codeview::mergeTypeAndIdRecords( + TypeTableBuilder &DestIds, TypeTableBuilder &DestTypes, + SmallVectorImpl &SourceToDest, TypeServerHandler *Handler, + const CVTypeArray &IdsAndTypes) { + + TypeStreamMerger M(SourceToDest, Handler); + return M.mergeTypesAndIds(DestIds, DestTypes, IdsAndTypes); } diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp index a18710d6ab5..699694fde92 100644 --- a/contrib/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -24,8 +24,7 @@ static void error(Error &&EC) { consumeError(std::move(EC)); } -TypeTableCollection::TypeTableCollection( - ArrayRef> Records) +TypeTableCollection::TypeTableCollection(ArrayRef> Records) : Records(Records), Database(Records.size()) {} Optional TypeTableCollection::getFirst() { diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp index 8e7c6c43d1a..5ed55ce4c0d 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp @@ -60,12 +60,15 @@ typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind; typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind; uint64_t llvm::getRelocatedValue(const DataExtractor &Data, uint32_t Size, - uint32_t *Off, const RelocAddrMap *Relocs) { + uint32_t *Off, const RelocAddrMap *Relocs, + uint64_t *SectionIndex) { if (!Relocs) return Data.getUnsigned(Off, Size); RelocAddrMap::const_iterator AI = Relocs->find(*Off); if (AI == Relocs->end()) return Data.getUnsigned(Off, Size); + if (SectionIndex) + *SectionIndex = AI->second.SectionIndex; return Data.getUnsigned(Off, Size) + AI->second.Value; } @@ -287,6 +290,15 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH, getStringSection(), isLittleEndian()); } +DWARFCompileUnit *DWARFContext::getDWOCompileUnitForHash(uint64_t Hash) { + // FIXME: Improve this for the case where this DWO file is really a DWP file + // with an index - use the index for lookup instead of a linear search. + for (const auto &DWOCU : dwo_compile_units()) + if (DWOCU->getDWOId() == Hash) + return DWOCU.get(); + return nullptr; +} + DWARFDie DWARFContext::getDIEForOffset(uint32_t Offset) { parseCompileUnits(); if (auto *CU = CUs.getUnitForOffset(Offset)) @@ -897,28 +909,81 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } +std::shared_ptr +DWARFContext::getDWOContext(StringRef AbsolutePath) { + if (auto S = DWP.lock()) { + DWARFContext *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); + } + + std::weak_ptr *Entry = &DWOFiles[AbsolutePath]; + + if (auto S = Entry->lock()) { + DWARFContext *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); + } + + SmallString<128> DWPName; + Expected> Obj = [&] { + if (!CheckedForDWP) { + (getFileName() + ".dwp").toVector(DWPName); + auto Obj = object::ObjectFile::createObjectFile(DWPName); + if (Obj) { + Entry = &DWP; + return Obj; + } else { + CheckedForDWP = true; + // TODO: Should this error be handled (maybe in a high verbosity mode) + // before falling back to .dwo files? + consumeError(Obj.takeError()); + } + } + + return object::ObjectFile::createObjectFile(AbsolutePath); + }(); + + if (!Obj) { + // TODO: Actually report errors helpfully. + consumeError(Obj.takeError()); + return nullptr; + } + + auto S = std::make_shared(); + S->File = std::move(Obj.get()); + S->Context = llvm::make_unique(*S->File.getBinary()); + *Entry = S; + auto *Ctxt = S->Context.get(); + return std::shared_ptr(std::move(S), Ctxt); +} + static Error createError(const Twine &Reason, llvm::Error E) { return make_error(Reason + toString(std::move(E)), inconvertibleErrorCode()); } -/// Returns the address of symbol relocation used against. Used for futher -/// relocations computation. Symbol's section load address is taken in account if -/// LoadedObjectInfo interface is provided. -static Expected -getSymbolAddress(const object::ObjectFile &Obj, const RelocationRef &Reloc, - const LoadedObjectInfo *L, - std::map &Cache) { - uint64_t Ret = 0; +/// SymInfo contains information about symbol: it's address +/// and section index which is -1LL for absolute symbols. +struct SymInfo { + uint64_t Address; + uint64_t SectionIndex; +}; + +/// Returns the address of symbol relocation used against and a section index. +/// Used for futher relocations computation. Symbol's section load address is +static Expected getSymbolInfo(const object::ObjectFile &Obj, + const RelocationRef &Reloc, + const LoadedObjectInfo *L, + std::map &Cache) { + SymInfo Ret = {0, (uint64_t)-1LL}; object::section_iterator RSec = Obj.section_end(); object::symbol_iterator Sym = Reloc.getSymbol(); - std::map::iterator CacheIt = Cache.end(); + std::map::iterator CacheIt = Cache.end(); // First calculate the address of the symbol or section as it appears // in the object file if (Sym != Obj.symbol_end()) { bool New; - std::tie(CacheIt, New) = Cache.insert({*Sym, 0}); + std::tie(CacheIt, New) = Cache.insert({*Sym, {0, 0}}); if (!New) return CacheIt->second; @@ -934,12 +999,15 @@ getSymbolAddress(const object::ObjectFile &Obj, const RelocationRef &Reloc, SectOrErr.takeError()); RSec = *SectOrErr; - Ret = *SymAddrOrErr; + Ret.Address = *SymAddrOrErr; } else if (auto *MObj = dyn_cast(&Obj)) { RSec = MObj->getRelocationSection(Reloc.getRawDataRefImpl()); - Ret = RSec->getAddress(); + Ret.Address = RSec->getAddress(); } + if (RSec != Obj.section_end()) + Ret.SectionIndex = RSec->getIndex(); + // If we are given load addresses for the sections, we need to adjust: // SymAddr = (Address of Symbol Or Section in File) - // (Address of Section in File) + @@ -949,7 +1017,7 @@ getSymbolAddress(const object::ObjectFile &Obj, const RelocationRef &Reloc, // we need to perform the same computation. if (L && RSec != Obj.section_end()) if (uint64_t SectionLoadAddress = L->getSectionLoadAddress(*RSec)) - Ret += SectionLoadAddress - RSec->getAddress(); + Ret.Address += SectionLoadAddress - RSec->getAddress(); if (CacheIt != Cache.end()) CacheIt->second = Ret; @@ -989,8 +1057,8 @@ Error DWARFContextInMemory::maybeDecompress(const SectionRef &Sec, } DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, - const LoadedObjectInfo *L) - : IsLittleEndian(Obj.isLittleEndian()), + const LoadedObjectInfo *L) + : FileName(Obj.getFileName()), IsLittleEndian(Obj.isLittleEndian()), AddressSize(Obj.getBytesInAddress()) { for (const SectionRef &Section : Obj.sections()) { StringRef name; @@ -1008,7 +1076,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, // Try to obtain an already relocated version of this section. // Else use the unrelocated section from the object file. We'll have to // apply relocations ourselves later. - if (!L || !L->getLoadedSectionContents(*RelocatedSection,data)) + if (!L || !L->getLoadedSectionContents(*RelocatedSection, data)) Section.getContents(data); if (auto Err = maybeDecompress(Section, name, data)) { @@ -1047,7 +1115,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, // If the section we're relocating was relocated already by the JIT, // then we used the relocated version above, so we do not need to process // relocations for it now. - if (L && L->getLoadedSectionContents(*RelocatedSection,RelSecData)) + if (L && L->getLoadedSectionContents(*RelocatedSection, RelSecData)) continue; // In Mach-o files, the relocations do not need to be applied if @@ -1091,29 +1159,30 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj, if (Section.relocation_begin() == Section.relocation_end()) continue; - std::map AddrCache; + // Symbol to [address, section index] cache mapping. + std::map AddrCache; for (const RelocationRef &Reloc : Section.relocations()) { // FIXME: it's not clear how to correctly handle scattered // relocations. if (isRelocScattered(Obj, Reloc)) continue; - Expected SymAddrOrErr = - getSymbolAddress(Obj, Reloc, L, AddrCache); - if (!SymAddrOrErr) { - errs() << toString(SymAddrOrErr.takeError()) << '\n'; + Expected SymInfoOrErr = getSymbolInfo(Obj, Reloc, L, AddrCache); + if (!SymInfoOrErr) { + errs() << toString(SymInfoOrErr.takeError()) << '\n'; continue; } object::RelocVisitor V(Obj); - uint64_t Val = V.visit(Reloc.getType(), Reloc, *SymAddrOrErr); + uint64_t Val = V.visit(Reloc.getType(), Reloc, SymInfoOrErr->Address); if (V.error()) { SmallString<32> Name; Reloc.getTypeName(Name); errs() << "error: failed to compute relocation: " << Name << "\n"; continue; } - Map->insert({Reloc.getOffset(), {Val}}); + llvm::RelocAddrEntry Rel = {SymInfoOrErr->SectionIndex, Val}; + Map->insert({Reloc.getOffset(), Rel}); } } } diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp index 8da797750ab..6b5e1d3c931 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugRangeList.cpp @@ -35,8 +35,8 @@ bool DWARFDebugRangeList::extract(DataExtractor data, uint32_t *offset_ptr, while (true) { RangeListEntry entry; uint32_t prev_offset = *offset_ptr; - entry.StartAddress = - getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); + entry.StartAddress = getRelocatedValue(data, AddressSize, offset_ptr, + &Relocs, &entry.SectionIndex); entry.EndAddress = getRelocatedValue(data, AddressSize, offset_ptr, &Relocs); @@ -69,8 +69,8 @@ DWARFDebugRangeList::getAbsoluteRanges(uint64_t BaseAddress) const { if (RLE.isBaseAddressSelectionEntry(AddressSize)) { BaseAddress = RLE.EndAddress; } else { - Res.push_back( - {BaseAddress + RLE.StartAddress, BaseAddress + RLE.EndAddress}); + Res.push_back({BaseAddress + RLE.StartAddress, + BaseAddress + RLE.EndAddress, RLE.SectionIndex}); } } return Res; diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp index e3bd759ba94..fd45c77d374 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp @@ -211,13 +211,16 @@ Optional DWARFDie::getHighPC(uint64_t LowPC) const { return None; } -bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const { - auto LowPcAddr = toAddress(find(DW_AT_low_pc)); +bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC, + uint64_t &SectionIndex) const { + auto F = find(DW_AT_low_pc); + auto LowPcAddr = toAddress(F); if (!LowPcAddr) return false; if (auto HighPcAddr = getHighPC(*LowPcAddr)) { LowPC = *LowPcAddr; HighPC = *HighPcAddr; + SectionIndex = F->getSectionIndex(); return true; } return false; @@ -228,9 +231,9 @@ DWARFDie::getAddressRanges() const { if (isNULL()) return DWARFAddressRangesVector(); // Single range specified by low/high PC. - uint64_t LowPC, HighPC; - if (getLowAndHighPC(LowPC, HighPC)) - return {{LowPC, HighPC}}; + uint64_t LowPC, HighPC, Index; + if (getLowAndHighPC(LowPC, HighPC, Index)) + return {{LowPC, HighPC, Index}}; // Multiple ranges from .debug_ranges section. auto RangesOffset = toSectionOffset(find(DW_AT_ranges)); diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp index 1cbd3ea2c86..0963d7bfd71 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -333,8 +333,8 @@ bool DWARFFormValue::extractValue(const DataExtractor &Data, return false; uint16_t AddrSize = (Form == DW_FORM_addr) ? U->getAddressByteSize() : U->getRefAddrByteSize(); - Value.uval = - getRelocatedValue(Data, AddrSize, OffsetPtr, U->getRelocMap()); + Value.uval = getRelocatedValue(Data, AddrSize, OffsetPtr, + U->getRelocMap(), &Value.SectionIndex); break; } case DW_FORM_exprloc: diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp index c268afc222c..c5add6a478b 100644 --- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp @@ -249,23 +249,6 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { return DieArray.size(); } -DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath, uint64_t DWOId) { - auto Obj = object::ObjectFile::createObjectFile(DWOPath); - if (!Obj) { - // TODO: Actually report errors helpfully. - consumeError(Obj.takeError()); - return; - } - DWOFile = std::move(Obj.get()); - DWOContext.reset( - cast(new DWARFContextInMemory(*DWOFile.getBinary()))); - for (const auto &DWOCU : DWOContext->dwo_compile_units()) - if (DWOCU->getDWOId() == DWOId) { - DWOU = DWOCU.get(); - return; - } -} - bool DWARFUnit::parseDWO() { if (isDWO) return false; @@ -287,16 +270,18 @@ bool DWARFUnit::parseDWO() { auto DWOId = getDWOId(); if (!DWOId) return false; - DWO = llvm::make_unique(AbsolutePath, *DWOId); - DWARFUnit *DWOCU = DWO->getUnit(); - if (!DWOCU) { - DWO.reset(); + auto DWOContext = Context.getDWOContext(AbsolutePath); + if (!DWOContext) return false; - } + + DWARFCompileUnit *DWOCU = DWOContext->getDWOCompileUnitForHash(*DWOId); + if (!DWOCU) + return false; + DWO = std::shared_ptr(std::move(DWOContext), DWOCU); // Share .debug_addr and .debug_ranges section with compile unit in .dwo - DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); + DWO->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); auto DWORangesBase = UnitDie.getRangesBaseAttribute(); - DWOCU->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0); + DWO->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0); return true; } @@ -339,8 +324,8 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) { // Collect address ranges from DIEs in .dwo if necessary. bool DWOCreated = parseDWO(); - if (DWO.get()) - DWO->getUnit()->collectAddressRanges(CURanges); + if (DWO) + DWO->collectAddressRanges(CURanges); if (DWOCreated) DWO.reset(); @@ -400,7 +385,7 @@ DWARFUnit::getInlinedChainForAddress(uint64_t Address, // First, find the subroutine that contains the given address (the leaf // of inlined chain). DWARFDie SubroutineDIE = - (DWO ? DWO->getUnit() : this)->getSubroutineForAddress(Address); + (DWO ? DWO.get() : this)->getSubroutineForAddress(Address); while (SubroutineDIE) { if (SubroutineDIE.isSubroutineDIE()) diff --git a/contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp b/contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp index 57953cfa338..dfdeb841421 100644 --- a/contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp +++ b/contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp @@ -45,18 +45,17 @@ static Interval intersect(const Interval &I1, const Interval &I2) { std::min(I1.second, I2.second)); } -MappedBlockStream::MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks, +MappedBlockStream::MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData) - : BlockSize(BlockSize), NumBlocks(NumBlocks), StreamLayout(Layout), - MsfData(MsfData) {} + : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData) {} std::unique_ptr -MappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks, +MappedBlockStream::createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData) { return llvm::make_unique>( - BlockSize, NumBlocks, Layout, MsfData); + BlockSize, Layout, MsfData); } std::unique_ptr MappedBlockStream::createIndexedStream( @@ -66,7 +65,7 @@ std::unique_ptr MappedBlockStream::createIndexedStream( SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; return llvm::make_unique>( - Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + Layout.SB->BlockSize, SL, MsfData); } std::unique_ptr @@ -75,7 +74,7 @@ MappedBlockStream::createDirectoryStream(const MSFLayout &Layout, MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData); } std::unique_ptr @@ -83,7 +82,7 @@ MappedBlockStream::createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData); } Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, @@ -173,7 +172,7 @@ Error MappedBlockStream::readLongestContiguousChunk(uint32_t Offset, uint32_t First = Offset / BlockSize; uint32_t Last = First; - while (Last < NumBlocks - 1) { + while (Last < getNumBlocks() - 1) { if (StreamLayout.Blocks[Last] != StreamLayout.Blocks[Last + 1] - 1) break; ++Last; @@ -313,17 +312,16 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset, } WritableMappedBlockStream::WritableMappedBlockStream( - uint32_t BlockSize, uint32_t NumBlocks, const MSFStreamLayout &Layout, + uint32_t BlockSize, const MSFStreamLayout &Layout, WritableBinaryStreamRef MsfData) - : ReadInterface(BlockSize, NumBlocks, Layout, MsfData), - WriteInterface(MsfData) {} + : ReadInterface(BlockSize, Layout, MsfData), WriteInterface(MsfData) {} std::unique_ptr -WritableMappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks, +WritableMappedBlockStream::createStream(uint32_t BlockSize, const MSFStreamLayout &Layout, WritableBinaryStreamRef MsfData) { return llvm::make_unique>( - BlockSize, NumBlocks, Layout, MsfData); + BlockSize, Layout, MsfData); } std::unique_ptr @@ -334,7 +332,7 @@ WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout, MSFStreamLayout SL; SL.Blocks = Layout.StreamMap[StreamIndex]; SL.Length = Layout.StreamSizes[StreamIndex]; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData); } std::unique_ptr @@ -343,7 +341,7 @@ WritableMappedBlockStream::createDirectoryStream( MSFStreamLayout SL; SL.Blocks = Layout.DirectoryBlocks; SL.Length = Layout.SB->NumDirectoryBytes; - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData); } std::unique_ptr @@ -351,7 +349,7 @@ WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData) { MSFStreamLayout SL; initializeFpmStreamLayout(Layout, SL); - return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData); + return createStream(Layout.SB->BlockSize, SL, MsfData); } Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size, diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp index c19a2f0d311..23c7456d777 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp @@ -129,16 +129,21 @@ uint32_t DbiStreamBuilder::calculateSectionMapStreamSize() const { return sizeof(SecMapHeader) + sizeof(SecMapEntry) * SectionMap.size(); } -uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const { - uint32_t Size = 0; - Size += sizeof(ulittle16_t); // NumModules - Size += sizeof(ulittle16_t); // NumSourceFiles - Size += ModiList.size() * sizeof(ulittle16_t); // ModIndices - Size += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts +uint32_t DbiStreamBuilder::calculateNamesOffset() const { + uint32_t Offset = 0; + Offset += sizeof(ulittle16_t); // NumModules + Offset += sizeof(ulittle16_t); // NumSourceFiles + Offset += ModiList.size() * sizeof(ulittle16_t); // ModIndices + Offset += ModiList.size() * sizeof(ulittle16_t); // ModFileCounts uint32_t NumFileInfos = 0; for (const auto &M : ModiList) NumFileInfos += M->source_files().size(); - Size += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets + Offset += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets + return Offset; +} + +uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const { + uint32_t Size = calculateNamesOffset(); Size += calculateNamesBufferSize(); return alignTo(Size, sizeof(uint32_t)); } @@ -157,9 +162,8 @@ uint32_t DbiStreamBuilder::calculateDbgStreamsSize() const { Error DbiStreamBuilder::generateFileInfoSubstream() { uint32_t Size = calculateFileInfoSubstreamSize(); - uint32_t NameSize = calculateNamesBufferSize(); auto Data = Allocator.Allocate(Size); - uint32_t NamesOffset = Size - NameSize; + uint32_t NamesOffset = calculateNamesOffset(); FileInfoBuffer = MutableBinaryByteStream(MutableArrayRef(Data, Size), llvm::support::little); @@ -207,6 +211,9 @@ Error DbiStreamBuilder::generateFileInfoSubstream() { } } + if (auto EC = NameBufferWriter.padToAlignment(sizeof(uint32_t))) + return EC; + if (NameBufferWriter.bytesRemaining() > 0) return make_error(raw_error_code::invalid_format, "The names buffer contained unexpected data."); diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp index f00567db743..9fd90102f72 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/PDBTypeServerHandler.cpp @@ -47,7 +47,7 @@ void PDBTypeServerHandler::addSearchPath(StringRef Path) { if (Path.empty() || !sys::fs::is_directory(Path)) return; - SearchPaths.push_back(Path); + SearchPaths.insert(Path); } Expected @@ -57,7 +57,13 @@ PDBTypeServerHandler::handleInternal(PDBFile &File, if (!ExpectedTpi) return ExpectedTpi.takeError(); - if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks)) + // For handling a type server, we should be using whatever the callback array + // was + // that is being used for the original file. We shouldn't allow the visitor + // to + // arbitrarily stick a deserializer in there. + if (auto EC = codeview::visitTypeStream(ExpectedTpi->typeArray(), Callbacks, + VDS_BytesExternal)) return std::move(EC); return true; @@ -80,13 +86,14 @@ Expected PDBTypeServerHandler::handle(TypeServer2Record &TS, cv_error_code::corrupt_record, "TypeServer2Record does not contain filename!"); - for (auto Path : SearchPaths) { - sys::path::append(Path, File); - if (!sys::fs::exists(Path)) + for (auto &Path : SearchPaths) { + SmallString<64> PathStr = Path.getKey(); + sys::path::append(PathStr, File); + if (!sys::fs::exists(PathStr)) continue; std::unique_ptr ThisSession; - if (auto EC = loadDataForPDB(PDB_ReaderType::Native, Path, ThisSession)) { + if (auto EC = loadDataForPDB(PDB_ReaderType::Native, PathStr, ThisSession)) { // It is not an error if this PDB fails to load, it just means that it // doesn't match and we should continue searching. ignoreErrors(std::move(EC)); diff --git a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp index 8e006587389..623afb371b5 100644 --- a/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/contrib/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -8,7 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/DebugInfo/PDB/Native/TpiStream.h" + #include "llvm/ADT/iterator_range.h" +#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h" #include "llvm/DebugInfo/CodeView/TypeRecord.h" #include "llvm/DebugInfo/MSF/MappedBlockStream.h" #include "llvm/DebugInfo/PDB/Native/PDBFile.h" @@ -104,6 +106,8 @@ Error TpiStream::reload() { HashStream = std::move(HS); } + Types = llvm::make_unique( + TypeRecords, getNumTypeRecords(), getTypeIndexOffsets()); return Error::success(); } diff --git a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp index f454ae61d96..34f4017d982 100644 --- a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp +++ b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp @@ -2525,6 +2525,9 @@ static std::string base_name(std::string &s) { ++p0; break; } + if (!isalpha(*p0) && !isdigit(*p0) && *p0 != '_') { + return std::string(); + } } return std::string(p0, pe); } @@ -2612,39 +2615,45 @@ static const char *parse_unnamed_type_name(const char *first, const char *last, first = t0 + 1; } break; case 'l': { + size_t lambda_pos = db.names.size(); db.names.push_back(std::string("'lambda'(")); const char *t0 = first + 2; if (first[2] == 'v') { db.names.back().first += ')'; ++t0; } else { - const char *t1 = parse_type(t0, last, db); - if (t1 == t0) { + bool is_first_it = true; + while (true) { + long k0 = static_cast(db.names.size()); + const char *t1 = parse_type(t0, last, db); + long k1 = static_cast(db.names.size()); + if (t1 == t0) + break; + if (k0 >= k1) + return first; + // If the call to parse_type above found a pack expansion + // substitution, then multiple names could have been + // inserted into the name table. Walk through the names, + // appending each onto the lambda's parameter list. + std::for_each(db.names.begin() + k0, db.names.begin() + k1, + [&](typename C::sub_type::value_type &pair) { + if (pair.empty()) + return; + auto &lambda = db.names[lambda_pos].first; + if (!is_first_it) + lambda.append(", "); + is_first_it = false; + lambda.append(pair.move_full()); + }); + db.names.erase(db.names.begin() + k0, db.names.end()); + t0 = t1; + } + if (is_first_it) { if (!db.names.empty()) db.names.pop_back(); return first; } - if (db.names.size() < 2) - return first; - auto tmp = db.names.back().move_full(); - db.names.pop_back(); - db.names.back().first.append(tmp); - t0 = t1; - while (true) { - t1 = parse_type(t0, last, db); - if (t1 == t0) - break; - if (db.names.size() < 2) - return first; - tmp = db.names.back().move_full(); - db.names.pop_back(); - if (!tmp.empty()) { - db.names.back().first.append(", "); - db.names.back().first.append(tmp); - } - t0 = t1; - } - if (db.names.empty()) + if (db.names.empty() || db.names.size() - 1 != lambda_pos) return first; db.names.back().first.append(")"); } @@ -4030,6 +4039,8 @@ static const char *parse_encoding(const char *first, const char *last, C &db) { save_value sb(db.tag_templates); if (db.encoding_depth > 1) db.tag_templates = true; + save_value sp(db.parsed_ctor_dtor_cv); + db.parsed_ctor_dtor_cv = false; switch (*first) { case 'G': case 'T': @@ -4229,6 +4240,7 @@ template struct string_pair { template string_pair(const char (&s)[N]) : first(s, N - 1) {} size_t size() const { return first.size() + second.size(); } + bool empty() const { return first.empty() && second.empty(); } StrT full() const { return first + second; } StrT move_full() { return std::move(first) + std::move(second); } }; diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 660843765b3..9ce3974529b 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -737,23 +737,23 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, writeInt16BE(LocalAddress, applyPPCha(Delta)); } break; case ELF::R_PPC64_ADDR32: { - int32_t Result = static_cast(Value + Addend); - if (SignExtend32<32>(Result) != Result) + int64_t Result = static_cast(Value + Addend); + if (SignExtend64<32>(Result) != Result) llvm_unreachable("Relocation R_PPC64_ADDR32 overflow"); writeInt32BE(LocalAddress, Result); } break; case ELF::R_PPC64_REL24: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); - int32_t delta = static_cast(Value - FinalAddress + Addend); - if (SignExtend32<26>(delta) != delta) + int64_t delta = static_cast(Value - FinalAddress + Addend); + if (SignExtend64<26>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL24 overflow"); // Generates a 'bl
' instruction writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC)); } break; case ELF::R_PPC64_REL32: { uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset); - int32_t delta = static_cast(Value - FinalAddress + Addend); - if (SignExtend32<32>(delta) != delta) + int64_t delta = static_cast(Value - FinalAddress + Addend); + if (SignExtend64<32>(delta) != delta) llvm_unreachable("Relocation R_PPC64_REL32 overflow"); writeInt32BE(LocalAddress, delta); } break; @@ -1324,12 +1324,13 @@ RuntimeDyldELF::processRelocationRef( Obj.getPlatformFlags(AbiVariant); AbiVariant &= ELF::EF_PPC64_ABI; // A PPC branch relocation will need a stub function if the target is - // an external symbol (Symbol::ST_Unknown) or if the target address - // is not within the signed 24-bits branch address. + // an external symbol (either Value.SymbolName is set, or SymType is + // Symbol::ST_Unknown) or if the target address is not within the + // signed 24-bits branch address. SectionEntry &Section = Sections[SectionID]; uint8_t *Target = Section.getAddressWithOffset(Offset); bool RangeOverflow = false; - if (SymType != SymbolRef::ST_Unknown) { + if (!Value.SymbolName && SymType != SymbolRef::ST_Unknown) { if (AbiVariant != 2) { // In the ELFv1 ABI, a function call may point to the .opd entry, // so the final symbol value is calculated based on the relocation @@ -1344,21 +1345,19 @@ RuntimeDyldELF::processRelocationRef( } uint8_t *RelocTarget = Sections[Value.SectionID].getAddressWithOffset(Value.Addend); - int32_t delta = static_cast(Target - RelocTarget); + int64_t delta = static_cast(Target - RelocTarget); // If it is within 26-bits branch range, just set the branch target - if (SignExtend32<26>(delta) == delta) { + if (SignExtend64<26>(delta) == delta) { RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); - if (Value.SymbolName) - addRelocationForSymbol(RE, Value.SymbolName); - else - addRelocationForSection(RE, Value.SectionID); + addRelocationForSection(RE, Value.SectionID); } else { RangeOverflow = true; } } - if (SymType == SymbolRef::ST_Unknown || RangeOverflow) { - // It is an external symbol (SymbolRef::ST_Unknown) or within a range - // larger than 24-bits. + if (Value.SymbolName || SymType == SymbolRef::ST_Unknown || + RangeOverflow) { + // It is an external symbol (either Value.SymbolName is set, or + // SymType is SymbolRef::ST_Unknown) or out of range. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { // Symbol function stub already created, just relocate to it @@ -1412,7 +1411,7 @@ RuntimeDyldELF::processRelocationRef( RelType, 0); Section.advanceStubOffset(getMaxStubSize()); } - if (SymType == SymbolRef::ST_Unknown) { + if (Value.SymbolName || SymType == SymbolRef::ST_Unknown) { // Restore the TOC for external calls if (AbiVariant == 2) writeInt32BE(Target + 4, 0xE8410018); // ld r2,28(r1) diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h index acfac316e91..4ed7b021883 100644 --- a/contrib/llvm/lib/IR/AttributeImpl.h +++ b/contrib/llvm/lib/IR/AttributeImpl.h @@ -212,27 +212,21 @@ using IndexAttrPair = std::pair; /// return type, and parameters. class AttributeListImpl final : public FoldingSetNode, - private TrailingObjects { + private TrailingObjects { friend class AttributeList; friend TrailingObjects; private: - LLVMContext &Context; - unsigned NumSlots; ///< Number of entries in this set. /// Bitset with a bit for each available attribute Attribute::AttrKind. uint64_t AvailableFunctionAttrs; + LLVMContext &Context; + unsigned NumAttrSets; ///< Number of entries in this set. // Helper fn for TrailingObjects class. - size_t numTrailingObjects(OverloadToken) { return NumSlots; } - - /// \brief Return a pointer to the IndexAttrPair for the specified slot. - const IndexAttrPair *getSlotPair(unsigned Slot) const { - return getTrailingObjects() + Slot; - } + size_t numTrailingObjects(OverloadToken) { return NumAttrSets; } public: - AttributeListImpl(LLVMContext &C, - ArrayRef> Slots); + AttributeListImpl(LLVMContext &C, ArrayRef Sets); // AttributesSetImpt is uniqued, these should not be available. AttributeListImpl(const AttributeListImpl &) = delete; @@ -243,41 +237,18 @@ public: /// \brief Get the context that created this AttributeListImpl. LLVMContext &getContext() { return Context; } - /// \brief Return the number of slots used in this attribute list. This is - /// the number of arguments that have an attribute set on them (including the - /// function itself). - unsigned getNumSlots() const { return NumSlots; } - - /// \brief Get the index of the given "slot" in the AttrNodes list. This index - /// is the index of the return, parameter, or function object that the - /// attributes are applied to, not the index into the AttrNodes list where the - /// attributes reside. - unsigned getSlotIndex(unsigned Slot) const { - return getSlotPair(Slot)->first; - } - - /// \brief Retrieve the attribute set node for the given "slot" in the - /// AttrNode list. - AttributeSet getSlotAttributes(unsigned Slot) const { - return getSlotPair(Slot)->second; - } - /// \brief Return true if the AttributeSet or the FunctionIndex has an /// enum attribute of the given kind. bool hasFnAttribute(Attribute::AttrKind Kind) const { return AvailableFunctionAttrs & ((uint64_t)1) << Kind; } - using iterator = AttributeSet::iterator; - - iterator begin(unsigned Slot) const { - return getSlotAttributes(Slot).begin(); - } - iterator end(unsigned Slot) const { return getSlotAttributes(Slot).end(); } + typedef const AttributeSet *iterator; + iterator begin() const { return getTrailingObjects(); } + iterator end() const { return begin() + NumAttrSets; } void Profile(FoldingSetNodeID &ID) const; - static void Profile(FoldingSetNodeID &ID, - ArrayRef> Nodes); + static void Profile(FoldingSetNodeID &ID, ArrayRef Nodes); void dump() const; }; diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index adb31d127a2..19b7c302723 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -507,7 +507,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { } AttributeSet AttributeSet::addAttribute(LLVMContext &C, - Attribute::AttrKind Kind) const { + Attribute::AttrKind Kind) const { if (hasAttribute(Kind)) return *this; AttrBuilder B; B.addAttribute(Kind); @@ -515,7 +515,7 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, } AttributeSet AttributeSet::addAttribute(LLVMContext &C, StringRef Kind, - StringRef Value) const { + StringRef Value) const { AttrBuilder B; B.addAttribute(Kind, Value); return addAttributes(C, AttributeSet::get(C, B)); @@ -788,48 +788,44 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { // AttributeListImpl Definition //===----------------------------------------------------------------------===// -AttributeListImpl::AttributeListImpl( - LLVMContext &C, ArrayRef> Slots) - : Context(C), NumSlots(Slots.size()), AvailableFunctionAttrs(0) { -#ifndef NDEBUG - assert(!Slots.empty() && "pointless AttributeListImpl"); - if (Slots.size() >= 2) { - auto &PrevPair = Slots.front(); - for (auto &CurPair : Slots.drop_front()) { - assert(PrevPair.first <= CurPair.first && "Attribute set not ordered!"); - } - } -#endif +/// Map from AttributeList index to the internal array index. Adding one works: +/// FunctionIndex: ~0U -> 0 +/// ReturnIndex: 0 -> 1 +/// FirstArgIndex: 1.. -> 2.. +static constexpr unsigned attrIdxToArrayIdx(unsigned Index) { + // MSVC warns about '~0U + 1' wrapping around when this is called on + // FunctionIndex, so cast to int first. + return static_cast(Index) + 1; +} + +AttributeListImpl::AttributeListImpl(LLVMContext &C, + ArrayRef Sets) + : AvailableFunctionAttrs(0), Context(C), NumAttrSets(Sets.size()) { + assert(!Sets.empty() && "pointless AttributeListImpl"); // There's memory after the node where we can store the entries in. - std::copy(Slots.begin(), Slots.end(), getTrailingObjects()); + std::copy(Sets.begin(), Sets.end(), getTrailingObjects()); // Initialize AvailableFunctionAttrs summary bitset. static_assert(Attribute::EndAttrKinds <= sizeof(AvailableFunctionAttrs) * CHAR_BIT, "Too many attributes"); - static_assert(AttributeList::FunctionIndex == ~0u, - "FunctionIndex should be biggest possible index"); - const auto &Last = Slots.back(); - if (Last.first == AttributeList::FunctionIndex) { - AttributeSet Node = Last.second; - for (Attribute I : Node) { - if (!I.isStringAttribute()) - AvailableFunctionAttrs |= ((uint64_t)1) << I.getKindAsEnum(); - } + static_assert(attrIdxToArrayIdx(AttributeList::FunctionIndex) == 0U, + "function should be stored in slot 0"); + for (Attribute I : Sets[0]) { + if (!I.isStringAttribute()) + AvailableFunctionAttrs |= 1ULL << I.getKindAsEnum(); } } void AttributeListImpl::Profile(FoldingSetNodeID &ID) const { - Profile(ID, makeArrayRef(getSlotPair(0), getNumSlots())); + Profile(ID, makeArrayRef(begin(), end())); } -void AttributeListImpl::Profile( - FoldingSetNodeID &ID, ArrayRef> Nodes) { - for (const auto &Node : Nodes) { - ID.AddInteger(Node.first); - ID.AddPointer(Node.second.SetNode); - } +void AttributeListImpl::Profile(FoldingSetNodeID &ID, + ArrayRef Sets) { + for (const auto &Set : Sets) + ID.AddPointer(Set.SetNode); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -842,24 +838,13 @@ LLVM_DUMP_METHOD void AttributeListImpl::dump() const { // AttributeList Construction and Mutation Methods //===----------------------------------------------------------------------===// -AttributeList AttributeList::getImpl( - LLVMContext &C, ArrayRef> Attrs) { - assert(!Attrs.empty() && "creating pointless AttributeList"); -#ifndef NDEBUG - unsigned LastIndex = 0; - bool IsFirst = true; - for (auto &&AttrPair : Attrs) { - assert((IsFirst || LastIndex < AttrPair.first) && - "unsorted or duplicate AttributeList indices"); - assert(AttrPair.second.hasAttributes() && "pointless AttributeList slot"); - LastIndex = AttrPair.first; - IsFirst = false; - } -#endif +AttributeList AttributeList::getImpl(LLVMContext &C, + ArrayRef AttrSets) { + assert(!AttrSets.empty() && "pointless AttributeListImpl"); LLVMContextImpl *pImpl = C.pImpl; FoldingSetNodeID ID; - AttributeListImpl::Profile(ID, Attrs); + AttributeListImpl::Profile(ID, AttrSets); void *InsertPoint; AttributeListImpl *PA = @@ -870,8 +855,8 @@ AttributeList AttributeList::getImpl( if (!PA) { // Coallocate entries after the AttributeListImpl itself. void *Mem = ::operator new( - AttributeListImpl::totalSizeToAlloc(Attrs.size())); - PA = new (Mem) AttributeListImpl(C, Attrs); + AttributeListImpl::totalSizeToAlloc(AttrSets.size())); + PA = new (Mem) AttributeListImpl(C, AttrSets); pImpl->AttrsLists.InsertNode(PA, InsertPoint); } @@ -912,7 +897,7 @@ AttributeList::get(LLVMContext &C, AttrPairVec.emplace_back(Index, AttributeSet::get(C, AttrVec)); } - return getImpl(C, AttrPairVec); + return get(C, AttrPairVec); } AttributeList @@ -922,35 +907,76 @@ AttributeList::get(LLVMContext &C, if (Attrs.empty()) return AttributeList(); - return getImpl(C, Attrs); + assert(std::is_sorted(Attrs.begin(), Attrs.end(), + [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }) && + "Misordered Attributes list!"); + assert(none_of(Attrs, + [](const std::pair &Pair) { + return !Pair.second.hasAttributes(); + }) && + "Pointless attribute!"); + + unsigned MaxIndex = Attrs.back().first; + + SmallVector AttrVec(attrIdxToArrayIdx(MaxIndex) + 1); + for (auto Pair : Attrs) + AttrVec[attrIdxToArrayIdx(Pair.first)] = Pair.second; + + return getImpl(C, AttrVec); } AttributeList AttributeList::get(LLVMContext &C, AttributeSet FnAttrs, AttributeSet RetAttrs, ArrayRef ArgAttrs) { - SmallVector, 8> AttrPairs; - if (RetAttrs.hasAttributes()) - AttrPairs.emplace_back(ReturnIndex, RetAttrs); - size_t Index = 1; - for (AttributeSet AS : ArgAttrs) { - if (AS.hasAttributes()) - AttrPairs.emplace_back(Index, AS); - ++Index; + // Scan from the end to find the last argument with attributes. Most + // arguments don't have attributes, so it's nice if we can have fewer unique + // AttributeListImpls by dropping empty attribute sets at the end of the list. + unsigned NumSets = 0; + for (size_t I = ArgAttrs.size(); I != 0; --I) { + if (ArgAttrs[I - 1].hasAttributes()) { + NumSets = I + 2; + break; + } } - if (FnAttrs.hasAttributes()) - AttrPairs.emplace_back(FunctionIndex, FnAttrs); - if (AttrPairs.empty()) + if (NumSets == 0) { + // Check function and return attributes if we didn't have argument + // attributes. + if (RetAttrs.hasAttributes()) + NumSets = 2; + else if (FnAttrs.hasAttributes()) + NumSets = 1; + } + + // If all attribute sets were empty, we can use the empty attribute list. + if (NumSets == 0) return AttributeList(); - return getImpl(C, AttrPairs); + + SmallVector AttrSets; + AttrSets.reserve(NumSets); + // If we have any attributes, we always have function attributes. + AttrSets.push_back(FnAttrs); + if (NumSets > 1) + AttrSets.push_back(RetAttrs); + if (NumSets > 2) { + // Drop the empty argument attribute sets at the end. + ArgAttrs = ArgAttrs.take_front(NumSets - 2); + AttrSets.insert(AttrSets.end(), ArgAttrs.begin(), ArgAttrs.end()); + } + + return getImpl(C, AttrSets); } AttributeList AttributeList::get(LLVMContext &C, unsigned Index, const AttrBuilder &B) { if (!B.hasAttributes()) return AttributeList(); - AttributeSet AS = AttributeSet::get(C, B); - std::pair Arr[1] = {{Index, AS}}; - return getImpl(C, Arr); + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(Index + 1); + AttrSets[Index] = AttributeSet::get(C, B); + return getImpl(C, AttrSets); } AttributeList AttributeList::get(LLVMContext &C, unsigned Index, @@ -973,32 +999,22 @@ AttributeList AttributeList::get(LLVMContext &C, ArrayRef Attrs) { if (Attrs.empty()) return AttributeList(); - if (Attrs.size() == 1) return Attrs[0]; + if (Attrs.size() == 1) + return Attrs[0]; - SmallVector, 8> AttrNodeVec; - AttributeListImpl *A0 = Attrs[0].pImpl; - if (A0) - AttrNodeVec.append(A0->getSlotPair(0), A0->getSlotPair(A0->getNumSlots())); - // Copy all attributes from Attrs into AttrNodeVec while keeping AttrNodeVec - // ordered by index. Because we know that each list in Attrs is ordered by - // index we only need to merge each successive list in rather than doing a - // full sort. - for (unsigned I = 1, E = Attrs.size(); I != E; ++I) { - AttributeListImpl *ALI = Attrs[I].pImpl; - if (!ALI) continue; - SmallVector, 8>::iterator - ANVI = AttrNodeVec.begin(), ANVE; - for (const IndexAttrPair *AI = ALI->getSlotPair(0), - *AE = ALI->getSlotPair(ALI->getNumSlots()); - AI != AE; ++AI) { - ANVE = AttrNodeVec.end(); - while (ANVI != ANVE && ANVI->first <= AI->first) - ++ANVI; - ANVI = AttrNodeVec.insert(ANVI, *AI) + 1; - } + unsigned MaxSize = 0; + for (AttributeList List : Attrs) + MaxSize = std::max(MaxSize, List.getNumAttrSets()); + + SmallVector NewAttrSets(MaxSize); + for (unsigned I = 0; I < MaxSize; ++I) { + AttrBuilder CurBuilder; + for (AttributeList List : Attrs) + CurBuilder.merge(List.getAttributes(I - 1)); + NewAttrSets[I] = AttributeSet::get(C, CurBuilder); } - return getImpl(C, AttrNodeVec); + return getImpl(C, NewAttrSets); } AttributeList AttributeList::addAttribute(LLVMContext &C, unsigned Index, @@ -1022,29 +1038,19 @@ AttributeList AttributeList::addAttribute(LLVMContext &C, Attribute A) const { assert(std::is_sorted(Indices.begin(), Indices.end())); - unsigned I = 0, E = pImpl ? pImpl->getNumSlots() : 0; - SmallVector AttrVec; - for (unsigned Index : Indices) { - // Add all attribute slots before the current index. - for (; I < E && getSlotIndex(I) < Index; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); + SmallVector AttrSets(this->begin(), this->end()); + unsigned MaxIndex = attrIdxToArrayIdx(Indices.back()); + if (MaxIndex >= AttrSets.size()) + AttrSets.resize(MaxIndex + 1); - // Add the attribute at this index. If we already have attributes at this - // index, merge them into a new set. - AttrBuilder B; - if (I < E && getSlotIndex(I) == Index) { - B.merge(AttrBuilder(pImpl->getSlotAttributes(I))); - ++I; - } + for (unsigned Index : Indices) { + Index = attrIdxToArrayIdx(Index); + AttrBuilder B(AttrSets[Index]); B.addAttribute(A); - AttrVec.emplace_back(Index, AttributeSet::get(C, B)); + AttrSets[Index] = AttributeSet::get(C, B); } - // Add remaining attributes. - for (; I < E; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); - - return get(C, AttrVec); + return getImpl(C, AttrSets); } AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, @@ -1064,33 +1070,16 @@ AttributeList AttributeList::addAttributes(LLVMContext &C, unsigned Index, "Attempt to change alignment!"); #endif - SmallVector AttrVec; - uint64_t NumAttrs = pImpl->getNumSlots(); - unsigned I; + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); - // Add all the attribute slots before the one we need to merge. - for (I = 0; I < NumAttrs; ++I) { - if (getSlotIndex(I) >= Index) - break; - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); - } + AttrBuilder Merged(AttrSets[Index]); + Merged.merge(B); + AttrSets[Index] = AttributeSet::get(C, Merged); - AttrBuilder NewAttrs; - if (I < NumAttrs && getSlotIndex(I) == Index) { - // We need to merge the attribute sets. - NewAttrs.merge(pImpl->getSlotAttributes(I)); - ++I; - } - NewAttrs.merge(B); - - // Add the new or merged attribute set at this index. - AttrVec.emplace_back(Index, AttributeSet::get(C, NewAttrs)); - - // Add the remaining entries. - for (; I < NumAttrs; ++I) - AttrVec.emplace_back(getSlotIndex(I), pImpl->getSlotAttributes(I)); - - return get(C, AttrVec); + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, @@ -1109,54 +1098,38 @@ AttributeList AttributeList::removeAttribute(LLVMContext &C, unsigned Index, return removeAttributes(C, Index, B); } -AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned Index, - const AttrBuilder &Attrs) const { +AttributeList +AttributeList::removeAttributes(LLVMContext &C, unsigned Index, + const AttrBuilder &AttrsToRemove) const { if (!pImpl) return AttributeList(); // FIXME it is not obvious how this should work for alignment. // For now, say we can't pass in alignment, which no current use does. - assert(!Attrs.hasAlignmentAttr() && "Attempt to change alignment!"); + assert(!AttrsToRemove.hasAlignmentAttr() && "Attempt to change alignment!"); - // Add the attribute slots before the one we're trying to add. - SmallVector AttrSets; - uint64_t NumAttrs = pImpl->getNumSlots(); - AttrBuilder B; - uint64_t LastIndex = 0; - for (unsigned I = 0, E = NumAttrs; I != E; ++I) { - if (getSlotIndex(I) >= Index) { - if (getSlotIndex(I) == Index) - B = AttrBuilder(getSlotAttributes(LastIndex++)); - break; - } - LastIndex = I + 1; - AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); - } + Index = attrIdxToArrayIdx(Index); + SmallVector AttrSets(this->begin(), this->end()); + if (Index >= AttrSets.size()) + AttrSets.resize(Index + 1); - // Remove the attributes from the existing set and add them. - B.remove(Attrs); - if (B.hasAttributes()) - AttrSets.push_back({Index, AttributeSet::get(C, B)}); + AttrBuilder B(AttrSets[Index]); + B.remove(AttrsToRemove); + AttrSets[Index] = AttributeSet::get(C, B); - // Add the remaining attribute slots. - for (unsigned I = LastIndex, E = NumAttrs; I < E; ++I) - AttrSets.push_back({getSlotIndex(I), getSlotAttributes(I)}); - - return get(C, AttrSets); + return getImpl(C, AttrSets); } AttributeList AttributeList::removeAttributes(LLVMContext &C, unsigned WithoutIndex) const { if (!pImpl) return AttributeList(); - - SmallVector, 4> AttrSet; - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { - unsigned Index = getSlotIndex(I); - if (Index != WithoutIndex) - AttrSet.push_back({Index, pImpl->getSlotAttributes(I)}); - } - return get(C, AttrSet); + WithoutIndex = attrIdxToArrayIdx(WithoutIndex); + if (WithoutIndex >= getNumAttrSets()) + return *this; + SmallVector AttrSets(this->begin(), this->end()); + AttrSets[WithoutIndex] = AttributeSet(); + return getImpl(C, AttrSets); } AttributeList AttributeList::addDereferenceableAttr(LLVMContext &C, @@ -1225,20 +1198,20 @@ bool AttributeList::hasFnAttribute(StringRef Kind) const { bool AttributeList::hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const { - return hasAttribute(ArgNo + 1, Kind); + return hasAttribute(ArgNo + FirstArgIndex, Kind); } bool AttributeList::hasAttrSomewhere(Attribute::AttrKind Attr, unsigned *Index) const { if (!pImpl) return false; - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) - for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); - II != IE; ++II) - if (II->hasAttribute(Attr)) { - if (Index) *Index = pImpl->getSlotIndex(I); - return true; - } + for (unsigned I = index_begin(), E = index_end(); I != E; ++I) { + if (hasAttribute(I, Attr)) { + if (Index) + *Index = I; + return true; + } + } return false; } @@ -1282,60 +1255,35 @@ std::string AttributeList::getAsString(unsigned Index, bool InAttrGrp) const { } AttributeSet AttributeList::getAttributes(unsigned Index) const { - if (!pImpl) return AttributeSet(); - - // Loop through to find the attribute node we want. - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) - if (pImpl->getSlotIndex(I) == Index) - return pImpl->getSlotAttributes(I); - - return AttributeSet(); + Index = attrIdxToArrayIdx(Index); + if (!pImpl || Index >= getNumAttrSets()) + return AttributeSet(); + return pImpl->begin()[Index]; } -AttributeList::iterator AttributeList::begin(unsigned Slot) const { - if (!pImpl) - return ArrayRef().begin(); - return pImpl->begin(Slot); +AttributeList::iterator AttributeList::begin() const { + return pImpl ? pImpl->begin() : nullptr; } -AttributeList::iterator AttributeList::end(unsigned Slot) const { - if (!pImpl) - return ArrayRef().end(); - return pImpl->end(Slot); +AttributeList::iterator AttributeList::end() const { + return pImpl ? pImpl->end() : nullptr; } //===----------------------------------------------------------------------===// // AttributeList Introspection Methods //===----------------------------------------------------------------------===// -unsigned AttributeList::getNumSlots() const { - return pImpl ? pImpl->getNumSlots() : 0; -} - -unsigned AttributeList::getSlotIndex(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumSlots() && - "Slot # out of range!"); - return pImpl->getSlotIndex(Slot); -} - -AttributeSet AttributeList::getSlotAttributes(unsigned Slot) const { - assert(pImpl && Slot < pImpl->getNumSlots() && - "Slot # out of range!"); - return pImpl->getSlotAttributes(Slot); +unsigned AttributeList::getNumAttrSets() const { + return pImpl ? pImpl->NumAttrSets : 0; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void AttributeList::dump() const { dbgs() << "PAL[\n"; - for (unsigned i = 0, e = getNumSlots(); i < e; ++i) { - uint64_t Index = getSlotIndex(i); - dbgs() << " { "; - if (Index == ~0U) - dbgs() << "~0U"; - else - dbgs() << Index; - dbgs() << " => " << getAsString(Index) << " }\n"; + for (unsigned i = index_begin(), e = index_end(); i != e; ++i) { + if (getAttributes(i).hasAttributes()) + dbgs() << " { " << i << " => " << getAsString(i) << " }\n"; } dbgs() << "]\n"; @@ -1346,26 +1294,16 @@ LLVM_DUMP_METHOD void AttributeList::dump() const { // AttrBuilder Method Implementations //===----------------------------------------------------------------------===// +// FIXME: Remove this ctor, use AttributeSet. AttrBuilder::AttrBuilder(AttributeList AL, unsigned Index) { - AttributeListImpl *pImpl = AL.pImpl; - if (!pImpl) return; - - for (unsigned I = 0, E = pImpl->getNumSlots(); I != E; ++I) { - if (pImpl->getSlotIndex(I) != Index) continue; - - for (AttributeListImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); - II != IE; ++II) - addAttribute(*II); - - break; - } + AttributeSet AS = AL.getAttributes(Index); + for (const Attribute &A : AS) + addAttribute(A); } AttrBuilder::AttrBuilder(AttributeSet AS) { - if (AS.hasAttributes()) { - for (const Attribute &A : AS) - addAttribute(A); - } + for (const Attribute &A : AS) + addAttribute(A); } void AttrBuilder::clear() { diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp index 90ca21ab91f..1f8659d4e2c 100644 --- a/contrib/llvm/lib/IR/BasicBlock.cpp +++ b/contrib/llvm/lib/IR/BasicBlock.cpp @@ -263,6 +263,10 @@ const BasicBlock *BasicBlock::getUniqueSuccessor() const { return SuccBB; } +iterator_range BasicBlock::phis() { + return make_range(dyn_cast(&front()), nullptr); +} + /// This method is used to notify a BasicBlock that the /// specified Predecessor of the block is no longer able to reach it. This is /// actually not used to update the Predecessor list, but is actually used to @@ -389,13 +393,11 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) { // Loop over any phi nodes in the basic block, updating the BB field of // incoming values... BasicBlock *Successor = *I; - PHINode *PN; - for (BasicBlock::iterator II = Successor->begin(); - (PN = dyn_cast(II)); ++II) { - int IDX = PN->getBasicBlockIndex(this); - while (IDX != -1) { - PN->setIncomingBlock((unsigned)IDX, New); - IDX = PN->getBasicBlockIndex(this); + for (auto &PN : Successor->phis()) { + int Idx = PN.getBasicBlockIndex(this); + while (Idx != -1) { + PN.setIncomingBlock((unsigned)Idx, New); + Idx = PN.getBasicBlockIndex(this); } } } diff --git a/contrib/llvm/lib/IR/DebugLoc.cpp b/contrib/llvm/lib/IR/DebugLoc.cpp index 3168ec6944a..b7e3f0c6779 100644 --- a/contrib/llvm/lib/IR/DebugLoc.cpp +++ b/contrib/llvm/lib/IR/DebugLoc.cpp @@ -163,7 +163,7 @@ void DebugLoc::reparentDebugInfo(Instruction &I, DISubprogram *OrigSP, // Fix up debug variables to point to NewSP. auto reparentVar = [&](DILocalVariable *Var) { - return DILocalVariable::getDistinct( + return DILocalVariable::get( Ctx, cast( reparentScope(Ctx, Var->getScope(), OrigSP, NewSP, Cache)), diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp index 01d4ed6c8ee..d7baa9ebc22 100644 --- a/contrib/llvm/lib/IR/Instructions.cpp +++ b/contrib/llvm/lib/IR/Instructions.cpp @@ -454,6 +454,9 @@ bool CallInst::dataOperandHasImpliedAttr(unsigned i, // question is a call argument; or be indirectly implied by the kind of its // containing operand bundle, if the operand is a bundle operand. + if (i == AttributeList::ReturnIndex) + return hasRetAttr(Kind); + // FIXME: Avoid these i - 1 calculations and update the API to use zero-based // indices. if (i < (getNumArgOperands() + 1)) @@ -779,6 +782,9 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i, // question is an invoke argument; or be indirectly implied by the kind of its // containing operand bundle, if the operand is a bundle operand. + if (i == AttributeList::ReturnIndex) + return hasRetAttr(Kind); + // FIXME: Avoid these i - 1 calculations and update the API to use zero-based // indices. if (i < (getNumArgOperands() + 1)) diff --git a/contrib/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm/lib/IR/IntrinsicInst.cpp index c9814a96bea..94e115a6a78 100644 --- a/contrib/llvm/lib/IR/IntrinsicInst.cpp +++ b/contrib/llvm/lib/IR/IntrinsicInst.cpp @@ -97,7 +97,9 @@ Value *InstrProfIncrementInst::getStep() const { ConstrainedFPIntrinsic::RoundingMode ConstrainedFPIntrinsic::getRoundingMode() const { - Metadata *MD = dyn_cast(getOperand(2))->getMetadata(); + unsigned NumOperands = getNumArgOperands(); + Metadata *MD = + dyn_cast(getArgOperand(NumOperands - 2))->getMetadata(); if (!MD || !isa(MD)) return rmInvalid; StringRef RoundingArg = cast(MD)->getString(); @@ -115,7 +117,9 @@ ConstrainedFPIntrinsic::getRoundingMode() const { ConstrainedFPIntrinsic::ExceptionBehavior ConstrainedFPIntrinsic::getExceptionBehavior() const { - Metadata *MD = dyn_cast(getOperand(3))->getMetadata(); + unsigned NumOperands = getNumArgOperands(); + Metadata *MD = + dyn_cast(getArgOperand(NumOperands - 1))->getMetadata(); if (!MD || !isa(MD)) return ebInvalid; StringRef ExceptionArg = cast(MD)->getString(); @@ -125,3 +129,21 @@ ConstrainedFPIntrinsic::getExceptionBehavior() const { .Case("fpexcept.strict", ebStrict) .Default(ebInvalid); } + +bool ConstrainedFPIntrinsic::isUnaryOp() const { + switch (getIntrinsicID()) { + default: + return false; + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: + return true; + } +} diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp index 12c258d95f5..95673e515a5 100644 --- a/contrib/llvm/lib/IR/Module.cpp +++ b/contrib/llvm/lib/IR/Module.cpp @@ -481,7 +481,7 @@ PICLevel::Level Module::getPICLevel() const { } void Module::setPICLevel(PICLevel::Level PL) { - addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL); + addModuleFlag(ModFlagBehavior::Max, "PIC Level", PL); } PIELevel::Level Module::getPIELevel() const { @@ -495,7 +495,7 @@ PIELevel::Level Module::getPIELevel() const { } void Module::setPIELevel(PIELevel::Level PL) { - addModuleFlag(ModFlagBehavior::Error, "PIE Level", PL); + addModuleFlag(ModFlagBehavior::Max, "PIE Level", PL); } void Module::setProfileSummary(Metadata *M) { diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index 21e8048442b..a8523236ac9 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -1282,6 +1282,13 @@ Verifier::visitModuleFlag(const MDNode *Op, // These behavior types accept any value. break; + case Module::Max: { + Assert(mdconst::dyn_extract_or_null(Op->getOperand(2)), + "invalid value for 'max' module flag (expected constant integer)", + Op->getOperand(2)); + break; + } + case Module::Require: { // The value should itself be an MDNode with two operands, a flag ID (an // MDString), and a value. @@ -1729,17 +1736,9 @@ void Verifier::visitConstantExpr(const ConstantExpr *CE) { } bool Verifier::verifyAttributeCount(AttributeList Attrs, unsigned Params) { - if (Attrs.getNumSlots() == 0) - return true; - - unsigned LastSlot = Attrs.getNumSlots() - 1; - unsigned LastIndex = Attrs.getSlotIndex(LastSlot); - if (LastIndex <= Params || - (LastIndex == AttributeList::FunctionIndex && - (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params))) - return true; - - return false; + // There shouldn't be more attribute sets than there are parameters plus the + // function and return value. + return Attrs.getNumAttrSets() <= Params + 2; } /// Verify that statepoint intrinsic is well formed. @@ -3967,6 +3966,18 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) { case Intrinsic::experimental_constrained_fmul: case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: + case Intrinsic::experimental_constrained_sqrt: + case Intrinsic::experimental_constrained_pow: + case Intrinsic::experimental_constrained_powi: + case Intrinsic::experimental_constrained_sin: + case Intrinsic::experimental_constrained_cos: + case Intrinsic::experimental_constrained_exp: + case Intrinsic::experimental_constrained_exp2: + case Intrinsic::experimental_constrained_log: + case Intrinsic::experimental_constrained_log10: + case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_rint: + case Intrinsic::experimental_constrained_nearbyint: visitConstrainedFPIntrinsic( cast(*CS.getInstruction())); break; @@ -4336,7 +4347,12 @@ static DISubprogram *getSubprogram(Metadata *LocalScope) { } void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) { - Assert(isa(FPI.getOperand(2)), + unsigned NumOperands = FPI.getNumArgOperands(); + Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)), + "invalid arguments for constrained FP intrinsic", &FPI); + Assert(isa(FPI.getArgOperand(NumOperands-1)), + "invalid exception behavior argument", &FPI); + Assert(isa(FPI.getArgOperand(NumOperands-2)), "invalid rounding mode argument", &FPI); Assert(FPI.getRoundingMode() != ConstrainedFPIntrinsic::rmInvalid, "invalid rounding mode argument", &FPI); diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp index c73b6b6b15c..9efc095f9fc 100644 --- a/contrib/llvm/lib/LTO/LTO.cpp +++ b/contrib/llvm/lib/LTO/LTO.cpp @@ -114,7 +114,10 @@ static void computeCacheKey( AddUnsigned((unsigned)Conf.Options.DebuggerTuning); for (auto &A : Conf.MAttrs) AddString(A); - AddUnsigned(Conf.RelocModel); + if (Conf.RelocModel) + AddUnsigned(*Conf.RelocModel); + else + AddUnsigned(-1); AddUnsigned(Conf.CodeModel); AddUnsigned(Conf.CGOptLevel); AddUnsigned(Conf.CGFileType); @@ -539,16 +542,10 @@ Error LTO::addRegularLTO(BitcodeModule BM, if (Sym.isUndefined()) continue; Keep.push_back(GV); - switch (GV->getLinkage()) { - default: - break; - case GlobalValue::LinkOnceAnyLinkage: - GV->setLinkage(GlobalValue::WeakAnyLinkage); - break; - case GlobalValue::LinkOnceODRLinkage: - GV->setLinkage(GlobalValue::WeakODRLinkage); - break; - } + GlobalValue::LinkageTypes OriginalLinkage = GV->getLinkage(); + if (GlobalValue::isLinkOnceLinkage(OriginalLinkage)) + GV->setLinkage(GlobalValue::getWeakLinkage( + GlobalValue::isLinkOnceODRLinkage(OriginalLinkage))); } else if (isa(GV) && (GV->hasLinkOnceODRLinkage() || GV->hasWeakODRLinkage() || GV->hasAvailableExternallyLinkage()) && @@ -999,10 +996,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ExportedGUIDs.insert(GUID); } - auto isPrevailing = [&](GlobalValue::GUID GUID, - const GlobalValueSummary *S) { - return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); - }; auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) { const auto &ExportList = ExportLists.find(ModuleIdentifier); return (ExportList != ExportLists.end() && @@ -1010,17 +1003,20 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache, ExportedGUIDs.count(GUID); }; thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported); - - auto recordNewLinkage = [&](StringRef ModuleIdentifier, - GlobalValue::GUID GUID, - GlobalValue::LinkageTypes NewLinkage) { - ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; - }; - - thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing, - recordNewLinkage); } + auto isPrevailing = [&](GlobalValue::GUID GUID, + const GlobalValueSummary *S) { + return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath(); + }; + auto recordNewLinkage = [&](StringRef ModuleIdentifier, + GlobalValue::GUID GUID, + GlobalValue::LinkageTypes NewLinkage) { + ResolvedODR[ModuleIdentifier][GUID] = NewLinkage; + }; + thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing, + recordNewLinkage); + std::unique_ptr BackendProc = ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddStream, Cache); diff --git a/contrib/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm/lib/LTO/LTOBackend.cpp index 30447c528af..668667a5356 100644 --- a/contrib/llvm/lib/LTO/LTOBackend.cpp +++ b/contrib/llvm/lib/LTO/LTOBackend.cpp @@ -117,15 +117,22 @@ Error Config::addSaveTemps(std::string OutputFileName, namespace { std::unique_ptr -createTargetMachine(Config &Conf, StringRef TheTriple, - const Target *TheTarget) { +createTargetMachine(Config &Conf, const Target *TheTarget, Module &M) { + StringRef TheTriple = M.getTargetTriple(); SubtargetFeatures Features; Features.getDefaultSubtargetFeatures(Triple(TheTriple)); for (const std::string &A : Conf.MAttrs) Features.AddFeature(A); + Reloc::Model RelocModel; + if (Conf.RelocModel) + RelocModel = *Conf.RelocModel; + else + RelocModel = + M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_; + return std::unique_ptr(TheTarget->createTargetMachine( - TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel, + TheTriple, Conf.CPU, Features.getString(), Conf.Options, RelocModel, Conf.CodeModel, Conf.CGOptLevel)); } @@ -311,7 +318,7 @@ void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream, std::unique_ptr MPartInCtx = std::move(MOrErr.get()); std::unique_ptr TM = - createTargetMachine(C, MPartInCtx->getTargetTriple(), T); + createTargetMachine(C, T, *MPartInCtx); codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx); }, @@ -360,8 +367,7 @@ Error lto::backend(Config &C, AddStreamFn AddStream, if (!TOrErr) return TOrErr.takeError(); - std::unique_ptr TM = - createTargetMachine(C, Mod->getTargetTriple(), *TOrErr); + std::unique_ptr TM = createTargetMachine(C, *TOrErr, *Mod); // Setup optimization remarks. auto DiagFileOrErr = lto::setupOptimizationRemarks( @@ -397,8 +403,7 @@ Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream, if (!TOrErr) return TOrErr.takeError(); - std::unique_ptr TM = - createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr); + std::unique_ptr TM = createTargetMachine(Conf, *TOrErr, Mod); if (Conf.CodeGenOnly) { codegen(Conf, TM.get(), AddStream, Task, Mod); diff --git a/contrib/llvm/lib/LibDriver/LibDriver.cpp b/contrib/llvm/lib/LibDriver/LibDriver.cpp deleted file mode 100644 index c50629d7150..00000000000 --- a/contrib/llvm/lib/LibDriver/LibDriver.cpp +++ /dev/null @@ -1,171 +0,0 @@ -//===- LibDriver.cpp - lib.exe-compatible driver --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Defines an interface to a lib.exe-compatible driver that also understands -// bitcode files. Used by llvm-lib and lld-link /lib. -// -//===----------------------------------------------------------------------===// - -#include "llvm/LibDriver/LibDriver.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Object/ArchiveWriter.h" -#include "llvm/Option/Arg.h" -#include "llvm/Option/ArgList.h" -#include "llvm/Option/Option.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/StringSaver.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -namespace { - -enum { - OPT_INVALID = 0, -#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11) OPT_##ID, -#include "Options.inc" -#undef OPTION -}; - -#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; -#include "Options.inc" -#undef PREFIX - -static const llvm::opt::OptTable::Info infoTable[] = { -#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X6, X7, X8, X9, X10) \ - { \ - X1, X2, X9, X10, OPT_##ID, llvm::opt::Option::KIND##Class, X8, X7, \ - OPT_##GROUP, OPT_##ALIAS, X6 \ - }, -#include "Options.inc" -#undef OPTION -}; - -class LibOptTable : public llvm::opt::OptTable { -public: - LibOptTable() : OptTable(infoTable, true) {} -}; - -} - -static std::string getOutputPath(llvm::opt::InputArgList *Args, - const llvm::NewArchiveMember &FirstMember) { - if (auto *Arg = Args->getLastArg(OPT_out)) - return Arg->getValue(); - SmallString<128> Val = StringRef(FirstMember.Buf->getBufferIdentifier()); - llvm::sys::path::replace_extension(Val, ".lib"); - return Val.str(); -} - -static std::vector getSearchPaths(llvm::opt::InputArgList *Args, - StringSaver &Saver) { - std::vector Ret; - // Add current directory as first item of the search path. - Ret.push_back(""); - - // Add /libpath flags. - for (auto *Arg : Args->filtered(OPT_libpath)) - Ret.push_back(Arg->getValue()); - - // Add $LIB. - Optional EnvOpt = sys::Process::GetEnv("LIB"); - if (!EnvOpt.hasValue()) - return Ret; - StringRef Env = Saver.save(*EnvOpt); - while (!Env.empty()) { - StringRef Path; - std::tie(Path, Env) = Env.split(';'); - Ret.push_back(Path); - } - return Ret; -} - -static Optional findInputFile(StringRef File, - ArrayRef Paths) { - for (auto Dir : Paths) { - SmallString<128> Path = Dir; - sys::path::append(Path, File); - if (sys::fs::exists(Path)) - return Path.str().str(); - } - return Optional(); -} - -int llvm::libDriverMain(llvm::ArrayRef ArgsArr) { - SmallVector NewArgs(ArgsArr.begin(), ArgsArr.end()); - BumpPtrAllocator Alloc; - StringSaver Saver(Alloc); - cl::ExpandResponseFiles(Saver, cl::TokenizeWindowsCommandLine, NewArgs); - ArgsArr = NewArgs; - - LibOptTable Table; - unsigned MissingIndex; - unsigned MissingCount; - llvm::opt::InputArgList Args = - Table.ParseArgs(ArgsArr.slice(1), MissingIndex, MissingCount); - if (MissingCount) { - llvm::errs() << "missing arg value for \"" - << Args.getArgString(MissingIndex) << "\", expected " - << MissingCount - << (MissingCount == 1 ? " argument.\n" : " arguments.\n"); - return 1; - } - for (auto *Arg : Args.filtered(OPT_UNKNOWN)) - llvm::errs() << "ignoring unknown argument: " << Arg->getSpelling() << "\n"; - - if (!Args.hasArgNoClaim(OPT_INPUT)) { - // No input files. To match lib.exe, silently do nothing. - return 0; - } - - std::vector SearchPaths = getSearchPaths(&Args, Saver); - - std::vector Members; - for (auto *Arg : Args.filtered(OPT_INPUT)) { - Optional Path = findInputFile(Arg->getValue(), SearchPaths); - if (!Path.hasValue()) { - llvm::errs() << Arg->getValue() << ": no such file or directory\n"; - return 1; - } - Expected MOrErr = - NewArchiveMember::getFile(Saver.save(*Path), /*Deterministic=*/true); - if (!MOrErr) { - handleAllErrors(MOrErr.takeError(), [&](const llvm::ErrorInfoBase &EIB) { - llvm::errs() << Arg->getValue() << ": " << EIB.message() << "\n"; - }); - return 1; - } - sys::fs::file_magic Magic = - sys::fs::identify_magic(MOrErr->Buf->getBuffer()); - if (Magic != sys::fs::file_magic::coff_object && - Magic != sys::fs::file_magic::bitcode && - Magic != sys::fs::file_magic::windows_resource) { - llvm::errs() << Arg->getValue() - << ": not a COFF object, bitcode or resource file\n"; - return 1; - } - Members.emplace_back(std::move(*MOrErr)); - } - - std::pair Result = - llvm::writeArchive(getOutputPath(&Args, Members[0]), Members, - /*WriteSymtab=*/true, object::Archive::K_GNU, - /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin)); - - if (Result.second) { - if (Result.first.empty()) - Result.first = ArgsArr[0]; - llvm::errs() << Result.first << ": " << Result.second.message() << "\n"; - return 1; - } - - return 0; -} diff --git a/contrib/llvm/lib/LibDriver/Options.td b/contrib/llvm/lib/LibDriver/Options.td deleted file mode 100644 index 5a56ef7468d..00000000000 --- a/contrib/llvm/lib/LibDriver/Options.td +++ /dev/null @@ -1,25 +0,0 @@ -include "llvm/Option/OptParser.td" - -// lib.exe accepts options starting with either a dash or a slash. - -// Flag that takes no arguments. -class F : Flag<["/", "-", "-?"], name>; - -// Flag that takes one argument after ":". -class P : - Joined<["/", "-", "-?"], name#":">, HelpText; - -def libpath: P<"libpath", "Object file search path">; -def out : P<"out", "Path to file to write output">; - -def llvmlibthin : F<"llvmlibthin">; - -//============================================================================== -// The flags below do nothing. They are defined only for lib.exe compatibility. -//============================================================================== - -class QF : Joined<["/", "-", "-?"], name#":">; - -def ignore : QF<"ignore">; -def machine: QF<"machine">; -def nologo : F<"nologo">; diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp index c0af21aa148..defad190498 100644 --- a/contrib/llvm/lib/Linker/IRMover.cpp +++ b/contrib/llvm/lib/Linker/IRMover.cpp @@ -1157,6 +1157,11 @@ Error IRLinker::linkModuleFlagsMetadata() { mdconst::extract(DstOp->getOperand(0)); unsigned DstBehaviorValue = DstBehavior->getZExtValue(); + auto overrideDstValue = [&]() { + DstModFlags->setOperand(DstIndex, SrcOp); + Flags[ID].first = SrcOp; + }; + // If either flag has override behavior, handle it first. if (DstBehaviorValue == Module::Override) { // Diagnose inconsistent flags which both have override behavior. @@ -1167,8 +1172,7 @@ Error IRLinker::linkModuleFlagsMetadata() { continue; } else if (SrcBehaviorValue == Module::Override) { // Update the destination flag to that of the source. - DstModFlags->setOperand(DstIndex, SrcOp); - Flags[ID].first = SrcOp; + overrideDstValue(); continue; } @@ -1204,6 +1208,15 @@ Error IRLinker::linkModuleFlagsMetadata() { } continue; } + case Module::Max: { + ConstantInt *DstValue = + mdconst::extract(DstOp->getOperand(2)); + ConstantInt *SrcValue = + mdconst::extract(SrcOp->getOperand(2)); + if (SrcValue->getZExtValue() > DstValue->getZExtValue()) + overrideDstValue(); + break; + } case Module::Append: { MDNode *DstValue = cast(DstOp->getOperand(2)); MDNode *SrcValue = cast(SrcOp->getOperand(2)); diff --git a/contrib/llvm/lib/MC/WasmObjectWriter.cpp b/contrib/llvm/lib/MC/WasmObjectWriter.cpp index 0540c4c47a3..8c3df36cfb4 100644 --- a/contrib/llvm/lib/MC/WasmObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WasmObjectWriter.cpp @@ -422,6 +422,7 @@ static void ApplyRelocations( RelEntry.Offset; switch (RelEntry.Type) { case wasm::R_WEBASSEMBLY_FUNCTION_INDEX_LEB: { + assert(SymbolIndices.count(RelEntry.Symbol)); uint32_t Index = SymbolIndices[RelEntry.Symbol]; assert(RelEntry.Addend == 0); @@ -429,6 +430,7 @@ static void ApplyRelocations( break; } case wasm::R_WEBASSEMBLY_TABLE_INDEX_SLEB: { + assert(SymbolIndices.count(RelEntry.Symbol)); uint32_t Index = SymbolIndices[RelEntry.Symbol]; assert(RelEntry.Addend == 0); @@ -448,6 +450,7 @@ static void ApplyRelocations( break; } case wasm::R_WEBASSEMBLY_TABLE_INDEX_I32: { + assert(SymbolIndices.count(RelEntry.Symbol)); uint32_t Index = SymbolIndices[RelEntry.Symbol]; assert(RelEntry.Addend == 0); @@ -478,6 +481,7 @@ WriteRelocations(ArrayRef Relocations, uint64_t Offset = RelEntry.Offset + RelEntry.FixupSection->getSectionOffset() + HeaderSize; + assert(SymbolIndices.count(RelEntry.Symbol)); uint32_t Index = SymbolIndices[RelEntry.Symbol]; int64_t Addend = RelEntry.Addend; @@ -726,10 +730,6 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, if (IsAddressTaken.count(&WS)) TableElems.push_back(Index); } else { - // For now, ignore temporary non-function symbols. - if (S.isTemporary()) - continue; - if (WS.getOffset() != 0) report_fatal_error("data sections must contain one variable each"); if (!WS.getSize()) @@ -777,20 +777,18 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm, } } - // For each external global, prepare a corresponding wasm global - // holding its address. - if (WS.isExternal()) { - Index = NumGlobalImports + Globals.size(); + // For each global, prepare a corresponding wasm global holding its + // address. For externals these will also be named exports. + Index = NumGlobalImports + Globals.size(); - WasmGlobal Global; - Global.Type = PtrType; - Global.IsMutable = false; - Global.HasImport = false; - Global.InitialValue = DataSection.getSectionOffset(); - Global.ImportIndex = 0; - SymbolIndices[&WS] = Index; - Globals.push_back(Global); - } + WasmGlobal Global; + Global.Type = PtrType; + Global.IsMutable = false; + Global.HasImport = false; + Global.InitialValue = DataSection.getSectionOffset(); + Global.ImportIndex = 0; + SymbolIndices[&WS] = Index; + Globals.push_back(Global); } } diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp index 28531feccfe..7372f24cb9a 100644 --- a/contrib/llvm/lib/Object/COFFObjectFile.cpp +++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp @@ -293,6 +293,10 @@ uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Ref) const { return Result; } +uint64_t COFFObjectFile::getSectionIndex(DataRefImpl Sec) const { + return toSec(Sec) - SectionTable; +} + uint64_t COFFObjectFile::getSectionSize(DataRefImpl Ref) const { return getSectionSize(toSec(Ref)); } diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index 3d3fa07db3f..bfb8875f47d 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -1820,6 +1820,10 @@ uint64_t MachOObjectFile::getSectionAddress(DataRefImpl Sec) const { return getSection(Sec).addr; } +uint64_t MachOObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t MachOObjectFile::getSectionSize(DataRefImpl Sec) const { // In the case if a malformed Mach-O file where the section offset is past // the end of the file or some part of the section size is past the end of diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp index 058686e4db9..f565d7a33e5 100644 --- a/contrib/llvm/lib/Object/WasmObjectFile.cpp +++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp @@ -743,6 +743,10 @@ std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec, uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; } +uint64_t WasmObjectFile::getSectionIndex(DataRefImpl Sec) const { + return Sec.d.a; +} + uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const { const WasmSection &S = Sections[Sec.d.a]; return S.Content.size(); diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp index 7eafb00855d..b00d21ec8f6 100644 --- a/contrib/llvm/lib/Option/OptTable.cpp +++ b/contrib/llvm/lib/Option/OptTable.cpp @@ -186,6 +186,20 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str, return 0; } +std::vector OptTable::findByPrefix(StringRef Cur) const { + std::vector Ret; + for (const Info &In : OptionInfos.slice(FirstSearchableIndex)) { + if (!In.Prefixes) + continue; + for (int I = 0; In.Prefixes[I]; I++) { + std::string S = std::string(In.Prefixes[I]) + std::string(In.Name); + if (StringRef(S).startswith(Cur)) + Ret.push_back(S); + } + } + return Ret; +} + Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, unsigned FlagsToInclude, unsigned FlagsToExclude) const { diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp index 6ece7965ce6..abc53e97aa7 100644 --- a/contrib/llvm/lib/Passes/PassBuilder.cpp +++ b/contrib/llvm/lib/Passes/PassBuilder.cpp @@ -155,6 +155,11 @@ static cl::opt cl::Hidden, cl::ZeroOrMore, cl::desc("Run Partial inlinining pass")); +static cl::opt + RunNewGVN("enable-npm-newgvn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Run NewGVN instead of GVN")); + static cl::opt EnableGVNHoist( "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); @@ -336,10 +341,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Rotate Loop - disable header duplication at -Oz LPM1.addPass(LoopRotatePass(Level != Oz)); LPM1.addPass(LICMPass()); -#if 0 - // The LoopUnswitch pass isn't yet ported to the new pass manager. - LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3)); -#endif + LPM1.addPass(SimpleLoopUnswitchPass()); LPM2.addPass(IndVarSimplifyPass()); LPM2.addPass(LoopIdiomRecognizePass()); LPM2.addPass(LoopDeletionPass()); @@ -357,7 +359,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, if (Level != O1) { // These passes add substantial compile time so skip them at O1. FPM.addPass(MergedLoadStoreMotionPass()); - FPM.addPass(GVN()); + if (RunNewGVN) + FPM.addPass(NewGVNPass()); + else + FPM.addPass(GVN()); } // Specially optimize memory movement as it doesn't look like dataflow in SSA. @@ -429,6 +434,11 @@ static void addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging, MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline))); } + // Delete anything that is now dead to make sure that we don't instrument + // dead code. Instrumentation can end up keeping dead code around and + // dramatically increase code size. + MPM.addPass(GlobalDCEPass()); + if (RunProfileGen) { MPM.addPass(PGOInstrumentationGen()); @@ -774,7 +784,10 @@ ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, // FIXME: once we fix LoopPass Manager, add LICM here. // FIXME: once we provide support for enabling MLSM, add it here. // FIXME: once we provide support for enabling NewGVN, add it here. - MainFPM.addPass(GVN()); + if (RunNewGVN) + MainFPM.addPass(NewGVNPass()); + else + MainFPM.addPass(GVN()); // Remove dead memcpy()'s. MainFPM.addPass(MemCpyOptPass()); diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp index 64a65ccc11a..a2b7c94f9de 100644 --- a/contrib/llvm/lib/ProfileData/InstrProf.cpp +++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp @@ -355,7 +355,7 @@ void InstrProfSymtab::create(Module &M, bool InLTO) { finalizeSymtab(); } -Error collectPGOFuncNameStrings(const std::vector &NameStrs, +Error collectPGOFuncNameStrings(ArrayRef NameStrs, bool doCompression, std::string &Result) { assert(!NameStrs.empty() && "No name data to emit"); @@ -403,7 +403,7 @@ StringRef getPGOFuncNameVarInitializer(GlobalVariable *NameVar) { return NameStr; } -Error collectPGOFuncNameStrings(const std::vector &NameVars, +Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression) { std::vector NameStrs; for (auto *NameVar : NameVars) { @@ -978,22 +978,22 @@ bool canRenameComdatFunc(const Function &F, bool CheckAddressTaken) { } // Parse the value profile options. -void getMemOPSizeRangeFromOption(std::string MemOPSizeRange, - int64_t &RangeStart, int64_t &RangeLast) { +void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, + int64_t &RangeLast) { static const int64_t DefaultMemOPSizeRangeStart = 0; static const int64_t DefaultMemOPSizeRangeLast = 8; RangeStart = DefaultMemOPSizeRangeStart; RangeLast = DefaultMemOPSizeRangeLast; if (!MemOPSizeRange.empty()) { - auto Pos = MemOPSizeRange.find(":"); + auto Pos = MemOPSizeRange.find(':'); if (Pos != std::string::npos) { if (Pos > 0) - RangeStart = atoi(MemOPSizeRange.substr(0, Pos).c_str()); + MemOPSizeRange.substr(0, Pos).getAsInteger(10, RangeStart); if (Pos < MemOPSizeRange.size() - 1) - RangeLast = atoi(MemOPSizeRange.substr(Pos + 1).c_str()); + MemOPSizeRange.substr(Pos + 1).getAsInteger(10, RangeLast); } else - RangeLast = atoi(MemOPSizeRange.c_str()); + MemOPSizeRange.getAsInteger(10, RangeLast); } assert(RangeLast >= RangeStart); } diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp index 2a916b14bc2..e9716e3b1e8 100644 --- a/contrib/llvm/lib/Support/APInt.cpp +++ b/contrib/llvm/lib/Support/APInt.cpp @@ -2045,7 +2045,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, if (isSingleWord()) { char Buffer[65]; - char *BufPtr = Buffer+65; + char *BufPtr = std::end(Buffer); uint64_t N; if (!Signed) { @@ -2069,7 +2069,7 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, *--BufPtr = Digits[N % Radix]; N /= Radix; } - Str.append(BufPtr, Buffer+65); + Str.append(BufPtr, std::end(Buffer)); return; } diff --git a/contrib/llvm/lib/Support/BinaryStreamReader.cpp b/contrib/llvm/lib/Support/BinaryStreamReader.cpp index 5c277448a76..86223297116 100644 --- a/contrib/llvm/lib/Support/BinaryStreamReader.cpp +++ b/contrib/llvm/lib/Support/BinaryStreamReader.cpp @@ -42,29 +42,30 @@ Error BinaryStreamReader::readBytes(ArrayRef &Buffer, uint32_t Size) { } Error BinaryStreamReader::readCString(StringRef &Dest) { - // TODO: This could be made more efficient by using readLongestContiguousChunk - // and searching for null terminators in the resulting buffer. - - uint32_t Length = 0; - // First compute the length of the string by reading 1 byte at a time. uint32_t OriginalOffset = getOffset(); - const char *C; + uint32_t FoundOffset = 0; while (true) { - if (auto EC = readObject(C)) + uint32_t ThisOffset = getOffset(); + ArrayRef Buffer; + if (auto EC = readLongestContiguousChunk(Buffer)) return EC; - if (*C == '\0') + StringRef S(reinterpret_cast(Buffer.begin()), Buffer.size()); + size_t Pos = S.find_first_of('\0'); + if (LLVM_LIKELY(Pos != StringRef::npos)) { + FoundOffset = Pos + ThisOffset; break; - ++Length; + } } - // Now go back and request a reference for that many bytes. - uint32_t NewOffset = getOffset(); + assert(FoundOffset >= OriginalOffset); + setOffset(OriginalOffset); + size_t Length = FoundOffset - OriginalOffset; if (auto EC = readFixedString(Dest, Length)) return EC; - // Now set the offset back to where it was after we calculated the length. - setOffset(NewOffset); + // Now set the offset back to after the null terminator. + setOffset(FoundOffset + 1); return Error::success(); } diff --git a/contrib/llvm/lib/Support/ConvertUTF.cpp b/contrib/llvm/lib/Support/ConvertUTF.cpp index 39fd218d3f0..aa9507c189e 100644 --- a/contrib/llvm/lib/Support/ConvertUTF.cpp +++ b/contrib/llvm/lib/Support/ConvertUTF.cpp @@ -53,6 +53,35 @@ #endif #include + +/* + * This code extensively uses fall-through switches. + * Keep the compiler from warning about that. + */ +#if defined(__clang__) && defined(__has_warning) +# if __has_warning("-Wimplicit-fallthrough") +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("clang diagnostic pop") +# endif +#elif defined(__GNUC__) && __GNUC__ > 6 +# define ConvertUTF_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wimplicit-fallthrough\"") +# define ConvertUTF_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#endif +#ifndef ConvertUTF_DISABLE_WARNINGS +# define ConvertUTF_DISABLE_WARNINGS +#endif +#ifndef ConvertUTF_RESTORE_WARNINGS +# define ConvertUTF_RESTORE_WARNINGS +#endif + +ConvertUTF_DISABLE_WARNINGS + namespace llvm { static const int halfShift = 10; /* used for shifting by 10 bits */ @@ -708,3 +737,5 @@ ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, --------------------------------------------------------------------- */ } // namespace llvm + +ConvertUTF_RESTORE_WARNINGS diff --git a/contrib/llvm/lib/Support/DebugCounter.cpp b/contrib/llvm/lib/Support/DebugCounter.cpp index 29dae8a20f0..a10ac8e8539 100644 --- a/contrib/llvm/lib/Support/DebugCounter.cpp +++ b/contrib/llvm/lib/Support/DebugCounter.cpp @@ -6,6 +6,7 @@ using namespace llvm; +namespace { // This class overrides the default list implementation of printing so we // can pretty print the list of debug counter options. This type of // dynamic option is pretty rare (basically this and pass lists). @@ -40,6 +41,7 @@ private: } } }; +} // namespace // Create our command line option. static DebugCounterList DebugCounterOption( diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp index 1541a572630..9398789cea8 100644 --- a/contrib/llvm/lib/Support/DynamicLibrary.cpp +++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp @@ -127,10 +127,15 @@ void DynamicLibrary::AddSymbol(StringRef SymbolName, void *SymbolValue) { DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *FileName, std::string *Err) { - SmartScopedLock Lock(*SymbolsMutex); + // Force OpenedHandles to be added into the ManagedStatic list before any + // ManagedStatic can be added from static constructors in HandleSet::DLOpen. + HandleSet& HS = *OpenedHandles; + void *Handle = HandleSet::DLOpen(FileName, Err); - if (Handle != &Invalid) - OpenedHandles->AddLibrary(Handle, /*IsProcess*/ FileName == nullptr); + if (Handle != &Invalid) { + SmartScopedLock Lock(*SymbolsMutex); + HS.AddLibrary(Handle, /*IsProcess*/ FileName == nullptr); + } return DynamicLibrary(Handle); } diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp index d0e1d50e8cc..f70b77da8de 100644 --- a/contrib/llvm/lib/Support/GraphWriter.cpp +++ b/contrib/llvm/lib/Support/GraphWriter.cpp @@ -43,6 +43,7 @@ std::string llvm::DOT::EscapeString(const std::string &Label) { Str.erase(Str.begin()+i); continue; default: break; } + LLVM_FALLTHROUGH; case '{': case '}': case '<': case '>': case '|': case '"': diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index 6a0b64fb884..234f7439a54 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -1401,6 +1401,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { Features["prefetchwt1"] = HasLeaf7 && (ECX & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; + Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; // Enable protection keys Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1); diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp index 9fd6652ce4b..80bef558258 100644 --- a/contrib/llvm/lib/Support/Path.cpp +++ b/contrib/llvm/lib/Support/Path.cpp @@ -1156,6 +1156,7 @@ file_magic identify_magic(StringRef Magic) { case 0xc4: // ARMNT Windows if (Magic[1] == 0x01) return file_magic::coff_object; + LLVM_FALLTHROUGH; case 0x90: // PA-RISC Windows case 0x68: // mc68K Windows diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index b0e3d6898ca..318e21da999 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -34,6 +34,7 @@ StringRef Triple::getArchTypeName(ArchType Kind) { case mips64: return "mips64"; case mips64el: return "mips64el"; case msp430: return "msp430"; + case nios2: return "nios2"; case ppc64: return "powerpc64"; case ppc64le: return "powerpc64le"; case ppc: return "powerpc"; @@ -98,6 +99,8 @@ StringRef Triple::getArchTypePrefix(ArchType Kind) { case mips64: case mips64el: return "mips"; + case nios2: return "nios2"; + case hexagon: return "hexagon"; case amdgcn: return "amdgcn"; @@ -262,6 +265,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("mips64", mips64) .Case("mips64el", mips64el) .Case("msp430", msp430) + .Case("nios2", nios2) .Case("ppc64", ppc64) .Case("ppc32", ppc) .Case("ppc", ppc) @@ -384,6 +388,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("mipsel", "mipsallegrexel", Triple::mipsel) .Cases("mips64", "mips64eb", Triple::mips64) .Case("mips64el", Triple::mips64el) + .Case("nios2", Triple::nios2) .Case("r600", Triple::r600) .Case("amdgcn", Triple::amdgcn) .Case("riscv32", Triple::riscv32) @@ -625,6 +630,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::mips64el: case Triple::mipsel: case Triple::msp430: + case Triple::nios2: case Triple::nvptx: case Triple::nvptx64: case Triple::ppc64le: @@ -643,11 +649,13 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) { case Triple::tce: case Triple::tcele: case Triple::thumbeb: - case Triple::wasm32: - case Triple::wasm64: case Triple::xcore: return Triple::ELF; + case Triple::wasm32: + case Triple::wasm64: + return Triple::Wasm; + case Triple::ppc: case Triple::ppc64: if (T.isOSDarwin()) @@ -1160,6 +1168,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::le32: case llvm::Triple::mips: case llvm::Triple::mipsel: + case llvm::Triple::nios2: case llvm::Triple::nvptx: case llvm::Triple::ppc: case llvm::Triple::r600: @@ -1243,6 +1252,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::le32: case Triple::mips: case Triple::mipsel: + case Triple::nios2: case Triple::nvptx: case Triple::ppc: case Triple::r600: @@ -1290,6 +1300,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::kalimba: case Triple::lanai: case Triple::msp430: + case Triple::nios2: case Triple::r600: case Triple::tce: case Triple::tcele: @@ -1361,6 +1372,7 @@ Triple Triple::getBigEndianArchVariant() const { case Triple::le32: case Triple::le64: case Triple::msp430: + case Triple::nios2: case Triple::nvptx64: case Triple::nvptx: case Triple::r600: @@ -1447,6 +1459,7 @@ bool Triple::isLittleEndian() const { case Triple::mips64el: case Triple::mipsel: case Triple::msp430: + case Triple::nios2: case Triple::nvptx64: case Triple::nvptx: case Triple::ppc64le: diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp index c17a6f6e1ea..f1496393e55 100644 --- a/contrib/llvm/lib/Support/YAMLParser.cpp +++ b/contrib/llvm/lib/Support/YAMLParser.cpp @@ -2116,6 +2116,7 @@ void MappingNode::increment() { break; default: setError("Unexpected token. Expected Key or Block End", T); + LLVM_FALLTHROUGH; case Token::TK_Error: IsAtEnd = true; CurrentEntry = nullptr; @@ -2128,6 +2129,7 @@ void MappingNode::increment() { return increment(); case Token::TK_FlowMappingEnd: getNext(); + LLVM_FALLTHROUGH; case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; @@ -2170,6 +2172,7 @@ void SequenceNode::increment() { default: setError( "Unexpected token. Expected Block Entry or Block End." , T); + LLVM_FALLTHROUGH; case Token::TK_Error: IsAtEnd = true; CurrentEntry = nullptr; @@ -2198,6 +2201,7 @@ void SequenceNode::increment() { return increment(); case Token::TK_FlowSequenceEnd: getNext(); + LLVM_FALLTHROUGH; case Token::TK_Error: // Set this to end iterator. IsAtEnd = true; diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp index 33d3de5daf3..09f9759ce7d 100644 --- a/contrib/llvm/lib/TableGen/Record.cpp +++ b/contrib/llvm/lib/TableGen/Record.cpp @@ -219,7 +219,6 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef Range) { BitsInit *BitsInit::get(ArrayRef Range) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileBitsInit(ID, Range); @@ -234,7 +233,6 @@ BitsInit *BitsInit::get(ArrayRef Range) { std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -456,7 +454,6 @@ static void ProfileListInit(FoldingSetNodeID &ID, ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileListInit(ID, Range, EltTy); @@ -471,7 +468,6 @@ ListInit *ListInit::get(ArrayRef Range, RecTy *EltTy) { std::uninitialized_copy(Range.begin(), Range.end(), I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -606,7 +602,6 @@ ProfileUnOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *Op, RecTy *Type) { UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileUnOpInit(ID, Opc, LHS, Type); @@ -617,7 +612,6 @@ UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) { UnOpInit *I = new(Allocator) UnOpInit(Opc, LHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -752,7 +746,6 @@ ProfileBinOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *RHS, BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, Init *RHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileBinOpInit(ID, Opc, LHS, RHS, Type); @@ -763,7 +756,6 @@ BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS, BinOpInit *I = new(Allocator) BinOpInit(Opc, LHS, RHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -910,7 +902,6 @@ ProfileTernOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *MHS, TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS, RecTy *Type) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type); @@ -921,7 +912,6 @@ TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS, TernOpInit *I = new(Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -1503,7 +1493,6 @@ DagInit * DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, ArrayRef NameRange) { static FoldingSet ThePool; - static std::vector TheActualPool; FoldingSetNodeID ID; ProfileDagInit(ID, V, VN, ArgRange, NameRange); @@ -1512,9 +1501,13 @@ DagInit::get(Init *V, StringInit *VN, ArrayRef ArgRange, if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP)) return I; - DagInit *I = new(Allocator) DagInit(V, VN, ArgRange, NameRange); + void *Mem = Allocator.Allocate(totalSizeToAlloc(ArgRange.size(), NameRange.size()), alignof(BitsInit)); + DagInit *I = new(Mem) DagInit(V, VN, ArgRange.size(), NameRange.size()); + std::uninitialized_copy(ArgRange.begin(), ArgRange.end(), + I->getTrailingObjects()); + std::uninitialized_copy(NameRange.begin(), NameRange.end(), + I->getTrailingObjects()); ThePool.InsertNode(I, IP); - TheActualPool.push_back(I); return I; } @@ -1533,7 +1526,7 @@ DagInit::get(Init *V, StringInit *VN, } void DagInit::Profile(FoldingSetNodeID &ID) const { - ProfileDagInit(ID, Val, ValName, Args, ArgNames); + ProfileDagInit(ID, Val, ValName, makeArrayRef(getTrailingObjects(), NumArgs), makeArrayRef(getTrailingObjects(), NumArgNames)); } Init *DagInit::convertInitializerTo(RecTy *Ty) const { @@ -1545,9 +1538,9 @@ Init *DagInit::convertInitializerTo(RecTy *Ty) const { Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { SmallVector NewArgs; - NewArgs.reserve(Args.size()); + NewArgs.reserve(arg_size()); bool ArgsChanged = false; - for (const Init *Arg : Args) { + for (const Init *Arg : args()) { Init *NewArg = Arg->resolveReferences(R, RV); NewArgs.push_back(NewArg); ArgsChanged |= NewArg != Arg; @@ -1555,7 +1548,7 @@ Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const { Init *Op = Val->resolveReferences(R, RV); if (Op != Val || ArgsChanged) - return DagInit::get(Op, ValName, NewArgs, ArgNames); + return DagInit::get(Op, ValName, NewArgs, getArgNames()); return const_cast(this); } @@ -1564,12 +1557,12 @@ std::string DagInit::getAsString() const { std::string Result = "(" + Val->getAsString(); if (ValName) Result += ":" + ValName->getAsUnquotedString(); - if (!Args.empty()) { - Result += " " + Args[0]->getAsString(); - if (ArgNames[0]) Result += ":$" + ArgNames[0]->getAsUnquotedString(); - for (unsigned i = 1, e = Args.size(); i != e; ++i) { - Result += ", " + Args[i]->getAsString(); - if (ArgNames[i]) Result += ":$" + ArgNames[i]->getAsUnquotedString(); + if (!arg_empty()) { + Result += " " + getArg(0)->getAsString(); + if (getArgName(0)) Result += ":$" + getArgName(0)->getAsUnquotedString(); + for (unsigned i = 1, e = getNumArgs(); i != e; ++i) { + Result += ", " + getArg(i)->getAsString(); + if (getArgName(i)) Result += ":$" + getArgName(i)->getAsUnquotedString(); } } return Result + ")"; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 056ffd58b52..981fd22c213 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -320,6 +320,9 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, switch (ExtraCode[0]) { default: return true; // Unknown modifier. + case 'a': // Print 'a' modifier + PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O); + return false; case 'w': // Print W register case 'x': // Print X register if (MO.isReg()) @@ -388,7 +391,7 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) + if (ExtraCode && ExtraCode[0] && ExtraCode[0] != 'a') return true; // Unknown modifier. const MachineOperand &MO = MI->getOperand(OpNum); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 629ad5c61b7..33fec74998d 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -584,27 +584,21 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB, return true; } -static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) { - for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I) - MBB->addLiveIn(*I); -} - bool AArch64ExpandPseudo::expandCMP_SWAP( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp, unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg, MachineBasicBlock::iterator &NextMBBI) { MachineInstr &MI = *MBBI; DebugLoc DL = MI.getDebugLoc(); - MachineOperand &Dest = MI.getOperand(0); + const MachineOperand &Dest = MI.getOperand(0); unsigned StatusReg = MI.getOperand(1).getReg(); - MachineOperand &Addr = MI.getOperand(2); - MachineOperand &Desired = MI.getOperand(3); - MachineOperand &New = MI.getOperand(4); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + bool StatusDead = MI.getOperand(1).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. + assert(!MI.getOperand(2).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(2).getReg(); + unsigned DesiredReg = MI.getOperand(3).getReg(); + unsigned NewReg = MI.getOperand(4).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -616,19 +610,18 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( MF->insert(++StoreBB->getIterator(), DoneBB); // .Lloadcmp: + // mov wStatus, 0 // ldaxr xDest, [xAddr] // cmp xDest, xDesired // b.ne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(Dest.getReg()); - LoadCmpBB->addLiveIn(Desired.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - + if (!StatusDead) + BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg) + .addImm(0).addImm(0); BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg()) - .addReg(Addr.getReg()); + .addReg(AddrReg); BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg) .addReg(Dest.getReg(), getKillRegState(Dest.isDead())) - .add(Desired) + .addReg(DesiredReg) .addImm(ExtendImm); BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc)) .addImm(AArch64CC::NE) @@ -640,25 +633,35 @@ bool AArch64ExpandPseudo::expandCMP_SWAP( // .Lstore: // stlxr wStatus, xNew, [xAddr] // cbnz wStatus, .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(New.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); - - BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg).add(New).add(Addr); + BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg) + .addReg(NewReg) + .addReg(AddrReg); BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addMBB(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute livein lists. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass around the loop to get loop carried registers right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } @@ -671,16 +674,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( MachineOperand &DestLo = MI.getOperand(0); MachineOperand &DestHi = MI.getOperand(1); unsigned StatusReg = MI.getOperand(2).getReg(); - MachineOperand &Addr = MI.getOperand(3); - MachineOperand &DesiredLo = MI.getOperand(4); - MachineOperand &DesiredHi = MI.getOperand(5); - MachineOperand &NewLo = MI.getOperand(6); - MachineOperand &NewHi = MI.getOperand(7); - - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); - LiveRegs.addLiveOuts(MBB); - for (auto I = std::prev(MBB.end()); I != MBBI; --I) - LiveRegs.stepBackward(*I); + bool StatusDead = MI.getOperand(2).isDead(); + // Duplicating undef operands into 2 instructions does not guarantee the same + // value on both; However undef should be replaced by xzr anyway. + assert(!MI.getOperand(3).isUndef() && "cannot handle undef"); + unsigned AddrReg = MI.getOperand(3).getReg(); + unsigned DesiredLoReg = MI.getOperand(4).getReg(); + unsigned DesiredHiReg = MI.getOperand(5).getReg(); + unsigned NewLoReg = MI.getOperand(6).getReg(); + unsigned NewHiReg = MI.getOperand(7).getReg(); MachineFunction *MF = MBB.getParent(); auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); @@ -696,20 +698,13 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( // cmp xDestLo, xDesiredLo // sbcs xDestHi, xDesiredHi // b.ne .Ldone - LoadCmpBB->addLiveIn(Addr.getReg()); - LoadCmpBB->addLiveIn(DestLo.getReg()); - LoadCmpBB->addLiveIn(DestHi.getReg()); - LoadCmpBB->addLiveIn(DesiredLo.getReg()); - LoadCmpBB->addLiveIn(DesiredHi.getReg()); - addPostLoopLiveIns(LoadCmpBB, LiveRegs); - BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX)) .addReg(DestLo.getReg(), RegState::Define) .addReg(DestHi.getReg(), RegState::Define) - .addReg(Addr.getReg()); + .addReg(AddrReg); BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead())) - .add(DesiredLo) + .addReg(DesiredLoReg) .addImm(0); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) .addUse(AArch64::WZR) @@ -717,14 +712,14 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( .addImm(AArch64CC::EQ); BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead())) - .add(DesiredHi) + .addReg(DesiredHiReg) .addImm(0); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg) .addUse(StatusReg, RegState::Kill) .addUse(StatusReg, RegState::Kill) .addImm(AArch64CC::EQ); BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW)) - .addUse(StatusReg, RegState::Kill) + .addUse(StatusReg, getKillRegState(StatusDead)) .addMBB(DoneBB); LoadCmpBB->addSuccessor(DoneBB); LoadCmpBB->addSuccessor(StoreBB); @@ -732,28 +727,36 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128( // .Lstore: // stlxp wStatus, xNewLo, xNewHi, [xAddr] // cbnz wStatus, .Lloadcmp - StoreBB->addLiveIn(Addr.getReg()); - StoreBB->addLiveIn(NewLo.getReg()); - StoreBB->addLiveIn(NewHi.getReg()); - addPostLoopLiveIns(StoreBB, LiveRegs); BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg) - .add(NewLo) - .add(NewHi) - .add(Addr); + .addReg(NewLoReg) + .addReg(NewHiReg) + .addReg(AddrReg); BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW)) - .addReg(StatusReg, RegState::Kill) + .addReg(StatusReg, getKillRegState(StatusDead)) .addMBB(LoadCmpBB); StoreBB->addSuccessor(LoadCmpBB); StoreBB->addSuccessor(DoneBB); DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end()); DoneBB->transferSuccessors(&MBB); - addPostLoopLiveIns(DoneBB, LiveRegs); MBB.addSuccessor(LoadCmpBB); NextMBBI = MBB.end(); MI.eraseFromParent(); + + // Recompute liveness bottom up. + const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + LivePhysRegs LiveRegs; + computeLiveIns(LiveRegs, MRI, *DoneBB); + computeLiveIns(LiveRegs, MRI, *StoreBB); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + // Do an extra pass in the loop to get the loop carried dependencies right. + StoreBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *StoreBB); + LoadCmpBB->clearLiveIns(); + computeLiveIns(LiveRegs, MRI, *LoadCmpBB); + return true; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 1aec602a2a3..0b92249580c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -267,12 +267,12 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { return AArch64::X9; const AArch64Subtarget &Subtarget = MF->getSubtarget(); - const AArch64RegisterInfo *TRI = Subtarget.getRegisterInfo(); + const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); LivePhysRegs LiveRegs(TRI); LiveRegs.addLiveIns(*MBB); // Mark callee saved registers as used so we will not choose them. - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(MF); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF); for (unsigned i = 0; CSRegs[i]; ++i) LiveRegs.addReg(CSRegs[i]); @@ -991,6 +991,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( SmallVector RegPairs; computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); + const MachineRegisterInfo &MRI = MF.getRegInfo(); for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { @@ -1022,9 +1023,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( dbgs() << ")\n"); MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); - MBB.addLiveIn(Reg1); + if (!MRI.isReserved(Reg1)) + MBB.addLiveIn(Reg1); if (RPI.isPaired()) { - MBB.addLiveIn(Reg2); + if (!MRI.isReserved(Reg2)) + MBB.addLiveIn(Reg2); MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)); MIB.addMemOperand(MF.getMachineMemOperand( MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1), diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 1af36086ad9..62f4c953830 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -886,18 +886,21 @@ static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm, // Create the new constant immediate node. EVT VT = Op.getValueType(); SDLoc DL(Op); + SDValue New; // If the new constant immediate is all-zeros or all-ones, let the target // independent DAG combine optimize this node. - if (NewImm == 0 || NewImm == OrigMask) - return TLO.CombineTo(Op.getOperand(1), TLO.DAG.getConstant(NewImm, DL, VT)); - + if (NewImm == 0 || NewImm == OrigMask) { + New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0), + TLO.DAG.getConstant(NewImm, DL, VT)); // Otherwise, create a machine node so that target independent DAG combine // doesn't undo this optimization. - Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); - SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); - SDValue New( - TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + } else { + Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size); + SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT); + New = SDValue( + TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0); + } return TLO.CombineTo(Op, New); } @@ -9219,16 +9222,26 @@ static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St, // instructions (stp). SDLoc DL(&St); SDValue BasePtr = St.getBasePtr(); + uint64_t BaseOffset = 0; + const MachinePointerInfo &PtrInfo = St.getPointerInfo(); SDValue NewST1 = DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo, OrigAlignment, St.getMemOperand()->getFlags()); + // As this in ISel, we will not merge this add which may degrade results. + if (BasePtr->getOpcode() == ISD::ADD && + isa(BasePtr->getOperand(1))) { + BaseOffset = cast(BasePtr->getOperand(1))->getSExtValue(); + BasePtr = BasePtr->getOperand(0); + } + unsigned Offset = EltOffset; while (--NumVecElts) { unsigned Alignment = MinAlign(OrigAlignment, Offset); - SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, - DAG.getConstant(Offset, DL, MVT::i64)); + SDValue OffsetPtr = + DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, + DAG.getConstant(BaseOffset + Offset, DL, MVT::i64)); NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr, PtrInfo.getWithOffset(Offset), Alignment, St.getMemOperand()->getFlags()); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index c42738da7ab..faf39be9b41 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -763,15 +763,126 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { llvm_unreachable("Unknown opcode to check as cheap as a move!"); } -bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const { - if (MI.getNumOperands() < 4) +bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: return false; - unsigned ShOpVal = MI.getOperand(3).getImm(); - unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal); - if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL && - ShImm < 4) - return true; - return false; + + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + if (ShiftVal == 0) + return true; + return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; + } + + case AArch64::ADDWrx: + case AArch64::ADDXrx: + case AArch64::ADDXrx64: + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: + case AArch64::ADDSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) <= 4; + } + } + + case AArch64::SUBWrs: + case AArch64::SUBSWrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); + } + + case AArch64::SUBXrs: + case AArch64::SUBSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); + } + + case AArch64::SUBWrx: + case AArch64::SUBXrx: + case AArch64::SUBXrx64: + case AArch64::SUBSWrx: + case AArch64::SUBSXrx: + case AArch64::SUBSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) == 0; + } + } + + case AArch64::LDRBBroW: + case AArch64::LDRBBroX: + case AArch64::LDRBroW: + case AArch64::LDRBroX: + case AArch64::LDRDroW: + case AArch64::LDRDroX: + case AArch64::LDRHHroW: + case AArch64::LDRHHroX: + case AArch64::LDRHroW: + case AArch64::LDRHroX: + case AArch64::LDRQroW: + case AArch64::LDRQroX: + case AArch64::LDRSBWroW: + case AArch64::LDRSBWroX: + case AArch64::LDRSBXroW: + case AArch64::LDRSBXroX: + case AArch64::LDRSHWroW: + case AArch64::LDRSHWroX: + case AArch64::LDRSHXroW: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroW: + case AArch64::LDRSWroX: + case AArch64::LDRSroW: + case AArch64::LDRSroX: + case AArch64::LDRWroW: + case AArch64::LDRWroX: + case AArch64::LDRXroW: + case AArch64::LDRXroX: + case AArch64::PRFMroW: + case AArch64::PRFMroX: + case AArch64::STRBBroW: + case AArch64::STRBBroX: + case AArch64::STRBroW: + case AArch64::STRBroX: + case AArch64::STRDroW: + case AArch64::STRDroX: + case AArch64::STRHHroW: + case AArch64::STRHHroX: + case AArch64::STRHroW: + case AArch64::STRHroX: + case AArch64::STRQroW: + case AArch64::STRQroX: + case AArch64::STRSroW: + case AArch64::STRSroX: + case AArch64::STRWroW: + case AArch64::STRWroX: + case AArch64::STRXroW: + case AArch64::STRXroX: { + unsigned IsSigned = MI.getOperand(3).getImm(); + return !IsSigned; + } + } } bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 4cd14db633b..59f3405fe43 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -270,7 +270,7 @@ public: bool IsTailCall) const override; /// Returns true if the instruction has a shift by immediate that can be /// executed in one cycle less. - bool isFalkorLSLFast(const MachineInstr &MI) const; + bool isFalkorShiftExtFast(const MachineInstr &MI) const; private: /// \brief Sets the offsets on outlined instructions in \p MBB which use SP diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index da68f3165c5..ad24612239f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -442,7 +442,7 @@ def MSRpstateImm4 : MSRpstateImm0_15; // TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects. let hasSideEffects = 0 in def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), - [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>; + [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; // The cycle counter PMC register is PMCCNTR_EL0. let Predicates = [HasPerfMon] in diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp index a6926a6700e..3b71d529db5 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -232,6 +232,19 @@ static bool scheduleAdjacentImpl(ScheduleDAGMI *DAG, SUnit &AnchorSU) { dbgs() << DAG->TII->getName(FirstMI->getOpcode()) << " - " << DAG->TII->getName(SecondMI->getOpcode()) << '\n'; ); + if (&SecondSU != &DAG->ExitSU) + // Make instructions dependent on FirstSU also dependent on SecondSU to + // prevent them from being scheduled between FirstSU and and SecondSU. + for (SUnit::const_succ_iterator + SI = FirstSU.Succs.begin(), SE = FirstSU.Succs.end(); + SI != SE; ++SI) { + if (!SI->getSUnit() || SI->getSUnit() == &SecondSU) + continue; + DEBUG(dbgs() << " Copy Succ "; + SI->getSUnit()->print(dbgs(), DAG); dbgs() << '\n';); + DAG->addEdge(SI->getSUnit(), SDep(&SecondSU, SDep::Artificial)); + } + ++NumFused; return true; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index cf1c0b66db5..44fd94fc3d4 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -61,56 +61,42 @@ let SchedModel = FalkorModel in { let SchedModel = FalkorModel in { -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes - { let Latency = 1; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 2; let NumMicroOps = 2; } -def : WriteRes { let Latency = 1; } -def : WriteRes - { let Latency = 8; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 16; let NumMicroOps = 2; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 5; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 5; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 3; let NumMicroOps = 2; } -def : WriteRes { let Latency = 2; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; } -def : WriteRes - { let Latency = 6; let NumMicroOps = 2; } -def : WriteRes - { let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1 -def : WriteRes { let Latency = 6; } -def : WriteRes { let Latency = 3; } -def : WriteRes - { let Latency = 0; let NumMicroOps = 2; } +// These WriteRes entries are not used in the Falkor sched model. +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } +def : WriteRes { let Unsupported = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } -def : WriteRes { let Latency = 1; } - -def : WriteRes { let Latency = 3; } - -def : WriteRes { let Unsupported = 1; } - -// No forwarding logic is modelled yet. +// These ReadAdvance entries are not used in the Falkor sched model. def : ReadAdvance; def : ReadAdvance; def : ReadAdvance; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td index a9b4d44a523..d098cf7a5a3 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td @@ -12,7 +12,509 @@ // //===----------------------------------------------------------------------===// -include "AArch64SchedFalkorWriteRes.td" +// Contains all of the Falkor specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming conventions is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: FalkorWr +// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) +// Latency: #cyc +// +// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued +// down one Z pipe, six SD pipes, four VX pipes and the total latency is +// six cycles. +// +// Contains all of the Falkor specific ReadAdvance types for forwarding logic. +// +// Contains all of the Falkor specific WriteVariant types for immediate zero +// and LSLFast. +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define 0 micro-op types +def FalkorWr_none_1cyc : SchedWriteRes<[]> { + let Latency = 1; + let NumMicroOps = 0; +} +def FalkorWr_none_3cyc : SchedWriteRes<[]> { + let Latency = 3; + let NumMicroOps = 0; +} +def FalkorWr_none_4cyc : SchedWriteRes<[]> { + let Latency = 4; + let NumMicroOps = 0; +} + +//===----------------------------------------------------------------------===// +// Define 1 micro-op types + +def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } +def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } +def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } +def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } +def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } +def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } +def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } +def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } +def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } +def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } +def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } +def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } + +def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } +def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } +def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } +def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } +def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } +def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } +def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } + +def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } +def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } +def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } + +def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } +def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } +def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } + +//===----------------------------------------------------------------------===// +// Define 2 micro-op types + +def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 1; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 2; +} +def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} +def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { + let Latency = 10; + let NumMicroOps = 2; +} + +def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { + let Latency = 4; + let NumMicroOps = 2; +} +def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 8; + let NumMicroOps = 2; + let ResourceCycles = [2, 8]; +} + +def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { + let Latency = 16; + let NumMicroOps = 2; + let ResourceCycles = [2, 16]; +} + +def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 2; +} + +//===----------------------------------------------------------------------===// +// Define 3 micro-op types + +def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 0; + let NumMicroOps = 3; +} + +def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> { + let Latency = 0; + let NumMicroOps = 3; +} +//===----------------------------------------------------------------------===// +// Define 4 micro-op types + +def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, + FalkorUnitVX, FalkorUnitVY]> { + let Latency = 2; + let NumMicroOps = 4; +} + +def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 2; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 3; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} +def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 6; + let NumMicroOps = 4; +} + +def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, + FalkorUnitSD, FalkorUnitLD]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 4; +} + +//===----------------------------------------------------------------------===// +// Define 5 micro-op types + +def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 5; +} +def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitVXVY]> { + let Latency = 7; + let NumMicroOps = 5; +} +def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitST, + FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 5; +} +//===----------------------------------------------------------------------===// +// Define 6 micro-op types + +def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, + FalkorUnitVSD, FalkorUnitXYZ, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 6; +} + +//===----------------------------------------------------------------------===// +// Define 8 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 8; +} + +def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 8; +} + +//===----------------------------------------------------------------------===// +// Define 9 micro-op types + +def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitLD, + FalkorUnitLD, FalkorUnitXYZ, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, + FalkorUnitLD, FalkorUnitVXVY, + FalkorUnitVXVY, FalkorUnitXYZ, + FalkorUnitLD, FalkorUnitLD, + FalkorUnitVXVY, FalkorUnitVXVY]> { + let Latency = 4; + let NumMicroOps = 9; +} + +//===----------------------------------------------------------------------===// +// Define 10 micro-op types + +def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 10; +} + +//===----------------------------------------------------------------------===// +// Define 12 micro-op types + +def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD, + FalkorUnitVXVY, FalkorUnitST, + FalkorUnitVSD, FalkorUnitVXVY, + FalkorUnitST, FalkorUnitVSD]> { + let Latency = 0; + let NumMicroOps = 12; +} + +// Forwarding logic is modeled for multiply add/accumulate. +// ----------------------------------------------------------------------------- +def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>; +def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>; +def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>; +def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>; +def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>; + +// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast +// ----------------------------------------------------------------------------- +def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>; +def FalkorFMOVZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR || + MI->getOperand(1).getReg() == AArch64::XZR}]>; +def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>; + +def FalkorWr_FMOV : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_MOVZ : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_ADDSUBsx : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_LDRSro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_PRFMro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRVro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRQro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; + +def FalkorWr_STRro : SchedWriteVariant<[ + SchedVar, + SchedVar]>; //===----------------------------------------------------------------------===// // Specialize the coarse model by associating instruction groups with the @@ -22,63 +524,76 @@ include "AArch64SchedFalkorWriteRes.td" // Miscellaneous // ----------------------------------------------------------------------------- -def : InstRW<[WriteI], (instrs COPY)>; +// FIXME: This could be better modeled by looking at the regclasses of the operands. +def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>; // SIMD Floating-point Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)v2f32$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v4f16|v2i16p|v2i32p)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(16|32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(16|32|64|v2f32|v4f16|v2i32|v4i16)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i16|v1i32|v1i64|v2i32|v4i16)rz$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FAC(GE|GT)(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?V(v4i16|v4i32|v8i16)v$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)(v2f32|v4f16)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FABD|FADD|FSUB)v2f32$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FMULX16, FMULX32)>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FMULX32)>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FMULX64)>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^(FMUL|FMULX)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FMULX64)>; -def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v8f16|v2i64p)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32|v8i16)rz$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instrs FCVTLv4i16, FCVTLv2i32)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)(v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^(FDIV|FSQRT)v2f32$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs FCVTLv8i16, FCVTLv4i32)>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>; -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>; -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instregex "^(FMUL|FMULX)v2i64_indexed$")>; -def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>; +def : InstRW<[FalkorWr_3VXVY_4cyc], (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>; +def : InstRW<[FalkorWr_3VXVY_5cyc], (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>; -def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>; +def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32)$")>; -def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>; - -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)v1i64_indexed$")>; -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], (instregex "^FML(A|S)(v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>; -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)v1i64_indexed$")>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32], + (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64], + (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>; // SIMD Integer Instructions // ----------------------------------------------------------------------------- @@ -92,12 +607,14 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$" def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHLv1i64$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(S|U)SHRd$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs PMULv8i8)>; def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^SHLd$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>; @@ -110,6 +627,8 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN) def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)RSHRd$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs ADDVv4i16v)>; @@ -120,10 +639,14 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64) def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)ADDLVv8i8v$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc], + (instregex "^SQDMULL(i16|i32)$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>; def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>; @@ -154,7 +677,7 @@ def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>; -def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL2?(v8i8|v16i8)$")>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^PMULL(v8i8|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>; def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>; @@ -165,14 +688,18 @@ def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16 def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL2?(v1i64|v2i64)$")>; +def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^PMULL(v1i64|v2i64)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^SQDMULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>; def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>; @@ -186,99 +713,114 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>; def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>; -def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^SQD(MLAL|MLSL)v[248].*$")>; +def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^SQD(MLAL|MLSL)v[248].*$")>; // SIMD Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; -def : InstRW<[WriteVLD], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; -def : InstRW<[WriteVLD], (instrs LD2i64)>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; -def : InstRW<[WriteVLD, WriteAdr], (instrs LD2i64_POST)>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instrs LD2i64)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], (instrs LD2i64_POST)>; -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "LD1i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1VXVY_4cyc], (instregex "^LD1i(8|16|32)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteAdr], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>; -def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD3i64_POST)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteAdr], (instrs LD4i64_POST)>; +def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD3i64)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD3i64_POST)>; +def : InstRW<[FalkorWr_2LD_3cyc], (instrs LD4i64)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc], (instrs LD4i64_POST)>; -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, WriteAdr], (instregex "^LD2i(8|16|32)_POST$")>; +def : InstRW<[FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_2VXVY_4cyc], (instregex "^LD2i(8|16|32)_POST$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_1none_3cyc, WriteAdr], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_1none_3cyc], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>; -def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instrs LD3Threev2d_POST)>; -def : InstRW<[FalkorWr_3LD_3cyc, WriteAdr], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_3LD_3cyc], (instrs LD3Threev2d)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instrs LD3Threev2d_POST)>; +def : InstRW<[FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_3LD_3cyc], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "LD3i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, WriteAdr], (instregex "LD3i(8|16|32)_POST$")>; +def : InstRW<[FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3VXVY_4cyc], (instregex "^LD3i(8|16|32)_POST$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_2none_3cyc, WriteAdr], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2none_3cyc], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>; -def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instrs LD4Fourv2d_POST)>; -def : InstRW<[FalkorWr_4LD_3cyc, WriteAdr], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; +def : InstRW<[FalkorWr_4LD_3cyc], (instrs LD4Fourv2d)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instrs LD4Fourv2d_POST)>; +def : InstRW<[FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_4LD_3cyc], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>; -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>; -def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, WriteAdr], (instregex "^LD4i(8|16|32)_POST$")>; +def : InstRW<[FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4VXVY_4cyc], (instregex "^LD4i(8|16|32)_POST$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "LD3Threev(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, WriteAdr],(instregex "LD3Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc], (instregex "^LD3Threev(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1none_4cyc], + (instregex "^LD3Threev(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, WriteAdr],(instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc], (instregex "^LD4Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2none_4cyc], + (instregex "^LD4Fourv(8b|4h|2s|1d)_POST$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "LD3Threev(16b|8h|4s)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD3Threev(16b|8h|4s)$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, WriteAdr],(instregex "LD3Threev(16b|8h|4s)_POST$")>; -def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc], (instregex "^LD4Fourv(16b|8h|4s)$")>; + +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc], + (instregex "^LD3Threev(16b|8h|4s)_POST$")>; + +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc], + (instregex "^LD4Fourv(16b|8h|4s)_POST$")>; // Arithmetic and Logical Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ADD(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^AND(S)?(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>; def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>; -def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>; +def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>; // SIMD Miscellaneous Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^CPY(i8|i16|i32|i64)$")>; def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>; def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>; @@ -287,35 +829,42 @@ def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>; def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>; def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instrs FRECPS64, FRSQRTS64)>; -def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],(instregex "^INSv(i32|i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc], + (instregex "^INSv(i32|i64)(gpr|lane)$")>; def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>; +def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>; def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>; +def : InstRW<[FalkorWr_2VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>; def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>; def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>; def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>; -def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>; +def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc], + (instrs FRECPSv4f32, FRSQRTSv4f32)>; -def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>; +def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc], + (instrs FRECPSv2f64, FRSQRTSv2f64)>; def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>; def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>; @@ -328,50 +877,95 @@ def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>; // SIMD Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteVST], (instregex "^STP(D|S)(i)$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^STP(D|S)(post|pre)$")>; -def : InstRW<[FalkorWr_2XYZ_2ST_2VSD_0cyc], (instregex "^STRQro(W|X)$")>; -def : InstRW<[WriteVST], (instregex "^ST1(One(v8b|v4h|v2s|v1d)(_POST)?|(i8|i16|i32|i64)(_POST)?|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[WriteVST], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[WriteVST, WriteAdr], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STR(Q|D|S|H|B)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^STR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_STRVro], (instregex "^STR(D|S|H|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^STPQi$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^STPQ(post|pre)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STP(D|S)(i)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^STP(D|S)(post|pre)$")>; +def : InstRW<[FalkorWr_STRQro], (instregex "^STRQro(W|X)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^STUR(Q|D|S|B|H)i$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instrs STNPDi, STNPSi)>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instrs STNPQi)>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST3(i8|i16|i32|i64)$")>; -def : InstRW<[WriteVST, WriteVST], (instregex "^ST4(i8|i16|i32|i64)$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST3(i8|i16|i32|i64)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^ST4(i8|i16|i32|i64)_POST$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>; +def : InstRW<[FalkorWr_1VSD_1ST_0cyc], (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc], + (instregex "^ST2(Two(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64))_POST$")>; -def : InstRW<[WriteV, WriteVST, WriteVST], (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>; -def : InstRW<[WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST3(i8|i16|i32|i64)$")>; +def : InstRW<[FalkorWr_2VSD_2ST_0cyc], (instregex "^ST4(i8|i16|i32|i64)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST3(i8|i16|i32|i64)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc], + (instregex "^ST4(i8|i16|i32|i64)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST], (instrs ST3Threev2d)>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST3Threev2d_POST)>; +def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc], + (instregex "^ST3Three(v8b|v4h|v2s|v1d)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc], + (instregex "^ST3Three(v8b|v4h|v2s|v1d)_POST$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST], (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteAdr], (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_3VSD_3ST_0cyc], (instrs ST3Threev2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc], + (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc], + (instrs ST3Threev2d_POST)>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST], (instrs ST4Fourv2d)>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; -def : InstRW<[WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr], (instrs ST4Fourv2d_POST)>; +def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc], + (instregex "^ST4Four(v8b|v4h|v2s|v1d)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc], + (instregex "^ST4Four(v8b|v4h|v2s|v1d)_POST$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST3Three(v16b|v8h|v4s)$")>; -def : InstRW<[WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>; +def : InstRW<[FalkorWr_4VSD_4ST_0cyc], (instrs ST4Fourv2d)>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc], + (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc], + (instrs ST4Fourv2d_POST)>; -def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST], (instregex "^ST4Four(v16b|v8h|v4s)$")>; -def : InstRW<[WriteV, WriteV, WriteV, WriteV, WriteVST, WriteVST, WriteVST, WriteVST, WriteAdr],(instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; +def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc], + (instregex "^ST3Three(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc], + (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>; + +def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc], + (instregex "^ST4Four(v16b|v8h|v4s)$")>; +// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case). +def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc], + (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>; // Branch Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1none_0cyc], (instrs B)>; +def : InstRW<[FalkorWr_1none_0cyc], (instrs B, TCRETURNdi)>; def : InstRW<[FalkorWr_1Z_0cyc], (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>; +def : InstRW<[FalkorWr_1Z_0cyc], (instrs RET_ReallyLR, TCRETURNri)>; def : InstRW<[FalkorWr_1ZB_0cyc], (instrs Bcc)>; def : InstRW<[FalkorWr_1XYZB_0cyc], (instrs BL)>; def : InstRW<[FalkorWr_1Z_1XY_0cyc], (instrs BLR)>; @@ -388,89 +982,103 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>; // FP Load Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; -def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; -def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>; -def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDP(D|S)i$")>; -def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi, WriteAdr],(instregex "LDP(D|S)(pre|post)$")>; -def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi, WriteAdr],(instregex "^LDPQ(pre|post)$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], + (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(Q|D|S|H|B)i$")>; +def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], + (instrs LDNPQi)>; +def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], + (instrs LDPQi)>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], + (instregex "LDNP(D|S)i$")>; +def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], + (instregex "LDP(D|S)i$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc], + (instregex "LDP(D|S)(pre|post)$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDPQ(pre|post)$")>; // FP Data Processing Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(H|S|D)r(r|i)$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P)(S|U)U(W|X)(H|S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(H|S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(H|S|D)rrr$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCCMP(E)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCMP(E)?(S|D)r(r|i)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(FABS|FNEG)(S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FCSEL(S|D)rrr$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(16|32|64)p$")>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTHSr, FCVTHDr)>; -def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(H|S|D)r$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(16|32|64)$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FABD(32|64)$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^(FADD|FSUB)(S|D)rr$")>; +def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTHSr, FCVTHDr)>; def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc], + (instregex "^F(N)?MULSrr$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc], + (instregex "^F(N)?MULDrr$")>; -def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>; -def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>; +def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(S|D)rr$")>; +def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(S|D)r$")>; -def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], (instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>; -def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], (instregex "^F(N)?M(ADD|SUB)Drrr$")>; +def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32], + (instregex "^F(N)?M(ADD|SUB)Srrr$")>; +def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64], + (instregex "^F(N)?M(ADD|SUB)Drrr$")>; // FP Miscellaneous Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(H|S|D)i$")>; -def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>; -def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>; -def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hr|Sr|Dr|v.*_ns)$")>; -// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov 0.0 +def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>; +def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(S|D)i$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>; +def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>; +def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; +// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs FMOVD0, FMOVS0)>; def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>; -def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i16|v1i32|v2i32|v1i64|v4i16|v2f32|v4f16|d|s)(_shift)?")>; +def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>; -def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v2f64|v4f32|v8f16)(_shift)?")>; +def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>; // Load Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>; def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>; - -def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>; -def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>; -def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDNP(W|X)i$")>; +def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDP(W|X)i$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc], + (instregex "^LDP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_3cyc], + (instregex "^LDR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRro], (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>; def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>; def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDTR(B|H|W|X)i$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(B|H|W|X)i$")>; - +def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDUR(BB|HH|W|X)i$")>; +def : InstRW<[FalkorWr_PRFMro], (instregex "^PRFMro(W|X)$")>; +def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc], + (instrs LDPSWi)>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc], + (instregex "^LDPSW(post|pre)$")>; def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1LD_4cyc], + (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; +def : InstRW<[FalkorWr_LDRSro], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>; def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>; def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>; -def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>; -def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>; - -def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>; - -def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>; -def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>; -def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi],(instrs LDPSWi)>; -def : InstRW<[FalkorWr_1LD_4cyc, WriteLDHi, WriteAdr],(instregex "^LDPSW(post|pre)$")>; - // Miscellaneous Data-Processing Instructions // ----------------------------------------------------------------------------- def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(S|U)?BFM(W|X)ri$")>; @@ -480,17 +1088,22 @@ def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^EXTR(W|X)rri$")>; // Divide and Multiply Instructions // ----------------------------------------------------------------------------- -def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; -def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], (instregex "^M(ADD|SUB)Wrrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; +def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32], + (instregex "^M(ADD|SUB)Wrrr$")>; -def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>; -def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], (instregex "^M(ADD|SUB)Xrrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>; +def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64], + (instregex "^M(ADD|SUB)Xrrr$")>; -def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; -def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>; +def : InstRW<[FalkorWr_1X_1Z_8cyc], (instregex "^(S|U)DIVWr$")>; +def : InstRW<[FalkorWr_1X_1Z_16cyc], (instregex "^(S|U)DIVXr$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], (instregex "^(S|U)MULLv.*$")>; -def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], (instregex "^(S|U)(MLAL|MLSL)v.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc], + (instregex "^(S|U)MULLv.*$")>; +def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA], + (instregex "^(S|U)(MLAL|MLSL)v.*$")>; // Move and Shift Instructions // ----------------------------------------------------------------------------- @@ -498,6 +1111,11 @@ def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W| def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>; def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>; def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>; +def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs MOVi32imm, MOVi64imm)>; +def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>], + (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>; +def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>], + (instrs LOADgot)>; // Other Instructions // ----------------------------------------------------------------------------- @@ -507,13 +1125,12 @@ def : InstRW<[FalkorWr_1ST_0cyc], (instrs SYSxt, SYSLxt)>; def : InstRW<[FalkorWr_1Z_0cyc], (instrs MSRpstateImm1, MSRpstateImm4)>; def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^(LDAR(B|H|W|X)|LDAXP(W|X)|LDAXR(B|H|W|X)|LDXP(W|X)|LDXR(B|H|W|X))$")>; -def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS)>; +def : InstRW<[FalkorWr_1LD_3cyc], (instrs MRS, MOVbaseTLS)>; def : InstRW<[FalkorWr_1LD_1Z_3cyc], (instrs DRPS)>; def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>; -def : InstRW<[WriteVST], (instrs STNPDi, STNPSi)>; -def : InstRW<[WriteSTP], (instrs STNPWi, STNPXi)>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs STNPWi, STNPXi)>; def : InstRW<[FalkorWr_2LD_1Z_3cyc], (instrs ERET)>; def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDC.*$")>; @@ -523,20 +1140,16 @@ def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc], (instregex "^STXR(B|H|W|X)$")>; def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXP(W|X)$")>; def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc], (instregex "^STLXR(B|H|W|X)$")>; -def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>; // Store Instructions // ----------------------------------------------------------------------------- -def : InstRW<[WriteST], (instregex "^STP(W|X)i$")>; -def : InstRW<[WriteST, WriteAdr], (instregex "^STP(W|X)(post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>; -def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>; -def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)ui$")>; -def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)(post|pre)$")>; -def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>; -def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STP(W|X)i$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc], + (instregex "^STP(W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STR(BB|HH|W|X)ui$")>; +def : InstRW<[FalkorWr_none_1cyc, FalkorWr_1SD_1ST_0cyc], + (instregex "^STR(BB|HH|W|X)(post|pre)$")>; +def : InstRW<[FalkorWr_STRro], (instregex "^STR(BB|HH|W|X)ro(W|X)$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STTR(B|H|W|X)i$")>; +def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instregex "^STUR(BB|HH|W|X)i$")>; -def : InstRW<[WriteST, WriteAdr], (instregex "^STR(B|H|W|X)ro(W|X)$")>; - -def : InstRW<[WriteVST, WriteVST], (instregex "^STPQi$")>; -def : InstRW<[WriteVST, WriteVST, WriteAdr], (instregex "^STPQ(post|pre)$")>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td deleted file mode 100644 index 6526cc28e80..00000000000 --- a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td +++ /dev/null @@ -1,403 +0,0 @@ -//=- AArch64SchedFalkorWrRes.td - Falkor Write Res ---*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Contains all of the Falkor specific SchedWriteRes types. The approach -// below is to define a generic SchedWriteRes for every combination of -// latency and microOps. The naming conventions is to use a prefix, one field -// for latency, and one or more microOp count/type designators. -// Prefix: FalkorWr -// MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD) -// Latency: #cyc -// -// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued -// down one Z pipe, six SD pipes, four VX pipes and the total latency is -// six cycles. -// -// Contains all of the Falkor specific ReadAdvance types for forwarding logic. -// -// Contains all of the Falkor specific WriteVariant types for immediate zero -// and LSLFast. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Define 1 micro-op types - -def FalkorWr_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 2; } -def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } -def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; } -def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; } -def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; } -def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; } -def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; } -def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; } -def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; } -def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; } -def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; } -def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; } -def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; } - -def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; } -def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; } -def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; } -def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } -def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; } -def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } -def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; } -def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; } - -def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; } -def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; } -def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; } - -def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; } -def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; } -def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; } - -//===----------------------------------------------------------------------===// -// Define 2 micro-op types - -def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 1; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 2; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 3; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 5; - let NumMicroOps = 2; -} -def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 5; - let NumMicroOps = 2; -} -def FalkorWr_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 6; - let NumMicroOps = 2; -} -def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 6; - let NumMicroOps = 2; -} - -def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_1XYZ_1LD_4cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_2LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 4; - let NumMicroOps = 2; -} - -def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> { - let Latency = 10; - let NumMicroOps = 2; -} - -def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> { - let Latency = 1; - let NumMicroOps = 2; -} - -def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> { - let Latency = 4; - let NumMicroOps = 2; -} -def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> { - let Latency = 5; - let NumMicroOps = 2; -} - -def FalkorWr_2XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> { - let Latency = 2; - let NumMicroOps = 2; -} - -def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> { - let Latency = 0; - let NumMicroOps = 2; -} - -def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { - let Latency = 8; - let ResourceCycles = [2, 8]; -} - -def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> { - let Latency = 16; - let ResourceCycles = [2, 16]; -} - -def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 2; -} - -def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> { - let Latency = 0; - let NumMicroOps = 2; -} - -//===----------------------------------------------------------------------===// -// Define 3 micro-op types - -def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, - FalkorUnitLD]> { - let Latency = 0; - let NumMicroOps = 3; -} - -def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD, - FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 3; -} - -def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 3; - let NumMicroOps = 3; -} - -def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 3; -} - -def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 5; - let NumMicroOps = 3; -} - -def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 6; - let NumMicroOps = 3; -} - -def FalkorWr_1LD_2VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 3; -} - -def FalkorWr_2LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 3; -} - -def FalkorWr_3LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 3; -} - -def FalkorWr_2LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitZ]> { - let Latency = 3; - let NumMicroOps = 3; -} - -//===----------------------------------------------------------------------===// -// Define 4 micro-op types - -def FalkorWr_2VX_2VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY, - FalkorUnitVX, FalkorUnitVY]> { - let Latency = 2; - let NumMicroOps = 4; -} - -def FalkorWr_4VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 2; - let NumMicroOps = 4; -} -def FalkorWr_4VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 3; - let NumMicroOps = 4; -} -def FalkorWr_4VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 4; -} -def FalkorWr_4VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 6; - let NumMicroOps = 4; -} - -def FalkorWr_4LD_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 4; -} - -def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 4; -} - -def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 4; -} - -def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST, - FalkorUnitSD, FalkorUnitLD]> { - let Latency = 3; - let NumMicroOps = 4; -} - -//===----------------------------------------------------------------------===// -// Define 5 micro-op types - -def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 5; -} -def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 5; -} -def FalkorWr_5VXVY_7cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitVXVY]> { - let Latency = 7; - let NumMicroOps = 5; -} - -//===----------------------------------------------------------------------===// -// Define 6 micro-op types - -def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 6; -} - -def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST, - FalkorUnitVSD, FalkorUnitXYZ, - FalkorUnitST, FalkorUnitVSD]> { - let Latency = 0; - let NumMicroOps = 6; -} - -//===----------------------------------------------------------------------===// -// Define 8 micro-op types - -def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY, - FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 8; -} - -//===----------------------------------------------------------------------===// -// Define 9 micro-op types - -def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, - FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitLD, - FalkorUnitLD, FalkorUnitXYZ, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 9; -} - -def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, - FalkorUnitLD, FalkorUnitVXVY, - FalkorUnitVXVY, FalkorUnitXYZ, - FalkorUnitLD, FalkorUnitLD, - FalkorUnitVXVY, FalkorUnitVXVY]> { - let Latency = 4; - let NumMicroOps = 9; -} - -// Forwarding logic is modeled for multiply add/accumulate. -// ----------------------------------------------------------------------------- -def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>; -def FalkorReadIMA64 : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>; -def FalkorReadVMA : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>; -def FalkorReadFMA32 : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>; -def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>; - -// SchedPredicates and WriteVariants for Immediate Zero and LSLFast -// ----------------------------------------------------------------------------- -def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>; -def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>; - -def FalkorWr_FMOV : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_MOVZ : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_LDR : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_ADD : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_PRFM : SchedWriteVariant<[ - SchedVar, - SchedVar]>; - -def FalkorWr_LDRS : SchedWriteVariant<[ - SchedVar, - SchedVar]>; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index b369ee7e4ba..d3cab1ad339 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -90,7 +90,6 @@ void AArch64Subtarget::initializeProperties() { break; case Falkor: MaxInterleaveFactor = 4; - VectorInsertExtractBaseCost = 2; // FIXME: remove this to enable 64-bit SLP if performance looks good. MinVectorRegisterBitWidth = 128; break; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 132f192f2a9..cb3f72a524f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// +#include "AArch64TargetMachine.h" #include "AArch64.h" #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" -#include "AArch64TargetMachine.h" #include "AArch64TargetObjectFile.h" #include "AArch64TargetTransformInfo.h" #include "MCTargetDesc/AArch64MCTargetDesc.h" @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/IRTranslator.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" @@ -277,7 +278,7 @@ public: ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); - if (ST.hasFuseLiterals()) { + if (ST.hasFuseAES() || ST.hasFuseLiterals()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). ScheduleDAGMI *DAG = createGenericSchedPostRA(C); @@ -295,6 +296,7 @@ public: bool addIRTranslator() override; bool addLegalizeMachineIR() override; bool addRegBankSelect() override; + void addPreGlobalInstructionSelect() override; bool addGlobalInstructionSelect() override; #endif bool addILPOpts() override; @@ -404,6 +406,12 @@ bool AArch64PassConfig::addRegBankSelect() { return false; } +void AArch64PassConfig::addPreGlobalInstructionSelect() { + // Workaround the deficiency of the fast register allocator. + if (TM->getOptLevel() == CodeGenOpt::None) + addPass(new Localizer()); +} + bool AArch64PassConfig::addGlobalInstructionSelect() { addPass(new InstructionSelect()); return false; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td index b279bd61e18..e7ebb37a9d6 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -425,7 +425,7 @@ def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9", FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm, FeatureApertureRegs, FeatureGFX9Insts, FeatureVOP3P, FeatureVGPRIndexMode, - FeatureFastFMAF32, FeatureDPP, + FeatureFastFMAF32, FeatureSDWA, FeatureDPP, FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts ] >; @@ -534,10 +534,12 @@ def AMDGPUAsmVariants { int VOP3_ID = 1; string SDWA = "SDWA"; int SDWA_ID = 2; + string SDWA9 = "SDWA9"; + int SDWA9_ID = 3; string DPP = "DPP"; - int DPP_ID = 3; + int DPP_ID = 4; string Disable = "Disable"; - int Disable_ID = 4; + int Disable_ID = 5; } def DefaultAMDGPUAsmParserVariant : AsmParserVariant { @@ -555,6 +557,12 @@ def SDWAAsmParserVariant : AsmParserVariant { let Name = AMDGPUAsmVariants.SDWA; } +def SDWA9AsmParserVariant : AsmParserVariant { + let Variant = AMDGPUAsmVariants.SDWA9_ID; + let Name = AMDGPUAsmVariants.SDWA9; +} + + def DPPAsmParserVariant : AsmParserVariant { let Variant = AMDGPUAsmVariants.DPP_ID; let Name = AMDGPUAsmVariants.DPP; @@ -567,6 +575,7 @@ def AMDGPU : Target { let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant, VOP3AsmParserVariant, SDWAAsmParserVariant, + SDWA9AsmParserVariant, DPPAsmParserVariant]; let AssemblyWriters = [AMDGPUAsmWriter]; } @@ -607,7 +616,10 @@ def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">, AssemblerPredicate<"FeatureVOP3P">; def HasSDWA : Predicate<"Subtarget->hasSDWA()">, - AssemblerPredicate<"FeatureSDWA">; + AssemblerPredicate<"FeatureSDWA,FeatureVolcanicIslands">; + +def HasSDWA9 : Predicate<"Subtarget->hasSDWA()">, + AssemblerPredicate<"FeatureSDWA,FeatureGFX9">; def HasDPP : Predicate<"Subtarget->hasDPP()">, AssemblerPredicate<"FeatureDPP">; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 5ec46a8294c..723e8a7b54e 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -127,6 +127,29 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); } +bool AMDGPUTargetLowering::isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op) +{ + assert(Op.getOpcode() == ISD::OR); + + SDValue N0 = Op->getOperand(0); + SDValue N1 = Op->getOperand(1); + EVT VT = N0.getValueType(); + + if (VT.isInteger() && !VT.isVector()) { + KnownBits LHSKnown, RHSKnown; + DAG.computeKnownBits(N0, LHSKnown); + + if (LHSKnown.Zero.getBoolValue()) { + DAG.computeKnownBits(N1, RHSKnown); + + if (!(~RHSKnown.Zero & ~LHSKnown.Zero)) + return true; + } + } + + return false; +} + AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -2596,8 +2619,6 @@ SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const { EVT VT = N->getValueType(0); - if (VT != MVT::i64) - return SDValue(); ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); if (!RHS) @@ -2618,6 +2639,8 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, case ISD::SIGN_EXTEND: case ISD::ANY_EXTEND: { // shl (ext x) => zext (shl x), if shift does not overflow int + if (VT != MVT::i64) + break; KnownBits Known; SDValue X = LHS->getOperand(0); DAG.computeKnownBits(X, Known); @@ -2628,7 +2651,22 @@ SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); return DAG.getZExtOrTrunc(Shl, SL, VT); } + case ISD::OR: if (!isOrEquivalentToAdd(DAG, LHS)) break; + case ISD::ADD: { // Fall through from above + // shl (or|add x, c2), c1 => or|add (shl x, c1), (c2 << c1) + if (ConstantSDNode *C2 = dyn_cast(LHS->getOperand(1))) { + SDValue Shl = DAG.getNode(ISD::SHL, SL, VT, LHS->getOperand(0), + SDValue(RHS, 0)); + SDValue C2V = DAG.getConstant(C2->getAPIntValue() << RHSVal, + SDLoc(C2), VT); + return DAG.getNode(LHS->getOpcode(), SL, VT, Shl, C2V); + } + break; } + } + + if (VT != MVT::i64) + return SDValue(); // i64 (shl x, C) -> (build_pair 0, (shl x, C -32)) @@ -3440,7 +3478,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DL); } - if ((OffsetVal + WidthVal) >= 32) { + if ((OffsetVal + WidthVal) >= 32 && + !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) { SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32); return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, BitsFrom, ShiftVal); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index fb2f15022d2..0d066cdbdff 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -34,6 +34,9 @@ private: /// compare. SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const; +public: + static bool isOrEquivalentToAdd(SelectionDAG &DAG, SDValue Op); + protected: const AMDGPUSubtarget *Subtarget; AMDGPUAS AMDGPUASI; diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 9de302994e6..57905be1881 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -36,6 +36,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo() { setAction({G_CONSTANT, S32}, Legal); setAction({G_CONSTANT, S64}, Legal); + setAction({G_FCONSTANT, S32}, Legal); + setAction({G_GEP, P1}, Legal); setAction({G_GEP, P2}, Legal); setAction({G_GEP, 1, S64}, Legal); diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index 85184b36390..07f92918a43 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -97,6 +97,9 @@ private: Instruction *UseInst, int OpIdx0, int OpIdx1) const; + /// Check whether we have enough local memory for promotion. + bool hasSufficientLocalMem(const Function &F); + public: static char ID; @@ -107,7 +110,7 @@ public: StringRef getPassName() const override { return "AMDGPU Promote Alloca"; } - void handleAlloca(AllocaInst &I); + bool handleAlloca(AllocaInst &I, bool SufficientLDS); void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); @@ -147,105 +150,21 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { const AMDGPUSubtarget &ST = TM->getSubtarget(F); if (!ST.isPromoteAllocaEnabled()) return false; + AS = AMDGPU::getAMDGPUAS(*F.getParent()); - FunctionType *FTy = F.getFunctionType(); - - // If the function has any arguments in the local address space, then it's - // possible these arguments require the entire local memory space, so - // we cannot use local memory in the pass. - for (Type *ParamTy : FTy->params()) { - PointerType *PtrTy = dyn_cast(ParamTy); - if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { - LocalMemLimit = 0; - DEBUG(dbgs() << "Function has local memory argument. Promoting to " - "local memory disabled.\n"); - return false; - } - } - - LocalMemLimit = ST.getLocalMemorySize(); - if (LocalMemLimit == 0) - return false; - - const DataLayout &DL = Mod->getDataLayout(); - - // Check how much local memory is being used by global objects - CurrentLocalMemUsage = 0; - for (GlobalVariable &GV : Mod->globals()) { - if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) - continue; - - for (const User *U : GV.users()) { - const Instruction *Use = dyn_cast(U); - if (!Use) - continue; - - if (Use->getParent()->getParent() == &F) { - unsigned Align = GV.getAlignment(); - if (Align == 0) - Align = DL.getABITypeAlignment(GV.getValueType()); - - // FIXME: Try to account for padding here. The padding is currently - // determined from the inverse order of uses in the function. I'm not - // sure if the use list order is in any way connected to this, so the - // total reported size is likely incorrect. - uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType()); - CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align); - CurrentLocalMemUsage += AllocSize; - break; - } - } - } - - unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, - F); - - // Restrict local memory usage so that we don't drastically reduce occupancy, - // unless it is already significantly reduced. - - // TODO: Have some sort of hint or other heuristics to guess occupancy based - // on other factors.. - unsigned OccupancyHint = ST.getWavesPerEU(F).second; - if (OccupancyHint == 0) - OccupancyHint = 7; - - // Clamp to max value. - OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU()); - - // Check the hint but ignore it if it's obviously wrong from the existing LDS - // usage. - MaxOccupancy = std::min(OccupancyHint, MaxOccupancy); - - - // Round up to the next tier of usage. - unsigned MaxSizeWithWaveCount - = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F); - - // Program is possibly broken by using more local mem than available. - if (CurrentLocalMemUsage > MaxSizeWithWaveCount) - return false; - - LocalMemLimit = MaxSizeWithWaveCount; - - DEBUG( - dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n" - << " Rounding size to " << MaxSizeWithWaveCount - << " with a maximum occupancy of " << MaxOccupancy << '\n' - << " and " << (LocalMemLimit - CurrentLocalMemUsage) - << " available for promotion\n" - ); - + bool SufficientLDS = hasSufficientLocalMem(F); + bool Changed = false; BasicBlock &EntryBB = *F.begin(); for (auto I = EntryBB.begin(), E = EntryBB.end(); I != E; ) { AllocaInst *AI = dyn_cast(I); ++I; if (AI) - handleAlloca(*AI); + Changed |= handleAlloca(*AI, SufficientLDS); } - return true; + return Changed; } std::pair @@ -661,12 +580,105 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes( return true; } +bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) { + + FunctionType *FTy = F.getFunctionType(); + const AMDGPUSubtarget &ST = TM->getSubtarget(F); + + // If the function has any arguments in the local address space, then it's + // possible these arguments require the entire local memory space, so + // we cannot use local memory in the pass. + for (Type *ParamTy : FTy->params()) { + PointerType *PtrTy = dyn_cast(ParamTy); + if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { + LocalMemLimit = 0; + DEBUG(dbgs() << "Function has local memory argument. Promoting to " + "local memory disabled.\n"); + return false; + } + } + + LocalMemLimit = ST.getLocalMemorySize(); + if (LocalMemLimit == 0) + return false; + + const DataLayout &DL = Mod->getDataLayout(); + + // Check how much local memory is being used by global objects + CurrentLocalMemUsage = 0; + for (GlobalVariable &GV : Mod->globals()) { + if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) + continue; + + for (const User *U : GV.users()) { + const Instruction *Use = dyn_cast(U); + if (!Use) + continue; + + if (Use->getParent()->getParent() == &F) { + unsigned Align = GV.getAlignment(); + if (Align == 0) + Align = DL.getABITypeAlignment(GV.getValueType()); + + // FIXME: Try to account for padding here. The padding is currently + // determined from the inverse order of uses in the function. I'm not + // sure if the use list order is in any way connected to this, so the + // total reported size is likely incorrect. + uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType()); + CurrentLocalMemUsage = alignTo(CurrentLocalMemUsage, Align); + CurrentLocalMemUsage += AllocSize; + break; + } + } + } + + unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, + F); + + // Restrict local memory usage so that we don't drastically reduce occupancy, + // unless it is already significantly reduced. + + // TODO: Have some sort of hint or other heuristics to guess occupancy based + // on other factors.. + unsigned OccupancyHint = ST.getWavesPerEU(F).second; + if (OccupancyHint == 0) + OccupancyHint = 7; + + // Clamp to max value. + OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU()); + + // Check the hint but ignore it if it's obviously wrong from the existing LDS + // usage. + MaxOccupancy = std::min(OccupancyHint, MaxOccupancy); + + + // Round up to the next tier of usage. + unsigned MaxSizeWithWaveCount + = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F); + + // Program is possibly broken by using more local mem than available. + if (CurrentLocalMemUsage > MaxSizeWithWaveCount) + return false; + + LocalMemLimit = MaxSizeWithWaveCount; + + DEBUG( + dbgs() << F.getName() << " uses " << CurrentLocalMemUsage << " bytes of LDS\n" + << " Rounding size to " << MaxSizeWithWaveCount + << " with a maximum occupancy of " << MaxOccupancy << '\n' + << " and " << (LocalMemLimit - CurrentLocalMemUsage) + << " available for promotion\n" + ); + + return true; +} + // FIXME: Should try to pick the most likely to be profitable allocas first. -void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { +bool AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I, bool SufficientLDS) { // Array allocations are probably not worth handling, since an allocation of // the array type is the canonical form. if (!I.isStaticAlloca() || I.isArrayAllocation()) - return; + return false; IRBuilder<> Builder(&I); @@ -675,10 +687,8 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { DEBUG(dbgs() << "Trying to promote " << I << '\n'); - if (tryPromoteAllocaToVector(&I, AS)) { - DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); - return; - } + if (tryPromoteAllocaToVector(&I, AS)) + return true; // Promoted to vector. const Function &ContainingFunction = *I.getParent()->getParent(); CallingConv::ID CC = ContainingFunction.getCallingConv(); @@ -692,9 +702,13 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { break; default: DEBUG(dbgs() << " promote alloca to LDS not supported with calling convention.\n"); - return; + return false; } + // Not likely to have sufficient local memory for promotion. + if (!SufficientLDS) + return false; + const AMDGPUSubtarget &ST = TM->getSubtarget(ContainingFunction); unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second; @@ -718,7 +732,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (NewSize > LocalMemLimit) { DEBUG(dbgs() << " " << AllocSize << " bytes of local memory not available to promote\n"); - return; + return false; } CurrentLocalMemUsage = NewSize; @@ -727,7 +741,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (!collectUsesWithPtrTypes(&I, &I, WorkList)) { DEBUG(dbgs() << " Do not know how to convert all uses\n"); - return; + return false; } DEBUG(dbgs() << "Promoting alloca to local memory\n"); @@ -873,6 +887,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { llvm_unreachable("Don't know how to promote alloca intrinsic use."); } } + return true; } FunctionPass *llvm::createAMDGPUPromoteAlloca() { diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index e543cae07ad..66087942681 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -416,6 +416,10 @@ public: return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; } + bool hasSDWA() const { + return HasSDWA; + } + /// \brief Returns the offset in bytes from the start of the input buffer /// of the first explicit kernel argument. unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const { @@ -670,10 +674,6 @@ public: return HasInv2PiInlineImm; } - bool hasSDWA() const { - return HasSDWA; - } - bool hasDPP() const { return HasDPP; } diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b52ea2b3a2c..f5541e08e1b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -881,6 +881,10 @@ public: return AMDGPU::isVI(getSTI()); } + bool isGFX9() const { + return AMDGPU::isGFX9(getSTI()); + } + bool hasInv2PiInlineImm() const { return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; } @@ -989,7 +993,6 @@ private: bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; - bool isSGPR(unsigned Reg); public: OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); @@ -1042,9 +1045,10 @@ public: OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); + void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); void cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType); + uint64_t BasicInstType, bool skipVcc = false); }; struct OptionalOperand { @@ -1966,7 +1970,8 @@ ArrayRef AMDGPUAsmParser::getMatchedVariants() const { } if (isForcedSDWA()) { - static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA}; + static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, + AMDGPUAsmVariants::SDWA9}; return makeArrayRef(Variants); } @@ -1977,7 +1982,7 @@ ArrayRef AMDGPUAsmParser::getMatchedVariants() const { static const unsigned Variants[] = { AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, - AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::DPP + AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP }; return makeArrayRef(Variants); @@ -2000,14 +2005,6 @@ unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { return AMDGPU::NoRegister; } -bool AMDGPUAsmParser::isSGPR(unsigned Reg) { - const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); - const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); - return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || - Reg == AMDGPU::SCC; -} - // NB: This code is correct only when used to check constant // bus limitations because GFX7 support no f16 inline constants. // Note that there are no cases when a GFX7 opcode violates @@ -2049,7 +2046,8 @@ bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { if (MO.isImm()) { return !isInlineConstant(Inst, OpIdx); } - return !MO.isReg() || isSGPR(mc2PseudoReg(MO.getReg())); + return !MO.isReg() || + isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); } bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { @@ -2060,7 +2058,8 @@ bool AMDGPUAsmParser::validateOperandLimitations(const MCInst &Inst) { if (Desc.TSFlags & (SIInstrFlags::VOPC | SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | - SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) { + SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | + SIInstrFlags::SDWA)) { // Check special imm operands (used by madmk, etc) if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { @@ -4151,14 +4150,19 @@ void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); } +void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { + cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); +} + void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { - cvtSDWA(Inst, Operands, SIInstrFlags::VOPC); + cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); } void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, - uint64_t BasicInstType) { + uint64_t BasicInstType, bool skipVcc) { using namespace llvm::AMDGPU::SDWA; OptionalImmIndexMap OptionalIdx; + bool skippedVcc = false; unsigned I = 1; const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); @@ -4168,15 +4172,22 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, for (unsigned E = Operands.size(); I != E; ++I) { AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); - // Add the register arguments - if ((BasicInstType == SIInstrFlags::VOPC || - BasicInstType == SIInstrFlags::VOP2)&& - Op.isReg() && - Op.Reg.RegNo == AMDGPU::VCC) { - // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. - // Skip it. - continue; - } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { + if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { + // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. + // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) + // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. + // Skip VCC only if we didn't skip it on previous iteration. + if (BasicInstType == SIInstrFlags::VOP2 && + (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { + skippedVcc = true; + continue; + } else if (BasicInstType == SIInstrFlags::VOPC && + Inst.getNumOperands() == 0) { + skippedVcc = true; + continue; + } + } + if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { Op.addRegWithInputModsOperands(Inst, 2); } else if (Op.isImm()) { // Handle optional arguments @@ -4184,20 +4195,30 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, } else { llvm_unreachable("Invalid operand type"); } + skippedVcc = false; } - addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); - - if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { + if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && + Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { // V_NOP_sdwa_vi has no optional sdwa arguments switch (BasicInstType) { case SIInstrFlags::VOP1: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (isGFX9() && + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); break; case SIInstrFlags::VOP2: + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + if (isGFX9() && + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); @@ -4205,6 +4226,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, break; case SIInstrFlags::VOPC: + if (isVI()) { + addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); + } addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); break; @@ -4220,10 +4244,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { auto it = Inst.begin(); std::advance( - it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); + it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); Inst.insert(it, Inst.getOperand(0)); // src2 = dst } - } /// Force static initialization. diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp index 137b5cca96c..9b3cde7c4df 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp @@ -62,32 +62,33 @@ static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm, return addOperand(Inst, MCOperand::createImm(Imm)); } -#define DECODE_OPERAND2(RegClass, DecName) \ -static DecodeStatus Decode##RegClass##RegisterClass(MCInst &Inst, \ - unsigned Imm, \ - uint64_t /*Addr*/, \ - const void *Decoder) { \ +#define DECODE_OPERAND(StaticDecoderName, DecoderName) \ +static DecodeStatus StaticDecoderName(MCInst &Inst, \ + unsigned Imm, \ + uint64_t /*Addr*/, \ + const void *Decoder) { \ auto DAsm = static_cast(Decoder); \ - return addOperand(Inst, DAsm->decodeOperand_##DecName(Imm)); \ + return addOperand(Inst, DAsm->DecoderName(Imm)); \ } -#define DECODE_OPERAND(RegClass) DECODE_OPERAND2(RegClass, RegClass) +#define DECODE_OPERAND_REG(RegClass) \ +DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass) -DECODE_OPERAND(VGPR_32) -DECODE_OPERAND(VS_32) -DECODE_OPERAND(VS_64) +DECODE_OPERAND_REG(VGPR_32) +DECODE_OPERAND_REG(VS_32) +DECODE_OPERAND_REG(VS_64) -DECODE_OPERAND(VReg_64) -DECODE_OPERAND(VReg_96) -DECODE_OPERAND(VReg_128) +DECODE_OPERAND_REG(VReg_64) +DECODE_OPERAND_REG(VReg_96) +DECODE_OPERAND_REG(VReg_128) -DECODE_OPERAND(SReg_32) -DECODE_OPERAND(SReg_32_XM0_XEXEC) -DECODE_OPERAND(SReg_64) -DECODE_OPERAND(SReg_64_XEXEC) -DECODE_OPERAND(SReg_128) -DECODE_OPERAND(SReg_256) -DECODE_OPERAND(SReg_512) +DECODE_OPERAND_REG(SReg_32) +DECODE_OPERAND_REG(SReg_32_XM0_XEXEC) +DECODE_OPERAND_REG(SReg_64) +DECODE_OPERAND_REG(SReg_64_XEXEC) +DECODE_OPERAND_REG(SReg_128) +DECODE_OPERAND_REG(SReg_256) +DECODE_OPERAND_REG(SReg_512) static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, @@ -106,6 +107,13 @@ static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm)); } +#define DECODE_SDWA9(DecName) \ +DECODE_OPERAND(decodeSDWA9##DecName, decodeSDWA9##DecName) + +DECODE_SDWA9(Src32) +DECODE_SDWA9(Src16) +DECODE_SDWA9(VopcDst) + #include "AMDGPUGenDisassemblerTables.inc" //===----------------------------------------------------------------------===// @@ -164,6 +172,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size, Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address); if (Res) break; + + Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address); + if (Res) break; } // Reinitialize Bytes as DPP64 could have eaten too much @@ -582,6 +593,48 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const { return errOperand(Val, "unknown operand encoding " + Twine(Val)); } +MCOperand AMDGPUDisassembler::decodeSDWA9Src(const OpWidthTy Width, + unsigned Val) const { + using namespace AMDGPU::SDWA; + + if (SDWA9EncValues::SRC_VGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_VGPR_MAX) { + return createRegOperand(getVgprClassId(Width), + Val - SDWA9EncValues::SRC_VGPR_MIN); + } + if (SDWA9EncValues::SRC_SGPR_MIN <= Val && + Val <= SDWA9EncValues::SRC_SGPR_MAX) { + return createSRegOperand(getSgprClassId(Width), + Val - SDWA9EncValues::SRC_SGPR_MIN); + } + + return decodeSpecialReg32(Val - SDWA9EncValues::SRC_SGPR_MIN); +} + +MCOperand AMDGPUDisassembler::decodeSDWA9Src16(unsigned Val) const { + return decodeSDWA9Src(OPW16, Val); +} + +MCOperand AMDGPUDisassembler::decodeSDWA9Src32(unsigned Val) const { + return decodeSDWA9Src(OPW32, Val); +} + + +MCOperand AMDGPUDisassembler::decodeSDWA9VopcDst(unsigned Val) const { + using namespace AMDGPU::SDWA; + + if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) { + Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + if (Val > AMDGPU::EncValues::SGPR_MAX) { + return decodeSpecialReg64(Val); + } else { + return createSRegOperand(getSgprClassId(OPW64), Val); + } + } else { + return createRegOperand(AMDGPU::VCC); + } +} + //===----------------------------------------------------------------------===// // AMDGPUSymbolizer //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h index 620bae0a6d1..0ff405a71e9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h +++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h @@ -104,6 +104,11 @@ public: MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const; MCOperand decodeSpecialReg32(unsigned Val) const; MCOperand decodeSpecialReg64(unsigned Val) const; + + MCOperand decodeSDWA9Src(const OpWidthTy Width, unsigned Val) const; + MCOperand decodeSDWA9Src16(unsigned Val) const; + MCOperand decodeSDWA9Src32(unsigned Val) const; + MCOperand decodeSDWA9VopcDst(unsigned Val) const; }; //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp index 3bb5c9bc22b..8ead4806733 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp @@ -191,6 +191,7 @@ public: } }; +namespace { // just a stub to make base class happy class SchedStrategyStub : public MachineSchedStrategy { public: @@ -202,6 +203,7 @@ public: void releaseTopNode(SUnit *SU) override {} void releaseBottomNode(SUnit *SU) override {} }; +} // namespace GCNIterativeScheduler::GCNIterativeScheduler(MachineSchedContext *C, StrategyKind S) diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp index c6d0f217995..d378df674be 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNMinRegStrategy.cpp @@ -17,6 +17,7 @@ using namespace llvm; #define DEBUG_TYPE "misched" +namespace { class GCNMinRegScheduler { struct Candidate : ilist_node { const SUnit *SU; @@ -71,6 +72,7 @@ public: std::vector schedule(ArrayRef TopRoots, const ScheduleDAG &DAG); }; +} // namespace void GCNMinRegScheduler::initNumPreds(const decltype(ScheduleDAG::SUnits) &SUnits) { NumPreds.resize(SUnits.size()); diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp index 18374dca3f8..390a8286c76 100644 --- a/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp @@ -211,9 +211,9 @@ static LaneBitmask getUsedRegMask(const MachineOperand &MO, return getLiveLaneMask(MO.getReg(), SI, LIS, MRI); } -SmallVector collectVirtualRegUses(const MachineInstr &MI, - const LiveIntervals &LIS, - const MachineRegisterInfo &MRI) { +static SmallVector +collectVirtualRegUses(const MachineInstr &MI, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI) { SmallVector Res; for (const auto &MO : MI.operands()) { if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h index 3d3858ab47e..a856b17a228 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h @@ -52,6 +52,18 @@ public: return 0; } + virtual unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + + virtual unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return 0; + } + protected: uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; void verifyInstructionPredicates(const MCInst &MI, diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index bda0928036f..e02acf516c0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -69,6 +69,14 @@ public: unsigned getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; + + unsigned getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; + + unsigned getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const override; }; } // end anonymous namespace @@ -319,6 +327,44 @@ unsigned SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, return getMachineOpValue(MI, MO, Fixups, STI); } +unsigned +SIMCCodeEmitter::getSDWA9SrcEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9EncValues::SRC_VGPR_MASK; + if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) { + RegEnc |= SDWA9EncValues::SRC_SGPR_MASK; + } + return RegEnc; +} + +unsigned +SIMCCodeEmitter::getSDWA9VopcDstEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + using namespace AMDGPU::SDWA; + + uint64_t RegEnc = 0; + + const MCOperand &MO = MI.getOperand(OpNo); + + unsigned Reg = MO.getReg(); + if (Reg != AMDGPU::VCC) { + RegEnc |= MRI.getEncodingValue(Reg); + RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK; + RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK; + } + return RegEnc; +} + uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 3590a9b05e1..60b913cfd39 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -1618,6 +1618,14 @@ EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &, return VT.changeVectorElementTypeToInteger(); } +bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { + // Local and Private addresses do not handle vectors. Limit to i32 + if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) { + return (MemVT.getSizeInBits() <= 32); + } + return true; +} + bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h index 9700ce14c6f..d6a0876a6ee 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.h @@ -44,6 +44,8 @@ public: EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override; diff --git a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td index cc667d985a8..3c1e8527284 100644 --- a/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/R600RegisterInfo.td @@ -226,7 +226,7 @@ def R600_Reg32 : RegisterClass <"AMDGPU", [f32, i32], 32, (add R600_Addr, R600_KC0, R600_KC1, ZERO, HALF, ONE, ONE_INT, PV_X, ALU_LITERAL_X, NEG_ONE, NEG_HALF, - ALU_CONST, ALU_PARAM, OQAP + ALU_CONST, ALU_PARAM, OQAP, INDIRECT_BASE_ADDR )>; def R600_Predicate : RegisterClass <"AMDGPU", [i32], 32, (add diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h index a01330cb917..80967edee0a 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h @@ -118,6 +118,10 @@ namespace AMDGPU { // Operand for source modifiers for VOP instructions OPERAND_INPUT_MODS, + // Operand for GFX9 SDWA instructions + OPERAND_SDWA9_SRC, + OPERAND_SDWA9_VOPC_DST, + /// Operand with 32-bit immediate that uses the constant bus. OPERAND_KIMM32, OPERAND_KIMM16 @@ -160,7 +164,8 @@ namespace AMDGPUAsmVariants { DEFAULT = 0, VOP3 = 1, SDWA = 2, - DPP = 3 + SDWA9 = 3, + DPP = 4 }; } @@ -294,6 +299,18 @@ enum DstUnused { UNUSED_PRESERVE = 2, }; +enum SDWA9EncValues{ + SRC_SGPR_MASK = 0x100, + SRC_VGPR_MASK = 0xFF, + VOPC_DST_VCC_MASK = 0x80, + VOPC_DST_SGPR_MASK = 0x7F, + + SRC_VGPR_MIN = 0, + SRC_VGPR_MAX = 255, + SRC_SGPR_MIN = 256, + SRC_SGPR_MAX = 357, +}; + } // namespace SDWA } // namespace AMDGPU diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 01c1f78e7ca..76c2644867a 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -698,6 +698,18 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } } +bool SITargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT) const { + if (AS == AMDGPUASI.GLOBAL_ADDRESS || AS == AMDGPUASI.FLAT_ADDRESS) { + return (MemVT.getSizeInBits() <= 4 * 32); + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { + unsigned MaxPrivateBits = 8 * getSubtarget()->getMaxPrivateElementSize(); + return (MemVT.getSizeInBits() <= MaxPrivateBits); + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { + return (MemVT.getSizeInBits() <= 2 * 32); + } + return true; +} + bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, unsigned Align, @@ -4229,12 +4241,40 @@ SDValue SITargetLowering::performAndCombine(SDNode *N, SDValue RHS = N->getOperand(1); - if (VT == MVT::i64) { - const ConstantSDNode *CRHS = dyn_cast(RHS); - if (CRHS) { - if (SDValue Split - = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS)) - return Split; + const ConstantSDNode *CRHS = dyn_cast(RHS); + if (VT == MVT::i64 && CRHS) { + if (SDValue Split + = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS)) + return Split; + } + + if (CRHS && VT == MVT::i32) { + // and (srl x, c), mask => shl (bfe x, nb + c, mask >> nb), nb + // nb = number of trailing zeroes in mask + // It can be optimized out using SDWA for GFX8+ in the SDWA peephole pass, + // given that we are selecting 8 or 16 bit fields starting at byte boundary. + uint64_t Mask = CRHS->getZExtValue(); + unsigned Bits = countPopulation(Mask); + if (getSubtarget()->hasSDWA() && LHS->getOpcode() == ISD::SRL && + (Bits == 8 || Bits == 16) && isShiftedMask_64(Mask) && !(Mask & 1)) { + if (auto *CShift = dyn_cast(LHS->getOperand(1))) { + unsigned Shift = CShift->getZExtValue(); + unsigned NB = CRHS->getAPIntValue().countTrailingZeros(); + unsigned Offset = NB + Shift; + if ((Offset & (Bits - 1)) == 0) { // Starts at a byte or word boundary. + SDLoc SL(N); + SDValue BFE = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, + LHS->getOperand(0), + DAG.getConstant(Offset, SL, MVT::i32), + DAG.getConstant(Bits, SL, MVT::i32)); + EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), Bits); + SDValue Ext = DAG.getNode(ISD::AssertZext, SL, VT, BFE, + DAG.getValueType(NarrowVT)); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(LHS), VT, Ext, + DAG.getConstant(NB, SDLoc(CRHS), MVT::i32)); + return Shl; + } + } } } diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h index e6883774749..8e2ec40b224 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -150,6 +150,8 @@ public: bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + bool canMergeStoresTo(unsigned AS, EVT MemVT) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override; diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 38a16b525a7..36d29b8ecf0 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2331,6 +2331,10 @@ static bool isSubRegOf(const SIRegisterInfo &TRI, bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, StringRef &ErrInfo) const { uint16_t Opcode = MI.getOpcode(); + + if (SIInstrInfo::isGenericOpcode(MI.getOpcode())) + return true; + const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 7b052844f17..c5287c7f64b 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -439,6 +439,27 @@ def ExpSrc3 : RegisterOperand { let ParserMatchClass = VReg32OrOffClass; } +class SDWA9Src : RegisterOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA9_SRC"; + let EncoderMethod = "getSDWA9SrcEncoding"; +} + +def SDWA9Src32 : SDWA9Src { + let DecoderMethod = "decodeSDWA9Src32"; +} + +def SDWA9Src16 : SDWA9Src { + let DecoderMethod = "decodeSDWA9Src16"; +} + +def SDWA9VopcDst : VOPDstOperand { + let OperandNamespace = "AMDGPU"; + let OperandType = "OPERAND_SDWA9_VOPC_DST"; + let EncoderMethod = "getSDWA9VopcDstEncoding"; + let DecoderMethod = "decodeSDWA9VopcDst"; +} + class NamedMatchClass : AsmOperandClass { let Name = "Imm"#CName; let PredicateMethod = "is"#CName; @@ -588,6 +609,16 @@ class IntInputMods : InputMods def Int32InputMods : IntInputMods; def Int64InputMods : IntInputMods; +def FPRegInputModsMatchClass : AsmOperandClass { + let Name = "RegWithFPInputMods"; + let ParserMethod = "parseRegWithFPInputMods"; + let PredicateMethod = "isRegKind"; +} + +def FPRegInputMods : InputMods { + let PrintMethod = "printOperandAndFPInputMods"; +} + def FPVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithFPInputMods"; let ParserMethod = "parseRegWithFPInputMods"; @@ -598,6 +629,17 @@ def FPVRegInputMods : InputMods { let PrintMethod = "printOperandAndFPInputMods"; } + +def IntRegInputModsMatchClass : AsmOperandClass { + let Name = "RegWithIntInputMods"; + let ParserMethod = "parseRegWithIntInputMods"; + let PredicateMethod = "isRegKind"; +} + +def IntRegInputMods : InputMods { + let PrintMethod = "printOperandAndIntInputMods"; +} + def IntVRegInputModsMatchClass : AsmOperandClass { let Name = "VRegWithIntInputMods"; let ParserMethod = "parseRegWithIntInputMods"; @@ -783,6 +825,14 @@ class getVALUDstForVT { VOPDstOperand)))); // else VT == i1 } +// Returns the register class to use for the destination of VOP[12C] +// instructions with GFX9 SDWA extension +class getSDWA9DstForVT { + RegisterOperand ret = !if(!eq(VT.Size, 1), + SDWA9VopcDst, // VOPC + VOPDstOperand); // VOP1/2 32-bit dst +} + // Returns the register class to use for source 0 of VOP[12C] // instructions for the given VT. class getVOPSrc0ForVT { @@ -823,6 +873,9 @@ class getVregSrcForVT { !if(!eq(VT.Size, 64), VReg_64, VGPR_32)); } +class getSDWA9SrcForVT { + RegisterOperand ret = !if(!eq(VT.Size, 16), SDWA9Src16, SDWA9Src32); +} // Returns the register class to use for sources of VOP3 instructions for the // given VT. @@ -926,6 +979,15 @@ class getSrcModExt { Operand ret = !if(isFP, FPVRegInputMods, IntVRegInputMods); } +// Return type of input modifiers operand specified input operand for SDWA 9 +class getSrcModSDWA9 { + bit isFP = !if(!eq(VT.Value, f16.Value), 1, + !if(!eq(VT.Value, f32.Value), 1, + !if(!eq(VT.Value, f64.Value), 1, + 0))); + Operand ret = !if(isFP, FPRegInputMods, IntRegInputMods); +} + // Returns the input arguments for VOP[12C] instructions for the given SrcVT. class getIns32 { dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0), // VOP1 @@ -1062,6 +1124,7 @@ class getInsSDWA { + + dag ret = !if(!eq(NumSrcArgs, 0), + // VOP1 without input operands (V_NOP) + (ins), + !if(!eq(NumSrcArgs, 1), + // VOP1 + !if(!eq(HasSDWAOMod, 0), + // VOP1_SDWA9 without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel), + // VOP1_SDWA9 with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel)), + !if(!eq(NumSrcArgs, 2), + !if(!eq(DstVT.Size, 1), + // VOPC_SDWA9 + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP2_SDWA9 + !if(!eq(HasSDWAOMod, 0), + // VOP2_SDWA9 without omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel), + // VOP1_SDWA9 with omod + (ins Src0Mod:$src0_modifiers, Src0RC:$src0, + Src1Mod:$src1_modifiers, Src1RC:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel))), + (ins)/* endif */))); +} + // Outs for DPP and SDWA -class getOutsExt { +class getOutsExt { dag ret = !if(HasDst, !if(!eq(DstVT.Size, 1), (outs), // no dst for VOPC, we use "vcc"-token as dst in SDWA VOPC instructions - (outs DstRCDPP:$vdst)), + (outs DstRCExt:$vdst)), + (outs)); // V_NOP +} + +// Outs for GFX9 SDWA +class getOutsSDWA9 { + dag ret = !if(HasDst, + !if(!eq(DstVT.Size, 1), + (outs DstRCSDWA9:$sdst), + (outs DstRCSDWA9:$vdst)), (outs)); // V_NOP } @@ -1153,8 +1269,7 @@ class getAsmDPP { +class getAsmSDWA { string dst = !if(HasDst, !if(!eq(DstVT.Size, 1), " vcc", // use vcc token as dst for VOPC instructioins @@ -1182,6 +1297,35 @@ class getAsmSDWA { + string dst = !if(HasDst, + !if(!eq(DstVT.Size, 1), + "$sdst", // VOPC + "$vdst"), // VOP1/2 + ""); + string src0 = "$src0_modifiers"; + string src1 = "$src1_modifiers"; + string out_mods = !if(!eq(HasOMod, 0), "$clamp", "$clamp$omod"); + string args = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + ", "#src0, + ", "#src0#", "#src1 + ) + ); + string sdwa = !if(!eq(NumSrcArgs, 0), "", + !if(!eq(NumSrcArgs, 1), + out_mods#" $dst_sel $dst_unused $src0_sel", + !if(!eq(DstVT.Size, 1), + " $src0_sel $src1_sel", // No dst_sel, dst_unused and output modifiers for VOPC + out_mods#" $dst_sel $dst_unused $src0_sel $src1_sel" + ) + ) + ); + string ret = dst#args#sdwa; +} + + // Function that checks if instruction supports DPP and SDWA class getHasExt { @@ -1219,6 +1363,7 @@ class VOPProfile _ArgVT> { field RegisterOperand DstRC = getVALUDstForVT.ret; field RegisterOperand DstRCDPP = getVALUDstForVT.ret; field RegisterOperand DstRCSDWA = getVALUDstForVT.ret; + field RegisterOperand DstRCSDWA9 = getSDWA9DstForVT.ret; field RegisterOperand Src0RC32 = getVOPSrc0ForVT.ret; field RegisterClass Src1RC32 = getVregSrcForVT.ret; field RegisterOperand Src0RC64 = getVOP3SrcForVT.ret; @@ -1228,6 +1373,8 @@ class VOPProfile _ArgVT> { field RegisterClass Src1DPP = getVregSrcForVT.ret; field RegisterClass Src0SDWA = getVregSrcForVT.ret; field RegisterClass Src1SDWA = getVregSrcForVT.ret; + field RegisterOperand Src0SDWA9 = getSDWA9SrcForVT.ret; + field RegisterOperand Src1SDWA9 = getSDWA9SrcForVT.ret; field Operand Src0Mod = getSrcMod.ret; field Operand Src1Mod = getSrcMod.ret; field Operand Src2Mod = getSrcMod.ret; @@ -1235,6 +1382,8 @@ class VOPProfile _ArgVT> { field Operand Src1ModDPP = getSrcModExt.ret; field Operand Src0ModSDWA = getSrcModExt.ret; field Operand Src1ModSDWA = getSrcModExt.ret; + field Operand Src0ModSDWA9 = getSrcModSDWA9.ret; + field Operand Src1ModSDWA9 = getSrcModSDWA9.ret; field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1); @@ -1261,14 +1410,16 @@ class VOPProfile _ArgVT> { field bit HasSrc2Mods = !if(HasModifiers, BitOr.ret, 0); field bit HasClamp = HasModifiers; - field bit HasSDWAClamp = HasSrc0; + field bit HasSDWAClamp = EmitDst; field bit HasFPClamp = BitAnd.ret, HasClamp>.ret; field bit IsPacked = isPackedType.ret; field bit HasOpSel = IsPacked; field bit HasOMod = !if(HasOpSel, 0, HasModifiers); + field bit HasSDWAOMod = isFloatType.ret; field bit HasExt = getHasExt.ret; + field bit HasSDWA9 = HasExt; field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods); field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods); @@ -1282,6 +1433,7 @@ class VOPProfile _ArgVT> { field dag Outs64 = Outs; field dag OutsDPP = getOutsExt.ret; field dag OutsSDWA = getOutsExt.ret; + field dag OutsSDWA9 = getOutsSDWA9.ret; field dag Ins32 = getIns32.ret; field dag Ins64 = getIns64 _ArgVT> { field dag InsSDWA = getInsSDWA.ret; + field dag InsSDWA9 = getInsSDWA9.ret; field string Asm32 = getAsm32.ret; field string Asm64 = getAsm64.ret; field string AsmVOP3P = getAsmVOP3P.ret; field string AsmDPP = getAsmDPP.ret; - field string AsmSDWA = getAsmSDWA.ret; + field string AsmSDWA = getAsmSDWA.ret; + field string AsmSDWA9 = getAsmSDWA9.ret; } class VOP_NO_EXT : VOPProfile { let HasExt = 0; + let HasSDWA9 = 0; } def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>; @@ -1446,6 +1603,15 @@ def getSDWAOp : InstrMapping { let ValueCols = [["SDWA"]]; } +// Maps ordinary instructions to their SDWA GFX9 counterparts +def getSDWA9Op : InstrMapping { + let FilterClass = "VOP"; + let RowFields = ["OpName"]; + let ColFields = ["AsmVariantName"]; + let KeyCol = ["Default"]; + let ValueCols = [["SDWA9"]]; +} + def getMaskedMIMGOp : InstrMapping { let FilterClass = "MIMG_Mask"; let RowFields = ["Op"]; diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td index f2d8b6f7b7a..ec29a66c8bb 100644 --- a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -184,7 +184,9 @@ def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">; def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">; def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">; def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">; -def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">; +def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64", + [(set i64:$sdst, (int_amdgcn_s_getpc))] +>; let isTerminator = 1, isBarrier = 1, SchedRW = [WriteBranch] in { diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 2abd4afad3b..630f469eabf 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -544,6 +544,17 @@ bool isVI(const MCSubtargetInfo &STI) { return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]; } +bool isGFX9(const MCSubtargetInfo &STI) { + return STI.getFeatureBits()[AMDGPU::FeatureGFX9]; +} + +bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) { + const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID); + const unsigned FirstSubReg = TRI->getSubReg(Reg, 1); + return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) || + Reg == AMDGPU::SCC; +} + unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) { switch(Reg) { diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 8e74aa2cc9a..19888ad7556 100644 --- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -273,6 +273,10 @@ inline bool isKernel(CallingConv::ID CC) { bool isSI(const MCSubtargetInfo &STI); bool isCI(const MCSubtargetInfo &STI); bool isVI(const MCSubtargetInfo &STI); +bool isGFX9(const MCSubtargetInfo &STI); + +/// \brief Is Reg - scalar register +bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI); /// If \p Reg is a pseudo reg, return the correct hardware register given /// \p STI otherwise return \p Reg. diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 1febc6bf8ec..95b5ef0a49d 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -30,6 +30,15 @@ class VOP1_SDWAe op, VOPProfile P> : VOP_SDWAe

{ let Inst{31-25} = 0x3f; // encoding } +class VOP1_SDWA9Ae op, VOPProfile P> : VOP_SDWA9Ae

{ + bits<8> vdst; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = op; + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{31-25} = 0x3f; // encoding +} + class VOP1_Pseudo pattern=[], bit VOP1Only = 0> : InstSI , VOP , @@ -84,6 +93,11 @@ class VOP1_SDWA_Pseudo pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP1"; } +class VOP1_SDWA9_Pseudo pattern=[]> : + VOP_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP1"; +} + class getVOP1Pat64 : LetDummies { list ret = !if(P.HasModifiers, @@ -103,6 +117,7 @@ multiclass VOP1Inst ; def _e64 : VOP3_Pseudo .ret>; def _sdwa : VOP1_SDWA_Pseudo ; + def _sdwa9 : VOP1_SDWA9_Pseudo ; } // Special profile for instructions which have clamp @@ -243,6 +258,7 @@ def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> { let Src0RC64 = VRegSrc_32; let HasExt = 0; + let HasSDWA9 = 0; } // Special case because there are no true output operands. Hack vdst @@ -258,16 +274,21 @@ def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> { let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0); let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); - let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, VCSrc_b32:$src0, + let InsSDWA = (ins Src0RC32:$vdst, Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel); + let InsSDWA9 = (ins Src0RC32:$vdst, Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + clampmod:$clamp, omod:$omod, dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel); let Asm32 = getAsm32<1, 1>.ret; let Asm64 = getAsm64<1, 1, 0, 1>.ret; let AsmDPP = getAsmDPP<1, 1, 0>.ret; - let AsmSDWA = getAsmSDWA<1, 1, 0>.ret; + let AsmSDWA = getAsmSDWA<1, 1>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 0, 1>.ret; let HasExt = 0; + let HasSDWA9 = 0; let HasDst = 0; let EmitDst = 1; // force vdst emission } @@ -324,7 +345,7 @@ defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP1_F16_I16, uint_to_fp>; defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP1_F16_I16, sint_to_fp>; @@ -347,7 +368,7 @@ defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>; } -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { def : Pat< (f32 (f16_to_fp i16:$src)), @@ -523,6 +544,10 @@ multiclass VOP1_Real_vi op> { VOP_SDWA_Real (NAME#"_sdwa")>, VOP1_SDWAe (NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa9")>, + VOP1_SDWA9Ae (NAME#"_sdwa9").Pfl>; + // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP1_DPP(NAME#"_e32")>; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 4a11d9471f1..657cacaa792 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -48,6 +48,18 @@ class VOP2_SDWAe op, VOPProfile P> : VOP_SDWAe

{ let Inst{31} = 0x0; // encoding } +class VOP2_SDWA9Ae op, VOPProfile P> : VOP_SDWA9Ae

{ + bits<8> vdst; + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0); + let Inst{30-25} = op; + let Inst{31} = 0x0; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} + class VOP2_Pseudo pattern=[], string suffix = "_e32"> : InstSI , VOP , @@ -102,6 +114,11 @@ class VOP2_SDWA_Pseudo pattern=[]> : let AsmMatchConverter = "cvtSdwaVOP2"; } +class VOP2_SDWA9_Pseudo pattern=[]> : + VOP_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2"; +} + class getVOP2Pat64 : LetDummies { list ret = !if(P.HasModifiers, [(set P.DstVT:$vdst, @@ -121,10 +138,10 @@ multiclass VOP2Inst .ret>, Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa9 : VOP2_SDWA9_Pseudo ; } -// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst multiclass VOP2bInst , Commutable_REV; - def _sdwa : VOP2_SDWA_Pseudo ; + def _sdwa : VOP2_SDWA_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } + + def _sdwa9 : VOP2_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOP2b"; + } } def _e64 : VOP3_Pseudo .ret>, @@ -203,13 +226,21 @@ class VOP_MAC : VOPProfile <[vt, vt, vt, vt]> { VGPR_32:$src2, // stub argument clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + VGPR_32:$src2, // stub argument + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); let Asm32 = getAsm32<1, 2, vt>.ret; let Asm64 = getAsm64<1, 2, HasModifiers, HasOMod, vt>.ret; let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret; - let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret; + let AsmSDWA = getAsmSDWA<1, 2, vt>.ret; + let AsmSDWA9 = getAsmSDWA9<1, 1, 2, vt>.ret; let HasSrc2 = 0; let HasSrc2Mods = 0; let HasExt = 1; + let HasSDWA9 = 0; } def VOP_MAC_F16 : VOP_MAC { @@ -229,6 +260,7 @@ def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> { let Asm32 = "$vdst, vcc, $src0, $src1"; let Asm64 = "$vdst, $sdst, $src0, $src1"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -246,6 +278,7 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { let Asm32 = "$vdst, vcc, $src0, $src1, vcc"; let Asm64 = "$vdst, $sdst, $src0, $src1, $src2"; let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; + let AsmSDWA9 = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel"; let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl"; let Outs32 = (outs DstRC:$vdst); let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst); @@ -254,16 +287,23 @@ def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> { // implicit VCC use. let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1); - let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0, - Src1Mod:$src1_modifiers, Src1SDWA:$src1, + let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, + Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + clampmod:$clamp, omod:$omod, + dst_sel:$dst_sel, dst_unused:$dst_unused, + src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0, Src1Mod:$src1_modifiers, Src1DPP:$src1, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask, bank_mask:$bank_mask, bound_ctrl:$bound_ctrl); let HasExt = 1; + let HasSDWA9 = 1; } // Read in from vcc or arbitrary SGPR @@ -387,7 +427,7 @@ defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>; } // End let SubtargetPredicate = SICI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16, [], "">; defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>; @@ -418,7 +458,7 @@ defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>; } } // End isCommutable = 1 -} // End SubtargetPredicate = isVI +} // End SubtargetPredicate = Has16BitInsts // Note: 16-bit instructions produce a 0 result in the high 16-bits. multiclass Arithmetic_i16_Pats { @@ -468,7 +508,7 @@ class ZExt_i16_i1_Pat : Pat < (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src) >; -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { defm : Arithmetic_i16_Pats; defm : Arithmetic_i16_Pats; @@ -513,7 +553,7 @@ def : Pat< (V_SUB_U16_e64 $src0, NegSubInlineConst16:$src1) >; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] //===----------------------------------------------------------------------===// // SI @@ -686,15 +726,21 @@ multiclass VOP2_SDWA_Real op> { VOP2_SDWAe (NAME#"_sdwa").Pfl>; } +multiclass VOP2_SDWA9_Real op> { + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa9")>, + VOP2_SDWA9Ae (NAME#"_sdwa9").Pfl>; +} + multiclass VOP2be_Real_e32e64_vi op> : - Base_VOP2be_Real_e32e64_vi, VOP2_SDWA_Real { + Base_VOP2be_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP(NAME#"_e32")>; } multiclass VOP2_Real_e32e64_vi op> : - Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real { + Base_VOP2_Real_e32e64_vi, VOP2_SDWA_Real, VOP2_SDWA9_Real { // For now left dpp only for asm/dasm // TODO: add corresponding pseudo def _dpp : VOP2_DPP(NAME#"_e32")>; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td index c0b5069948f..001fc960b22 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -243,7 +243,7 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>; } // End SubtargetPredicate = isCIVI -let SubtargetPredicate = isVI in { +let SubtargetPredicate = Has16BitInsts in { let isCommutable = 1 in { @@ -258,12 +258,13 @@ def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile>; def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile>; } // End isCommutable = 1 +} // End SubtargetPredicate = Has16BitInsts +let SubtargetPredicate = isVI in { def V_PERM_B32 : VOP3Inst <"v_perm_b32", VOP3_Profile>; - } // End SubtargetPredicate = isVI -let Predicates = [isVI] in { +let Predicates = [Has16BitInsts] in { multiclass Ternary_i16_Pats { @@ -288,7 +289,7 @@ def : Pat< defm: Ternary_i16_Pats; defm: Ternary_i16_Pats; -} // End Predicates = [isVI] +} // End Predicates = [Has16BitInsts] let SubtargetPredicate = isGFX9 in { def V_PACK_B32_F16 : VOP3Inst <"v_pack_b32_f16", VOP3_Profile>; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td index a3550a63677..cd347b86d30 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -34,6 +34,17 @@ class VOPC_SDWAe op, VOPProfile P> : VOP_SDWAe

{ let Inst{44-43} = SDWA.UNUSED_PRESERVE; } +class VOPC_SDWA9e op, VOPProfile P> : VOP_SDWA9Be

{ + bits<9> src1; + + let Inst{8-0} = 0xf9; // sdwa + let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0); + let Inst{24-17} = op; + let Inst{31-25} = 0x3e; // encoding + let Inst{63} = !if(P.HasSrc1, src1{8}, 0); // src1_sgpr +} + + //===----------------------------------------------------------------------===// // VOPC classes //===----------------------------------------------------------------------===// @@ -102,6 +113,11 @@ class VOPC_SDWA_Pseudo pattern=[]> : let AsmMatchConverter = "cvtSdwaVOPC"; } +class VOPC_SDWA9_Pseudo pattern=[]> : + VOP_SDWA9_Pseudo { + let AsmMatchConverter = "cvtSdwaVOPC"; +} + // This class is used only with VOPC instructions. Use $sdst for out operand class VOPCInstAlias : InstAlias , PredicateControl { @@ -173,6 +189,13 @@ multiclass VOPC_Pseudos { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = P.Schedule; + let isConvergent = DefExec; + let isCompare = 1; + } } def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>; @@ -520,7 +543,11 @@ class VOPC_Class_Profile sched, ValueType vt> : let InsSDWA = (ins Src0ModSDWA:$src0_modifiers, Src0SDWA:$src0, Src1ModSDWA:$src1_modifiers, Src1SDWA:$src1, clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel); + let InsSDWA9 = (ins Src0ModSDWA9:$src0_modifiers, Src0SDWA9:$src0, + Src1ModSDWA9:$src1_modifiers, Src1SDWA9:$src1, + src0_sel:$src0_sel, src1_sel:$src1_sel); let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel"; + //let AsmSDWA9 = " $sdst, $src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; @@ -553,6 +580,12 @@ multiclass VOPC_Class_Pseudos { let SchedRW = p.Schedule; let isConvergent = DefExec; } + + def _sdwa9 : VOPC_SDWA9_Pseudo { + let Defs = !if(DefExec, [VCC, EXEC], [VCC]); + let SchedRW = p.Schedule; + let isConvergent = DefExec; + } } def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>; @@ -920,6 +953,10 @@ multiclass VOPC_Real_vi op> { VOP_SDWA_Real (NAME#"_sdwa")>, VOPC_SDWAe (NAME#"_sdwa").Pfl>; + def _sdwa_gfx9 : + VOP_SDWA9_Real (NAME#"_sdwa9")>, + VOPC_SDWA9e (NAME#"_sdwa9").Pfl>; + def : VOPCInstAlias (NAME#"_e64"), !cast(NAME#"_e32_vi")> { let AssemblerPredicate = isVI; diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td index 69906c419db..4da654f84f9 100644 --- a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -293,11 +293,52 @@ class VOP_SDWAe : Enc64 { let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); - let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); - let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); + let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); +} + +// gfx9 SDWA basic encoding +class VOP_SDWA9e : Enc64 { + bits<9> src0; // {src0_sgpr{0}, src0{7-0}} + bits<3> src0_sel; + bits<2> src0_modifiers; // float: {abs,neg}, int {sext} + bits<3> src1_sel; + bits<2> src1_modifiers; + bits<1> src1_sgpr; + + let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0); + let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD); + let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0); + let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0); + let Inst{55} = !if(P.HasSrc0, src0{8}, 0); + let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD); + let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0); + let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0); + let Inst{63} = 0; // src1_sgpr - should be specified in subclass +} + +// gfx9 SDWA-A +class VOP_SDWA9Ae : VOP_SDWA9e

{ + bits<3> dst_sel; + bits<2> dst_unused; + bits<1> clamp; + bits<2> omod; + + let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD); + let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE); + let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0); + let Inst{47-46} = !if(P.HasSDWAOMod, omod{1-0}, 0); +} + +// gfx9 SDWA-B +class VOP_SDWA9Be : VOP_SDWA9e

{ + bits<8> sdst; // {vcc_sdst{0}, sdst{6-0}} + + let Inst{46-40} = !if(P.EmitDst, sdst{6-0}, 0); + let Inst{47} = !if(P.EmitDst, sdst{7}, 0); } class VOP_SDWA_Pseudo pattern=[]> : @@ -331,6 +372,50 @@ class VOP_SDWA_Pseudo pattern=[]> : VOPProfile Pfl = P; } +// GFX9 adds two features to SDWA: +// 1. Add 3 fields to the SDWA microcode word: S0, S1 and OMOD. +// a. S0 and S1 indicate that source 0 and 1 respectively are SGPRs rather +// than VGPRs (at most 1 can be an SGPR); +// b. OMOD is the standard output modifier (result *2, *4, /2) +// 2. Add a new version of the SDWA microcode word for VOPC: SDWAB. This +// replaces OMOD and the dest fields with SD and SDST (SGPR destination) +// field. +// a. When SD=1, the SDST is used as the destination for the compare result; +// b.when SD=0, VCC is used. +// +// In GFX9, V_MAC_F16, V_MAC_F32 opcodes cannot be used with SDWA + +class VOP_SDWA9_Pseudo pattern=[]> : + InstSI , + VOP , + SIMCInstr , + MnemonicAlias { + + let isPseudo = 1; + let isCodeGenOnly = 1; + let UseNamedOperandTable = 1; + + string Mnemonic = opName; + string AsmOperands = P.AsmSDWA9; + + let Size = 8; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + + let VALU = 1; + let SDWA = 1; + let Uses = [EXEC]; + + let SubtargetPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); + let AssemblerPredicate = !if(P.HasSDWA9, HasSDWA9, DisableInst); + let AsmVariantName = !if(P.HasSDWA9, AMDGPUAsmVariants.SDWA9, + AMDGPUAsmVariants.Disable); + let DecoderNamespace = "SDWA9"; + + VOPProfile Pfl = P; +} + class VOP_SDWA_Real : InstSI , SIMCInstr { @@ -358,6 +443,33 @@ class VOP_SDWA_Real : let TSFlags = ps.TSFlags; } +class VOP_SDWA9_Real : + InstSI , + SIMCInstr { + + let isPseudo = 0; + let isCodeGenOnly = 0; + + let Defs = ps.Defs; + let Uses = ps.Uses; + let SchedRW = ps.SchedRW; + let hasSideEffects = ps.hasSideEffects; + + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + + // Copy relevant pseudo op flags + let SubtargetPredicate = ps.SubtargetPredicate; + let AssemblerPredicate = ps.AssemblerPredicate; + let AsmMatchConverter = ps.AsmMatchConverter; + let AsmVariantName = ps.AsmVariantName; + let UseNamedOperandTable = ps.UseNamedOperandTable; + let DecoderNamespace = ps.DecoderNamespace; + let Constraints = ps.Constraints; + let DisableEncoding = ps.DisableEncoding; + let TSFlags = ps.TSFlags; +} + class VOP_DPPe : Enc64 { bits<2> src0_modifiers; bits<8> src0; diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp index 46ac4d0ad93..31a2f499a9a 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp @@ -34,6 +34,9 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI) static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI, Type *T) { + if (T->isArrayTy()) + return true; + EVT VT = TLI.getValueType(DL, T, true); if (!VT.isSimple() || VT.isVector() || !(VT.isInteger() || VT.isFloatingPoint())) @@ -148,23 +151,47 @@ struct OutgoingValueHandler : public CallLowering::ValueHandler { }; } // End anonymous namespace. -void ARMCallLowering::splitToValueTypes(const ArgInfo &OrigArg, - SmallVectorImpl &SplitArgs, - const DataLayout &DL, - MachineRegisterInfo &MRI) const { +void ARMCallLowering::splitToValueTypes( + const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, + MachineFunction &MF, const SplitArgTy &PerformArgSplit) const { const ARMTargetLowering &TLI = *getTLI(); LLVMContext &Ctx = OrigArg.Ty->getContext(); + const DataLayout &DL = MF.getDataLayout(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const Function *F = MF.getFunction(); SmallVector SplitVTs; SmallVector Offsets; ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); - assert(SplitVTs.size() == 1 && "Unsupported type"); + if (SplitVTs.size() == 1) { + // Even if there is no splitting to do, we still want to replace the + // original type (e.g. pointer type -> integer). + SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), + OrigArg.Flags, OrigArg.IsFixed); + return; + } - // Even if there is no splitting to do, we still want to replace the original - // type (e.g. pointer type -> integer). - SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), - OrigArg.Flags, OrigArg.IsFixed); + unsigned FirstRegIdx = SplitArgs.size(); + for (unsigned i = 0, e = SplitVTs.size(); i != e; ++i) { + EVT SplitVT = SplitVTs[i]; + Type *SplitTy = SplitVT.getTypeForEVT(Ctx); + auto Flags = OrigArg.Flags; + bool NeedsConsecutiveRegisters = + TLI.functionArgumentNeedsConsecutiveRegisters( + SplitTy, F->getCallingConv(), F->isVarArg()); + if (NeedsConsecutiveRegisters) { + Flags.setInConsecutiveRegs(); + if (i == e - 1) + Flags.setInConsecutiveRegsLast(); + } + SplitArgs.push_back( + ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)), + SplitTy, Flags, OrigArg.IsFixed}); + } + + for (unsigned i = 0; i < Offsets.size(); ++i) + PerformArgSplit(SplitArgs[FirstRegIdx + i].Reg, Offsets[i] * 8); } /// Lower the return value for the already existing \p Ret. This assumes that @@ -187,7 +214,9 @@ bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder, SmallVector SplitVTs; ArgInfo RetInfo(VReg, Val->getType()); setArgFlags(RetInfo, AttributeList::ReturnIndex, DL, F); - splitToValueTypes(RetInfo, SplitVTs, DL, MF.getRegInfo()); + splitToValueTypes(RetInfo, SplitVTs, MF, [&](unsigned Reg, uint64_t Offset) { + MIRBuilder.buildExtract(Reg, VReg, Offset); + }); CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg()); @@ -307,6 +336,26 @@ struct IncomingValueHandler : public CallLowering::ValueHandler { return 1; } + /// Merge the values in \p SrcRegs into \p DstReg at offsets \p SrcOffsets. + /// Note that the source registers are not required to have homogeneous types, + /// so we use G_INSERT rather than G_MERGE_VALUES. + // FIXME: Use G_MERGE_VALUES if the types are homogeneous. + void mergeRegisters(unsigned DstReg, ArrayRef SrcRegs, + ArrayRef SrcOffsets) { + LLT Ty = MRI.getType(DstReg); + + unsigned Dst = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildUndef(Dst); + + for (unsigned i = 0; i < SrcRegs.size(); ++i) { + unsigned Tmp = MRI.createGenericVirtualRegister(Ty); + MIRBuilder.buildInsert(Tmp, Dst, SrcRegs[i], SrcOffsets[i]); + Dst = Tmp; + } + + MIRBuilder.buildCopy(DstReg, Dst); + } + /// Marking a physical register as used is different between formal /// parameters, where it's a basic block live-in, and call returns, where it's /// an implicit-def of the call instruction. @@ -335,6 +384,7 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, return false; auto &MF = MIRBuilder.getMF(); + auto &MBB = MIRBuilder.getMBB(); auto DL = MF.getDataLayout(); auto &TLI = *getTLI(); @@ -350,17 +400,34 @@ bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg()); + FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), + AssignFn); + SmallVector ArgInfos; + SmallVector SplitRegs; + SmallVector RegOffsets; unsigned Idx = 0; for (auto &Arg : F.args()) { ArgInfo AInfo(VRegs[Idx], Arg.getType()); setArgFlags(AInfo, Idx + AttributeList::FirstArgIndex, DL, F); - splitToValueTypes(AInfo, ArgInfos, DL, MF.getRegInfo()); + + SplitRegs.clear(); + RegOffsets.clear(); + + splitToValueTypes(AInfo, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + SplitRegs.push_back(Reg); + RegOffsets.push_back(Offset); + }); + + if (!SplitRegs.empty()) + ArgHandler.mergeRegisters(VRegs[Idx], SplitRegs, RegOffsets); + Idx++; } - FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo(), - AssignFn); + if (!MBB.empty()) + MIRBuilder.setInstr(*MBB.begin()); + return handleAssignments(MIRBuilder, ArgInfos, ArgHandler); } @@ -407,7 +474,9 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!Arg.IsFixed) return false; - splitToValueTypes(Arg, ArgInfos, DL, MRI); + splitToValueTypes(Arg, ArgInfos, MF, [&](unsigned Reg, uint64_t Offset) { + MIRBuilder.buildExtract(Reg, Arg.Reg, Offset); + }); } auto ArgAssignFn = TLI.CCAssignFnForCall(CallConv, /*IsVarArg=*/false); @@ -423,12 +492,24 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, return false; ArgInfos.clear(); - splitToValueTypes(OrigRet, ArgInfos, DL, MRI); + SmallVector RegOffsets; + SmallVector SplitRegs; + splitToValueTypes(OrigRet, ArgInfos, MF, + [&](unsigned Reg, uint64_t Offset) { + RegOffsets.push_back(Offset); + SplitRegs.push_back(Reg); + }); auto RetAssignFn = TLI.CCAssignFnForReturn(CallConv, /*IsVarArg=*/false); CallReturnHandler RetHandler(MIRBuilder, MRI, MIB, RetAssignFn); if (!handleAssignments(MIRBuilder, ArgInfos, RetHandler)) return false; + + if (!RegOffsets.empty()) { + // We have split the value and allocated each individual piece, now build + // it up again. + RetHandler.mergeRegisters(OrigRet.Reg, SplitRegs, RegOffsets); + } } // We now know the size of the stack - update the ADJCALLSTACKDOWN diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h index 6404c7a2689..f5a6872336f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h @@ -42,11 +42,14 @@ private: bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val, unsigned VReg, MachineInstrBuilder &Ret) const; + typedef std::function SplitArgTy; + /// Split an argument into one or more arguments that the CC lowering can cope /// with (e.g. replace pointers with integers). void splitToValueTypes(const ArgInfo &OrigArg, SmallVectorImpl &SplitArgs, - const DataLayout &DL, MachineRegisterInfo &MRI) const; + MachineFunction &MF, + const SplitArgTy &PerformArgSplit) const; }; } // End of namespace llvm #endif diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 78a9144bd32..90baabcdb65 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -779,7 +779,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB, MachineOperand &Desired = MI.getOperand(3); MachineOperand &New = MI.getOperand(4); - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LivePhysRegs LiveRegs(TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); @@ -903,7 +903,7 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB, unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0); unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1); - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LivePhysRegs LiveRegs(TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index f8b584db7b9..62e774d869d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -127,7 +127,7 @@ static cl::opt EnableConstpoolPromotion( "arm-promote-constant", cl::Hidden, cl::desc("Enable / disable promotion of unnamed_addr constants into " "constant pools"), - cl::init(true)); + cl::init(false)); // FIXME: set to true by default once PR32780 is fixed static cl::opt ConstpoolPromotionMaxSize( "arm-promote-constant-max-size", cl::Hidden, cl::desc("Maximum size of constant to promote into a constant pool"), @@ -12147,12 +12147,6 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, } } - // Lowering to i32/i16 if the size permits. - if (Size >= 4) - return MVT::i32; - else if (Size >= 2) - return MVT::i16; - // Let the target-independent logic figure it out. return MVT::Other; } diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 875c06210ae..26da528c19e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -510,7 +510,7 @@ class InstrItineraryData; bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override; - bool canMergeStoresTo(EVT MemVT) const override { + bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT) const override { // Do not merge to larger than i32. return (MemVT.getSizeInBits() <= 32); } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 51290e5a5b9..858136a8207 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -674,7 +674,7 @@ let mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 in { class VLD1D op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins AddrMode:$Rn), IIC_VLD1, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -682,7 +682,7 @@ class VLD1D op7_4, string Dt, Operand AddrMode> class VLD1Q op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), (ins AddrMode:$Rn), IIC_VLD1x2, - "vld1", Dt, "$Vd, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -703,7 +703,7 @@ multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -711,7 +711,7 @@ multiclass VLD1DWB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -720,7 +720,7 @@ multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -728,7 +728,7 @@ multiclass VLD1QWB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -747,7 +747,7 @@ defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64", addrmode6align64or128>; class VLD1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x3, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -756,7 +756,7 @@ multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -764,7 +764,7 @@ multiclass VLD1D3WB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -780,15 +780,15 @@ defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16", addrmode6align64>; defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32", addrmode6align64>; defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64", addrmode6align64>; -def VLD1d64TPseudo : VLDQQPseudo; -def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo; +def VLD1d64TPseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD3]>; +def VLD1d64TPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD3]>; // ...with 4 registers class VLD1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins AddrMode:$Rn), IIC_VLD1x4, "vld1", Dt, - "$Vd, $Rn", "", []> { + "$Vd, $Rn", "", []>, Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -797,7 +797,7 @@ multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -805,7 +805,7 @@ multiclass VLD1D4WB op7_4, string Dt, Operand AddrMode> { def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -821,9 +821,9 @@ defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; -def VLD1d64QPseudo : VLDQQPseudo; -def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo; +def VLD1d64QPseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD1d64QPseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; // VLD2 : Vector Load (multiple 2-element structures) class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -837,22 +837,22 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d16 : VLD2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2d32 : VLD2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2q8 : VLD2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q16 : VLD2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; def VLD2q32 : VLD2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8Pseudo : VLDQQPseudo; -def VLD2q16Pseudo : VLDQQPseudo; -def VLD2q32Pseudo : VLDQQPseudo; +def VLD2q8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD2q32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; // ...with address register writeback: multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, @@ -875,45 +875,45 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, } defm VLD2d8wb : VLD2WB<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d16wb : VLD2WB<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2d32wb : VLD2WB<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2q8wb : VLD2WB<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q16wb : VLD2WB<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; defm VLD2q32wb : VLD2WB<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VLD2x2u, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVLD4]>; -def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo; -def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo; -def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo; -def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo; +def VLD2q8PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_fixed : VLDQQWBfixedPseudo, Sched<[WriteVLD4]>; +def VLD2q8PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q16PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; +def VLD2q32PseudoWB_register : VLDQQWBregisterPseudo, Sched<[WriteVLD4]>; // ...with double-spaced registers def VLD2b8 : VLD2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b16 : VLD2<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; def VLD2b32 : VLD2<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b8wb : VLD2WB<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b16wb : VLD2WB<0b1001, {0,1,?,?}, "16", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; defm VLD2b32wb : VLD2WB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, IIC_VLD2u, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVLD2]>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6:$Rn), IIC_VLD3, - "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []>, Sched<[WriteVLD3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; @@ -923,9 +923,9 @@ def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; def VLD3d16 : VLD3D<0b0100, {0,1,0,?}, "16">; def VLD3d32 : VLD3D<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo : VLDQQPseudo; -def VLD3d16Pseudo : VLDQQPseudo; -def VLD3d32Pseudo : VLDQQPseudo; +def VLD3d8Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo : VLDQQPseudo, Sched<[WriteVLD3]>; // ...with address register writeback: class VLD3DWB op11_8, bits<4> op7_4, string Dt> @@ -933,7 +933,7 @@ class VLD3DWB op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD3u, "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; } @@ -942,9 +942,9 @@ def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; def VLD3d16_UPD : VLD3DWB<0b0100, {0,1,0,?}, "16">; def VLD3d32_UPD : VLD3DWB<0b0100, {1,0,0,?}, "32">; -def VLD3d8Pseudo_UPD : VLDQQWBPseudo; -def VLD3d16Pseudo_UPD : VLDQQWBPseudo; -def VLD3d32Pseudo_UPD : VLDQQWBPseudo; +def VLD3d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD3]>; // ...with double-spaced registers: def VLD3q8 : VLD3D<0b0101, {0,0,0,?}, "8">; @@ -954,25 +954,26 @@ def VLD3q8_UPD : VLD3DWB<0b0101, {0,0,0,?}, "8">; def VLD3q16_UPD : VLD3DWB<0b0101, {0,1,0,?}, "16">; def VLD3q32_UPD : VLD3DWB<0b0101, {1,0,0,?}, "32">; -def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo; +def VLD3q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; // ...alternate versions to be allocated odd register numbers: -def VLD3q8oddPseudo : VLDQQQQPseudo; -def VLD3q16oddPseudo : VLDQQQQPseudo; -def VLD3q32oddPseudo : VLDQQQQPseudo; +def VLD3q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD3]>; -def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD3q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; +def VLD3q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD3]>; // VLD4 : Vector Load (multiple 4-element structures) class VLD4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$Rn), IIC_VLD4, - "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []>, + Sched<[WriteVLD4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; @@ -982,9 +983,9 @@ def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; def VLD4d16 : VLD4D<0b0000, {0,1,?,?}, "16">; def VLD4d32 : VLD4D<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo : VLDQQPseudo; -def VLD4d16Pseudo : VLDQQPseudo; -def VLD4d32Pseudo : VLDQQPseudo; +def VLD4d8Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo : VLDQQPseudo, Sched<[WriteVLD4]>; // ...with address register writeback: class VLD4DWB op11_8, bits<4> op7_4, string Dt> @@ -992,7 +993,7 @@ class VLD4DWB op11_8, bits<4> op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD4u, "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; } @@ -1001,9 +1002,9 @@ def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; def VLD4d16_UPD : VLD4DWB<0b0000, {0,1,?,?}, "16">; def VLD4d32_UPD : VLD4DWB<0b0000, {1,0,?,?}, "32">; -def VLD4d8Pseudo_UPD : VLDQQWBPseudo; -def VLD4d16Pseudo_UPD : VLDQQWBPseudo; -def VLD4d32Pseudo_UPD : VLDQQWBPseudo; +def VLD4d8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4d32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD4]>; // ...with double-spaced registers: def VLD4q8 : VLD4D<0b0001, {0,0,?,?}, "8">; @@ -1013,18 +1014,18 @@ def VLD4q8_UPD : VLD4DWB<0b0001, {0,0,?,?}, "8">; def VLD4q16_UPD : VLD4DWB<0b0001, {0,1,?,?}, "16">; def VLD4q32_UPD : VLD4DWB<0b0001, {1,0,?,?}, "32">; -def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo; +def VLD4q8Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32Pseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; // ...alternate versions to be allocated odd register numbers: -def VLD4q8oddPseudo : VLDQQQQPseudo; -def VLD4q16oddPseudo : VLDQQQQPseudo; -def VLD4q32oddPseudo : VLDQQQQPseudo; +def VLD4q8oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo : VLDQQQQPseudo, Sched<[WriteVLD4]>; -def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo; -def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo; +def VLD4q8oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q16oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; +def VLD4q32oddPseudo_UPD : VLDQQQQWBPseudo, Sched<[WriteVLD4]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1076,11 +1077,12 @@ class VLD1LN32 op11_8, bits<4> op7_4, string Dt, ValueType Ty, "$src = $Vd", [(set DPR:$Vd, (vector_insert (Ty DPR:$src), (i32 (LoadOp addrmode6oneL32:$Rn)), - imm:$lane))]> { + imm:$lane))]>, Sched<[WriteVLD1]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD1LN"; } -class VLD1QLNPseudo : VLDQLNPseudo { +class VLD1QLNPseudo : VLDQLNPseudo, + Sched<[WriteVLD1]> { let Pattern = [(set QPR:$dst, (vector_insert (Ty QPR:$src), (i32 (LoadOp addrmode6:$addr)), imm:$lane))]; @@ -1117,7 +1119,7 @@ class VLD1LNWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$src, nohash_imm:$lane), IIC_VLD1lnu, "vld1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$src = $Vd, $Rn.addr = $wb", []> { + "$src = $Vd, $Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let DecoderMethod = "DecodeVLD1LN"; } @@ -1134,16 +1136,16 @@ def VLD1LNd32_UPD : VLD1LNWB<0b1000, {?,0,?,?}, "32"> { let Inst{4} = Rn{4}; } -def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo; -def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo; -def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo; +def VLD1LNq8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD1LNq32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; // VLD2LN : Vector Load (single 2-element structure to one lane) class VLD2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2ln, "vld2", Dt, "\\{$Vd[$lane], $dst2[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2", []> { + "$src1 = $Vd, $src2 = $dst2", []>, Sched<[WriteVLD1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2LN"; @@ -1159,9 +1161,9 @@ def VLD2LNd32 : VLD2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNd8Pseudo : VLDQLNPseudo; -def VLD2LNd16Pseudo : VLDQLNPseudo; -def VLD2LNd32Pseudo : VLDQLNPseudo; +def VLD2LNd8Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo : VLDQLNPseudo, Sched<[WriteVLD1]>; // ...with double-spaced registers: def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16"> { @@ -1171,8 +1173,8 @@ def VLD2LNq32 : VLD2LN<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNq16Pseudo : VLDQQLNPseudo; -def VLD2LNq32Pseudo : VLDQQLNPseudo; +def VLD2LNq16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD1]>; // ...with address register writeback: class VLD2LNWB op11_8, bits<4> op7_4, string Dt> @@ -1195,9 +1197,9 @@ def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo; -def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo; -def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo; +def VLD2LNd8Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd16Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNd32Pseudo_UPD : VLDQLNWBPseudo, Sched<[WriteVLD1]>; def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1206,8 +1208,8 @@ def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD2LNq16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; +def VLD2LNq32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD1]>; // VLD3LN : Vector Load (single 3-element structure to one lane) class VLD3LN op11_8, bits<4> op7_4, string Dt> @@ -1215,7 +1217,7 @@ class VLD3LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VLD3ln, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []> { + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3", []>, Sched<[WriteVLD2]> { let Rm = 0b1111; let DecoderMethod = "DecodeVLD3LN"; } @@ -1230,9 +1232,9 @@ def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNd8Pseudo : VLDQQLNPseudo; -def VLD3LNd16Pseudo : VLDQQLNPseudo; -def VLD3LNd32Pseudo : VLDQQLNPseudo; +def VLD3LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers: def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16"> { @@ -1242,8 +1244,8 @@ def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNq16Pseudo : VLDQQQQLNPseudo; -def VLD3LNq32Pseudo : VLDQQQQLNPseudo; +def VLD3LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; // ...with address register writeback: class VLD3LNWB op11_8, bits<4> op7_4, string Dt> @@ -1254,7 +1256,7 @@ class VLD3LNWB op11_8, bits<4> op7_4, string Dt> IIC_VLD3lnu, "vld3", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane]\\}, $Rn$Rm", "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $Rn.addr = $wb", - []> { + []>, Sched<[WriteVLD2]> { let DecoderMethod = "DecodeVLD3LN"; } @@ -1268,9 +1270,9 @@ def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo; -def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD3LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1279,8 +1281,8 @@ def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; -def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD3LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD3LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; // VLD4LN : Vector Load (single 4-element structure to one lane) class VLD4LN op11_8, bits<4> op7_4, string Dt> @@ -1289,7 +1291,8 @@ class VLD4LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VLD4ln, "vld4", Dt, "\\{$Vd[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $Rn", - "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []> { + "$src1 = $Vd, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4LN"; @@ -1306,9 +1309,9 @@ def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNd8Pseudo : VLDQQLNPseudo; -def VLD4LNd16Pseudo : VLDQQLNPseudo; -def VLD4LNd32Pseudo : VLDQQLNPseudo; +def VLD4LNd8Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo : VLDQQLNPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers: def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16"> { @@ -1319,8 +1322,8 @@ def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNq16Pseudo : VLDQQQQLNPseudo; -def VLD4LNq32Pseudo : VLDQQQQLNPseudo; +def VLD4LNq16Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo : VLDQQQQLNPseudo, Sched<[WriteVLD2]>; // ...with address register writeback: class VLD4LNWB op11_8, bits<4> op7_4, string Dt> @@ -1347,9 +1350,9 @@ def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo; -def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo; +def VLD4LNd8Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd16Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNd32Pseudo_UPD : VLDQQLNWBPseudo, Sched<[WriteVLD2]>; def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -1359,8 +1362,8 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo; -def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo; +def VLD4LNq16Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; +def VLD4LNq32Pseudo_UPD : VLDQQQQLNWBPseudo, Sched<[WriteVLD2]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1371,7 +1374,8 @@ class VLD1DUP op7_4, string Dt, ValueType Ty, PatFrag LoadOp, (ins AddrMode:$Rn), IIC_VLD1dup, "vld1", Dt, "$Vd, $Rn", "", [(set VecListOneDAllLanes:$Vd, - (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]> { + (Ty (NEONvdup (i32 (LoadOp AddrMode:$Rn)))))]>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1434,7 +1438,7 @@ multiclass VLD1QDUPWB op7_4, string Dt, Operand AddrMode> { (outs VecListDPairAllLanes:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD1dupu, "vld1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; @@ -1491,7 +1495,7 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, (outs VdTy:$Vd, GPR:$wb), (ins AddrMode:$Rn), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; @@ -1500,7 +1504,7 @@ multiclass VLD2DUPWB op7_4, string Dt, RegisterOperand VdTy, (outs VdTy:$Vd, GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm), IIC_VLD2dupu, "vld2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; } @@ -1524,7 +1528,8 @@ defm VLD2DUPd32x2wb : VLD2DUPWB<{1,0,1,?}, "32", VecListDPairSpacedAllLanes, class VLD3DUP op7_4, string Dt> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6dup:$Rn), IIC_VLD3dup, - "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []> { + "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn", "", []>, + Sched<[WriteVLD2]> { let Rm = 0b1111; let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; @@ -1534,9 +1539,9 @@ def VLD3DUPd8 : VLD3DUP<{0,0,0,?}, "8">; def VLD3DUPd16 : VLD3DUP<{0,1,0,?}, "16">; def VLD3DUPd32 : VLD3DUP<{1,0,0,?}, "32">; -def VLD3DUPd8Pseudo : VLDQQPseudo; -def VLD3DUPd16Pseudo : VLDQQPseudo; -def VLD3DUPd32Pseudo : VLDQQPseudo; +def VLD3DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers (not used for codegen): def VLD3DUPq8 : VLD3DUP<{0,0,1,?}, "8">; @@ -1548,7 +1553,7 @@ class VLD3DUPWB op7_4, string Dt, Operand AddrMode> : NLdSt<1, 0b10, 0b1110, op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins AddrMode:$Rn, am6offset:$Rm), IIC_VLD3dupu, "vld3", Dt, "\\{$Vd[], $dst2[], $dst3[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{4} = 0; let DecoderMethod = "DecodeVLD3DupInstruction"; } @@ -1561,9 +1566,9 @@ def VLD3DUPq8_UPD : VLD3DUPWB<{0,0,1,0}, "8", addrmode6dupalign64>; def VLD3DUPq16_UPD : VLD3DUPWB<{0,1,1,?}, "16", addrmode6dupalign64>; def VLD3DUPq32_UPD : VLD3DUPWB<{1,0,1,?}, "32", addrmode6dupalign64>; -def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo; -def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo; -def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo; +def VLD3DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD3DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; // VLD4DUP : Vector Load (single 4-element structure to all lanes) class VLD4DUP op7_4, string Dt> @@ -1580,9 +1585,9 @@ def VLD4DUPd8 : VLD4DUP<{0,0,0,?}, "8">; def VLD4DUPd16 : VLD4DUP<{0,1,0,?}, "16">; def VLD4DUPd32 : VLD4DUP<{1,?,0,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8Pseudo : VLDQQPseudo; -def VLD4DUPd16Pseudo : VLDQQPseudo; -def VLD4DUPd32Pseudo : VLDQQPseudo; +def VLD4DUPd8Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo : VLDQQPseudo, Sched<[WriteVLD2]>; // ...with double-spaced registers (not used for codegen): def VLD4DUPq8 : VLD4DUP<{0,0,1,?}, "8">; @@ -1595,7 +1600,7 @@ class VLD4DUPWB op7_4, string Dt> (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6dup:$Rn, am6offset:$Rm), IIC_VLD4dupu, "vld4", Dt, "\\{$Vd[], $dst2[], $dst3[], $dst4[]\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVLD2]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD4DupInstruction"; } @@ -1608,9 +1613,9 @@ def VLD4DUPq8_UPD : VLD4DUPWB<{0,0,1,0}, "8">; def VLD4DUPq16_UPD : VLD4DUPWB<{0,1,1,?}, "16">; def VLD4DUPq32_UPD : VLD4DUPWB<{1,?,1,?}, "32"> { let Inst{6} = Rn{5}; } -def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo; -def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo; -def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo; +def VLD4DUPd8Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd16Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; +def VLD4DUPd32Pseudo_UPD : VLDQQWBPseudo, Sched<[WriteVLD2]>; } // mayLoad = 1, hasSideEffects = 0, hasExtraDefRegAllocReq = 1 @@ -1657,14 +1662,14 @@ class VSTQQQQWBPseudo // VST1 : Vector Store (multiple single elements) class VST1D op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins AddrMode:$Rn, VecListOneD:$Vd), - IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q op7_4, string Dt, Operand AddrMode> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins AddrMode:$Rn, VecListDPair:$Vd), - IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST2]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1685,7 +1690,7 @@ multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1694,7 +1699,7 @@ multiclass VST1DWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListOneD:$Vd), IIC_VLD1u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST1]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1703,7 +1708,7 @@ multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1712,7 +1717,7 @@ multiclass VST1QWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListDPair:$Vd), IIC_VLD1x2u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1732,7 +1737,7 @@ defm VST1q64wb : VST1QWB<{1,1,?,?}, "64", addrmode6align64or128>; class VST1D3 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins AddrMode:$Rn, VecListThreeD:$Vd), - IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { + IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []>, Sched<[WriteVST3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1741,7 +1746,7 @@ multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1750,7 +1755,7 @@ multiclass VST1D3WB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListThreeD:$Vd), IIC_VLD1x3u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1766,16 +1771,16 @@ defm VST1d16Twb : VST1D3WB<{0,1,0,?}, "16", addrmode6align64>; defm VST1d32Twb : VST1D3WB<{1,0,0,?}, "32", addrmode6align64>; defm VST1d64Twb : VST1D3WB<{1,1,0,?}, "64", addrmode6align64>; -def VST1d64TPseudo : VSTQQPseudo; -def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo; -def VST1d64TPseudoWB_register : VSTQQWBPseudo; +def VST1d64TPseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST3]>; +def VST1d64TPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST3]>; // ...with 4 registers class VST1D4 op7_4, string Dt, Operand AddrMode> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VST1x4, "vst1", Dt, "$Vd, $Rn", "", - []> { + []>, Sched<[WriteVST4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1784,7 +1789,7 @@ multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; @@ -1793,7 +1798,7 @@ multiclass VST1D4WB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1x4u, "vst1", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST1Instruction"; } @@ -1809,9 +1814,9 @@ defm VST1d16Qwb : VST1D4WB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VST1d32Qwb : VST1D4WB<{1,0,?,?}, "32", addrmode6align64or128or256>; defm VST1d64Qwb : VST1D4WB<{1,1,?,?}, "64", addrmode6align64or128or256>; -def VST1d64QPseudo : VSTQQPseudo; -def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo; -def VST1d64QPseudoWB_register : VSTQQWBPseudo; +def VST1d64QPseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST1d64QPseudoWB_register : VSTQQWBPseudo, Sched<[WriteVST4]>; // VST2 : Vector Store (multiple 2-element structures) class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, @@ -1824,22 +1829,22 @@ class VST2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2d16 : VST2<0b1000, {0,1,?,?}, "16", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2d32 : VST2<0b1000, {1,0,?,?}, "32", VecListDPair, IIC_VST2, - addrmode6align64or128>; + addrmode6align64or128>, Sched<[WriteVST2]>; def VST2q8 : VST2<0b0011, {0,0,?,?}, "8", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; def VST2q16 : VST2<0b0011, {0,1,?,?}, "16", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; def VST2q32 : VST2<0b0011, {1,0,?,?}, "32", VecListFourD, IIC_VST2x2, - addrmode6align64or128or256>; + addrmode6align64or128or256>, Sched<[WriteVST4]>; -def VST2q8Pseudo : VSTQQPseudo; -def VST2q16Pseudo : VSTQQPseudo; -def VST2q32Pseudo : VSTQQPseudo; +def VST2q8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST2q32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; // ...with address register writeback: multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, @@ -1847,7 +1852,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, def _fixed : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; @@ -1855,7 +1860,7 @@ multiclass VST2DWB op11_8, bits<4> op7_4, string Dt, def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST2]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } @@ -1864,7 +1869,7 @@ multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { def _fixed : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins AddrMode:$Rn, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn!", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; @@ -1873,7 +1878,7 @@ multiclass VST2QWB op7_4, string Dt, Operand AddrMode> { (ins AddrMode:$Rn, rGPR:$Rm, VecListFourD:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST2Instruction"; } @@ -1890,12 +1895,12 @@ defm VST2q8wb : VST2QWB<{0,0,?,?}, "8", addrmode6align64or128or256>; defm VST2q16wb : VST2QWB<{0,1,?,?}, "16", addrmode6align64or128or256>; defm VST2q32wb : VST2QWB<{1,0,?,?}, "32", addrmode6align64or128or256>; -def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo; -def VST2q8PseudoWB_register : VSTQQWBregisterPseudo; -def VST2q16PseudoWB_register : VSTQQWBregisterPseudo; -def VST2q32PseudoWB_register : VSTQQWBregisterPseudo; +def VST2q8PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_fixed : VSTQQWBfixedPseudo, Sched<[WriteVST4]>; +def VST2q8PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q16PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; +def VST2q32PseudoWB_register : VSTQQWBregisterPseudo, Sched<[WriteVST4]>; // ...with double-spaced registers def VST2b8 : VST2<0b1001, {0,0,?,?}, "8", VecListDPairSpaced, IIC_VST2, @@ -1915,7 +1920,7 @@ defm VST2b32wb : VST2DWB<0b1001, {1,0,?,?}, "32", VecListDPairSpaced, class VST3D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3, - "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { + "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []>, Sched<[WriteVST3]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; @@ -1925,9 +1930,9 @@ def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; def VST3d16 : VST3D<0b0100, {0,1,0,?}, "16">; def VST3d32 : VST3D<0b0100, {1,0,0,?}, "32">; -def VST3d8Pseudo : VSTQQPseudo; -def VST3d16Pseudo : VSTQQPseudo; -def VST3d32Pseudo : VSTQQPseudo; +def VST3d8Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo : VSTQQPseudo, Sched<[WriteVST3]>; // ...with address register writeback: class VST3DWB op11_8, bits<4> op7_4, string Dt> @@ -1935,7 +1940,7 @@ class VST3DWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3), IIC_VST3u, "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST3]> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDST3Instruction"; } @@ -1944,9 +1949,9 @@ def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; def VST3d16_UPD : VST3DWB<0b0100, {0,1,0,?}, "16">; def VST3d32_UPD : VST3DWB<0b0100, {1,0,0,?}, "32">; -def VST3d8Pseudo_UPD : VSTQQWBPseudo; -def VST3d16Pseudo_UPD : VSTQQWBPseudo; -def VST3d32Pseudo_UPD : VSTQQWBPseudo; +def VST3d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; +def VST3d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST3]>; // ...with double-spaced registers: def VST3q8 : VST3D<0b0101, {0,0,0,?}, "8">; @@ -1956,25 +1961,25 @@ def VST3q8_UPD : VST3DWB<0b0101, {0,0,0,?}, "8">; def VST3q16_UPD : VST3DWB<0b0101, {0,1,0,?}, "16">; def VST3q32_UPD : VST3DWB<0b0101, {1,0,0,?}, "32">; -def VST3q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST3q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST3q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; // ...alternate versions to be allocated odd register numbers: -def VST3q8oddPseudo : VSTQQQQPseudo; -def VST3q16oddPseudo : VSTQQQQPseudo; -def VST3q32oddPseudo : VSTQQQQPseudo; +def VST3q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST3]>; -def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST3q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; +def VST3q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST3]>; // VST4 : Vector Store (multiple 4-element structures) class VST4D op11_8, bits<4> op7_4, string Dt> : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST4]> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; @@ -1984,9 +1989,9 @@ def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; def VST4d16 : VST4D<0b0000, {0,1,?,?}, "16">; def VST4d32 : VST4D<0b0000, {1,0,?,?}, "32">; -def VST4d8Pseudo : VSTQQPseudo; -def VST4d16Pseudo : VSTQQPseudo; -def VST4d32Pseudo : VSTQQPseudo; +def VST4d8Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo : VSTQQPseudo, Sched<[WriteVST4]>; // ...with address register writeback: class VST4DWB op11_8, bits<4> op7_4, string Dt> @@ -1994,7 +1999,7 @@ class VST4DWB op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST4u, "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { + "$Rn.addr = $wb", []>, Sched<[WriteVST4]> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDST4Instruction"; } @@ -2003,9 +2008,9 @@ def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; def VST4d16_UPD : VST4DWB<0b0000, {0,1,?,?}, "16">; def VST4d32_UPD : VST4DWB<0b0000, {1,0,?,?}, "32">; -def VST4d8Pseudo_UPD : VSTQQWBPseudo; -def VST4d16Pseudo_UPD : VSTQQWBPseudo; -def VST4d32Pseudo_UPD : VSTQQWBPseudo; +def VST4d8Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d16Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; +def VST4d32Pseudo_UPD : VSTQQWBPseudo, Sched<[WriteVST4]>; // ...with double-spaced registers: def VST4q8 : VST4D<0b0001, {0,0,?,?}, "8">; @@ -2015,18 +2020,18 @@ def VST4q8_UPD : VST4DWB<0b0001, {0,0,?,?}, "8">; def VST4q16_UPD : VST4DWB<0b0001, {0,1,?,?}, "16">; def VST4q32_UPD : VST4DWB<0b0001, {1,0,?,?}, "32">; -def VST4q8Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q16Pseudo_UPD : VSTQQQQWBPseudo; -def VST4q32Pseudo_UPD : VSTQQQQWBPseudo; +def VST4q8Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q32Pseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; // ...alternate versions to be allocated odd register numbers: -def VST4q8oddPseudo : VSTQQQQPseudo; -def VST4q16oddPseudo : VSTQQQQPseudo; -def VST4q32oddPseudo : VSTQQQQPseudo; +def VST4q8oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; +def VST4q32oddPseudo : VSTQQQQPseudo, Sched<[WriteVST4]>; -def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo; -def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo; +def VST4q8oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q16oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; +def VST4q32oddPseudo_UPD : VSTQQQQWBPseudo, Sched<[WriteVST4]>; } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 @@ -2060,12 +2065,13 @@ class VST1LN op11_8, bits<4> op7_4, string Dt, ValueType Ty, : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins AddrMode:$Rn, DPR:$Vd, nohash_imm:$lane), IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "", - [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]> { + [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), AddrMode:$Rn)]>, + Sched<[WriteVST1]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST1LN"; } class VST1QLNPseudo - : VSTQLNPseudo { + : VSTQLNPseudo, Sched<[WriteVST1]> { let Pattern = [(StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr)]; } @@ -2104,11 +2110,12 @@ class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, "\\{$Vd[$lane]\\}, $Rn$Rm", "$Rn.addr = $wb", [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), - AdrMode:$Rn, am6offset:$Rm))]> { + AdrMode:$Rn, am6offset:$Rm))]>, + Sched<[WriteVST1]> { let DecoderMethod = "DecodeVST1LN"; } class VST1QLNWBPseudo - : VSTQLNWBPseudo { + : VSTQLNWBPseudo, Sched<[WriteVST1]> { let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), addrmode6:$addr, am6offset:$offset))]; } @@ -2139,7 +2146,7 @@ class VST2LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, nohash_imm:$lane), IIC_VST2ln, "vst2", Dt, "\\{$Vd[$lane], $src2[$lane]\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST1]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST2LN"; @@ -2155,9 +2162,9 @@ def VST2LNd32 : VST2LN<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNd8Pseudo : VSTQLNPseudo; -def VST2LNd16Pseudo : VSTQLNPseudo; -def VST2LNd32Pseudo : VSTQLNPseudo; +def VST2LNd8Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo : VSTQLNPseudo, Sched<[WriteVST1]>; // ...with double-spaced registers: def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16"> { @@ -2169,8 +2176,8 @@ def VST2LNq32 : VST2LN<0b1001, {?,1,0,?}, "32"> { let Inst{4} = Rn{4}; } -def VST2LNq16Pseudo : VSTQQLNPseudo; -def VST2LNq32Pseudo : VSTQQLNPseudo; +def VST2LNq16Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo : VSTQQLNPseudo, Sched<[WriteVST1]>; // ...with address register writeback: class VST2LNWB op11_8, bits<4> op7_4, string Dt> @@ -2193,9 +2200,9 @@ def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo; -def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo; +def VST2LNd8Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd16Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNd32Pseudo_UPD : VSTQLNWBPseudo, Sched<[WriteVST1]>; def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2204,15 +2211,16 @@ def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,0,?}, "32"> { let Inst{7} = lane{0}; } -def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo; -def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo; +def VST2LNq16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; +def VST2LNq32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST1]>; // VST3LN : Vector Store (single 3-element structure from one lane) class VST3LN op11_8, bits<4> op7_4, string Dt> : NLdStLn<1, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, nohash_imm:$lane), IIC_VST3ln, "vst3", Dt, - "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []> { + "\\{$Vd[$lane], $src2[$lane], $src3[$lane]\\}, $Rn", "", []>, + Sched<[WriteVST2]> { let Rm = 0b1111; let DecoderMethod = "DecodeVST3LN"; } @@ -2227,9 +2235,9 @@ def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNd8Pseudo : VSTQQLNPseudo; -def VST3LNd16Pseudo : VSTQQLNPseudo; -def VST3LNd32Pseudo : VSTQQLNPseudo; +def VST3LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; // ...with double-spaced registers: def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16"> { @@ -2263,9 +2271,9 @@ def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST3LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2274,8 +2282,8 @@ def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32"> { let Inst{7} = lane{0}; } -def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST3LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST3LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; // VST4LN : Vector Store (single 4-element structure from one lane) class VST4LN op11_8, bits<4> op7_4, string Dt> @@ -2283,7 +2291,7 @@ class VST4LN op11_8, bits<4> op7_4, string Dt> (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), IIC_VST4ln, "vst4", Dt, "\\{$Vd[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $Rn", - "", []> { + "", []>, Sched<[WriteVST2]> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVST4LN"; @@ -2300,9 +2308,9 @@ def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNd8Pseudo : VSTQQLNPseudo; -def VST4LNd16Pseudo : VSTQQLNPseudo; -def VST4LNd32Pseudo : VSTQQLNPseudo; +def VST4LNd8Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo : VSTQQLNPseudo, Sched<[WriteVST2]>; // ...with double-spaced registers: def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16"> { @@ -2313,8 +2321,8 @@ def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNq16Pseudo : VSTQQQQLNPseudo; -def VST4LNq32Pseudo : VSTQQQQLNPseudo; +def VST4LNq16Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo : VSTQQQQLNPseudo, Sched<[WriteVST2]>; // ...with address register writeback: class VST4LNWB op11_8, bits<4> op7_4, string Dt> @@ -2339,9 +2347,9 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo; -def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo; +def VST4LNd8Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd16Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNd32Pseudo_UPD : VSTQQLNWBPseudo, Sched<[WriteVST2]>; def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16"> { let Inst{7-6} = lane{1-0}; @@ -2351,8 +2359,8 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32"> { let Inst{5} = Rn{5}; } -def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo; -def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo; +def VST4LNq16Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; +def VST4LNq32Pseudo_UPD : VSTQQQQLNWBPseudo, Sched<[WriteVST2]>; } // mayStore = 1, hasSideEffects = 0, hasExtraSrcRegAllocReq = 1 diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td index 87eb4c2b907..ec5b97cba8c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td +++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td @@ -131,6 +131,17 @@ def WriteFPDIV64 : SchedWrite; def WriteFPSQRT32 : SchedWrite; def WriteFPSQRT64 : SchedWrite; +// Vector load and stores +def WriteVLD1 : SchedWrite; +def WriteVLD2 : SchedWrite; +def WriteVLD3 : SchedWrite; +def WriteVLD4 : SchedWrite; +def WriteVST1 : SchedWrite; +def WriteVST2 : SchedWrite; +def WriteVST3 : SchedWrite; +def WriteVST4 : SchedWrite; + + // Define TII for use in SchedVariant Predicates. def : PredicateProlog<[{ const ARMBaseInstrInfo *TII = diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td index 8fb8a2a3b6d..4e72b13d94c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleA9.td @@ -1981,6 +1981,15 @@ def A9WriteV7 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 7; } def A9WriteV9 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } def A9WriteV10 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 10; } +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + // Reserve A9UnitFP for 2 consecutive cycles. def A9Write2V4 : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 4; diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td index 537e5da9669..782be9b60a7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td @@ -120,6 +120,12 @@ def : WriteRes { def : WriteRes { let Latency = 7; } def : WriteRes { let Latency = 17; } +// Overriden via InstRW for this processor. +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + def : ReadAdvance; // mul operand read in F1 def : ReadAdvance; // fp-mac operand read in F1 @@ -712,20 +718,20 @@ def R52WriteSTM : SchedWriteVariant<[ // Vector Load/Stores. Can issue only in slot-0. Can dual-issue with // another instruction in slot-1, but only in the last issue. -def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;} -def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 5;} +def : WriteRes { let Latency = 6; let NumMicroOps = 3; let ResourceCycles = [2]; let SingleIssue = 1; } -def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 7; let NumMicroOps = 5; let ResourceCycles = [3]; let SingleIssue = 1; } -def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> { +def : WriteRes { let Latency = 8; let NumMicroOps = 7; let ResourceCycles = [4]; @@ -828,95 +834,6 @@ def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>; def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>; -//--- -// VLDx. Vector Loads -//--- -// 1-element structure load -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>; - -// 2-element structure load -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>; - -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>; -def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>; - -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>; -def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>; - -// 3-element structure load -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>; - -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - -// 4-element structure load -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; - - -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>; -def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>; - //--- // VSTx. Vector Stores //--- diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td index dc041c6c600..b838688c6f0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td +++ b/contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td @@ -1070,6 +1070,16 @@ let SchedModel = SwiftModel in { def : ReadAdvance; def : ReadAdvance; + // Overriden via InstRW for this processor. + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + def : WriteRes; + // Not specified. def : InstRW<[SwiftWriteP01OneCycle2x], (instregex "ABS")>; // Preload. diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp index 1979cbf5012..c4f23c66e4e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -85,9 +85,9 @@ namespace llvm { extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine X(getTheARMLETarget()); + RegisterTargetMachine A(getTheThumbLETarget()); RegisterTargetMachine Y(getTheARMBETarget()); - RegisterTargetMachine A(getTheThumbLETarget()); - RegisterTargetMachine B(getTheThumbBETarget()); + RegisterTargetMachine B(getTheThumbBETarget()); PassRegistry &Registry = *PassRegistry::getPassRegistry(); initializeGlobalISel(Registry); @@ -263,6 +263,11 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT, else this->Options.EABIVersion = EABI::EABI5; } + + initAsmInfo(); + if (!Subtarget.isThumb() && !Subtarget.hasARMOps()) + report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " + "support ARM mode execution!"); } ARMBaseTargetMachine::~ARMBaseTargetMachine() = default; @@ -355,22 +360,6 @@ TargetIRAnalysis ARMBaseTargetMachine::getTargetIRAnalysis() { }); } -void ARMTargetMachine::anchor() {} - -ARMTargetMachine::ARMTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, CodeGenOpt::Level OL, - bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); - if (!Subtarget.hasARMOps()) - report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " - "support ARM mode execution!"); -} - -void ARMLETargetMachine::anchor() {} ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -378,9 +367,7 @@ ARMLETargetMachine::ARMLETargetMachine(const Target &T, const Triple &TT, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ARMBETargetMachine::anchor() {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, @@ -388,39 +375,7 @@ ARMBETargetMachine::ARMBETargetMachine(const Target &T, const Triple &TT, Optional RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} - -void ThumbTargetMachine::anchor() {} - -ThumbTargetMachine::ThumbTargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { - initAsmInfo(); -} - -void ThumbLETargetMachine::anchor() {} - -ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} - -void ThumbBETargetMachine::anchor() {} - -ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, const Triple &TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Optional RM, - CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h index f0ca9427d9f..e5eb27114c7 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h +++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.h @@ -62,23 +62,9 @@ public: } }; -/// ARM target machine. +/// ARM/Thumb little endian target machine. /// -class ARMTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); - -public: - ARMTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); -}; - -/// ARM little endian target machine. -/// -class ARMLETargetMachine : public ARMTargetMachine { - void anchor() override; - +class ARMLETargetMachine : public ARMBaseTargetMachine { public: ARMLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -86,11 +72,9 @@ public: CodeGenOpt::Level OL); }; -/// ARM big endian target machine. +/// ARM/Thumb big endian target machine. /// -class ARMBETargetMachine : public ARMTargetMachine { - void anchor() override; - +class ARMBETargetMachine : public ARMBaseTargetMachine { public: ARMBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -98,44 +82,6 @@ public: CodeGenOpt::Level OL); }; -/// Thumb target machine. -/// Due to the way architectures are handled, this represents both -/// Thumb-1 and Thumb-2. -/// -class ThumbTargetMachine : public ARMBaseTargetMachine { - virtual void anchor(); - -public: - ThumbTargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool isLittle); -}; - -/// Thumb little endian target machine. -/// -class ThumbLETargetMachine : public ThumbTargetMachine { - void anchor() override; - -public: - ThumbLETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - -/// Thumb big endian target machine. -/// -class ThumbBETargetMachine : public ThumbTargetMachine { - void anchor() override; - -public: - ThumbBETargetMachine(const Target &T, const Triple &TT, StringRef CPU, - StringRef FS, const TargetOptions &Options, - Optional RM, CodeModel::Model CM, - CodeGenOpt::Level OL); -}; - } // end namespace llvm #endif // LLVM_LIB_TARGET_ARM_ARMTARGETMACHINE_H diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp index 94f9e8dfebb..edbf2b99126 100644 --- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -30,8 +30,8 @@ using namespace dwarf; void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) { - const ARMTargetMachine &ARM_TM = static_cast(TM); - bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS; + const ARMBaseTargetMachine &ARM_TM = static_cast(TM); + bool isAAPCS_ABI = ARM_TM.TargetABI == ARMBaseTargetMachine::ARMABI::ARM_ABI_AAPCS; genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly(); TargetLoweringObjectFileELF::Initialize(Ctx, TM); diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index 1a17d4e33e4..f917c35b9ce 100644 --- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -535,14 +535,14 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // Look for a temporary register to use. // First, compute the liveness information. - LivePhysRegs UsedRegs(STI.getRegisterInfo()); + const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); + LivePhysRegs UsedRegs(TRI); UsedRegs.addLiveOuts(MBB); // The semantic of pristines changed recently and now, // the callee-saved registers that are touched in the function // are not part of the pristines set anymore. // Add those callee-saved now. - const TargetRegisterInfo *TRI = STI.getRegisterInfo(); - const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) UsedRegs.addReg(CSRegs[i]); @@ -561,12 +561,12 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, // And some temporary register, just in case. unsigned TemporaryReg = 0; BitVector PopFriendly = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::tGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); // Rebuild the GPRs from the high registers because they are removed // form the GPR reg class for thumb1. BitVector GPRsNoLRSP = - TRI->getAllocatableSet(MF, TRI->getRegClass(ARM::hGPRRegClassID)); + TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); GPRsNoLRSP |= PopFriendly; GPRsNoLRSP.reset(ARM::LR); GPRsNoLRSP.reset(ARM::SP); diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td index 06ad2b3ffdf..f10ca394f36 100644 --- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -902,7 +902,6 @@ let Defs = [SREG] in // CPI Rd, K // Compares a register with an 8 bit immediate. - let Uses = [SREG] in def CPIRdK : FRdK<0b0011, (outs), (ins GPR8:$rd, imm_ldi8:$k), diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp index 6897161c903..cc7a7c3849b 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.cpp @@ -132,6 +132,10 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM, MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 128; } +bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + return false; +} + SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { case ISD::BR_CC: @@ -496,8 +500,11 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const { SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { + auto N = cast(Op); + assert(N->getOffset() == 0 && "Invalid offset for global address"); + SDLoc DL(Op); - const GlobalValue *GV = cast(Op)->getGlobal(); + const GlobalValue *GV = N->getGlobal(); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64); return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA); diff --git a/contrib/llvm/lib/Target/BPF/BPFISelLowering.h b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h index 3d1726be286..0b8a8ca20c3 100644 --- a/contrib/llvm/lib/Target/BPF/BPFISelLowering.h +++ b/contrib/llvm/lib/Target/BPF/BPFISelLowering.h @@ -42,6 +42,10 @@ public: // This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; + // This method decides whether folding a constant offset + // with the given GlobalAddress is legal. + bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; + MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp index a04aca4afa0..25018b9ed51 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -1657,7 +1657,7 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B, // defined. From the point of view of the liveness tracking, it is ok to // store it as a whole, but if we break it up we may end up storing a // register that is entirely undefined. - LivePhysRegs LPR(&HRI); + LivePhysRegs LPR(HRI); LPR.addLiveIns(B); SmallVector,2> Clobbers; for (auto R = B.begin(); R != It; ++R) { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp index 03794511414..66e07c67958 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1254,7 +1254,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { const MachineOperand &Op1 = MI.getOperand(1); const MachineOperand &Op2 = MI.getOperand(2); const MachineOperand &Op3 = MI.getOperand(3); - LivePhysRegs LiveAtMI(&HRI); + LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); if (Op0.getReg() != Op2.getReg()) { @@ -1283,7 +1283,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MachineOperand &Op1 = MI.getOperand(1); MachineOperand &Op2 = MI.getOperand(2); MachineOperand &Op3 = MI.getOperand(3); - LivePhysRegs LiveAtMI(&HRI); + LivePhysRegs LiveAtMI(HRI); getLiveRegsAt(LiveAtMI, MI); bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg()); diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td index 0f99dfe342b..93fb688fc1c 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td +++ b/contrib/llvm/lib/Target/Hexagon/HexagonPseudo.td @@ -412,6 +412,15 @@ def PS_vstorerwu_ai: STrivv_template, def PS_vstorerwu_ai_128B: STrivv_template, Requires<[HasV60T,UseHVXDbl]>; +let isPseudo = 1, isCodeGenOnly = 1, mayStore = 1, hasSideEffects = 0 in { + def PS_vstorerq_ai: Pseudo<(outs), + (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs:$Qt), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vstorerq_ai_128B: Pseudo<(outs), + (ins IntRegs:$Rs, s32_0Imm:$Off, VecPredRegs128B:$Qt), "", []>, + Requires<[HasV60T,UseHVXDbl]>; +} + // Vector load pseudos let Predicates = [HasV60T, UseHVX], isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in @@ -429,30 +438,16 @@ def PS_vloadrwu_ai: LDrivv_template, def PS_vloadrwu_ai_128B: LDrivv_template, Requires<[HasV60T,UseHVXDbl]>; -// Store vector predicate pseudo. -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13, - isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in { - def PS_vstorerq_ai : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXSgl]>; - - def PS_vstorerq_ai_128B : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VectorRegs:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXSgl]>; - - def PS_vloadrq_ai : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXDbl]>; - - def PS_vloadrq_ai_128B : STInst<(outs), - (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1), - ".error \"should not emit\" ", []>, - Requires<[HasV60T,UseHVXDbl]>; +let isPseudo = 1, isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in { + def PS_vloadrq_ai: Pseudo<(outs VecPredRegs:$Qd), + (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>, + Requires<[HasV60T,UseHVXSgl]>; + def PS_vloadrq_ai_128B: Pseudo<(outs VecPredRegs128B:$Qd), + (ins IntRegs:$Rs, s32_0Imm:$Off), "", []>, + Requires<[HasV60T,UseHVXDbl]>; } + let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in class VSELInst : InstHexagon; diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp index 2a1bb63af78..1fc157900ed 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp @@ -50,11 +50,6 @@ bool HexagonRegisterInfo::isEHReturnCalleeSaveReg(unsigned R) const { R == Hexagon::R3 || R == Hexagon::D0 || R == Hexagon::D1; } -bool HexagonRegisterInfo::isCalleeSaveReg(unsigned Reg) const { - return Hexagon::R16 <= Reg && Reg <= Hexagon::R27; -} - - const MCPhysReg * HexagonRegisterInfo::getCallerSavedRegs(const MachineFunction *MF, const TargetRegisterClass *RC) const { diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h index 8a3f175b848..5f65fad2cc0 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h +++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h @@ -77,7 +77,6 @@ public: unsigned getFirstCallerSavedNonParamReg() const; bool isEHReturnCalleeSaveReg(unsigned Reg) const; - bool isCalleeSaveReg(unsigned Reg) const; }; } // end namespace llvm diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp index c21b6e2515d..cd474921d4b 100644 --- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp +++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp @@ -214,12 +214,12 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { for (auto &MB : MF) { auto Begin = MB.begin(), End = MB.end(); while (Begin != End) { - // First the first non-boundary starting from the end of the last + // Find the first non-boundary starting from the end of the last // scheduling region. MachineBasicBlock::iterator RB = Begin; while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF)) ++RB; - // First the first boundary starting from the beginning of the new + // Find the first boundary starting from the beginning of the new // region. MachineBasicBlock::iterator RE = RB; while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF)) diff --git a/contrib/llvm/lib/Target/MSP430/MSP430.td b/contrib/llvm/lib/Target/MSP430/MSP430.td index dfea669f3ba..203864dd406 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430.td +++ b/contrib/llvm/lib/Target/MSP430/MSP430.td @@ -22,6 +22,18 @@ def FeatureX : SubtargetFeature<"ext", "ExtendedInsts", "true", "Enable MSP430-X extensions">; +def FeatureHWMult16 + : SubtargetFeature<"hwmult16", "HWMultMode", "HWMult16", + "Enable 16-bit hardware multiplier">; + +def FeatureHWMult32 + : SubtargetFeature<"hwmult32", "HWMultMode", "HWMult32", + "Enable 32-bit hardware multiplier">; + +def FeatureHWMultF5 + : SubtargetFeature<"hwmultf5", "HWMultMode", "HWMultF5", + "Enable F5 series hardware multiplier">; + //===----------------------------------------------------------------------===// // MSP430 supported processors. //===----------------------------------------------------------------------===// @@ -29,6 +41,8 @@ class Proc Features> : Processor; def : Proc<"generic", []>; +def : Proc<"msp430", []>; +def : Proc<"msp430x", [FeatureX]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index cd58eda5d92..0b02f79f472 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -403,12 +403,12 @@ void MSP430DAGToDAGISel::Select(SDNode *Node) { int FI = cast(Node)->getIndex(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i16); if (Node->hasOneUse()) { - CurDAG->SelectNodeTo(Node, MSP430::ADD16ri, MVT::i16, TFI, + CurDAG->SelectNodeTo(Node, MSP430::ADDframe, MVT::i16, TFI, CurDAG->getTargetConstant(0, dl, MVT::i16)); return; } ReplaceNode(Node, CurDAG->getMachineNode( - MSP430::ADD16ri, dl, MVT::i16, TFI, + MSP430::ADDframe, dl, MVT::i16, TFI, CurDAG->getTargetConstant(0, dl, MVT::i16))); return; } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index cc6e64043f5..dae14fd301e 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -38,27 +38,6 @@ using namespace llvm; #define DEBUG_TYPE "msp430-lower" -typedef enum { - NoHWMult, - HWMult16, - HWMult32, - HWMultF5 -} HWMultUseMode; - -static cl::opt -HWMultMode("mhwmult", cl::Hidden, - cl::desc("Hardware multiplier use mode"), - cl::init(NoHWMult), - cl::values( - clEnumValN(NoHWMult, "none", - "Do not use hardware multiplier"), - clEnumValN(HWMult16, "16bit", - "Use 16-bit hardware multiplier"), - clEnumValN(HWMult32, "32bit", - "Use 32-bit hardware multiplier"), - clEnumValN(HWMultF5, "f5series", - "Use F5 series hardware multiplier"))); - MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, const MSP430Subtarget &STI) : TargetLowering(TM) { @@ -262,7 +241,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, setCmpLibcallCC(LC.Op, LC.Cond); } - if (HWMultMode == HWMult16) { + if (STI.hasHWMult16()) { const struct { const RTLIB::Libcall Op; const char * const Name; @@ -277,7 +256,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); } - } else if (HWMultMode == HWMult32) { + } else if (STI.hasHWMult32()) { const struct { const RTLIB::Libcall Op; const char * const Name; @@ -292,7 +271,7 @@ MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM, for (const auto &LC : LibraryCalls) { setLibcallName(LC.Op, LC.Name); } - } else if (HWMultMode == HWMultF5) { + } else if (STI.hasHWMultF5()) { const struct { const RTLIB::Libcall Op; const char * const Name; diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td index 1cd18611e52..cec43040f60 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td +++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td @@ -122,6 +122,11 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), [(MSP430callseq_end timm:$amt1, timm:$amt2)]>; } +let Defs = [SR], Uses = [SP] in { +def ADDframe : Pseudo<(outs GR16:$dst), (ins i16imm:$base, i16imm:$offset), + "# ADDframe PSEUDO", []>; +} + let usesCustomInserter = 1 in { let Uses = [SR] in { def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc), diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp index 9600bc28f10..7a3b7a8bd5f 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -127,7 +127,7 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Fold imm into offset Offset += MI.getOperand(FIOperandNum + 1).getImm(); - if (MI.getOpcode() == MSP430::ADD16ri) { + if (MI.getOpcode() == MSP430::ADDframe) { // This is actually "load effective address" of the stack slot // instruction. We have only two-address instructions, thus we need to // expand it into mov + add diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp index 6216348e4d7..776a9dcb11d 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.cpp @@ -19,6 +19,20 @@ using namespace llvm; #define DEBUG_TYPE "msp430-subtarget" +static cl::opt +HWMultModeOption("mhwmult", cl::Hidden, + cl::desc("Hardware multiplier use mode for MSP430"), + cl::init(MSP430Subtarget::NoHWMult), + cl::values( + clEnumValN(MSP430Subtarget::NoHWMult, "none", + "Do not use hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMult16, "16bit", + "Use 16-bit hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMult32, "32bit", + "Use 32-bit hardware multiplier"), + clEnumValN(MSP430Subtarget::HWMultF5, "f5series", + "Use F5 series hardware multiplier"))); + #define GET_SUBTARGETINFO_TARGET_DESC #define GET_SUBTARGETINFO_CTOR #include "MSP430GenSubtargetInfo.inc" @@ -27,7 +41,18 @@ void MSP430Subtarget::anchor() { } MSP430Subtarget & MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - ParseSubtargetFeatures("generic", FS); + ExtendedInsts = false; + HWMultMode = NoHWMult; + + std::string CPUName = CPU; + if (CPUName.empty()) + CPUName = "msp430"; + + ParseSubtargetFeatures(CPUName, FS); + + if (HWMultModeOption != NoHWMult) + HWMultMode = HWMultModeOption; + return *this; } diff --git a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h index 1a00d85e01c..8828dfd6587 100644 --- a/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h +++ b/contrib/llvm/lib/Target/MSP430/MSP430Subtarget.h @@ -30,8 +30,15 @@ namespace llvm { class StringRef; class MSP430Subtarget : public MSP430GenSubtargetInfo { +public: + enum HWMultEnum { + NoHWMult, HWMult16, HWMult32, HWMultF5 + }; + +private: virtual void anchor(); bool ExtendedInsts; + HWMultEnum HWMultMode; MSP430FrameLowering FrameLowering; MSP430InstrInfo InstrInfo; MSP430TargetLowering TLInfo; @@ -50,6 +57,10 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool hasHWMult16() const { return HWMultMode == HWMult16; } + bool hasHWMult32() const { return HWMultMode == HWMult32; } + bool hasHWMultF5() const { return HWMultMode == HWMultF5; } + const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; } diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp index 3641a70d61b..8fe4e75f3e1 100644 --- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -813,28 +813,28 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, !isShiftedMask(CN->getZExtValue(), SMPos1, SMSize1)) return SDValue(); - // The shift masks must have the same position and size. - if (SMPos0 != SMPos1 || SMSize0 != SMSize1) - return SDValue(); + // The shift masks must have the same position and size. + if (SMPos0 != SMPos1 || SMSize0 != SMSize1) + return SDValue(); - SDValue Shl = And1.getOperand(0); + SDValue Shl = And1.getOperand(0); - if (!(CN = dyn_cast(Shl.getOperand(1)))) - return SDValue(); + if (!(CN = dyn_cast(Shl.getOperand(1)))) + return SDValue(); - unsigned Shamt = CN->getZExtValue(); + unsigned Shamt = CN->getZExtValue(); - // Return if the shift amount and the first bit position of mask are not the - // same. - EVT ValTy = N->getValueType(0); - if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) - return SDValue(); + // Return if the shift amount and the first bit position of mask are not the + // same. + EVT ValTy = N->getValueType(0); + if ((Shamt != SMPos0) || (SMPos0 + SMSize0 > ValTy.getSizeInBits())) + return SDValue(); - SDLoc DL(N); - return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), - DAG.getConstant(SMPos0, DL, MVT::i32), - DAG.getConstant(SMSize0, DL, MVT::i32), - And0.getOperand(0)); + SDLoc DL(N); + return DAG.getNode(MipsISD::Ins, DL, ValTy, Shl.getOperand(0), + DAG.getConstant(SMPos0, DL, MVT::i32), + DAG.getConstant(SMSize0, DL, MVT::i32), + And0.getOperand(0)); } else { // Pattern match DINS. // $dst = or (and $src, mask0), mask1 diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp index 8f5ecadecde..1f4e933db2a 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp @@ -59,9 +59,8 @@ static cl::opt void MipsSubtarget::anchor() { } -MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, bool little, - const MipsTargetMachine &TM) +MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, + bool little, const MipsTargetMachine &TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(MipsDefault), IsLittle(little), IsSoftFloat(false), IsSingleFloat(false), IsFPXX(false), NoABICalls(false), IsFP64bit(false), UseOddSPReg(true), @@ -77,8 +76,6 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, const std::string &CPU, FrameLowering(MipsFrameLowering::create(*this)), TLInfo(MipsTargetLowering::create(TM, *this)) { - PreviousInMips16Mode = InMips16Mode; - if (MipsArchVersion == MipsDefault) MipsArchVersion = Mips32; diff --git a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h index cca2cb8a466..b4d15ee361f 100644 --- a/contrib/llvm/lib/Target/Mips/MipsSubtarget.h +++ b/contrib/llvm/lib/Target/Mips/MipsSubtarget.h @@ -119,9 +119,6 @@ class MipsSubtarget : public MipsGenSubtargetInfo { // Mips16 hard float bool InMips16HardFloat; - // PreviousInMips16 -- the function we just processed was in Mips 16 Mode - bool PreviousInMips16Mode; - // InMicroMips -- can process MicroMips instructions bool InMicroMipsMode; @@ -178,8 +175,8 @@ public: /// This constructor initializes the data members to match that /// of the specified triple. - MipsSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, - bool little, const MipsTargetMachine &TM); + MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS, bool little, + const MipsTargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. diff --git a/contrib/llvm/lib/Target/Nios2/CMakeLists.txt b/contrib/llvm/lib/Target/Nios2/CMakeLists.txt new file mode 100644 index 00000000000..78db452094b --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/CMakeLists.txt @@ -0,0 +1,18 @@ +set(LLVM_TARGET_DEFINITIONS Nios2.td) + +#Generate Nios2GenRegisterInfo.inc and Nios2GenInstrInfo.inc which included by +#your hand code C++ files. +#Nios2GenRegisterInfo.inc came from Nios2RegisterInfo.td, Nios2GenInstrInfo.inc +#came from Nios2InstrInfo.td. +tablegen(LLVM Nios2GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM Nios2GenInstrInfo.inc -gen-instr-info) + +#Nios2CommonTableGen must be defined +add_public_tablegen_target(Nios2CommonTableGen) + +#Nios2CodeGen should match with LLVMBuild.txt Nios2CodeGen +add_llvm_target(Nios2CodeGen Nios2TargetMachine.cpp) + +#Should match with "subdirectories = MCTargetDesc TargetInfo" in LLVMBuild.txt +add_subdirectory(TargetInfo) +add_subdirectory(MCTargetDesc) diff --git a/contrib/llvm/lib/Target/Nios2/LLVMBuild.txt b/contrib/llvm/lib/Target/Nios2/LLVMBuild.txt new file mode 100644 index 00000000000..b40a7637970 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/LLVMBuild.txt @@ -0,0 +1,61 @@ +;===- ./lib/Target/Nios2/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +#Following comments extracted from http: // llvm.org/docs/LLVMBuild.html + +[common] +subdirectories = + MCTargetDesc + TargetInfo + +[component_0] +#TargetGroup components are an extension of LibraryGroups, specifically for +#defining LLVM targets(which are handled specially in a few places). +type = TargetGroup +#The name of the component should always be the name of the target.(should +#match "def Nios2 : Target" in Nios2.td) +name = Nios2 +#Nios2 component is located in directory Target / +parent = Target +#Whether this target defines an assembly parser, assembly printer, disassembler +#, and supports JIT compilation.They are optional. + +[component_1] +#component_1 is a Library type and name is Nios2CodeGen.After build it will +#in lib / libLLVMNios2CodeGen.a of your build command directory. +type = Library +name = Nios2CodeGen +#Nios2CodeGen component(Library) is located in directory Nios2 / +parent = Nios2 +#If given, a list of the names of Library or LibraryGroup components which +#must also be linked in whenever this library is used.That is, the link time +#dependencies for this component.When tools are built, the build system will +#include the transitive closure of all required_libraries for the components +#the tool needs. +required_libraries = CodeGen + Core + GlobalISel + MC + Nios2Desc + Nios2Info + Support + Target +#end of required_libraries + +#All LLVMBuild.txt in Target / Nios2 and subdirectory use 'add_to_library_groups +#= Nios2' +add_to_library_groups = Nios2 diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt new file mode 100644 index 00000000000..21def509a23 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/CMakeLists.txt @@ -0,0 +1,2 @@ +#MCTargetDesc / CMakeLists.txt +add_llvm_library(LLVMNios2Desc Nios2MCTargetDesc.cpp) diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 00000000000..4dc6995e7f5 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,25 @@ +;===- ./lib/Target/Nios2/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Nios2Desc +parent = Nios2 +required_libraries = MC + Nios2Info + Support +add_to_library_groups = Nios2 diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp new file mode 100644 index 00000000000..d913166399c --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.cpp @@ -0,0 +1,25 @@ +//===-- Nios2MCTargetDesc.cpp - Nios2 Target Descriptions -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Nios2 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#include "Nios2MCTargetDesc.h" +#include "llvm/MC/MCInstrInfo.h" + +using namespace llvm; + +#define GET_INSTRINFO_MC_DESC +#include "Nios2GenInstrInfo.inc" + +#define GET_REGINFO_MC_DESC +#include "Nios2GenRegisterInfo.inc" + +extern "C" void LLVMInitializeNios2TargetMC() {} diff --git a/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h new file mode 100644 index 00000000000..d426062db16 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/MCTargetDesc/Nios2MCTargetDesc.h @@ -0,0 +1,34 @@ +//===-- Nios2MCTargetDesc.h - Nios2 Target Descriptions ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides Nios2 specific target descriptions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H +#define LLVM_LIB_TARGET_NIOS2_MCTARGETDESC_NIOS2MCTARGETDESC_H + +namespace llvm { +class Target; +class Triple; + +Target &getTheNios2Target(); + +} // namespace llvm + +// Defines symbolic names for Nios2 registers. This defines a mapping from +// register name to register number. +#define GET_REGINFO_ENUM +#include "Nios2GenRegisterInfo.inc" + +// Defines symbolic names for the Nios2 instructions. +#define GET_INSTRINFO_ENUM +#include "Nios2GenInstrInfo.inc" + +#endif diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.h b/contrib/llvm/lib/Target/Nios2/Nios2.h new file mode 100644 index 00000000000..87202f48cfb --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/Nios2.h @@ -0,0 +1,25 @@ +//===-- Nios2.h - Top-level interface for Nios2 representation --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the entry points for global functions defined in +// the LLVM Nios2 back-end. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2_H + +#include "MCTargetDesc/Nios2MCTargetDesc.h" +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Nios2TargetMachine; +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Nios2/Nios2.td b/contrib/llvm/lib/Target/Nios2/Nios2.td new file mode 100644 index 00000000000..e8abba86337 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/Nios2.td @@ -0,0 +1,29 @@ +//===-- Nios2.td - Describe the Nios2 Target Machine -------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Target-independent interfaces +//===----------------------------------------------------------------------===// + +include "llvm/Target/Target.td" + +//===----------------------------------------------------------------------===// +// Target-dependent interfaces +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Calling Conv, Instruction Descriptions +//===----------------------------------------------------------------------===// + +include "Nios2RegisterInfo.td" +include "Nios2InstrInfo.td" + +def Nios2InstrInfo : InstrInfo; + +def Nios2 : Target { let InstructionSet = Nios2InstrInfo; } diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td new file mode 100644 index 00000000000..79868be48a4 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/Nios2InstrFormats.td @@ -0,0 +1,117 @@ +//===-- Nios2InstrFormats.td - Nios2 Instruction Formats ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Describe NIOS2 instructions format +// +// +//===----------------------------------------------------------------------===// + +// Format specifies the encoding used by the instruction. This is part of the +// ad-hoc solution used to emit machine instruction encodings by our machine +// code emitter. +class Format val> { + bits<3> Value = val; +} + +def Pseudo : Format<0>; +def FrmI : Format<1>; +def FrmR : Format<2>; +def FrmJ : Format<3>; +def FrmOther : Format<4>; // Instruction w/ a custom format + +// Generic Nios2 Format +class Nios2Inst pattern, Format f> + : Instruction { + field bits<32> Inst; + Format Form = f; + + let Namespace = "Nios2"; + + let Size = 4; + + bits<6> Opcode = 0; + + // Bottom 6 bits are the 'opcode' field + let Inst{5 - 0} = Opcode; + + let OutOperandList = outs; + let InOperandList = ins; + + let AsmString = asmstr; + let Pattern = pattern; + + // + // Attributes specific to Nios2 instructions: + // + bits<3> FormBits = Form.Value; + + // TSFlags layout should be kept in sync with Nios2InstrInfo.h. + let TSFlags{2 - 0} = FormBits; + + let DecoderNamespace = "Nios2"; +} + +// Nios2 Instruction Format +class InstSE pattern, Format f> + : Nios2Inst { +} + +//===----------------------------------------------------------------------===// +// Format I instruction class in Nios2 : <|A|B|immediate|opcode|> +//===----------------------------------------------------------------------===// + +class FI op, dag outs, dag ins, string asmstr, list pattern> + : InstSE { + bits<5> rA; + bits<5> rB; + bits<16> imm; + + let Opcode = op; + + let Inst{31 - 27} = rA; + let Inst{26 - 22} = rB; + let Inst{21 - 6} = imm; +} + +//===----------------------------------------------------------------------===// +// Format R instruction : <|A|B|C|opx|imm|opcode|> +//===----------------------------------------------------------------------===// + +class FR opx, dag outs, dag ins, string asmstr, list pattern> + : InstSE { + bits<5> rA; + bits<5> rB; + bits<5> rC; + bits<5> imm = 0; + + // opcode is always 0x3a for R instr. + let Opcode = 0x3a; + + let Inst{31 - 27} = rA; + let Inst{26 - 22} = rB; + let Inst{21 - 17} = rC; + // opx stands for opcode extension + let Inst{16 - 11} = opx; + // optional 5-bit immediate value + let Inst{10 - 6} = imm; +} + +//===----------------------------------------------------------------------===// +// Format J instruction class in Nios2 : <|address|opcode|> +//===----------------------------------------------------------------------===// + +class FJ op, dag outs, dag ins, string asmstr, list pattern> + : InstSE { + bits<26> addr; + + let Opcode = op; + + let Inst{31 - 6} = addr; +} diff --git a/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td new file mode 100644 index 00000000000..5e4815ab3e1 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/Nios2InstrInfo.td @@ -0,0 +1,50 @@ +//===- Nios2InstrInfo.td - Target Description for Nios2 ------*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the Nios2 implementation of the TargetInstrInfo class. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Instruction format superclass +//===----------------------------------------------------------------------===// + +include "Nios2InstrFormats.td" + +//===----------------------------------------------------------------------===// +// Nios2 Operand, Complex Patterns and Transformations Definitions. +//===----------------------------------------------------------------------===// + +def simm16 : Operand { + let DecoderMethod= "DecodeSimm16"; +} + +// Node immediate fits as 16-bit sign extended on target immediate. +// e.g. addi, andi +def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>; + +//===----------------------------------------------------------------------===// +// Instructions specific format +//===----------------------------------------------------------------------===// + +// Arithmetic and logical instructions with 2 register operands. +class ArithLogicI op, string instr_asm, SDNode OpNode, + Operand Od, PatLeaf imm_type, RegisterClass RC> : + FI { + let isReMaterializable = 1; +} + +//===----------------------------------------------------------------------===// +// Nios2 R1 Instructions +//===----------------------------------------------------------------------===// + +/// Arithmetic Instructions (ALU Immediate) +def ADDi : ArithLogicI<0x04, "addi", add, simm16, immSExt16, CPURegs>; diff --git a/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td new file mode 100644 index 00000000000..1808815816f --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/Nios2RegisterInfo.td @@ -0,0 +1,60 @@ +//===-- Nios2RegisterInfo.td - Nios2 Register defs ---------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +// We have bank of 32 registers. +class Nios2Reg : Register { + field bits<5> Num; + let Namespace = "Nios2"; +} + +// Nios2 CPU Registers +class Nios2GPRReg num, string n> : Nios2Reg { + let Num = num; +} + +//===----------------------------------------------------------------------===// +// Registers +//===----------------------------------------------------------------------===// + +let Namespace = "Nios2" in { + // General Purpose Registers + def ZERO : Nios2GPRReg<0, "zero">, DwarfRegNum<[ 0 ]>; + def AT : Nios2GPRReg<1, "at">, DwarfRegNum<[ 1 ]>; + foreach RegNum = 2 - 23 in { + def R #RegNum : Nios2GPRReg, DwarfRegNum<[ RegNum ]>; + } + def ET : Nios2GPRReg<24, "et">, DwarfRegNum<[ 24 ]>; + def BT : Nios2GPRReg<25, "bt">, DwarfRegNum<[ 25 ]>; + def GP : Nios2GPRReg<26, "gp">, DwarfRegNum<[ 26 ]>; + def SP : Nios2GPRReg<27, "sp">, DwarfRegNum<[ 27 ]>; + def FP : Nios2GPRReg<28, "fp">, DwarfRegNum<[ 28 ]>; + def EA : Nios2GPRReg<29, "ea">, DwarfRegNum<[ 29 ]>; + def BA : Nios2GPRReg<30, "ba">, DwarfRegNum<[ 30 ]>; + def RA : Nios2GPRReg<31, "ra">, DwarfRegNum<[ 31 ]>; + def PC : Nios2Reg<"pc">, DwarfRegNum<[ 32 ]>; +} + +//===----------------------------------------------------------------------===// +// Register Classes +//===----------------------------------------------------------------------===// + +def CPURegs : RegisterClass<"Nios2", [ i32 ], 32, + (add + // Reserved + ZERO, + AT, + // Return Values and Arguments + (sequence "R%u", 2, 7), + // Not preserved across procedure calls + // Caller saved + (sequence "R%u", 8, 15), + // Callee saved + (sequence "R%u", 16, 23), + // Reserved + ET, BT, GP, SP, FP, EA, BA, RA, PC)>; diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp new file mode 100644 index 00000000000..16d4eabcfaf --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.cpp @@ -0,0 +1,46 @@ +//===-- Nios2TargetMachine.cpp - Define TargetMachine for Nios2 -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the info about Nios2 target spec. +// +//===----------------------------------------------------------------------===// + +#include "Nios2TargetMachine.h" +#include "Nios2.h" + +using namespace llvm; + +#define DEBUG_TYPE "nios2" + +extern "C" void LLVMInitializeNios2Target() { + // Register the target. +} + +static std::string computeDataLayout(const Triple &TT, StringRef CPU, + const TargetOptions &Options) { + return "e-p:32:32:32-i8:8:32-i16:16:32-n32"; +} + +static Reloc::Model getEffectiveRelocModel(CodeModel::Model CM, + Optional RM) { + if (!RM.hasValue() || CM == CodeModel::JITDefault) + return Reloc::Static; + return *RM; +} + +Nios2TargetMachine::Nios2TargetMachine(const Target &T, const Triple &TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Optional RM, + CodeModel::Model CM, + CodeGenOpt::Level OL) + : LLVMTargetMachine(T, computeDataLayout(TT, CPU, Options), TT, CPU, FS, + Options, getEffectiveRelocModel(CM, RM), CM, OL) {} + +Nios2TargetMachine::~Nios2TargetMachine() {} diff --git a/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h new file mode 100644 index 00000000000..7f145c82f32 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/Nios2TargetMachine.h @@ -0,0 +1,30 @@ +//===-- Nios2TargetMachine.h - Define TargetMachine for Nios2 ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the Nios2 specific subclass of TargetMachine. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H +#define LLVM_LIB_TARGET_NIOS2_NIOS2TARGETMACHINE_H + +#include "llvm/Target/TargetMachine.h" + +namespace llvm { +class Nios2TargetMachine : public LLVMTargetMachine { +public: + Nios2TargetMachine(const Target &T, const Triple &TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Optional RM, CodeModel::Model CM, + CodeGenOpt::Level OL); + ~Nios2TargetMachine() override; +}; +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/Target/Nios2/TargetInfo/CMakeLists.txt b/contrib/llvm/lib/Target/Nios2/TargetInfo/CMakeLists.txt new file mode 100644 index 00000000000..394d2c2680b --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/TargetInfo/CMakeLists.txt @@ -0,0 +1 @@ +add_llvm_library(LLVMNios2Info Nios2TargetInfo.cpp) diff --git a/contrib/llvm/lib/Target/Nios2/TargetInfo/LLVMBuild.txt b/contrib/llvm/lib/Target/Nios2/TargetInfo/LLVMBuild.txt new file mode 100644 index 00000000000..558f7501ea6 --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/TargetInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Target/Nios2/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Nios2Info +parent = Nios2 +required_libraries = Support +add_to_library_groups = Nios2 diff --git a/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp b/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp new file mode 100644 index 00000000000..e317686140f --- /dev/null +++ b/contrib/llvm/lib/Target/Nios2/TargetInfo/Nios2TargetInfo.cpp @@ -0,0 +1,24 @@ +//===-- Nios2TargetInfo.cpp - Nios2 Target Implementation -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "Nios2.h" +#include "llvm/Support/TargetRegistry.h" + +using namespace llvm; + +Target &llvm::getTheNios2Target() { + static Target TheNios2Target; + return TheNios2Target; +} + +extern "C" void LLVMInitializeNios2TargetInfo() { + RegisterTarget + X(getTheNios2Target(), "nios2", "Nios2"); +} diff --git a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp index ebd414baf1d..41e3190c3ee 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCExpandISEL.cpp @@ -339,7 +339,7 @@ void PPCExpandISEL::reorganizeBlockLayout(BlockISELList &BIL, // Note: Cannot use stepBackward instead since we are using the Reg // liveness state at the end of MBB (liveOut of MBB) as the liveIn for // NewSuccessor. Otherwise, will cause cyclic dependence. - LivePhysRegs LPR(MF->getSubtarget().getRegisterInfo()); + LivePhysRegs LPR(*MF->getSubtarget().getRegisterInfo()); SmallVector, 2> Clobbers; for (MachineInstr &MI : *MBB) LPR.stepForward(MI, Clobbers); diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e65b1f1aa0a..b90a5ee2834 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1596,9 +1596,8 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { return true; } -bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, - unsigned &InsertAtByte, bool &Swap, bool IsLE) { // Check that the mask is shuffling words +static bool isWordShuffleMask(ShuffleVectorSDNode *N) { for (unsigned i = 0; i < 4; ++i) { unsigned B0 = N->getMaskElt(i*4); unsigned B1 = N->getMaskElt(i*4+1); @@ -1610,6 +1609,14 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, return false; } + return true; +} + +bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + unsigned &InsertAtByte, bool &Swap, bool IsLE) { + if (!isWordShuffleMask(N)) + return false; + // Now we look at mask elements 0,4,8,12 unsigned M0 = N->getMaskElt(0) / 4; unsigned M1 = N->getMaskElt(4) / 4; @@ -1680,6 +1687,69 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, return false; } +bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE) { + assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8"); + // Ensure each byte index of the word is consecutive. + if (!isWordShuffleMask(N)) + return false; + + // Now we look at mask elements 0,4,8,12, which are the beginning of words. + unsigned M0 = N->getMaskElt(0) / 4; + unsigned M1 = N->getMaskElt(4) / 4; + unsigned M2 = N->getMaskElt(8) / 4; + unsigned M3 = N->getMaskElt(12) / 4; + + // If both vector operands for the shuffle are the same vector, the mask will + // contain only elements from the first one and the second one will be undef. + if (N->getOperand(1).isUndef()) { + assert(M0 < 4 && "Indexing into an undef vector?"); + if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4) + return false; + + ShiftElts = IsLE ? (4 - M0) % 4 : M0; + Swap = false; + return true; + } + + // Ensure each word index of the ShuffleVector Mask is consecutive. + if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8) + return false; + + if (IsLE) { + if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 3 left elements of the second vector + // (or if there is no shift to be done at all). + Swap = false; + ShiftElts = (8 - M0) % 8; + } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 3 left elements of the first vector + // (or if we're shifting by 4 - thereby simply swapping the vectors). + Swap = true; + ShiftElts = (4 - M0) % 4; + } + + return true; + } else { // BE + if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) { + // Input vectors don't need to be swapped if the leading element + // of the result is one of the 4 elements of the first vector. + Swap = false; + ShiftElts = M0; + } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) { + // Input vectors need to be swapped if the leading element + // of the result is one of the 4 elements of the right vector. + Swap = true; + ShiftElts = M0 - 4; + } + + return true; + } +} + + /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, @@ -7679,6 +7749,20 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins); } + + if (Subtarget.hasVSX() && + PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) { + if (Swap) + std::swap(V1, V2); + SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1); + SDValue Conv2 = + DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2); + + SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2, + DAG.getConstant(ShiftElts, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl); + } + if (Subtarget.hasVSX()) { if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) { int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG); @@ -8212,10 +8296,12 @@ SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDLoc DL(Op); switch (cast(Op.getOperand(ArgStart))->getZExtValue()) { case Intrinsic::ppc_cfence: { + assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument."); assert(Subtarget.isPPC64() && "Only 64-bit is supported for now."); return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, - Op.getOperand(ArgStart + 1))), + Op.getOperand(ArgStart + 1)), + Op.getOperand(0)), 0); } default: diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h index acb77943b11..2f9eb95f6de 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -450,7 +450,11 @@ namespace llvm { /// a VMRGEW or VMRGOW instruction bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG); - + /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable + /// for a XXSLDWI instruction. + bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, + bool &Swap, bool IsLE); + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the /// shift amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index a3f894c81a0..165970f9678 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1001,7 +1001,9 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp), isPPC64; // LR8 is a true define, while the rest of the Defs are clobbers. X3 is // explicitly defined when this op is created, so not mentioned here. -let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, +// This is lowered to BL8_NOP_TLS by the assembly printer, so the size must be +// correct because the branch select pass is relying on it. +let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Size = 8, Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym), "#GETtlsADDR", diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 46f103141bc..fd6785e963a 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1931,6 +1931,8 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case PPC::DFSTOREf64: { assert(Subtarget.hasP9Vector() && "Invalid D-Form Pseudo-ops on non-P9 target."); + assert(MI.getOperand(2).isReg() && MI.getOperand(1).isImm() && + "D-form op must have register and immediate operands"); unsigned UpperOpcode, LowerOpcode; switch (MI.getOpcode()) { case PPC::DFLOADf32: diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 0766cfe4a98..26b99eced23 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -46,7 +46,7 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>, ]>; def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>, - SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3> + SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3> ]>; def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>, diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td index b98140fedfc..1589ab03e50 100644 --- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -1066,6 +1066,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>; def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>; +// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and +// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable. +def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>; + // Selects. def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)), (SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>; @@ -2379,8 +2383,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Load Vector Indexed def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, - [(set v2f64:$XT, (load xoaddr:$src))]>; - + [(set v2f64:$XT, (load xaddr:$src))]>; // Load Vector (Left-justified) with Length def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB), "lxvl $XT, $src, $rB", IIC_LdStLoad, @@ -2430,7 +2433,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { // Store Vector Indexed def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, - [(store v2f64:$XT, xoaddr:$dst)]>; + [(store v2f64:$XT, xaddr:$dst)]>; // Store Vector (Left-justified) with Length def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB), @@ -2498,21 +2501,38 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>; } // IsLittleEndian, HasP9Vector - def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>; - def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; - def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst), - (STXVX $rS, xoaddr:$dst)>; + // D-Form Load/Store + def : Pat<(v4i32 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4f32 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2i64 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (load iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x iaddr:$src)), (LXV memrix16:$src)>; + def : Pat<(store v4f32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(store v4i32:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(store v2f64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(store v2i64:$rS, iaddr:$dst), (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, iaddr:$dst), + (STXV $rS, memrix16:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, iaddr:$dst), + (STXV $rS, memrix16:$dst)>; + + + def : Pat<(v2f64 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v2i64 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v4f32 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v4i32 (load xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xaddr:$src)), (LXVX xaddr:$src)>; + def : Pat<(store v2f64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(store v2i64:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(store v4f32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(store v4i32:$rS, xaddr:$dst), (STXVX $rS, xaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xaddr:$dst), + (STXVX $rS, xaddr:$dst)>; + def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xaddr:$dst), + (STXVX $rS, xaddr:$dst)>; def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))), (v4i32 (LXVWSX xoaddr:$src))>; def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))), @@ -2704,9 +2724,15 @@ def FltToUIntLoad { def FltToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A))))); } +def FltToLongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 iaddr:$A))))); +} def FltToULongLoad { dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A))))); } +def FltToULongLoadP9 { + dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 iaddr:$A))))); +} def FltToLong { dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A)))); } @@ -2728,9 +2754,15 @@ def DblToULong { def DblToIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A))))); } +def DblToIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load iaddr:$A))))); +} def DblToUIntLoad { dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A))))); } +def DblToUIntLoadP9 { + dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load iaddr:$A))))); +} def DblToLongLoad { dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A))))); } @@ -2898,17 +2930,17 @@ let AddedComplexity = 400 in { (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>; def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)), (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>; - def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)), + def : Pat<(v4i32 (scalar_to_vector DblToIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; - def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)), + def : Pat<(v4i32 (scalar_to_vector DblToUIntLoadP9.A)), (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>; - def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)), + def : Pat<(v2i64 (scalar_to_vector FltToLongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS (DFLOADf32 iaddr:$A), VSFRC)), 0))>; - def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)), + def : Pat<(v2i64 (scalar_to_vector FltToULongLoadP9.A)), (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS (DFLOADf32 iaddr:$A), VSFRC)), 0))>; diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp index 92ce8089c24..d02db9a617a 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp @@ -74,7 +74,7 @@ bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB, unsigned CCValid = MI.getOperand(3).getImm(); unsigned CCMask = MI.getOperand(4).getImm(); - LivePhysRegs LiveRegs(&TII->getRegisterInfo()); + LivePhysRegs LiveRegs(TII->getRegisterInfo()); LiveRegs.addLiveOuts(MBB); for (auto I = std::prev(MBB.end()); I != MBBI; --I) LiveRegs.stepBackward(*I); diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index a30bf34857b..b34c181124d 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -236,32 +236,30 @@ void SystemZInstrInfo::expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode, void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const { MachineBasicBlock *MBB = MI->getParent(); MachineFunction &MF = *MBB->getParent(); - const unsigned Reg = MI->getOperand(0).getReg(); + const unsigned Reg64 = MI->getOperand(0).getReg(); + const unsigned Reg32 = RI.getSubReg(Reg64, SystemZ::subreg_l32); - // Conveniently, all 4 instructions are cloned from LOAD_STACK_GUARD, - // so they already have operand 0 set to reg. + // EAR can only load the low subregister so us a shift for %a0 to produce + // the GR containing %a0 and %a1. // ear , %a0 - MachineInstr *Ear1MI = MF.CloneMachineInstr(MI); - MBB->insert(MI, Ear1MI); - Ear1MI->setDesc(get(SystemZ::EAR)); - MachineInstrBuilder(MF, Ear1MI).addReg(SystemZ::A0); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32) + .addReg(SystemZ::A0) + .addReg(Reg64, RegState::ImplicitDefine); // sllg , , 32 - MachineInstr *SllgMI = MF.CloneMachineInstr(MI); - MBB->insert(MI, SllgMI); - SllgMI->setDesc(get(SystemZ::SLLG)); - MachineInstrBuilder(MF, SllgMI).addReg(Reg).addReg(0).addImm(32); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::SLLG), Reg64) + .addReg(Reg64) + .addReg(0) + .addImm(32); // ear , %a1 - MachineInstr *Ear2MI = MF.CloneMachineInstr(MI); - MBB->insert(MI, Ear2MI); - Ear2MI->setDesc(get(SystemZ::EAR)); - MachineInstrBuilder(MF, Ear2MI).addReg(SystemZ::A1); + BuildMI(*MBB, MI, MI->getDebugLoc(), get(SystemZ::EAR), Reg32) + .addReg(SystemZ::A1); // lg , 40() MI->setDesc(get(SystemZ::LG)); - MachineInstrBuilder(MF, MI).addReg(Reg).addImm(40).addReg(0); + MachineInstrBuilder(MF, MI).addReg(Reg64).addImm(40).addReg(0); } // Emit a zero-extending move from 32-bit GPR SrcReg to 32-bit GPR diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h index 3766ed45b8c..ad597f5c65f 100644 --- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h +++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h @@ -55,6 +55,7 @@ public: unsigned getNumberOfRegisters(bool Vector); unsigned getRegisterBitWidth(bool Vector); + bool prefersVectorizedAddressing() { return false; } bool supportsEfficientVectorElementLoadStore() { return true; } bool enableInterleavedAccessVectorization() { return true; } diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 32ab475f118..e5d3209ec6a 100644 --- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1316,16 +1316,17 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { while (!Done) { bool UpdateLocLex = true; + AsmToken::TokenKind TK = getLexer().getKind(); // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an // identifier. Don't try an parse it as a register. - if (PrevTK != AsmToken::Error && Tok.getString().startswith(".")) + if (PrevTK != AsmToken::Error && Tok.getString().startswith(".") && + TK != AsmToken::Identifier) break; // If we're parsing an immediate expression, we don't expect a '['. if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; - AsmToken::TokenKind TK = getLexer().getKind(); switch (TK) { default: { if (SM.isValidEndState()) { diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td index 3a421fe7739..fe105298f5c 100644 --- a/contrib/llvm/lib/Target/X86/X86.td +++ b/contrib/llvm/lib/Target/X86/X86.td @@ -127,6 +127,9 @@ def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true", def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; +def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", + "true", "Enable AVX-512 Population Count Instructions", + [FeatureAVX512]>; def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp index a5489b9aa8b..313920e02c3 100644 --- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -1655,8 +1655,8 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) { } void FPS::setKillFlags(MachineBasicBlock &MBB) const { - const TargetRegisterInfo *TRI = - MBB.getParent()->getSubtarget().getRegisterInfo(); + const TargetRegisterInfo &TRI = + *MBB.getParent()->getSubtarget().getRegisterInfo(); LivePhysRegs LPR(TRI); LPR.addLiveOuts(MBB); diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index 37b248416e4..86744b06413 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1364,6 +1364,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::MUL, MVT::v8i64, Legal); } + if (Subtarget.hasVPOPCNTDQ()) { + // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512 + // version of popcntd/q. + for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64, + MVT::v4i32, MVT::v2i64}) + setOperationAction(ISD::CTPOP, VT, Legal); + } + // Custom lower several nodes. for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td index f9344413bbc..d8702693884 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2693,22 +2693,22 @@ multiclass avx512_load_vl opc, string OpcodeStr, } multiclass avx512_store opc, string OpcodeStr, X86VectorVTInfo _, - PatFrag st_frag, PatFrag mstore> { + PatFrag st_frag, PatFrag mstore, string Name> { let hasSideEffects = 0 in { def rr_REV : AVX512PI, EVEX; + [], _.ExeDomain>, EVEX, FoldGenData; def rrk_REV : AVX512PI, EVEX, EVEX_K; + [], _.ExeDomain>, EVEX, EVEX_K, FoldGenData; def rrkz_REV : AVX512PI, EVEX, EVEX_KZ; + [], _.ExeDomain>, EVEX, EVEX_KZ, FoldGenData; } def mr : AVX512PI opc, string OpcodeStr, X86VectorVTInfo _, multiclass avx512_store_vl< bits<8> opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_unaligned, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_unaligned, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_unaligned, Name#Z128>, EVEX_V128; } } multiclass avx512_alignedstore_vl opc, string OpcodeStr, - AVX512VLVectorVTInfo _, Predicate prd> { + AVX512VLVectorVTInfo _, Predicate prd, + string Name> { let Predicates = [prd] in defm Z : avx512_store, EVEX_V512; + masked_store_aligned512, Name#Z>, EVEX_V512; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_store, EVEX_V256; + masked_store_aligned256, Name#Z256>, EVEX_V256; defm Z128 : avx512_store, EVEX_V128; + masked_store_aligned128, Name#Z128>, EVEX_V128; } } defm VMOVAPS : avx512_alignedload_vl<0x28, "vmovaps", avx512vl_f32_info, HasAVX512>, avx512_alignedstore_vl<0x29, "vmovaps", avx512vl_f32_info, - HasAVX512>, PS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVAPS">, + PS, EVEX_CD8<32, CD8VF>; defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info, HasAVX512>, avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVAPD">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>, + avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512, + "VMOVUPS">, PS, EVEX_CD8<32, CD8VF>; defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, null_frag>, - avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>, + avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512, + "VMOVUPD">, PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, - HasAVX512>, PD, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQA32">, + PD, EVEX_CD8<32, CD8VF>; defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info, - HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQA64">, + PD, VEX_W, EVEX_CD8<64, CD8VF>; defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI>, - avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, - HasBWI>, XD, EVEX_CD8<8, CD8VF>; + avx512_store_vl<0x7F, "vmovdqu8", avx512vl_i8_info, + HasBWI, "VMOVDQU8">, + XD, EVEX_CD8<8, CD8VF>; defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>, avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, - HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>; + HasBWI, "VMOVDQU16">, + XD, VEX_W, EVEX_CD8<16, CD8VF>; defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info, - HasAVX512>, XS, EVEX_CD8<32, CD8VF>; + HasAVX512, "VMOVDQU32">, + XS, EVEX_CD8<32, CD8VF>; defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512, null_frag>, avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, - HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>; + HasAVX512, "VMOVDQU64">, + XS, VEX_W, EVEX_CD8<64, CD8VF>; // Special instructions to help with spilling when we don't have VLX. We need // to load or store from a ZMM register instead. These are converted in @@ -3354,17 +3366,52 @@ def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM), (COPY_TO_REGCLASS VR128X:$src, FR32X))>; -let hasSideEffects = 0 in -defm VMOVSSZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f32x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR32X:$src2), - "vmovss.s", "$src2, $src1", "$src1, $src2", []>, - XS, EVEX_4V, VEX_LIG; +let hasSideEffects = 0 in { + def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrr">; -let hasSideEffects = 0 in -defm VMOVSDZrr_REV : AVX512_maskable_in_asm<0x11, MRMDestReg, f64x_info, - (outs VR128X:$dst), (ins VR128X:$src1, FR64X:$src2), - "vmovsd.s", "$src2, $src1", "$src1, $src2", []>, - XD, EVEX_4V, VEX_LIG, VEX_W; +let Constraints = "$src0 = $dst" in + def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask, + VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrk">; + + def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2), + "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG, + FoldGenData<"VMOVSSZrrkz">; + + def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W, + FoldGenData<"VMOVSDZrr">; + +let Constraints = "$src0 = $dst" in + def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask, + VR128X:$src1, FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"# + "$dst {${mask}}, $src1, $src2}", + [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrk">; + + def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst), + (ins f64x_info.KRCWM:$mask, VR128X:$src1, + FR64X:$src2), + "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"# + "$dst {${mask}} {z}, $src1, $src2}", + [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG, + VEX_W, FoldGenData<"VMOVSDZrrkz">; +} let Predicates = [HasAVX512] in { let AddedComplexity = 15 in { @@ -8648,6 +8695,41 @@ let Predicates = [HasCDI, NoVLX] in { sub_xmm)>; } +//===---------------------------------------------------------------------===// +// Counts number of ones - VPOPCNTD and VPOPCNTQ +//===---------------------------------------------------------------------===// + +multiclass avx512_unary_rmb_popcnt opc, string OpcodeStr, X86VectorVTInfo VTInfo> { + let Predicates = [HasVPOPCNTDQ] in + defm Z : avx512_unary_rmb, EVEX_V512; +} + +// Use 512bit version to implement 128/256 bit. +multiclass avx512_unary_lowering { + let Predicates = [prd] in { + def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)), + (EXTRACT_SUBREG + (!cast(NAME # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info256.RC:$src1, + _.info256.SubRegIdx)), + _.info256.SubRegIdx)>; + + def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)), + (EXTRACT_SUBREG + (!cast(NAME # "Zrr") + (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)), + _.info128.RC:$src1, + _.info128.SubRegIdx)), + _.info128.SubRegIdx)>; + } +} + +defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>, + avx512_unary_lowering; +defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>, + avx512_unary_lowering, VEX_W; + //===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// @@ -8795,7 +8877,7 @@ multiclass avx512_extract_elt_w { def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst), (ins _.RC:$src1, u8imm:$src2), OpcodeStr#".s\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - EVEX, TAPD; + EVEX, TAPD, FoldGenData; defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD; } diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td index 66382014f6e..e38bbc9b3d3 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -964,10 +964,10 @@ multiclass ArithBinOp_RF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_Rev; - def NAME#16rr_REV : BinOpRR_Rev; - def NAME#32rr_REV : BinOpRR_Rev; - def NAME#64rr_REV : BinOpRR_Rev; + def NAME#8rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_Rev, FoldGenData; def NAME#8rm : BinOpRM_RF; def NAME#16rm : BinOpRM_RF; @@ -1049,10 +1049,10 @@ multiclass ArithBinOp_RFF BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } // isConvertibleToThreeAddress } // isCommutable - def NAME#8rr_REV : BinOpRR_RFF_Rev; - def NAME#16rr_REV : BinOpRR_RFF_Rev; - def NAME#32rr_REV : BinOpRR_RFF_Rev; - def NAME#64rr_REV : BinOpRR_RFF_Rev; + def NAME#8rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_RFF_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_RFF_Rev, FoldGenData; def NAME#8rm : BinOpRM_RFF; def NAME#16rm : BinOpRM_RFF; @@ -1129,10 +1129,10 @@ multiclass ArithBinOp_F BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4, } } // isCommutable - def NAME#8rr_REV : BinOpRR_F_Rev; - def NAME#16rr_REV : BinOpRR_F_Rev; - def NAME#32rr_REV : BinOpRR_F_Rev; - def NAME#64rr_REV : BinOpRR_F_Rev; + def NAME#8rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#16rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#32rr_REV : BinOpRR_F_Rev, FoldGenData; + def NAME#64rr_REV : BinOpRR_F_Rev, FoldGenData; def NAME#8rm : BinOpRM_F; def NAME#16rm : BinOpRM_F; diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td index 1941ae57f0f..3a3cdc9fa57 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td @@ -297,7 +297,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, - VEX_LIG; + VEX_LIG, FoldGenData; } multiclass fma4s_int opc, string OpcodeStr, Operand memop, @@ -321,6 +321,12 @@ let isCodeGenOnly = 1 in { "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), [(set VR128:$dst, (Int VR128:$src1, mem_cpat:$src2, VR128:$src3))]>, VEX_LIG; +let hasSideEffects = 0 in + def rr_Int_REV : FMA4, VEX_LIG, FoldGenData; } // isCodeGenOnly = 1 } @@ -372,12 +378,13 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def rr_REV : FMA4; + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, + FoldGenData; def Yrr_REV : FMA4, - VEX_L; + VEX_L, FoldGenData; } // isCodeGenOnly = 1 } diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td index c2fe786732d..bfcbf71d252 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td +++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td @@ -225,6 +225,12 @@ class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; } class XOP { Encoding OpEnc = EncXOP; } class XOP_4V : XOP { bit hasVEX_4V = 1; } +// Specify the alternative register form instruction to replace the current +// instruction in case it was picked during generation of memory folding tables +class FoldGenData { + string FoldGenRegForm = _RegisterForm; +} + class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, InstrItinClass itin, @@ -304,6 +310,10 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, CD8_EltSize, !srl(VectSize, CD8_Form{1-0}))), 0); + // Used in the memory folding generation (TableGen backend) to point to an alternative + // instruction to replace the current one in case it got picked during generation. + string FoldGenRegForm = ?; + // TSFlags layout should be kept in sync with X86BaseInfo.h. let TSFlags{6-0} = FormBits; let TSFlags{8-7} = OpSizeBits; diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp index f7083a7448c..33fbd41bb63 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -121,172 +121,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) (STI.is64Bit() ? X86::RETQ : X86::RETL)), Subtarget(STI), RI(STI.getTargetTriple()) { - static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = { - { X86::ADC32ri, X86::ADC32mi, 0 }, - { X86::ADC32ri8, X86::ADC32mi8, 0 }, - { X86::ADC32rr, X86::ADC32mr, 0 }, - { X86::ADC64ri32, X86::ADC64mi32, 0 }, - { X86::ADC64ri8, X86::ADC64mi8, 0 }, - { X86::ADC64rr, X86::ADC64mr, 0 }, - { X86::ADD16ri, X86::ADD16mi, 0 }, - { X86::ADD16ri8, X86::ADD16mi8, 0 }, - { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE }, - { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE }, - { X86::ADD16rr, X86::ADD16mr, 0 }, - { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE }, - { X86::ADD32ri, X86::ADD32mi, 0 }, - { X86::ADD32ri8, X86::ADD32mi8, 0 }, - { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE }, - { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE }, - { X86::ADD32rr, X86::ADD32mr, 0 }, - { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE }, - { X86::ADD64ri32, X86::ADD64mi32, 0 }, - { X86::ADD64ri8, X86::ADD64mi8, 0 }, - { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE }, - { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE }, - { X86::ADD64rr, X86::ADD64mr, 0 }, - { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE }, - { X86::ADD8ri, X86::ADD8mi, 0 }, - { X86::ADD8rr, X86::ADD8mr, 0 }, - { X86::AND16ri, X86::AND16mi, 0 }, - { X86::AND16ri8, X86::AND16mi8, 0 }, - { X86::AND16rr, X86::AND16mr, 0 }, - { X86::AND32ri, X86::AND32mi, 0 }, - { X86::AND32ri8, X86::AND32mi8, 0 }, - { X86::AND32rr, X86::AND32mr, 0 }, - { X86::AND64ri32, X86::AND64mi32, 0 }, - { X86::AND64ri8, X86::AND64mi8, 0 }, - { X86::AND64rr, X86::AND64mr, 0 }, - { X86::AND8ri, X86::AND8mi, 0 }, - { X86::AND8rr, X86::AND8mr, 0 }, - { X86::DEC16r, X86::DEC16m, 0 }, - { X86::DEC32r, X86::DEC32m, 0 }, - { X86::DEC64r, X86::DEC64m, 0 }, - { X86::DEC8r, X86::DEC8m, 0 }, - { X86::INC16r, X86::INC16m, 0 }, - { X86::INC32r, X86::INC32m, 0 }, - { X86::INC64r, X86::INC64m, 0 }, - { X86::INC8r, X86::INC8m, 0 }, - { X86::NEG16r, X86::NEG16m, 0 }, - { X86::NEG32r, X86::NEG32m, 0 }, - { X86::NEG64r, X86::NEG64m, 0 }, - { X86::NEG8r, X86::NEG8m, 0 }, - { X86::NOT16r, X86::NOT16m, 0 }, - { X86::NOT32r, X86::NOT32m, 0 }, - { X86::NOT64r, X86::NOT64m, 0 }, - { X86::NOT8r, X86::NOT8m, 0 }, - { X86::OR16ri, X86::OR16mi, 0 }, - { X86::OR16ri8, X86::OR16mi8, 0 }, - { X86::OR16rr, X86::OR16mr, 0 }, - { X86::OR32ri, X86::OR32mi, 0 }, - { X86::OR32ri8, X86::OR32mi8, 0 }, - { X86::OR32rr, X86::OR32mr, 0 }, - { X86::OR64ri32, X86::OR64mi32, 0 }, - { X86::OR64ri8, X86::OR64mi8, 0 }, - { X86::OR64rr, X86::OR64mr, 0 }, - { X86::OR8ri, X86::OR8mi, 0 }, - { X86::OR8rr, X86::OR8mr, 0 }, - { X86::ROL16r1, X86::ROL16m1, 0 }, - { X86::ROL16rCL, X86::ROL16mCL, 0 }, - { X86::ROL16ri, X86::ROL16mi, 0 }, - { X86::ROL32r1, X86::ROL32m1, 0 }, - { X86::ROL32rCL, X86::ROL32mCL, 0 }, - { X86::ROL32ri, X86::ROL32mi, 0 }, - { X86::ROL64r1, X86::ROL64m1, 0 }, - { X86::ROL64rCL, X86::ROL64mCL, 0 }, - { X86::ROL64ri, X86::ROL64mi, 0 }, - { X86::ROL8r1, X86::ROL8m1, 0 }, - { X86::ROL8rCL, X86::ROL8mCL, 0 }, - { X86::ROL8ri, X86::ROL8mi, 0 }, - { X86::ROR16r1, X86::ROR16m1, 0 }, - { X86::ROR16rCL, X86::ROR16mCL, 0 }, - { X86::ROR16ri, X86::ROR16mi, 0 }, - { X86::ROR32r1, X86::ROR32m1, 0 }, - { X86::ROR32rCL, X86::ROR32mCL, 0 }, - { X86::ROR32ri, X86::ROR32mi, 0 }, - { X86::ROR64r1, X86::ROR64m1, 0 }, - { X86::ROR64rCL, X86::ROR64mCL, 0 }, - { X86::ROR64ri, X86::ROR64mi, 0 }, - { X86::ROR8r1, X86::ROR8m1, 0 }, - { X86::ROR8rCL, X86::ROR8mCL, 0 }, - { X86::ROR8ri, X86::ROR8mi, 0 }, - { X86::SAR16r1, X86::SAR16m1, 0 }, - { X86::SAR16rCL, X86::SAR16mCL, 0 }, - { X86::SAR16ri, X86::SAR16mi, 0 }, - { X86::SAR32r1, X86::SAR32m1, 0 }, - { X86::SAR32rCL, X86::SAR32mCL, 0 }, - { X86::SAR32ri, X86::SAR32mi, 0 }, - { X86::SAR64r1, X86::SAR64m1, 0 }, - { X86::SAR64rCL, X86::SAR64mCL, 0 }, - { X86::SAR64ri, X86::SAR64mi, 0 }, - { X86::SAR8r1, X86::SAR8m1, 0 }, - { X86::SAR8rCL, X86::SAR8mCL, 0 }, - { X86::SAR8ri, X86::SAR8mi, 0 }, - { X86::SBB32ri, X86::SBB32mi, 0 }, - { X86::SBB32ri8, X86::SBB32mi8, 0 }, - { X86::SBB32rr, X86::SBB32mr, 0 }, - { X86::SBB64ri32, X86::SBB64mi32, 0 }, - { X86::SBB64ri8, X86::SBB64mi8, 0 }, - { X86::SBB64rr, X86::SBB64mr, 0 }, - { X86::SHL16r1, X86::SHL16m1, 0 }, - { X86::SHL16rCL, X86::SHL16mCL, 0 }, - { X86::SHL16ri, X86::SHL16mi, 0 }, - { X86::SHL32r1, X86::SHL32m1, 0 }, - { X86::SHL32rCL, X86::SHL32mCL, 0 }, - { X86::SHL32ri, X86::SHL32mi, 0 }, - { X86::SHL64r1, X86::SHL64m1, 0 }, - { X86::SHL64rCL, X86::SHL64mCL, 0 }, - { X86::SHL64ri, X86::SHL64mi, 0 }, - { X86::SHL8r1, X86::SHL8m1, 0 }, - { X86::SHL8rCL, X86::SHL8mCL, 0 }, - { X86::SHL8ri, X86::SHL8mi, 0 }, - { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 }, - { X86::SHLD16rri8, X86::SHLD16mri8, 0 }, - { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 }, - { X86::SHLD32rri8, X86::SHLD32mri8, 0 }, - { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 }, - { X86::SHLD64rri8, X86::SHLD64mri8, 0 }, - { X86::SHR16r1, X86::SHR16m1, 0 }, - { X86::SHR16rCL, X86::SHR16mCL, 0 }, - { X86::SHR16ri, X86::SHR16mi, 0 }, - { X86::SHR32r1, X86::SHR32m1, 0 }, - { X86::SHR32rCL, X86::SHR32mCL, 0 }, - { X86::SHR32ri, X86::SHR32mi, 0 }, - { X86::SHR64r1, X86::SHR64m1, 0 }, - { X86::SHR64rCL, X86::SHR64mCL, 0 }, - { X86::SHR64ri, X86::SHR64mi, 0 }, - { X86::SHR8r1, X86::SHR8m1, 0 }, - { X86::SHR8rCL, X86::SHR8mCL, 0 }, - { X86::SHR8ri, X86::SHR8mi, 0 }, - { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 }, - { X86::SHRD16rri8, X86::SHRD16mri8, 0 }, - { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 }, - { X86::SHRD32rri8, X86::SHRD32mri8, 0 }, - { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 }, - { X86::SHRD64rri8, X86::SHRD64mri8, 0 }, - { X86::SUB16ri, X86::SUB16mi, 0 }, - { X86::SUB16ri8, X86::SUB16mi8, 0 }, - { X86::SUB16rr, X86::SUB16mr, 0 }, - { X86::SUB32ri, X86::SUB32mi, 0 }, - { X86::SUB32ri8, X86::SUB32mi8, 0 }, - { X86::SUB32rr, X86::SUB32mr, 0 }, - { X86::SUB64ri32, X86::SUB64mi32, 0 }, - { X86::SUB64ri8, X86::SUB64mi8, 0 }, - { X86::SUB64rr, X86::SUB64mr, 0 }, - { X86::SUB8ri, X86::SUB8mi, 0 }, - { X86::SUB8rr, X86::SUB8mr, 0 }, - { X86::XOR16ri, X86::XOR16mi, 0 }, - { X86::XOR16ri8, X86::XOR16mi8, 0 }, - { X86::XOR16rr, X86::XOR16mr, 0 }, - { X86::XOR32ri, X86::XOR32mi, 0 }, - { X86::XOR32ri8, X86::XOR32mi8, 0 }, - { X86::XOR32rr, X86::XOR32mr, 0 }, - { X86::XOR64ri32, X86::XOR64mi32, 0 }, - { X86::XOR64ri8, X86::XOR64mi8, 0 }, - { X86::XOR64rr, X86::XOR64mr, 0 }, - { X86::XOR8ri, X86::XOR8mi, 0 }, - { X86::XOR8rr, X86::XOR8mr, 0 } - }; +// Generated memory folding tables. +#include "X86GenFoldTables.inc" for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2Addr) { AddTableEntry(RegOp2MemOpTable2Addr, MemOp2RegOpTable, @@ -295,744 +131,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE); } - static const X86MemoryFoldTableEntry MemoryFoldTable0[] = { - { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD }, - { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD }, - { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD }, - { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD }, - { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD }, - { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD }, - { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD }, - { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD }, - { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD }, - { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD }, - { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD }, - { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD }, - { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD }, - { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD }, - { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD }, - { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD }, - { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD }, - { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD }, - { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD }, - { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD }, - { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE }, - { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD }, - { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD }, - { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD }, - { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD }, - { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD }, - { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD }, - { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD }, - { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD }, - { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD }, - { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD }, - { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE }, - { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE }, - { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE }, - { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE }, - { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE }, - { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE }, - { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE }, - { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE }, - { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE }, - { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE }, - { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE }, - { X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE }, - { X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE }, - { X86::MOVSS2DIrr, X86::MOVSS2DImr, TB_FOLDED_STORE }, - { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE }, - { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE }, - { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD }, - { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD }, - { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD }, - { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD }, - { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE }, - { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE }, - { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD }, - { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD }, - { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD }, - { X86::SETAEr, X86::SETAEm, TB_FOLDED_STORE }, - { X86::SETAr, X86::SETAm, TB_FOLDED_STORE }, - { X86::SETBEr, X86::SETBEm, TB_FOLDED_STORE }, - { X86::SETBr, X86::SETBm, TB_FOLDED_STORE }, - { X86::SETEr, X86::SETEm, TB_FOLDED_STORE }, - { X86::SETGEr, X86::SETGEm, TB_FOLDED_STORE }, - { X86::SETGr, X86::SETGm, TB_FOLDED_STORE }, - { X86::SETLEr, X86::SETLEm, TB_FOLDED_STORE }, - { X86::SETLr, X86::SETLm, TB_FOLDED_STORE }, - { X86::SETNEr, X86::SETNEm, TB_FOLDED_STORE }, - { X86::SETNOr, X86::SETNOm, TB_FOLDED_STORE }, - { X86::SETNPr, X86::SETNPm, TB_FOLDED_STORE }, - { X86::SETNSr, X86::SETNSm, TB_FOLDED_STORE }, - { X86::SETOr, X86::SETOm, TB_FOLDED_STORE }, - { X86::SETPr, X86::SETPm, TB_FOLDED_STORE }, - { X86::SETSr, X86::SETSm, TB_FOLDED_STORE }, - { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD }, - { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD }, - { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD }, - { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD }, - { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD }, - { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD }, - { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD }, - - // AVX 128-bit versions of foldable instructions - { X86::VEXTRACTPSrr,X86::VEXTRACTPSmr, TB_FOLDED_STORE }, - { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE }, - { X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE }, - { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE }, - { X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE }, - { X86::VMOVSS2DIrr, X86::VMOVSS2DImr, TB_FOLDED_STORE }, - { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE }, - { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE }, - { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE }, - { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE }, - - // AVX 256-bit foldable instructions - { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, - { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, - { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 }, - { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE }, - { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE }, - { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE }, - - // AVX-512 foldable instructions - { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE }, - { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE }, - { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, - { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 }, - { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, - { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 }, - { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE }, - { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE }, - { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE }, - { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE }, - { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE }, - { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE }, - { X86::VMOVSDto64Zrr, X86::VMOVSDto64Zmr, TB_FOLDED_STORE }, - { X86::VMOVSS2DIZrr, X86::VMOVSS2DIZmr, TB_FOLDED_STORE }, - { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE }, - { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE }, - { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE }, - { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE }, - { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE }, - { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE }, - { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE }, - { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE }, - { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE }, - { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE }, - { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE }, - { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE }, - { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE }, - { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE }, - { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE }, - { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE }, - { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE }, - { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE }, - { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE }, - - // AVX-512 foldable instructions (256-bit versions) - { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE }, - { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE }, - { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE }, - { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE }, - { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, - { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, - { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, - { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 }, - { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE }, - { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE }, - { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE }, - { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE }, - { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE }, - { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE }, - { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE }, - { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE }, - - // AVX-512 foldable instructions (128-bit versions) - { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 }, - { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE }, - { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE }, - { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE }, - { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE }, - { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE }, - { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE }, - - // F16C foldable instructions - { X86::VCVTPS2PHrr, X86::VCVTPS2PHmr, TB_FOLDED_STORE }, - { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE } - }; - for (X86MemoryFoldTableEntry Entry : MemoryFoldTable0) { AddTableEntry(RegOp2MemOpTable0, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, TB_INDEX_0 | Entry.Flags); } - static const X86MemoryFoldTableEntry MemoryFoldTable1[] = { - { X86::BSF16rr, X86::BSF16rm, 0 }, - { X86::BSF32rr, X86::BSF32rm, 0 }, - { X86::BSF64rr, X86::BSF64rm, 0 }, - { X86::BSR16rr, X86::BSR16rm, 0 }, - { X86::BSR32rr, X86::BSR32rm, 0 }, - { X86::BSR64rr, X86::BSR64rm, 0 }, - { X86::CMP16rr, X86::CMP16rm, 0 }, - { X86::CMP32rr, X86::CMP32rm, 0 }, - { X86::CMP64rr, X86::CMP64rm, 0 }, - { X86::CMP8rr, X86::CMP8rm, 0 }, - { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 }, - { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm, 0 }, - { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 }, - { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm, 0 }, - { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 }, - { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 }, - { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 }, - { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 }, - { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 }, - { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 }, - { X86::IMUL16rri, X86::IMUL16rmi, 0 }, - { X86::IMUL16rri8, X86::IMUL16rmi8, 0 }, - { X86::IMUL32rri, X86::IMUL32rmi, 0 }, - { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, - { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, - { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE }, - { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE }, - { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE }, - { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE }, - { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE }, - { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE }, - { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE }, - { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 }, - { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 }, - { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 }, - { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 }, - { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE }, - { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 }, - { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 }, - { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE }, - { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE }, - { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE }, - { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE }, - { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE }, - { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE }, - { X86::MOV16rr, X86::MOV16rm, 0 }, - { X86::MOV32rr, X86::MOV32rm, 0 }, - { X86::MOV64rr, X86::MOV64rm, 0 }, - { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 }, - { X86::MOV64toSDrr, X86::MOV64toSDrm, 0 }, - { X86::MOV8rr, X86::MOV8rm, 0 }, - { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 }, - { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 }, - { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE }, - { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 }, - { X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 }, - { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 }, - { X86::MOVDQUrr, X86::MOVDQUrm, 0 }, - { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 }, - { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 }, - { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 }, - { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 }, - { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 }, - { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 }, - { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 }, - { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 }, - { X86::MOVUPDrr, X86::MOVUPDrm, 0 }, - { X86::MOVUPSrr, X86::MOVUPSrm, 0 }, - { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE }, - { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 }, - { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 }, - { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 }, - { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 }, - { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 }, - { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 }, - { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 }, - { X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 }, - { X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 }, - { X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 }, - { X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 }, - { X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 }, - { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE }, - { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE }, - { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE }, - { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE }, - { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE }, - { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE }, - { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE }, - { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE }, - { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE }, - { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE }, - { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE }, - { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE }, - { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, - { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, - { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, - { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 }, - { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 }, - { X86::RCPSSr, X86::RCPSSm, 0 }, - { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE }, - { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 }, - { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 }, - { X86::ROUNDSDr, X86::ROUNDSDm, 0 }, - { X86::ROUNDSSr, X86::ROUNDSSm, 0 }, - { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 }, - { X86::RSQRTSSr, X86::RSQRTSSm, 0 }, - { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE }, - { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 }, - { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 }, - { X86::SQRTSDr, X86::SQRTSDm, 0 }, - { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE }, - { X86::SQRTSSr, X86::SQRTSSm, 0 }, - { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE }, - { X86::TEST16rr, X86::TEST16rm, 0 }, - { X86::TEST32rr, X86::TEST32rm, 0 }, - { X86::TEST64rr, X86::TEST64rm, 0 }, - { X86::TEST8rr, X86::TEST8rm, 0 }, - // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0 - { X86::UCOMISDrr, X86::UCOMISDrm, 0 }, - { X86::UCOMISSrr, X86::UCOMISSrm, 0 }, - - // MMX version of foldable instructions - { X86::MMX_CVTPD2PIirr, X86::MMX_CVTPD2PIirm, 0 }, - { X86::MMX_CVTPI2PDirr, X86::MMX_CVTPI2PDirm, 0 }, - { X86::MMX_CVTPS2PIirr, X86::MMX_CVTPS2PIirm, 0 }, - { X86::MMX_CVTTPD2PIirr, X86::MMX_CVTTPD2PIirm, 0 }, - { X86::MMX_CVTTPS2PIirr, X86::MMX_CVTTPS2PIirm, 0 }, - { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 }, - { X86::MMX_PABSBrr64, X86::MMX_PABSBrm64, 0 }, - { X86::MMX_PABSDrr64, X86::MMX_PABSDrm64, 0 }, - { X86::MMX_PABSWrr64, X86::MMX_PABSWrm64, 0 }, - { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 }, - - // 3DNow! version of foldable instructions - { X86::PF2IDrr, X86::PF2IDrm, 0 }, - { X86::PF2IWrr, X86::PF2IWrm, 0 }, - { X86::PFRCPrr, X86::PFRCPrm, 0 }, - { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 }, - { X86::PI2FDrr, X86::PI2FDrm, 0 }, - { X86::PI2FWrr, X86::PI2FWrm, 0 }, - { X86::PSWAPDrr, X86::PSWAPDrm, 0 }, - - // AVX 128-bit versions of foldable instructions - { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, TB_NO_REVERSE }, - { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, TB_NO_REVERSE }, - { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE }, - { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE }, - { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 }, - { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE }, - { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 }, - { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE }, - { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 }, - { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE }, - { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 }, - { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE }, - { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE }, - { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE }, - { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE }, - { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE }, - { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE }, - { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 }, - { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 }, - { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 }, - { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 }, - { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE }, - { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 }, - { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 }, - { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 }, - { X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 }, - { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 }, - { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 }, - { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE }, - { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 }, - { X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 }, - { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 }, - { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 }, - { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 }, - { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 }, - { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 }, - { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 }, - { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm, TB_NO_REVERSE }, - { X86::VPABSBrr, X86::VPABSBrm, 0 }, - { X86::VPABSDrr, X86::VPABSDrm, 0 }, - { X86::VPABSWrr, X86::VPABSWrm, 0 }, - { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 }, - { X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 }, - { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 }, - { X86::VPCMPISTRM128rr, X86::VPCMPISTRM128rm, 0 }, - { X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 }, - { X86::VPERMILPDri, X86::VPERMILPDmi, 0 }, - { X86::VPERMILPSri, X86::VPERMILPSmi, 0 }, - { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE }, - { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE }, - { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE }, - { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE }, - { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE }, - { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE }, - { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE }, - { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE }, - { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE }, - { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE }, - { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE }, - { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE }, - { X86::VPSHUFDri, X86::VPSHUFDmi, 0 }, - { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 }, - { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 }, - { X86::VPTESTrr, X86::VPTESTrm, 0 }, - { X86::VRCPPSr, X86::VRCPPSm, 0 }, - { X86::VROUNDPDr, X86::VROUNDPDm, 0 }, - { X86::VROUNDPSr, X86::VROUNDPSm, 0 }, - { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 }, - { X86::VSQRTPDr, X86::VSQRTPDm, 0 }, - { X86::VSQRTPSr, X86::VSQRTPSm, 0 }, - { X86::VTESTPDrr, X86::VTESTPDrm, 0 }, - { X86::VTESTPSrr, X86::VTESTPSrm, 0 }, - { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 }, - { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 }, - - // AVX 256-bit foldable instructions - { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE }, - { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 }, - { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 }, - { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 }, - { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 }, - { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE }, - { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 }, - { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 }, - { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 }, - { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, - { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 }, - { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 }, - { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 }, - { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 }, - { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 }, - { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, - { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, - { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 }, - { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 }, - { X86::VPTESTYrr, X86::VPTESTYrm, 0 }, - { X86::VRCPPSYr, X86::VRCPPSYm, 0 }, - { X86::VROUNDYPDr, X86::VROUNDYPDm, 0 }, - { X86::VROUNDYPSr, X86::VROUNDYPSm, 0 }, - { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 }, - { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 }, - { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 }, - { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 }, - { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 }, - - // AVX2 foldable instructions - - // VBROADCASTS{SD}rr register instructions were an AVX2 addition while the - // VBROADCASTS{SD}rm memory instructions were available from AVX1. - // TB_NO_REVERSE prevents unfolding from introducing an illegal instruction - // on AVX1 targets. The VPBROADCAST instructions are all AVX2 instructions - // so they don't need an equivalent limitation. - { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE }, - { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, - { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, - { X86::VPABSBYrr, X86::VPABSBYrm, 0 }, - { X86::VPABSDYrr, X86::VPABSDYrm, 0 }, - { X86::VPABSWYrr, X86::VPABSWYrm, 0 }, - { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE }, - { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE }, - { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE }, - { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE }, - { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE }, - { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE }, - { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE }, - { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE }, - { X86::VPERMPDYri, X86::VPERMPDYmi, 0 }, - { X86::VPERMQYri, X86::VPERMQYmi, 0 }, - { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE }, - { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE }, - { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 }, - { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 }, - { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 }, - { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE }, - { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE }, - { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE }, - { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 }, - { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 }, - { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 }, - { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE }, - { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 }, - { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 }, - { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 }, - - // XOP foldable instructions - { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 }, - { X86::VFRCZPDrrY, X86::VFRCZPDrmY, 0 }, - { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 }, - { X86::VFRCZPSrrY, X86::VFRCZPSrmY, 0 }, - { X86::VFRCZSDrr, X86::VFRCZSDrm, 0 }, - { X86::VFRCZSSrr, X86::VFRCZSSrm, 0 }, - { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 }, - { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 }, - { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 }, - { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 }, - { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 }, - { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 }, - { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 }, - { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 }, - { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 }, - { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 }, - { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 }, - { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 }, - { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 }, - { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 }, - { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 }, - { X86::VPROTBri, X86::VPROTBmi, 0 }, - { X86::VPROTBrr, X86::VPROTBmr, 0 }, - { X86::VPROTDri, X86::VPROTDmi, 0 }, - { X86::VPROTDrr, X86::VPROTDmr, 0 }, - { X86::VPROTQri, X86::VPROTQmi, 0 }, - { X86::VPROTQrr, X86::VPROTQmr, 0 }, - { X86::VPROTWri, X86::VPROTWmi, 0 }, - { X86::VPROTWrr, X86::VPROTWmr, 0 }, - { X86::VPSHABrr, X86::VPSHABmr, 0 }, - { X86::VPSHADrr, X86::VPSHADmr, 0 }, - { X86::VPSHAQrr, X86::VPSHAQmr, 0 }, - { X86::VPSHAWrr, X86::VPSHAWmr, 0 }, - { X86::VPSHLBrr, X86::VPSHLBmr, 0 }, - { X86::VPSHLDrr, X86::VPSHLDmr, 0 }, - { X86::VPSHLQrr, X86::VPSHLQmr, 0 }, - { X86::VPSHLWrr, X86::VPSHLWmr, 0 }, - - // LWP foldable instructions - { X86::LWPINS32rri, X86::LWPINS32rmi, 0 }, - { X86::LWPINS64rri, X86::LWPINS64rmi, 0 }, - { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 }, - { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 }, - - // BMI/BMI2/LZCNT/POPCNT/TBM foldable instructions - { X86::BEXTR32rr, X86::BEXTR32rm, 0 }, - { X86::BEXTR64rr, X86::BEXTR64rm, 0 }, - { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 }, - { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 }, - { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 }, - { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 }, - { X86::BLCI32rr, X86::BLCI32rm, 0 }, - { X86::BLCI64rr, X86::BLCI64rm, 0 }, - { X86::BLCIC32rr, X86::BLCIC32rm, 0 }, - { X86::BLCIC64rr, X86::BLCIC64rm, 0 }, - { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 }, - { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 }, - { X86::BLCS32rr, X86::BLCS32rm, 0 }, - { X86::BLCS64rr, X86::BLCS64rm, 0 }, - { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 }, - { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 }, - { X86::BLSI32rr, X86::BLSI32rm, 0 }, - { X86::BLSI64rr, X86::BLSI64rm, 0 }, - { X86::BLSIC32rr, X86::BLSIC32rm, 0 }, - { X86::BLSIC64rr, X86::BLSIC64rm, 0 }, - { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 }, - { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 }, - { X86::BLSR32rr, X86::BLSR32rm, 0 }, - { X86::BLSR64rr, X86::BLSR64rm, 0 }, - { X86::BZHI32rr, X86::BZHI32rm, 0 }, - { X86::BZHI64rr, X86::BZHI64rm, 0 }, - { X86::LZCNT16rr, X86::LZCNT16rm, 0 }, - { X86::LZCNT32rr, X86::LZCNT32rm, 0 }, - { X86::LZCNT64rr, X86::LZCNT64rm, 0 }, - { X86::POPCNT16rr, X86::POPCNT16rm, 0 }, - { X86::POPCNT32rr, X86::POPCNT32rm, 0 }, - { X86::POPCNT64rr, X86::POPCNT64rm, 0 }, - { X86::RORX32ri, X86::RORX32mi, 0 }, - { X86::RORX64ri, X86::RORX64mi, 0 }, - { X86::SARX32rr, X86::SARX32rm, 0 }, - { X86::SARX64rr, X86::SARX64rm, 0 }, - { X86::SHRX32rr, X86::SHRX32rm, 0 }, - { X86::SHRX64rr, X86::SHRX64rm, 0 }, - { X86::SHLX32rr, X86::SHLX32rm, 0 }, - { X86::SHLX64rr, X86::SHLX64rm, 0 }, - { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 }, - { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 }, - { X86::TZCNT16rr, X86::TZCNT16rm, 0 }, - { X86::TZCNT32rr, X86::TZCNT32rm, 0 }, - { X86::TZCNT64rr, X86::TZCNT64rm, 0 }, - { X86::TZMSK32rr, X86::TZMSK32rm, 0 }, - { X86::TZMSK64rr, X86::TZMSK64rm, 0 }, - - // AVX-512 foldable instructions - { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, - { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, - { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, - { X86::VMOV64toSDZrr, X86::VMOV64toSDZrm, 0 }, - { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 }, - { X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 }, - { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 }, - { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 }, - { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 }, - { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 }, - { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 }, - { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 }, - { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 }, - { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 }, - { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 }, - { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 }, - { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE }, - { X86::VPABSBZrr, X86::VPABSBZrm, 0 }, - { X86::VPABSDZrr, X86::VPABSDZrm, 0 }, - { X86::VPABSQZrr, X86::VPABSQZrm, 0 }, - { X86::VPABSWZrr, X86::VPABSWZrm, 0 }, - { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 }, - { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 }, - { X86::VPERMPDZri, X86::VPERMPDZmi, 0 }, - { X86::VPERMQZri, X86::VPERMQZmi, 0 }, - { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 }, - { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE }, - { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 }, - { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 }, - { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 }, - { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 }, - { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 }, - { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE }, - { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 }, - { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 }, - { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 }, - { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 }, - { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 }, - { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 }, - { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 }, - { X86::VPSLLDQZ512rr, X86::VPSLLDQZ512rm, 0 }, - { X86::VPSLLDZri, X86::VPSLLDZmi, 0 }, - { X86::VPSLLQZri, X86::VPSLLQZmi, 0 }, - { X86::VPSLLWZri, X86::VPSLLWZmi, 0 }, - { X86::VPSRADZri, X86::VPSRADZmi, 0 }, - { X86::VPSRAQZri, X86::VPSRAQZmi, 0 }, - { X86::VPSRAWZri, X86::VPSRAWZmi, 0 }, - { X86::VPSRLDQZ512rr, X86::VPSRLDQZ512rm, 0 }, - { X86::VPSRLDZri, X86::VPSRLDZmi, 0 }, - { X86::VPSRLQZri, X86::VPSRLQZmi, 0 }, - { X86::VPSRLWZri, X86::VPSRLWZmi, 0 }, - - // AVX-512 foldable instructions (256-bit versions) - { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE }, - { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE }, - { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 }, - { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 }, - { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 }, - { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 }, - { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 }, - { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 }, - { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 }, - { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 }, - { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 }, - { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 }, - { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 }, - { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 }, - { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 }, - { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 }, - { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 }, - { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 }, - { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 }, - { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 }, - { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE }, - { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE }, - { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 }, - { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 }, - { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 }, - { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE }, - { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE }, - { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE }, - { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 }, - { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 }, - { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 }, - { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE }, - { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 }, - { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 }, - { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 }, - { X86::VPSLLDQZ256rr, X86::VPSLLDQZ256rm, 0 }, - { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 }, - { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 }, - { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 }, - { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 }, - { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 }, - { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 }, - { X86::VPSRLDQZ256rr, X86::VPSRLDQZ256rm, 0 }, - { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 }, - { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 }, - { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 }, - - // AVX-512 foldable instructions (128-bit versions) - { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE }, - { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 }, - { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 }, - { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 }, - { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 }, - { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 }, - { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 }, - { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 }, - { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 }, - { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 }, - { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 }, - { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 }, - { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 }, - { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 }, - { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 }, - { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 }, - { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 }, - { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE }, - { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE }, - { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE }, - { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE }, - { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE }, - { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE }, - { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE }, - { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE }, - { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE }, - { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE }, - { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE }, - { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE }, - { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 }, - { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 }, - { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 }, - { X86::VPSLLDQZ128rr, X86::VPSLLDQZ128rm, 0 }, - { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 }, - { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 }, - { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 }, - { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 }, - { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 }, - { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 }, - { X86::VPSRLDQZ128rr, X86::VPSRLDQZ128rm, 0 }, - { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 }, - { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 }, - { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 }, - - // F16C foldable instructions - { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 }, - { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 }, - - // AES foldable instructions - { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 }, - { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 }, - { X86::VAESIMCrr, X86::VAESIMCrm, 0 }, - { X86::VAESKEYGENASSIST128rr, X86::VAESKEYGENASSIST128rm, 0 } - }; - for (X86MemoryFoldTableEntry Entry : MemoryFoldTable1) { AddTableEntry(RegOp2MemOpTable1, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, @@ -1040,1394 +143,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_1 | TB_FOLDED_LOAD); } - static const X86MemoryFoldTableEntry MemoryFoldTable2[] = { - { X86::ADC32rr, X86::ADC32rm, 0 }, - { X86::ADC64rr, X86::ADC64rm, 0 }, - { X86::ADD16rr, X86::ADD16rm, 0 }, - { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE }, - { X86::ADD32rr, X86::ADD32rm, 0 }, - { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE }, - { X86::ADD64rr, X86::ADD64rm, 0 }, - { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE }, - { X86::ADD8rr, X86::ADD8rm, 0 }, - { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 }, - { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 }, - { X86::ADDSDrr, X86::ADDSDrm, 0 }, - { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE }, - { X86::ADDSSrr, X86::ADDSSrm, 0 }, - { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE }, - { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 }, - { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 }, - { X86::AND16rr, X86::AND16rm, 0 }, - { X86::AND32rr, X86::AND32rm, 0 }, - { X86::AND64rr, X86::AND64rm, 0 }, - { X86::AND8rr, X86::AND8rm, 0 }, - { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 }, - { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 }, - { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 }, - { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 }, - { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 }, - { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 }, - { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 }, - { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 }, - { X86::CMOVA16rr, X86::CMOVA16rm, 0 }, - { X86::CMOVA32rr, X86::CMOVA32rm, 0 }, - { X86::CMOVA64rr, X86::CMOVA64rm, 0 }, - { X86::CMOVAE16rr, X86::CMOVAE16rm, 0 }, - { X86::CMOVAE32rr, X86::CMOVAE32rm, 0 }, - { X86::CMOVAE64rr, X86::CMOVAE64rm, 0 }, - { X86::CMOVB16rr, X86::CMOVB16rm, 0 }, - { X86::CMOVB32rr, X86::CMOVB32rm, 0 }, - { X86::CMOVB64rr, X86::CMOVB64rm, 0 }, - { X86::CMOVBE16rr, X86::CMOVBE16rm, 0 }, - { X86::CMOVBE32rr, X86::CMOVBE32rm, 0 }, - { X86::CMOVBE64rr, X86::CMOVBE64rm, 0 }, - { X86::CMOVE16rr, X86::CMOVE16rm, 0 }, - { X86::CMOVE32rr, X86::CMOVE32rm, 0 }, - { X86::CMOVE64rr, X86::CMOVE64rm, 0 }, - { X86::CMOVG16rr, X86::CMOVG16rm, 0 }, - { X86::CMOVG32rr, X86::CMOVG32rm, 0 }, - { X86::CMOVG64rr, X86::CMOVG64rm, 0 }, - { X86::CMOVGE16rr, X86::CMOVGE16rm, 0 }, - { X86::CMOVGE32rr, X86::CMOVGE32rm, 0 }, - { X86::CMOVGE64rr, X86::CMOVGE64rm, 0 }, - { X86::CMOVL16rr, X86::CMOVL16rm, 0 }, - { X86::CMOVL32rr, X86::CMOVL32rm, 0 }, - { X86::CMOVL64rr, X86::CMOVL64rm, 0 }, - { X86::CMOVLE16rr, X86::CMOVLE16rm, 0 }, - { X86::CMOVLE32rr, X86::CMOVLE32rm, 0 }, - { X86::CMOVLE64rr, X86::CMOVLE64rm, 0 }, - { X86::CMOVNE16rr, X86::CMOVNE16rm, 0 }, - { X86::CMOVNE32rr, X86::CMOVNE32rm, 0 }, - { X86::CMOVNE64rr, X86::CMOVNE64rm, 0 }, - { X86::CMOVNO16rr, X86::CMOVNO16rm, 0 }, - { X86::CMOVNO32rr, X86::CMOVNO32rm, 0 }, - { X86::CMOVNO64rr, X86::CMOVNO64rm, 0 }, - { X86::CMOVNP16rr, X86::CMOVNP16rm, 0 }, - { X86::CMOVNP32rr, X86::CMOVNP32rm, 0 }, - { X86::CMOVNP64rr, X86::CMOVNP64rm, 0 }, - { X86::CMOVNS16rr, X86::CMOVNS16rm, 0 }, - { X86::CMOVNS32rr, X86::CMOVNS32rm, 0 }, - { X86::CMOVNS64rr, X86::CMOVNS64rm, 0 }, - { X86::CMOVO16rr, X86::CMOVO16rm, 0 }, - { X86::CMOVO32rr, X86::CMOVO32rm, 0 }, - { X86::CMOVO64rr, X86::CMOVO64rm, 0 }, - { X86::CMOVP16rr, X86::CMOVP16rm, 0 }, - { X86::CMOVP32rr, X86::CMOVP32rm, 0 }, - { X86::CMOVP64rr, X86::CMOVP64rm, 0 }, - { X86::CMOVS16rr, X86::CMOVS16rm, 0 }, - { X86::CMOVS32rr, X86::CMOVS32rm, 0 }, - { X86::CMOVS64rr, X86::CMOVS64rm, 0 }, - { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 }, - { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 }, - { X86::CMPSDrr, X86::CMPSDrm, 0 }, - { X86::CMPSSrr, X86::CMPSSrm, 0 }, - { X86::CRC32r32r32, X86::CRC32r32m32, 0 }, - { X86::CRC32r64r64, X86::CRC32r64m64, 0 }, - { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 }, - { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 }, - { X86::DIVSDrr, X86::DIVSDrm, 0 }, - { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE }, - { X86::DIVSSrr, X86::DIVSSrm, 0 }, - { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE }, - { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 }, - { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 }, - { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 }, - { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 }, - { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 }, - { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 }, - { X86::IMUL16rr, X86::IMUL16rm, 0 }, - { X86::IMUL32rr, X86::IMUL32rm, 0 }, - { X86::IMUL64rr, X86::IMUL64rm, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE }, - { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE }, - { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 }, - { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 }, - { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 }, - { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 }, - { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE }, - { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 }, - { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 }, - { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 }, - { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 }, - { X86::MAXSDrr, X86::MAXSDrm, 0 }, - { X86::MAXCSDrr, X86::MAXCSDrm, 0 }, - { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE }, - { X86::MAXSSrr, X86::MAXSSrm, 0 }, - { X86::MAXCSSrr, X86::MAXCSSrm, 0 }, - { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE }, - { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 }, - { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 }, - { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 }, - { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 }, - { X86::MINSDrr, X86::MINSDrm, 0 }, - { X86::MINCSDrr, X86::MINCSDrm, 0 }, - { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE }, - { X86::MINSSrr, X86::MINSSrm, 0 }, - { X86::MINCSSrr, X86::MINCSSrm, 0 }, - { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE }, - { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE }, - { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, - { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, - { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, - { X86::MULSDrr, X86::MULSDrm, 0 }, - { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE }, - { X86::MULSSrr, X86::MULSSrm, 0 }, - { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE }, - { X86::OR16rr, X86::OR16rm, 0 }, - { X86::OR32rr, X86::OR32rm, 0 }, - { X86::OR64rr, X86::OR64rm, 0 }, - { X86::OR8rr, X86::OR8rm, 0 }, - { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 }, - { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, - { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, - { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, - { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 }, - { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, - { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, - { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, - { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, - { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, - { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, - { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 }, - { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 }, - { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, - { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 }, - { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, - { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, - { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, - { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, - { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 }, - { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 }, - { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 }, - { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, - { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, - { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 }, - { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 }, - { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, - { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, - { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, - { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, - { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 }, - { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 }, - { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, - { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 }, - { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, - { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 }, - { X86::PINSRBrr, X86::PINSRBrm, 0 }, - { X86::PINSRDrr, X86::PINSRDrm, 0 }, - { X86::PINSRQrr, X86::PINSRQrm, 0 }, - { X86::PINSRWrri, X86::PINSRWrmi, 0 }, - { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 }, - { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, - { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 }, - { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 }, - { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, - { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, - { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 }, - { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 }, - { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 }, - { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 }, - { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, - { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, - { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 }, - { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 }, - { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, - { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 }, - { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, - { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, - { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, - { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 }, - { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, - { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, - { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, - { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 }, - { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, - { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, - { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, - { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, - { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, - { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, - { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 }, - { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 }, - { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 }, - { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 }, - { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 }, - { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 }, - { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 }, - { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 }, - { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 }, - { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 }, - { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 }, - { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 }, - { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 }, - { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 }, - { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 }, - { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 }, - { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 }, - { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 }, - { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 }, - { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 }, - { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 }, - { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 }, - { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE }, - { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE }, - { X86::SBB32rr, X86::SBB32rm, 0 }, - { X86::SBB64rr, X86::SBB64rm, 0 }, - { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 }, - { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 }, - { X86::SUB16rr, X86::SUB16rm, 0 }, - { X86::SUB32rr, X86::SUB32rm, 0 }, - { X86::SUB64rr, X86::SUB64rm, 0 }, - { X86::SUB8rr, X86::SUB8rm, 0 }, - { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 }, - { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 }, - { X86::SUBSDrr, X86::SUBSDrm, 0 }, - { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE }, - { X86::SUBSSrr, X86::SUBSSrm, 0 }, - { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE }, - // FIXME: TEST*rr -> swapped operand of TEST*mr. - { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 }, - { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 }, - { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 }, - { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 }, - { X86::XOR16rr, X86::XOR16rm, 0 }, - { X86::XOR32rr, X86::XOR32rm, 0 }, - { X86::XOR64rr, X86::XOR64rm, 0 }, - { X86::XOR8rr, X86::XOR8rm, 0 }, - { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 }, - { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 }, - - // MMX version of foldable instructions - { X86::MMX_CVTPI2PSirr, X86::MMX_CVTPI2PSirm, 0 }, - { X86::MMX_PACKSSDWirr, X86::MMX_PACKSSDWirm, 0 }, - { X86::MMX_PACKSSWBirr, X86::MMX_PACKSSWBirm, 0 }, - { X86::MMX_PACKUSWBirr, X86::MMX_PACKUSWBirm, 0 }, - { X86::MMX_PADDBirr, X86::MMX_PADDBirm, 0 }, - { X86::MMX_PADDDirr, X86::MMX_PADDDirm, 0 }, - { X86::MMX_PADDQirr, X86::MMX_PADDQirm, 0 }, - { X86::MMX_PADDSBirr, X86::MMX_PADDSBirm, 0 }, - { X86::MMX_PADDSWirr, X86::MMX_PADDSWirm, 0 }, - { X86::MMX_PADDUSBirr, X86::MMX_PADDUSBirm, 0 }, - { X86::MMX_PADDUSWirr, X86::MMX_PADDUSWirm, 0 }, - { X86::MMX_PADDWirr, X86::MMX_PADDWirm, 0 }, - { X86::MMX_PALIGNR64irr, X86::MMX_PALIGNR64irm, 0 }, - { X86::MMX_PANDNirr, X86::MMX_PANDNirm, 0 }, - { X86::MMX_PANDirr, X86::MMX_PANDirm, 0 }, - { X86::MMX_PAVGBirr, X86::MMX_PAVGBirm, 0 }, - { X86::MMX_PAVGWirr, X86::MMX_PAVGWirm, 0 }, - { X86::MMX_PCMPEQBirr, X86::MMX_PCMPEQBirm, 0 }, - { X86::MMX_PCMPEQDirr, X86::MMX_PCMPEQDirm, 0 }, - { X86::MMX_PCMPEQWirr, X86::MMX_PCMPEQWirm, 0 }, - { X86::MMX_PCMPGTBirr, X86::MMX_PCMPGTBirm, 0 }, - { X86::MMX_PCMPGTDirr, X86::MMX_PCMPGTDirm, 0 }, - { X86::MMX_PCMPGTWirr, X86::MMX_PCMPGTWirm, 0 }, - { X86::MMX_PHADDSWrr64, X86::MMX_PHADDSWrm64, 0 }, - { X86::MMX_PHADDWrr64, X86::MMX_PHADDWrm64, 0 }, - { X86::MMX_PHADDrr64, X86::MMX_PHADDrm64, 0 }, - { X86::MMX_PHSUBDrr64, X86::MMX_PHSUBDrm64, 0 }, - { X86::MMX_PHSUBSWrr64, X86::MMX_PHSUBSWrm64, 0 }, - { X86::MMX_PHSUBWrr64, X86::MMX_PHSUBWrm64, 0 }, - { X86::MMX_PINSRWirri, X86::MMX_PINSRWirmi, 0 }, - { X86::MMX_PMADDUBSWrr64, X86::MMX_PMADDUBSWrm64, 0 }, - { X86::MMX_PMADDWDirr, X86::MMX_PMADDWDirm, 0 }, - { X86::MMX_PMAXSWirr, X86::MMX_PMAXSWirm, 0 }, - { X86::MMX_PMAXUBirr, X86::MMX_PMAXUBirm, 0 }, - { X86::MMX_PMINSWirr, X86::MMX_PMINSWirm, 0 }, - { X86::MMX_PMINUBirr, X86::MMX_PMINUBirm, 0 }, - { X86::MMX_PMULHRSWrr64, X86::MMX_PMULHRSWrm64, 0 }, - { X86::MMX_PMULHUWirr, X86::MMX_PMULHUWirm, 0 }, - { X86::MMX_PMULHWirr, X86::MMX_PMULHWirm, 0 }, - { X86::MMX_PMULLWirr, X86::MMX_PMULLWirm, 0 }, - { X86::MMX_PMULUDQirr, X86::MMX_PMULUDQirm, 0 }, - { X86::MMX_PORirr, X86::MMX_PORirm, 0 }, - { X86::MMX_PSADBWirr, X86::MMX_PSADBWirm, 0 }, - { X86::MMX_PSHUFBrr64, X86::MMX_PSHUFBrm64, 0 }, - { X86::MMX_PSIGNBrr64, X86::MMX_PSIGNBrm64, 0 }, - { X86::MMX_PSIGNDrr64, X86::MMX_PSIGNDrm64, 0 }, - { X86::MMX_PSIGNWrr64, X86::MMX_PSIGNWrm64, 0 }, - { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 }, - { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 }, - { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 }, - { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 }, - { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 }, - { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 }, - { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 }, - { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 }, - { X86::MMX_PSUBBirr, X86::MMX_PSUBBirm, 0 }, - { X86::MMX_PSUBDirr, X86::MMX_PSUBDirm, 0 }, - { X86::MMX_PSUBQirr, X86::MMX_PSUBQirm, 0 }, - { X86::MMX_PSUBSBirr, X86::MMX_PSUBSBirm, 0 }, - { X86::MMX_PSUBSWirr, X86::MMX_PSUBSWirm, 0 }, - { X86::MMX_PSUBUSBirr, X86::MMX_PSUBUSBirm, 0 }, - { X86::MMX_PSUBUSWirr, X86::MMX_PSUBUSWirm, 0 }, - { X86::MMX_PSUBWirr, X86::MMX_PSUBWirm, 0 }, - { X86::MMX_PUNPCKHBWirr, X86::MMX_PUNPCKHBWirm, 0 }, - { X86::MMX_PUNPCKHDQirr, X86::MMX_PUNPCKHDQirm, 0 }, - { X86::MMX_PUNPCKHWDirr, X86::MMX_PUNPCKHWDirm, 0 }, - { X86::MMX_PUNPCKLBWirr, X86::MMX_PUNPCKLBWirm, 0 }, - { X86::MMX_PUNPCKLDQirr, X86::MMX_PUNPCKLDQirm, 0 }, - { X86::MMX_PUNPCKLWDirr, X86::MMX_PUNPCKLWDirm, 0 }, - { X86::MMX_PXORirr, X86::MMX_PXORirm, 0 }, - - // 3DNow! version of foldable instructions - { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 }, - { X86::PFACCrr, X86::PFACCrm, 0 }, - { X86::PFADDrr, X86::PFADDrm, 0 }, - { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 }, - { X86::PFCMPGErr, X86::PFCMPGErm, 0 }, - { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 }, - { X86::PFMAXrr, X86::PFMAXrm, 0 }, - { X86::PFMINrr, X86::PFMINrm, 0 }, - { X86::PFMULrr, X86::PFMULrm, 0 }, - { X86::PFNACCrr, X86::PFNACCrm, 0 }, - { X86::PFPNACCrr, X86::PFPNACCrm, 0 }, - { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 }, - { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 }, - { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 }, - { X86::PFSUBrr, X86::PFSUBrm, 0 }, - { X86::PFSUBRrr, X86::PFSUBRrm, 0 }, - { X86::PMULHRWrr, X86::PMULHRWrm, 0 }, - - // AVX 128-bit versions of foldable instructions - { X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 }, - { X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 }, - { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 }, - { X86::Int_VCVTSI2SDrr, X86::Int_VCVTSI2SDrm, 0 }, - { X86::VCVTSI2SS64rr, X86::VCVTSI2SS64rm, 0 }, - { X86::Int_VCVTSI2SS64rr, X86::Int_VCVTSI2SS64rm, 0 }, - { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 }, - { X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 }, - { X86::VADDPDrr, X86::VADDPDrm, 0 }, - { X86::VADDPSrr, X86::VADDPSrm, 0 }, - { X86::VADDSDrr, X86::VADDSDrm, 0 }, - { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE }, - { X86::VADDSSrr, X86::VADDSSrm, 0 }, - { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE }, - { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 }, - { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 }, - { X86::VANDNPDrr, X86::VANDNPDrm, 0 }, - { X86::VANDNPSrr, X86::VANDNPSrm, 0 }, - { X86::VANDPDrr, X86::VANDPDrm, 0 }, - { X86::VANDPSrr, X86::VANDPSrm, 0 }, - { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 }, - { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 }, - { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 }, - { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 }, - { X86::VCMPPDrri, X86::VCMPPDrmi, 0 }, - { X86::VCMPPSrri, X86::VCMPPSrmi, 0 }, - { X86::VCMPSDrr, X86::VCMPSDrm, 0 }, - { X86::VCMPSSrr, X86::VCMPSSrm, 0 }, - { X86::VDIVPDrr, X86::VDIVPDrm, 0 }, - { X86::VDIVPSrr, X86::VDIVPSrm, 0 }, - { X86::VDIVSDrr, X86::VDIVSDrm, 0 }, - { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE }, - { X86::VDIVSSrr, X86::VDIVSSrm, 0 }, - { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE }, - { X86::VDPPDrri, X86::VDPPDrmi, 0 }, - { X86::VDPPSrri, X86::VDPPSrmi, 0 }, - { X86::VHADDPDrr, X86::VHADDPDrm, 0 }, - { X86::VHADDPSrr, X86::VHADDPSrm, 0 }, - { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 }, - { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 }, - { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE }, - { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE }, - { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 }, - { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 }, - { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 }, - { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 }, - { X86::VMAXPDrr, X86::VMAXPDrm, 0 }, - { X86::VMAXPSrr, X86::VMAXPSrm, 0 }, - { X86::VMAXSDrr, X86::VMAXSDrm, 0 }, - { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE }, - { X86::VMAXSSrr, X86::VMAXSSrm, 0 }, - { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE }, - { X86::VMINCPDrr, X86::VMINCPDrm, 0 }, - { X86::VMINCPSrr, X86::VMINCPSrm, 0 }, - { X86::VMINCSDrr, X86::VMINCSDrm, 0 }, - { X86::VMINCSSrr, X86::VMINCSSrm, 0 }, - { X86::VMINPDrr, X86::VMINPDrm, 0 }, - { X86::VMINPSrr, X86::VMINPSrm, 0 }, - { X86::VMINSDrr, X86::VMINSDrm, 0 }, - { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE }, - { X86::VMINSSrr, X86::VMINSSrm, 0 }, - { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE }, - { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE }, - { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 }, - { X86::VMULPDrr, X86::VMULPDrm, 0 }, - { X86::VMULPSrr, X86::VMULPSrm, 0 }, - { X86::VMULSDrr, X86::VMULSDrm, 0 }, - { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE }, - { X86::VMULSSrr, X86::VMULSSrm, 0 }, - { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE }, - { X86::VORPDrr, X86::VORPDrm, 0 }, - { X86::VORPSrr, X86::VORPSrm, 0 }, - { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 }, - { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 }, - { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 }, - { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 }, - { X86::VPADDBrr, X86::VPADDBrm, 0 }, - { X86::VPADDDrr, X86::VPADDDrm, 0 }, - { X86::VPADDQrr, X86::VPADDQrm, 0 }, - { X86::VPADDSBrr, X86::VPADDSBrm, 0 }, - { X86::VPADDSWrr, X86::VPADDSWrm, 0 }, - { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 }, - { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 }, - { X86::VPADDWrr, X86::VPADDWrm, 0 }, - { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 }, - { X86::VPANDNrr, X86::VPANDNrm, 0 }, - { X86::VPANDrr, X86::VPANDrm, 0 }, - { X86::VPAVGBrr, X86::VPAVGBrm, 0 }, - { X86::VPAVGWrr, X86::VPAVGWrm, 0 }, - { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 }, - { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 }, - { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 }, - { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 }, - { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 }, - { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 }, - { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 }, - { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 }, - { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 }, - { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 }, - { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 }, - { X86::VPHADDDrr, X86::VPHADDDrm, 0 }, - { X86::VPHADDSWrr128, X86::VPHADDSWrm128, 0 }, - { X86::VPHADDWrr, X86::VPHADDWrm, 0 }, - { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 }, - { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, 0 }, - { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 }, - { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 }, - { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 }, - { X86::VPINSRBrr, X86::VPINSRBrm, 0 }, - { X86::VPINSRDrr, X86::VPINSRDrm, 0 }, - { X86::VPINSRQrr, X86::VPINSRQrm, 0 }, - { X86::VPINSRWrri, X86::VPINSRWrmi, 0 }, - { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 }, - { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 }, - { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 }, - { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 }, - { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 }, - { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 }, - { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 }, - { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 }, - { X86::VPMINSBrr, X86::VPMINSBrm, 0 }, - { X86::VPMINSDrr, X86::VPMINSDrm, 0 }, - { X86::VPMINSWrr, X86::VPMINSWrm, 0 }, - { X86::VPMINUBrr, X86::VPMINUBrm, 0 }, - { X86::VPMINUDrr, X86::VPMINUDrm, 0 }, - { X86::VPMINUWrr, X86::VPMINUWrm, 0 }, - { X86::VPMULDQrr, X86::VPMULDQrm, 0 }, - { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 }, - { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 }, - { X86::VPMULHWrr, X86::VPMULHWrm, 0 }, - { X86::VPMULLDrr, X86::VPMULLDrm, 0 }, - { X86::VPMULLWrr, X86::VPMULLWrm, 0 }, - { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 }, - { X86::VPORrr, X86::VPORrm, 0 }, - { X86::VPSADBWrr, X86::VPSADBWrm, 0 }, - { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 }, - { X86::VPSIGNBrr128, X86::VPSIGNBrm128, 0 }, - { X86::VPSIGNWrr128, X86::VPSIGNWrm128, 0 }, - { X86::VPSIGNDrr128, X86::VPSIGNDrm128, 0 }, - { X86::VPSLLDrr, X86::VPSLLDrm, 0 }, - { X86::VPSLLQrr, X86::VPSLLQrm, 0 }, - { X86::VPSLLWrr, X86::VPSLLWrm, 0 }, - { X86::VPSRADrr, X86::VPSRADrm, 0 }, - { X86::VPSRAWrr, X86::VPSRAWrm, 0 }, - { X86::VPSRLDrr, X86::VPSRLDrm, 0 }, - { X86::VPSRLQrr, X86::VPSRLQrm, 0 }, - { X86::VPSRLWrr, X86::VPSRLWrm, 0 }, - { X86::VPSUBBrr, X86::VPSUBBrm, 0 }, - { X86::VPSUBDrr, X86::VPSUBDrm, 0 }, - { X86::VPSUBQrr, X86::VPSUBQrm, 0 }, - { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 }, - { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 }, - { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 }, - { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 }, - { X86::VPSUBWrr, X86::VPSUBWrm, 0 }, - { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 }, - { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 }, - { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 }, - { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 }, - { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 }, - { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 }, - { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 }, - { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 }, - { X86::VPXORrr, X86::VPXORrm, 0 }, - { X86::VRCPSSr, X86::VRCPSSm, 0 }, - { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE }, - { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 }, - { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE }, - { X86::VROUNDSDr, X86::VROUNDSDm, 0 }, - { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE }, - { X86::VROUNDSSr, X86::VROUNDSSm, 0 }, - { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE }, - { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 }, - { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 }, - { X86::VSQRTSDr, X86::VSQRTSDm, 0 }, - { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE }, - { X86::VSQRTSSr, X86::VSQRTSSm, 0 }, - { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE }, - { X86::VSUBPDrr, X86::VSUBPDrm, 0 }, - { X86::VSUBPSrr, X86::VSUBPSrm, 0 }, - { X86::VSUBSDrr, X86::VSUBSDrm, 0 }, - { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE }, - { X86::VSUBSSrr, X86::VSUBSSrm, 0 }, - { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE }, - { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 }, - { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 }, - { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 }, - { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 }, - { X86::VXORPDrr, X86::VXORPDrm, 0 }, - { X86::VXORPSrr, X86::VXORPSrm, 0 }, - - // AVX 256-bit foldable instructions - { X86::VADDPDYrr, X86::VADDPDYrm, 0 }, - { X86::VADDPSYrr, X86::VADDPSYrm, 0 }, - { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 }, - { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 }, - { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 }, - { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 }, - { X86::VANDPDYrr, X86::VANDPDYrm, 0 }, - { X86::VANDPSYrr, X86::VANDPSYrm, 0 }, - { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 }, - { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 }, - { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 }, - { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 }, - { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 }, - { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 }, - { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 }, - { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 }, - { X86::VDPPSYrri, X86::VDPPSYrmi, 0 }, - { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 }, - { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 }, - { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 }, - { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 }, - { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 }, - { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 }, - { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 }, - { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 }, - { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 }, - { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 }, - { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 }, - { X86::VMINPDYrr, X86::VMINPDYrm, 0 }, - { X86::VMINPSYrr, X86::VMINPSYrm, 0 }, - { X86::VMULPDYrr, X86::VMULPDYrm, 0 }, - { X86::VMULPSYrr, X86::VMULPSYrm, 0 }, - { X86::VORPDYrr, X86::VORPDYrm, 0 }, - { X86::VORPSYrr, X86::VORPSYrm, 0 }, - { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 }, - { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 }, - { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 }, - { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 }, - { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 }, - { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 }, - { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 }, - { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 }, - { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 }, - { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 }, - { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 }, - { X86::VXORPDYrr, X86::VXORPDYrm, 0 }, - { X86::VXORPSYrr, X86::VXORPSYrm, 0 }, - - // AVX2 foldable instructions - { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 }, - { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 }, - { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 }, - { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 }, - { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 }, - { X86::VPADDBYrr, X86::VPADDBYrm, 0 }, - { X86::VPADDDYrr, X86::VPADDDYrm, 0 }, - { X86::VPADDQYrr, X86::VPADDQYrm, 0 }, - { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 }, - { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 }, - { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 }, - { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 }, - { X86::VPADDWYrr, X86::VPADDWYrm, 0 }, - { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 }, - { X86::VPANDNYrr, X86::VPANDNYrm, 0 }, - { X86::VPANDYrr, X86::VPANDYrm, 0 }, - { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 }, - { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 }, - { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 }, - { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 }, - { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 }, - { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 }, - { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 }, - { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 }, - { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 }, - { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 }, - { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 }, - { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 }, - { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 }, - { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 }, - { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 }, - { X86::VPERMDYrr, X86::VPERMDYrm, 0 }, - { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 }, - { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 }, - { X86::VPHADDSWrr256, X86::VPHADDSWrm256, 0 }, - { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 }, - { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 }, - { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 }, - { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 }, - { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 }, - { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 }, - { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 }, - { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 }, - { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 }, - { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 }, - { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 }, - { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 }, - { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 }, - { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 }, - { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 }, - { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 }, - { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 }, - { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 }, - { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 }, - { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 }, - { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 }, - { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 }, - { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 }, - { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 }, - { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 }, - { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 }, - { X86::VPORYrr, X86::VPORYrm, 0 }, - { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 }, - { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 }, - { X86::VPSIGNBYrr256, X86::VPSIGNBYrm256, 0 }, - { X86::VPSIGNWYrr256, X86::VPSIGNWYrm256, 0 }, - { X86::VPSIGNDYrr256, X86::VPSIGNDYrm256, 0 }, - { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 }, - { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 }, - { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 }, - { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 }, - { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 }, - { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 }, - { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 }, - { X86::VPSRADYrr, X86::VPSRADYrm, 0 }, - { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 }, - { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 }, - { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 }, - { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 }, - { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 }, - { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 }, - { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 }, - { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 }, - { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 }, - { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 }, - { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 }, - { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 }, - { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 }, - { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 }, - { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 }, - { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 }, - { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 }, - { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 }, - { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 }, - { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 }, - { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 }, - { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 }, - { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 }, - { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 }, - { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 }, - { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 }, - { X86::VPXORYrr, X86::VPXORYrm, 0 }, - - // FMA4 foldable patterns - { X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE }, - { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE }, - { X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE }, - { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE }, - { X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE }, - { X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE }, - { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE }, - { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE }, - { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE }, - { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE }, - { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE }, - { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE }, - { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE }, - { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE }, - { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE }, - { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE }, - { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE }, - { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE }, - { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE }, - { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE }, - { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE }, - { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE }, - { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE }, - { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE }, - { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE }, - { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE }, - { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE }, - { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE }, - { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE }, - { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE }, - { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE }, - { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE }, - { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE }, - { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE }, - { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE }, - { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE }, - { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE }, - { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE }, - { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE }, - { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE }, - - // XOP foldable instructions - { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 }, - { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 }, - { X86::VPCOMBri, X86::VPCOMBmi, 0 }, - { X86::VPCOMDri, X86::VPCOMDmi, 0 }, - { X86::VPCOMQri, X86::VPCOMQmi, 0 }, - { X86::VPCOMWri, X86::VPCOMWmi, 0 }, - { X86::VPCOMUBri, X86::VPCOMUBmi, 0 }, - { X86::VPCOMUDri, X86::VPCOMUDmi, 0 }, - { X86::VPCOMUQri, X86::VPCOMUQmi, 0 }, - { X86::VPCOMUWri, X86::VPCOMUWmi, 0 }, - { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 }, - { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 }, - { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 }, - { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 }, - { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 }, - { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 }, - { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 }, - { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 }, - { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 }, - { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 }, - { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 }, - { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 }, - { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 }, - { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 }, - { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 }, - { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 }, - { X86::VPPERMrrr, X86::VPPERMrmr, 0 }, - { X86::VPROTBrr, X86::VPROTBrm, 0 }, - { X86::VPROTDrr, X86::VPROTDrm, 0 }, - { X86::VPROTQrr, X86::VPROTQrm, 0 }, - { X86::VPROTWrr, X86::VPROTWrm, 0 }, - { X86::VPSHABrr, X86::VPSHABrm, 0 }, - { X86::VPSHADrr, X86::VPSHADrm, 0 }, - { X86::VPSHAQrr, X86::VPSHAQrm, 0 }, - { X86::VPSHAWrr, X86::VPSHAWrm, 0 }, - { X86::VPSHLBrr, X86::VPSHLBrm, 0 }, - { X86::VPSHLDrr, X86::VPSHLDrm, 0 }, - { X86::VPSHLQrr, X86::VPSHLQrm, 0 }, - { X86::VPSHLWrr, X86::VPSHLWrm, 0 }, - - // BMI/BMI2 foldable instructions - { X86::ANDN32rr, X86::ANDN32rm, 0 }, - { X86::ANDN64rr, X86::ANDN64rm, 0 }, - { X86::MULX32rr, X86::MULX32rm, 0 }, - { X86::MULX64rr, X86::MULX64rm, 0 }, - { X86::PDEP32rr, X86::PDEP32rm, 0 }, - { X86::PDEP64rr, X86::PDEP64rm, 0 }, - { X86::PEXT32rr, X86::PEXT32rm, 0 }, - { X86::PEXT64rr, X86::PEXT64rm, 0 }, - - // ADX foldable instructions - { X86::ADCX32rr, X86::ADCX32rm, 0 }, - { X86::ADCX64rr, X86::ADCX64rm, 0 }, - { X86::ADOX32rr, X86::ADOX32rm, 0 }, - { X86::ADOX64rr, X86::ADOX64rm, 0 }, - - // AVX-512 foldable instructions - { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, - { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, - { X86::VADDSDZrr, X86::VADDSDZrm, 0 }, - { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE }, - { X86::VADDSSZrr, X86::VADDSSZrm, 0 }, - { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE }, - { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 }, - { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 }, - { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 }, - { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 }, - { X86::VANDPDZrr, X86::VANDPDZrm, 0 }, - { X86::VANDPSZrr, X86::VANDPSZrm, 0 }, - { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 }, - { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 }, - { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 }, - { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE }, - { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 }, - { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE }, - { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 }, - { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 }, - { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 }, - { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE }, - { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 }, - { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE }, - { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 }, - { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 }, - { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 }, - { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 }, - { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 }, - { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 }, - { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 }, - { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 }, - { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 }, - { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 }, - { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 }, - { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 }, - { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 }, - { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 }, - { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 }, - { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE }, - { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 }, - { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE }, - { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 }, - { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 }, - { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 }, - { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 }, - { X86::VMINPDZrr, X86::VMINPDZrm, 0 }, - { X86::VMINPSZrr, X86::VMINPSZrm, 0 }, - { X86::VMINSDZrr, X86::VMINSDZrm, 0 }, - { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE }, - { X86::VMINSSZrr, X86::VMINSSZrm, 0 }, - { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE }, - { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE }, - { X86::VMULPDZrr, X86::VMULPDZrm, 0 }, - { X86::VMULPSZrr, X86::VMULPSZrm, 0 }, - { X86::VMULSDZrr, X86::VMULSDZrm, 0 }, - { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE }, - { X86::VMULSSZrr, X86::VMULSSZrm, 0 }, - { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE }, - { X86::VORPDZrr, X86::VORPDZrm, 0 }, - { X86::VORPSZrr, X86::VORPSZrm, 0 }, - { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 }, - { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 }, - { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 }, - { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 }, - { X86::VPADDBZrr, X86::VPADDBZrm, 0 }, - { X86::VPADDDZrr, X86::VPADDDZrm, 0 }, - { X86::VPADDQZrr, X86::VPADDQZrm, 0 }, - { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 }, - { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 }, - { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 }, - { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 }, - { X86::VPADDWZrr, X86::VPADDWZrm, 0 }, - { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 }, - { X86::VPANDDZrr, X86::VPANDDZrm, 0 }, - { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 }, - { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 }, - { X86::VPANDQZrr, X86::VPANDQZrm, 0 }, - { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 }, - { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 }, - { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 }, - { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 }, - { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 }, - { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 }, - { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 }, - { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 }, - { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 }, - { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 }, - { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 }, - { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 }, - { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 }, - { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 }, - { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 }, - { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 }, - { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 }, - { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 }, - { X86::VPERMBZrr, X86::VPERMBZrm, 0 }, - { X86::VPERMDZrr, X86::VPERMDZrm, 0 }, - { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 }, - { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 }, - { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 }, - { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 }, - { X86::VPERMQZrr, X86::VPERMQZrm, 0 }, - { X86::VPERMWZrr, X86::VPERMWZrm, 0 }, - { X86::VPINSRBZrr, X86::VPINSRBZrm, 0 }, - { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 }, - { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 }, - { X86::VPINSRWZrr, X86::VPINSRWZrm, 0 }, - { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 }, - { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 }, - { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 }, - { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 }, - { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 }, - { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 }, - { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 }, - { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 }, - { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 }, - { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 }, - { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 }, - { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 }, - { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 }, - { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 }, - { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 }, - { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 }, - { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 }, - { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 }, - { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 }, - { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 }, - { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 }, - { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 }, - { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 }, - { X86::VPORDZrr, X86::VPORDZrm, 0 }, - { X86::VPORQZrr, X86::VPORQZrm, 0 }, - { X86::VPSADBWZ512rr, X86::VPSADBWZ512rm, 0 }, - { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 }, - { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 }, - { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 }, - { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 }, - { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 }, - { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 }, - { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 }, - { X86::VPSRADZrr, X86::VPSRADZrm, 0 }, - { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 }, - { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 }, - { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 }, - { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 }, - { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 }, - { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 }, - { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 }, - { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 }, - { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 }, - { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 }, - { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 }, - { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 }, - { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 }, - { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 }, - { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 }, - { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 }, - { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 }, - { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 }, - { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 }, - { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 }, - { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 }, - { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 }, - { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 }, - { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 }, - { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 }, - { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 }, - { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 }, - { X86::VPXORDZrr, X86::VPXORDZrm, 0 }, - { X86::VPXORQZrr, X86::VPXORQZrm, 0 }, - { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 }, - { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 }, - { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 }, - { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 }, - { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 }, - { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE }, - { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 }, - { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE }, - { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 }, - { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 }, - { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 }, - { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 }, - { X86::VXORPDZrr, X86::VXORPDZrm, 0 }, - { X86::VXORPSZrr, X86::VXORPSZrm, 0 }, - - // AVX-512{F,VL} foldable instructions - { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 }, - { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 }, - { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 }, - { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 }, - { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 }, - { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 }, - { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 }, - { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 }, - { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 }, - { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 }, - { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 }, - { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 }, - { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 }, - { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 }, - { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 }, - { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 }, - { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 }, - { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 }, - { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 }, - { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 }, - { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 }, - { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 }, - { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 }, - { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 }, - { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rm, 0 }, - { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rm, 0 }, - { X86::VINSERTI32x4Z256rr,X86::VINSERTI32x4Z256rm, 0 }, - { X86::VINSERTI64x2Z256rr,X86::VINSERTI64x2Z256rm, 0 }, - { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 }, - { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 }, - { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 }, - { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 }, - { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 }, - { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 }, - { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 }, - { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 }, - { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 }, - { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 }, - { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 }, - { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 }, - { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 }, - { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 }, - { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 }, - { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 }, - { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 }, - { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 }, - { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 }, - { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 }, - { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 }, - { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 }, - { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 }, - { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 }, - { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 }, - { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 }, - { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 }, - { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 }, - { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 }, - { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 }, - { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 }, - { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 }, - { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 }, - { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 }, - { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 }, - { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 }, - { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 }, - { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 }, - { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 }, - { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 }, - { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 }, - { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 }, - { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 }, - { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 }, - { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 }, - { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 }, - { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 }, - { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 }, - { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 }, - { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 }, - { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 }, - { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 }, - { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 }, - { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 }, - { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 }, - { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 }, - { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 }, - { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 }, - { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 }, - { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 }, - { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 }, - { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 }, - { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 }, - { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 }, - { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 }, - { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 }, - { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 }, - { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 }, - { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 }, - { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 }, - { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 }, - { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 }, - { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 }, - { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 }, - { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 }, - { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 }, - { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 }, - { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 }, - { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 }, - { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 }, - { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 }, - { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 }, - { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 }, - { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 }, - { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 }, - { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 }, - { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 }, - { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 }, - { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 }, - { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 }, - { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 }, - { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 }, - { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 }, - { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 }, - { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 }, - { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 }, - { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 }, - { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 }, - { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 }, - { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 }, - { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 }, - { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 }, - { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 }, - { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 }, - { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 }, - { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 }, - { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 }, - { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 }, - { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 }, - { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 }, - { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 }, - { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 }, - { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 }, - { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 }, - { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 }, - { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 }, - { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 }, - { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 }, - { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 }, - { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 }, - { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 }, - { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 }, - { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 }, - { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 }, - { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 }, - { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 }, - { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 }, - { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 }, - { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 }, - { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 }, - { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 }, - { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 }, - { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 }, - { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 }, - { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 }, - { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 }, - { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 }, - { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 }, - { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 }, - { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 }, - { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 }, - { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 }, - { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 }, - { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 }, - { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 }, - { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 }, - { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 }, - { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 }, - { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 }, - { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 }, - { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 }, - { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 }, - { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 }, - { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 }, - { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 }, - { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 }, - { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 }, - { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 }, - { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 }, - { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 }, - { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 }, - { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 }, - { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 }, - { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 }, - { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 }, - { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 }, - { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 }, - { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 }, - { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 }, - { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 }, - { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 }, - { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 }, - { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 }, - { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 }, - { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 }, - { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 }, - { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 }, - { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 }, - { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 }, - { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 }, - { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 }, - { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 }, - { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 }, - { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 }, - { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 }, - { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 }, - { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 }, - { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 }, - { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 }, - { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 }, - { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 }, - { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 }, - { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 }, - { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 }, - { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 }, - { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 }, - { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 }, - { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 }, - { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 }, - { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 }, - { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 }, - { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 }, - { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 }, - { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 }, - { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 }, - { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 }, - { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 }, - { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 }, - { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 }, - { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 }, - { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 }, - { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 }, - { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 }, - { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 }, - { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 }, - { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 }, - { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 }, - { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 }, - { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 }, - { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 }, - { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 }, - { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 }, - { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 }, - { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 }, - { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 }, - { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 }, - { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 }, - { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 }, - { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 }, - { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 }, - { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 }, - { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 }, - { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 }, - { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 }, - { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 }, - { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 }, - { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 }, - { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 }, - { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 }, - { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 }, - { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 }, - { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 }, - { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 }, - { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 }, - { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 }, - { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 }, - { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 }, - { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 }, - { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 }, - { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 }, - { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 }, - { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 }, - - // AVX-512 masked foldable instructions - { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, - { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, - { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 }, - { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 }, - { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 }, - { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 }, - { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 }, - { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 }, - { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 }, - { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 }, - { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 }, - { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE }, - { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 }, - { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 }, - { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 }, - { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 }, - { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 }, - { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE }, - { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 }, - { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 }, - { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 }, - { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 }, - { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 }, - { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 }, - { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 }, - { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 }, - { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 }, - { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 }, - { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 }, - { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 }, - { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 }, - { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 }, - { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 }, - { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 }, - - // AVX-512VL 256-bit masked foldable instructions - { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE }, - { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE }, - { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 }, - { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 }, - { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 }, - { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 }, - { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 }, - { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 }, - { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 }, - { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 }, - { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE }, - { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE }, - { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 }, - { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 }, - { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 }, - { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 }, - { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 }, - { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 }, - { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE }, - { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 }, - { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 }, - { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 }, - { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 }, - { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 }, - { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 }, - { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 }, - { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 }, - { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 }, - { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 }, - { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 }, - { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 }, - - // AVX-512VL 128-bit masked foldable instructions - { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE }, - { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 }, - { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 }, - { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 }, - { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 }, - { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 }, - { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 }, - { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE }, - { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE }, - { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 }, - { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 }, - { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 }, - { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 }, - { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 }, - { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 }, - { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 }, - { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 }, - { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 }, - { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 }, - { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 }, - { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 }, - - // AES foldable instructions - { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 }, - { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 }, - { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 }, - { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 }, - { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 }, - { X86::VAESDECrr, X86::VAESDECrm, 0 }, - { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 }, - { X86::VAESENCrr, X86::VAESENCrm, 0 }, - - // SHA foldable instructions - { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 }, - { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 }, - { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 }, - { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 }, - { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 }, - { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 }, - { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 } - }; - for (X86MemoryFoldTableEntry Entry : MemoryFoldTable2) { AddTableEntry(RegOp2MemOpTable2, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, @@ -2435,1103 +150,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) Entry.Flags | TB_INDEX_2 | TB_FOLDED_LOAD); } - static const X86MemoryFoldTableEntry MemoryFoldTable3[] = { - // FMA4 foldable patterns - { X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE }, - { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE }, - { X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE }, - { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE }, - { X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE }, - { X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE }, - { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE }, - { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE }, - { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE }, - { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE }, - { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE }, - { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE }, - { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE }, - { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE }, - { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE }, - { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE }, - { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE }, - { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE }, - { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE }, - { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE }, - { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE }, - { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE }, - { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE }, - { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE }, - { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE }, - { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE }, - { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE }, - { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE }, - { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE }, - { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE }, - { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE }, - { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE }, - { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE }, - { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE }, - { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE }, - { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE }, - { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE }, - { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE }, - { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE }, - { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE }, - - // XOP foldable instructions - { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 }, - { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 }, - { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 }, - { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 }, - { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 }, - { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 }, - { X86::VPPERMrrr, X86::VPPERMrrm, 0 }, - - // AVX-512 instructions with 3 source operands. - { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 }, - { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 }, - { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 }, - { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 }, - { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 }, - { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 }, - { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 }, - { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 }, - { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 }, - { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 }, - { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 }, - { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 }, - { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 }, - { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 }, - - // AVX-512VL 256-bit instructions with 3 source operands. - { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 }, - { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 }, - { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 }, - { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 }, - { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 }, - { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 }, - { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 }, - { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 }, - { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 }, - { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 }, - { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 }, - { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 }, - { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 }, - { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 }, - - // AVX-512VL 128-bit instructions with 3 source operands. - { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 }, - { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 }, - { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 }, - { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 }, - { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 }, - { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 }, - { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 }, - { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 }, - { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 }, - { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 }, - { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 }, - { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 }, - { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 }, - { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 }, - - // AVX-512 masked instructions - { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 }, - { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 }, - { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE }, - { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE }, - { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 }, - { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 }, - { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 }, - { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 }, - { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 }, - { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 }, - { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 }, - { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 }, - { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE }, - { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE }, - { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 }, - { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 }, - { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 }, - { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 }, - { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 }, - { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 }, - { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 }, - { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 }, - { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 }, - { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 }, - { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 }, - { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 }, - { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, 0 }, - { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, 0 }, - { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 }, - { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 }, - { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 }, - { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 }, - { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, 0 }, - { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, 0 }, - { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 }, - { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 }, - { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE }, - { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE }, - { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 }, - { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 }, - { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 }, - { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 }, - { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 }, - { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 }, - { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 }, - { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 }, - { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 }, - { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 }, - { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 }, - { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 }, - { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 }, - { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 }, - { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 }, - { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 }, - { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 }, - { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 }, - { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 }, - { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 }, - { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 }, - { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 }, - { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 }, - { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 }, - { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 }, - { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 }, - { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 }, - { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 }, - { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 }, - { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 }, - { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 }, - { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 }, - { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 }, - { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 }, - { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 }, - { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 }, - { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 }, - { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 }, - { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 }, - { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 }, - { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 }, - { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 }, - { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 }, - { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 }, - { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 }, - { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 }, - { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 }, - { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 }, - { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 }, - { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 }, - { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 }, - { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 }, - { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 }, - { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 }, - { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 }, - { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 }, - { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 }, - { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 }, - { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 }, - { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 }, - { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 }, - { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 }, - { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 }, - { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 }, - { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 }, - { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 }, - { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 }, - { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 }, - { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 }, - { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 }, - { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 }, - { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 }, - { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 }, - { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 }, - { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 }, - { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 }, - { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 }, - { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 }, - { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 }, - { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 }, - { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 }, - { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 }, - { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 }, - { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 }, - { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 }, - { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 }, - { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 }, - { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 }, - { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 }, - { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 }, - { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 }, - { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 }, - { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 }, - { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 }, - { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 }, - { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE }, - { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE }, - { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 }, - { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 }, - { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 }, - { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 }, - { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 }, - { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 }, - - // AVX-512{F,VL} masked arithmetic instructions 256-bit - { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 }, - { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 }, - { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 }, - { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 }, - { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 }, - { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 }, - { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 }, - { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 }, - { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 }, - { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 }, - { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 }, - { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 }, - { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 }, - { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 }, - { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 }, - { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 }, - { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 }, - { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 }, - { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 }, - { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 }, - { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 }, - { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 }, - { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 }, - { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 }, - { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 }, - { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 }, - { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 }, - { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 }, - { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 }, - { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 }, - { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 }, - { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 }, - { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 }, - { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 }, - { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 }, - { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 }, - { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 }, - { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 }, - { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 }, - { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 }, - { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 }, - { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 }, - { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 }, - { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 }, - { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 }, - { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 }, - { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 }, - { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 }, - { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 }, - { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 }, - { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 }, - { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 }, - { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 }, - { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 }, - { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 }, - { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 }, - { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 }, - { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 }, - { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 }, - { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 }, - { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 }, - { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 }, - { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 }, - { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 }, - { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 }, - { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 }, - { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 }, - { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 }, - { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 }, - { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 }, - { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 }, - { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 }, - { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 }, - { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 }, - { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 }, - { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 }, - { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 }, - { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 }, - { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 }, - { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 }, - { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 }, - { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 }, - { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 }, - { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 }, - { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 }, - { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 }, - { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 }, - { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 }, - { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 }, - { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 }, - { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 }, - { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 }, - { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 }, - { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 }, - { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 }, - { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 }, - { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 }, - { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 }, - { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 }, - { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 }, - { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 }, - { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 }, - { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 }, - { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 }, - { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 }, - { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 }, - { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 }, - { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 }, - { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 }, - { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 }, - { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 }, - { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 }, - { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 }, - { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 }, - { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 }, - { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 }, - { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 }, - { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 }, - { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 }, - { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 }, - { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 }, - { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 }, - { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 }, - { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 }, - { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 }, - - // AVX-512{F,VL} masked arithmetic instructions 128-bit - { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 }, - { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 }, - { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 }, - { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 }, - { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 }, - { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 }, - { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 }, - { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 }, - { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 }, - { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 }, - { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 }, - { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 }, - { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }, - { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 }, - { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 }, - { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 }, - { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 }, - { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 }, - { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 }, - { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 }, - { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 }, - { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 }, - { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 }, - { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 }, - { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 }, - { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 }, - { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 }, - { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 }, - { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 }, - { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 }, - { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 }, - { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 }, - { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 }, - { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 }, - { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 }, - { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 }, - { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 }, - { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 }, - { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 }, - { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 }, - { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 }, - { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 }, - { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 }, - { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 }, - { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 }, - { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 }, - { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 }, - { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 }, - { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 }, - { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 }, - { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 }, - { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 }, - { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 }, - { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 }, - { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 }, - { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 }, - { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 }, - { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 }, - { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 }, - { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 }, - { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 }, - { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 }, - { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 }, - { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 }, - { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 }, - { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 }, - { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 }, - { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 }, - { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 }, - { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 }, - { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 }, - { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 }, - { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 }, - { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 }, - { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 }, - { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 }, - { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 }, - { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 }, - { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 }, - { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 }, - { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 }, - { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 }, - { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 }, - { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 }, - { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 }, - { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 }, - { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 }, - { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 }, - { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 }, - { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 }, - { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 }, - { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 }, - { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 }, - { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 }, - { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 }, - { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 }, - { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 }, - { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 }, - { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 }, - { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 }, - { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 }, - { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 }, - { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 }, - { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 }, - { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 }, - { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 }, - { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 }, - { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 }, - { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 }, - { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 }, - { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 }, - { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 }, - { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 }, - { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 }, - { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 }, - { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 }, - { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 }, - - // AVX-512 masked foldable instructions - { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, - { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, - { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 }, - { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 }, - { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 }, - { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 }, - { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 }, - { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 }, - { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 }, - { X86::VPERMQZrik, X86::VPERMQZmik, 0 }, - { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 }, - { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE }, - { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 }, - { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 }, - { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 }, - { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 }, - { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 }, - { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE }, - { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 }, - { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 }, - { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 }, - { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 }, - { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 }, - { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 }, - { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 }, - { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 }, - { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 }, - { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 }, - { X86::VPSRADZrik, X86::VPSRADZmik, 0 }, - { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 }, - { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 }, - { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 }, - { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 }, - { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 }, - - // AVX-512VL 256-bit masked foldable instructions - { X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE }, - { X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE }, - { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 }, - { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 }, - { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 }, - { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 }, - { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 }, - { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 }, - { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 }, - { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 }, - { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE }, - { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE }, - { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 }, - { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 }, - { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 }, - { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE }, - { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE }, - { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE }, - { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 }, - { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 }, - { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 }, - { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE }, - { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 }, - { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 }, - { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 }, - { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 }, - { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 }, - { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 }, - { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 }, - { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 }, - { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 }, - { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 }, - { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 }, - { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 }, - - // AVX-512VL 128-bit masked foldable instructions - { X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE }, - { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 }, - { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 }, - { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 }, - { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 }, - { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 }, - { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 }, - { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE }, - { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE }, - { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 }, - { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 }, - { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 }, - { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 }, - { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 }, - { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 }, - { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 }, - { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 }, - { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 }, - { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 }, - { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 }, - { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 }, - }; - for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) { AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, Entry.RegOp, Entry.MemOp, // Index 3, folded load Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD); } - auto I = X86InstrFMA3Info::rm_begin(); - auto E = X86InstrFMA3Info::rm_end(); - for (; I != E; ++I) { - if (!I.getGroup()->isKMasked()) { - // Intrinsic forms need to pass TB_NO_REVERSE. - if (I.getGroup()->isIntrinsic()) { - AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, - I.getRegOpcode(), I.getMemOpcode(), - TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE); - } else { - AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable, - I.getRegOpcode(), I.getMemOpcode(), - TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD); - } - } - } - - static const X86MemoryFoldTableEntry MemoryFoldTable4[] = { - // AVX-512 foldable masked instructions - { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 }, - { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 }, - { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE }, - { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE }, - { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 }, - { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 }, - { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 }, - { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 }, - { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 }, - { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 }, - { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 }, - { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 }, - { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE }, - { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE }, - { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 }, - { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 }, - { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 }, - { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 }, - { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 }, - { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 }, - { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 }, - { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 }, - { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 }, - { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 }, - { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 }, - { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 }, - { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, 0 }, - { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, 0 }, - { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 }, - { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 }, - { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 }, - { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 }, - { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, 0 }, - { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, 0 }, - { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 }, - { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 }, - { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE }, - { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE }, - { X86::VORPDZrrk, X86::VORPDZrmk, 0 }, - { X86::VORPSZrrk, X86::VORPSZrmk, 0 }, - { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 }, - { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 }, - { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 }, - { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 }, - { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 }, - { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 }, - { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 }, - { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 }, - { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 }, - { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 }, - { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 }, - { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 }, - { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 }, - { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 }, - { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 }, - { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 }, - { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 }, - { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 }, - { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 }, - { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 }, - { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 }, - { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 }, - { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 }, - { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 }, - { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 }, - { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 }, - { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 }, - { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 }, - { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 }, - { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 }, - { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 }, - { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 }, - { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 }, - { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 }, - { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 }, - { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 }, - { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 }, - { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 }, - { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 }, - { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 }, - { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 }, - { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 }, - { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 }, - { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 }, - { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 }, - { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 }, - { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 }, - { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 }, - { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 }, - { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 }, - { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 }, - { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 }, - { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 }, - { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 }, - { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 }, - { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 }, - { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 }, - { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 }, - { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 }, - { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 }, - { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 }, - { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 }, - { X86::VPORDZrrk, X86::VPORDZrmk, 0 }, - { X86::VPORQZrrk, X86::VPORQZrmk, 0 }, - { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 }, - { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 }, - { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 }, - { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 }, - { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 }, - { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 }, - { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 }, - { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 }, - { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 }, - { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 }, - { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 }, - { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 }, - { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 }, - { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 }, - { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 }, - { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 }, - { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 }, - { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 }, - { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 }, - { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 }, - { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 }, - { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 }, - { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 }, - { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 }, - { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 }, - { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 }, - { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 }, - { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 }, - { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 }, - { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 }, - { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 }, - { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 }, - { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 }, - { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 }, - { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 }, - { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 }, - { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 }, - { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 }, - { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 }, - { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 }, - { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 }, - { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 }, - { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE }, - { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE }, - { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 }, - { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 }, - { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 }, - { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 }, - { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 }, - { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 }, - - // AVX-512{F,VL} foldable masked instructions 256-bit - { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 }, - { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 }, - { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 }, - { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 }, - { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 }, - { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 }, - { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 }, - { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 }, - { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 }, - { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 }, - { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 }, - { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 }, - { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 }, - { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 }, - { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 }, - { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 }, - { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 }, - { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 }, - { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 }, - { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 }, - { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 }, - { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 }, - { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 }, - { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 }, - { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 }, - { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 }, - { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 }, - { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 }, - { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 }, - { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 }, - { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 }, - { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 }, - { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 }, - { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 }, - { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 }, - { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 }, - { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 }, - { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 }, - { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 }, - { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 }, - { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 }, - { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 }, - { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 }, - { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 }, - { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 }, - { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 }, - { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 }, - { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 }, - { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 }, - { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 }, - { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 }, - { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 }, - { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 }, - { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 }, - { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 }, - { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 }, - { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 }, - { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 }, - { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 }, - { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 }, - { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 }, - { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 }, - { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 }, - { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 }, - { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 }, - { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 }, - { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 }, - { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 }, - { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 }, - { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 }, - { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 }, - { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 }, - { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 }, - { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 }, - { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 }, - { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 }, - { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 }, - { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 }, - { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 }, - { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 }, - { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 }, - { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 }, - { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 }, - { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 }, - { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 }, - { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 }, - { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 }, - { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 }, - { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 }, - { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 }, - { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 }, - { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 }, - { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 }, - { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 }, - { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 }, - { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 }, - { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 }, - { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 }, - { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 }, - { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 }, - { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 }, - { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 }, - { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 }, - { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 }, - { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 }, - { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 }, - { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 }, - { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 }, - { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 }, - { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 }, - { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 }, - { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 }, - { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 }, - { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 }, - { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 }, - { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 }, - { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 }, - { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 }, - { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 }, - { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 }, - { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 }, - { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 }, - { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 }, - { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 }, - { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 }, - { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 }, - { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 }, - { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 }, - { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 }, - { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 }, - { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 }, - { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 }, - { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 }, - { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 }, - { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 }, - { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 }, - { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 }, - { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 }, - { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 }, - - // AVX-512{F,VL} foldable instructions 128-bit - { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 }, - { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 }, - { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 }, - { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 }, - { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 }, - { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 }, - { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 }, - { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 }, - { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 }, - { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 }, - { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 }, - { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 }, - { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }, - { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 }, - { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 }, - { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 }, - { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 }, - { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 }, - { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 }, - { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 }, - { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 }, - { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 }, - { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 }, - { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 }, - { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 }, - { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 }, - { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 }, - { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 }, - { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 }, - { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 }, - { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 }, - { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 }, - { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 }, - { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 }, - { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 }, - { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 }, - { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 }, - { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 }, - { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 }, - { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 }, - { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 }, - { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 }, - { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 }, - { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 }, - { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 }, - { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 }, - { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 }, - { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 }, - { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 }, - { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 }, - { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 }, - { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 }, - { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 }, - { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 }, - { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 }, - { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 }, - { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 }, - { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 }, - { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 }, - { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 }, - { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 }, - { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 }, - { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 }, - { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 }, - { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 }, - { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 }, - { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 }, - { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 }, - { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 }, - { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 }, - { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 }, - { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 }, - { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 }, - { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 }, - { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 }, - { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 }, - { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 }, - { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 }, - { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 }, - { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 }, - { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 }, - { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 }, - { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 }, - { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 }, - { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 }, - { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 }, - { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 }, - { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 }, - { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 }, - { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 }, - { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 }, - { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 }, - { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 }, - { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 }, - { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 }, - { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 }, - { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 }, - { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 }, - { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 }, - { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 }, - { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 }, - { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 }, - { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 }, - { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 }, - { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 }, - { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 }, - { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 }, - { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 }, - { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 }, - { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 }, - { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 }, - { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 }, - { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 }, - { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 }, - { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 }, - { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 }, - { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 }, - { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 }, - { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 }, - { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 }, - { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 }, - { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 }, - { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 }, - { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 }, - { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 }, - { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 }, - { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 }, - { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 }, - { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 }, - { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 }, - { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 }, - - // 512-bit three source instructions with zero masking. - { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 }, - { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 }, - { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 }, - { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 }, - { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 }, - { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 }, - { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 }, - { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 }, - { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 }, - { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 }, - { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 }, - { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 }, - { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 }, - { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 }, - - // 256-bit three source instructions with zero masking. - { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 }, - { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 }, - { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 }, - { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 }, - { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 }, - { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 }, - { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 }, - { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 }, - { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 }, - { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 }, - { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 }, - { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 }, - { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 }, - { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 }, - - // 128-bit three source instructions with zero masking. - { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 }, - { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 }, - { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 }, - { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 }, - { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 }, - { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 }, - { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 }, - { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 }, - { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 }, - { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 }, - { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 }, - { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 }, - { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 }, - { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 }, - }; for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) { AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, @@ -3539,20 +163,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) // Index 4, folded load Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD); } - for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) { - if (I.getGroup()->isKMasked()) { - // Intrinsics need to pass TB_NO_REVERSE. - if (I.getGroup()->isIntrinsic()) { - AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, - I.getRegOpcode(), I.getMemOpcode(), - TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE); - } else { - AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable, - I.getRegOpcode(), I.getMemOpcode(), - TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD); - } - } - } } void @@ -5930,7 +2540,7 @@ void X86InstrInfo::replaceBranchWithTailCall( // Add implicit uses and defs of all live regs potentially clobbered by the // call. This way they still appear live across the call. - LivePhysRegs LiveRegs(&getRegisterInfo()); + LivePhysRegs LiveRegs(getRegisterInfo()); LiveRegs.addLiveOuts(MBB); SmallVector, 8> Clobbers; LiveRegs.stepForward(*MIB, Clobbers); @@ -6545,9 +3155,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // first frame index. // See X86ISelLowering.cpp - X86::hasCopyImplyingStackAdjustment. - const TargetRegisterInfo *TRI = &getRegisterInfo(); + const TargetRegisterInfo &TRI = getRegisterInfo(); MachineBasicBlock::LivenessQueryResult LQR = - MBB.computeRegisterLiveness(TRI, AX, MI); + MBB.computeRegisterLiveness(&TRI, AX, MI); // We do not want to save and restore AX if we do not have to. // Moreover, if we do so whereas AX is dead, we would need to set // an undef flag on the use of AX, otherwise the verifier will @@ -6564,7 +3174,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, } // AX contains the top most register in the aliasing hierarchy. // It may not be live, but one of its aliases may be. - for (MCRegAliasIterator AI(AX, TRI, true); + for (MCRegAliasIterator AI(AX, &TRI, true); AI.isValid() && LQR != MachineBasicBlock::LQR_Live; ++AI) LQR = LPR.contains(*AI) ? MachineBasicBlock::LQR_Live : MachineBasicBlock::LQR_Dead; @@ -8374,7 +4984,7 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, unsigned Opc = LoadMI.getOpcode(); unsigned UserOpc = UserMI.getOpcode(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const TargetRegisterClass *RC = + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(LoadMI.getOperand(0).getReg()); unsigned RegSize = TRI.getRegSizeInBits(*RC); @@ -10473,7 +7083,7 @@ X86InstrInfo::getOutliningType(MachineInstr &MI) const { // catch it. if (MI.modifiesRegister(X86::RSP, &RI) || MI.readsRegister(X86::RSP, &RI) || MI.getDesc().hasImplicitUseOfPhysReg(X86::RSP) || - MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) + MI.getDesc().hasImplicitDefOfPhysReg(X86::RSP)) return MachineOutlinerInstrType::Illegal; // Outlined calls change the instruction pointer, so don't read from it. diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td index 01df07e1715..fab70e918b8 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td +++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td @@ -813,6 +813,8 @@ def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">; def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">, AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">; +def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">, + AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">; def HasPFI : Predicate<"Subtarget->hasPFI()">, AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">; def HasERI : Predicate<"Subtarget->hasERI()">, @@ -1436,11 +1438,14 @@ def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src), // Longer forms that use a ModR/M byte. Needed for disassembler let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8ri">; def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16ri">; def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32ri">; } } // SchedRW @@ -1563,13 +1568,17 @@ def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteMove] in { def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src), - "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{b}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV8rr">; def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src), - "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16; + "mov{w}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize16, + FoldGenData<"MOV16rr">; def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src), - "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32; + "mov{l}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, OpSize32, + FoldGenData<"MOV32rr">; def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), - "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>; + "mov{q}\t{$src, $dst|$dst, $src}", [], IIC_MOV>, + FoldGenData<"MOV64rr">; } let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in { diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td index dc3800ce381..2c047722db2 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td +++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td @@ -248,7 +248,8 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (MMX_X86movd2w (x86mmx VR64:$src)))], - IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>; + IIC_MMX_MOV_REG_MM>, Sched<[WriteMove]>, + FoldGenData<"MMX_MOVD64rr">; let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), @@ -277,7 +278,7 @@ def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", [], - IIC_MMX_MOVQ_RR>; + IIC_MMX_MOVQ_RR>, FoldGenData<"MMX_MOVQ64rr">; } } // SchedRW diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td index f73d85e7e01..a3e67720930 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td +++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td @@ -507,7 +507,8 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, multiclass sse12_move_rr { + string asm_opr, Domain d = GenericDomain, + string Name> { let isCommutable = 1 in def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), @@ -521,15 +522,17 @@ multiclass sse12_move_rr, Sched<[WriteFShuffle]>; + [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>, + FoldGenData; } multiclass sse12_move { + Domain d = GenericDomain, string Name> { // AVX defm V#NAME : sse12_move_rr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d, + "V"#Name>, VEX_4V, VEX_LIG, VEX_WIG; def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -539,7 +542,7 @@ multiclass sse12_move; + "\t{$src2, $dst|$dst, $src2}", d, Name>; } def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), @@ -563,9 +566,9 @@ multiclass sse12_move_rm, XS; + SSEPackedSingle, "MOVSS">, XS; defm MOVSD : sse12_move, XD; + SSEPackedDouble, "MOVSD">, XD; let canFoldAsLoad = 1, isReMaterializable = 1 in { defm MOVSS : sse12_move_rm, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPSrr">; def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVAPDrr">; def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPSrr">; def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_WIG, + FoldGenData<"VMOVUPDrr">; def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPSYrr">; def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVAPDYrr">; def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPSYrr">; def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVUPDYrr">; } // Aliases to help the assembler pick two byte VEX encodings by swapping the @@ -938,16 +949,16 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, SchedRW = [WriteFShuffle] in { def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPSrr">; def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movapd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVAPDrr">; def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPSrr">; def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movupd\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>; + IIC_SSE_MOVU_P_RR>, FoldGenData<"MOVUPDrr">; } let Predicates = [HasAVX, NoVLX] in { @@ -3752,17 +3763,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQArr">; def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVA_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQAYrr">; def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>, - VEX, VEX_WIG; + VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">; def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src), "movdqu\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG; + IIC_SSE_MOVU_P_RR>, VEX, VEX_L, VEX_WIG, + FoldGenData<"VMOVDQUYrr">; } let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, @@ -3820,11 +3833,12 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", [], - IIC_SSE_MOVA_P_RR>; + IIC_SSE_MOVA_P_RR>, FoldGenData<"MOVDQArr">; def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", - [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; + [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>, + FoldGenData<"MOVDQUrr">; } } // SchedRW @@ -5915,7 +5929,7 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { (ins VR128:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - []>, Sched<[WriteShuffle]>; + []>, Sched<[WriteShuffle]>, FoldGenData; let hasSideEffects = 0, mayStore = 1, SchedRW = [WriteShuffleLd, WriteRMW] in diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td index 53224431c0e..5dde2d07bab 100644 --- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td +++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td @@ -111,7 +111,7 @@ multiclass xop3op opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, - XOP_4V, VEX_W, Sched<[WriteVarVecShift]>; + XOP_4V, VEX_W, Sched<[WriteVarVecShift]>, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -282,7 +282,7 @@ multiclass xop4op opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -318,7 +318,7 @@ multiclass xop4op_int opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W; + []>, XOP_4V, VEX_W, FoldGenData; } let ExeDomain = SSEPackedInt in { @@ -357,7 +357,7 @@ multiclass xop_vpermil2 Opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W; + []>, VEX_W, FoldGenData; } let ExeDomain = SSEPackedDouble in { diff --git a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp index 61956f74182..77dead8d241 100644 --- a/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp +++ b/contrib/llvm/lib/Target/X86/X86InstructionSelector.cpp @@ -302,6 +302,26 @@ unsigned X86InstructionSelector::getLoadStoreOp(LLT &Ty, const RegisterBank &RB, : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 256) { + if (Alignment >= 32) + return Isload ? (HasVLX ? X86::VMOVAPSZ256rm + : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX + : X86::VMOVAPSYrm) + : (HasVLX ? X86::VMOVAPSZ256mr + : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX + : X86::VMOVAPSYmr); + else + return Isload ? (HasVLX ? X86::VMOVUPSZ256rm + : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX + : X86::VMOVUPSYrm) + : (HasVLX ? X86::VMOVUPSZ256mr + : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX + : X86::VMOVUPSYmr); + } else if (Ty.isVector() && Ty.getSizeInBits() == 512) { + if (Alignment >= 64) + return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; + else + return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } return Opc; } diff --git a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp index da724f5d898..979aaee110a 100644 --- a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp +++ b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.cpp @@ -35,6 +35,7 @@ X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, setLegalizerInfoSSE1(); setLegalizerInfoSSE2(); setLegalizerInfoSSE41(); + setLegalizerInfoAVX(); setLegalizerInfoAVX2(); setLegalizerInfoAVX512(); setLegalizerInfoAVX512DQ(); @@ -209,6 +210,18 @@ void X86LegalizerInfo::setLegalizerInfoSSE41() { setAction({G_MUL, v4s32}, Legal); } +void X86LegalizerInfo::setLegalizerInfoAVX() { + if (!Subtarget.hasAVX()) + return; + + const LLT v8s32 = LLT::vector(8, 32); + const LLT v4s64 = LLT::vector(4, 64); + + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v8s32, v4s64}) + setAction({MemOp, Ty}, Legal); +} + void X86LegalizerInfo::setLegalizerInfoAVX2() { if (!Subtarget.hasAVX2()) return; @@ -239,6 +252,10 @@ void X86LegalizerInfo::setLegalizerInfoAVX512() { setAction({G_MUL, v16s32}, Legal); + for (unsigned MemOp : {G_LOAD, G_STORE}) + for (auto Ty : {v16s32, v8s64}) + setAction({MemOp, Ty}, Legal); + /************ VLX *******************/ if (!Subtarget.hasVLX()) return; diff --git a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.h b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.h index ab5405a7042..135950a95f8 100644 --- a/contrib/llvm/lib/Target/X86/X86LegalizerInfo.h +++ b/contrib/llvm/lib/Target/X86/X86LegalizerInfo.h @@ -39,6 +39,7 @@ private: void setLegalizerInfoSSE1(); void setLegalizerInfoSSE2(); void setLegalizerInfoSSE41(); + void setLegalizerInfoAVX(); void setLegalizerInfoAVX2(); void setLegalizerInfoAVX512(); void setLegalizerInfoAVX512DQ(); diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp index 2b1f43bffd7..84ec98484f8 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp @@ -286,6 +286,7 @@ void X86Subtarget::initializeEnvironment() { HasCDI = false; HasPFI = false; HasDQI = false; + HasVPOPCNTDQ = false; HasBWI = false; HasVLX = false; HasADX = false; diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h index a9f3a2aee1b..550e95c39ab 100644 --- a/contrib/llvm/lib/Target/X86/X86Subtarget.h +++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h @@ -270,6 +270,9 @@ protected: /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; + /// Processor has AVX-512 population count Instructions + bool HasVPOPCNTDQ; + /// Processor has AVX-512 Doubleword and Quadword instructions bool HasDQI; @@ -494,6 +497,7 @@ public: bool slow3OpsLEA() const { return Slow3OpsLEA; } bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } + bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp index a97db6fde45..5cf2a8c25d8 100644 --- a/contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp +++ b/contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp @@ -124,6 +124,7 @@ struct CoroCleanup : FunctionPass { if (!L) AU.setPreservesAll(); } + StringRef getPassName() const override { return "Coroutine Cleanup"; } }; } diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp index e8bb0ca99d8..b5298918616 100644 --- a/contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp +++ b/contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp @@ -208,6 +208,9 @@ struct CoroEarly : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); } + StringRef getPassName() const override { + return "Lower early coroutine intrinsics"; + } }; } diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp index c6ac3f614ff..acb22449142 100644 --- a/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -301,6 +301,7 @@ struct CoroElide : FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); } + StringRef getPassName() const override { return "Coroutine Elision"; } }; } diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp index 417d57f7625..85e9003ec3c 100644 --- a/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -799,9 +799,9 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { splitAround(CSI, "CoroSuspend"); } - // Put fallthrough CoroEnd into its own block. Note: Shape::buildFrom places - // the fallthrough coro.end as the first element of CoroEnds array. - splitAround(Shape.CoroEnds.front(), "CoroEnd"); + // Put CoroEnds into their own blocks. + for (CoroEndInst *CE : Shape.CoroEnds) + splitAround(CE, "CoroEnd"); // Transforms multi-edge PHI Nodes, so that any value feeding into a PHI will // never has its definition separated from the PHI by the suspend point. @@ -813,19 +813,24 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { IRBuilder<> Builder(F.getContext()); SpillInfo Spills; - // See if there are materializable instructions across suspend points. - for (Instruction &I : instructions(F)) - if (materializable(I)) - for (User *U : I.users()) - if (Checker.isDefinitionAcrossSuspend(I, U)) - Spills.emplace_back(&I, U); + for (int Repeat = 0; Repeat < 4; ++Repeat) { + // See if there are materializable instructions across suspend points. + for (Instruction &I : instructions(F)) + if (materializable(I)) + for (User *U : I.users()) + if (Checker.isDefinitionAcrossSuspend(I, U)) + Spills.emplace_back(&I, U); - // Rewrite materializable instructions to be materialized at the use point. - DEBUG(dump("Materializations", Spills)); - rewriteMaterializableInstructions(Builder, Spills); + if (Spills.empty()) + break; + + // Rewrite materializable instructions to be materialized at the use point. + DEBUG(dump("Materializations", Spills)); + rewriteMaterializableInstructions(Builder, Spills); + Spills.clear(); + } // Collect the spills for arguments and other not-materializable values. - Spills.clear(); for (Argument &A : F.args()) for (User *U : A.users()) if (Checker.isDefinitionAcrossSuspend(A, U)) @@ -847,8 +852,6 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) { if (I.getType()->isTokenTy()) report_fatal_error( "token definition is separated from the use by a suspend point"); - assert(!materializable(I) && - "rewriteMaterializable did not do its job"); Spills.emplace_back(&I, U); } } diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index 12eb1678982..cd549e4be28 100644 --- a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -228,15 +228,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape, SmallVector Returns; - if (DISubprogram *SP = F.getSubprogram()) { - // If we have debug info, add mapping for the metadata nodes that should not - // be cloned by CloneFunctionInfo. - auto &MD = VMap.MD(); - MD[SP->getUnit()].reset(SP->getUnit()); - MD[SP->getType()].reset(SP->getType()); - MD[SP->getFile()].reset(SP->getFile()); - } - CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns); + CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/false, Returns); // Remove old returns. for (ReturnInst *Return : Returns) @@ -509,12 +501,87 @@ static void simplifySuspendPoints(coro::Shape &Shape) { S.resize(N); } +static SmallPtrSet getCoroBeginPredBlocks(CoroBeginInst *CB) { + // Collect all blocks that we need to look for instructions to relocate. + SmallPtrSet RelocBlocks; + SmallVector Work; + Work.push_back(CB->getParent()); + + do { + BasicBlock *Current = Work.pop_back_val(); + for (BasicBlock *BB : predecessors(Current)) + if (RelocBlocks.count(BB) == 0) { + RelocBlocks.insert(BB); + Work.push_back(BB); + } + } while (!Work.empty()); + return RelocBlocks; +} + +static SmallPtrSet +getNotRelocatableInstructions(CoroBeginInst *CoroBegin, + SmallPtrSetImpl &RelocBlocks) { + SmallPtrSet DoNotRelocate; + // Collect all instructions that we should not relocate + SmallVector Work; + + // Start with CoroBegin and terminators of all preceding blocks. + Work.push_back(CoroBegin); + BasicBlock *CoroBeginBB = CoroBegin->getParent(); + for (BasicBlock *BB : RelocBlocks) + if (BB != CoroBeginBB) + Work.push_back(BB->getTerminator()); + + // For every instruction in the Work list, place its operands in DoNotRelocate + // set. + do { + Instruction *Current = Work.pop_back_val(); + DoNotRelocate.insert(Current); + for (Value *U : Current->operands()) { + auto *I = dyn_cast(U); + if (!I) + continue; + if (isa(U)) + continue; + if (DoNotRelocate.count(I) == 0) { + Work.push_back(I); + DoNotRelocate.insert(I); + } + } + } while (!Work.empty()); + return DoNotRelocate; +} + +static void relocateInstructionBefore(CoroBeginInst *CoroBegin, Function &F) { + // Analyze which non-alloca instructions are needed for allocation and + // relocate the rest to after coro.begin. We need to do it, since some of the + // targets of those instructions may be placed into coroutine frame memory + // for which becomes available after coro.begin intrinsic. + + auto BlockSet = getCoroBeginPredBlocks(CoroBegin); + auto DoNotRelocateSet = getNotRelocatableInstructions(CoroBegin, BlockSet); + + Instruction *InsertPt = CoroBegin->getNextNode(); + BasicBlock &BB = F.getEntryBlock(); // TODO: Look at other blocks as well. + for (auto B = BB.begin(), E = BB.end(); B != E;) { + Instruction &I = *B++; + if (isa(&I)) + continue; + if (&I == CoroBegin) + break; + if (DoNotRelocateSet.count(&I)) + continue; + I.moveBefore(InsertPt); + } +} + static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) { coro::Shape Shape(F); if (!Shape.CoroBegin) return; simplifySuspendPoints(Shape); + relocateInstructionBefore(Shape.CoroBegin, F); buildCoroutineFrame(F, Shape); replaceFrameSize(Shape); @@ -660,6 +727,7 @@ struct CoroSplit : public CallGraphSCCPass { void getAnalysisUsage(AnalysisUsage &AU) const override { CallGraphSCCPass::getAnalysisUsage(AU); } + StringRef getPassName() const override { return "Coroutine Splitting"; } }; } diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp index 8dff2fb3be8..4c417f1c55e 100644 --- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -558,17 +558,17 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( std::vector Users(DuplicateFunction->user_begin(), DuplicateFunction->user_end()); Function *CurrentCaller = nullptr; + std::unique_ptr TempBFI; BlockFrequencyInfo *CurrentCallerBFI = nullptr; auto ComputeCurrBFI = [&,this](Function *Caller) { // For the old pass manager: if (!GetBFI) { - if (CurrentCallerBFI) - delete CurrentCallerBFI; DominatorTree DT(*Caller); LoopInfo LI(DT); BranchProbabilityInfo BPI(*Caller, LI); - CurrentCallerBFI = new BlockFrequencyInfo(*Caller, BPI, LI); + TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI)); + CurrentCallerBFI = TempBFI.get(); } else { // New pass manager: CurrentCallerBFI = &(*GetBFI)(*Caller); @@ -591,10 +591,6 @@ void PartialInlinerImpl::computeCallsiteToProfCountMap( else CallSiteToProfCountMap[User] = 0; } - if (!GetBFI) { - if (CurrentCallerBFI) - delete CurrentCallerBFI; - } } Function *PartialInlinerImpl::unswitchFunction(Function *F) { diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index ec06d5f9fb0..9fd3a9021a2 100644 --- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -155,6 +155,10 @@ static cl::opt cl::Hidden, cl::desc("Enable the simple loop unswitch pass.")); +static cl::opt EnableGVNSink( + "enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = on)")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -307,6 +311,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createEarlyCSEPass()); // Catch trivial redundancies if (EnableGVNHoist) MPM.add(createGVNHoistPass()); + if (EnableGVNSink) { + MPM.add(createGVNSinkPass()); + MPM.add(createCFGSimplificationPass()); + } + // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); MPM.add(createJumpThreadingPass()); // Thread jumps. @@ -904,6 +913,12 @@ void PassManagerBuilder::populateLTOPassManager(legacy::PassManagerBase &PM) { if (OptLevel != 0) addLTOOptimizationPasses(PM); + else { + // The whole-program-devirt pass needs to run at -O0 because only it knows + // about the llvm.type.checked.load intrinsic: it needs to both lower the + // intrinsic itself and handle it in the summary. + PM.add(createWholeProgramDevirtPass(ExportSummary, nullptr)); + } // Create a function that performs CFI checks for cross-DSO calls with targets // in the current module. diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 733eeb1767a..7204bf51768 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -861,12 +861,9 @@ bool InstCombiner::willNotOverflowSignedSub(const Value *LHS, ComputeNumSignBits(RHS, 0, &CxtI) > 1) return true; - unsigned BitWidth = LHS->getType()->getScalarSizeInBits(); - KnownBits LHSKnown(BitWidth); - computeKnownBits(LHS, LHSKnown, 0, &CxtI); + KnownBits LHSKnown = computeKnownBits(LHS, 0, &CxtI); - KnownBits RHSKnown(BitWidth); - computeKnownBits(RHS, RHSKnown, 0, &CxtI); + KnownBits RHSKnown = computeKnownBits(RHS, 0, &CxtI); // Subtraction of two 2's complement numbers having identical signs will // never overflow. @@ -1059,9 +1056,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { // If this is a xor that was canonicalized from a sub, turn it back into // a sub and fuse this add with it. if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { - IntegerType *IT = cast(I.getType()); - KnownBits LHSKnown(IT->getBitWidth()); - computeKnownBits(XorLHS, LHSKnown, 0, &I); + KnownBits LHSKnown = computeKnownBits(XorLHS, 0, &I); if ((XorRHS->getValue() | LHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), XorLHS); @@ -1577,8 +1572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Turn this into a xor if LHS is 2^n-1 and the remaining bits are known // zero. if (Op0C->isMask()) { - KnownBits RHSKnown(BitWidth); - computeKnownBits(Op1, RHSKnown, 0, &I); + KnownBits RHSKnown = computeKnownBits(Op1, 0, &I); if ((*Op0C | RHSKnown.Zero).isAllOnesValue()) return BinaryOperator::CreateXor(Op1, Op0); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4227b2d01be..1f8319efb3b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1610,17 +1610,13 @@ Value *InstCombiner::foldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS, Value *Mask = nullptr; Value *Masked = nullptr; if (LAnd->getOperand(0) == RAnd->getOperand(0) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI, - &DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(1), false, 0, CxtI) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(1), false, 0, CxtI)) { Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1)); Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask); } else if (LAnd->getOperand(1) == RAnd->getOperand(1) && - isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT) && - isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC, - CxtI, &DT)) { + isKnownToBeAPowerOfTwo(LAnd->getOperand(0), false, 0, CxtI) && + isKnownToBeAPowerOfTwo(RAnd->getOperand(0), false, 0, CxtI)) { Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0)); Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask); } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index face7abcc95..92a38f26dde 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1378,9 +1378,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { if (!IT) return nullptr; - unsigned BitWidth = IT->getBitWidth(); - KnownBits Known(BitWidth); - IC.computeKnownBits(Op0, Known, 0, &II); + KnownBits Known = IC.computeKnownBits(Op0, 0, &II); // Create a mask for bits above (ctlz) or below (cttz) the first known one. bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; @@ -1401,7 +1399,9 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) { // If the input to cttz/ctlz is known to be non-zero, // then change the 'ZeroIsUndef' parameter to 'true' // because we know the zero behavior can't affect the result. - if (Known.One != 0 || isKnownNonZero(Op0, IC.getDataLayout())) { + if (Known.One != 0 || + isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II, + &IC.getDominatorTree())) { if (!match(II.getArgOperand(1), m_One())) { II.setOperand(1, IC.Builder->getTrue()); return &II; diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp index f4bf5221f6a..766939c56df 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -692,8 +692,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // This only works for EQ and NE ICI->isEquality()) { // If Op1C some other power of two, convert: - KnownBits Known(Op1C->getType()->getBitWidth()); - computeKnownBits(ICI->getOperand(0), Known, 0, &CI); + KnownBits Known = computeKnownBits(ICI->getOperand(0), 0, &CI); APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1? @@ -737,14 +736,11 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI, // may lead to additional simplifications. if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { if (IntegerType *ITy = dyn_cast(CI.getType())) { - uint32_t BitWidth = ITy->getBitWidth(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); - KnownBits KnownLHS(BitWidth); - KnownBits KnownRHS(BitWidth); - computeKnownBits(LHS, KnownLHS, 0, &CI); - computeKnownBits(RHS, KnownRHS, 0, &CI); + KnownBits KnownLHS = computeKnownBits(LHS, 0, &CI); + KnownBits KnownRHS = computeKnownBits(RHS, 0, &CI); if (KnownLHS.Zero == KnownRHS.Zero && KnownLHS.One == KnownRHS.One) { APInt KnownBits = KnownLHS.Zero | KnownLHS.One; @@ -1063,9 +1059,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { // the icmp and sext into bitwise/integer operations. if (ICI->hasOneUse() && ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ - unsigned BitWidth = Op1C->getType()->getBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(Op0, Known, 0, &CI); + KnownBits Known = computeKnownBits(Op0, 0, &CI); APInt KnownZeroMask(~Known.Zero); if (KnownZeroMask.isPowerOf2()) { @@ -1104,7 +1098,7 @@ Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { // Distribute the bit over the whole bit width. In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), - BitWidth - 1), "sext"); + KnownZeroMask.getBitWidth() - 1), "sext"); } if (CI.getType() == In->getType()) diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 6492eaedae9..2c2b7317a1c 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1402,9 +1402,9 @@ Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) { if (*C == 0 && Pred == ICmpInst::ICMP_SGT) { SelectPatternResult SPR = matchSelectPattern(X, A, B); if (SPR.Flavor == SPF_SMIN) { - if (isKnownPositive(A, DL)) + if (isKnownPositive(A, DL, 0, &AC, &Cmp, &DT)) return new ICmpInst(Pred, B, Cmp.getOperand(1)); - if (isKnownPositive(B, DL)) + if (isKnownPositive(B, DL, 0, &AC, &Cmp, &DT)) return new ICmpInst(Pred, A, Cmp.getOperand(1)); } } @@ -1478,8 +1478,7 @@ Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp, // of the high bits truncated out of x are known. unsigned DstBits = Trunc->getType()->getScalarSizeInBits(), SrcBits = X->getType()->getScalarSizeInBits(); - KnownBits Known(SrcBits); - computeKnownBits(X, Known, 0, &Cmp); + KnownBits Known = computeKnownBits(X, 0, &Cmp); // If all the high bits are known, we can do this xform. if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) { @@ -3030,18 +3029,21 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { break; case Instruction::Add: case Instruction::Sub: - case Instruction::Xor: + case Instruction::Xor: { if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); - // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b - if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - if (CI->getValue().isSignMask()) { + + const APInt *C; + if (match(BO0->getOperand(1), m_APInt(C))) { + // icmp u/s (a ^ signmask), (b ^ signmask) --> icmp s/u a, b + if (C->isSignMask()) { ICmpInst::Predicate NewPred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); return new ICmpInst(NewPred, BO0->getOperand(0), BO1->getOperand(0)); } - if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) { + // icmp u/s (a ^ maxsignval), (b ^ maxsignval) --> icmp s/u' a, b + if (BO0->getOpcode() == Instruction::Xor && C->isMaxSignedValue()) { ICmpInst::Predicate NewPred = I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate(); NewPred = I.getSwappedPredicate(NewPred); @@ -3049,26 +3051,30 @@ Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) { } } break; - case Instruction::Mul: + } + case Instruction::Mul: { if (!I.isEquality()) break; - if (ConstantInt *CI = dyn_cast(BO0->getOperand(1))) { - // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask - // Mask = -1 >> count-trailing-zeros(Cst). - if (!CI->isZero() && !CI->isOne()) { - const APInt &AP = CI->getValue(); - ConstantInt *Mask = ConstantInt::get( - I.getContext(), - APInt::getLowBitsSet(AP.getBitWidth(), - AP.getBitWidth() - AP.countTrailingZeros())); + const APInt *C; + if (match(BO0->getOperand(1), m_APInt(C)) && *C != 0 && *C != 1) { + // icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask) + // Mask = -1 >> count-trailing-zeros(C). + if (unsigned TZs = C->countTrailingZeros()) { + Constant *Mask = ConstantInt::get( + BO0->getType(), + APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs)); Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask); Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask); return new ICmpInst(Pred, And1, And2); } + // If there are no trailing zeros in the multiplier, just eliminate + // the multiplies (no masking is needed): + // icmp eq/ne (X * C), (Y * C) --> icmp eq/ne X, Y + return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0)); } break; - + } case Instruction::UDiv: case Instruction::LShr: if (I.isSigned() || !BO0->isExact() || !BO1->isExact()) @@ -4497,7 +4503,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // if A is a power of 2. if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) && match(Op1, m_Zero()) && - isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality()) + isKnownToBeAPowerOfTwo(A, false, 0, &I) && I.isEquality()) return new ICmpInst(I.getInversePredicate(), Builder->CreateAnd(A, B), Op1); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index 6829be86885..56f133de3de 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -540,6 +540,12 @@ public: return llvm::computeKnownBits(V, DL, Depth, &AC, CxtI, &DT); } + bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero = false, + unsigned Depth = 0, + const Instruction *CxtI = nullptr) { + return llvm::isKnownToBeAPowerOfTwo(V, DL, OrZero, Depth, &AC, CxtI, &DT); + } + bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth = 0, const Instruction *CxtI = nullptr) const { return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index fc13854f8fe..4d408359eee 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -47,9 +47,7 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC, // inexact. Similarly for <<. BinaryOperator *I = dyn_cast(V); if (I && I->isLogicalShift() && - isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0, - &IC.getAssumptionCache(), &CxtI, - &IC.getDominatorTree())) { + IC.isKnownToBeAPowerOfTwo(I->getOperand(0), false, 0, &CxtI)) { // We know that this is an exact/nuw shift and that the input is a // non-zero context as well. if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) { @@ -1240,7 +1238,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return BO; } - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is // INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have @@ -1487,7 +1485,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { I.getType()); // X urem Y -> X and Y-1, where Y is a power of 2, - if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) { + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { Constant *N1 = Constant::getAllOnesValue(I.getType()); Value *Add = Builder->CreateAdd(Op1, N1); return BinaryOperator::CreateAnd(Op0, Add); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp index 219effce7ba..b40d067b281 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -44,7 +44,8 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { Value *A; Constant *C; if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C)))) - if (isKnownNonNegative(A, DL) && isKnownNonNegative(C, DL)) + if (isKnownNonNegative(A, DL, 0, &AC, &I, &DT) && + isKnownNonNegative(C, DL, 0, &AC, &I, &DT)) return BinaryOperator::Create( I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A); diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 4028a92771a..5df55f01b83 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -158,8 +158,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.Zero, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // Output known-0 are known to be clear if zero in either the LHS | RHS. APInt IKnownZero = RHSKnown.Zero | LHSKnown.Zero; @@ -192,8 +192,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, SimplifyDemandedBits(I, 0, DemandedMask & ~RHSKnown.One, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // Output known-0 bits are only known if clear in both the LHS & RHS. APInt IKnownZero = RHSKnown.Zero & LHSKnown.Zero; @@ -224,8 +224,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 1, DemandedMask, RHSKnown, Depth + 1) || SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt IKnownZero = (RHSKnown.Zero & LHSKnown.Zero) | @@ -313,8 +313,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 2, DemandedMask, RHSKnown, Depth + 1) || SimplifyDemandedBits(I, 1, DemandedMask, LHSKnown, Depth + 1)) return I; - assert(!(RHSKnown.Zero & RHSKnown.One) && "Bits known to be one AND zero?"); - assert(!(LHSKnown.Zero & LHSKnown.One) && "Bits known to be one AND zero?"); + assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?"); + assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. if (ShrinkDemandedConstant(I, 1, DemandedMask) || @@ -325,15 +325,19 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, Known.One = RHSKnown.One & LHSKnown.One; Known.Zero = RHSKnown.Zero & LHSKnown.Zero; break; + case Instruction::ZExt: case Instruction::Trunc: { - unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits(); - DemandedMask = DemandedMask.zext(truncBf); - Known = Known.zext(truncBf); - if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); + + APInt InputDemandedMask = DemandedMask.zextOrTrunc(SrcBitWidth); + KnownBits InputKnown(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedMask, InputKnown, Depth + 1)) return I; - DemandedMask = DemandedMask.trunc(BitWidth); - Known = Known.trunc(BitWidth); - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + Known = Known.zextOrTrunc(BitWidth); + // Any top bits are known to be zero. + if (BitWidth > SrcBitWidth) + Known.Zero.setBitsFrom(SrcBitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case Instruction::BitCast: @@ -355,56 +359,36 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; - case Instruction::ZExt: { - // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); - - DemandedMask = DemandedMask.trunc(SrcBitWidth); - Known = Known.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, DemandedMask, Known, Depth + 1)) - return I; - DemandedMask = DemandedMask.zext(BitWidth); - Known = Known.zext(BitWidth); - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); - // The top bits are known to be zero. - Known.Zero.setBitsFrom(SrcBitWidth); - break; - } case Instruction::SExt: { // Compute the bits in the result that are not present in the input. - unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits(); + unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - APInt InputDemandedBits = DemandedMask & - APInt::getLowBitsSet(BitWidth, SrcBitWidth); + APInt InputDemandedBits = DemandedMask.trunc(SrcBitWidth); - APInt NewBits(APInt::getBitsSetFrom(BitWidth, SrcBitWidth)); // If any of the sign extended bits are demanded, we know that the sign // bit is demanded. - if ((NewBits & DemandedMask) != 0) + if (DemandedMask.getActiveBits() > SrcBitWidth) InputDemandedBits.setBit(SrcBitWidth-1); - InputDemandedBits = InputDemandedBits.trunc(SrcBitWidth); - Known = Known.trunc(SrcBitWidth); - if (SimplifyDemandedBits(I, 0, InputDemandedBits, Known, Depth + 1)) + KnownBits InputKnown(SrcBitWidth); + if (SimplifyDemandedBits(I, 0, InputDemandedBits, InputKnown, Depth + 1)) return I; - InputDemandedBits = InputDemandedBits.zext(BitWidth); - Known = Known.zext(BitWidth); - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); - - // If the sign bit of the input is known set or clear, then we know the - // top bits of the result. // If the input sign bit is known zero, or if the NewBits are not demanded // convert this into a zero extension. - if (Known.Zero[SrcBitWidth-1] || (NewBits & ~DemandedMask) == NewBits) { - // Convert to ZExt cast + if (InputKnown.isNonNegative() || + DemandedMask.getActiveBits() <= SrcBitWidth) { + // Convert to ZExt cast. CastInst *NewCast = new ZExtInst(I->getOperand(0), VTy, I->getName()); return InsertNewInstWith(NewCast, *I); - } else if (Known.One[SrcBitWidth-1]) { // Input sign bit known set - Known.One |= NewBits; - } + } + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + Known = InputKnown.sext(BitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } case Instruction::Add: @@ -467,7 +451,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShiftAmt; Known.One <<= ShiftAmt; // low bits known zero. @@ -491,7 +475,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShiftAmt); Known.One.lshrInPlace(ShiftAmt); if (ShiftAmt) @@ -535,7 +519,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (SimplifyDemandedBits(I, 0, DemandedMaskIn, Known, Depth + 1)) return I; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); // Compute the new bits that are at the top now. APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); Known.Zero.lshrInPlace(ShiftAmt); @@ -590,7 +574,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One)) Known.One |= ~LowBits; - assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); break; } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 7ed9fd566b3..2730afc5c5b 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1963,6 +1963,7 @@ static bool isAllocSiteRemovable(Instruction *AI, // Give up the moment we see something we can't handle. return false; + case Instruction::AddrSpaceCast: case Instruction::BitCast: case Instruction::GetElementPtr: Users.emplace_back(I); @@ -2064,7 +2065,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { replaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()), C->isFalseWhenEqual())); - } else if (isa(I) || isa(I)) { + } else if (isa(I) || isa(I) || + isa(I)) { replaceInstUsesWith(*I, UndefValue::get(I->getType())); } eraseInstFromFunction(*I); @@ -2180,8 +2182,7 @@ Instruction *InstCombiner::visitReturnInst(ReturnInst &RI) { // There might be assume intrinsics dominating this return that completely // determine the value. If so, constant fold it. - KnownBits Known(VTy->getPrimitiveSizeInBits()); - computeKnownBits(ResultOp, Known, 0, &RI); + KnownBits Known = computeKnownBits(ResultOp, 0, &RI); if (Known.isConstant()) RI.setOperand(0, Constant::getIntegerValue(VTy, Known.getConstant())); @@ -2242,9 +2243,7 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { return &SI; } - unsigned BitWidth = cast(Cond->getType())->getBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(Cond, Known, 0, &SI); + KnownBits Known = computeKnownBits(Cond, 0, &SI); unsigned LeadingKnownZeros = Known.countMinLeadingZeros(); unsigned LeadingKnownOnes = Known.countMinLeadingOnes(); @@ -2257,12 +2256,12 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) { LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes()); } - unsigned NewWidth = BitWidth - std::max(LeadingKnownZeros, LeadingKnownOnes); + unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes); // Shrink the condition operand if the new type is smaller than the old type. // This may produce a non-standard type for the switch, but that's ok because // the backend should extend back to a legal type for the target. - if (NewWidth > 0 && NewWidth < BitWidth) { + if (NewWidth > 0 && NewWidth < Known.getBitWidth()) { IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth); Builder->SetInsertPoint(&SI); Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc"); @@ -2841,9 +2840,7 @@ bool InstCombiner::run() { // a value even when the operands are not all constants. Type *Ty = I->getType(); if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) { - unsigned BitWidth = Ty->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(I, Known, /*Depth*/0, I); + KnownBits Known = computeKnownBits(I, /*Depth*/0, I); if (Known.isConstant()) { Constant *C = ConstantInt::get(Ty, Known.getConstant()); DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C << diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 990bcec109d..1e30dbf6b55 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -180,7 +180,7 @@ static cl::opt static cl::opt PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden, cl::desc("Use this option to turn on/off " - "memory instrinsic size profiling.")); + "memory intrinsic size profiling.")); // Command line option to turn on CFG dot dump after profile annotation. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 4bc0a713311..300085eccb0 100644 --- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -401,7 +401,10 @@ static bool shouldInstrumentBlock(const Function &F, const BasicBlock *BB, if (Options.NoPrune || &F.getEntryBlock() == BB) return true; - return !(isFullDominator(BB, DT) || isFullPostDominator(BB, PDT)); + // Do not instrument full dominators, or full post-dominators with multiple + // predecessors. + return !isFullDominator(BB, DT) + && !(isFullPostDominator(BB, PDT) && !BB->getSinglePredecessor()); } bool SanitizerCoverageModule::runOnFunction(Function &F) { diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp index f62e111460c..c3810366bf2 100644 --- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp @@ -164,9 +164,9 @@ Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst, /// \brief Given \p BBs as input, find another set of BBs which collectively /// dominates \p BBs and have the minimal sum of frequencies. Return the BB /// set found in \p BBs. -void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, - BasicBlock *Entry, - SmallPtrSet &BBs) { +static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI, + BasicBlock *Entry, + SmallPtrSet &BBs) { assert(!BBs.count(Entry) && "Assume Entry is not in BBs"); // Nodes on the current path to the root. SmallPtrSet Path; diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp index 0490d93f645..0d6e0538261 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp @@ -80,9 +80,10 @@ MaxRecurseDepth("max-recurse-depth", cl::Hidden, cl::init(1000), cl::ZeroOrMore, struct llvm::GVN::Expression { uint32_t opcode; Type *type; + bool commutative; SmallVector varargs; - Expression(uint32_t o = ~2U) : opcode(o) {} + Expression(uint32_t o = ~2U) : opcode(o), commutative(false) {} bool operator==(const Expression &other) const { if (opcode != other.opcode) @@ -246,6 +247,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!"); if (e.varargs[0] > e.varargs[1]) std::swap(e.varargs[0], e.varargs[1]); + e.commutative = true; } if (CmpInst *C = dyn_cast(I)) { @@ -256,6 +258,7 @@ GVN::Expression GVN::ValueTable::createExpr(Instruction *I) { Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (C->getOpcode() << 8) | Predicate; + e.commutative = true; } else if (InsertValueInst *E = dyn_cast(I)) { for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end(); II != IE; ++II) @@ -281,6 +284,7 @@ GVN::Expression GVN::ValueTable::createCmpExpr(unsigned Opcode, Predicate = CmpInst::getSwappedPredicate(Predicate); } e.opcode = (Opcode << 8) | Predicate; + e.commutative = true; return e; } @@ -348,25 +352,25 @@ GVN::ValueTable::~ValueTable() = default; /// add - Insert a value into the table with a specified value number. void GVN::ValueTable::add(Value *V, uint32_t num) { valueNumbering.insert(std::make_pair(V, num)); + if (PHINode *PN = dyn_cast(V)) + NumberingPhi[num] = PN; } uint32_t GVN::ValueTable::lookupOrAddCall(CallInst *C) { if (AA->doesNotAccessMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } else if (AA->onlyReadsMemory(C)) { Expression exp = createExpr(C); - uint32_t &e = expressionNumbering[exp]; - if (!e) { - e = nextValueNumber++; - valueNumbering[C] = e; - return e; + auto ValNum = assignExpNewValueNum(exp); + if (ValNum.second) { + valueNumbering[C] = ValNum.first; + return ValNum.first; } if (!MD) { - e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[C] = e; return e; } @@ -522,23 +526,29 @@ uint32_t GVN::ValueTable::lookupOrAdd(Value *V) { case Instruction::ExtractValue: exp = createExtractvalueExpr(cast(I)); break; + case Instruction::PHI: + valueNumbering[V] = nextValueNumber; + NumberingPhi[nextValueNumber] = cast(V); + return nextValueNumber++; default: valueNumbering[V] = nextValueNumber; return nextValueNumber++; } - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; + uint32_t e = assignExpNewValueNum(exp).first; valueNumbering[V] = e; return e; } /// Returns the value number of the specified value. Fails if /// the value has not yet been numbered. -uint32_t GVN::ValueTable::lookup(Value *V) const { +uint32_t GVN::ValueTable::lookup(Value *V, bool Verify) const { DenseMap::const_iterator VI = valueNumbering.find(V); - assert(VI != valueNumbering.end() && "Value not numbered?"); - return VI->second; + if (Verify) { + assert(VI != valueNumbering.end() && "Value not numbered?"); + return VI->second; + } + return (VI != valueNumbering.end()) ? VI->second : 0; } /// Returns the value number of the given comparison, @@ -549,21 +559,29 @@ uint32_t GVN::ValueTable::lookupOrAddCmp(unsigned Opcode, CmpInst::Predicate Predicate, Value *LHS, Value *RHS) { Expression exp = createCmpExpr(Opcode, Predicate, LHS, RHS); - uint32_t& e = expressionNumbering[exp]; - if (!e) e = nextValueNumber++; - return e; + return assignExpNewValueNum(exp).first; } /// Remove all entries from the ValueTable. void GVN::ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); + NumberingPhi.clear(); + PhiTranslateTable.clear(); + BlockRPONumber.clear(); nextValueNumber = 1; + Expressions.clear(); + ExprIdx.clear(); + nextExprNumber = 0; } /// Remove a value from the value numbering. void GVN::ValueTable::erase(Value *V) { + uint32_t Num = valueNumbering.lookup(V); valueNumbering.erase(V); + // If V is PHINode, V <--> value number is an one-to-one mapping. + if (isa(V)) + NumberingPhi.erase(Num); } /// verifyRemoved - Verify that the value is removed from all internal data @@ -1451,6 +1469,104 @@ bool GVN::processLoad(LoadInst *L) { return false; } +/// Return a pair the first field showing the value number of \p Exp and the +/// second field showing whether it is a value number newly created. +std::pair +GVN::ValueTable::assignExpNewValueNum(Expression &Exp) { + uint32_t &e = expressionNumbering[Exp]; + bool CreateNewValNum = !e; + if (CreateNewValNum) { + Expressions.push_back(Exp); + if (ExprIdx.size() < nextValueNumber + 1) + ExprIdx.resize(nextValueNumber * 2); + e = nextValueNumber; + ExprIdx[nextValueNumber++] = nextExprNumber++; + } + return {e, CreateNewValNum}; +} + +void GVN::ValueTable::assignBlockRPONumber(Function &F) { + uint32_t NextBlockNumber = 1; + ReversePostOrderTraversal RPOT(&F); + for (BasicBlock *BB : RPOT) + BlockRPONumber[BB] = NextBlockNumber++; +} + +/// Return whether all the values related with the same \p num are +/// defined in \p BB. +bool GVN::ValueTable::areAllValsInBB(uint32_t Num, const BasicBlock *BB, + GVN &Gvn) { + LeaderTableEntry *Vals = &Gvn.LeaderTable[Num]; + while (Vals && Vals->BB == BB) + Vals = Vals->Next; + return !Vals; +} + +/// Wrap phiTranslateImpl to provide caching functionality. +uint32_t GVN::ValueTable::phiTranslate(const BasicBlock *Pred, + const BasicBlock *PhiBlock, uint32_t Num, + GVN &Gvn) { + auto FindRes = PhiTranslateTable.find({Num, Pred}); + if (FindRes != PhiTranslateTable.end()) + return FindRes->second; + uint32_t NewNum = phiTranslateImpl(Pred, PhiBlock, Num, Gvn); + PhiTranslateTable.insert({{Num, Pred}, NewNum}); + return NewNum; +} + +/// Translate value number \p Num using phis, so that it has the values of +/// the phis in BB. +uint32_t GVN::ValueTable::phiTranslateImpl(const BasicBlock *Pred, + const BasicBlock *PhiBlock, + uint32_t Num, GVN &Gvn) { + if (PHINode *PN = NumberingPhi[Num]) { + if (BlockRPONumber[Pred] >= BlockRPONumber[PhiBlock]) + return Num; + for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { + if (PN->getParent() == PhiBlock && PN->getIncomingBlock(i) == Pred) + if (uint32_t TransVal = lookup(PN->getIncomingValue(i), false)) + return TransVal; + } + return Num; + } + + // If there is any value related with Num is defined in a BB other than + // PhiBlock, it cannot depend on a phi in PhiBlock without going through + // a backedge. We can do an early exit in that case to save compile time. + if (!areAllValsInBB(Num, PhiBlock, Gvn)) + return Num; + + if (ExprIdx[Num] == 0 || Num >= ExprIdx.size()) + return Num; + Expression Exp = Expressions[ExprIdx[Num]]; + + for (unsigned i = 0; i < Exp.varargs.size(); i++) { + // For InsertValue and ExtractValue, some varargs are index numbers + // instead of value numbers. Those index numbers should not be + // translated. + if ((i > 1 && Exp.opcode == Instruction::InsertValue) || + (i > 0 && Exp.opcode == Instruction::ExtractValue)) + continue; + Exp.varargs[i] = phiTranslate(Pred, PhiBlock, Exp.varargs[i], Gvn); + } + + if (Exp.commutative) { + assert(Exp.varargs.size() == 2 && "Unsupported commutative expression!"); + if (Exp.varargs[0] > Exp.varargs[1]) { + std::swap(Exp.varargs[0], Exp.varargs[1]); + uint32_t Opcode = Exp.opcode >> 8; + if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) + Exp.opcode = (Opcode << 8) | + CmpInst::getSwappedPredicate( + static_cast(Exp.opcode & 255)); + } + } + + if (uint32_t NewNum = expressionNumbering[Exp]) + return NewNum; + return Num; +} + // In order to find a leader for a given value number at a // specific basic block, we first obtain the list of all Values for that number, // and then scan the list to find one whose block dominates the block in @@ -1856,6 +1972,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // Fabricate val-num for dead-code in order to suppress assertion in // performPRE(). assignValNumForDeadCode(); + VN.assignBlockRPONumber(F); bool PREChanged = true; while (PREChanged) { PREChanged = performPRE(F); @@ -1945,7 +2062,9 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, success = false; break; } - if (Value *V = findLeader(Pred, VN.lookup(Op))) { + uint32_t TValNo = + VN.phiTranslate(Pred, Instr->getParent(), VN.lookup(Op), *this); + if (Value *V = findLeader(Pred, TValNo)) { Instr->setOperand(i, V); } else { success = false; @@ -1962,10 +2081,12 @@ bool GVN::performScalarPREInsertion(Instruction *Instr, BasicBlock *Pred, Instr->insertBefore(Pred->getTerminator()); Instr->setName(Instr->getName() + ".pre"); Instr->setDebugLoc(Instr->getDebugLoc()); - VN.add(Instr, ValNo); + + unsigned Num = VN.lookupOrAdd(Instr); + VN.add(Instr, Num); // Update the availability map to include the new instruction. - addToLeaderTable(ValNo, Instr, Pred); + addToLeaderTable(Num, Instr, Pred); return true; } @@ -2014,7 +2135,8 @@ bool GVN::performScalarPRE(Instruction *CurInst) { break; } - Value *predV = findLeader(P, ValNo); + uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); + Value *predV = findLeader(P, TValNo); if (!predV) { predMap.push_back(std::make_pair(static_cast(nullptr), P)); PREPred = P; diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp new file mode 100644 index 00000000000..5c75f39e381 --- /dev/null +++ b/contrib/llvm/lib/Transforms/Scalar/GVNSink.cpp @@ -0,0 +1,872 @@ +//===- GVNSink.cpp - sink expressions into successors -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file GVNSink.cpp +/// This pass attempts to sink instructions into successors, reducing static +/// instruction count and enabling if-conversion. +/// +/// We use a variant of global value numbering to decide what can be sunk. +/// Consider: +/// +/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ] +/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ] +/// \ / +/// [ %e = phi i32 %a2, %c2 ] +/// [ add i32 %e, 4 ] +/// +/// +/// GVN would number %a1 and %c1 differently because they compute different +/// results - the VN of an instruction is a function of its opcode and the +/// transitive closure of its operands. This is the key property for hoisting +/// and CSE. +/// +/// What we want when sinking however is for a numbering that is a function of +/// the *uses* of an instruction, which allows us to answer the question "if I +/// replace %a1 with %c1, will it contribute in an equivalent way to all +/// successive instructions?". The PostValueTable class in GVN provides this +/// mapping. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GVNExpression.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include +using namespace llvm; + +#define DEBUG_TYPE "gvn-sink" + +STATISTIC(NumRemoved, "Number of instructions removed"); + +namespace { + +static bool isMemoryInst(const Instruction *I) { + return isa(I) || isa(I) || + (isa(I) && !cast(I)->doesNotAccessMemory()) || + (isa(I) && !cast(I)->doesNotAccessMemory()); +} + +/// Iterates through instructions in a set of blocks in reverse order from the +/// first non-terminator. For example (assume all blocks have size n): +/// LockstepReverseIterator I([B1, B2, B3]); +/// *I-- = [B1[n], B2[n], B3[n]]; +/// *I-- = [B1[n-1], B2[n-1], B3[n-1]]; +/// *I-- = [B1[n-2], B2[n-2], B3[n-2]]; +/// ... +/// +/// It continues until all blocks have been exhausted. Use \c getActiveBlocks() +/// to +/// determine which blocks are still going and the order they appear in the +/// list returned by operator*. +class LockstepReverseIterator { + ArrayRef Blocks; + SmallPtrSet ActiveBlocks; + SmallVector Insts; + bool Fail; + +public: + LockstepReverseIterator(ArrayRef Blocks) : Blocks(Blocks) { + reset(); + } + + void reset() { + Fail = false; + ActiveBlocks.clear(); + for (BasicBlock *BB : Blocks) + ActiveBlocks.insert(BB); + Insts.clear(); + for (BasicBlock *BB : Blocks) { + if (BB->size() <= 1) { + // Block wasn't big enough - only contained a terminator. + ActiveBlocks.erase(BB); + continue; + } + Insts.push_back(BB->getTerminator()->getPrevNode()); + } + if (Insts.empty()) + Fail = true; + } + + bool isValid() const { return !Fail; } + ArrayRef operator*() const { return Insts; } + SmallPtrSet &getActiveBlocks() { return ActiveBlocks; } + + void restrictToBlocks(SmallPtrSetImpl &Blocks) { + for (auto II = Insts.begin(); II != Insts.end();) { + if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) == + Blocks.end()) { + ActiveBlocks.erase((*II)->getParent()); + II = Insts.erase(II); + } else { + ++II; + } + } + } + + void operator--() { + if (Fail) + return; + SmallVector NewInsts; + for (auto *Inst : Insts) { + if (Inst == &Inst->getParent()->front()) + ActiveBlocks.erase(Inst->getParent()); + else + NewInsts.push_back(Inst->getPrevNode()); + } + if (NewInsts.empty()) { + Fail = true; + return; + } + Insts = NewInsts; + } +}; + +//===----------------------------------------------------------------------===// + +/// Candidate solution for sinking. There may be different ways to +/// sink instructions, differing in the number of instructions sunk, +/// the number of predecessors sunk from and the number of PHIs +/// required. +struct SinkingInstructionCandidate { + unsigned NumBlocks; + unsigned NumInstructions; + unsigned NumPHIs; + unsigned NumMemoryInsts; + int Cost = -1; + SmallVector Blocks; + + void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) { + unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs; + unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0; + Cost = (NumInstructions * (NumBlocks - 1)) - + (NumExtraPHIs * + NumExtraPHIs) // PHIs are expensive, so make sure they're worth it. + - SplitEdgeCost; + } + bool operator>=(const SinkingInstructionCandidate &Other) const { + return Cost >= Other.Cost; + } +}; + +#ifndef NDEBUG +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SinkingInstructionCandidate &C) { + OS << ""; + return OS; +} +#endif + +//===----------------------------------------------------------------------===// + +/// Describes a PHI node that may or may not exist. These track the PHIs +/// that must be created if we sunk a sequence of instructions. It provides +/// a hash function for efficient equality comparisons. +class ModelledPHI { + SmallVector Values; + SmallVector Blocks; + +public: + ModelledPHI() {} + ModelledPHI(const PHINode *PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) + Blocks.push_back(PN->getIncomingBlock(I)); + std::sort(Blocks.begin(), Blocks.end()); + + // This assumes the PHI is already well-formed and there aren't conflicting + // incoming values for the same block. + for (auto *B : Blocks) + Values.push_back(PN->getIncomingValueForBlock(B)); + } + /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI + /// without the same ID. + /// \note This is specifically for DenseMapInfo - do not use this! + static ModelledPHI createDummy(size_t ID) { + ModelledPHI M; + M.Values.push_back(reinterpret_cast(ID)); + return M; + } + + /// Create a PHI from an array of incoming values and incoming blocks. + template + ModelledPHI(const VArray &V, const BArray &B) { + std::copy(V.begin(), V.end(), std::back_inserter(Values)); + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + } + + /// Create a PHI from [I[OpNum] for I in Insts]. + template + ModelledPHI(ArrayRef Insts, unsigned OpNum, const BArray &B) { + std::copy(B.begin(), B.end(), std::back_inserter(Blocks)); + for (auto *I : Insts) + Values.push_back(I->getOperand(OpNum)); + } + + /// Restrict the PHI's contents down to only \c NewBlocks. + /// \c NewBlocks must be a subset of \c this->Blocks. + void restrictToBlocks(const SmallPtrSetImpl &NewBlocks) { + auto BI = Blocks.begin(); + auto VI = Values.begin(); + while (BI != Blocks.end()) { + assert(VI != Values.end()); + if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) == + NewBlocks.end()) { + BI = Blocks.erase(BI); + VI = Values.erase(VI); + } else { + ++BI; + ++VI; + } + } + assert(Blocks.size() == NewBlocks.size()); + } + + ArrayRef getValues() const { return Values; } + + bool areAllIncomingValuesSame() const { + return all_of(Values, [&](Value *V) { return V == Values[0]; }); + } + bool areAllIncomingValuesSameType() const { + return all_of( + Values, [&](Value *V) { return V->getType() == Values[0]->getType(); }); + } + bool areAnyIncomingValuesConstant() const { + return any_of(Values, [&](Value *V) { return isa(V); }); + } + // Hash functor + unsigned hash() const { + return (unsigned)hash_combine_range(Values.begin(), Values.end()); + } + bool operator==(const ModelledPHI &Other) const { + return Values == Other.Values && Blocks == Other.Blocks; + } +}; + +template struct DenseMapInfo { + static inline ModelledPHI &getEmptyKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(0); + return Dummy; + } + static inline ModelledPHI &getTombstoneKey() { + static ModelledPHI Dummy = ModelledPHI::createDummy(1); + return Dummy; + } + static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); } + static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) { + return LHS == RHS; + } +}; + +typedef DenseSet> ModelledPHISet; + +//===----------------------------------------------------------------------===// +// ValueTable +//===----------------------------------------------------------------------===// +// This is a value number table where the value number is a function of the +// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know +// that the program would be equivalent if we replaced A with PHI(A, B). +//===----------------------------------------------------------------------===// + +/// A GVN expression describing how an instruction is used. The operands +/// field of BasicExpression is used to store uses, not operands. +/// +/// This class also contains fields for discriminators used when determining +/// equivalence of instructions with sideeffects. +class InstructionUseExpr : public GVNExpression::BasicExpression { + unsigned MemoryUseOrder = -1; + bool Volatile = false; + +public: + InstructionUseExpr(Instruction *I, ArrayRecycler &R, + BumpPtrAllocator &A) + : GVNExpression::BasicExpression(I->getNumUses()) { + allocateOperands(R, A); + setOpcode(I->getOpcode()); + setType(I->getType()); + + for (auto &U : I->uses()) + op_push_back(U.getUser()); + std::sort(op_begin(), op_end()); + } + void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; } + void setVolatile(bool V) { Volatile = V; } + + virtual hash_code getHashValue() const { + return hash_combine(GVNExpression::BasicExpression::getHashValue(), + MemoryUseOrder, Volatile); + } + + template hash_code getHashValue(Function MapFn) { + hash_code H = + hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile); + for (auto *V : operands()) + H = hash_combine(H, MapFn(V)); + return H; + } +}; + +class ValueTable { + DenseMap ValueNumbering; + DenseMap ExpressionNumbering; + DenseMap HashNumbering; + BumpPtrAllocator Allocator; + ArrayRecycler Recycler; + uint32_t nextValueNumber; + + /// Create an expression for I based on its opcode and its uses. If I + /// touches or reads memory, the expression is also based upon its memory + /// order - see \c getMemoryUseOrder(). + InstructionUseExpr *createExpr(Instruction *I) { + InstructionUseExpr *E = + new (Allocator) InstructionUseExpr(I, Recycler, Allocator); + if (isMemoryInst(I)) + E->setMemoryUseOrder(getMemoryUseOrder(I)); + + if (CmpInst *C = dyn_cast(I)) { + CmpInst::Predicate Predicate = C->getPredicate(); + E->setOpcode((C->getOpcode() << 8) | Predicate); + } + return E; + } + + /// Helper to compute the value number for a memory instruction + /// (LoadInst/StoreInst), including checking the memory ordering and + /// volatility. + template InstructionUseExpr *createMemoryExpr(Inst *I) { + if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic()) + return nullptr; + InstructionUseExpr *E = createExpr(I); + E->setVolatile(I->isVolatile()); + return E; + } + +public: + /// Returns the value number for the specified value, assigning + /// it a new number if it did not have one before. + uint32_t lookupOrAdd(Value *V) { + auto VI = ValueNumbering.find(V); + if (VI != ValueNumbering.end()) + return VI->second; + + if (!isa(V)) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + Instruction *I = cast(V); + InstructionUseExpr *exp = nullptr; + switch (I->getOpcode()) { + case Instruction::Load: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Store: + exp = createMemoryExpr(cast(I)); + break; + case Instruction::Call: + case Instruction::Invoke: + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + case Instruction::FCmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: + case Instruction::Select: + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + case Instruction::InsertValue: + case Instruction::GetElementPtr: + exp = createExpr(I); + break; + default: + break; + } + + if (!exp) { + ValueNumbering[V] = nextValueNumber; + return nextValueNumber++; + } + + uint32_t e = ExpressionNumbering[exp]; + if (!e) { + hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); }); + auto I = HashNumbering.find(H); + if (I != HashNumbering.end()) { + e = I->second; + } else { + e = nextValueNumber++; + HashNumbering[H] = e; + ExpressionNumbering[exp] = e; + } + } + ValueNumbering[V] = e; + return e; + } + + /// Returns the value number of the specified value. Fails if the value has + /// not yet been numbered. + uint32_t lookup(Value *V) const { + auto VI = ValueNumbering.find(V); + assert(VI != ValueNumbering.end() && "Value not numbered?"); + return VI->second; + } + + /// Removes all value numberings and resets the value table. + void clear() { + ValueNumbering.clear(); + ExpressionNumbering.clear(); + HashNumbering.clear(); + Recycler.clear(Allocator); + nextValueNumber = 1; + } + + ValueTable() : nextValueNumber(1) {} + + /// \c Inst uses or touches memory. Return an ID describing the memory state + /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2), + /// the exact same memory operations happen after I1 and I2. + /// + /// This is a very hard problem in general, so we use domain-specific + /// knowledge that we only ever check for equivalence between blocks sharing a + /// single immediate successor that is common, and when determining if I1 == + /// I2 we will have already determined that next(I1) == next(I2). This + /// inductive property allows us to simply return the value number of the next + /// instruction that defines memory. + uint32_t getMemoryUseOrder(Instruction *Inst) { + auto *BB = Inst->getParent(); + for (auto I = std::next(Inst->getIterator()), E = BB->end(); + I != E && !I->isTerminator(); ++I) { + if (!isMemoryInst(&*I)) + continue; + if (isa(&*I)) + continue; + CallInst *CI = dyn_cast(&*I); + if (CI && CI->onlyReadsMemory()) + continue; + InvokeInst *II = dyn_cast(&*I); + if (II && II->onlyReadsMemory()) + continue; + return lookupOrAdd(&*I); + } + return 0; + } +}; + +//===----------------------------------------------------------------------===// + +class GVNSink { +public: + GVNSink() : VN() {} + bool run(Function &F) { + DEBUG(dbgs() << "GVNSink: running on function @" << F.getName() << "\n"); + + unsigned NumSunk = 0; + ReversePostOrderTraversal RPOT(&F); + for (auto *N : RPOT) + NumSunk += sinkBB(N); + + return NumSunk > 0; + } + +private: + ValueTable VN; + + bool isInstructionBlacklisted(Instruction *I) { + // These instructions may change or break semantics if moved. + if (isa(I) || I->isEHPad() || isa(I) || + I->getType()->isTokenTy()) + return true; + return false; + } + + /// The main heuristic function. Analyze the set of instructions pointed to by + /// LRI and return a candidate solution if these instructions can be sunk, or + /// None otherwise. + Optional analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents); + + /// Create a ModelledPHI for each PHI in BB, adding to PHIs. + void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs, + SmallPtrSetImpl &PHIContents) { + for (auto &I : *BB) { + auto *PN = dyn_cast(&I); + if (!PN) + return; + + auto MPHI = ModelledPHI(PN); + PHIs.insert(MPHI); + for (auto *V : MPHI.getValues()) + PHIContents.insert(V); + } + } + + /// The main instruction sinking driver. Set up state and try and sink + /// instructions into BBEnd from its predecessors. + unsigned sinkBB(BasicBlock *BBEnd); + + /// Perform the actual mechanics of sinking an instruction from Blocks into + /// BBEnd, which is their only successor. + void sinkLastInstruction(ArrayRef Blocks, BasicBlock *BBEnd); + + /// Remove PHIs that all have the same incoming value. + void foldPointlessPHINodes(BasicBlock *BB) { + auto I = BB->begin(); + while (PHINode *PN = dyn_cast(I++)) { + if (!all_of(PN->incoming_values(), + [&](const Value *V) { return V == PN->getIncomingValue(0); })) + continue; + if (PN->getIncomingValue(0) != PN) + PN->replaceAllUsesWith(PN->getIncomingValue(0)); + else + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + } + } +}; + +Optional GVNSink::analyzeInstructionForSinking( + LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum, + ModelledPHISet &NeededPHIs, SmallPtrSetImpl &PHIContents) { + auto Insts = *LRI; + DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I + : Insts) { + I->dump(); + } dbgs() << " ]\n";); + + DenseMap VNums; + for (auto *I : Insts) { + uint32_t N = VN.lookupOrAdd(I); + DEBUG(dbgs() << " VN=" << utohexstr(N) << " for" << *I << "\n"); + if (N == ~0U) + return None; + VNums[N]++; + } + unsigned VNumToSink = + std::max_element(VNums.begin(), VNums.end(), + [](const std::pair &I, + const std::pair &J) { + return I.second < J.second; + }) + ->first; + + if (VNums[VNumToSink] == 1) + // Can't sink anything! + return None; + + // Now restrict the number of incoming blocks down to only those with + // VNumToSink. + auto &ActivePreds = LRI.getActiveBlocks(); + unsigned InitialActivePredSize = ActivePreds.size(); + SmallVector NewInsts; + for (auto *I : Insts) { + if (VN.lookup(I) != VNumToSink) + ActivePreds.erase(I->getParent()); + else + NewInsts.push_back(I); + } + for (auto *I : NewInsts) + if (isInstructionBlacklisted(I)) + return None; + + // If we've restricted the incoming blocks, restrict all needed PHIs also + // to that set. + bool RecomputePHIContents = false; + if (ActivePreds.size() != InitialActivePredSize) { + ModelledPHISet NewNeededPHIs; + for (auto P : NeededPHIs) { + P.restrictToBlocks(ActivePreds); + NewNeededPHIs.insert(P); + } + NeededPHIs = NewNeededPHIs; + LRI.restrictToBlocks(ActivePreds); + RecomputePHIContents = true; + } + + // The sunk instruction's results. + ModelledPHI NewPHI(NewInsts, ActivePreds); + + // Does sinking this instruction render previous PHIs redundant? + if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) { + NeededPHIs.erase(NewPHI); + RecomputePHIContents = true; + } + + if (RecomputePHIContents) { + // The needed PHIs have changed, so recompute the set of all needed + // values. + PHIContents.clear(); + for (auto &PHI : NeededPHIs) + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + // Is this instruction required by a later PHI that doesn't match this PHI? + // if so, we can't sink this instruction. + for (auto *V : NewPHI.getValues()) + if (PHIContents.count(V)) + // V exists in this PHI, but the whole PHI is different to NewPHI + // (else it would have been removed earlier). We cannot continue + // because this isn't representable. + return None; + + // Which operands need PHIs? + // FIXME: If any of these fail, we should partition up the candidates to + // try and continue making progress. + Instruction *I0 = NewInsts[0]; + for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) { + ModelledPHI PHI(NewInsts, OpNum, ActivePreds); + if (PHI.areAllIncomingValuesSame()) + continue; + if (!canReplaceOperandWithVariable(I0, OpNum)) + // We can 't create a PHI from this instruction! + return None; + if (NeededPHIs.count(PHI)) + continue; + if (!PHI.areAllIncomingValuesSameType()) + return None; + // Don't create indirect calls! The called value is the final operand. + if ((isa(I0) || isa(I0)) && OpNum == E - 1 && + PHI.areAnyIncomingValuesConstant()) + return None; + + NeededPHIs.reserve(NeededPHIs.size()); + NeededPHIs.insert(PHI); + PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end()); + } + + if (isMemoryInst(NewInsts[0])) + ++MemoryInstNum; + + SinkingInstructionCandidate Cand; + Cand.NumInstructions = ++InstNum; + Cand.NumMemoryInsts = MemoryInstNum; + Cand.NumBlocks = ActivePreds.size(); + Cand.NumPHIs = NeededPHIs.size(); + for (auto *C : ActivePreds) + Cand.Blocks.push_back(C); + + return Cand; +} + +unsigned GVNSink::sinkBB(BasicBlock *BBEnd) { + DEBUG(dbgs() << "GVNSink: running on basic block "; + BBEnd->printAsOperand(dbgs()); dbgs() << "\n"); + SmallVector Preds; + for (auto *B : predecessors(BBEnd)) { + auto *T = B->getTerminator(); + if (isa(T) || isa(T)) + Preds.push_back(B); + else + return 0; + } + if (Preds.size() < 2) + return 0; + std::sort(Preds.begin(), Preds.end()); + + unsigned NumOrigPreds = Preds.size(); + // We can only sink instructions through unconditional branches. + for (auto I = Preds.begin(); I != Preds.end();) { + if ((*I)->getTerminator()->getNumSuccessors() != 1) + I = Preds.erase(I); + else + ++I; + } + + LockstepReverseIterator LRI(Preds); + SmallVector Candidates; + unsigned InstNum = 0, MemoryInstNum = 0; + ModelledPHISet NeededPHIs; + SmallPtrSet PHIContents; + analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents); + unsigned NumOrigPHIs = NeededPHIs.size(); + + while (LRI.isValid()) { + auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum, + NeededPHIs, PHIContents); + if (!Cand) + break; + Cand->calculateCost(NumOrigPHIs, Preds.size()); + Candidates.emplace_back(*Cand); + --LRI; + } + + std::stable_sort( + Candidates.begin(), Candidates.end(), + [](const SinkingInstructionCandidate &A, + const SinkingInstructionCandidate &B) { return A >= B; }); + DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C + : Candidates) dbgs() + << " " << C << "\n";); + + // Pick the top candidate, as long it is positive! + if (Candidates.empty() || Candidates.front().Cost <= 0) + return 0; + auto C = Candidates.front(); + + DEBUG(dbgs() << " -- Sinking: " << C << "\n"); + BasicBlock *InsertBB = BBEnd; + if (C.Blocks.size() < NumOrigPreds) { + DEBUG(dbgs() << " -- Splitting edge to "; BBEnd->printAsOperand(dbgs()); + dbgs() << "\n"); + InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split"); + if (!InsertBB) { + DEBUG(dbgs() << " -- FAILED to split edge!\n"); + // Edge couldn't be split. + return 0; + } + } + + for (unsigned I = 0; I < C.NumInstructions; ++I) + sinkLastInstruction(C.Blocks, InsertBB); + + return C.NumInstructions; +} + +void GVNSink::sinkLastInstruction(ArrayRef Blocks, + BasicBlock *BBEnd) { + SmallVector Insts; + for (BasicBlock *BB : Blocks) + Insts.push_back(BB->getTerminator()->getPrevNode()); + Instruction *I0 = Insts.front(); + + SmallVector NewOperands; + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { + bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) { + return I->getOperand(O) != I0->getOperand(O); + }); + if (!NeedPHI) { + NewOperands.push_back(I0->getOperand(O)); + continue; + } + + // Create a new PHI in the successor block and populate it. + auto *Op = I0->getOperand(O); + assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!"); + auto *PN = PHINode::Create(Op->getType(), Insts.size(), + Op->getName() + ".sink", &BBEnd->front()); + for (auto *I : Insts) + PN->addIncoming(I->getOperand(O), I->getParent()); + NewOperands.push_back(PN); + } + + // Arbitrarily use I0 as the new "common" instruction; remap its operands + // and move it to the start of the successor block. + for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) + I0->getOperandUse(O).set(NewOperands[O]); + I0->moveBefore(&*BBEnd->getFirstInsertionPt()); + + // Update metadata and IR flags. + for (auto *I : Insts) + if (I != I0) { + combineMetadataForCSE(I0, I); + I0->andIRFlags(I); + } + + for (auto *I : Insts) + if (I != I0) + I->replaceAllUsesWith(I0); + foldPointlessPHINodes(BBEnd); + + // Finally nuke all instructions apart from the common instruction. + for (auto *I : Insts) + if (I != I0) + I->eraseFromParent(); + + NumRemoved += Insts.size() - 1; +} + +//////////////////////////////////////////////////////////////////////////////// +// Pass machinery / boilerplate + +class GVNSinkLegacyPass : public FunctionPass { +public: + static char ID; + + GVNSinkLegacyPass() : FunctionPass(ID) { + initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + GVNSink G; + return G.run(F); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + } +}; +} // namespace + +PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) { + GVNSink G; + if (!G.run(F)) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserve(); + return PA; +} + +char GVNSinkLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) +INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink", + "Early GVN sinking of Expressions", false, false) + +FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); } diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp index 198d2b2b024..65a2cd95567 100644 --- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -537,9 +537,7 @@ bool GuardWideningImpl::parseRangeChecks( Changed = true; } else if (match(Check.getBase(), m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) { - unsigned BitWidth = OpLHS->getType()->getScalarSizeInBits(); - KnownBits Known(BitWidth); - computeKnownBits(OpLHS, Known, DL); + KnownBits Known = computeKnownBits(OpLHS, DL); if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) { Check.setBase(OpLHS); APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue(); diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp index 85db6e5e110..e21b0feb7c5 100644 --- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1228,7 +1228,12 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef BBs) { Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent, ValueToValueMapTy &VM) { - Loop &New = LPM.addLoop(Parent); + Loop &New = *new Loop(); + if (Parent) + Parent->addChildLoop(&New); + else + LI.addTopLevelLoop(&New); + LPM.addLoop(New); // Add all of the blocks in Original to the new loop. for (auto *BB : Original->blocks()) diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp index ada22ae38eb..2ef8f8563bb 100644 --- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -253,6 +253,35 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_, return EverChanged; } +// Replace uses of Cond with ToVal when safe to do so. If all uses are +// replaced, we can remove Cond. We cannot blindly replace all uses of Cond +// because we may incorrectly replace uses when guards/assumes are uses of +// of `Cond` and we used the guards/assume to reason about the `Cond` value +// at the end of block. RAUW unconditionally replaces all uses +// including the guards/assumes themselves and the uses before the +// guard/assume. +static void ReplaceFoldableUses(Instruction *Cond, Value *ToVal) { + assert(Cond->getType() == ToVal->getType()); + auto *BB = Cond->getParent(); + // We can unconditionally replace all uses in non-local blocks (i.e. uses + // strictly dominated by BB), since LVI information is true from the + // terminator of BB. + replaceNonLocalUsesWith(Cond, ToVal); + for (Instruction &I : reverse(*BB)) { + // Reached the Cond whose uses we are trying to replace, so there are no + // more uses. + if (&I == Cond) + break; + // We only replace uses in instructions that are guaranteed to reach the end + // of BB, where we know Cond is ToVal. + if (!isGuaranteedToTransferExecutionToSuccessor(&I)) + break; + I.replaceUsesOfWith(Cond, ToVal); + } + if (Cond->use_empty() && !Cond->mayHaveSideEffects()) + Cond->eraseFromParent(); +} + /// Return the cost of duplicating a piece of this block from first non-phi /// and before StopAt instruction to thread across it. Stop scanning the block /// when exceeding the threshold. If duplication is impossible, returns ~0U. @@ -833,13 +862,19 @@ bool JumpThreadingPass::ProcessBlock(BasicBlock *BB) { CondBr->eraseFromParent(); if (CondCmp->use_empty()) CondCmp->eraseFromParent(); - // TODO: We can safely replace *some* uses of the CondInst if it has + // We can safely replace *some* uses of the CondInst if it has // exactly one value as returned by LVI. RAUW is incorrect in the // presence of guards and assumes, that have the `Cond` as the use. This // is because we use the guards/assume to reason about the `Cond` value // at the end of block, but RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. + else if (CondCmp->getParent() == BB) { + auto *CI = Ret == LazyValueInfo::True ? + ConstantInt::getTrue(CondCmp->getType()) : + ConstantInt::getFalse(CondCmp->getType()); + ReplaceFoldableUses(CondCmp, CI); + } return true; } @@ -1325,13 +1360,16 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB, if (auto *CondInst = dyn_cast(Cond)) { if (CondInst->use_empty() && !CondInst->mayHaveSideEffects()) CondInst->eraseFromParent(); - // TODO: We can safely replace *some* uses of the CondInst if it has + // We can safely replace *some* uses of the CondInst if it has // exactly one value as returned by LVI. RAUW is incorrect in the // presence of guards and assumes, that have the `Cond` as the use. This // is because we use the guards/assume to reason about the `Cond` value // at the end of block, but RAUW unconditionally replaces all uses // including the guards/assumes themselves and the uses before the // guard/assume. + else if (OnlyVal && OnlyVal != MultipleVal && + CondInst->getParent() == BB) + ReplaceFoldableUses(CondInst, OnlyVal); } return true; } diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 97337ea5ba6..c6a05ecbd0b 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -1035,6 +1035,17 @@ static Value *matchCondition(BranchInst *BI, BasicBlock *LoopEntry) { return nullptr; } +// Check if the recurrence variable `VarX` is in the right form to create +// the idiom. Returns the value coerced to a PHINode if so. +static PHINode *getRecurrenceVar(Value *VarX, Instruction *DefX, + BasicBlock *LoopEntry) { + auto *PhiX = dyn_cast(VarX); + if (PhiX && PhiX->getParent() == LoopEntry && + (PhiX->getOperand(0) == DefX || PhiX->getOperand(1) == DefX)) + return PhiX; + return nullptr; +} + /// Return true iff the idiom is detected in the loop. /// /// Additionally: @@ -1110,13 +1121,9 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, } // step 3: Check the recurrence of variable X - { - PhiX = dyn_cast(VarX1); - if (!PhiX || - (PhiX->getOperand(0) != DefX2 && PhiX->getOperand(1) != DefX2)) { - return false; - } - } + PhiX = getRecurrenceVar(VarX1, DefX2, LoopEntry); + if (!PhiX) + return false; // step 4: Find the instruction which count the population: cnt2 = cnt1 + 1 { @@ -1132,8 +1139,8 @@ static bool detectPopcountIdiom(Loop *CurLoop, BasicBlock *PreCondBB, if (!Inc || !Inc->isOne()) continue; - PHINode *Phi = dyn_cast(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) + PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + if (!Phi) continue; // Check if the result of the instruction is live of the loop. @@ -1227,8 +1234,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, VarX = DefX->getOperand(0); // step 3: Check the recurrence of variable X - PhiX = dyn_cast(VarX); - if (!PhiX || (PhiX->getOperand(0) != DefX && PhiX->getOperand(1) != DefX)) + PhiX = getRecurrenceVar(VarX, DefX, LoopEntry); + if (!PhiX) return false; // step 4: Find the instruction which count the CTLZ: cnt.next = cnt + 1 @@ -1248,8 +1255,8 @@ static bool detectCTLZIdiom(Loop *CurLoop, PHINode *&PhiX, if (!Inc || !Inc->isOne()) continue; - PHINode *Phi = dyn_cast(Inst->getOperand(0)); - if (!Phi || Phi->getParent() != LoopEntry) + PHINode *Phi = getRecurrenceVar(Inst->getOperand(0), Inst, LoopEntry); + if (!Phi) continue; CntInst = Inst; diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp index 6ef1464e933..19daebd0613 100644 --- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -831,7 +831,12 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val, /// mapping the blocks with the specified map. static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM, LoopInfo *LI, LPPassManager *LPM) { - Loop &New = LPM->addLoop(PL); + Loop &New = *new Loop(); + if (PL) + PL->addChildLoop(&New); + else + LI->addTopLevelLoop(&New); + LPM->addLoop(New); // Add all of the blocks in L to the new loop. for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp index 5cfbf6baeaa..67abc311698 100644 --- a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -858,7 +858,14 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, // Filter out unreachable phi operands. auto Filtered = make_filter_range(PHIOperands, [&](const Use *U) { - return ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock}); + if (*U == PN) + return false; + if (!ReachableEdges.count({PN->getIncomingBlock(*U), PHIBlock})) + return false; + // Things in TOPClass are equivalent to everything. + if (ValueToClass.lookup(*U) == TOPClass) + return false; + return true; }); std::transform(Filtered.begin(), Filtered.end(), op_inserter(E), [&](const Use *U) -> Value * { @@ -866,14 +873,6 @@ PHIExpression *NewGVN::createPHIExpression(Instruction *I, bool &HasBackedge, HasBackedge = HasBackedge || isBackedge(BB, PHIBlock); OriginalOpsConstant = OriginalOpsConstant && isa(*U); - // Use nullptr to distinguish between things that were - // originally self-defined and those that have an operand - // leader that is self-defined. - if (*U == PN) - return nullptr; - // Things in TOPClass are equivalent to everything. - if (ValueToClass.lookup(*U) == TOPClass) - return nullptr; return lookupOperandLeader(*U); }); return E; @@ -955,6 +954,10 @@ const Expression *NewGVN::checkSimplificationResults(Expression *E, CongruenceClass *CC = ValueToClass.lookup(V); if (CC && CC->getDefiningExpr()) { + // If we simplified to something else, we need to communicate + // that we're users of the value we simplified to. + if (I != V) + addAdditionalUsers(V, I); if (I) DEBUG(dbgs() << "Simplified " << *I << " to " << " expression " << *CC->getDefiningExpr() << "\n"); @@ -1581,6 +1584,30 @@ bool NewGVN::isCycleFree(const Instruction *I) const { // Evaluate PHI nodes symbolically, and create an expression result. const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { + // Resolve irreducible and reducible phi cycles. + // FIXME: This is hopefully a temporary solution while we resolve the issues + // with fixpointing self-cycles. It currently should be "guaranteed" to be + // correct, but non-optimal. The SCCFinder does not, for example, take + // reachability of arguments into account, etc. + SCCFinder.Start(I); + bool CanOptimize = true; + SmallPtrSet OuterOps; + + auto &Component = SCCFinder.getComponentFor(I); + for (auto *Member : Component) { + if (!isa(Member)) { + CanOptimize = false; + break; + } + for (auto &PHIOp : cast(Member)->operands()) + if (!isa(PHIOp) || !Component.count(cast(PHIOp))) + OuterOps.insert(PHIOp); + } + if (CanOptimize && OuterOps.size() == 1) { + DEBUG(dbgs() << "Resolving cyclic phi to value " << *(*OuterOps.begin()) + << "\n"); + return createVariableOrConstant(*OuterOps.begin()); + } // True if one of the incoming phi edges is a backedge. bool HasBackedge = false; // All constant tracks the state of whether all the *original* phi operands @@ -1594,17 +1621,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // See if all arguments are the same. // We track if any were undef because they need special handling. bool HasUndef = false; - bool CycleFree = isCycleFree(I); auto Filtered = make_filter_range(E->operands(), [&](Value *Arg) { - if (Arg == nullptr) - return false; - // Original self-operands are already eliminated during expression creation. - // We can only eliminate value-wise self-operands if it's cycle - // free. Otherwise, eliminating the operand can cause our value to change, - // which can cause us to not eliminate the operand, which changes the value - // back to what it was before, cycling forever. - if (CycleFree && Arg == I) - return false; if (isa(Arg)) { HasUndef = true; return false; @@ -1613,6 +1630,14 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { }); // If we are left with no operands, it's dead. if (Filtered.begin() == Filtered.end()) { + // If it has undef at this point, it means there are no-non-undef arguments, + // and thus, the value of the phi node must be undef. + if (HasUndef) { + DEBUG(dbgs() << "PHI Node " << *I + << " has no non-undef arguments, valuing it as undef\n"); + return createConstantExpression(UndefValue::get(I->getType())); + } + DEBUG(dbgs() << "No arguments of PHI node " << *I << " are live\n"); deleteExpression(E); return createDeadExpression(); @@ -1642,7 +1667,7 @@ const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I) const { // constants, or all operands are ignored but the undef, it also must be // cycle free. if (!AllConstant && HasBackedge && NumOps > 0 && - !isa(AllSameValue) && !CycleFree) + !isa(AllSameValue) && !isCycleFree(I)) return E; // Only have to check for instructions @@ -3556,6 +3581,7 @@ bool NewGVN::eliminateInstructions(Function &F) { // Map to store the use counts DenseMap UseCounts; for (auto *CC : reverse(CongruenceClasses)) { + DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() << "\n"); // Track the equivalent store info so we can decide whether to try // dead store elimination. SmallVector PossibleDeadStores; @@ -3602,8 +3628,6 @@ bool NewGVN::eliminateInstructions(Function &F) { } CC->swap(MembersLeft); } else { - DEBUG(dbgs() << "Eliminating in congruence class " << CC->getID() - << "\n"); // If this is a singleton, we can skip it. if (CC->size() != 1 || RealToTemp.lookup(Leader)) { // This is a stack because equality replacement/etc may place @@ -3846,6 +3870,7 @@ bool NewGVN::shouldSwapOperands(const Value *A, const Value *B) const { return std::make_pair(getRank(A), A) > std::make_pair(getRank(B), B); } +namespace { class NewGVNLegacyPass : public FunctionPass { public: static char ID; // Pass identification, replacement for typeid. @@ -3865,6 +3890,7 @@ private: AU.addPreserved(); } }; +} // namespace bool NewGVNLegacyPass::runOnFunction(Function &F) { if (skipFunction(F)) diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp index 8908dae2f54..1d0e8396f6a 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -1779,8 +1779,9 @@ static bool runIPSCCP(Module &M, const DataLayout &DL, // arguments and return value aggressively, and can assume it is not called // unless we see evidence to the contrary. if (F.hasLocalLinkage()) { - if (AddressIsTaken(&F)) + if (F.hasAddressTaken()) { AddressTakenFunctions.insert(&F); + } else { Solver.AddArgumentTrackedFunction(&F); continue; diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp index 24bd0a2b7bd..6e113bccff9 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp @@ -326,7 +326,7 @@ private: /// partition. uint64_t BeginOffset, EndOffset; - /// \brief The start end end iterators of this partition. + /// \brief The start and end iterators of this partition. iterator SI, SJ; /// \brief A collection of split slice tails overlapping the partition. diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp index 52201d8f3e5..9fa43da99da 100644 --- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp @@ -48,6 +48,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeEarlyCSELegacyPassPass(Registry); initializeEarlyCSEMemSSALegacyPassPass(Registry); initializeGVNHoistLegacyPassPass(Registry); + initializeGVNSinkLegacyPassPass(Registry); initializeFlattenCFGPassPass(Registry); initializeInductiveRangeCheckEliminationPass(Registry); initializeIndVarSimplifyLegacyPassPass(Registry); diff --git a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index b32a61a7e8f..0f170e26ce5 100644 --- a/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/contrib/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -123,11 +123,62 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, // exit block. DT.changeImmediateDominator(UnswitchedNode, OldPHNode); - // Blocks reachable from the unswitched block may need to change their IDom - // as well. + // For everything that moves up the dominator tree, we need to examine the + // dominator frontier to see if it additionally should move up the dominator + // tree. This lambda appends the dominator frontier for a node on the + // worklist. + // + // Note that we don't currently use the IDFCalculator here for two reasons: + // 1) It computes dominator tree levels for the entire function on each run + // of 'compute'. While this isn't terrible, given that we expect to update + // relatively small subtrees of the domtree, it isn't necessarily the right + // tradeoff. + // 2) The interface doesn't fit this usage well. It doesn't operate in + // append-only, and builds several sets that we don't need. + // + // FIXME: Neither of these issues are a big deal and could be addressed with + // some amount of refactoring of IDFCalculator. That would allow us to share + // the core logic here (which is solving the same core problem). SmallSetVector Worklist; - for (auto *SuccBB : successors(UnswitchedBB)) - Worklist.insert(SuccBB); + SmallVector DomNodes; + SmallPtrSet DomSet; + auto AppendDomFrontier = [&](DomTreeNode *Node) { + assert(DomNodes.empty() && "Must start with no dominator nodes."); + assert(DomSet.empty() && "Must start with an empty dominator set."); + + // First flatten this subtree into sequence of nodes by doing a pre-order + // walk. + DomNodes.push_back(Node); + // We intentionally re-evaluate the size as each node can add new children. + // Because this is a tree walk, this cannot add any duplicates. + for (int i = 0; i < (int)DomNodes.size(); ++i) + DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end()); + + // Now create a set of the basic blocks so we can quickly test for + // dominated successors. We could in theory use the DFS numbers of the + // dominator tree for this, but we want this to remain predictably fast + // even while we mutate the dominator tree in ways that would invalidate + // the DFS numbering. + for (DomTreeNode *InnerN : DomNodes) + DomSet.insert(InnerN->getBlock()); + + // Now re-walk the nodes, appending every successor of every node that isn't + // in the set. Note that we don't append the node itself, even though if it + // is a successor it does not strictly dominate itself and thus it would be + // part of the dominance frontier. The reason we don't append it is that + // the node passed in came *from* the worklist and so it has already been + // processed. + for (DomTreeNode *InnerN : DomNodes) + for (BasicBlock *SuccBB : successors(InnerN->getBlock())) + if (!DomSet.count(SuccBB)) + Worklist.insert(SuccBB); + + DomNodes.clear(); + DomSet.clear(); + }; + + // Append the initial dom frontier nodes. + AppendDomFrontier(UnswitchedNode); // Walk the worklist. We grow the list in the loop and so must recompute size. for (int i = 0; i < (int)Worklist.size(); ++i) { @@ -136,20 +187,17 @@ static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, DomTreeNode *Node = DT[BB]; assert(!DomChain.count(Node) && "Cannot be dominated by a block you can reach!"); - // If this block doesn't have an immediate dominator somewhere in the chain - // we hoisted over, then its position in the domtree hasn't changed. Either - // it is above the region hoisted and still valid, or it is below the - // hoisted block and so was trivially updated. This also applies to - // everything reachable from this block so we're completely done with the - // it. + + // If this block had an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree needs to move as it is + // reachable from a node hoisted over this chain. if (!DomChain.count(Node->getIDom())) continue; - // We need to change the IDom for this node but also walk its successors - // which could have similar dominance position. DT.changeImmediateDominator(Node, OldPHNode); - for (auto *SuccBB : successors(BB)) - Worklist.insert(SuccBB); + + // Now add this node's dominator frontier to the worklist as well. + AppendDomFrontier(Node); } } diff --git a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp index bf2ab7c55be..1ec3d0d4963 100644 --- a/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -133,7 +133,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, auto *SP = cast(MD.second); NewMD = DISubprogram::getDistinct( NewFunc->getContext(), SP->getScope(), SP->getName(), - NewFunc->getName(), SP->getFile(), SP->getLine(), SP->getType(), + SP->getLinkageName(), SP->getFile(), SP->getLine(), SP->getType(), SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(), SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(), SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(), diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 73a0b2737e9..57468be9a2a 100644 --- a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -76,12 +76,14 @@ int FunctionComparator::cmpMem(StringRef L, StringRef R) const { int FunctionComparator::cmpAttrs(const AttributeList L, const AttributeList R) const { - if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots())) + if (int Res = cmpNumbers(L.getNumAttrSets(), R.getNumAttrSets())) return Res; - for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) { - AttributeList::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i), - RE = R.end(i); + for (unsigned i = L.index_begin(), e = L.index_end(); i != e; ++i) { + AttributeSet LAS = L.getAttributes(i); + AttributeSet RAS = R.getAttributes(i); + AttributeSet::iterator LI = LAS.begin(), LE = LAS.end(); + AttributeSet::iterator RI = RAS.begin(), RE = RAS.end(); for (; LI != LE && RI != RE; ++LI, ++RI) { Attribute LA = *LI; Attribute RA = *RI; diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 9cb4762b683..0ca9f4c484e 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1397,11 +1397,12 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, const Optional &CalleeEntryCount, const Instruction *TheCall, - ProfileSummaryInfo *PSI) { + ProfileSummaryInfo *PSI, + BlockFrequencyInfo *CallerBFI) { if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) return; Optional CallSiteCount = - PSI ? PSI->getProfileCount(TheCall, nullptr) : None; + PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; uint64_t CallCount = std::min(CallSiteCount.hasValue() ? CallSiteCount.getValue() : 0, CalleeEntryCount.getValue()); @@ -1637,7 +1638,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, CalledFunc->front()); updateCallProfile(CalledFunc, VMap, CalledFunc->getEntryCount(), TheCall, - IFI.PSI); + IFI.PSI, IFI.CallerBFI); // Update the profile count of callee. updateCalleeCount(IFI.CallerBFI, OrigBB, TheCall, CalledFunc, IFI.PSI); diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 1ca509472b5..ebd528bc8ec 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -1037,17 +1037,15 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, const DominatorTree *DT) { assert(V->getType()->isPointerTy() && "getOrEnforceKnownAlignment expects a pointer!"); - unsigned BitWidth = DL.getPointerTypeSizeInBits(V->getType()); - KnownBits Known(BitWidth); - computeKnownBits(V, Known, DL, 0, AC, CxtI, DT); + KnownBits Known = computeKnownBits(V, DL, 0, AC, CxtI, DT); unsigned TrailZ = Known.countMinTrailingZeros(); // Avoid trouble with ridiculously large TrailZ values, such as // those computed from a null pointer. TrailZ = std::min(TrailZ, unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - unsigned Align = 1u << std::min(BitWidth - 1, TrailZ); + unsigned Align = 1u << std::min(Known.getBitWidth() - 1, TrailZ); // LLVM doesn't support alignments larger than this currently. Align = std::min(Align, +Value::MaximumAlignment); @@ -1796,6 +1794,23 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To, return Count; } +unsigned llvm::replaceNonLocalUsesWith(Instruction *From, Value *To) { + assert(From->getType() == To->getType()); + auto *BB = From->getParent(); + unsigned Count = 0; + + for (Value::use_iterator UI = From->use_begin(), UE = From->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *I = cast(U.getUser()); + if (I->getParent() == BB) + continue; + U.set(To); + ++Count; + } + return Count; +} + unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, const BasicBlockEdge &Root) { @@ -2094,3 +2109,48 @@ void llvm::maybeMarkSanitizerLibraryCallNoBuiltin( !F->doesNotAccessMemory()) CI->addAttribute(AttributeList::FunctionIndex, Attribute::NoBuiltin); } + +bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) { + // We can't have a PHI with a metadata type. + if (I->getOperand(OpIdx)->getType()->isMetadataTy()) + return false; + + // Early exit. + if (!isa(I->getOperand(OpIdx))) + return true; + + switch (I->getOpcode()) { + default: + return true; + case Instruction::Call: + case Instruction::Invoke: + // Many arithmetic intrinsics have no issue taking a + // variable, however it's hard to distingish these from + // specials such as @llvm.frameaddress that require a constant. + if (isa(I)) + return false; + + // Constant bundle operands may need to retain their constant-ness for + // correctness. + if (ImmutableCallSite(I).isBundleOperand(OpIdx)) + return false; + return true; + case Instruction::ShuffleVector: + // Shufflevector masks are constant. + return OpIdx != 2; + case Instruction::ExtractValue: + case Instruction::InsertValue: + // All operands apart from the first are constant. + return OpIdx == 0; + case Instruction::Alloca: + return false; + case Instruction::GetElementPtr: + if (OpIdx == 0) + return true; + gep_type_iterator It = gep_type_begin(I); + for (auto E = std::next(It, OpIdx); It != E; ++It) + if (It.isStruct()) + return false; + return true; + } +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 27f72fcd8bd..1b442a9a264 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1376,53 +1376,6 @@ HoistTerminator: return true; } -// Is it legal to place a variable in operand \c OpIdx of \c I? -// FIXME: This should be promoted to Instruction. -static bool canReplaceOperandWithVariable(const Instruction *I, - unsigned OpIdx) { - // We can't have a PHI with a metadata type. - if (I->getOperand(OpIdx)->getType()->isMetadataTy()) - return false; - - // Early exit. - if (!isa(I->getOperand(OpIdx))) - return true; - - switch (I->getOpcode()) { - default: - return true; - case Instruction::Call: - case Instruction::Invoke: - // FIXME: many arithmetic intrinsics have no issue taking a - // variable, however it's hard to distingish these from - // specials such as @llvm.frameaddress that require a constant. - if (isa(I)) - return false; - - // Constant bundle operands may need to retain their constant-ness for - // correctness. - if (ImmutableCallSite(I).isBundleOperand(OpIdx)) - return false; - - return true; - - case Instruction::ShuffleVector: - // Shufflevector masks are constant. - return OpIdx != 2; - case Instruction::ExtractValue: - case Instruction::InsertValue: - // All operands apart from the first are constant. - return OpIdx == 0; - case Instruction::Alloca: - return false; - case Instruction::GetElementPtr: - if (OpIdx == 0) - return true; - gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1); - return It.isSequential(); - } -} - // All instructions in Insts belong to different blocks that all unconditionally // branch to a common successor. Analyze each instruction and return true if it // would be possible to sink them into their successor, creating one common @@ -4368,8 +4321,7 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC, const DataLayout &DL) { Value *Cond = SI->getCondition(); unsigned Bits = Cond->getType()->getIntegerBitWidth(); - KnownBits Known(Bits); - computeKnownBits(Cond, Known, DL, 0, AC, SI); + KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI); // We can also eliminate cases by determining that their values are outside of // the limited range of the condition based on how many significant (non-sign) diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 85c9464b556..49effda5d83 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -466,9 +466,7 @@ Value *LibCallSimplifier::optimizeStringLength(CallInst *CI, IRBuilder<> &B, } Value *Offset = GEP->getOperand(2); - unsigned BitWidth = Offset->getType()->getIntegerBitWidth(); - KnownBits Known(BitWidth); - computeKnownBits(Offset, Known, DL, 0, nullptr, CI, nullptr); + KnownBits Known = computeKnownBits(Offset, DL, 0, nullptr, CI, nullptr); Known.Zero.flipAllBits(); uint64_t ArrSize = cast(GEP->getSourceElementType())->getNumElements(); diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1dc554bede7..3b036a6ac43 100644 --- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2092,6 +2092,10 @@ private: /// The data is collected per VF. DenseMap> Scalars; + /// Holds the instructions (address computations) that are forced to be + /// scalarized. + DenseMap> ForcedScalars; + /// Returns the expected difference in cost from scalarizing the expression /// feeding a predicated instruction \p PredInst. The instructions to /// scalarize and their scalar costs are collected in \p ScalarCosts. A @@ -5086,12 +5090,18 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { } bool LoopVectorizationLegality::canVectorize() { + // Store the result and return it at the end instead of exiting early, in case + // allowExtraAnalysis is used to report multiple reasons for not vectorizing. + bool Result = true; // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. if (!TheLoop->getLoopPreheader()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // FIXME: The code is currently dead, since the loop gets sent to @@ -5101,21 +5111,30 @@ bool LoopVectorizationLegality::canVectorize() { if (!TheLoop->empty()) { ORE->emit(createMissedAnalysis("NotInnermostLoop") << "loop is not the innermost loop"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single backedge. if (TheLoop->getNumBackEdges() != 1) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We must have a single exiting block. if (!TheLoop->getExitingBlock()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We only handle bottom-tested loops, i.e. loop in which the condition is @@ -5124,7 +5143,10 @@ bool LoopVectorizationLegality::canVectorize() { if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) { ORE->emit(createMissedAnalysis("CFGNotUnderstood") << "loop control flow is not understood by vectorizer"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // We need to have a loop header. @@ -5135,28 +5157,28 @@ bool LoopVectorizationLegality::canVectorize() { unsigned NumBlocks = TheLoop->getNumBlocks(); if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); - return false; - } - - // ScalarEvolution needs to be able to find the exit count. - const SCEV *ExitCount = PSE.getBackedgeTakenCount(); - if (ExitCount == PSE.getSE()->getCouldNotCompute()) { - ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations") - << "could not determine number of loop iterations"); - DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Check if we can vectorize the instructions and CFG in this loop. if (!canVectorizeInstrs()) { DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } // Go over each instruction and look at memory deps. if (!canVectorizeMemory()) { DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } DEBUG(dbgs() << "LV: We can vectorize this loop" @@ -5184,13 +5206,17 @@ bool LoopVectorizationLegality::canVectorize() { << "Too many SCEV assumptions need to be made and checked " << "at runtime"); DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n"); - return false; + if (ORE->allowExtraAnalysis()) + Result = false; + else + return false; } - // Okay! We can vectorize. At this point we don't have any other mem analysis + // Okay! We've done all the tests. If any have failed, return false. Otherwise + // we can vectorize, and at this point we don't have any other mem analysis // which may limit our maximum vectorization factor, so just return true with // no restrictions. - return true; + return Result; } static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { @@ -5554,6 +5580,13 @@ void LoopVectorizationCostModel::collectLoopScalars(unsigned VF) { DEBUG(dbgs() << "LV: Found scalar instruction: " << *IndUpdate << "\n"); } + // Insert the forced scalars. + // FIXME: Currently widenPHIInstruction() often creates a dead vector + // induction variable when the PHI user is scalarized. + if (ForcedScalars.count(VF)) + for (auto *I : ForcedScalars.find(VF)->second) + Worklist.insert(I); + // Expand the worklist by looking through any bitcasts and getelementptr // instructions we've already identified as scalar. This is similar to the // expansion step in collectLoopUniforms(); however, here we're only @@ -7129,11 +7162,18 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) { if (VF > 1 && isProfitableToScalarize(I, VF)) return VectorizationCostTy(InstsToScalarize[VF][I], false); + // Forced scalars do not have any scalarization overhead. + if (VF > 1 && ForcedScalars.count(VF) && + ForcedScalars.find(VF)->second.count(I)) + return VectorizationCostTy((getInstructionCost(I, 1).first * VF), false); + Type *VectorTy; unsigned C = getInstructionCost(I, VF, VectorTy); + // Note: Even if all instructions are scalarized, return true if any memory + // accesses appear in the loop to get benefits from address folding etc. bool TypeNotScalarized = - VF > 1 && !VectorTy->isVoidTy() && TTI.getNumberOfParts(VectorTy) < VF; + VF > 1 && VectorTy->isVectorTy() && TTI.getNumberOfParts(VectorTy) < VF; return VectorizationCostTy(C, TypeNotScalarized); } @@ -7208,6 +7248,62 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(unsigned VF) { setWideningDecision(&I, VF, Decision, Cost); } } + + // Make sure that any load of address and any other address computation + // remains scalar unless there is gather/scatter support. This avoids + // inevitable extracts into address registers, and also has the benefit of + // activating LSR more, since that pass can't optimize vectorized + // addresses. + if (TTI.prefersVectorizedAddressing()) + return; + + // Start with all scalar pointer uses. + SmallPtrSet AddrDefs; + for (BasicBlock *BB : TheLoop->blocks()) + for (Instruction &I : *BB) { + Instruction *PtrDef = + dyn_cast_or_null(getPointerOperand(&I)); + if (PtrDef && TheLoop->contains(PtrDef) && + getWideningDecision(&I, VF) != CM_GatherScatter) + AddrDefs.insert(PtrDef); + } + + // Add all instructions used to generate the addresses. + SmallVector Worklist; + for (auto *I : AddrDefs) + Worklist.push_back(I); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + for (auto &Op : I->operands()) + if (auto *InstOp = dyn_cast(Op)) + if ((InstOp->getParent() == I->getParent()) && !isa(InstOp) && + AddrDefs.insert(InstOp).second == true) + Worklist.push_back(InstOp); + } + + for (auto *I : AddrDefs) { + if (isa(I)) { + // Setting the desired widening decision should ideally be handled in + // by cost functions, but since this involves the task of finding out + // if the loaded register is involved in an address computation, it is + // instead changed here when we know this is the case. + if (getWideningDecision(I, VF) == CM_Widen) + // Scalarize a widened load of address. + setWideningDecision(I, VF, CM_Scalarize, + (VF * getMemoryInstructionCost(I, 1))); + else if (auto Group = Legal->getInterleavedAccessGroup(I)) { + // Scalarize an interleave group of address loads. + for (unsigned I = 0; I < Group->getFactor(); ++I) { + if (Instruction *Member = Group->getMember(I)) + setWideningDecision(Member, VF, CM_Scalarize, + (VF * getMemoryInstructionCost(Member, 1))); + } + } + } else + // Make sure I gets scalarized and a cost estimate without + // scalarization overhead. + ForcedScalars[VF].insert(I); + } } unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, @@ -7216,7 +7312,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, Type *RetTy = I->getType(); if (canTruncateToMinimalBitwidth(I, VF)) RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); - VectorTy = ToVectorTy(RetTy, VF); + VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF); auto SE = PSE.getSE(); // TODO: We need to estimate the cost of intrinsic calls. @@ -7349,9 +7445,10 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } else if (Legal->isUniform(Op2)) { Op2VK = TargetTransformInfo::OK_UniformValue; } - SmallVector Operands(I->operand_values()); - return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, - Op2VK, Op1VP, Op2VP, Operands); + SmallVector Operands(I->operand_values()); + unsigned N = isScalarAfterVectorization(I, VF) ? VF : 1; + return N * TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, + Op2VK, Op1VP, Op2VP, Operands); } case Instruction::Select: { SelectInst *SI = cast(I); @@ -7374,7 +7471,15 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I, } case Instruction::Store: case Instruction::Load: { - VectorTy = ToVectorTy(getMemInstValueType(I), VF); + unsigned Width = VF; + if (Width > 1) { + InstWidening Decision = getWideningDecision(I, Width); + assert(Decision != CM_Unknown && + "CM decision should be taken at this point"); + if (Decision == CM_Scalarize) + Width = 1; + } + VectorTy = ToVectorTy(getMemInstValueType(I), Width); return getMemoryInstructionCost(I, VF); } case Instruction::ZExt: diff --git a/contrib/llvm/tools/clang/include/clang-c/Index.h b/contrib/llvm/tools/clang/include/clang-c/Index.h index 462d9269f7a..aeda39308f9 100644 --- a/contrib/llvm/tools/clang/include/clang-c/Index.h +++ b/contrib/llvm/tools/clang/include/clang-c/Index.h @@ -32,7 +32,7 @@ * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable. */ #define CINDEX_VERSION_MAJOR 0 -#define CINDEX_VERSION_MINOR 39 +#define CINDEX_VERSION_MINOR 40 #define CINDEX_VERSION_ENCODE(major, minor) ( \ ((major) * 10000) \ @@ -3076,7 +3076,52 @@ enum CXTypeKind { * * E.g., struct S, or via a qualified name, e.g., N::M::type, or both. */ - CXType_Elaborated = 119 + CXType_Elaborated = 119, + + /* OpenCL PipeType. */ + CXType_Pipe = 120, + + /* OpenCL builtin types. */ + CXType_OCLImage1dRO = 121, + CXType_OCLImage1dArrayRO = 122, + CXType_OCLImage1dBufferRO = 123, + CXType_OCLImage2dRO = 124, + CXType_OCLImage2dArrayRO = 125, + CXType_OCLImage2dDepthRO = 126, + CXType_OCLImage2dArrayDepthRO = 127, + CXType_OCLImage2dMSAARO = 128, + CXType_OCLImage2dArrayMSAARO = 129, + CXType_OCLImage2dMSAADepthRO = 130, + CXType_OCLImage2dArrayMSAADepthRO = 131, + CXType_OCLImage3dRO = 132, + CXType_OCLImage1dWO = 133, + CXType_OCLImage1dArrayWO = 134, + CXType_OCLImage1dBufferWO = 135, + CXType_OCLImage2dWO = 136, + CXType_OCLImage2dArrayWO = 137, + CXType_OCLImage2dDepthWO = 138, + CXType_OCLImage2dArrayDepthWO = 139, + CXType_OCLImage2dMSAAWO = 140, + CXType_OCLImage2dArrayMSAAWO = 141, + CXType_OCLImage2dMSAADepthWO = 142, + CXType_OCLImage2dArrayMSAADepthWO = 143, + CXType_OCLImage3dWO = 144, + CXType_OCLImage1dRW = 145, + CXType_OCLImage1dArrayRW = 146, + CXType_OCLImage1dBufferRW = 147, + CXType_OCLImage2dRW = 148, + CXType_OCLImage2dArrayRW = 149, + CXType_OCLImage2dDepthRW = 150, + CXType_OCLImage2dArrayDepthRW = 151, + CXType_OCLImage2dMSAARW = 152, + CXType_OCLImage2dArrayMSAARW = 153, + CXType_OCLImage2dMSAADepthRW = 154, + CXType_OCLImage2dArrayMSAADepthRW = 155, + CXType_OCLImage3dRW = 156, + CXType_OCLSampler = 157, + CXType_OCLEvent = 158, + CXType_OCLQueue = 159, + CXType_OCLReserveID = 160 }; /** diff --git a/contrib/llvm/tools/clang/include/clang/AST/Expr.h b/contrib/llvm/tools/clang/include/clang/AST/Expr.h index 986145e62a5..0cdbd2a97ee 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/Expr.h +++ b/contrib/llvm/tools/clang/include/clang/AST/Expr.h @@ -4284,6 +4284,9 @@ public: } Designator *getDesignator(unsigned Idx) { return &designators()[Idx]; } + const Designator *getDesignator(unsigned Idx) const { + return &designators()[Idx]; + } void setDesignators(const ASTContext &C, const Designator *Desigs, unsigned NumDesigs); diff --git a/contrib/llvm/tools/clang/include/clang/AST/StmtCXX.h b/contrib/llvm/tools/clang/include/clang/AST/StmtCXX.h index 56bfce987f5..d6c9654fefa 100644 --- a/contrib/llvm/tools/clang/include/clang/AST/StmtCXX.h +++ b/contrib/llvm/tools/clang/include/clang/AST/StmtCXX.h @@ -308,7 +308,9 @@ class CoroutineBodyStmt final OnFallthrough, ///< Handler for control flow falling off the body. Allocate, ///< Coroutine frame memory allocation. Deallocate, ///< Coroutine frame memory deallocation. - ReturnValue, ///< Return value for thunk function. + ReturnValue, ///< Return value for thunk function: p.get_return_object(). + ResultDecl, ///< Declaration holding the result of get_return_object. + ReturnStmt, ///< Return statement for the thunk function. ReturnStmtOnAllocFailure, ///< Return statement if allocation failed. FirstParamMove ///< First offset for move construction of parameter copies. }; @@ -332,7 +334,9 @@ public: Stmt *OnFallthrough = nullptr; Expr *Allocate = nullptr; Expr *Deallocate = nullptr; - Stmt *ReturnValue = nullptr; + Expr *ReturnValue = nullptr; + Stmt *ResultDecl = nullptr; + Stmt *ReturnStmt = nullptr; Stmt *ReturnStmtOnAllocFailure = nullptr; ArrayRef ParamMoves; }; @@ -381,10 +385,11 @@ public: Expr *getDeallocate() const { return cast_or_null(getStoredStmts()[SubStmt::Deallocate]); } - Expr *getReturnValueInit() const { - return cast_or_null(getStoredStmts()[SubStmt::ReturnValue]); + return cast(getStoredStmts()[SubStmt::ReturnValue]); } + Stmt *getResultDecl() const { return getStoredStmts()[SubStmt::ResultDecl]; } + Stmt *getReturnStmt() const { return getStoredStmts()[SubStmt::ReturnStmt]; } Stmt *getReturnStmtOnAllocFailure() const { return getStoredStmts()[SubStmt::ReturnStmtOnAllocFailure]; } diff --git a/contrib/llvm/tools/clang/include/clang/Basic/Attr.td b/contrib/llvm/tools/clang/include/clang/Basic/Attr.td index a885ede8df1..9da2cc376d5 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/Attr.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/Attr.td @@ -149,6 +149,9 @@ class ExprArgument : Argument; class FunctionArgument : Argument; +class NamedArgument : Argument; class TypeArgument : Argument; class UnsignedArgument : Argument; class VariadicUnsignedArgument : Argument; @@ -1819,14 +1822,14 @@ def Unavailable : InheritableAttr { def DiagnoseIf : InheritableAttr { let Spellings = [GNU<"diagnose_if">]; - let Subjects = SubjectList<[Function]>; + let Subjects = SubjectList<[Function, ObjCMethod, ObjCProperty]>; let Args = [ExprArgument<"Cond">, StringArgument<"Message">, EnumArgument<"DiagnosticType", "DiagnosticType", ["error", "warning"], ["DT_Error", "DT_Warning"]>, BoolArgument<"ArgDependent", 0, /*fake*/ 1>, - FunctionArgument<"Parent", 0, /*fake*/ 1>]; + NamedArgument<"Parent", 0, /*fake*/ 1>]; let DuplicatesAllowedWhileMerging = 1; let LateParsed = 1; let AdditionalMembers = [{ diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def index a8ab657c379..6542acafe48 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -36,6 +36,7 @@ BUILTIN(__builtin_amdgcn_workitem_id_z, "Ui", "nc") // Instruction builtins. //===----------------------------------------------------------------------===// BUILTIN(__builtin_amdgcn_s_getreg, "UiIi", "n") +BUILTIN(__builtin_amdgcn_s_getpc, "LUi", "n") BUILTIN(__builtin_amdgcn_s_waitcnt, "vIi", "n") BUILTIN(__builtin_amdgcn_s_sendmsg, "vIiUi", "n") BUILTIN(__builtin_amdgcn_s_sendmsghalt, "vIiUi", "n") diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def index f7cddc03131..119490314b2 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsPPC.def @@ -420,6 +420,9 @@ BUILTIN(__builtin_vsx_xvtstdcsp, "V4UiV4fIi", "") BUILTIN(__builtin_vsx_insertword, "V16UcV4UiV16UcIi", "") BUILTIN(__builtin_vsx_extractuword, "V2ULLiV16UcIi", "") +BUILTIN(__builtin_vsx_xxpermdi, "v.", "t") +BUILTIN(__builtin_vsx_xxsldwi, "v.", "t") + // HTM builtins BUILTIN(__builtin_tbegin, "UiUIi", "") BUILTIN(__builtin_tend, "UiUIi", "") diff --git a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def index 68b868ce8e6..a98c8f0a53d 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/BuiltinsX86.def @@ -1107,6 +1107,9 @@ TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "", "avx5 TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd") TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq") + TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def index 0ab6724ed9e..2467b24fd90 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.def @@ -87,6 +87,8 @@ VALUE_DIAGOPT(TemplateBacktraceLimit, 32, DefaultTemplateBacktraceLimit) VALUE_DIAGOPT(ConstexprBacktraceLimit, 32, DefaultConstexprBacktraceLimit) /// Limit number of times to perform spell checking. VALUE_DIAGOPT(SpellCheckingLimit, 32, DefaultSpellCheckingLimit) +/// Limit number of lines shown in a snippet. +VALUE_DIAGOPT(SnippetLineLimit, 32, DefaultSnippetLineLimit) VALUE_DIAGOPT(TabStop, 32, DefaultTabStop) /// The distance between tab stops. /// Column limit for formatting message diagnostics, or 0 if unused. diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h index c9b0c5def99..c195003de5c 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticOptions.h @@ -63,11 +63,15 @@ public: enum TextDiagnosticFormat { Clang, MSVC, Vi }; // Default values. - enum { DefaultTabStop = 8, MaxTabStop = 100, + enum { + DefaultTabStop = 8, + MaxTabStop = 100, DefaultMacroBacktraceLimit = 6, DefaultTemplateBacktraceLimit = 10, DefaultConstexprBacktraceLimit = 10, - DefaultSpellCheckingLimit = 50 }; + DefaultSpellCheckingLimit = 50, + DefaultSnippetLineLimit = 1, + }; // Define simple diagnostic options (with no accessors). #define DIAGOPT(Name, Bits, Default) unsigned Name : Bits; diff --git a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 463d057d8fd..4934bcfa389 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/contrib/llvm/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -537,10 +537,10 @@ def err_maybe_falloff_nonvoid_block : Error< def err_falloff_nonvoid_block : Error< "control reaches end of non-void block">; def warn_maybe_falloff_nonvoid_coroutine : Warning< - "control may reach end of non-void coroutine">, + "control may reach end of coroutine; which is undefined behavior because the promise type %0 does not declare 'return_void()'">, InGroup; def warn_falloff_nonvoid_coroutine : Warning< - "control reaches end of non-void coroutine">, + "control reaches end of coroutine; which is undefined behavior because the promise type %0 does not declare 'return_void()'">, InGroup; def warn_suggest_noreturn_function : Warning< "%select{function|method}0 %1 could be declared with attribute 'noreturn'">, @@ -1554,11 +1554,9 @@ def note_ivar_decl : Note<"instance variable is declared here">; def note_bitfield_decl : Note<"bit-field is declared here">; def note_implicit_param_decl : Note<"%0 is an implicit parameter">; def note_member_synthesized_at : Note< - "implicit %select{default constructor|copy constructor|move constructor|copy " - "assignment operator|move assignment operator|destructor}0 for %1 first " - "required here">; -def note_inhctor_synthesized_at : Note< - "inherited constructor for %0 first required here">; + "in implicit %select{default constructor|copy constructor|move constructor|" + "copy assignment operator|move assignment operator|destructor}0 for %1 " + "first required here">; def err_missing_default_ctor : Error< "%select{constructor for %1 must explicitly initialize the|" "implicit default constructor for %1 must explicitly initialize the|" @@ -2769,6 +2767,7 @@ def warn_attribute_wrong_decl_type : Warning< "|types and namespaces" "|Objective-C interfaces" "|methods and properties" + "|functions, methods, and properties" "|struct or union" "|struct, union or class" "|types" @@ -2883,6 +2882,10 @@ def warn_partial_message : Warning<"%0 is partial: %1">, def warn_partial_fwdclass_message : Warning< "%0 may be partial because the receiver type is unknown">, InGroup, DefaultIgnore; +def warn_at_available_unchecked_use : Warning< + "%select{@available|__builtin_available}0 does not guard availability here; " + "use if (%select{@available|__builtin_available}0) instead">, + InGroup>; // Thread Safety Attributes def warn_invalid_capability_name : Warning< @@ -6298,6 +6301,8 @@ def warn_ambiguous_suitable_delete_function_found : Warning< InGroup>; def note_member_declared_here : Note< "member %0 declared here">; +def note_member_first_declared_here : Note< + "member %0 first declared here">; def err_decrement_bool : Error<"cannot decrement expression of type bool">; def warn_increment_bool : Warning< "incrementing expression of type bool is deprecated and " @@ -8009,10 +8014,13 @@ def err_block_on_nonlocal : Error< def err_block_on_vm : Error< "__block attribute not allowed on declaration with a variably modified type">; -def err_shufflevector_non_vector : Error< - "first two arguments to __builtin_shufflevector must be vectors">; -def err_shufflevector_incompatible_vector : Error< - "first two arguments to __builtin_shufflevector must have the same type">; +def err_vec_builtin_non_vector : Error< + "first two arguments to %0 must be vectors">; +def err_vec_builtin_incompatible_vector : Error< + "first two arguments to %0 must have the same type">; +def err_vsx_builtin_nonconstant_argument : Error< + "argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">; + def err_shufflevector_nonconstant_argument : Error< "index for __builtin_shufflevector must be a constant integer">; def err_shufflevector_argument_too_large : Error< @@ -8899,8 +8907,6 @@ def note_equivalent_internal_linkage_decl : Note< def note_redefinition_modules_same_file : Note< "'%0' included multiple times, additional include site in header from module '%1'">; -def note_redefinition_modules_same_file_modulemap : Note< - "consider adding '%0' as part of '%1' definition">; def note_redefinition_include_same_file : Note< "'%0' included multiple times, additional include site here">; } @@ -8944,8 +8950,10 @@ def err_coroutine_promise_type_incomplete : Error< def err_coroutine_type_missing_specialization : Error< "this function cannot be a coroutine: missing definition of " "specialization %q0">; -def err_coroutine_promise_return_ill_formed : Error< - "%0 declares both 'return_value' and 'return_void'">; +def err_coroutine_promise_incompatible_return_functions : Error< + "the coroutine promise type %0 declares both 'return_value' and 'return_void'">; +def err_coroutine_promise_requires_return_function : Error< + "the coroutine promise type %0 must declare either 'return_value' or 'return_void'">; def note_coroutine_promise_implicit_await_transform_required_here : Note< "call to 'await_transform' implicitly required by 'co_await' here">; def note_coroutine_promise_suspend_implicitly_required : Note< @@ -8958,11 +8966,19 @@ def warn_coroutine_promise_unhandled_exception_required_with_exceptions : Warnin InGroup; def err_coroutine_promise_get_return_object_on_allocation_failure : Error< "%0: 'get_return_object_on_allocation_failure()' must be a static member function">; +def err_seh_in_a_coroutine_with_cxx_exceptions : Error< + "cannot use SEH '__try' in a coroutine when C++ exceptions are enabled">; def err_coroutine_promise_new_requires_nothrow : Error< "%0 is required to have a non-throwing noexcept specification when the promise " "type declares 'get_return_object_on_allocation_failure()'">; def note_coroutine_promise_call_implicitly_required : Note< "call to %0 implicitly required by coroutine function here">; +def err_await_suspend_invalid_return_type : Error< + "the return type of 'await_suspend' is required to be 'void' or 'bool' (have %0)" +>; +def note_await_ready_no_bool_conversion : Note< + "the return type of 'await_ready' is required to be contextually convertible to 'bool'" +>; } let CategoryName = "Documentation Issue" in { diff --git a/contrib/llvm/tools/clang/include/clang/Basic/SourceManager.h b/contrib/llvm/tools/clang/include/clang/Basic/SourceManager.h index 6960ea690b9..eda80293400 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/SourceManager.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/SourceManager.h @@ -1399,10 +1399,9 @@ public: /// specified by Loc. /// /// If FilenameID is -1, it is considered to be unspecified. - void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID); void AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID, bool IsFileEntry, bool IsFileExit, - bool IsSystemHeader, bool IsExternCHeader); + SrcMgr::CharacteristicKind FileKind); /// \brief Determine if the source manager has a line table. bool hasLineTable() const { return LineTable != nullptr; } diff --git a/contrib/llvm/tools/clang/include/clang/Basic/SourceManagerInternals.h b/contrib/llvm/tools/clang/include/clang/Basic/SourceManagerInternals.h index e65c97b0031..9403dea8889 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/SourceManagerInternals.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/SourceManagerInternals.h @@ -101,8 +101,6 @@ public: } unsigned getNumFilenames() const { return FilenamesByID.size(); } - void AddLineNote(FileID FID, unsigned Offset, - unsigned LineNo, int FilenameID); void AddLineNote(FileID FID, unsigned Offset, unsigned LineNo, int FilenameID, unsigned EntryExit, SrcMgr::CharacteristicKind FileKind); diff --git a/contrib/llvm/tools/clang/include/clang/Basic/TemplateKinds.h b/contrib/llvm/tools/clang/include/clang/Basic/TemplateKinds.h index aed287b4622..ac99ad185f3 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/TemplateKinds.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/TemplateKinds.h @@ -26,13 +26,21 @@ enum TemplateNameKind { TNK_Function_template, /// The name refers to a template whose specialization produces a /// type. The template itself could be a class template, template - /// template parameter, or C++0x template alias. + /// template parameter, or template alias. TNK_Type_template, /// The name refers to a variable template whose specialization produces a /// variable. TNK_Var_template, - /// The name refers to a dependent template name. Whether the - /// template name is assumed to refer to a type template or a + /// The name refers to a dependent template name: + /// \code + /// template struct apply2 { + /// typedef typename MetaFun::template apply::type type; + /// }; + /// \endcode + /// + /// Here, "apply" is a dependent template name within the typename + /// specifier in the typedef. "apply" is a nested template, and + /// whether the template name is assumed to refer to a type template or a /// function template depends on the context in which the template /// name occurs. TNK_Dependent_template_name diff --git a/contrib/llvm/tools/clang/include/clang/Basic/XRayLists.h b/contrib/llvm/tools/clang/include/clang/Basic/XRayLists.h index fe538289c3a..8cfea70e280 100644 --- a/contrib/llvm/tools/clang/include/clang/Basic/XRayLists.h +++ b/contrib/llvm/tools/clang/include/clang/Basic/XRayLists.h @@ -37,6 +37,7 @@ public: NONE, ALWAYS, NEVER, + ALWAYS_ARG1, }; ImbueAttribute shouldImbueFunction(StringRef FunctionName) const; diff --git a/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td b/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td index bd2062d967b..b55289dc35a 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/CC1Options.td @@ -172,6 +172,8 @@ def disable_llvm_optzns : Flag<["-"], "disable-llvm-optzns">, def disable_lifetimemarkers : Flag<["-"], "disable-lifetime-markers">, HelpText<"Disable lifetime-markers emission even when optimizations are " "enabled">; +def disable_O0_optnone : Flag<["-"], "disable-O0-optnone">, + HelpText<"Disable adding the optnone attribute to functions at O0">; def disable_red_zone : Flag<["-"], "disable-red-zone">, HelpText<"Do not emit code that uses the red zone.">; def dwarf_column_info : Flag<["-"], "dwarf-column-info">, @@ -359,6 +361,9 @@ def fconstexpr_backtrace_limit : Separate<["-"], "fconstexpr-backtrace-limit">, HelpText<"Set the maximum number of entries to print in a constexpr evaluation backtrace (0 = no limit).">; def fspell_checking_limit : Separate<["-"], "fspell-checking-limit">, MetaVarName<"">, HelpText<"Set the maximum number of times to perform spell checking on unrecognized identifiers (0 = no limit).">; +def fcaret_diagnostics_max_lines : + Separate<["-"], "fcaret-diagnostics-max-lines">, MetaVarName<"">, + HelpText<"Set the maximum number of source lines to show in a caret diagnostic">; def fmessage_length : Separate<["-"], "fmessage-length">, MetaVarName<"">, HelpText<"Format message diagnostics so that they fit within N columns or fewer, when possible.">; def verify : Flag<["-"], "verify">, diff --git a/contrib/llvm/tools/clang/include/clang/Driver/Options.td b/contrib/llvm/tools/clang/include/clang/Driver/Options.td index d812bd8ec03..2de4a2d1b41 100644 --- a/contrib/llvm/tools/clang/include/clang/Driver/Options.td +++ b/contrib/llvm/tools/clang/include/clang/Driver/Options.td @@ -469,6 +469,7 @@ def arch__errors__fatal : Flag<["-"], "arch_errors_fatal">; def arch : Separate<["-"], "arch">, Flags<[DriverOption]>; def arch__only : Separate<["-"], "arch_only">; def a : Joined<["-"], "a">; +def autocomplete : Joined<["--"], "autocomplete=">; def bind__at__load : Flag<["-"], "bind_at_load">; def bundle__loader : Separate<["-"], "bundle_loader">; def bundle : Flag<["-"], "bundle">; @@ -1740,6 +1741,7 @@ def mno_avx : Flag<["-"], "mno-avx">, Group; def mno_avx2 : Flag<["-"], "mno-avx2">, Group; def mno_avx512f : Flag<["-"], "mno-avx512f">, Group; def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group; +def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group; def mno_avx512er : Flag<["-"], "mno-avx512er">, Group; def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group; def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group; @@ -1940,6 +1942,7 @@ def mavx : Flag<["-"], "mavx">, Group; def mavx2 : Flag<["-"], "mavx2">, Group; def mavx512f : Flag<["-"], "mavx512f">, Group; def mavx512cd : Flag<["-"], "mavx512cd">, Group; +def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group; def mavx512er : Flag<["-"], "mavx512er">, Group; def mavx512pf : Flag<["-"], "mavx512pf">, Group; def mavx512dq : Flag<["-"], "mavx512dq">, Group; diff --git a/contrib/llvm/tools/clang/include/clang/Format/Format.h b/contrib/llvm/tools/clang/include/clang/Format/Format.h index 2405f8319c6..7ec3e22ca4d 100644 --- a/contrib/llvm/tools/clang/include/clang/Format/Format.h +++ b/contrib/llvm/tools/clang/include/clang/Format/Format.h @@ -710,16 +710,35 @@ struct FormatStyle { /// \endcode bool BreakBeforeTernaryOperators; - /// \brief Always break constructor initializers before commas and align - /// the commas with the colon. - /// \code - /// true: false: - /// SomeClass::Constructor() vs. SomeClass::Constructor() : a(a), - /// : a(a) b(b), - /// , b(b) c(c) {} - /// , c(c) {} - /// \endcode - bool BreakConstructorInitializersBeforeComma; + /// \brief Different ways to break initializers. + enum BreakConstructorInitializersStyle + { + /// Break constructor initializers before the colon and after the commas. + /// \code + /// Constructor() + /// : initializer1(), + /// initializer2() + /// \endcode + BCIS_BeforeColon, + /// Break constructor initializers before the colon and commas, and align + /// the commas with the colon. + /// \code + /// Constructor() + /// : initializer1() + /// , initializer2() + /// \endcode + BCIS_BeforeComma, + /// Break constructor initializers after the colon and commas. + /// \code + /// Constructor() : + /// initializer1(), + /// initializer2() + /// \endcode + BCIS_AfterColon + }; + + /// \brief The constructor initializers style to use.. + BreakConstructorInitializersStyle BreakConstructorInitializers; /// \brief Break after each annotation on a field in Java files. /// \code{.java} @@ -1390,8 +1409,7 @@ struct FormatStyle { BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators && BreakBeforeBraces == R.BreakBeforeBraces && BreakBeforeTernaryOperators == R.BreakBeforeTernaryOperators && - BreakConstructorInitializersBeforeComma == - R.BreakConstructorInitializersBeforeComma && + BreakConstructorInitializers == R.BreakConstructorInitializers && BreakAfterJavaFieldAnnotations == R.BreakAfterJavaFieldAnnotations && BreakStringLiterals == R.BreakStringLiterals && ColumnLimit == R.ColumnLimit && CommentPragmas == R.CommentPragmas && diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h b/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h index 46395cf6e86..7f70609efc9 100644 --- a/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h +++ b/contrib/llvm/tools/clang/include/clang/Frontend/ASTUnit.h @@ -59,6 +59,10 @@ class TargetInfo; class FrontendAction; class ASTDeserializationListener; +namespace vfs { +class FileSystem; +} + /// \brief Utility class for loading a ASTContext from an AST file. /// class ASTUnit : public ModuleLoader { @@ -420,7 +424,8 @@ private: explicit ASTUnit(bool MainFileIsAST); bool Parse(std::shared_ptr PCHContainerOps, - std::unique_ptr OverrideMainBuffer); + std::unique_ptr OverrideMainBuffer, + IntrusiveRefCntPtr VFS); struct ComputedPreamble { llvm::MemoryBuffer *Buffer; @@ -434,11 +439,13 @@ private: PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {} }; ComputedPreamble ComputePreamble(CompilerInvocation &Invocation, - unsigned MaxLines); + unsigned MaxLines, + IntrusiveRefCntPtr VFS); std::unique_ptr getMainBufferWithPrecompiledPreamble( std::shared_ptr PCHContainerOps, - const CompilerInvocation &PreambleInvocationIn, bool AllowRebuild = true, + const CompilerInvocation &PreambleInvocationIn, + IntrusiveRefCntPtr VFS, bool AllowRebuild = true, unsigned MaxLines = 0); void RealizeTopLevelDeclsFromPreamble(); @@ -731,11 +738,17 @@ private: /// of this translation unit should be precompiled, to improve the performance /// of reparsing. Set to zero to disable preambles. /// + /// \param VFS - A vfs::FileSystem to be used for all file accesses. Note that + /// preamble is saved to a temporary directory on a RealFileSystem, so in order + /// for it to be loaded correctly, VFS should have access to it(i.e., be an + /// overlay over RealFileSystem). + /// /// \returns \c true if a catastrophic failure occurred (which means that the /// \c ASTUnit itself is invalid), or \c false otherwise. bool LoadFromCompilerInvocation( std::shared_ptr PCHContainerOps, - unsigned PrecompilePreambleAfterNParses); + unsigned PrecompilePreambleAfterNParses, + IntrusiveRefCntPtr VFS); public: @@ -826,6 +839,11 @@ public: /// (e.g. because the PCH could not be loaded), this accepts the ASTUnit /// mainly to allow the caller to see the diagnostics. /// + /// \param VFS - A vfs::FileSystem to be used for all file accesses. Note that + /// preamble is saved to a temporary directory on a RealFileSystem, so in order + /// for it to be loaded correctly, VFS should have access to it(i.e., be an + /// overlay over RealFileSystem). RealFileSystem will be used if \p VFS is nullptr. + /// // FIXME: Move OnlyLocalDecls, UseBumpAllocator to setters on the ASTUnit, we // shouldn't need to specify them at construction time. static ASTUnit *LoadFromCommandLine( @@ -842,15 +860,23 @@ public: bool AllowPCHWithCompilerErrors = false, bool SkipFunctionBodies = false, bool UserFilesAreVolatile = false, bool ForSerialization = false, llvm::Optional ModuleFormat = llvm::None, - std::unique_ptr *ErrAST = nullptr); + std::unique_ptr *ErrAST = nullptr, + IntrusiveRefCntPtr VFS = nullptr); /// \brief Reparse the source files using the same command-line options that /// were originally used to produce this translation unit. /// + /// \param VFS - A vfs::FileSystem to be used for all file accesses. Note that + /// preamble is saved to a temporary directory on a RealFileSystem, so in order + /// for it to be loaded correctly, VFS should give an access to this(i.e. be an + /// overlay over RealFileSystem). FileMgr->getVirtualFileSystem() will be used if + /// \p VFS is nullptr. + /// /// \returns True if a failure occurred that causes the ASTUnit not to /// contain any translation-unit information, false otherwise. bool Reparse(std::shared_ptr PCHContainerOps, - ArrayRef RemappedFiles = None); + ArrayRef RemappedFiles = None, + IntrusiveRefCntPtr VFS = nullptr); /// \brief Perform code completion at the given file, line, and /// column within this translation unit. diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def index 251441d38ff..9a13854671d 100644 --- a/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def +++ b/contrib/llvm/tools/clang/include/clang/Frontend/CodeGenOptions.def @@ -53,6 +53,7 @@ CODEGENOPT(DisableLLVMPasses , 1, 0) ///< Don't run any LLVM IR passes to get ///< the pristine IR generated by the ///< frontend. CODEGENOPT(DisableLifetimeMarkers, 1, 0) ///< Don't emit any lifetime markers +CODEGENOPT(DisableO0ImplyOptNone , 1, 0) ///< Don't annonate function with optnone at O0 CODEGENOPT(ExperimentalNewPassManager, 1, 0) ///< Enables the new, experimental ///< pass manager. CODEGENOPT(DisableRedZone , 1, 0) ///< Set when -mno-red-zone is enabled. diff --git a/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInvocation.h b/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInvocation.h index cef7f73ecaa..8c4c932190b 100644 --- a/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInvocation.h +++ b/contrib/llvm/tools/clang/include/clang/Frontend/CompilerInvocation.h @@ -225,6 +225,11 @@ IntrusiveRefCntPtr createVFSFromCompilerInvocation(const CompilerInvocation &CI, DiagnosticsEngine &Diags); +IntrusiveRefCntPtr +createVFSFromCompilerInvocation(const CompilerInvocation &CI, + DiagnosticsEngine &Diags, + IntrusiveRefCntPtr BaseFS); + } // end namespace clang #endif diff --git a/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h b/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h index b6eed23f4d3..c05faa4a820 100644 --- a/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h +++ b/contrib/llvm/tools/clang/include/clang/Lex/ModuleMap.h @@ -257,6 +257,23 @@ private: /// resolved. Module *resolveModuleId(const ModuleId &Id, Module *Mod, bool Complain) const; + /// Resolve the given header directive to an actual header file. + /// + /// \param M The module in which we're resolving the header directive. + /// \param Header The header directive to resolve. + /// \param RelativePathName Filled in with the relative path name from the + /// module to the resolved header. + /// \return The resolved file, if any. + const FileEntry *resolveHeader(Module *M, + Module::UnresolvedHeaderDirective Header, + SmallVectorImpl &RelativePathName); + + /// Attempt to resolve the specified header directive as naming a builtin + /// header. + const FileEntry * + resolveAsBuiltinHeader(Module *M, Module::UnresolvedHeaderDirective Header, + SmallVectorImpl &BuiltinPathName); + /// \brief Looks up the modules that \p File corresponds to. /// /// If \p File represents a builtin header within Clang's builtin include diff --git a/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h b/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h index f3b042c9ce7..6bdd9d5fcdb 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/AttributeList.h @@ -915,6 +915,7 @@ enum AttributeDeclKind { ExpectedTypeOrNamespace, ExpectedObjectiveCInterface, ExpectedMethodOrProperty, + ExpectedFunctionOrMethodOrProperty, ExpectedStructOrUnion, ExpectedStructOrUnionOrClass, ExpectedType, diff --git a/contrib/llvm/tools/clang/include/clang/Sema/ParsedTemplate.h b/contrib/llvm/tools/clang/include/clang/Sema/ParsedTemplate.h index 03de9ff6ae4..01a4ab3f37a 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/ParsedTemplate.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/ParsedTemplate.h @@ -145,12 +145,15 @@ namespace clang { /// expressions, or template names, and the source locations for important /// tokens. All of the information about template arguments is allocated /// directly after this structure. - struct TemplateIdAnnotation { + struct TemplateIdAnnotation final + : private llvm::TrailingObjects { + friend TrailingObjects; /// \brief The nested-name-specifier that precedes the template name. CXXScopeSpec SS; - /// TemplateKWLoc - The location of the template keyword within the - /// source. + /// TemplateKWLoc - The location of the template keyword. + /// For e.g. typename T::template Y SourceLocation TemplateKWLoc; /// TemplateNameLoc - The location of the template name within the @@ -183,34 +186,56 @@ namespace clang { /// \brief Retrieves a pointer to the template arguments ParsedTemplateArgument *getTemplateArgs() { - return reinterpret_cast(this + 1); + return getTrailingObjects(); } /// \brief Creates a new TemplateIdAnnotation with NumArgs arguments and /// appends it to List. static TemplateIdAnnotation * - Allocate(unsigned NumArgs, SmallVectorImpl &List) { - TemplateIdAnnotation *TemplateId - = (TemplateIdAnnotation *)std::malloc(sizeof(TemplateIdAnnotation) + - sizeof(ParsedTemplateArgument) * NumArgs); - TemplateId->NumArgs = NumArgs; - - // Default-construct nested-name-specifier. - new (&TemplateId->SS) CXXScopeSpec(); - - // Default-construct parsed template arguments. - ParsedTemplateArgument *TemplateArgs = TemplateId->getTemplateArgs(); - for (unsigned I = 0; I != NumArgs; ++I) - new (TemplateArgs + I) ParsedTemplateArgument(); - - List.push_back(TemplateId); + Create(CXXScopeSpec SS, SourceLocation TemplateKWLoc, + SourceLocation TemplateNameLoc, IdentifierInfo *Name, + OverloadedOperatorKind OperatorKind, + ParsedTemplateTy OpaqueTemplateName, TemplateNameKind TemplateKind, + SourceLocation LAngleLoc, SourceLocation RAngleLoc, + ArrayRef TemplateArgs, + SmallVectorImpl &CleanupList) { + + TemplateIdAnnotation *TemplateId = new (std::malloc( + totalSizeToAlloc(TemplateArgs.size()))) + TemplateIdAnnotation(SS, TemplateKWLoc, TemplateNameLoc, Name, + OperatorKind, OpaqueTemplateName, TemplateKind, + LAngleLoc, RAngleLoc, TemplateArgs); + CleanupList.push_back(TemplateId); return TemplateId; } - - void Destroy() { - SS.~CXXScopeSpec(); + + void Destroy() { + std::for_each( + getTemplateArgs(), getTemplateArgs() + NumArgs, + [](ParsedTemplateArgument &A) { A.~ParsedTemplateArgument(); }); + this->~TemplateIdAnnotation(); free(this); } + private: + TemplateIdAnnotation(const TemplateIdAnnotation &) = delete; + + TemplateIdAnnotation(CXXScopeSpec SS, SourceLocation TemplateKWLoc, + SourceLocation TemplateNameLoc, IdentifierInfo *Name, + OverloadedOperatorKind OperatorKind, + ParsedTemplateTy OpaqueTemplateName, + TemplateNameKind TemplateKind, + SourceLocation LAngleLoc, SourceLocation RAngleLoc, + ArrayRef TemplateArgs) noexcept + : SS(SS), TemplateKWLoc(TemplateKWLoc), + TemplateNameLoc(TemplateNameLoc), Name(Name), Operator(OperatorKind), + Template(OpaqueTemplateName), Kind(TemplateKind), + LAngleLoc(LAngleLoc), RAngleLoc(RAngleLoc), + NumArgs(TemplateArgs.size()) { + + std::uninitialized_copy(TemplateArgs.begin(), TemplateArgs.end(), + getTemplateArgs()); + } + ~TemplateIdAnnotation() = default; }; /// Retrieves the range of the given template parameter lists. diff --git a/contrib/llvm/tools/clang/include/clang/Sema/ScopeInfo.h b/contrib/llvm/tools/clang/include/clang/Sema/ScopeInfo.h index 4487c7c2ccb..4251fa649a8 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/ScopeInfo.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/ScopeInfo.h @@ -388,6 +388,8 @@ public: (HasBranchProtectedScope && HasBranchIntoScope)); } + bool isCoroutine() const { return !FirstCoroutineStmtLoc.isInvalid(); } + void setFirstCoroutineStmt(SourceLocation Loc, StringRef Keyword) { assert(FirstCoroutineStmtLoc.isInvalid() && "first coroutine statement location already set"); diff --git a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h index fcd3ee714fa..4c9f18a0724 100644 --- a/contrib/llvm/tools/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm/tools/clang/include/clang/Sema/Sema.h @@ -689,17 +689,37 @@ public: class SynthesizedFunctionScope { Sema &S; Sema::ContextRAII SavedContext; + bool PushedCodeSynthesisContext = false; public: SynthesizedFunctionScope(Sema &S, DeclContext *DC) - : S(S), SavedContext(S, DC) - { + : S(S), SavedContext(S, DC) { S.PushFunctionScope(); S.PushExpressionEvaluationContext( Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + if (auto *FD = dyn_cast(DC)) + FD->setWillHaveBody(true); + else + assert(isa(DC)); + } + + void addContextNote(SourceLocation UseLoc) { + assert(!PushedCodeSynthesisContext); + + Sema::CodeSynthesisContext Ctx; + Ctx.Kind = Sema::CodeSynthesisContext::DefiningSynthesizedFunction; + Ctx.PointOfInstantiation = UseLoc; + Ctx.Entity = cast(S.CurContext); + S.pushCodeSynthesisContext(Ctx); + + PushedCodeSynthesisContext = true; } ~SynthesizedFunctionScope() { + if (PushedCodeSynthesisContext) + S.popCodeSynthesisContext(); + if (auto *FD = dyn_cast(S.CurContext)) + FD->setWillHaveBody(false); S.PopExpressionEvaluationContext(); S.PopFunctionScopeInfo(); } @@ -2727,7 +2747,7 @@ public: /// of a function. /// /// Returns true if any errors were emitted. - bool diagnoseArgIndependentDiagnoseIfAttrs(const FunctionDecl *Function, + bool diagnoseArgIndependentDiagnoseIfAttrs(const NamedDecl *ND, SourceLocation Loc); /// Returns whether the given function's address can be taken or not, @@ -6974,6 +6994,10 @@ public: /// We are declaring an implicit special member function. DeclaringSpecialMember, + + /// We are defining a synthesized function (such as a defaulted special + /// member). + DefiningSynthesizedFunction, } Kind; /// \brief Was the enclosing context a non-instantiation SFINAE context? @@ -10121,6 +10145,7 @@ private: bool SemaBuiltinVAStartARM(CallExpr *Call); bool SemaBuiltinUnorderedCompare(CallExpr *TheCall); bool SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs); + bool SemaBuiltinVSX(CallExpr *TheCall); bool SemaBuiltinOSLogFormat(CallExpr *TheCall); public: diff --git a/contrib/llvm/tools/clang/include/clang/Tooling/CompilationDatabase.h b/contrib/llvm/tools/clang/include/clang/Tooling/CompilationDatabase.h index 4611d3cdae5..e988b84b6ea 100644 --- a/contrib/llvm/tools/clang/include/clang/Tooling/CompilationDatabase.h +++ b/contrib/llvm/tools/clang/include/clang/Tooling/CompilationDatabase.h @@ -60,16 +60,6 @@ struct CompileCommand { /// The output file associated with the command. std::string Output; - - /// \brief An optional mapping from each file's path to its content for all - /// files needed for the compilation that are not available via the file - /// system. - /// - /// Note that a tool implementation is required to fall back to the file - /// system if a source file is not provided in the mapped sources, as - /// compilation databases will usually not provide all files in mapped sources - /// for performance reasons. - std::vector > MappedSources; }; /// \brief Interface for compilation databases. @@ -186,10 +176,11 @@ public: /// the number of arguments before "--", if "--" was found in the argument /// list. /// \param Argv Points to the command line arguments. + /// \param ErrorMsg Contains error text if the function returns null pointer. /// \param Directory The base directory used in the FixedCompilationDatabase. - static FixedCompilationDatabase *loadFromCommandLine(int &Argc, - const char *const *Argv, - Twine Directory = "."); + static std::unique_ptr loadFromCommandLine( + int &Argc, const char *const *Argv, std::string &ErrorMsg, + Twine Directory = "."); /// \brief Constructs a compilation data base from a specified directory /// and command line. diff --git a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp index 81cd1cc4265..032a20afa83 100644 --- a/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp +++ b/contrib/llvm/tools/clang/lib/AST/DeclBase.cpp @@ -274,9 +274,17 @@ void Decl::setLexicalDeclContext(DeclContext *DC) { } else { getMultipleDC()->LexicalDC = DC; } - Hidden = cast(DC)->Hidden; - if (Hidden && !isFromASTFile() && hasLocalOwningModuleStorage()) - setLocalOwningModule(cast(DC)->getOwningModule()); + + // FIXME: We shouldn't be changing the lexical context of declarations + // imported from AST files. + if (!isFromASTFile()) { + Hidden = cast(DC)->Hidden && hasLocalOwningModuleStorage(); + if (Hidden) + setLocalOwningModule(cast(DC)->getOwningModule()); + } + + assert((!Hidden || getOwningModule()) && + "hidden declaration has no owning module"); } void Decl::setDeclContextsImpl(DeclContext *SemaDC, DeclContext *LexicalDC, @@ -440,8 +448,8 @@ const Attr *Decl::getDefiningAttr() const { return nullptr; } -StringRef getRealizedPlatform(const AvailabilityAttr *A, - const ASTContext &Context) { +static StringRef getRealizedPlatform(const AvailabilityAttr *A, + const ASTContext &Context) { // Check if this is an App Extension "platform", and if so chop off // the suffix for matching with the actual platform. StringRef RealizedPlatform = A->getPlatform()->getName(); diff --git a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp index bd8b3abd927..c19812e341c 100644 --- a/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp +++ b/contrib/llvm/tools/clang/lib/AST/ExprConstant.cpp @@ -1230,8 +1230,7 @@ namespace { IsNullPtr = V.isNullPointer(); } - void set(APValue::LValueBase B, unsigned I = 0, bool BInvalid = false, - bool IsNullPtr_ = false, uint64_t Offset_ = 0) { + void set(APValue::LValueBase B, unsigned I = 0, bool BInvalid = false) { #ifndef NDEBUG // We only allow a few types of invalid bases. Enforce that here. if (BInvalid) { @@ -1242,11 +1241,20 @@ namespace { #endif Base = B; - Offset = CharUnits::fromQuantity(Offset_); + Offset = CharUnits::fromQuantity(0); InvalidBase = BInvalid; CallIndex = I; Designator = SubobjectDesignator(getType(B)); - IsNullPtr = IsNullPtr_; + IsNullPtr = false; + } + + void setNull(QualType PointerTy, uint64_t TargetVal) { + Base = (Expr *)nullptr; + Offset = CharUnits::fromQuantity(TargetVal); + InvalidBase = false; + CallIndex = 0; + Designator = SubobjectDesignator(PointerTy->getPointeeType()); + IsNullPtr = true; } void setInvalid(APValue::LValueBase B, unsigned I = 0) { @@ -5494,8 +5502,8 @@ public: return true; } bool ZeroInitialization(const Expr *E) { - auto Offset = Info.Ctx.getTargetNullPointerValue(E->getType()); - Result.set((Expr*)nullptr, 0, false, true, Offset); + auto TargetVal = Info.Ctx.getTargetNullPointerValue(E->getType()); + Result.setNull(E->getType(), TargetVal); return true; } diff --git a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp index 6e14dd055cf..cc7a6941f63 100644 --- a/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp +++ b/contrib/llvm/tools/clang/lib/AST/MicrosoftMangle.cpp @@ -1689,6 +1689,8 @@ void MicrosoftCXXNameMangler::mangleType(const BuiltinType *T, Qualifiers, // ::= _N # bool // _O # // ::= _T # __float80 (Intel) + // ::= _S # char16_t + // ::= _U # char32_t // ::= _W # wchar_t // ::= _Z # __float80 (Digital Mars) switch (T->getKind()) { diff --git a/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp b/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp index aade13ed3bd..8466cd61f05 100644 --- a/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp +++ b/contrib/llvm/tools/clang/lib/AST/StmtCXX.cpp @@ -88,7 +88,7 @@ const VarDecl *CXXForRangeStmt::getLoopVariable() const { } CoroutineBodyStmt *CoroutineBodyStmt::Create( - const ASTContext &C, CoroutineBodyStmt::CtorArgs const& Args) { + const ASTContext &C, CoroutineBodyStmt::CtorArgs const &Args) { std::size_t Size = totalSizeToAlloc( CoroutineBodyStmt::FirstParamMove + Args.ParamMoves.size()); @@ -108,6 +108,8 @@ CoroutineBodyStmt::CoroutineBodyStmt(CoroutineBodyStmt::CtorArgs const &Args) SubStmts[CoroutineBodyStmt::Allocate] = Args.Allocate; SubStmts[CoroutineBodyStmt::Deallocate] = Args.Deallocate; SubStmts[CoroutineBodyStmt::ReturnValue] = Args.ReturnValue; + SubStmts[CoroutineBodyStmt::ResultDecl] = Args.ResultDecl; + SubStmts[CoroutineBodyStmt::ReturnStmt] = Args.ReturnStmt; SubStmts[CoroutineBodyStmt::ReturnStmtOnAllocFailure] = Args.ReturnStmtOnAllocFailure; std::copy(Args.ParamMoves.begin(), Args.ParamMoves.end(), diff --git a/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp b/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp index 6b58916162f..7c0f5543da0 100644 --- a/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp +++ b/contrib/llvm/tools/clang/lib/Analysis/AnalysisDeclContext.cpp @@ -92,6 +92,8 @@ Stmt *AnalysisDeclContext::getBody(bool &IsAutosynthesized) const { IsAutosynthesized = false; if (const FunctionDecl *FD = dyn_cast(D)) { Stmt *Body = FD->getBody(); + if (auto *CoroBody = dyn_cast_or_null(Body)) + Body = CoroBody->getBody(); if (Manager && Manager->synthesizeBodies()) { Stmt *SynthesizedBody = getBodyFarm(getASTContext(), Manager->Injector.get()).getBody(FD); diff --git a/contrib/llvm/tools/clang/lib/Basic/Module.cpp b/contrib/llvm/tools/clang/lib/Basic/Module.cpp index a6fd931cb17..ac3d7c55967 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Module.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Module.cpp @@ -64,6 +64,7 @@ static bool hasFeature(StringRef Feature, const LangOptions &LangOpts, bool HasFeature = llvm::StringSwitch(Feature) .Case("altivec", LangOpts.AltiVec) .Case("blocks", LangOpts.Blocks) + .Case("coroutines", LangOpts.CoroutinesTS) .Case("cplusplus", LangOpts.CPlusPlus) .Case("cplusplus11", LangOpts.CPlusPlus11) .Case("freestanding", LangOpts.Freestanding) diff --git a/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp b/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp index 156aa0b11f7..c5cff74ac97 100644 --- a/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/SourceManager.cpp @@ -183,48 +183,22 @@ unsigned LineTableInfo::getLineTableFilenameID(StringRef Name) { return IterBool.first->second; } -/// AddLineNote - Add a line note to the line table that indicates that there -/// is a \#line at the specified FID/Offset location which changes the presumed -/// location to LineNo/FilenameID. -void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, - unsigned LineNo, int FilenameID) { - std::vector &Entries = LineEntries[FID]; - - assert((Entries.empty() || Entries.back().FileOffset < Offset) && - "Adding line entries out of order!"); - - SrcMgr::CharacteristicKind Kind = SrcMgr::C_User; - unsigned IncludeOffset = 0; - - if (!Entries.empty()) { - // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember - // that we are still in "foo.h". - if (FilenameID == -1) - FilenameID = Entries.back().FilenameID; - - // If we are after a line marker that switched us to system header mode, or - // that set #include information, preserve it. - Kind = Entries.back().FileKind; - IncludeOffset = Entries.back().IncludeOffset; - } - - Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind, - IncludeOffset)); -} - -/// AddLineNote This is the same as the previous version of AddLineNote, but is -/// used for GNU line markers. If EntryExit is 0, then this doesn't change the -/// presumed \#include stack. If it is 1, this is a file entry, if it is 2 then -/// this is a file exit. FileKind specifies whether this is a system header or -/// extern C system header. -void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, - unsigned LineNo, int FilenameID, - unsigned EntryExit, +/// Add a line note to the line table that indicates that there is a \#line or +/// GNU line marker at the specified FID/Offset location which changes the +/// presumed location to LineNo/FilenameID. If EntryExit is 0, then this doesn't +/// change the presumed \#include stack. If it is 1, this is a file entry, if +/// it is 2 then this is a file exit. FileKind specifies whether this is a +/// system header or extern C system header. +void LineTableInfo::AddLineNote(FileID FID, unsigned Offset, unsigned LineNo, + int FilenameID, unsigned EntryExit, SrcMgr::CharacteristicKind FileKind) { - assert(FilenameID != -1 && "Unspecified filename should use other accessor"); - std::vector &Entries = LineEntries[FID]; + // An unspecified FilenameID means use the last filename if available, or the + // main source file otherwise. + if (FilenameID == -1 && !Entries.empty()) + FilenameID = Entries.back().FilenameID; + assert((Entries.empty() || Entries.back().FileOffset < Offset) && "Adding line entries out of order!"); @@ -281,47 +255,20 @@ unsigned SourceManager::getLineTableFilenameID(StringRef Name) { return getLineTable().getLineTableFilenameID(Name); } - /// AddLineNote - Add a line note to the line table for the FileID and offset /// specified by Loc. If FilenameID is -1, it is considered to be /// unspecified. -void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, - int FilenameID) { - std::pair LocInfo = getDecomposedExpansionLoc(Loc); - - bool Invalid = false; - const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid); - if (!Entry.isFile() || Invalid) - return; - - const SrcMgr::FileInfo &FileInfo = Entry.getFile(); - - // Remember that this file has #line directives now if it doesn't already. - const_cast(FileInfo).setHasLineDirectives(); - - getLineTable().AddLineNote(LocInfo.first, LocInfo.second, LineNo, FilenameID); -} - -/// AddLineNote - Add a GNU line marker to the line table. void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, int FilenameID, bool IsFileEntry, - bool IsFileExit, bool IsSystemHeader, - bool IsExternCHeader) { - // If there is no filename and no flags, this is treated just like a #line, - // which does not change the flags of the previous line marker. - if (FilenameID == -1) { - assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader && - "Can't set flags without setting the filename!"); - return AddLineNote(Loc, LineNo, FilenameID); - } - + bool IsFileExit, + SrcMgr::CharacteristicKind FileKind) { std::pair LocInfo = getDecomposedExpansionLoc(Loc); bool Invalid = false; const SLocEntry &Entry = getSLocEntry(LocInfo.first, &Invalid); if (!Entry.isFile() || Invalid) return; - + const SrcMgr::FileInfo &FileInfo = Entry.getFile(); // Remember that this file has #line directives now if it doesn't already. @@ -329,14 +276,6 @@ void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, (void) getLineTable(); - SrcMgr::CharacteristicKind FileKind; - if (IsExternCHeader) - FileKind = SrcMgr::C_ExternCSystem; - else if (IsSystemHeader) - FileKind = SrcMgr::C_System; - else - FileKind = SrcMgr::C_User; - unsigned EntryExit = 0; if (IsFileEntry) EntryExit = 1; diff --git a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp index 1b9fbed1773..6be83d22a25 100644 --- a/contrib/llvm/tools/clang/lib/Basic/Targets.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/Targets.cpp @@ -2169,15 +2169,20 @@ public: : DataLayoutStringR600); assert(DataLayout->getAllocaAddrSpace() == AS.Private); - AddrSpaceMap = - llvm::StringSwitch(Triple.getEnvironmentName()) - .Case("opencl", &AMDGPUOpenCLPrivateIsZeroMap) - .Case("amdgiz", &AMDGPUNonOpenCLGenericIsZeroMap) - .Case("amdgizcl", &AMDGPUOpenCLGenericIsZeroMap) - .Default(&AMDGPUNonOpenCLPrivateIsZeroMap); UseAddrSpaceMapMangling = true; } + void adjust(LangOptions &Opts) override { + TargetInfo::adjust(Opts); + if (isGenericZero(getTriple())) { + AddrSpaceMap = Opts.OpenCL ? &AMDGPUOpenCLGenericIsZeroMap + : &AMDGPUNonOpenCLGenericIsZeroMap; + } else { + AddrSpaceMap = Opts.OpenCL ? &AMDGPUOpenCLPrivateIsZeroMap + : &AMDGPUNonOpenCLPrivateIsZeroMap; + } + } + uint64_t getPointerWidthV(unsigned AddrSpace) const override { if (GPU <= GK_CAYMAN) return 32; @@ -2619,6 +2624,7 @@ class X86TargetInfo : public TargetInfo { bool HasFMA = false; bool HasF16C = false; bool HasAVX512CD = false; + bool HasAVX512VPOPCNTDQ = false; bool HasAVX512ER = false; bool HasAVX512PF = false; bool HasAVX512DQ = false; @@ -3435,23 +3441,32 @@ void X86TargetInfo::setSSELevel(llvm::StringMap &Features, switch (Level) { case AVX512F: Features["avx512f"] = true; + LLVM_FALLTHROUGH; case AVX2: Features["avx2"] = true; + LLVM_FALLTHROUGH; case AVX: Features["avx"] = true; Features["xsave"] = true; + LLVM_FALLTHROUGH; case SSE42: Features["sse4.2"] = true; + LLVM_FALLTHROUGH; case SSE41: Features["sse4.1"] = true; + LLVM_FALLTHROUGH; case SSSE3: Features["ssse3"] = true; + LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = true; + LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = true; + LLVM_FALLTHROUGH; case SSE1: Features["sse"] = true; + LLVM_FALLTHROUGH; case NoSSE: break; } @@ -3462,29 +3477,37 @@ void X86TargetInfo::setSSELevel(llvm::StringMap &Features, case NoSSE: case SSE1: Features["sse"] = false; + LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] = false; + LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = false; setXOPLevel(Features, NoXOP, false); + LLVM_FALLTHROUGH; case SSSE3: Features["ssse3"] = false; + LLVM_FALLTHROUGH; case SSE41: Features["sse4.1"] = false; + LLVM_FALLTHROUGH; case SSE42: Features["sse4.2"] = false; + LLVM_FALLTHROUGH; case AVX: Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] = Features["xsaveopt"] = false; setXOPLevel(Features, FMA4, false); + LLVM_FALLTHROUGH; case AVX2: Features["avx2"] = false; + LLVM_FALLTHROUGH; case AVX512F: Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = - Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = - Features["avx512vl"] = Features["avx512vbmi"] = - Features["avx512ifma"] = false; + Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = + Features["avx512vl"] = Features["avx512vbmi"] = + Features["avx512ifma"] = Features["avx512vpopcntdq"] = false; } } @@ -3494,10 +3517,13 @@ void X86TargetInfo::setMMXLevel(llvm::StringMap &Features, switch (Level) { case AMD3DNowAthlon: Features["3dnowa"] = true; + LLVM_FALLTHROUGH; case AMD3DNow: Features["3dnow"] = true; + LLVM_FALLTHROUGH; case MMX: Features["mmx"] = true; + LLVM_FALLTHROUGH; case NoMMX3DNow: break; } @@ -3508,8 +3534,10 @@ void X86TargetInfo::setMMXLevel(llvm::StringMap &Features, case NoMMX3DNow: case MMX: Features["mmx"] = false; + LLVM_FALLTHROUGH; case AMD3DNow: Features["3dnow"] = false; + LLVM_FALLTHROUGH; case AMD3DNowAthlon: Features["3dnowa"] = false; } @@ -3521,12 +3549,15 @@ void X86TargetInfo::setXOPLevel(llvm::StringMap &Features, XOPEnum Level, switch (Level) { case XOP: Features["xop"] = true; + LLVM_FALLTHROUGH; case FMA4: Features["fma4"] = true; setSSELevel(Features, AVX, true); + LLVM_FALLTHROUGH; case SSE4A: Features["sse4a"] = true; setSSELevel(Features, SSE3, true); + LLVM_FALLTHROUGH; case NoXOP: break; } @@ -3537,8 +3568,10 @@ void X86TargetInfo::setXOPLevel(llvm::StringMap &Features, XOPEnum Level, case NoXOP: case SSE4A: Features["sse4a"] = false; + LLVM_FALLTHROUGH; case FMA4: Features["fma4"] = false; + LLVM_FALLTHROUGH; case XOP: Features["xop"] = false; } @@ -3584,7 +3617,8 @@ void X86TargetInfo::setFeatureEnabledImpl(llvm::StringMap &Features, setSSELevel(Features, AVX512F, Enabled); } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" || - Name == "avx512vbmi" || Name == "avx512ifma") { + Name == "avx512vbmi" || Name == "avx512ifma" || + Name == "avx512vpopcntdq") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); // Enable BWI instruction if VBMI is being enabled. @@ -3668,6 +3702,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, HasF16C = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; + } else if (Feature == "+avx512vpopcntdq") { + HasAVX512VPOPCNTDQ = true; } else if (Feature == "+avx512er") { HasAVX512ER = true; } else if (Feature == "+avx512pf") { @@ -3986,10 +4022,13 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, switch (XOPLevel) { case XOP: Builder.defineMacro("__XOP__"); + LLVM_FALLTHROUGH; case FMA4: Builder.defineMacro("__FMA4__"); + LLVM_FALLTHROUGH; case SSE4A: Builder.defineMacro("__SSE4A__"); + LLVM_FALLTHROUGH; case NoXOP: break; } @@ -4002,6 +4041,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); + if (HasAVX512VPOPCNTDQ) + Builder.defineMacro("__AVX512VPOPCNTDQ__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) @@ -4051,24 +4092,33 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, switch (SSELevel) { case AVX512F: Builder.defineMacro("__AVX512F__"); + LLVM_FALLTHROUGH; case AVX2: Builder.defineMacro("__AVX2__"); + LLVM_FALLTHROUGH; case AVX: Builder.defineMacro("__AVX__"); + LLVM_FALLTHROUGH; case SSE42: Builder.defineMacro("__SSE4_2__"); + LLVM_FALLTHROUGH; case SSE41: Builder.defineMacro("__SSE4_1__"); + LLVM_FALLTHROUGH; case SSSE3: Builder.defineMacro("__SSSE3__"); + LLVM_FALLTHROUGH; case SSE3: Builder.defineMacro("__SSE3__"); + LLVM_FALLTHROUGH; case SSE2: Builder.defineMacro("__SSE2__"); Builder.defineMacro("__SSE2_MATH__"); // -mfp-math=sse always implied. + LLVM_FALLTHROUGH; case SSE1: Builder.defineMacro("__SSE__"); Builder.defineMacro("__SSE_MATH__"); // -mfp-math=sse always implied. + LLVM_FALLTHROUGH; case NoSSE: break; } @@ -4097,10 +4147,13 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, switch (MMX3DNowLevel) { case AMD3DNowAthlon: Builder.defineMacro("__3dNOW_A__"); + LLVM_FALLTHROUGH; case AMD3DNow: Builder.defineMacro("__3dNOW__"); + LLVM_FALLTHROUGH; case MMX: Builder.defineMacro("__MMX__"); + LLVM_FALLTHROUGH; case NoMMX3DNow: break; } @@ -4112,6 +4165,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, } if (CPU >= CK_i586) Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8"); + + if (HasFloat128) + Builder.defineMacro("__SIZEOF_FLOAT128__", "16"); } bool X86TargetInfo::hasFeature(StringRef Feature) const { @@ -4121,6 +4177,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) + .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) @@ -4206,6 +4263,7 @@ bool X86TargetInfo::validateCpuSupports(StringRef FeatureStr) const { .Case("avx512bw", true) .Case("avx512dq", true) .Case("avx512cd", true) + .Case("avx512vpopcntdq", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512vbmi", true) @@ -4589,7 +4647,9 @@ static void addMinGWDefines(const LangOptions &Opts, MacroBuilder &Builder) { class MinGWX86_32TargetInfo : public WindowsX86_32TargetInfo { public: MinGWX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) - : WindowsX86_32TargetInfo(Triple, Opts) {} + : WindowsX86_32TargetInfo(Triple, Opts) { + HasFloat128 = true; + } void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override { WindowsX86_32TargetInfo::getTargetDefines(Opts, Builder); @@ -4881,6 +4941,7 @@ public: // with x86 FP ops. Weird. LongDoubleWidth = LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::x87DoubleExtended(); + HasFloat128 = true; } void getTargetDefines(const LangOptions &Opts, diff --git a/contrib/llvm/tools/clang/lib/Basic/XRayLists.cpp b/contrib/llvm/tools/clang/lib/Basic/XRayLists.cpp index dccf3baa75e..0a439c7af90 100644 --- a/contrib/llvm/tools/clang/lib/Basic/XRayLists.cpp +++ b/contrib/llvm/tools/clang/lib/Basic/XRayLists.cpp @@ -26,6 +26,8 @@ XRayFunctionFilter::ImbueAttribute XRayFunctionFilter::shouldImbueFunction(StringRef FunctionName) const { // First apply the always instrument list, than if it isn't an "always" see // whether it's treated as a "never" instrument function. + if (AlwaysInstrument->inSection("fun", FunctionName, "arg1")) + return ImbueAttribute::ALWAYS_ARG1; if (AlwaysInstrument->inSection("fun", FunctionName)) return ImbueAttribute::ALWAYS; if (NeverInstrument->inSection("fun", FunctionName)) diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp index e2d5f8f870d..2134fb9e03e 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGBuiltin.cpp @@ -7332,39 +7332,42 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, AVX512PF, AVX512VBMI, AVX512IFMA, + AVX512VPOPCNTDQ, MAX }; - X86Features Feature = StringSwitch(FeatureStr) - .Case("cmov", X86Features::CMOV) - .Case("mmx", X86Features::MMX) - .Case("popcnt", X86Features::POPCNT) - .Case("sse", X86Features::SSE) - .Case("sse2", X86Features::SSE2) - .Case("sse3", X86Features::SSE3) - .Case("ssse3", X86Features::SSSE3) - .Case("sse4.1", X86Features::SSE4_1) - .Case("sse4.2", X86Features::SSE4_2) - .Case("avx", X86Features::AVX) - .Case("avx2", X86Features::AVX2) - .Case("sse4a", X86Features::SSE4_A) - .Case("fma4", X86Features::FMA4) - .Case("xop", X86Features::XOP) - .Case("fma", X86Features::FMA) - .Case("avx512f", X86Features::AVX512F) - .Case("bmi", X86Features::BMI) - .Case("bmi2", X86Features::BMI2) - .Case("aes", X86Features::AES) - .Case("pclmul", X86Features::PCLMUL) - .Case("avx512vl", X86Features::AVX512VL) - .Case("avx512bw", X86Features::AVX512BW) - .Case("avx512dq", X86Features::AVX512DQ) - .Case("avx512cd", X86Features::AVX512CD) - .Case("avx512er", X86Features::AVX512ER) - .Case("avx512pf", X86Features::AVX512PF) - .Case("avx512vbmi", X86Features::AVX512VBMI) - .Case("avx512ifma", X86Features::AVX512IFMA) - .Default(X86Features::MAX); + X86Features Feature = + StringSwitch(FeatureStr) + .Case("cmov", X86Features::CMOV) + .Case("mmx", X86Features::MMX) + .Case("popcnt", X86Features::POPCNT) + .Case("sse", X86Features::SSE) + .Case("sse2", X86Features::SSE2) + .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) + .Case("sse4.1", X86Features::SSE4_1) + .Case("sse4.2", X86Features::SSE4_2) + .Case("avx", X86Features::AVX) + .Case("avx2", X86Features::AVX2) + .Case("sse4a", X86Features::SSE4_A) + .Case("fma4", X86Features::FMA4) + .Case("xop", X86Features::XOP) + .Case("fma", X86Features::FMA) + .Case("avx512f", X86Features::AVX512F) + .Case("bmi", X86Features::BMI) + .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) + .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) + .Default(X86Features::MAX); assert(Feature != X86Features::MAX && "Invalid feature!"); // Matching the struct layout from the compiler-rt/libgcc structure that is @@ -7517,7 +7520,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } - + case X86::BI__builtin_ia32_vpopcntd_512: + case X86::BI__builtin_ia32_vpopcntq_512: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); + return Builder.CreateCall(F, Ops); + } case X86::BI__builtin_ia32_cvtmask2b128: case X86::BI__builtin_ia32_cvtmask2b256: case X86::BI__builtin_ia32_cvtmask2b512: @@ -8442,6 +8450,80 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, Ops); } } + + case PPC::BI__builtin_vsx_xxpermdi: { + ConstantInt *ArgCI = dyn_cast(Ops[2]); + assert(ArgCI && "Third arg must be constant integer!"); + + unsigned Index = ArgCI->getZExtValue(); + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2)); + + // Element zero comes from the first input vector and element one comes from + // the second. The element indices within each vector are numbered in big + // endian order so the shuffle mask must be adjusted for this on little + // endian platforms (i.e. index is complemented and source vector reversed). + unsigned ElemIdx0; + unsigned ElemIdx1; + if (getTarget().isLittleEndian()) { + ElemIdx0 = (~Index & 1) + 2; + ElemIdx1 = (~Index & 2) >> 1; + } else { // BigEndian + ElemIdx0 = (Index & 2) >> 1; + ElemIdx1 = 2 + (Index & 1); + } + + Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1)}; + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } + + case PPC::BI__builtin_vsx_xxsldwi: { + ConstantInt *ArgCI = dyn_cast(Ops[2]); + assert(ArgCI && "Third argument must be a compile time constant"); + unsigned Index = ArgCI->getZExtValue() & 0x3; + Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4)); + Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4)); + + // Create a shuffle mask + unsigned ElemIdx0; + unsigned ElemIdx1; + unsigned ElemIdx2; + unsigned ElemIdx3; + if (getTarget().isLittleEndian()) { + // Little endian element N comes from element 8+N-Index of the + // concatenated wide vector (of course, using modulo arithmetic on + // the total number of elements). + ElemIdx0 = (8 - Index) % 8; + ElemIdx1 = (9 - Index) % 8; + ElemIdx2 = (10 - Index) % 8; + ElemIdx3 = (11 - Index) % 8; + } else { + // Big endian ElemIdx = Index + N + ElemIdx0 = Index; + ElemIdx1 = Index + 1; + ElemIdx2 = Index + 2; + ElemIdx3 = Index + 3; + } + + Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0), + ConstantInt::get(Int32Ty, ElemIdx1), + ConstantInt::get(Int32Ty, ElemIdx2), + ConstantInt::get(Int32Ty, ElemIdx3)}; + + Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts); + Value *ShuffleCall = + Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask); + QualType BIRetType = E->getType(); + auto RetTy = ConvertType(BIRetType); + return Builder.CreateBitCast(ShuffleCall, RetTy); + } } } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp index 0ef680ef660..c468c1bb4b5 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGCoroutine.cpp @@ -11,9 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "CGCleanup.h" #include "CodeGenFunction.h" #include "llvm/ADT/ScopeExit.h" #include "clang/AST/StmtCXX.h" +#include "clang/AST/StmtVisitor.h" using namespace clang; using namespace CodeGen; @@ -57,6 +59,15 @@ struct clang::CodeGen::CGCoroData { // builtin. llvm::CallInst *CoroId = nullptr; + // Stores the llvm.coro.begin emitted in the function so that we can replace + // all coro.frame intrinsics with direct SSA value of coro.begin that returns + // the address of the coroutine frame of the current coroutine. + llvm::CallInst *CoroBegin = nullptr; + + // Stores the last emitted coro.free for the deallocate expressions, we use it + // to wrap dealloc code with if(auto mem = coro.free) dealloc(mem). + llvm::CallInst *LastCoroFree = nullptr; + // If coro.id came from the builtin, remember the expression to give better // diagnostic. If CoroIdExpr is nullptr, the coro.id was created by // EmitCoroutineBody. @@ -142,6 +153,20 @@ static RValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Coro, AwaitKind Kind, AggValueSlot aggSlot, bool ignoreResult) { auto *E = S.getCommonExpr(); + + // FIXME: rsmith 5/22/2017. Does it still make sense for us to have a + // UO_Coawait at all? As I recall, the only purpose it ever had was to + // represent a dependent co_await expression that couldn't yet be resolved to + // a CoawaitExpr. But now we have (and need!) a separate DependentCoawaitExpr + // node to store unqualified lookup results, it seems that the UnaryOperator + // portion of the representation serves no purpose (and as seen in this patch, + // it's getting in the way). Can we remove it? + + // Skip passthrough operator co_await (present when awaiting on an LValue). + if (auto *UO = dyn_cast(E)) + if (UO->getOpcode() == UO_Coawait) + E = UO->getSubExpr(); + auto Binder = CodeGenFunction::OpaqueValueMappingData::bind(CGF, S.getOpaqueValue(), E); auto UnbindOnExit = llvm::make_scope_exit([&] { Binder.unbind(CGF); }); @@ -215,7 +240,67 @@ void CodeGenFunction::EmitCoreturnStmt(CoreturnStmt const &S) { EmitBranchThroughCleanup(CurCoro.Data->FinalJD); } -// For WinEH exception representation backend need to know what funclet coro.end +// Hunts for the parameter reference in the parameter copy/move declaration. +namespace { +struct GetParamRef : public StmtVisitor { +public: + DeclRefExpr *Expr = nullptr; + GetParamRef() {} + void VisitDeclRefExpr(DeclRefExpr *E) { + assert(Expr == nullptr && "multilple declref in param move"); + Expr = E; + } + void VisitStmt(Stmt *S) { + for (auto *C : S->children()) { + if (C) + Visit(C); + } + } +}; +} + +// This class replaces references to parameters to their copies by changing +// the addresses in CGF.LocalDeclMap and restoring back the original values in +// its destructor. + +namespace { + struct ParamReferenceReplacerRAII { + CodeGenFunction::DeclMapTy SavedLocals; + CodeGenFunction::DeclMapTy& LocalDeclMap; + + ParamReferenceReplacerRAII(CodeGenFunction::DeclMapTy &LocalDeclMap) + : LocalDeclMap(LocalDeclMap) {} + + void addCopy(DeclStmt const *PM) { + // Figure out what param it refers to. + + assert(PM->isSingleDecl()); + VarDecl const*VD = static_cast(PM->getSingleDecl()); + Expr const *InitExpr = VD->getInit(); + GetParamRef Visitor; + Visitor.Visit(const_cast(InitExpr)); + assert(Visitor.Expr); + auto *DREOrig = cast(Visitor.Expr); + auto *PD = DREOrig->getDecl(); + + auto it = LocalDeclMap.find(PD); + assert(it != LocalDeclMap.end() && "parameter is not found"); + SavedLocals.insert({ PD, it->second }); + + auto copyIt = LocalDeclMap.find(VD); + assert(copyIt != LocalDeclMap.end() && "parameter copy is not found"); + it->second = copyIt->getSecond(); + } + + ~ParamReferenceReplacerRAII() { + for (auto&& SavedLocal : SavedLocals) { + LocalDeclMap.insert({SavedLocal.first, SavedLocal.second}); + } + } + }; +} + +// For WinEH exception representation backend needs to know what funclet coro.end // belongs to. That information is passed in a funclet bundle. static SmallVector getBundlesForCoroEnd(CodeGenFunction &CGF) { @@ -257,24 +342,135 @@ namespace { struct CallCoroDelete final : public EHScopeStack::Cleanup { Stmt *Deallocate; - // TODO: Wrap deallocate in if(coro.free(...)) Deallocate. + // Emit "if (coro.free(CoroId, CoroBegin)) Deallocate;" + + // Note: That deallocation will be emitted twice: once for a normal exit and + // once for exceptional exit. This usage is safe because Deallocate does not + // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr() + // builds a single call to a deallocation function which is safe to emit + // multiple times. void Emit(CodeGenFunction &CGF, Flags) override { - // Note: That deallocation will be emitted twice: once for a normal exit and - // once for exceptional exit. This usage is safe because Deallocate does not - // contain any declarations. The SubStmtBuilder::makeNewAndDeleteExpr() - // builds a single call to a deallocation function which is safe to emit - // multiple times. + // Remember the current point, as we are going to emit deallocation code + // first to get to coro.free instruction that is an argument to a delete + // call. + BasicBlock *SaveInsertBlock = CGF.Builder.GetInsertBlock(); + + auto *FreeBB = CGF.createBasicBlock("coro.free"); + CGF.EmitBlock(FreeBB); CGF.EmitStmt(Deallocate); + + auto *AfterFreeBB = CGF.createBasicBlock("after.coro.free"); + CGF.EmitBlock(AfterFreeBB); + + // We should have captured coro.free from the emission of deallocate. + auto *CoroFree = CGF.CurCoro.Data->LastCoroFree; + if (!CoroFree) { + CGF.CGM.Error(Deallocate->getLocStart(), + "Deallocation expressoin does not refer to coro.free"); + return; + } + + // Get back to the block we were originally and move coro.free there. + auto *InsertPt = SaveInsertBlock->getTerminator(); + CoroFree->moveBefore(InsertPt); + CGF.Builder.SetInsertPoint(InsertPt); + + // Add if (auto *mem = coro.free) Deallocate; + auto *NullPtr = llvm::ConstantPointerNull::get(CGF.Int8PtrTy); + auto *Cond = CGF.Builder.CreateICmpNE(CoroFree, NullPtr); + CGF.Builder.CreateCondBr(Cond, FreeBB, AfterFreeBB); + + // No longer need old terminator. + InsertPt->eraseFromParent(); + CGF.Builder.SetInsertPoint(AfterFreeBB); } explicit CallCoroDelete(Stmt *DeallocStmt) : Deallocate(DeallocStmt) {} }; } +namespace { +struct GetReturnObjectManager { + CodeGenFunction &CGF; + CGBuilderTy &Builder; + const CoroutineBodyStmt &S; + + Address GroActiveFlag; + CodeGenFunction::AutoVarEmission GroEmission; + + GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S) + : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()), + GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {} + + // The gro variable has to outlive coroutine frame and coroutine promise, but, + // it can only be initialized after coroutine promise was created, thus, we + // split its emission in two parts. EmitGroAlloca emits an alloca and sets up + // cleanups. Later when coroutine promise is available we initialize the gro + // and sets the flag that the cleanup is now active. + + void EmitGroAlloca() { + auto *GroDeclStmt = dyn_cast(S.getResultDecl()); + if (!GroDeclStmt) { + // If get_return_object returns void, no need to do an alloca. + return; + } + + auto *GroVarDecl = cast(GroDeclStmt->getSingleDecl()); + + // Set GRO flag that it is not initialized yet + GroActiveFlag = + CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), "gro.active"); + Builder.CreateStore(Builder.getFalse(), GroActiveFlag); + + GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl); + + // Remember the top of EHStack before emitting the cleanup. + auto old_top = CGF.EHStack.stable_begin(); + CGF.EmitAutoVarCleanups(GroEmission); + auto top = CGF.EHStack.stable_begin(); + + // Make the cleanup conditional on gro.active + for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top); + b != e; b++) { + if (auto *Cleanup = dyn_cast(&*b)) { + assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?"); + Cleanup->setActiveFlag(GroActiveFlag); + Cleanup->setTestFlagInEHCleanup(); + Cleanup->setTestFlagInNormalCleanup(); + } + } + } + + void EmitGroInit() { + if (!GroActiveFlag.isValid()) { + // No Gro variable was allocated. Simply emit the call to + // get_return_object. + CGF.EmitStmt(S.getResultDecl()); + return; + } + + CGF.EmitAutoVarInit(GroEmission); + Builder.CreateStore(Builder.getTrue(), GroActiveFlag); + } +}; +} + +static void emitBodyAndFallthrough(CodeGenFunction &CGF, + const CoroutineBodyStmt &S, Stmt *Body) { + CGF.EmitStmt(Body); + const bool CanFallthrough = CGF.Builder.GetInsertBlock(); + if (CanFallthrough) + if (Stmt *OnFallthrough = S.getFallthroughHandler()) + CGF.EmitStmt(OnFallthrough); +} + void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { auto *NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); auto &TI = CGM.getContext().getTargetInfo(); unsigned NewAlign = TI.getNewAlign() / TI.getCharWidth(); + auto *EntryBB = Builder.GetInsertBlock(); + auto *AllocBB = createBasicBlock("coro.alloc"); + auto *InitBB = createBasicBlock("coro.init"); auto *FinalBB = createBasicBlock("coro.final"); auto *RetBB = createBasicBlock("coro.ret"); @@ -284,12 +480,20 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { createCoroData(*this, CurCoro, CoroId); CurCoro.Data->SuspendBB = RetBB; + // Backend is allowed to elide memory allocations, to help it, emit + // auto mem = coro.alloc() ? 0 : ... allocation code ...; + auto *CoroAlloc = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_alloc), {CoroId}); + + Builder.CreateCondBr(CoroAlloc, AllocBB, InitBB); + + EmitBlock(AllocBB); auto *AllocateCall = EmitScalarExpr(S.getAllocate()); + auto *AllocOrInvokeContBB = Builder.GetInsertBlock(); // Handle allocation failure if 'ReturnStmtOnAllocFailure' was provided. if (auto *RetOnAllocFailure = S.getReturnStmtOnAllocFailure()) { auto *RetOnFailureBB = createBasicBlock("coro.ret.on.failure"); - auto *InitBB = createBasicBlock("coro.init"); // See if allocation was successful. auto *NullPtr = llvm::ConstantPointerNull::get(Int8PtrTy); @@ -299,40 +503,96 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) { // If not, return OnAllocFailure object. EmitBlock(RetOnFailureBB); EmitStmt(RetOnAllocFailure); - - EmitBlock(InitBB); } + else { + Builder.CreateBr(InitBB); + } + + EmitBlock(InitBB); + + // Pass the result of the allocation to coro.begin. + auto *Phi = Builder.CreatePHI(VoidPtrTy, 2); + Phi->addIncoming(NullPtr, EntryBB); + Phi->addIncoming(AllocateCall, AllocOrInvokeContBB); + auto *CoroBegin = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi}); + CurCoro.Data->CoroBegin = CoroBegin; + + GetReturnObjectManager GroManager(*this, S); + GroManager.EmitGroAlloca(); CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB); { + ParamReferenceReplacerRAII ParamReplacer(LocalDeclMap); CodeGenFunction::RunCleanupsScope ResumeScope(*this); EHStack.pushCleanup(NormalAndEHCleanup, S.getDeallocate()); + // Create parameter copies. We do it before creating a promise, since an + // evolution of coroutine TS may allow promise constructor to observe + // parameter copies. + for (auto *PM : S.getParamMoves()) { + EmitStmt(PM); + ParamReplacer.addCopy(cast(PM)); + // TODO: if(CoroParam(...)) need to surround ctor and dtor + // for the copy, so that llvm can elide it if the copy is + // not needed. + } + EmitStmt(S.getPromiseDeclStmt()); + Address PromiseAddr = GetAddrOfLocalVar(S.getPromiseDecl()); + auto *PromiseAddrVoidPtr = + new llvm::BitCastInst(PromiseAddr.getPointer(), VoidPtrTy, "", CoroId); + // Update CoroId to refer to the promise. We could not do it earlier because + // promise local variable was not emitted yet. + CoroId->setArgOperand(1, PromiseAddrVoidPtr); + + // Now we have the promise, initialize the GRO + GroManager.EmitGroInit(); + EHStack.pushCleanup(EHCleanup); + CurCoro.Data->CurrentAwaitKind = AwaitKind::Init; + EmitStmt(S.getInitSuspendStmt()); CurCoro.Data->FinalJD = getJumpDestInCurrentScope(FinalBB); - // FIXME: Emit initial suspend and more before the body. - CurCoro.Data->CurrentAwaitKind = AwaitKind::Normal; - EmitStmt(S.getBody()); + + if (auto *OnException = S.getExceptionHandler()) { + auto Loc = S.getLocStart(); + CXXCatchStmt Catch(Loc, /*exDecl=*/nullptr, OnException); + auto *TryStmt = CXXTryStmt::Create(getContext(), Loc, S.getBody(), &Catch); + + EnterCXXTryStmt(*TryStmt); + emitBodyAndFallthrough(*this, S, TryStmt->getTryBlock()); + ExitCXXTryStmt(*TryStmt); + } + else { + emitBodyAndFallthrough(*this, S, S.getBody()); + } // See if we need to generate final suspend. const bool CanFallthrough = Builder.GetInsertBlock(); const bool HasCoreturns = CurCoro.Data->CoreturnCount > 0; if (CanFallthrough || HasCoreturns) { EmitBlock(FinalBB); - // FIXME: Emit final suspend. + CurCoro.Data->CurrentAwaitKind = AwaitKind::Final; + EmitStmt(S.getFinalSuspendStmt()); + } + else { + // We don't need FinalBB. Emit it to make sure the block is deleted. + EmitBlock(FinalBB, /*IsFinished=*/true); } } EmitBlock(RetBB); + // Emit coro.end before getReturnStmt (and parameter destructors), since + // resume and destroy parts of the coroutine should not include them. llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end); Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()}); - // FIXME: Emit return for the coroutine return object. + if (Stmt *Ret = S.getReturnStmt()) + EmitStmt(Ret); } // Emit coroutine intrinsic and patch up arguments of the token type. @@ -342,6 +602,17 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, switch (IID) { default: break; + // The coro.frame builtin is replaced with an SSA value of the coro.begin + // intrinsic. + case llvm::Intrinsic::coro_frame: { + if (CurCoro.Data && CurCoro.Data->CoroBegin) { + return RValue::get(CurCoro.Data->CoroBegin); + } + CGM.Error(E->getLocStart(), "this builtin expect that __builtin_coro_begin " + "has been used earlier in this function"); + auto NullPtr = llvm::ConstantPointerNull::get(Builder.getInt8PtrTy()); + return RValue::get(NullPtr); + } // The following three intrinsics take a token parameter referring to a token // returned by earlier call to @llvm.coro.id. Since we cannot represent it in // builtins, we patch it up here. @@ -368,10 +639,22 @@ RValue CodeGenFunction::EmitCoroutineIntrinsic(const CallExpr *E, llvm::Value *F = CGM.getIntrinsic(IID); llvm::CallInst *Call = Builder.CreateCall(F, Args); + // Note: The following code is to enable to emit coro.id and coro.begin by + // hand to experiment with coroutines in C. // If we see @llvm.coro.id remember it in the CoroData. We will update // coro.alloc, coro.begin and coro.free intrinsics to refer to it. if (IID == llvm::Intrinsic::coro_id) { createCoroData(*this, CurCoro, Call, E); } + else if (IID == llvm::Intrinsic::coro_begin) { + if (CurCoro.Data) + CurCoro.Data->CoroBegin = Call; + } + else if (IID == llvm::Intrinsic::coro_free) { + // Remember the last coro_free as we need it to build the conditional + // deletion of the coroutine frame. + if (CurCoro.Data) + CurCoro.Data->LastCoroFree = Call; + } return RValue::get(Call); } diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp index a6d5dd85c23..b918a663ce5 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGExpr.cpp @@ -1432,11 +1432,12 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Load, TBAAPath, - false /*ConvertTypeToTag*/); + bool MayAlias = BaseInfo.getMayAlias(); + llvm::MDNode *TBAA = MayAlias + ? CGM.getTBAAInfo(getContext().CharTy) + : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); + if (TBAA) + CGM.DecorateInstructionWithTBAA(Load, TBAA, MayAlias); } if (EmitScalarRangeCheck(Load, Ty, Loc)) { @@ -1522,11 +1523,12 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, Address Addr, Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node); } if (TBAAInfo) { - llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, - TBAAOffset); - if (TBAAPath) - CGM.DecorateInstructionWithTBAA(Store, TBAAPath, - false /*ConvertTypeToTag*/); + bool MayAlias = BaseInfo.getMayAlias(); + llvm::MDNode *TBAA = MayAlias + ? CGM.getTBAAInfo(getContext().CharTy) + : CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo, TBAAOffset); + if (TBAA) + CGM.DecorateInstructionWithTBAA(Store, TBAA, MayAlias); } } @@ -3535,6 +3537,11 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, getFieldAlignmentSource(BaseInfo.getAlignmentSource()); LValueBaseInfo FieldBaseInfo(fieldAlignSource, BaseInfo.getMayAlias()); + const RecordDecl *rec = field->getParent(); + if (rec->isUnion() || rec->hasAttr()) + FieldBaseInfo.setMayAlias(true); + bool mayAlias = FieldBaseInfo.getMayAlias(); + if (field->isBitField()) { const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(field->getParent()); @@ -3556,11 +3563,7 @@ LValue CodeGenFunction::EmitLValueForField(LValue base, return LValue::MakeBitfield(Addr, Info, fieldType, FieldBaseInfo); } - const RecordDecl *rec = field->getParent(); QualType type = field->getType(); - - bool mayAlias = rec->hasAttr(); - Address addr = base.getAddress(); unsigned cvr = base.getVRQualifiers(); bool TBAAPath = CGM.getCodeGenOpts().StructPathTBAA; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 601d662e55e..63300e25d37 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -760,6 +760,7 @@ emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule()); CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo); Fn->removeFnAttr(llvm::Attribute::NoInline); + Fn->removeFnAttr(llvm::Attribute::OptimizeNone); Fn->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. @@ -2903,6 +2904,19 @@ CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() { Desc); CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc); }); + if (CGM.supportsCOMDAT()) { + // It is sufficient to call registration function only once, so create a + // COMDAT group for registration/unregistration functions and associated + // data. That would reduce startup time and code size. Registration + // function serves as a COMDAT group key. + auto ComdatKey = M.getOrInsertComdat(RegFn->getName()); + RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage); + RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility); + RegFn->setComdat(ComdatKey); + UnRegFn->setComdat(ComdatKey); + DeviceImages->setComdat(ComdatKey); + Desc->setComdat(ComdatKey); + } return RegFn; } @@ -3502,6 +3516,7 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap, TaskPrivatesMapFnInfo); TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); + TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); CodeGenFunction CGF(CGM); CGF.disableDebugInfo(); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index c3391d087b7..bbedac962d5 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -861,6 +861,7 @@ llvm::Value *CGOpenMPRuntimeNVPTX::emitTeamsOutlinedFunction( D, ThreadIDVar, InnermostKind, CodeGen); llvm::Function *OutlinedFun = cast(OutlinedFunVal); OutlinedFun->removeFnAttr(llvm::Attribute::NoInline); + OutlinedFun->removeFnAttr(llvm::Attribute::OptimizeNone); OutlinedFun->addFnAttr(llvm::Attribute::AlwaysInline); return OutlinedFun; @@ -1243,10 +1244,10 @@ static void emitReductionListCopy( /// local = local @ remote /// else /// local = remote -llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM, - ArrayRef Privates, - QualType ReductionArrayTy, - llvm::Value *ReduceFn) { +static llvm::Value * +emitReduceScratchpadFunction(CodeGenModule &CGM, + ArrayRef Privates, + QualType ReductionArrayTy, llvm::Value *ReduceFn) { auto &C = CGM.getContext(); auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); @@ -1372,9 +1373,9 @@ llvm::Value *emitReduceScratchpadFunction(CodeGenModule &CGM, /// for elem in Reduce List: /// scratchpad[elem_id][index] = elem /// -llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, - ArrayRef Privates, - QualType ReductionArrayTy) { +static llvm::Value *emitCopyToScratchpad(CodeGenModule &CGM, + ArrayRef Privates, + QualType ReductionArrayTy) { auto &C = CGM.getContext(); auto Int32Ty = C.getIntTypeForBitwidth(32, /* Signed */ true); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp index 9d0f802ece0..e4e5fce0227 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenModule.cpp @@ -400,8 +400,11 @@ void CodeGenModule::Release() { } if (OpenMPRuntime) if (llvm::Function *OpenMPRegistrationFunction = - OpenMPRuntime->emitRegistrationFunction()) - AddGlobalCtor(OpenMPRegistrationFunction, 0); + OpenMPRuntime->emitRegistrationFunction()) { + auto ComdatKey = OpenMPRegistrationFunction->hasComdat() ? + OpenMPRegistrationFunction : nullptr; + AddGlobalCtor(OpenMPRegistrationFunction, 0, ComdatKey); + } if (PGOReader) { getModule().setProfileSummary(PGOReader->getSummary().getMD(VMContext)); if (PGOStats.hasDiagnostics()) @@ -904,7 +907,16 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, return; } - if (D->hasAttr()) { + // Track whether we need to add the optnone LLVM attribute, + // starting with the default for this optimization level. + bool ShouldAddOptNone = + !CodeGenOpts.DisableO0ImplyOptNone && CodeGenOpts.OptimizationLevel == 0; + // We can't add optnone in the following cases, it won't pass the verifier. + ShouldAddOptNone &= !D->hasAttr(); + ShouldAddOptNone &= !F->hasFnAttribute(llvm::Attribute::AlwaysInline); + ShouldAddOptNone &= !D->hasAttr(); + + if (ShouldAddOptNone || D->hasAttr()) { B.addAttribute(llvm::Attribute::OptimizeNone); // OptimizeNone implies noinline; we should not be inlining such functions. @@ -958,7 +970,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // function. if (!D->hasAttr()) { if (D->hasAttr()) { - B.addAttribute(llvm::Attribute::OptimizeForSize); + if (!ShouldAddOptNone) + B.addAttribute(llvm::Attribute::OptimizeForSize); B.addAttribute(llvm::Attribute::Cold); } @@ -1508,6 +1521,10 @@ bool CodeGenModule::imbueXRayAttrs(llvm::Function *Fn, SourceLocation Loc, case ImbueAttr::ALWAYS: Fn->addFnAttr("function-instrument", "xray-always"); break; + case ImbueAttr::ALWAYS_ARG1: + Fn->addFnAttr("function-instrument", "xray-always"); + Fn->addFnAttr("xray-log-args", "1"); + break; case ImbueAttr::NEVER: Fn->addFnAttr("function-instrument", "xray-never"); break; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp index 83aa1fdf572..d0ba74119b7 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -4821,6 +4821,9 @@ private: bool isSwiftErrorInRegister() const override { return true; } + + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class AArch64TargetCodeGenInfo : public TargetCodeGenInfo { @@ -4994,6 +4997,17 @@ bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool AArch64ABIInfo::isLegalVectorTypeForSwift(CharUnits totalSize, + llvm::Type *eltTy, + unsigned elts) const { + if (!llvm::isPowerOf2_32(elts)) + return false; + if (totalSize.getQuantity() != 8 && + (totalSize.getQuantity() != 16 || elts == 1)) + return false; + return true; +} + bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS64 must have base types of a floating // point type or a short-vector type. This is the same as the 32-bit ABI, @@ -5382,6 +5396,8 @@ private: bool isSwiftErrorInRegister() const override { return true; } + bool isLegalVectorTypeForSwift(CharUnits totalSize, llvm::Type *eltTy, + unsigned elts) const override; }; class ARMTargetCodeGenInfo : public TargetCodeGenInfo { @@ -5894,6 +5910,20 @@ bool ARMABIInfo::isIllegalVectorType(QualType Ty) const { return false; } +bool ARMABIInfo::isLegalVectorTypeForSwift(CharUnits vectorSize, + llvm::Type *eltTy, + unsigned numElts) const { + if (!llvm::isPowerOf2_32(numElts)) + return false; + unsigned size = getDataLayout().getTypeStoreSizeInBits(eltTy); + if (size > 64) + return false; + if (vectorSize.getQuantity() != 8 && + (vectorSize.getQuantity() != 16 || numElts == 1)) + return false; + return true; +} + bool ARMABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // Homogeneous aggregates for AAPCS-VFP must have base types of float, // double, or 64-bit or 128-bit vectors. diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp index f36deff5d73..d07ac96a310 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp @@ -22,6 +22,7 @@ #include "ToolChains/FreeBSD.h" #include "ToolChains/Fuchsia.h" #include "ToolChains/Gnu.h" +#include "ToolChains/BareMetal.h" #include "ToolChains/Haiku.h" #include "ToolChains/Hexagon.h" #include "ToolChains/Lanai.h" @@ -598,6 +599,8 @@ Compilation *Driver::BuildCompilation(ArrayRef ArgList) { bool CCCPrintPhases; InputArgList Args = ParseArgStrings(ArgList.slice(1)); + if (Diags.hasErrorOccurred()) + return nullptr; // Silence driver warnings if requested Diags.setIgnoreAllWarnings(Args.hasArg(options::OPT_w)); @@ -1216,6 +1219,13 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { return false; } + if (Arg *A = C.getArgs().getLastArg(options::OPT_autocomplete)) { + // Print out all options that start with a given argument. This is used for + // shell autocompletion. + llvm::outs() << llvm::join(Opts->findByPrefix(A->getValue()), " ") << '\n'; + return false; + } + if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) { ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs()); switch (RLT) { @@ -3819,6 +3829,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, if (Target.getVendor() == llvm::Triple::Myriad) TC = llvm::make_unique(*this, Target, Args); + else if (toolchains::BareMetal::handlesTarget(Target)) + TC = llvm::make_unique(*this, Target, Args); else if (Target.isOSBinFormatELF()) TC = llvm::make_unique(*this, Target, Args); else if (Target.isOSBinFormatMachO()) diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/BareMetal.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/BareMetal.cpp new file mode 100644 index 00000000000..66246f6d71c --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -0,0 +1,210 @@ +//===--- BaremMetal.cpp - Bare Metal ToolChain ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "BareMetal.h" + +#include "CommonArgs.h" +#include "InputInfo.h" +#include "Gnu.h" + +#include "clang/Basic/VirtualFileSystem.h" +#include "clang/Driver/Compilation.h" +#include "clang/Driver/Driver.h" +#include "clang/Driver/DriverDiagnostic.h" +#include "clang/Driver/Options.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::opt; +using namespace clang; +using namespace clang::driver; +using namespace clang::driver::tools; +using namespace clang::driver::toolchains; + +BareMetal::BareMetal(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args) + : ToolChain(D, Triple, Args) { + getProgramPaths().push_back(getDriver().getInstalledDir()); + if (getDriver().getInstalledDir() != getDriver().Dir) + getProgramPaths().push_back(getDriver().Dir); +} + +BareMetal::~BareMetal() {} + +/// Is the triple {arm,thumb}-none-none-{eabi,eabihf} ? +static bool isARMBareMetal(const llvm::Triple &Triple) { + if (Triple.getArch() != llvm::Triple::arm && + Triple.getArch() != llvm::Triple::thumb) + return false; + + if (Triple.getVendor() != llvm::Triple::UnknownVendor) + return false; + + if (Triple.getOS() != llvm::Triple::UnknownOS) + return false; + + if (Triple.getEnvironment() != llvm::Triple::EABI && + Triple.getEnvironment() != llvm::Triple::EABIHF) + return false; + + return true; +} + +bool BareMetal::handlesTarget(const llvm::Triple &Triple) { + return isARMBareMetal(Triple); +} + +Tool *BareMetal::buildLinker() const { + return new tools::baremetal::Linker(*this); +} + +std::string BareMetal::getThreadModel() const { + return "single"; +} + +bool BareMetal::isThreadModelSupported(const StringRef Model) const { + return Model == "single"; +} + +std::string BareMetal::getRuntimesDir() const { + SmallString<128> Dir(getDriver().ResourceDir); + llvm::sys::path::append(Dir, "lib", "baremetal"); + return Dir.str(); +} + +void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + if (DriverArgs.hasArg(options::OPT_nostdinc)) + return; + + if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { + SmallString<128> Dir(getDriver().ResourceDir); + llvm::sys::path::append(Dir, "include"); + addSystemInclude(DriverArgs, CC1Args, Dir.str()); + } + + if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) { + SmallString<128> Dir(getDriver().SysRoot); + llvm::sys::path::append(Dir, "include"); + addSystemInclude(DriverArgs, CC1Args, Dir.str()); + } +} + +void BareMetal::addClangTargetOptions(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + CC1Args.push_back("-nostdsysteminc"); +} + +std::string BareMetal::findLibCxxIncludePath(CXXStdlibType LibType) const { + StringRef SysRoot = getDriver().SysRoot; + if (SysRoot.empty()) + return ""; + + switch (LibType) { + case ToolChain::CST_Libcxx: { + SmallString<128> Dir(SysRoot); + llvm::sys::path::append(Dir, "include", "c++", "v1"); + return Dir.str(); + } + case ToolChain::CST_Libstdcxx: { + SmallString<128> Dir(SysRoot); + llvm::sys::path::append(Dir, "include", "c++"); + std::error_code EC; + Generic_GCC::GCCVersion Version = {"", -1, -1, -1, "", "", ""}; + // Walk the subdirs, and find the one with the newest gcc version: + for (vfs::directory_iterator LI = + getDriver().getVFS().dir_begin(Dir.str(), EC), LE; + !EC && LI != LE; LI = LI.increment(EC)) { + StringRef VersionText = llvm::sys::path::filename(LI->getName()); + auto CandidateVersion = Generic_GCC::GCCVersion::Parse(VersionText); + if (CandidateVersion.Major == -1) + continue; + if (CandidateVersion <= Version) + continue; + Version = CandidateVersion; + } + if (Version.Major == -1) + return ""; + llvm::sys::path::append(Dir, Version.Text); + return Dir.str(); + } + } + llvm_unreachable("unhandled LibType"); +} + +void BareMetal::AddClangCXXStdlibIncludeArgs( + const ArgList &DriverArgs, ArgStringList &CC1Args) const { + if (DriverArgs.hasArg(options::OPT_nostdinc) || + DriverArgs.hasArg(options::OPT_nostdlibinc) || + DriverArgs.hasArg(options::OPT_nostdincxx)) + return; + + std::string Path = findLibCxxIncludePath(GetCXXStdlibType(DriverArgs)); + if (!Path.empty()) + addSystemInclude(DriverArgs, CC1Args, Path); +} + +void BareMetal::AddCXXStdlibLibArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + switch (GetCXXStdlibType(Args)) { + case ToolChain::CST_Libcxx: + CmdArgs.push_back("-lc++"); + CmdArgs.push_back("-lc++abi"); + break; + case ToolChain::CST_Libstdcxx: + CmdArgs.push_back("-lstdc++"); + CmdArgs.push_back("-lsupc++"); + break; + } + CmdArgs.push_back("-lunwind"); +} + +void BareMetal::AddLinkRuntimeLib(const ArgList &Args, + ArgStringList &CmdArgs) const { + CmdArgs.push_back(Args.MakeArgString("-lclang_rt.builtins-" + + getTriple().getArchName() + ".a")); +} + +void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, + const InputInfoList &Inputs, + const ArgList &Args, + const char *LinkingOutput) const { + ArgStringList CmdArgs; + + auto &TC = static_cast(getToolChain()); + + AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); + + CmdArgs.push_back("-Bstatic"); + + CmdArgs.push_back(Args.MakeArgString("-L" + TC.getRuntimesDir())); + + Args.AddAllArgs(CmdArgs, {options::OPT_L, options::OPT_T_Group, + options::OPT_e, options::OPT_s, options::OPT_t, + options::OPT_Z_Flag, options::OPT_r}); + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { + if (C.getDriver().CCCIsCXX()) + TC.AddCXXStdlibLibArgs(Args, CmdArgs); + + CmdArgs.push_back("-lc"); + CmdArgs.push_back("-lm"); + + TC.AddLinkRuntimeLib(Args, CmdArgs); + } + + CmdArgs.push_back("-o"); + CmdArgs.push_back(Output.getFilename()); + + C.addCommand(llvm::make_unique(JA, *this, + Args.MakeArgString(TC.GetLinkerPath()), + CmdArgs, Inputs)); +} diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/BareMetal.h b/contrib/llvm/tools/clang/lib/Driver/ToolChains/BareMetal.h new file mode 100644 index 00000000000..064c1199735 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/BareMetal.h @@ -0,0 +1,90 @@ +//===--- BareMetal.h - Bare Metal Tool and ToolChain -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H +#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_BAREMETAL_H + +#include "clang/Driver/Tool.h" +#include "clang/Driver/ToolChain.h" + +#include + +namespace clang { +namespace driver { + +namespace toolchains { + +class LLVM_LIBRARY_VISIBILITY BareMetal : public ToolChain { +public: + BareMetal(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args); + ~BareMetal() override; + + static bool handlesTarget(const llvm::Triple &Triple); +protected: + Tool *buildLinker() const override; + +public: + bool useIntegratedAs() const override { return true; } + bool isCrossCompiling() const override { return true; } + bool isPICDefault() const override { return false; } + bool isPIEDefault() const override { return false; } + bool isPICDefaultForced() const override { return false; } + bool SupportsProfiling() const override { return false; } + bool SupportsObjCGC() const override { return false; } + std::string getThreadModel() const override; + bool isThreadModelSupported(const StringRef Model) const override; + + RuntimeLibType GetDefaultRuntimeLibType() const override { + return ToolChain::RLT_CompilerRT; + } + CXXStdlibType GetDefaultCXXStdlibType() const override { + return ToolChain::CST_Libcxx; + } + + const char *getDefaultLinker() const override { return "ld.lld"; } + + std::string getRuntimesDir() const; + void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + std::string findLibCxxIncludePath(ToolChain::CXXStdlibType LibType) const; + void AddClangCXXStdlibIncludeArgs( + const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; + void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; + void AddLinkRuntimeLib(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; +}; + +} // namespace toolchains + +namespace tools { +namespace baremetal { + +class LLVM_LIBRARY_VISIBILITY Linker : public Tool { +public: + Linker(const ToolChain &TC) : Tool("baremetal::Linker", "ld.lld", TC) {} + bool isLinkJob() const override { return true; } + bool hasIntegratedCPP() const override { return false; } + void ConstructJob(Compilation &C, const JobAction &JA, + const InputInfo &Output, const InputInfoList &Inputs, + const llvm::opt::ArgList &TCArgs, + const char *LinkingOutput) const override; +}; + +} // namespace baremetal +} // namespace tools + +} // namespace driver +} // namespace clang + +#endif diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp index f1015e62eec..1a398fd8a77 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Gnu.cpp @@ -1598,6 +1598,49 @@ bool Generic_GCC::GCCVersion::isOlderThan(int RHSMajor, int RHSMinor, return false; } +/// \brief Parse a GCCVersion object out of a string of text. +/// +/// This is the primary means of forming GCCVersion objects. +/*static*/ +Generic_GCC::GCCVersion Generic_GCC::GCCVersion::Parse(StringRef VersionText) { + const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; + std::pair First = VersionText.split('.'); + std::pair Second = First.second.split('.'); + + GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; + if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0) + return BadVersion; + GoodVersion.MajorStr = First.first.str(); + if (First.second.empty()) + return GoodVersion; + if (Second.first.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0) + return BadVersion; + GoodVersion.MinorStr = Second.first.str(); + + // First look for a number prefix and parse that if present. Otherwise just + // stash the entire patch string in the suffix, and leave the number + // unspecified. This covers versions strings such as: + // 5 (handled above) + // 4.4 + // 4.4.0 + // 4.4.x + // 4.4.2-rc4 + // 4.4.x-patched + // And retains any patch number it finds. + StringRef PatchText = GoodVersion.PatchSuffix = Second.second.str(); + if (!PatchText.empty()) { + if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) { + // Try to parse the number and any suffix. + if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) || + GoodVersion.Patch < 0) + return BadVersion; + GoodVersion.PatchSuffix = PatchText.substr(EndNumber); + } + } + + return GoodVersion; +} + static llvm::StringRef getGCCToolchainDir(const ArgList &Args) { const Arg *A = Args.getLastArg(clang::driver::options::OPT_gcc_toolchain); if (A) diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp index 50443a12524..9da366eb55f 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Linux.cpp @@ -372,49 +372,6 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) addPathIfExists(D, SysRoot + "/usr/lib", Paths); } -/// \brief Parse a GCCVersion object out of a string of text. -/// -/// This is the primary means of forming GCCVersion objects. -/*static*/ -Generic_GCC::GCCVersion Linux::GCCVersion::Parse(StringRef VersionText) { - const GCCVersion BadVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; - std::pair First = VersionText.split('.'); - std::pair Second = First.second.split('.'); - - GCCVersion GoodVersion = {VersionText.str(), -1, -1, -1, "", "", ""}; - if (First.first.getAsInteger(10, GoodVersion.Major) || GoodVersion.Major < 0) - return BadVersion; - GoodVersion.MajorStr = First.first.str(); - if (First.second.empty()) - return GoodVersion; - if (Second.first.getAsInteger(10, GoodVersion.Minor) || GoodVersion.Minor < 0) - return BadVersion; - GoodVersion.MinorStr = Second.first.str(); - - // First look for a number prefix and parse that if present. Otherwise just - // stash the entire patch string in the suffix, and leave the number - // unspecified. This covers versions strings such as: - // 5 (handled above) - // 4.4 - // 4.4.0 - // 4.4.x - // 4.4.2-rc4 - // 4.4.x-patched - // And retains any patch number it finds. - StringRef PatchText = GoodVersion.PatchSuffix = Second.second.str(); - if (!PatchText.empty()) { - if (size_t EndNumber = PatchText.find_first_not_of("0123456789")) { - // Try to parse the number and any suffix. - if (PatchText.slice(0, EndNumber).getAsInteger(10, GoodVersion.Patch) || - GoodVersion.Patch < 0) - return BadVersion; - GoodVersion.PatchSuffix = PatchText.substr(EndNumber); - } - } - - return GoodVersion; -} - bool Linux::HasNativeLLVMSupport() const { return true; } Tool *Linux::buildLinker() const { return new tools::gnutools::Linker(*this); } diff --git a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Myriad.cpp b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Myriad.cpp index f70ce93c45c..6fdb5a2248d 100644 --- a/contrib/llvm/tools/clang/lib/Driver/ToolChains/Myriad.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/ToolChains/Myriad.cpp @@ -217,6 +217,7 @@ MyriadToolChain::MyriadToolChain(const Driver &D, const llvm::Triple &Triple, default: D.Diag(clang::diag::err_target_unsupported_arch) << Triple.getArchName() << "myriad"; + LLVM_FALLTHROUGH; case llvm::Triple::sparc: case llvm::Triple::sparcel: case llvm::Triple::shave: diff --git a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp index 006a9710148..ae1af753bf4 100644 --- a/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp +++ b/contrib/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp @@ -54,13 +54,14 @@ static bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style) { const FormatToken &Previous = *Current.Previous; if (Current.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializersBeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) return true; return Previous.is(tok::comma) && !Current.isTrailingComment() && ((Previous.isNot(TT_CtorInitializerComma) || - !Style.BreakConstructorInitializersBeforeComma) && + Style.BreakConstructorInitializers != + FormatStyle::BCIS_BeforeComma) && (Previous.isNot(TT_InheritanceComma) || - !Style.BreakBeforeInheritanceComma)); + !Style.BreakBeforeInheritanceComma)); } ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, @@ -178,13 +179,20 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { getLengthToMatchingParen(Previous) + State.Column - 1 > getColumnLimit(State)) return true; - if (Current.is(TT_CtorInitializerColon) && - (State.Column + State.Line->Last->TotalLength - Current.TotalLength + 2 > + + const FormatToken &BreakConstructorInitializersToken = + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon + ? Previous + : Current; + if (BreakConstructorInitializersToken.is(TT_CtorInitializerColon) && + (State.Column + State.Line->Last->TotalLength - Previous.TotalLength > getColumnLimit(State) || State.Stack.back().BreakBeforeParameter) && - ((Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All) || - Style.BreakConstructorInitializersBeforeComma || Style.ColumnLimit != 0)) + (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All || + Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon || + Style.ColumnLimit != 0)) return true; + if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) && State.Line->startsWith(TT_ObjCMethodSpecifier)) return true; @@ -207,7 +215,7 @@ bool ContinuationIndenter::mustBreak(const LineState &State) { // ... // }.bind(...)); // FIXME: We should find a more generic solution to this problem. - !(State.Column <= NewLineColumn && Previous.isNot(tok::r_paren) && + !(State.Column <= NewLineColumn && Style.Language == FormatStyle::LK_JavaScript)) return true; @@ -455,6 +463,11 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, !Previous.is(TT_OverloadedOperator)) || (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) { State.Stack.back().LastSpace = State.Column; + } else if (Previous.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == + FormatStyle::BCIS_AfterColon) { + State.Stack.back().Indent = State.Column; + State.Stack.back().LastSpace = State.Column; } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr, TT_CtorInitializerColon)) && ((Previous.getPrecedence() != prec::Assignment && @@ -614,7 +627,7 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, State.Stack[i].BreakBeforeParameter = true; if (PreviousNonComment && - !PreviousNonComment->isOneOf(tok::comma, tok::semi) && + !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) && (PreviousNonComment->isNot(TT_TemplateCloser) || Current.NestingLevel != 0) && !PreviousNonComment->isOneOf( @@ -676,7 +689,18 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return State.Stack[State.Stack.size() - 2].LastSpace; return State.FirstIndent; } - if (Current.is(tok::r_paren) && State.Stack.size() > 1) + // Indent a closing parenthesis at the previous level if followed by a semi or + // opening brace. This allows indentations such as: + // foo( + // a, + // ); + // function foo( + // a, + // ) { + // code(); // + // } + if (Current.is(tok::r_paren) && State.Stack.size() > 1 && + (!Current.Next || Current.Next->isOneOf(tok::semi, tok::l_brace))) return State.Stack[State.Stack.size() - 2].LastSpace; if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope()) return State.Stack[State.Stack.size() - 2].LastSpace; @@ -750,6 +774,9 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { return ContinuationIndent; if (NextNonComment->is(TT_CtorInitializerComma)) return State.Stack.back().Indent; + if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) + return State.Stack.back().Indent; if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, TT_InheritanceComma)) return State.FirstIndent + Style.ConstructorInitializerIndentWidth; @@ -810,19 +837,29 @@ unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, State.FirstIndent + Style.ContinuationIndentWidth; } } - if (Current.is(TT_CtorInitializerColon)) { + if (Current.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon) { // Indent 2 from the column, so: // SomeClass::SomeClass() // : First(...), ... // Next(...) // ^ line up here. State.Stack.back().Indent = - State.Column + (Style.BreakConstructorInitializersBeforeComma ? 0 : 2); + State.Column + (Style.BreakConstructorInitializers == + FormatStyle::BCIS_BeforeComma ? 0 : 2); State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) State.Stack.back().AvoidBinPacking = true; State.Stack.back().BreakBeforeParameter = false; } + if (Current.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) { + State.Stack.back().Indent = + State.FirstIndent + Style.ConstructorInitializerIndentWidth; + State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; + if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) + State.Stack.back().AvoidBinPacking = true; + } if (Current.is(TT_InheritanceColon)) State.Stack.back().Indent = State.FirstIndent + Style.ContinuationIndentWidth; diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp index ac83379e6b1..2ef6516e02e 100644 --- a/contrib/llvm/tools/clang/lib/Format/Format.cpp +++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp @@ -123,6 +123,14 @@ template <> struct ScalarEnumerationTraits { } }; +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) { + IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon); + IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma); + IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon); + } +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) { @@ -304,8 +312,19 @@ template <> struct MappingTraits { IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); IO.mapOptional("BreakBeforeTernaryOperators", Style.BreakBeforeTernaryOperators); + + bool BreakConstructorInitializersBeforeComma = false; IO.mapOptional("BreakConstructorInitializersBeforeComma", - Style.BreakConstructorInitializersBeforeComma); + BreakConstructorInitializersBeforeComma); + IO.mapOptional("BreakConstructorInitializers", + Style.BreakConstructorInitializers); + // If BreakConstructorInitializersBeforeComma was specified but + // BreakConstructorInitializers was not, initialize the latter from the + // former for backwards compatibility. + if (BreakConstructorInitializersBeforeComma && + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon) + Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; + IO.mapOptional("BreakAfterJavaFieldAnnotations", Style.BreakAfterJavaFieldAnnotations); IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals); @@ -537,7 +556,7 @@ FormatStyle getLLVMStyle() { LLVMStyle.BraceWrapping = {false, false, false, false, false, false, false, false, false, false, false}; LLVMStyle.BreakAfterJavaFieldAnnotations = false; - LLVMStyle.BreakConstructorInitializersBeforeComma = false; + LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon; LLVMStyle.BreakBeforeInheritanceComma = false; LLVMStyle.BreakStringLiterals = true; LLVMStyle.ColumnLimit = 80; @@ -694,7 +713,7 @@ FormatStyle getMozillaStyle() { MozillaStyle.BinPackParameters = false; MozillaStyle.BinPackArguments = false; MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; - MozillaStyle.BreakConstructorInitializersBeforeComma = true; + MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; MozillaStyle.BreakBeforeInheritanceComma = true; MozillaStyle.ConstructorInitializerIndentWidth = 2; MozillaStyle.ContinuationIndentWidth = 2; @@ -717,7 +736,7 @@ FormatStyle getWebKitStyle() { Style.AlignTrailingComments = false; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; Style.BreakBeforeBraces = FormatStyle::BS_WebKit; - Style.BreakConstructorInitializersBeforeComma = true; + Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; Style.Cpp11BracedListStyle = false; Style.ColumnLimit = 0; Style.FixNamespaceComments = false; @@ -1891,6 +1910,9 @@ tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, ArrayRef Ranges, StringRef FileName) { + // cleanups only apply to C++ (they mostly concern ctor commas etc.) + if (Style.Language != FormatStyle::LK_Cpp) + return tooling::Replacements(); std::unique_ptr Env = Environment::CreateVirtualEnvironment(Code, FileName, Ranges); Cleaner Clean(*Env, Style); diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp index 79f438eb0f8..2af931cdf1b 100644 --- a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp @@ -1996,7 +1996,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::comment)) return 1000; - if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon)) + if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, TT_CtorInitializerColon)) return 2; if (Right.isMemberAccess()) { @@ -2514,8 +2514,12 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, Right.Previous->MatchingParen->NestingLevel == 0 && Style.AlwaysBreakTemplateDeclarations) return true; - if ((Right.isOneOf(TT_CtorInitializerComma, TT_CtorInitializerColon)) && - Style.BreakConstructorInitializersBeforeComma && + if (Right.is(TT_CtorInitializerComma) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && + !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) + return true; + if (Right.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) return true; // Break only if we have multiple inheritance. @@ -2625,7 +2629,10 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, // The first comment in a braced lists is always interpreted as belonging to // the first list element. Otherwise, it should be placed outside of the // list. - return Left.BlockKind == BK_BracedInit; + return Left.BlockKind == BK_BracedInit || + (Left.is(TT_CtorInitializerColon) && + Style.BreakConstructorInitializers == + FormatStyle::BCIS_AfterColon); if (Left.is(tok::question) && Right.is(tok::colon)) return false; if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) @@ -2698,11 +2705,15 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) return true; + if (Left.is(TT_CtorInitializerColon)) + return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon; + if (Right.is(TT_CtorInitializerColon)) + return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon; if (Left.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializersBeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) return false; if (Right.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializersBeforeComma) + Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) return true; if (Left.is(TT_InheritanceComma) && Style.BreakBeforeInheritanceComma) return false; diff --git a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp index 6ee211c2de6..d660638a1e8 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/ASTUnit.cpp @@ -90,6 +90,21 @@ namespace { /// \brief Erase temporary files and the preamble file. void Cleanup(); }; + + template + std::unique_ptr valueOrNull(llvm::ErrorOr> Val) { + if (!Val) + return nullptr; + return std::move(*Val); + } + + template + bool moveOnNoError(llvm::ErrorOr Val, T &Output) { + if (!Val) + return false; + Output = std::move(*Val); + return true; + } } static llvm::sys::SmartMutex &getOnDiskMutex() { @@ -1019,7 +1034,8 @@ static void checkAndSanitizeDiags(SmallVectorImpl & /// \returns True if a failure occurred that causes the ASTUnit not to /// contain any translation-unit information, false otherwise. bool ASTUnit::Parse(std::shared_ptr PCHContainerOps, - std::unique_ptr OverrideMainBuffer) { + std::unique_ptr OverrideMainBuffer, + IntrusiveRefCntPtr VFS) { SavedMainFileBuffer.reset(); if (!Invocation) @@ -1028,6 +1044,12 @@ bool ASTUnit::Parse(std::shared_ptr PCHContainerOps, // Create the compiler instance to use for building the AST. std::unique_ptr Clang( new CompilerInstance(std::move(PCHContainerOps))); + if (FileMgr && VFS) { + assert(VFS == FileMgr->getVirtualFileSystem() && + "VFS passed to Parse and VFS in FileMgr are different"); + } else if (VFS) { + Clang->setVirtualFileSystem(VFS); + } // Recover resources if we crash before exiting this method. llvm::CrashRecoveryContextCleanupRegistrar @@ -1170,7 +1192,8 @@ static std::string GetPreamblePCHPath() { /// that corresponds to the main file along with a pair (bytes, start-of-line) /// that describes the preamble. ASTUnit::ComputedPreamble -ASTUnit::ComputePreamble(CompilerInvocation &Invocation, unsigned MaxLines) { +ASTUnit::ComputePreamble(CompilerInvocation &Invocation, unsigned MaxLines, + IntrusiveRefCntPtr VFS) { FrontendOptions &FrontendOpts = Invocation.getFrontendOpts(); PreprocessorOptions &PreprocessorOpts = Invocation.getPreprocessorOpts(); @@ -1180,28 +1203,32 @@ ASTUnit::ComputePreamble(CompilerInvocation &Invocation, unsigned MaxLines) { llvm::MemoryBuffer *Buffer = nullptr; std::unique_ptr BufferOwner; std::string MainFilePath(FrontendOpts.Inputs[0].getFile()); - llvm::sys::fs::UniqueID MainFileID; - if (!llvm::sys::fs::getUniqueID(MainFilePath, MainFileID)) { + auto MainFileStatus = VFS->status(MainFilePath); + if (MainFileStatus) { + llvm::sys::fs::UniqueID MainFileID = MainFileStatus->getUniqueID(); + // Check whether there is a file-file remapping of the main file for (const auto &RF : PreprocessorOpts.RemappedFiles) { std::string MPath(RF.first); - llvm::sys::fs::UniqueID MID; - if (!llvm::sys::fs::getUniqueID(MPath, MID)) { + auto MPathStatus = VFS->status(MPath); + if (MPathStatus) { + llvm::sys::fs::UniqueID MID = MPathStatus->getUniqueID(); if (MainFileID == MID) { // We found a remapping. Try to load the resulting, remapped source. - BufferOwner = getBufferForFile(RF.second); + BufferOwner = valueOrNull(VFS->getBufferForFile(RF.second)); if (!BufferOwner) return ComputedPreamble(nullptr, nullptr, 0, true); } } } - + // Check whether there is a file-buffer remapping. It supercedes the // file-file remapping. for (const auto &RB : PreprocessorOpts.RemappedFileBuffers) { std::string MPath(RB.first); - llvm::sys::fs::UniqueID MID; - if (!llvm::sys::fs::getUniqueID(MPath, MID)) { + auto MPathStatus = VFS->status(MPath); + if (MPathStatus) { + llvm::sys::fs::UniqueID MID = MPathStatus->getUniqueID(); if (MainFileID == MID) { // We found a remapping. BufferOwner.reset(); @@ -1213,7 +1240,7 @@ ASTUnit::ComputePreamble(CompilerInvocation &Invocation, unsigned MaxLines) { // If the main source file was not remapped, load it now. if (!Buffer && !BufferOwner) { - BufferOwner = getBufferForFile(FrontendOpts.Inputs[0].getFile()); + BufferOwner = valueOrNull(VFS->getBufferForFile(FrontendOpts.Inputs[0].getFile())); if (!BufferOwner) return ComputedPreamble(nullptr, nullptr, 0, true); } @@ -1324,8 +1351,10 @@ makeStandaloneDiagnostic(const LangOptions &LangOpts, std::unique_ptr ASTUnit::getMainBufferWithPrecompiledPreamble( std::shared_ptr PCHContainerOps, - const CompilerInvocation &PreambleInvocationIn, bool AllowRebuild, + const CompilerInvocation &PreambleInvocationIn, + IntrusiveRefCntPtr VFS, bool AllowRebuild, unsigned MaxLines) { + assert(VFS && "VFS is null"); auto PreambleInvocation = std::make_shared(PreambleInvocationIn); @@ -1333,7 +1362,8 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( PreprocessorOptions &PreprocessorOpts = PreambleInvocation->getPreprocessorOpts(); - ComputedPreamble NewPreamble = ComputePreamble(*PreambleInvocation, MaxLines); + ComputedPreamble NewPreamble = + ComputePreamble(*PreambleInvocation, MaxLines, VFS); if (!NewPreamble.Size) { // We couldn't find a preamble in the main source. Clear out the current @@ -1369,7 +1399,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( break; vfs::Status Status; - if (FileMgr->getNoncachedStatValue(R.second, Status)) { + if (!moveOnNoError(VFS->status(R.second), Status)) { // If we can't stat the file we're remapping to, assume that something // horrible happened. AnyFileChanged = true; @@ -1386,7 +1416,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( break; vfs::Status Status; - if (FileMgr->getNoncachedStatValue(RB.first, Status)) { + if (!moveOnNoError(VFS->status(RB.first), Status)) { AnyFileChanged = true; break; } @@ -1401,7 +1431,7 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( !AnyFileChanged && F != FEnd; ++F) { vfs::Status Status; - if (FileMgr->getNoncachedStatValue(F->first(), Status)) { + if (!moveOnNoError(VFS->status(F->first()), Status)) { // If we can't stat the file, assume that something horrible happened. AnyFileChanged = true; break; @@ -1546,14 +1576,14 @@ ASTUnit::getMainBufferWithPrecompiledPreamble( TopLevelDeclsInPreamble.clear(); PreambleDiagnostics.clear(); - IntrusiveRefCntPtr VFS = - createVFSFromCompilerInvocation(Clang->getInvocation(), getDiagnostics()); + VFS = createVFSFromCompilerInvocation(Clang->getInvocation(), + getDiagnostics(), VFS); if (!VFS) return nullptr; // Create a file manager object to provide access to and cache the filesystem. Clang->setFileManager(new FileManager(Clang->getFileSystemOpts(), VFS)); - + // Create the source manager. Clang->setSourceManager(new SourceManager(getDiagnostics(), Clang->getFileManager())); @@ -1863,10 +1893,13 @@ ASTUnit *ASTUnit::LoadFromCompilerInvocationAction( bool ASTUnit::LoadFromCompilerInvocation( std::shared_ptr PCHContainerOps, - unsigned PrecompilePreambleAfterNParses) { + unsigned PrecompilePreambleAfterNParses, + IntrusiveRefCntPtr VFS) { if (!Invocation) return true; - + + assert(VFS && "VFS is null"); + // We'll manage file buffers ourselves. Invocation->getPreprocessorOpts().RetainRemappedFileBuffers = true; Invocation->getFrontendOpts().DisableFree = false; @@ -1877,19 +1910,19 @@ bool ASTUnit::LoadFromCompilerInvocation( if (PrecompilePreambleAfterNParses > 0) { PreambleRebuildCounter = PrecompilePreambleAfterNParses; OverrideMainBuffer = - getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation); + getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation, VFS); getDiagnostics().Reset(); ProcessWarningOptions(getDiagnostics(), Invocation->getDiagnosticOpts()); } - + SimpleTimer ParsingTimer(WantTiming); ParsingTimer.setOutput("Parsing " + getMainFileName()); - + // Recover resources if we crash before exiting this method. llvm::CrashRecoveryContextCleanupRegistrar MemBufferCleanup(OverrideMainBuffer.get()); - return Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer)); + return Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer), VFS); } std::unique_ptr ASTUnit::LoadFromCompilerInvocation( @@ -1923,7 +1956,8 @@ std::unique_ptr ASTUnit::LoadFromCompilerInvocation( DiagCleanup(Diags.get()); if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps), - PrecompilePreambleAfterNParses)) + PrecompilePreambleAfterNParses, + AST->FileMgr->getVirtualFileSystem())) return nullptr; return AST; } @@ -1938,7 +1972,8 @@ ASTUnit *ASTUnit::LoadFromCommandLine( bool CacheCodeCompletionResults, bool IncludeBriefCommentsInCodeCompletion, bool AllowPCHWithCompilerErrors, bool SkipFunctionBodies, bool UserFilesAreVolatile, bool ForSerialization, - llvm::Optional ModuleFormat, std::unique_ptr *ErrAST) { + llvm::Optional ModuleFormat, std::unique_ptr *ErrAST, + IntrusiveRefCntPtr VFS) { assert(Diags.get() && "no DiagnosticsEngine was provided"); SmallVector StoredDiagnostics; @@ -1979,8 +2014,9 @@ ASTUnit *ASTUnit::LoadFromCommandLine( ConfigureDiags(Diags, *AST, CaptureDiagnostics); AST->Diagnostics = Diags; AST->FileSystemOpts = CI->getFileSystemOpts(); - IntrusiveRefCntPtr VFS = - createVFSFromCompilerInvocation(*CI, *Diags); + if (!VFS) + VFS = vfs::getRealFileSystem(); + VFS = createVFSFromCompilerInvocation(*CI, *Diags, VFS); if (!VFS) return nullptr; AST->FileMgr = new FileManager(AST->FileSystemOpts, VFS); @@ -2006,7 +2042,8 @@ ASTUnit *ASTUnit::LoadFromCommandLine( ASTUnitCleanup(AST.get()); if (AST->LoadFromCompilerInvocation(std::move(PCHContainerOps), - PrecompilePreambleAfterNParses)) { + PrecompilePreambleAfterNParses, + VFS)) { // Some error occurred, if caller wants to examine diagnostics, pass it the // ASTUnit. if (ErrAST) { @@ -2020,10 +2057,16 @@ ASTUnit *ASTUnit::LoadFromCommandLine( } bool ASTUnit::Reparse(std::shared_ptr PCHContainerOps, - ArrayRef RemappedFiles) { + ArrayRef RemappedFiles, + IntrusiveRefCntPtr VFS) { if (!Invocation) return true; + if (!VFS) { + assert(FileMgr && "FileMgr is null on Reparse call"); + VFS = FileMgr->getVirtualFileSystem(); + } + clearFileLevelDecls(); SimpleTimer ParsingTimer(WantTiming); @@ -2045,7 +2088,8 @@ bool ASTUnit::Reparse(std::shared_ptr PCHContainerOps, std::unique_ptr OverrideMainBuffer; if (!getPreambleFile(this).empty() || PreambleRebuildCounter > 0) OverrideMainBuffer = - getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation); + getMainBufferWithPrecompiledPreamble(PCHContainerOps, *Invocation, VFS); + // Clear out the diagnostics state. FileMgr.reset(); @@ -2056,7 +2100,7 @@ bool ASTUnit::Reparse(std::shared_ptr PCHContainerOps, // Parse the sources bool Result = - Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer)); + Parse(std::move(PCHContainerOps), std::move(OverrideMainBuffer), VFS); // If we're caching global code-completion results, and the top-level // declarations have changed, clear out the code-completion cache. @@ -2414,15 +2458,19 @@ void ASTUnit::CodeComplete( std::unique_ptr OverrideMainBuffer; if (!getPreambleFile(this).empty()) { std::string CompleteFilePath(File); - llvm::sys::fs::UniqueID CompleteFileID; - if (!llvm::sys::fs::getUniqueID(CompleteFilePath, CompleteFileID)) { + auto VFS = FileMgr.getVirtualFileSystem(); + auto CompleteFileStatus = VFS->status(CompleteFilePath); + if (CompleteFileStatus) { + llvm::sys::fs::UniqueID CompleteFileID = CompleteFileStatus->getUniqueID(); + std::string MainPath(OriginalSourceFile); - llvm::sys::fs::UniqueID MainID; - if (!llvm::sys::fs::getUniqueID(MainPath, MainID)) { + auto MainStatus = VFS->status(MainPath); + if (MainStatus) { + llvm::sys::fs::UniqueID MainID = MainStatus->getUniqueID(); if (CompleteFileID == MainID && Line > 1) OverrideMainBuffer = getMainBufferWithPrecompiledPreamble( - PCHContainerOps, Inv, false, Line - 1); + PCHContainerOps, Inv, VFS, false, Line - 1); } } } diff --git a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp index 51147b6f949..7d7e7d49e9f 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CompilerInvocation.cpp @@ -534,6 +534,7 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK, Opts.DisableLLVMPasses = Args.hasArg(OPT_disable_llvm_passes); Opts.DisableLifetimeMarkers = Args.hasArg(OPT_disable_lifetimemarkers); + Opts.DisableO0ImplyOptNone = Args.hasArg(OPT_disable_O0_optnone); Opts.DisableRedZone = Args.hasArg(OPT_disable_red_zone); Opts.ForbidGuardVariables = Args.hasArg(OPT_fforbid_guard_variables); Opts.UseRegisterSizedBitfieldAccess = Args.hasArg( @@ -1087,6 +1088,9 @@ bool clang::ParseDiagnosticArgs(DiagnosticOptions &Opts, ArgList &Args, Opts.SpellCheckingLimit = getLastArgIntValue( Args, OPT_fspell_checking_limit, DiagnosticOptions::DefaultSpellCheckingLimit, Diags); + Opts.SnippetLineLimit = getLastArgIntValue( + Args, OPT_fcaret_diagnostics_max_lines, + DiagnosticOptions::DefaultSnippetLineLimit, Diags); Opts.TabStop = getLastArgIntValue(Args, OPT_ftabstop, DiagnosticOptions::DefaultTabStop, Diags); if (Opts.TabStop == 0 || Opts.TabStop > DiagnosticOptions::MaxTabStop) { @@ -2747,15 +2751,22 @@ void BuryPointer(const void *Ptr) { IntrusiveRefCntPtr createVFSFromCompilerInvocation(const CompilerInvocation &CI, DiagnosticsEngine &Diags) { - if (CI.getHeaderSearchOpts().VFSOverlayFiles.empty()) - return vfs::getRealFileSystem(); + return createVFSFromCompilerInvocation(CI, Diags, vfs::getRealFileSystem()); +} - IntrusiveRefCntPtr - Overlay(new vfs::OverlayFileSystem(vfs::getRealFileSystem())); +IntrusiveRefCntPtr +createVFSFromCompilerInvocation(const CompilerInvocation &CI, + DiagnosticsEngine &Diags, + IntrusiveRefCntPtr BaseFS) { + if (CI.getHeaderSearchOpts().VFSOverlayFiles.empty()) + return BaseFS; + + IntrusiveRefCntPtr Overlay( + new vfs::OverlayFileSystem(BaseFS)); // earlier vfs files are on the bottom for (const std::string &File : CI.getHeaderSearchOpts().VFSOverlayFiles) { llvm::ErrorOr> Buffer = - llvm::MemoryBuffer::getFile(File); + BaseFS->getBufferForFile(File); if (!Buffer) { Diags.Report(diag::err_missing_vfs_overlay_file) << File; return IntrusiveRefCntPtr(); diff --git a/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp b/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp index 16269064b6e..49d459e78c4 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/CreateInvocationFromCommandLine.cpp @@ -52,6 +52,8 @@ std::unique_ptr clang::createInvocationFromCommandLine( TheDriver.setCheckInputsExist(false); std::unique_ptr C(TheDriver.BuildCompilation(Args)); + if (!C) + return nullptr; // Just print the cc1 options if -### was present. if (C->getArgs().hasArg(driver::options::OPT__HASH_HASH_HASH)) { diff --git a/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp b/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp index 1fbb2b054ba..874c1b6be41 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/FrontendAction.cpp @@ -252,7 +252,8 @@ static SourceLocation ReadOriginalFileName(CompilerInstance &CI, if (AddLineNote) CI.getSourceManager().AddLineNote( - LineNoLoc, LineNo, SourceMgr.getLineTableFilenameID(InputFile)); + LineNoLoc, LineNo, SourceMgr.getLineTableFilenameID(InputFile), false, + false, SrcMgr::C_User); return T.getLocation(); } diff --git a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp index 9257dcae84c..08befb33c96 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/InitPreprocessor.cpp @@ -535,7 +535,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, if (LangOpts.ConceptsTS) Builder.defineMacro("__cpp_experimental_concepts", "1"); if (LangOpts.CoroutinesTS) - Builder.defineMacro("__cpp_coroutines", "1"); + Builder.defineMacro("__cpp_coroutines", "201703L"); } static void InitializePredefinedMacros(const TargetInfo &TI, diff --git a/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp b/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp index a4937386b93..a24d5768f55 100644 --- a/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp +++ b/contrib/llvm/tools/clang/lib/Frontend/TextDiagnostic.cpp @@ -928,6 +928,56 @@ void TextDiagnostic::emitBuildingModuleLocation(SourceLocation Loc, OS << "While building module '" << ModuleName << "':\n"; } +/// \brief Find the suitable set of lines to show to include a set of ranges. +static llvm::Optional> +findLinesForRange(const CharSourceRange &R, FileID FID, + const SourceManager &SM) { + if (!R.isValid()) return None; + + SourceLocation Begin = R.getBegin(); + SourceLocation End = R.getEnd(); + if (SM.getFileID(Begin) != FID || SM.getFileID(End) != FID) + return None; + + return std::make_pair(SM.getExpansionLineNumber(Begin), + SM.getExpansionLineNumber(End)); +} + +/// Add as much of range B into range A as possible without exceeding a maximum +/// size of MaxRange. Ranges are inclusive. +static std::pair +maybeAddRange(std::pair A, std::pair B, + unsigned MaxRange) { + // If A is already the maximum size, we're done. + unsigned Slack = MaxRange - (A.second - A.first + 1); + if (Slack == 0) + return A; + + // Easy case: merge succeeds within MaxRange. + unsigned Min = std::min(A.first, B.first); + unsigned Max = std::max(A.second, B.second); + if (Max - Min + 1 <= MaxRange) + return {Min, Max}; + + // If we can't reach B from A within MaxRange, there's nothing to do. + // Don't add lines to the range that contain nothing interesting. + if ((B.first > A.first && B.first - A.first + 1 > MaxRange) || + (B.second < A.second && A.second - B.second + 1 > MaxRange)) + return A; + + // Otherwise, expand A towards B to produce a range of size MaxRange. We + // attempt to expand by the same amount in both directions if B strictly + // contains A. + + // Expand downwards by up to half the available amount, then upwards as + // much as possible, then downwards as much as possible. + A.second = std::min(A.second + (Slack + 1) / 2, Max); + Slack = MaxRange - (A.second - A.first + 1); + A.first = std::max(Min + Slack, A.first) - Slack; + A.second = std::min(A.first + MaxRange - 1, Max); + return A; +} + /// \brief Highlight a SourceRange (with ~'s) for any characters on LineNo. static void highlightRange(const CharSourceRange &R, unsigned LineNo, FileID FID, @@ -990,9 +1040,12 @@ static void highlightRange(const CharSourceRange &R, EndColNo = map.startOfPreviousColumn(EndColNo); // If the start/end passed each other, then we are trying to highlight a - // range that just exists in whitespace, which must be some sort of other - // bug. - assert(StartColNo <= EndColNo && "Trying to highlight whitespace??"); + // range that just exists in whitespace. That most likely means we have + // a multi-line highlighting range that covers a blank line. + if (StartColNo > EndColNo) { + assert(StartLineNo != EndLineNo && "trying to highlight whitespace"); + StartColNo = EndColNo; + } } assert(StartColNo <= map.getSourceLine().size() && "Invalid range!"); @@ -1103,7 +1156,7 @@ void TextDiagnostic::emitSnippetAndCaret( // Decompose the location into a FID/Offset pair. std::pair LocInfo = SM.getDecomposedLoc(Loc); FileID FID = LocInfo.first; - unsigned FileOffset = LocInfo.second; + unsigned CaretFileOffset = LocInfo.second; // Get information about the buffer it points into. bool Invalid = false; @@ -1111,101 +1164,118 @@ void TextDiagnostic::emitSnippetAndCaret( if (Invalid) return; - const char *BufStart = BufData.data(); - const char *BufEnd = BufStart + BufData.size(); + unsigned CaretLineNo = SM.getLineNumber(FID, CaretFileOffset); + unsigned CaretColNo = SM.getColumnNumber(FID, CaretFileOffset); - unsigned LineNo = SM.getLineNumber(FID, FileOffset); - unsigned ColNo = SM.getColumnNumber(FID, FileOffset); - // Arbitrarily stop showing snippets when the line is too long. static const size_t MaxLineLengthToPrint = 4096; - if (ColNo > MaxLineLengthToPrint) + if (CaretColNo > MaxLineLengthToPrint) return; - // Rewind from the current position to the start of the line. - const char *TokPtr = BufStart+FileOffset; - const char *LineStart = TokPtr-ColNo+1; // Column # is 1-based. - - // Compute the line end. Scan forward from the error position to the end of - // the line. - const char *LineEnd = TokPtr; - while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd) - ++LineEnd; - - // Arbitrarily stop showing snippets when the line is too long. - if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint) - return; - - // Trim trailing null-bytes. - StringRef Line(LineStart, LineEnd - LineStart); - while (Line.size() > ColNo && Line.back() == '\0') - Line = Line.drop_back(); - - // Copy the line of code into an std::string for ease of manipulation. - std::string SourceLine(Line.begin(), Line.end()); - - // Build the byte to column map. - const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop); - - // Create a line for the caret that is filled with spaces that is the same - // number of columns as the line of source code. - std::string CaretLine(sourceColMap.columns(), ' '); - - // Highlight all of the characters covered by Ranges with ~ characters. + // Find the set of lines to include. + const unsigned MaxLines = DiagOpts->SnippetLineLimit; + std::pair Lines = {CaretLineNo, CaretLineNo}; for (SmallVectorImpl::iterator I = Ranges.begin(), E = Ranges.end(); I != E; ++I) - highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM, LangOpts); + if (auto OptionalRange = findLinesForRange(*I, FID, SM)) + Lines = maybeAddRange(Lines, *OptionalRange, MaxLines); - // Next, insert the caret itself. - ColNo = sourceColMap.byteToContainingColumn(ColNo-1); - if (CaretLine.size()MessageLength; - if (Columns) - selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine, - Columns, sourceColMap); + // Compute the line end. + const char *LineEnd = LineStart; + while (*LineEnd != '\n' && *LineEnd != '\r' && LineEnd != BufEnd) + ++LineEnd; - // If we are in -fdiagnostics-print-source-range-info mode, we are trying - // to produce easily machine parsable output. Add a space before the - // source line and the caret to make it trivial to tell the main diagnostic - // line from what the user is intended to see. - if (DiagOpts->ShowSourceRanges) { - SourceLine = ' ' + SourceLine; - CaretLine = ' ' + CaretLine; - } + // Arbitrarily stop showing snippets when the line is too long. + // FIXME: Don't print any lines in this case. + if (size_t(LineEnd - LineStart) > MaxLineLengthToPrint) + return; - // Finally, remove any blank spaces from the end of CaretLine. - while (CaretLine[CaretLine.size()-1] == ' ') - CaretLine.erase(CaretLine.end()-1); + // Trim trailing null-bytes. + StringRef Line(LineStart, LineEnd - LineStart); + while (!Line.empty() && Line.back() == '\0' && + (LineNo != CaretLineNo || Line.size() > CaretColNo)) + Line = Line.drop_back(); - // Emit what we have computed. - emitSnippet(SourceLine); + // Copy the line of code into an std::string for ease of manipulation. + std::string SourceLine(Line.begin(), Line.end()); - if (DiagOpts->ShowColors) - OS.changeColor(caretColor, true); - OS << CaretLine << '\n'; - if (DiagOpts->ShowColors) - OS.resetColor(); + // Build the byte to column map. + const SourceColumnMap sourceColMap(SourceLine, DiagOpts->TabStop); - if (!FixItInsertionLine.empty()) { - if (DiagOpts->ShowColors) - // Print fixit line in color - OS.changeColor(fixitColor, false); - if (DiagOpts->ShowSourceRanges) - OS << ' '; - OS << FixItInsertionLine << '\n'; - if (DiagOpts->ShowColors) - OS.resetColor(); + // Create a line for the caret that is filled with spaces that is the same + // number of columns as the line of source code. + std::string CaretLine(sourceColMap.columns(), ' '); + + // Highlight all of the characters covered by Ranges with ~ characters. + for (SmallVectorImpl::iterator I = Ranges.begin(), + E = Ranges.end(); + I != E; ++I) + highlightRange(*I, LineNo, FID, sourceColMap, CaretLine, SM, LangOpts); + + // Next, insert the caret itself. + if (CaretLineNo == LineNo) { + CaretColNo = sourceColMap.byteToContainingColumn(CaretColNo - 1); + if (CaretLine.size() < CaretColNo + 1) + CaretLine.resize(CaretColNo + 1, ' '); + CaretLine[CaretColNo] = '^'; + } + + std::string FixItInsertionLine = buildFixItInsertionLine( + LineNo, sourceColMap, Hints, SM, DiagOpts.get()); + + // If the source line is too long for our terminal, select only the + // "interesting" source region within that line. + unsigned Columns = DiagOpts->MessageLength; + if (Columns) + selectInterestingSourceRegion(SourceLine, CaretLine, FixItInsertionLine, + Columns, sourceColMap); + + // If we are in -fdiagnostics-print-source-range-info mode, we are trying + // to produce easily machine parsable output. Add a space before the + // source line and the caret to make it trivial to tell the main diagnostic + // line from what the user is intended to see. + if (DiagOpts->ShowSourceRanges) { + SourceLine = ' ' + SourceLine; + CaretLine = ' ' + CaretLine; + } + + // Finally, remove any blank spaces from the end of CaretLine. + while (!CaretLine.empty() && CaretLine[CaretLine.size() - 1] == ' ') + CaretLine.erase(CaretLine.end() - 1); + + // Emit what we have computed. + emitSnippet(SourceLine); + + if (!CaretLine.empty()) { + if (DiagOpts->ShowColors) + OS.changeColor(caretColor, true); + OS << CaretLine << '\n'; + if (DiagOpts->ShowColors) + OS.resetColor(); + } + + if (!FixItInsertionLine.empty()) { + if (DiagOpts->ShowColors) + // Print fixit line in color + OS.changeColor(fixitColor, false); + if (DiagOpts->ShowSourceRanges) + OS << ' '; + OS << FixItInsertionLine << '\n'; + if (DiagOpts->ShowColors) + OS.resetColor(); + } } // Print out any parseable fixit information requested by the options. diff --git a/contrib/llvm/tools/clang/lib/Headers/altivec.h b/contrib/llvm/tools/clang/lib/Headers/altivec.h index 421e2a7754a..957fd5f65e2 100644 --- a/contrib/llvm/tools/clang/lib/Headers/altivec.h +++ b/contrib/llvm/tools/clang/lib/Headers/altivec.h @@ -12156,6 +12156,11 @@ static __inline__ void __ATTRS_o_ai vec_vsx_st(vector unsigned char __a, #endif +#ifdef __VSX__ +#define vec_xxpermdi __builtin_vsx_xxpermdi +#define vec_xxsldwi __builtin_vsx_xxsldwi +#endif + /* vec_xor */ #define __builtin_altivec_vxor vec_xor diff --git a/contrib/llvm/tools/clang/lib/Headers/avx512vpopcntdqintrin.h b/contrib/llvm/tools/clang/lib/Headers/avx512vpopcntdqintrin.h new file mode 100644 index 00000000000..34ab84932e7 --- /dev/null +++ b/contrib/llvm/tools/clang/lib/Headers/avx512vpopcntdqintrin.h @@ -0,0 +1,70 @@ +/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics + *------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#ifndef __AVX512VPOPCNTDQINTRIN_H +#define __AVX512VPOPCNTDQINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \ + "q"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { + return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_popcnt_epi64(__A), (__v8di)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { + return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) { + return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_popcnt_epi32(__A), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { + return _mm512_mask_popcnt_epi32((__m512i)_mm512_setzero_si512(), __U, __A); +} + +#undef __DEFAULT_FN_ATTRS + +#endif diff --git a/contrib/llvm/tools/clang/lib/Headers/immintrin.h b/contrib/llvm/tools/clang/lib/Headers/immintrin.h index 7f91d49fbce..e22dd231427 100644 --- a/contrib/llvm/tools/clang/lib/Headers/immintrin.h +++ b/contrib/llvm/tools/clang/lib/Headers/immintrin.h @@ -146,6 +146,10 @@ _mm256_cvtph_ps(__m128i __a) #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) #include #endif diff --git a/contrib/llvm/tools/clang/lib/Index/IndexBody.cpp b/contrib/llvm/tools/clang/lib/Index/IndexBody.cpp index efa5ed85d60..d3632b8b9b1 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexBody.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexBody.cpp @@ -254,6 +254,18 @@ public: SymbolRoleSet Roles = getRolesForRef(E, Relations); return IndexCtx.handleReference(E->getExplicitProperty(), E->getLocation(), Parent, ParentDC, Roles, Relations, E); + } else if (const ObjCMethodDecl *Getter = E->getImplicitPropertyGetter()) { + // Class properties that are explicitly defined using @property + // declarations are represented implicitly as there is no ivar for class + // properties. + if (Getter->isClassMethod()) { + if (const auto *PD = Getter->getCanonicalDecl()->findPropertyDecl()) { + SmallVector Relations; + SymbolRoleSet Roles = getRolesForRef(E, Relations); + return IndexCtx.handleReference(PD, E->getLocation(), Parent, + ParentDC, Roles, Relations, E); + } + } } // No need to do a handleReference for the objc method, because there will diff --git a/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp index 5cebb198460..754bc84ff4b 100644 --- a/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp +++ b/contrib/llvm/tools/clang/lib/Index/IndexingContext.cpp @@ -124,10 +124,16 @@ bool IndexingContext::isTemplateImplicitInstantiation(const Decl *D) { TKind = FD->getTemplateSpecializationKind(); } else if (auto *VD = dyn_cast(D)) { TKind = VD->getTemplateSpecializationKind(); - } else if (isa(D)) { - if (const auto *Parent = - dyn_cast(D->getDeclContext())) - TKind = Parent->getSpecializationKind(); + } else if (const auto *RD = dyn_cast(D)) { + if (RD->getInstantiatedFromMemberClass()) + TKind = RD->getTemplateSpecializationKind(); + } else if (const auto *ED = dyn_cast(D)) { + if (ED->getInstantiatedFromMemberEnum()) + TKind = ED->getTemplateSpecializationKind(); + } else if (isa(D) || isa(D) || + isa(D)) { + if (const auto *Parent = dyn_cast(D->getDeclContext())) + return isTemplateImplicitInstantiation(Parent); } switch (TKind) { case TSK_Undeclared: @@ -155,6 +161,16 @@ bool IndexingContext::shouldIgnoreIfImplicit(const Decl *D) { return true; } +static const CXXRecordDecl * +getDeclContextForTemplateInstationPattern(const Decl *D) { + if (const auto *CTSD = + dyn_cast(D->getDeclContext())) + return CTSD->getTemplateInstantiationPattern(); + else if (const auto *RD = dyn_cast(D->getDeclContext())) + return RD->getInstantiatedFromMemberClass(); + return nullptr; +} + static const Decl *adjustTemplateImplicitInstantiation(const Decl *D) { if (const ClassTemplateSpecializationDecl * SD = dyn_cast(D)) { @@ -163,15 +179,26 @@ static const Decl *adjustTemplateImplicitInstantiation(const Decl *D) { return FD->getTemplateInstantiationPattern(); } else if (auto *VD = dyn_cast(D)) { return VD->getTemplateInstantiationPattern(); - } else if (const auto *FD = dyn_cast(D)) { - if (const auto *Parent = - dyn_cast(D->getDeclContext())) { - const CXXRecordDecl *Pattern = Parent->getTemplateInstantiationPattern(); - for (const NamedDecl *ND : Pattern->lookup(FD->getDeclName())) { - if (ND->isImplicit()) + } else if (const auto *RD = dyn_cast(D)) { + return RD->getInstantiatedFromMemberClass(); + } else if (const auto *ED = dyn_cast(D)) { + return ED->getInstantiatedFromMemberEnum(); + } else if (isa(D) || isa(D)) { + const auto *ND = cast(D); + if (const CXXRecordDecl *Pattern = + getDeclContextForTemplateInstationPattern(ND)) { + for (const NamedDecl *BaseND : Pattern->lookup(ND->getDeclName())) { + if (BaseND->isImplicit()) continue; - if (isa(ND)) - return ND; + if (BaseND->getKind() == ND->getKind()) + return BaseND; + } + } + } else if (const auto *ECD = dyn_cast(D)) { + if (const auto *ED = dyn_cast(ECD->getDeclContext())) { + if (const EnumDecl *Pattern = ED->getInstantiatedFromMemberEnum()) { + for (const NamedDecl *BaseECD : Pattern->lookup(ECD->getDeclName())) + return BaseECD; } } } diff --git a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp index fbfd3fe5cce..1e2cbde825f 100644 --- a/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/LiteralSupport.cpp @@ -563,7 +563,6 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, // Parse the suffix. At this point we can classify whether we have an FP or // integer constant. bool isFPConstant = isFloatingLiteral(); - const char *ImaginarySuffixLoc = nullptr; // Loop over all of the characters of the suffix. If we see something bad, // we break out of the loop. @@ -660,7 +659,6 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, case 'J': if (isImaginary) break; // Cannot be repeated. isImaginary = true; - ImaginarySuffixLoc = s; continue; // Success. } // If we reached here, there was an error or a ud-suffix. @@ -694,8 +692,7 @@ NumericLiteralParser::NumericLiteralParser(StringRef TokSpelling, } if (isImaginary) { - PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, - ImaginarySuffixLoc - ThisTokBegin), + PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin), diag::ext_imaginary_constant); } } diff --git a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp index 1f7003a2a4a..8c57931e47b 100644 --- a/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/ModuleMap.cpp @@ -84,6 +84,90 @@ Module *ModuleMap::resolveModuleId(const ModuleId &Id, Module *Mod, return Context; } +/// \brief Append to \p Paths the set of paths needed to get to the +/// subframework in which the given module lives. +static void appendSubframeworkPaths(Module *Mod, + SmallVectorImpl &Path) { + // Collect the framework names from the given module to the top-level module. + SmallVector Paths; + for (; Mod; Mod = Mod->Parent) { + if (Mod->IsFramework) + Paths.push_back(Mod->Name); + } + + if (Paths.empty()) + return; + + // Add Frameworks/Name.framework for each subframework. + for (unsigned I = Paths.size() - 1; I != 0; --I) + llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework"); +} + +const FileEntry * +ModuleMap::resolveHeader(Module *M, Module::UnresolvedHeaderDirective Header, + SmallVectorImpl &RelativePathName) { + if (llvm::sys::path::is_absolute(Header.FileName)) { + RelativePathName.clear(); + RelativePathName.append(Header.FileName.begin(), Header.FileName.end()); + return SourceMgr.getFileManager().getFile(Header.FileName); + } + + // Search for the header file within the module's home directory. + auto *Directory = M->Directory; + SmallString<128> FullPathName(Directory->getName()); + unsigned FullPathLength = FullPathName.size(); + + if (M->isPartOfFramework()) { + appendSubframeworkPaths(M, RelativePathName); + unsigned RelativePathLength = RelativePathName.size(); + + // Check whether this file is in the public headers. + llvm::sys::path::append(RelativePathName, "Headers", Header.FileName); + llvm::sys::path::append(FullPathName, RelativePathName); + if (auto *File = SourceMgr.getFileManager().getFile(FullPathName)) + return File; + + // Check whether this file is in the private headers. + // Ideally, private modules in the form 'FrameworkName.Private' should + // be defined as 'module FrameworkName.Private', and not as + // 'framework module FrameworkName.Private', since a 'Private.Framework' + // does not usually exist. However, since both are currently widely used + // for private modules, make sure we find the right path in both cases. + if (M->IsFramework && M->Name == "Private") + RelativePathName.clear(); + else + RelativePathName.resize(RelativePathLength); + FullPathName.resize(FullPathLength); + llvm::sys::path::append(RelativePathName, "PrivateHeaders", + Header.FileName); + llvm::sys::path::append(FullPathName, RelativePathName); + return SourceMgr.getFileManager().getFile(FullPathName); + } + + // Lookup for normal headers. + llvm::sys::path::append(RelativePathName, Header.FileName); + llvm::sys::path::append(FullPathName, RelativePathName); + return SourceMgr.getFileManager().getFile(FullPathName); +} + +const FileEntry * +ModuleMap::resolveAsBuiltinHeader(Module *M, + Module::UnresolvedHeaderDirective Header, + SmallVectorImpl &BuiltinPathName) { + if (llvm::sys::path::is_absolute(Header.FileName) || M->isPartOfFramework() || + !M->IsSystem || Header.IsUmbrella || !BuiltinIncludeDir || + BuiltinIncludeDir == M->Directory || !isBuiltinHeader(Header.FileName)) + return nullptr; + + // This is a system module with a top-level header. This header + // may have a counterpart (or replacement) in the set of headers + // supplied by Clang. Find that builtin header. + llvm::sys::path::append(BuiltinPathName, BuiltinIncludeDir->getName(), + Header.FileName); + return SourceMgr.getFileManager().getFile( + StringRef(BuiltinPathName.data(), BuiltinPathName.size())); +} + ModuleMap::ModuleMap(SourceManager &SourceMgr, DiagnosticsEngine &Diags, const LangOptions &LangOpts, const TargetInfo *Target, HeaderSearch &HeaderInfo) @@ -1026,9 +1110,6 @@ namespace clang { /// be resolved relative to. const DirectoryEntry *Directory; - /// \brief The directory containing Clang-supplied headers. - const DirectoryEntry *BuiltinIncludeDir; - /// \brief Whether this module map is in a system header directory. bool IsSystem; @@ -1087,12 +1168,10 @@ namespace clang { ModuleMap &Map, const FileEntry *ModuleMapFile, const DirectoryEntry *Directory, - const DirectoryEntry *BuiltinIncludeDir, bool IsSystem) : L(L), SourceMgr(SourceMgr), Target(Target), Diags(Diags), Map(Map), ModuleMapFile(ModuleMapFile), Directory(Directory), - BuiltinIncludeDir(BuiltinIncludeDir), IsSystem(IsSystem), - HadError(false), ActiveModule(nullptr) + IsSystem(IsSystem), HadError(false), ActiveModule(nullptr) { Tok.clear(); consumeToken(); @@ -1772,25 +1851,6 @@ void ModuleMapParser::parseRequiresDecl() { } while (true); } -/// \brief Append to \p Paths the set of paths needed to get to the -/// subframework in which the given module lives. -static void appendSubframeworkPaths(Module *Mod, - SmallVectorImpl &Path) { - // Collect the framework names from the given module to the top-level module. - SmallVector Paths; - for (; Mod; Mod = Mod->Parent) { - if (Mod->IsFramework) - Paths.push_back(Mod->Name); - } - - if (Paths.empty()) - return; - - // Add Frameworks/Name.framework for each subframework. - for (unsigned I = Paths.size() - 1; I != 0; --I) - llvm::sys::path::append(Path, "Frameworks", Paths[I-1] + ".framework"); -} - /// \brief Parse a header declaration. /// /// header-declaration: @@ -1843,85 +1903,36 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken, Module::UnresolvedHeaderDirective Header; Header.FileName = Tok.getString(); Header.FileNameLoc = consumeToken(); + Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword; // Check whether we already have an umbrella. - if (LeadingToken == MMToken::UmbrellaKeyword && ActiveModule->Umbrella) { + if (Header.IsUmbrella && ActiveModule->Umbrella) { Diags.Report(Header.FileNameLoc, diag::err_mmap_umbrella_clash) << ActiveModule->getFullModuleName(); HadError = true; return; } - // Look for this file. - const FileEntry *File = nullptr; - const FileEntry *BuiltinFile = nullptr; - SmallString<128> RelativePathName; - if (llvm::sys::path::is_absolute(Header.FileName)) { - RelativePathName = Header.FileName; - File = SourceMgr.getFileManager().getFile(RelativePathName); - } else { - // Search for the header file within the search directory. - SmallString<128> FullPathName(Directory->getName()); - unsigned FullPathLength = FullPathName.size(); + // Look for this file by name if we don't have any stat information. + SmallString<128> RelativePathName, BuiltinPathName; + const FileEntry *File = + Map.resolveHeader(ActiveModule, Header, RelativePathName); + const FileEntry *BuiltinFile = + Map.resolveAsBuiltinHeader(ActiveModule, Header, BuiltinPathName); - if (ActiveModule->isPartOfFramework()) { - appendSubframeworkPaths(ActiveModule, RelativePathName); - unsigned RelativePathLength = RelativePathName.size(); - - // Check whether this file is in the public headers. - llvm::sys::path::append(RelativePathName, "Headers", Header.FileName); - llvm::sys::path::append(FullPathName, RelativePathName); - File = SourceMgr.getFileManager().getFile(FullPathName); - - // Check whether this file is in the private headers. - if (!File) { - // Ideally, private modules in the form 'FrameworkName.Private' should - // be defined as 'module FrameworkName.Private', and not as - // 'framework module FrameworkName.Private', since a 'Private.Framework' - // does not usually exist. However, since both are currently widely used - // for private modules, make sure we find the right path in both cases. - if (ActiveModule->IsFramework && ActiveModule->Name == "Private") - RelativePathName.clear(); - else - RelativePathName.resize(RelativePathLength); - FullPathName.resize(FullPathLength); - llvm::sys::path::append(RelativePathName, "PrivateHeaders", - Header.FileName); - llvm::sys::path::append(FullPathName, RelativePathName); - File = SourceMgr.getFileManager().getFile(FullPathName); - } - } else { - // Lookup for normal headers. - llvm::sys::path::append(RelativePathName, Header.FileName); - llvm::sys::path::append(FullPathName, RelativePathName); - File = SourceMgr.getFileManager().getFile(FullPathName); - - // If this is a system module with a top-level header, this header - // may have a counterpart (or replacement) in the set of headers - // supplied by Clang. Find that builtin header. - if (ActiveModule->IsSystem && LeadingToken != MMToken::UmbrellaKeyword && - BuiltinIncludeDir && BuiltinIncludeDir != Directory && - ModuleMap::isBuiltinHeader(Header.FileName)) { - SmallString<128> BuiltinPathName(BuiltinIncludeDir->getName()); - llvm::sys::path::append(BuiltinPathName, Header.FileName); - BuiltinFile = SourceMgr.getFileManager().getFile(BuiltinPathName); - - // If Clang supplies this header but the underlying system does not, - // just silently swap in our builtin version. Otherwise, we'll end - // up adding both (later). - if (BuiltinFile && !File) { - File = BuiltinFile; - RelativePathName = BuiltinPathName; - BuiltinFile = nullptr; - } - } - } + // If Clang supplies this header but the underlying system does not, + // just silently swap in our builtin version. Otherwise, we'll end + // up adding both (later). + if (BuiltinFile && !File) { + RelativePathName = BuiltinPathName; + File = BuiltinFile; + BuiltinFile = nullptr; } // FIXME: We shouldn't be eagerly stat'ing every file named in a module map. // Come up with a lazy way to do this. if (File) { - if (LeadingToken == MMToken::UmbrellaKeyword) { + if (Header.IsUmbrella) { const DirectoryEntry *UmbrellaDir = File->getDir(); if (Module *UmbrellaModule = Map.UmbrellaDirs[UmbrellaDir]) { Diags.Report(LeadingLoc, diag::err_mmap_umbrella_clash) @@ -1938,10 +1949,7 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken, // If there is a builtin counterpart to this file, add it now so it can // wrap the system header. if (BuiltinFile) { - // FIXME: Taking the name from the FileEntry is unstable and can give - // different results depending on how we've previously named that file - // in this build. - Module::Header H = { BuiltinFile->getName(), BuiltinFile }; + Module::Header H = { BuiltinPathName.str(), BuiltinFile }; Map.addHeader(ActiveModule, H, Role); // If we have both a builtin and system version of the file, the @@ -1960,7 +1968,6 @@ void ModuleMapParser::parseHeaderDecl(MMToken::TokenKind LeadingToken, // If we find a module that has a missing header, we mark this module as // unavailable and store the header directive for displaying diagnostics. - Header.IsUmbrella = LeadingToken == MMToken::UmbrellaKeyword; ActiveModule->markUnavailable(); ActiveModule->MissingHeaders.push_back(Header); } @@ -2555,7 +2562,7 @@ bool ModuleMap::parseModuleMapFile(const FileEntry *File, bool IsSystem, Buffer->getBufferEnd()); SourceLocation Start = L.getSourceLocation(); ModuleMapParser Parser(L, SourceMgr, Target, Diags, *this, File, Dir, - BuiltinIncludeDir, IsSystem); + IsSystem); bool Result = Parser.parseModuleMapFile(); ParsedModuleMap[File] = Result; diff --git a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp index 0534aeb10cc..030717b8bd5 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPDirectives.cpp @@ -1171,18 +1171,26 @@ void Preprocessor::HandleLineDirective() { CheckEndOfDirective("line", true); } - SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID); + // Take the file kind of the file containing the #line directive. #line + // directives are often used for generated sources from the same codebase, so + // the new file should generally be classified the same way as the current + // file. This is visible in GCC's pre-processed output, which rewrites #line + // to GNU line markers. + SrcMgr::CharacteristicKind FileKind = + SourceMgr.getFileCharacteristic(DigitTok.getLocation()); + + SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, false, + false, FileKind); if (Callbacks) Callbacks->FileChanged(CurPPLexer->getSourceLocation(), - PPCallbacks::RenameFile, - SrcMgr::C_User); + PPCallbacks::RenameFile, FileKind); } /// ReadLineMarkerFlags - Parse and validate any flags at the end of a GNU line /// marker directive. static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, - bool &IsSystemHeader, bool &IsExternCHeader, + SrcMgr::CharacteristicKind &FileKind, Preprocessor &PP) { unsigned FlagVal; Token FlagTok; @@ -1233,7 +1241,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, return true; } - IsSystemHeader = true; + FileKind = SrcMgr::C_System; PP.Lex(FlagTok); if (FlagTok.is(tok::eod)) return false; @@ -1247,7 +1255,7 @@ static bool ReadLineMarkerFlags(bool &IsFileEntry, bool &IsFileExit, return true; } - IsExternCHeader = true; + FileKind = SrcMgr::C_ExternCSystem; PP.Lex(FlagTok); if (FlagTok.is(tok::eod)) return false; @@ -1277,14 +1285,15 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { Lex(StrTok); bool IsFileEntry = false, IsFileExit = false; - bool IsSystemHeader = false, IsExternCHeader = false; int FilenameID = -1; + SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; // If the StrTok is "eod", then it wasn't present. Otherwise, it must be a // string followed by eod. - if (StrTok.is(tok::eod)) - ; // ok - else if (StrTok.isNot(tok::string_literal)) { + if (StrTok.is(tok::eod)) { + // Treat this like "#line NN", which doesn't change file characteristics. + FileKind = SourceMgr.getFileCharacteristic(DigitTok.getLocation()); + } else if (StrTok.isNot(tok::string_literal)) { Diag(StrTok, diag::err_pp_linemarker_invalid_filename); return DiscardUntilEndOfDirective(); } else if (StrTok.hasUDSuffix()) { @@ -1303,15 +1312,13 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString()); // If a filename was present, read any flags that are present. - if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, - IsSystemHeader, IsExternCHeader, *this)) + if (ReadLineMarkerFlags(IsFileEntry, IsFileExit, FileKind, *this)) return; } // Create a line note with this information. - SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, - IsFileEntry, IsFileExit, - IsSystemHeader, IsExternCHeader); + SourceMgr.AddLineNote(DigitTok.getLocation(), LineNo, FilenameID, IsFileEntry, + IsFileExit, FileKind); // If the preprocessor has callbacks installed, notify them of the #line // change. This is used so that the line marker comes out in -E mode for @@ -1322,11 +1329,6 @@ void Preprocessor::HandleDigitDirective(Token &DigitTok) { Reason = PPCallbacks::EnterFile; else if (IsFileExit) Reason = PPCallbacks::ExitFile; - SrcMgr::CharacteristicKind FileKind = SrcMgr::C_User; - if (IsExternCHeader) - FileKind = SrcMgr::C_ExternCSystem; - else if (IsSystemHeader) - FileKind = SrcMgr::C_System; Callbacks->FileChanged(CurPPLexer->getSourceLocation(), Reason, FileKind); } diff --git a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp index 6c7663994a4..a6bfc32e221 100644 --- a/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -1125,6 +1125,7 @@ static bool HasFeature(const Preprocessor &PP, StringRef Feature) { .Case("attribute_overloadable", true) .Case("attribute_unavailable_with_message", true) .Case("attribute_unused_on_fields", true) + .Case("attribute_diagnose_if_objc", true) .Case("blocks", LangOpts.Blocks) .Case("c_thread_safety_attributes", true) .Case("cxx_exceptions", LangOpts.CXXExceptions) diff --git a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp index 99d56182c1b..2d078a4e760 100644 --- a/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp +++ b/contrib/llvm/tools/clang/lib/Lex/Pragma.cpp @@ -475,9 +475,9 @@ void Preprocessor::HandlePragmaSystemHeader(Token &SysHeaderTok) { // Emit a line marker. This will change any source locations from this point // forward to realize they are in a system header. // Create a line note with this information. - SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine()+1, + SourceMgr.AddLineNote(SysHeaderTok.getLocation(), PLoc.getLine() + 1, FilenameID, /*IsEntry=*/false, /*IsExit=*/false, - /*IsSystem=*/true, /*IsExternC=*/false); + SrcMgr::C_System); } /// HandlePragmaDependency - Handle \#pragma GCC dependency "foo" blah. diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp index 0b210e0360a..dcafbadae5c 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseExprCXX.cpp @@ -2120,31 +2120,18 @@ bool Parser::ParseUnqualifiedIdTemplateId(CXXScopeSpec &SS, Id.getKind() == UnqualifiedId::IK_LiteralOperatorId) { // Form a parsed representation of the template-id to be stored in the // UnqualifiedId. - TemplateIdAnnotation *TemplateId - = TemplateIdAnnotation::Allocate(TemplateArgs.size(), TemplateIds); // FIXME: Store name for literal operator too. - if (Id.getKind() == UnqualifiedId::IK_Identifier) { - TemplateId->Name = Id.Identifier; - TemplateId->Operator = OO_None; - TemplateId->TemplateNameLoc = Id.StartLocation; - } else { - TemplateId->Name = nullptr; - TemplateId->Operator = Id.OperatorFunctionId.Operator; - TemplateId->TemplateNameLoc = Id.StartLocation; - } + IdentifierInfo *TemplateII = + Id.getKind() == UnqualifiedId::IK_Identifier ? Id.Identifier : nullptr; + OverloadedOperatorKind OpKind = Id.getKind() == UnqualifiedId::IK_Identifier + ? OO_None + : Id.OperatorFunctionId.Operator; + + TemplateIdAnnotation *TemplateId = TemplateIdAnnotation::Create( + SS, TemplateKWLoc, Id.StartLocation, TemplateII, OpKind, Template, TNK, + LAngleLoc, RAngleLoc, TemplateArgs, TemplateIds); - TemplateId->SS = SS; - TemplateId->TemplateKWLoc = TemplateKWLoc; - TemplateId->Template = Template; - TemplateId->Kind = TNK; - TemplateId->LAngleLoc = LAngleLoc; - TemplateId->RAngleLoc = RAngleLoc; - ParsedTemplateArgument *Args = TemplateId->getTemplateArgs(); - for (unsigned Arg = 0, ArgEnd = TemplateArgs.size(); - Arg != ArgEnd; ++Arg) - Args[Arg] = TemplateArgs[Arg]; - Id.setTemplateId(TemplateId); return false; } diff --git a/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp b/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp index a919d870810..37c80fe5e52 100644 --- a/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp +++ b/contrib/llvm/tools/clang/lib/Parse/ParseTemplate.cpp @@ -1011,25 +1011,21 @@ bool Parser::AnnotateTemplateIdToken(TemplateTy Template, TemplateNameKind TNK, // Build a template-id annotation token that can be processed // later. Tok.setKind(tok::annot_template_id); - TemplateIdAnnotation *TemplateId - = TemplateIdAnnotation::Allocate(TemplateArgs.size(), TemplateIds); - TemplateId->TemplateNameLoc = TemplateNameLoc; - if (TemplateName.getKind() == UnqualifiedId::IK_Identifier) { - TemplateId->Name = TemplateName.Identifier; - TemplateId->Operator = OO_None; - } else { - TemplateId->Name = nullptr; - TemplateId->Operator = TemplateName.OperatorFunctionId.Operator; - } - TemplateId->SS = SS; - TemplateId->TemplateKWLoc = TemplateKWLoc; - TemplateId->Template = Template; - TemplateId->Kind = TNK; - TemplateId->LAngleLoc = LAngleLoc; - TemplateId->RAngleLoc = RAngleLoc; - ParsedTemplateArgument *Args = TemplateId->getTemplateArgs(); - for (unsigned Arg = 0, ArgEnd = TemplateArgs.size(); Arg != ArgEnd; ++Arg) - Args[Arg] = ParsedTemplateArgument(TemplateArgs[Arg]); + + IdentifierInfo *TemplateII = + TemplateName.getKind() == UnqualifiedId::IK_Identifier + ? TemplateName.Identifier + : nullptr; + + OverloadedOperatorKind OpKind = + TemplateName.getKind() == UnqualifiedId::IK_Identifier + ? OO_None + : TemplateName.OperatorFunctionId.Operator; + + TemplateIdAnnotation *TemplateId = TemplateIdAnnotation::Create( + SS, TemplateKWLoc, TemplateNameLoc, TemplateII, OpKind, Template, TNK, + LAngleLoc, RAngleLoc, TemplateArgs, TemplateIds); + Tok.setAnnotationValue(TemplateId); if (TemplateKWLoc.isValid()) Tok.setLocation(TemplateKWLoc); diff --git a/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp b/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp index 50ad113fc88..db688b12cbc 100644 --- a/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/AnalysisBasedWarnings.cpp @@ -542,6 +542,7 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body, bool ReturnsVoid = false; bool HasNoReturn = false; + bool IsCoroutine = S.getCurFunction() && S.getCurFunction()->isCoroutine(); if (const auto *FD = dyn_cast(D)) { if (const auto *CBody = dyn_cast(Body)) @@ -570,8 +571,13 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body, // Short circuit for compilation speed. if (CD.checkDiagnostics(Diags, ReturnsVoid, HasNoReturn)) return; - SourceLocation LBrace = Body->getLocStart(), RBrace = Body->getLocEnd(); + auto EmitDiag = [&](SourceLocation Loc, unsigned DiagID) { + if (IsCoroutine) + S.Diag(Loc, DiagID) << S.getCurFunction()->CoroutinePromise->getType(); + else + S.Diag(Loc, DiagID); + }; // Either in a function body compound statement, or a function-try-block. switch (CheckFallThrough(AC)) { case UnknownFallThrough: @@ -579,15 +585,15 @@ static void CheckFallThroughForBody(Sema &S, const Decl *D, const Stmt *Body, case MaybeFallThrough: if (HasNoReturn) - S.Diag(RBrace, CD.diag_MaybeFallThrough_HasNoReturn); + EmitDiag(RBrace, CD.diag_MaybeFallThrough_HasNoReturn); else if (!ReturnsVoid) - S.Diag(RBrace, CD.diag_MaybeFallThrough_ReturnsNonVoid); + EmitDiag(RBrace, CD.diag_MaybeFallThrough_ReturnsNonVoid); break; case AlwaysFallThrough: if (HasNoReturn) - S.Diag(RBrace, CD.diag_AlwaysFallThrough_HasNoReturn); + EmitDiag(RBrace, CD.diag_AlwaysFallThrough_HasNoReturn); else if (!ReturnsVoid) - S.Diag(RBrace, CD.diag_AlwaysFallThrough_ReturnsNonVoid); + EmitDiag(RBrace, CD.diag_AlwaysFallThrough_ReturnsNonVoid); break; case NeverFallThroughOrReturn: if (ReturnsVoid && !HasNoReturn && CD.diag_NeverFallThroughOrReturn) { @@ -2027,12 +2033,6 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, // Warning: check missing 'return' if (P.enableCheckFallThrough) { - auto IsCoro = [&]() { - if (auto *FD = dyn_cast(D)) - if (FD->getBody() && isa(FD->getBody())) - return true; - return false; - }; const CheckFallThroughDiagnostics &CD = (isa(D) ? CheckFallThroughDiagnostics::MakeForBlock() @@ -2040,7 +2040,7 @@ AnalysisBasedWarnings::IssueWarnings(sema::AnalysisBasedWarnings::Policy P, cast(D)->getOverloadedOperator() == OO_Call && cast(D)->getParent()->isLambda()) ? CheckFallThroughDiagnostics::MakeForLambda() - : (IsCoro() + : (fscope->isCoroutine() ? CheckFallThroughDiagnostics::MakeForCoroutine(D) : CheckFallThroughDiagnostics::MakeForFunction(D))); CheckFallThroughForBody(S, D, Body, blkExpr, CD, AC); diff --git a/contrib/llvm/tools/clang/lib/Sema/CoroutineStmtBuilder.h b/contrib/llvm/tools/clang/lib/Sema/CoroutineStmtBuilder.h index 4958576219e..954a0f100eb 100644 --- a/contrib/llvm/tools/clang/lib/Sema/CoroutineStmtBuilder.h +++ b/contrib/llvm/tools/clang/lib/Sema/CoroutineStmtBuilder.h @@ -28,7 +28,6 @@ class CoroutineStmtBuilder : public CoroutineBodyStmt::CtorArgs { sema::FunctionScopeInfo &Fn; bool IsValid = true; SourceLocation Loc; - QualType RetType; SmallVector ParamMovesVector; const bool IsPromiseDependentType; CXXRecordDecl *PromiseRecordDecl = nullptr; @@ -61,6 +60,7 @@ private: bool makeOnFallthrough(); bool makeOnException(); bool makeReturnObject(); + bool makeGroDeclAndReturnStmt(); bool makeReturnOnAllocFailure(); bool makeParamMoves(); }; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp index 14dd6267b85..b794628db73 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaChecking.cpp @@ -1696,6 +1696,9 @@ bool Sema::CheckPPCBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { case PPC::BI__builtin_tabortdci: return SemaBuiltinConstantArgRange(TheCall, 0, 0, 31) || SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); + case PPC::BI__builtin_vsx_xxpermdi: + case PPC::BI__builtin_vsx_xxsldwi: + return SemaBuiltinVSX(TheCall); } return SemaBuiltinConstantArgRange(TheCall, i, l, u); } @@ -3892,6 +3895,65 @@ bool Sema::SemaBuiltinFPClassification(CallExpr *TheCall, unsigned NumArgs) { return false; } +// Customized Sema Checking for VSX builtins that have the following signature: +// vector [...] builtinName(vector [...], vector [...], const int); +// Which takes the same type of vectors (any legal vector type) for the first +// two arguments and takes compile time constant for the third argument. +// Example builtins are : +// vector double vec_xxpermdi(vector double, vector double, int); +// vector short vec_xxsldwi(vector short, vector short, int); +bool Sema::SemaBuiltinVSX(CallExpr *TheCall) { + unsigned ExpectedNumArgs = 3; + if (TheCall->getNumArgs() < ExpectedNumArgs) + return Diag(TheCall->getLocEnd(), + diag::err_typecheck_call_too_few_args_at_least) + << 0 /*function call*/ << ExpectedNumArgs << TheCall->getNumArgs() + << TheCall->getSourceRange(); + + if (TheCall->getNumArgs() > ExpectedNumArgs) + return Diag(TheCall->getLocEnd(), + diag::err_typecheck_call_too_many_args_at_most) + << 0 /*function call*/ << ExpectedNumArgs << TheCall->getNumArgs() + << TheCall->getSourceRange(); + + // Check the third argument is a compile time constant + llvm::APSInt Value; + if(!TheCall->getArg(2)->isIntegerConstantExpr(Value, Context)) + return Diag(TheCall->getLocStart(), + diag::err_vsx_builtin_nonconstant_argument) + << 3 /* argument index */ << TheCall->getDirectCallee() + << SourceRange(TheCall->getArg(2)->getLocStart(), + TheCall->getArg(2)->getLocEnd()); + + QualType Arg1Ty = TheCall->getArg(0)->getType(); + QualType Arg2Ty = TheCall->getArg(1)->getType(); + + // Check the type of argument 1 and argument 2 are vectors. + SourceLocation BuiltinLoc = TheCall->getLocStart(); + if ((!Arg1Ty->isVectorType() && !Arg1Ty->isDependentType()) || + (!Arg2Ty->isVectorType() && !Arg2Ty->isDependentType())) { + return Diag(BuiltinLoc, diag::err_vec_builtin_non_vector) + << TheCall->getDirectCallee() + << SourceRange(TheCall->getArg(0)->getLocStart(), + TheCall->getArg(1)->getLocEnd()); + } + + // Check the first two arguments are the same type. + if (!Context.hasSameUnqualifiedType(Arg1Ty, Arg2Ty)) { + return Diag(BuiltinLoc, diag::err_vec_builtin_incompatible_vector) + << TheCall->getDirectCallee() + << SourceRange(TheCall->getArg(0)->getLocStart(), + TheCall->getArg(1)->getLocEnd()); + } + + // When default clang type checking is turned off and the customized type + // checking is used, the returning type of the function must be explicitly + // set. Otherwise it is _Bool by default. + TheCall->setType(Arg1Ty); + + return false; +} + /// SemaBuiltinShuffleVector - Handle __builtin_shufflevector. // This is declared to take (...), so we have to check everything. ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { @@ -3914,7 +3976,8 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { if (!LHSType->isVectorType() || !RHSType->isVectorType()) return ExprError(Diag(TheCall->getLocStart(), - diag::err_shufflevector_non_vector) + diag::err_vec_builtin_non_vector) + << TheCall->getDirectCallee() << SourceRange(TheCall->getArg(0)->getLocStart(), TheCall->getArg(1)->getLocEnd())); @@ -3928,12 +3991,14 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { if (!RHSType->hasIntegerRepresentation() || RHSType->getAs()->getNumElements() != numElements) return ExprError(Diag(TheCall->getLocStart(), - diag::err_shufflevector_incompatible_vector) + diag::err_vec_builtin_incompatible_vector) + << TheCall->getDirectCallee() << SourceRange(TheCall->getArg(1)->getLocStart(), TheCall->getArg(1)->getLocEnd())); } else if (!Context.hasSameUnqualifiedType(LHSType, RHSType)) { return ExprError(Diag(TheCall->getLocStart(), - diag::err_shufflevector_incompatible_vector) + diag::err_vec_builtin_incompatible_vector) + << TheCall->getDirectCallee() << SourceRange(TheCall->getArg(0)->getLocStart(), TheCall->getArg(1)->getLocEnd())); } else if (numElements != numResElements) { diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp index c709a1a723d..ae6c35f2206 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaCoroutine.cpp @@ -23,14 +23,22 @@ using namespace clang; using namespace sema; -static bool lookupMember(Sema &S, const char *Name, CXXRecordDecl *RD, - SourceLocation Loc) { +static LookupResult lookupMember(Sema &S, const char *Name, CXXRecordDecl *RD, + SourceLocation Loc, bool &Res) { DeclarationName DN = S.PP.getIdentifierInfo(Name); LookupResult LR(S, DN, Loc, Sema::LookupMemberName); // Suppress diagnostics when a private member is selected. The same warnings // will be produced again when building the call. LR.suppressDiagnostics(); - return S.LookupQualifiedName(LR, RD); + Res = S.LookupQualifiedName(LR, RD); + return LR; +} + +static bool lookupMember(Sema &S, const char *Name, CXXRecordDecl *RD, + SourceLocation Loc) { + bool Res; + lookupMember(S, Name, RD, Loc, Res); + return Res; } /// Look up the std::coroutine_traits<...>::promise_type for the given @@ -120,8 +128,7 @@ static QualType lookupPromiseType(Sema &S, const FunctionProtoType *FnType, return PromiseType; } -/// Look up the std::coroutine_traits<...>::promise_type for the given -/// function type. +/// Look up the std::experimental::coroutine_handle. static QualType lookupCoroutineHandleType(Sema &S, QualType PromiseType, SourceLocation Loc) { if (PromiseType.isNull()) @@ -314,6 +321,7 @@ static ExprResult buildCoroutineHandle(Sema &S, QualType PromiseType, } struct ReadySuspendResumeResult { + enum AwaitCallType { ACT_Ready, ACT_Suspend, ACT_Resume }; Expr *Results[3]; OpaqueValueExpr *OpaqueValue; bool IsInvalid; @@ -359,7 +367,41 @@ static ReadySuspendResumeResult buildCoawaitCalls(Sema &S, VarDecl *CoroPromise, Calls.Results[I] = Result.get(); } + // Assume the calls are valid; all further checking should make them invalid. Calls.IsInvalid = false; + + using ACT = ReadySuspendResumeResult::AwaitCallType; + CallExpr *AwaitReady = cast(Calls.Results[ACT::ACT_Ready]); + if (!AwaitReady->getType()->isDependentType()) { + // [expr.await]p3 [...] + // — await-ready is the expression e.await_ready(), contextually converted + // to bool. + ExprResult Conv = S.PerformContextuallyConvertToBool(AwaitReady); + if (Conv.isInvalid()) { + S.Diag(AwaitReady->getDirectCallee()->getLocStart(), + diag::note_await_ready_no_bool_conversion); + S.Diag(Loc, diag::note_coroutine_promise_call_implicitly_required) + << AwaitReady->getDirectCallee() << E->getSourceRange(); + Calls.IsInvalid = true; + } + Calls.Results[ACT::ACT_Ready] = Conv.get(); + } + CallExpr *AwaitSuspend = cast(Calls.Results[ACT::ACT_Suspend]); + if (!AwaitSuspend->getType()->isDependentType()) { + // [expr.await]p3 [...] + // - await-suspend is the expression e.await_suspend(h), which shall be + // a prvalue of type void or bool. + QualType RetType = AwaitSuspend->getType(); + if (RetType != S.Context.BoolTy && RetType != S.Context.VoidTy) { + S.Diag(AwaitSuspend->getCalleeDecl()->getLocation(), + diag::err_await_suspend_invalid_return_type) + << RetType; + S.Diag(Loc, diag::note_coroutine_promise_call_implicitly_required) + << AwaitSuspend->getDirectCallee(); + Calls.IsInvalid = true; + } + } + return Calls; } @@ -373,7 +415,6 @@ static ExprResult buildPromiseCall(Sema &S, VarDecl *Promise, if (PromiseRef.isInvalid()) return ExprError(); - // Call 'yield_value', passing in E. return buildMemberCall(S, PromiseRef.get(), Loc, Name, Args); } @@ -721,17 +762,19 @@ static FunctionDecl *findDeleteForPromise(Sema &S, SourceLocation Loc, void Sema::CheckCompletedCoroutineBody(FunctionDecl *FD, Stmt *&Body) { FunctionScopeInfo *Fn = getCurFunction(); - assert(Fn && Fn->CoroutinePromise && "not a coroutine"); - + assert(Fn && Fn->isCoroutine() && "not a coroutine"); if (!Body) { assert(FD->isInvalidDecl() && "a null body is only allowed for invalid declarations"); return; } + // We have a function that uses coroutine keywords, but we failed to build + // the promise type. + if (!Fn->CoroutinePromise) + return FD->setInvalidDecl(); if (isa(Body)) { - // FIXME(EricWF): Nothing todo. the body is already a transformed coroutine - // body statement. + // Nothing todo. the body is already a transformed coroutine body statement. return; } @@ -780,7 +823,8 @@ bool CoroutineStmtBuilder::buildDependentStatements() { assert(!this->IsPromiseDependentType && "coroutine cannot have a dependent promise type"); this->IsValid = makeOnException() && makeOnFallthrough() && - makeReturnOnAllocFailure() && makeNewAndDeleteExpr(); + makeGroDeclAndReturnStmt() && makeReturnOnAllocFailure() && + makeNewAndDeleteExpr(); return this->IsValid; } @@ -857,15 +901,15 @@ bool CoroutineStmtBuilder::makeReturnOnAllocFailure() { if (ReturnObjectOnAllocationFailure.isInvalid()) return false; - // FIXME: ActOnReturnStmt expects a scope that is inside of the function, due - // to CheckJumpOutOfSEHFinally(*this, ReturnLoc, *CurScope->getFnParent()); - // S.getCurScope()->getFnParent() == nullptr at ActOnFinishFunctionBody when - // CoroutineBodyStmt is built. Figure it out and fix it. - // Use BuildReturnStmt here to unbreak sanitized tests. (Gor:3/27/2017) StmtResult ReturnStmt = S.BuildReturnStmt(Loc, ReturnObjectOnAllocationFailure.get()); - if (ReturnStmt.isInvalid()) + if (ReturnStmt.isInvalid()) { + S.Diag(Found.getFoundDecl()->getLocation(), diag::note_member_declared_here) + << DN; + S.Diag(Fn.FirstCoroutineStmtLoc, diag::note_declared_coroutine_here) + << Fn.getFirstCoroutineStmtKeyword(); return false; + } this->ReturnStmtOnAllocFailure = ReturnStmt.get(); return true; @@ -991,13 +1035,32 @@ bool CoroutineStmtBuilder::makeOnFallthrough() { // [dcl.fct.def.coroutine]/4 // The unqualified-ids 'return_void' and 'return_value' are looked up in // the scope of class P. If both are found, the program is ill-formed. - const bool HasRVoid = lookupMember(S, "return_void", PromiseRecordDecl, Loc); - const bool HasRValue = lookupMember(S, "return_value", PromiseRecordDecl, Loc); + bool HasRVoid, HasRValue; + LookupResult LRVoid = + lookupMember(S, "return_void", PromiseRecordDecl, Loc, HasRVoid); + LookupResult LRValue = + lookupMember(S, "return_value", PromiseRecordDecl, Loc, HasRValue); StmtResult Fallthrough; if (HasRVoid && HasRValue) { // FIXME Improve this diagnostic - S.Diag(FD.getLocation(), diag::err_coroutine_promise_return_ill_formed) + S.Diag(FD.getLocation(), + diag::err_coroutine_promise_incompatible_return_functions) + << PromiseRecordDecl; + S.Diag(LRVoid.getRepresentativeDecl()->getLocation(), + diag::note_member_first_declared_here) + << LRVoid.getLookupName(); + S.Diag(LRValue.getRepresentativeDecl()->getLocation(), + diag::note_member_first_declared_here) + << LRValue.getLookupName(); + return false; + } else if (!HasRVoid && !HasRValue) { + // FIXME: The PDTS currently specifies this case as UB, not ill-formed. + // However we still diagnose this as an error since until the PDTS is fixed. + S.Diag(FD.getLocation(), + diag::err_coroutine_promise_requires_return_function) + << PromiseRecordDecl; + S.Diag(PromiseRecordDecl->getLocation(), diag::note_defined_here) << PromiseRecordDecl; return false; } else if (HasRVoid) { @@ -1029,6 +1092,8 @@ bool CoroutineStmtBuilder::makeOnException() { : diag:: warn_coroutine_promise_unhandled_exception_required_with_exceptions; S.Diag(Loc, DiagID) << PromiseRecordDecl; + S.Diag(PromiseRecordDecl->getLocation(), diag::note_defined_here) + << PromiseRecordDecl; return !RequireUnhandledException; } @@ -1042,37 +1107,185 @@ bool CoroutineStmtBuilder::makeOnException() { if (UnhandledException.isInvalid()) return false; + // Since the body of the coroutine will be wrapped in try-catch, it will + // be incompatible with SEH __try if present in a function. + if (!S.getLangOpts().Borland && Fn.FirstSEHTryLoc.isValid()) { + S.Diag(Fn.FirstSEHTryLoc, diag::err_seh_in_a_coroutine_with_cxx_exceptions); + S.Diag(Fn.FirstCoroutineStmtLoc, diag::note_declared_coroutine_here) + << Fn.getFirstCoroutineStmtKeyword(); + return false; + } + this->OnException = UnhandledException.get(); return true; } bool CoroutineStmtBuilder::makeReturnObject() { - // Build implicit 'p.get_return_object()' expression and form initialization // of return type from it. ExprResult ReturnObject = buildPromiseCall(S, Fn.CoroutinePromise, Loc, "get_return_object", None); if (ReturnObject.isInvalid()) return false; - QualType RetType = FD.getReturnType(); - if (!RetType->isDependentType()) { - InitializedEntity Entity = - InitializedEntity::InitializeResult(Loc, RetType, false); - ReturnObject = S.PerformMoveOrCopyInitialization(Entity, nullptr, RetType, - ReturnObject.get()); - if (ReturnObject.isInvalid()) - return false; - } - ReturnObject = S.ActOnFinishFullExpr(ReturnObject.get(), Loc); - if (ReturnObject.isInvalid()) - return false; this->ReturnValue = ReturnObject.get(); return true; } +static void noteMemberDeclaredHere(Sema &S, Expr *E, FunctionScopeInfo &Fn) { + if (auto *MbrRef = dyn_cast(E)) { + auto *MethodDecl = MbrRef->getMethodDecl(); + S.Diag(MethodDecl->getLocation(), diag::note_member_declared_here) + << MethodDecl; + } + S.Diag(Fn.FirstCoroutineStmtLoc, diag::note_declared_coroutine_here) + << Fn.getFirstCoroutineStmtKeyword(); +} + +bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() { + assert(!IsPromiseDependentType && + "cannot make statement while the promise type is dependent"); + assert(this->ReturnValue && "ReturnValue must be already formed"); + + QualType const GroType = this->ReturnValue->getType(); + assert(!GroType->isDependentType() && + "get_return_object type must no longer be dependent"); + + QualType const FnRetType = FD.getReturnType(); + assert(!FnRetType->isDependentType() && + "get_return_object type must no longer be dependent"); + + if (FnRetType->isVoidType()) { + ExprResult Res = S.ActOnFinishFullExpr(this->ReturnValue, Loc); + if (Res.isInvalid()) + return false; + + this->ResultDecl = Res.get(); + return true; + } + + if (GroType->isVoidType()) { + // Trigger a nice error message. + InitializedEntity Entity = + InitializedEntity::InitializeResult(Loc, FnRetType, false); + S.PerformMoveOrCopyInitialization(Entity, nullptr, FnRetType, ReturnValue); + noteMemberDeclaredHere(S, ReturnValue, Fn); + return false; + } + + auto *GroDecl = VarDecl::Create( + S.Context, &FD, FD.getLocation(), FD.getLocation(), + &S.PP.getIdentifierTable().get("__coro_gro"), GroType, + S.Context.getTrivialTypeSourceInfo(GroType, Loc), SC_None); + + S.CheckVariableDeclarationType(GroDecl); + if (GroDecl->isInvalidDecl()) + return false; + + InitializedEntity Entity = InitializedEntity::InitializeVariable(GroDecl); + ExprResult Res = S.PerformMoveOrCopyInitialization(Entity, nullptr, GroType, + this->ReturnValue); + if (Res.isInvalid()) + return false; + + Res = S.ActOnFinishFullExpr(Res.get()); + if (Res.isInvalid()) + return false; + + if (GroType == FnRetType) { + GroDecl->setNRVOVariable(true); + } + + S.AddInitializerToDecl(GroDecl, Res.get(), + /*DirectInit=*/false); + + S.FinalizeDeclaration(GroDecl); + + // Form a declaration statement for the return declaration, so that AST + // visitors can more easily find it. + StmtResult GroDeclStmt = + S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(GroDecl), Loc, Loc); + if (GroDeclStmt.isInvalid()) + return false; + + this->ResultDecl = GroDeclStmt.get(); + + ExprResult declRef = S.BuildDeclRefExpr(GroDecl, GroType, VK_LValue, Loc); + if (declRef.isInvalid()) + return false; + + StmtResult ReturnStmt = S.BuildReturnStmt(Loc, declRef.get()); + if (ReturnStmt.isInvalid()) { + noteMemberDeclaredHere(S, ReturnValue, Fn); + return false; + } + + this->ReturnStmt = ReturnStmt.get(); + return true; +} + +// Create a static_cast\(expr). +static Expr *castForMoving(Sema &S, Expr *E, QualType T = QualType()) { + if (T.isNull()) + T = E->getType(); + QualType TargetType = S.BuildReferenceType( + T, /*SpelledAsLValue*/ false, SourceLocation(), DeclarationName()); + SourceLocation ExprLoc = E->getLocStart(); + TypeSourceInfo *TargetLoc = + S.Context.getTrivialTypeSourceInfo(TargetType, ExprLoc); + + return S + .BuildCXXNamedCast(ExprLoc, tok::kw_static_cast, TargetLoc, E, + SourceRange(ExprLoc, ExprLoc), E->getSourceRange()) + .get(); +} + +/// \brief Build a variable declaration for move parameter. +static VarDecl *buildVarDecl(Sema &S, SourceLocation Loc, QualType Type, + StringRef Name) { + DeclContext *DC = S.CurContext; + IdentifierInfo *II = &S.PP.getIdentifierTable().get(Name); + TypeSourceInfo *TInfo = S.Context.getTrivialTypeSourceInfo(Type, Loc); + VarDecl *Decl = + VarDecl::Create(S.Context, DC, Loc, Loc, II, Type, TInfo, SC_None); + Decl->setImplicit(); + return Decl; +} + bool CoroutineStmtBuilder::makeParamMoves() { - // FIXME: Perform move-initialization of parameters into frame-local copies. + for (auto *paramDecl : FD.parameters()) { + auto Ty = paramDecl->getType(); + if (Ty->isDependentType()) + continue; + + // No need to copy scalars, llvm will take care of them. + if (Ty->getAsCXXRecordDecl()) { + if (!paramDecl->getIdentifier()) + continue; + + ExprResult ParamRef = + S.BuildDeclRefExpr(paramDecl, paramDecl->getType(), + ExprValueKind::VK_LValue, Loc); // FIXME: scope? + if (ParamRef.isInvalid()) + return false; + + Expr *RCast = castForMoving(S, ParamRef.get()); + + auto D = buildVarDecl(S, Loc, Ty, paramDecl->getIdentifier()->getName()); + + S.AddInitializerToDecl(D, RCast, /*DirectInit=*/true); + + // Convert decl to a statement. + StmtResult Stmt = S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(D), Loc, Loc); + if (Stmt.isInvalid()) + return false; + + ParamMovesVector.push_back(Stmt.get()); + } + } + + // Convert to ArrayRef in CtorArgs structure that builder inherits from. + ParamMoves = ParamMovesVector; return true; } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp index 5e937aa6996..96fd952c81c 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDecl.cpp @@ -6516,7 +6516,7 @@ NamedDecl *Sema::ActOnVariableDeclarator( diag::err_thread_non_global) << DeclSpec::getSpecifierName(TSCS); else if (!Context.getTargetInfo().isTLSSupported()) { - if (getLangOpts().CUDA) { + if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice) { // Postpone error emission until we've collected attributes required to // figure out whether it's a host or device variable and whether the // error should be ignored. @@ -6578,8 +6578,11 @@ NamedDecl *Sema::ActOnVariableDeclarator( // Handle attributes prior to checking for duplicates in MergeVarDecl ProcessDeclAttributes(S, NewVD, D); - if (getLangOpts().CUDA) { - if (EmitTLSUnsupportedError && DeclAttrsMatchCUDAMode(getLangOpts(), NewVD)) + if (getLangOpts().CUDA || getLangOpts().OpenMPIsDevice) { + if (EmitTLSUnsupportedError && + ((getLangOpts().CUDA && DeclAttrsMatchCUDAMode(getLangOpts(), NewVD)) || + (getLangOpts().OpenMPIsDevice && + NewVD->hasAttr()))) Diag(D.getDeclSpec().getThreadStorageClassSpecLoc(), diag::err_thread_unsupported); // CUDA B.2.5: "__shared__ and __constant__ variables have implied static @@ -7917,10 +7920,7 @@ static OpenCLParamType getOpenCLKernelParameterType(Sema &S, QualType PT) { if (PT->isImageType()) return PtrKernelParam; - if (PT->isBooleanType()) - return InvalidKernelParam; - - if (PT->isEventT()) + if (PT->isBooleanType() || PT->isEventT() || PT->isReserveIDT()) return InvalidKernelParam; // OpenCL extension spec v1.2 s9.5: @@ -12176,7 +12176,7 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body, sema::AnalysisBasedWarnings::Policy WP = AnalysisWarnings.getDefaultPolicy(); sema::AnalysisBasedWarnings::Policy *ActivePolicy = nullptr; - if (getLangOpts().CoroutinesTS && getCurFunction()->CoroutinePromise) + if (getLangOpts().CoroutinesTS && getCurFunction()->isCoroutine()) CheckCompletedCoroutineBody(FD, Body); if (FD) { @@ -16097,7 +16097,8 @@ void Sema::ActOnModuleEnd(SourceLocation EomLoc, Module *Mod) { void Sema::createImplicitModuleImportForErrorRecovery(SourceLocation Loc, Module *Mod) { // Bail if we're not allowed to implicitly import a module here. - if (isSFINAEContext() || !getLangOpts().ModulesErrorRecovery) + if (isSFINAEContext() || !getLangOpts().ModulesErrorRecovery || + VisibleModules.isVisible(Mod)) return; // Create the implicit import declaration. diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp index 32be26efa14..6c492fac9eb 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclAttr.cpp @@ -238,7 +238,7 @@ static typename std::enable_if::value, getAttrName(const AttrInfo &Attr) { return &Attr; } -const IdentifierInfo *getAttrName(const clang::AttributeList &Attr) { +static const IdentifierInfo *getAttrName(const clang::AttributeList &Attr) { return Attr.getName(); } @@ -949,7 +949,7 @@ static bool checkFunctionConditionAttr(Sema &S, Decl *D, Msg = ""; SmallVector Diags; - if (!Cond->isValueDependent() && + if (isa(D) && !Cond->isValueDependent() && !Expr::isPotentialConstantExprUnevaluated(Cond, cast(D), Diags)) { S.Diag(Attr.getLoc(), diag::err_attr_cond_never_constant_expr) @@ -1037,10 +1037,11 @@ static void handleDiagnoseIfAttr(Sema &S, Decl *D, const AttributeList &Attr) { return; } - auto *FD = cast(D); - bool ArgDependent = ArgumentDependenceChecker(FD).referencesArgs(Cond); + bool ArgDependent = false; + if (const auto *FD = dyn_cast(D)) + ArgDependent = ArgumentDependenceChecker(FD).referencesArgs(Cond); D->addAttr(::new (S.Context) DiagnoseIfAttr( - Attr.getRange(), S.Context, Cond, Msg, DiagType, ArgDependent, FD, + Attr.getRange(), S.Context, Cond, Msg, DiagType, ArgDependent, cast(D), Attr.getAttributeSpellingListIndex())); } @@ -7283,6 +7284,12 @@ public: return true; } + bool VisitObjCAvailabilityCheckExpr(ObjCAvailabilityCheckExpr *E) { + SemaRef.Diag(E->getLocStart(), diag::warn_at_available_unchecked_use) + << (!SemaRef.getLangOpts().ObjC1); + return true; + } + bool VisitTypeLoc(TypeLoc Ty); }; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp index b543a731641..d9528be2d38 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaDeclCXX.cpp @@ -10348,32 +10348,33 @@ void Sema::DefineImplicitDefaultConstructor(SourceLocation CurrentLocation, !Constructor->doesThisDeclarationHaveABody() && !Constructor->isDeleted()) && "DefineImplicitDefaultConstructor - call it for implicit default ctor"); + if (Constructor->willHaveBody() || Constructor->isInvalidDecl()) + return; CXXRecordDecl *ClassDecl = Constructor->getParent(); assert(ClassDecl && "DefineImplicitDefaultConstructor - invalid constructor"); SynthesizedFunctionScope Scope(*this, Constructor); - DiagnosticErrorTrap Trap(Diags); - if (SetCtorInitializers(Constructor, /*AnyErrors=*/false) || - Trap.hasErrorOccurred()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXDefaultConstructor << Context.getTagDeclType(ClassDecl); - Constructor->setInvalidDecl(); - return; - } // The exception specification is needed because we are defining the // function. ResolveExceptionSpec(CurrentLocation, Constructor->getType()->castAs()); + MarkVTableUsed(CurrentLocation, ClassDecl); + + // Add a context note for diagnostics produced after this point. + Scope.addContextNote(CurrentLocation); + + if (SetCtorInitializers(Constructor, /*AnyErrors=*/false)) { + Constructor->setInvalidDecl(); + return; + } SourceLocation Loc = Constructor->getLocEnd().isValid() ? Constructor->getLocEnd() : Constructor->getLocation(); Constructor->setBody(new (Context) CompoundStmt(Loc)); - Constructor->markUsed(Context); - MarkVTableUsed(CurrentLocation, ClassDecl); if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(Constructor); @@ -10483,9 +10484,22 @@ void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation, assert(Constructor->getInheritedConstructor() && !Constructor->doesThisDeclarationHaveABody() && !Constructor->isDeleted()); - if (Constructor->isInvalidDecl()) + if (Constructor->willHaveBody() || Constructor->isInvalidDecl()) return; + // Initializations are performed "as if by a defaulted default constructor", + // so enter the appropriate scope. + SynthesizedFunctionScope Scope(*this, Constructor); + + // The exception specification is needed because we are defining the + // function. + ResolveExceptionSpec(CurrentLocation, + Constructor->getType()->castAs()); + MarkVTableUsed(CurrentLocation, ClassDecl); + + // Add a context note for diagnostics produced after this point. + Scope.addContextNote(CurrentLocation); + ConstructorUsingShadowDecl *Shadow = Constructor->getInheritedConstructor().getShadowDecl(); CXXConstructorDecl *InheritedCtor = @@ -10500,11 +10514,6 @@ void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation, CXXRecordDecl *RD = Shadow->getParent(); SourceLocation InitLoc = Shadow->getLocation(); - // Initializations are performed "as if by a defaulted default constructor", - // so enter the appropriate scope. - SynthesizedFunctionScope Scope(*this, Constructor); - DiagnosticErrorTrap Trap(Diags); - // Build explicit initializers for all base classes from which the // constructor was inherited. SmallVector Inits; @@ -10535,22 +10544,13 @@ void Sema::DefineInheritingConstructor(SourceLocation CurrentLocation, // We now proceed as if for a defaulted default constructor, with the relevant // initializers replaced. - bool HadError = SetCtorInitializers(Constructor, /*AnyErrors*/false, Inits); - if (HadError || Trap.hasErrorOccurred()) { - Diag(CurrentLocation, diag::note_inhctor_synthesized_at) << RD; + if (SetCtorInitializers(Constructor, /*AnyErrors*/false, Inits)) { Constructor->setInvalidDecl(); return; } - // The exception specification is needed because we are defining the - // function. - ResolveExceptionSpec(CurrentLocation, - Constructor->getType()->castAs()); - Constructor->setBody(new (Context) CompoundStmt(InitLoc)); - Constructor->markUsed(Context); - MarkVTableUsed(CurrentLocation, ClassDecl); if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(Constructor); @@ -10626,37 +10626,36 @@ void Sema::DefineImplicitDestructor(SourceLocation CurrentLocation, !Destructor->doesThisDeclarationHaveABody() && !Destructor->isDeleted()) && "DefineImplicitDestructor - call it for implicit default dtor"); + if (Destructor->willHaveBody() || Destructor->isInvalidDecl()) + return; + CXXRecordDecl *ClassDecl = Destructor->getParent(); assert(ClassDecl && "DefineImplicitDestructor - invalid destructor"); - if (Destructor->isInvalidDecl()) - return; - SynthesizedFunctionScope Scope(*this, Destructor); - DiagnosticErrorTrap Trap(Diags); - MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(), - Destructor->getParent()); - - if (CheckDestructor(Destructor) || Trap.hasErrorOccurred()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXDestructor << Context.getTagDeclType(ClassDecl); - - Destructor->setInvalidDecl(); - return; - } - // The exception specification is needed because we are defining the // function. ResolveExceptionSpec(CurrentLocation, Destructor->getType()->castAs()); + MarkVTableUsed(CurrentLocation, ClassDecl); + + // Add a context note for diagnostics produced after this point. + Scope.addContextNote(CurrentLocation); + + MarkBaseAndMemberDestructorsReferenced(Destructor->getLocation(), + Destructor->getParent()); + + if (CheckDestructor(Destructor)) { + Destructor->setInvalidDecl(); + return; + } SourceLocation Loc = Destructor->getLocEnd().isValid() ? Destructor->getLocEnd() : Destructor->getLocation(); Destructor->setBody(new (Context) CompoundStmt(Loc)); Destructor->markUsed(Context); - MarkVTableUsed(CurrentLocation, ClassDecl); if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(Destructor); @@ -11224,8 +11223,7 @@ CXXMethodDecl *Sema::DeclareImplicitCopyAssignment(CXXRecordDecl *ClassDecl) { /// Diagnose an implicit copy operation for a class which is odr-used, but /// which is deprecated because the class has a user-declared copy constructor, /// copy assignment operator, or destructor. -static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp, - SourceLocation UseLoc) { +static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp) { assert(CopyOp->isImplicit()); CXXRecordDecl *RD = CopyOp->getParent(); @@ -11264,10 +11262,6 @@ static void diagnoseDeprecatedCopyOperation(Sema &S, CXXMethodDecl *CopyOp, diag::warn_deprecated_copy_operation) << RD << /*copy assignment*/!isa(CopyOp) << /*destructor*/isa(UserDeclaredOperation); - S.Diag(UseLoc, diag::note_member_synthesized_at) - << (isa(CopyOp) ? Sema::CXXCopyConstructor - : Sema::CXXCopyAssignment) - << RD; } } @@ -11279,25 +11273,31 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, !CopyAssignOperator->doesThisDeclarationHaveABody() && !CopyAssignOperator->isDeleted()) && "DefineImplicitCopyAssignment called for wrong function"); + if (CopyAssignOperator->willHaveBody() || CopyAssignOperator->isInvalidDecl()) + return; CXXRecordDecl *ClassDecl = CopyAssignOperator->getParent(); - - if (ClassDecl->isInvalidDecl() || CopyAssignOperator->isInvalidDecl()) { + if (ClassDecl->isInvalidDecl()) { CopyAssignOperator->setInvalidDecl(); return; } + SynthesizedFunctionScope Scope(*this, CopyAssignOperator); + + // The exception specification is needed because we are defining the + // function. + ResolveExceptionSpec(CurrentLocation, + CopyAssignOperator->getType()->castAs()); + + // Add a context note for diagnostics produced after this point. + Scope.addContextNote(CurrentLocation); + // C++11 [class.copy]p18: // The [definition of an implicitly declared copy assignment operator] is // deprecated if the class has a user-declared copy constructor or a // user-declared destructor. if (getLangOpts().CPlusPlus11 && CopyAssignOperator->isImplicit()) - diagnoseDeprecatedCopyOperation(*this, CopyAssignOperator, CurrentLocation); - - CopyAssignOperator->markUsed(Context); - - SynthesizedFunctionScope Scope(*this, CopyAssignOperator); - DiagnosticErrorTrap Trap(Diags); + diagnoseDeprecatedCopyOperation(*this, CopyAssignOperator); // C++0x [class.copy]p30: // The implicitly-defined or explicitly-defaulted copy assignment operator @@ -11363,8 +11363,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, /*CopyingBaseSubobject=*/true, /*Copying=*/true); if (Copy.isInvalid()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXCopyAssignment << Context.getTagDeclType(ClassDecl); CopyAssignOperator->setInvalidDecl(); return; } @@ -11390,8 +11388,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign) << Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName(); Diag(Field->getLocation(), diag::note_declared_at); - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXCopyAssignment << Context.getTagDeclType(ClassDecl); Invalid = true; continue; } @@ -11402,8 +11398,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign) << Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName(); Diag(Field->getLocation(), diag::note_declared_at); - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXCopyAssignment << Context.getTagDeclType(ClassDecl); Invalid = true; continue; } @@ -11436,8 +11430,6 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, /*CopyingBaseSubobject=*/false, /*Copying=*/true); if (Copy.isInvalid()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXCopyAssignment << Context.getTagDeclType(ClassDecl); CopyAssignOperator->setInvalidDecl(); return; } @@ -11453,22 +11445,10 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, StmtResult Return = BuildReturnStmt(Loc, ThisObj.get()); if (Return.isInvalid()) Invalid = true; - else { + else Statements.push_back(Return.getAs()); - - if (Trap.hasErrorOccurred()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXCopyAssignment << Context.getTagDeclType(ClassDecl); - Invalid = true; - } - } } - // The exception specification is needed because we are defining the - // function. - ResolveExceptionSpec(CurrentLocation, - CopyAssignOperator->getType()->castAs()); - if (Invalid) { CopyAssignOperator->setInvalidDecl(); return; @@ -11482,6 +11462,7 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation, assert(!Body.isInvalid() && "Compound statement creation cannot fail"); } CopyAssignOperator->setBody(Body.getAs()); + CopyAssignOperator->markUsed(Context); if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(CopyAssignOperator); @@ -11654,19 +11635,15 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, !MoveAssignOperator->doesThisDeclarationHaveABody() && !MoveAssignOperator->isDeleted()) && "DefineImplicitMoveAssignment called for wrong function"); + if (MoveAssignOperator->willHaveBody() || MoveAssignOperator->isInvalidDecl()) + return; CXXRecordDecl *ClassDecl = MoveAssignOperator->getParent(); - - if (ClassDecl->isInvalidDecl() || MoveAssignOperator->isInvalidDecl()) { + if (ClassDecl->isInvalidDecl()) { MoveAssignOperator->setInvalidDecl(); return; } - MoveAssignOperator->markUsed(Context); - - SynthesizedFunctionScope Scope(*this, MoveAssignOperator); - DiagnosticErrorTrap Trap(Diags); - // C++0x [class.copy]p28: // The implicitly-defined or move assignment operator for a non-union class // X performs memberwise move assignment of its subobjects. The direct base @@ -11679,6 +11656,16 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, // from a virtual base more than once. checkMoveAssignmentForRepeatedMove(*this, ClassDecl, CurrentLocation); + SynthesizedFunctionScope Scope(*this, MoveAssignOperator); + + // The exception specification is needed because we are defining the + // function. + ResolveExceptionSpec(CurrentLocation, + MoveAssignOperator->getType()->castAs()); + + // Add a context note for diagnostics produced after this point. + Scope.addContextNote(CurrentLocation); + // The statements that form the synthesized function body. SmallVector Statements; @@ -11743,8 +11730,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, /*CopyingBaseSubobject=*/true, /*Copying=*/false); if (Move.isInvalid()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXMoveAssignment << Context.getTagDeclType(ClassDecl); MoveAssignOperator->setInvalidDecl(); return; } @@ -11770,8 +11755,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign) << Context.getTagDeclType(ClassDecl) << 0 << Field->getDeclName(); Diag(Field->getLocation(), diag::note_declared_at); - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXMoveAssignment << Context.getTagDeclType(ClassDecl); Invalid = true; continue; } @@ -11782,8 +11765,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign) << Context.getTagDeclType(ClassDecl) << 1 << Field->getDeclName(); Diag(Field->getLocation(), diag::note_declared_at); - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXMoveAssignment << Context.getTagDeclType(ClassDecl); Invalid = true; continue; } @@ -11819,8 +11800,6 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, /*CopyingBaseSubobject=*/false, /*Copying=*/false); if (Move.isInvalid()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXMoveAssignment << Context.getTagDeclType(ClassDecl); MoveAssignOperator->setInvalidDecl(); return; } @@ -11837,22 +11816,10 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, StmtResult Return = BuildReturnStmt(Loc, ThisObj.get()); if (Return.isInvalid()) Invalid = true; - else { + else Statements.push_back(Return.getAs()); - - if (Trap.hasErrorOccurred()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXMoveAssignment << Context.getTagDeclType(ClassDecl); - Invalid = true; - } - } } - // The exception specification is needed because we are defining the - // function. - ResolveExceptionSpec(CurrentLocation, - MoveAssignOperator->getType()->castAs()); - if (Invalid) { MoveAssignOperator->setInvalidDecl(); return; @@ -11866,6 +11833,7 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation, assert(!Body.isInvalid() && "Compound statement creation cannot fail"); } MoveAssignOperator->setBody(Body.getAs()); + MoveAssignOperator->markUsed(Context); if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(MoveAssignOperator); @@ -11952,30 +11920,37 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor( } void Sema::DefineImplicitCopyConstructor(SourceLocation CurrentLocation, - CXXConstructorDecl *CopyConstructor) { + CXXConstructorDecl *CopyConstructor) { assert((CopyConstructor->isDefaulted() && CopyConstructor->isCopyConstructor() && !CopyConstructor->doesThisDeclarationHaveABody() && !CopyConstructor->isDeleted()) && "DefineImplicitCopyConstructor - call it for implicit copy ctor"); + if (CopyConstructor->willHaveBody() || CopyConstructor->isInvalidDecl()) + return; CXXRecordDecl *ClassDecl = CopyConstructor->getParent(); assert(ClassDecl && "DefineImplicitCopyConstructor - invalid constructor"); + SynthesizedFunctionScope Scope(*this, CopyConstructor); + + // The exception specification is needed because we are defining the + // function. + ResolveExceptionSpec(CurrentLocation, + CopyConstructor->getType()->castAs()); + MarkVTableUsed(CurrentLocation, ClassDecl); + + // Add a context note for diagnostics produced after this point. + Scope.addContextNote(CurrentLocation); + // C++11 [class.copy]p7: // The [definition of an implicitly declared copy constructor] is // deprecated if the class has a user-declared copy assignment operator // or a user-declared destructor. if (getLangOpts().CPlusPlus11 && CopyConstructor->isImplicit()) - diagnoseDeprecatedCopyOperation(*this, CopyConstructor, CurrentLocation); + diagnoseDeprecatedCopyOperation(*this, CopyConstructor); - SynthesizedFunctionScope Scope(*this, CopyConstructor); - DiagnosticErrorTrap Trap(Diags); - - if (SetCtorInitializers(CopyConstructor, /*AnyErrors=*/false) || - Trap.hasErrorOccurred()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXCopyConstructor << Context.getTagDeclType(ClassDecl); + if (SetCtorInitializers(CopyConstructor, /*AnyErrors=*/false)) { CopyConstructor->setInvalidDecl(); } else { SourceLocation Loc = CopyConstructor->getLocEnd().isValid() @@ -11984,16 +11959,9 @@ void Sema::DefineImplicitCopyConstructor(SourceLocation CurrentLocation, Sema::CompoundScopeRAII CompoundScope(*this); CopyConstructor->setBody( ActOnCompoundStmt(Loc, Loc, None, /*isStmtExpr=*/false).getAs()); + CopyConstructor->markUsed(Context); } - // The exception specification is needed because we are defining the - // function. - ResolveExceptionSpec(CurrentLocation, - CopyConstructor->getType()->castAs()); - - CopyConstructor->markUsed(Context); - MarkVTableUsed(CurrentLocation, ClassDecl); - if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(CopyConstructor); } @@ -12075,41 +12043,41 @@ CXXConstructorDecl *Sema::DeclareImplicitMoveConstructor( } void Sema::DefineImplicitMoveConstructor(SourceLocation CurrentLocation, - CXXConstructorDecl *MoveConstructor) { + CXXConstructorDecl *MoveConstructor) { assert((MoveConstructor->isDefaulted() && MoveConstructor->isMoveConstructor() && !MoveConstructor->doesThisDeclarationHaveABody() && !MoveConstructor->isDeleted()) && "DefineImplicitMoveConstructor - call it for implicit move ctor"); + if (MoveConstructor->willHaveBody() || MoveConstructor->isInvalidDecl()) + return; CXXRecordDecl *ClassDecl = MoveConstructor->getParent(); assert(ClassDecl && "DefineImplicitMoveConstructor - invalid constructor"); SynthesizedFunctionScope Scope(*this, MoveConstructor); - DiagnosticErrorTrap Trap(Diags); - if (SetCtorInitializers(MoveConstructor, /*AnyErrors=*/false) || - Trap.hasErrorOccurred()) { - Diag(CurrentLocation, diag::note_member_synthesized_at) - << CXXMoveConstructor << Context.getTagDeclType(ClassDecl); + // The exception specification is needed because we are defining the + // function. + ResolveExceptionSpec(CurrentLocation, + MoveConstructor->getType()->castAs()); + MarkVTableUsed(CurrentLocation, ClassDecl); + + // Add a context note for diagnostics produced after this point. + Scope.addContextNote(CurrentLocation); + + if (SetCtorInitializers(MoveConstructor, /*AnyErrors=*/false)) { MoveConstructor->setInvalidDecl(); - } else { + } else { SourceLocation Loc = MoveConstructor->getLocEnd().isValid() ? MoveConstructor->getLocEnd() : MoveConstructor->getLocation(); Sema::CompoundScopeRAII CompoundScope(*this); MoveConstructor->setBody(ActOnCompoundStmt( Loc, Loc, None, /*isStmtExpr=*/ false).getAs()); + MoveConstructor->markUsed(Context); } - // The exception specification is needed because we are defining the - // function. - ResolveExceptionSpec(CurrentLocation, - MoveConstructor->getType()->castAs()); - - MoveConstructor->markUsed(Context); - MarkVTableUsed(CurrentLocation, ClassDecl); - if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(MoveConstructor); } @@ -12122,6 +12090,8 @@ bool Sema::isImplicitlyDeleted(FunctionDecl *FD) { void Sema::DefineImplicitLambdaToFunctionPointerConversion( SourceLocation CurrentLocation, CXXConversionDecl *Conv) { + SynthesizedFunctionScope Scope(*this, Conv); + CXXRecordDecl *Lambda = Conv->getParent(); CXXMethodDecl *CallOp = Lambda->getLambdaCallOperator(); // If we are defining a specialization of a conversion to function-ptr @@ -12144,6 +12114,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion( "Conversion operator must have a corresponding call operator"); CallOp = cast(CallOpSpec); } + // Mark the call operator referenced (and add to pending instantiations // if necessary). // For both the conversion and static-invoker template specializations @@ -12151,9 +12122,6 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion( // to the PendingInstantiations. MarkFunctionReferenced(CurrentLocation, CallOp); - SynthesizedFunctionScope Scope(*this, Conv); - DiagnosticErrorTrap Trap(Diags); - // Retrieve the static invoker... CXXMethodDecl *Invoker = Lambda->getLambdaStaticInvoker(); // ... and get the corresponding specialization for a generic lambda. @@ -12191,7 +12159,7 @@ void Sema::DefineImplicitLambdaToFunctionPointerConversion( if (ASTMutationListener *L = getASTMutationListener()) { L->CompletedImplicitDefinition(Conv); L->CompletedImplicitDefinition(Invoker); - } + } } @@ -12202,10 +12170,7 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion( { assert(!Conv->getParent()->isGenericLambda()); - Conv->markUsed(Context); - SynthesizedFunctionScope Scope(*this, Conv); - DiagnosticErrorTrap Trap(Diags); // Copy-initialize the lambda object as needed to capture it. Expr *This = ActOnCXXThis(CurrentLocation).get(); @@ -12244,6 +12209,7 @@ void Sema::DefineImplicitLambdaToBlockPointerConversion( Conv->setBody(new (Context) CompoundStmt(Context, ReturnS, Conv->getLocation(), Conv->getLocation())); + Conv->markUsed(Context); // We're done; notify the mutation listener, if any. if (ASTMutationListener *L = getASTMutationListener()) { @@ -13971,6 +13937,11 @@ void Sema::SetDeclDefaulted(Decl *Dcl, SourceLocation DefaultLoc) { MD->setDefaulted(); MD->setExplicitlyDefaulted(); + // Unset that we will have a body for this function. We might not, + // if it turns out to be trivial, and we don't need this marking now + // that we've marked it as defaulted. + MD->setWillHaveBody(false); + // If this definition appears within the record, do the checking when // the record is complete. const FunctionDecl *Primary = MD; diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp index 14efc967206..759c82eeaa0 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp @@ -366,8 +366,18 @@ bool Sema::DiagnoseUseOfDecl(NamedDecl *D, SourceLocation Loc, if (getLangOpts().CUDA && !CheckCUDACall(Loc, FD)) return true; + } - if (diagnoseArgIndependentDiagnoseIfAttrs(FD, Loc)) + auto getReferencedObjCProp = [](const NamedDecl *D) -> + const ObjCPropertyDecl * { + if (const auto *MD = dyn_cast(D)) + return MD->findPropertyDecl(); + return nullptr; + }; + if (const ObjCPropertyDecl *ObjCPDecl = getReferencedObjCProp(D)) { + if (diagnoseArgIndependentDiagnoseIfAttrs(ObjCPDecl, Loc)) + return true; + } else if (diagnoseArgIndependentDiagnoseIfAttrs(D, Loc)) { return true; } @@ -15742,6 +15752,13 @@ ExprResult Sema::ActOnObjCAvailabilityCheckExpr( if (Spec != AvailSpecs.end()) Version = Spec->getVersion(); + // The use of `@available` in the enclosing function should be analyzed to + // warn when it's used inappropriately (i.e. not if(@available)). + if (getCurFunctionOrMethodDecl()) + getEnclosingFunction()->HasPotentialAvailabilityViolations = true; + else if (getCurBlock() || getCurLambda()) + getCurFunction()->HasPotentialAvailabilityViolations = true; + return new (Context) ObjCAvailabilityCheckExpr(Version, AtLoc, RParen, Context.BoolTy); } diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp index 7a9a8ff911a..4b1d7fd3cf2 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaLambda.cpp @@ -1591,6 +1591,7 @@ ExprResult Sema::BuildLambdaExpr(SourceLocation StartLoc, SourceLocation EndLoc, // its constexpr-ness, supressing diagnostics while doing so. if (getLangOpts().CPlusPlus1z && !CallOperator->isInvalidDecl() && !CallOperator->isConstexpr() && + !isa(CallOperator->getBody()) && !Class->getDeclContext()->isDependentContext()) { TentativeAnalysisScope DiagnosticScopeGuard(*this); CallOperator->setConstexpr( diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp index 678817f0299..c97da740e4d 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaLookup.cpp @@ -4929,8 +4929,6 @@ static NamedDecl *getDefinitionToImport(NamedDecl *D) { void Sema::diagnoseMissingImport(SourceLocation Loc, NamedDecl *Decl, MissingImportKind MIK, bool Recover) { - assert(!isVisible(Decl) && "missing import for non-hidden decl?"); - // Suggest importing a module providing the definition of this entity, if // possible. NamedDecl *Def = getDefinitionToImport(Decl); diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp index 51794160278..1ba84034fa4 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaOverload.cpp @@ -6242,11 +6242,11 @@ EnableIfAttr *Sema::CheckEnableIf(FunctionDecl *Function, ArrayRef Args, } template -static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const FunctionDecl *FD, +static bool diagnoseDiagnoseIfAttrsWith(Sema &S, const NamedDecl *ND, bool ArgDependent, SourceLocation Loc, CheckFn &&IsSuccessful) { SmallVector Attrs; - for (const auto *DIA : FD->specific_attrs()) { + for (const auto *DIA : ND->specific_attrs()) { if (ArgDependent == DIA->getArgDependent()) Attrs.push_back(DIA); } @@ -6293,16 +6293,16 @@ bool Sema::diagnoseArgDependentDiagnoseIfAttrs(const FunctionDecl *Function, // EvaluateWithSubstitution only cares about the position of each // argument in the arg list, not the ParmVarDecl* it maps to. if (!DIA->getCond()->EvaluateWithSubstitution( - Result, Context, DIA->getParent(), Args, ThisArg)) + Result, Context, cast(DIA->getParent()), Args, ThisArg)) return false; return Result.isInt() && Result.getInt().getBoolValue(); }); } -bool Sema::diagnoseArgIndependentDiagnoseIfAttrs(const FunctionDecl *Function, +bool Sema::diagnoseArgIndependentDiagnoseIfAttrs(const NamedDecl *ND, SourceLocation Loc) { return diagnoseDiagnoseIfAttrsWith( - *this, Function, /*ArgDependent=*/false, Loc, + *this, ND, /*ArgDependent=*/false, Loc, [&](const DiagnoseIfAttr *DIA) { bool Result; return DIA->getCond()->EvaluateAsBooleanCondition(Result, Context) && diff --git a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index 2d44489023e..a654ca800b0 100644 --- a/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/contrib/llvm/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -197,6 +197,7 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const { case DefaultTemplateArgumentChecking: case DeclaringSpecialMember: + case DefiningSynthesizedFunction: return false; } @@ -624,6 +625,17 @@ void Sema::PrintInstantiationStack() { diag::note_in_declaration_of_implicit_special_member) << cast(Active->Entity) << Active->SpecialMember; break; + + case CodeSynthesisContext::DefiningSynthesizedFunction: + // FIXME: For synthesized members other than special members, produce a note. + auto *MD = dyn_cast(Active->Entity); + auto CSM = MD ? getSpecialMember(MD) : CXXInvalid; + if (CSM != CXXInvalid) { + Diags.Report(Active->PointOfInstantiation, + diag::note_member_synthesized_at) + << CSM << Context.getTagDeclType(MD->getParent()); + } + break; } } } @@ -666,6 +678,7 @@ Optional Sema::isSFINAEContext() const { return Active->DeductionInfo; case CodeSynthesisContext::DeclaringSpecialMember: + case CodeSynthesisContext::DefiningSynthesizedFunction: // This happens in a context unrelated to template instantiation, so // there is no SFINAE. return None; diff --git a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h index f4a97f55789..a65584e3c91 100644 --- a/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h +++ b/contrib/llvm/tools/clang/lib/Sema/TreeTransform.h @@ -6945,6 +6945,19 @@ TreeTransform::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) { if (DeallocRes.isInvalid()) return StmtError(); Builder.Deallocate = DeallocRes.get(); + + assert(S->getResultDecl() && "ResultDecl must already be built"); + StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl()); + if (ResultDecl.isInvalid()) + return StmtError(); + Builder.ResultDecl = ResultDecl.get(); + + if (auto *ReturnStmt = S->getReturnStmt()) { + StmtResult Res = getDerived().TransformStmt(ReturnStmt); + if (Res.isInvalid()) + return StmtError(); + Builder.ReturnStmt = Res.get(); + } } return getDerived().RebuildCoroutineBodyStmt(Builder); diff --git a/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp b/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp index 5a44061cbd4..9e9689e6b25 100644 --- a/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp +++ b/contrib/llvm/tools/clang/lib/Tooling/CommonOptionsParser.cpp @@ -116,7 +116,11 @@ CommonOptionsParser::CommonOptionsParser( cl::HideUnrelatedOptions(Category); - Compilations.reset(FixedCompilationDatabase::loadFromCommandLine(argc, argv)); + std::string ErrorMessage; + Compilations = + FixedCompilationDatabase::loadFromCommandLine(argc, argv, ErrorMessage); + if (!Compilations && !ErrorMessage.empty()) + llvm::errs() << ErrorMessage; cl::ParseCommandLineOptions(argc, argv, Overview); cl::PrintOptionValues(); @@ -125,7 +129,6 @@ CommonOptionsParser::CommonOptionsParser( SourcePathList.empty()) return; if (!Compilations) { - std::string ErrorMessage; if (!BuildPath.empty()) { Compilations = CompilationDatabase::autoDetectFromDirectory(BuildPath, ErrorMessage); diff --git a/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp b/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp index 8ca0b2df701..77c5b547ca0 100644 --- a/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp +++ b/contrib/llvm/tools/clang/lib/Tooling/CompilationDatabase.cpp @@ -27,6 +27,7 @@ #include "llvm/Option/Arg.h" #include "llvm/Support/Host.h" #include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" #include #include using namespace clang; @@ -150,23 +151,21 @@ private: // options. class UnusedInputDiagConsumer : public DiagnosticConsumer { public: - UnusedInputDiagConsumer() : Other(nullptr) {} - - // Useful for debugging, chain diagnostics to another consumer after - // recording for our own purposes. - UnusedInputDiagConsumer(DiagnosticConsumer *Other) : Other(Other) {} + UnusedInputDiagConsumer(DiagnosticConsumer &Other) : Other(Other) {} void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info) override { if (Info.getID() == clang::diag::warn_drv_input_file_unused) { // Arg 1 for this diagnostic is the option that didn't get used. UnusedInputs.push_back(Info.getArgStdStr(0)); + } else if (DiagLevel >= DiagnosticsEngine::Error) { + // If driver failed to create compilation object, show the diagnostics + // to user. + Other.HandleDiagnostic(DiagLevel, Info); } - if (Other) - Other->HandleDiagnostic(DiagLevel, Info); } - DiagnosticConsumer *Other; + DiagnosticConsumer &Other; SmallVector UnusedInputs; }; @@ -205,9 +204,12 @@ private: /// \li false if \c Args cannot be used for compilation jobs (e.g. /// contains an option like -E or -version). static bool stripPositionalArgs(std::vector Args, - std::vector &Result) { + std::vector &Result, + std::string &ErrorMsg) { IntrusiveRefCntPtr DiagOpts = new DiagnosticOptions(); - UnusedInputDiagConsumer DiagClient; + llvm::raw_string_ostream Output(ErrorMsg); + TextDiagnosticPrinter DiagnosticPrinter(Output, &*DiagOpts); + UnusedInputDiagConsumer DiagClient(DiagnosticPrinter); DiagnosticsEngine Diagnostics( IntrusiveRefCntPtr(new DiagnosticIDs()), &*DiagOpts, &DiagClient, false); @@ -245,6 +247,8 @@ static bool stripPositionalArgs(std::vector Args, const std::unique_ptr Compilation( NewDriver->BuildCompilation(Args)); + if (!Compilation) + return false; const driver::JobList &Jobs = Compilation->getJobs(); @@ -258,8 +262,7 @@ static bool stripPositionalArgs(std::vector Args, } if (CompileAnalyzer.Inputs.empty()) { - // No compile jobs found. - // FIXME: Emit a warning of some kind? + ErrorMsg = "warning: no compile jobs found\n"; return false; } @@ -280,8 +283,14 @@ static bool stripPositionalArgs(std::vector Args, return true; } -FixedCompilationDatabase *FixedCompilationDatabase::loadFromCommandLine( - int &Argc, const char *const *Argv, Twine Directory) { +std::unique_ptr +FixedCompilationDatabase::loadFromCommandLine(int &Argc, + const char *const *Argv, + std::string &ErrorMsg, + Twine Directory) { + ErrorMsg.clear(); + if (Argc == 0) + return nullptr; const char *const *DoubleDash = std::find(Argv, Argv + Argc, StringRef("--")); if (DoubleDash == Argv + Argc) return nullptr; @@ -289,9 +298,10 @@ FixedCompilationDatabase *FixedCompilationDatabase::loadFromCommandLine( Argc = DoubleDash - Argv; std::vector StrippedArgs; - if (!stripPositionalArgs(CommandLine, StrippedArgs)) + if (!stripPositionalArgs(CommandLine, StrippedArgs, ErrorMsg)) return nullptr; - return new FixedCompilationDatabase(Directory, StrippedArgs); + return std::unique_ptr( + new FixedCompilationDatabase(Directory, StrippedArgs)); } FixedCompilationDatabase:: diff --git a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp index 9e1181281f1..2e093dd9afc 100644 --- a/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp +++ b/contrib/llvm/tools/clang/lib/Tooling/Tooling.cpp @@ -260,6 +260,8 @@ bool ToolInvocation::run() { Driver->setCheckInputsExist(false); const std::unique_ptr Compilation( Driver->BuildCompilation(llvm::makeArrayRef(Argv))); + if (!Compilation) + return false; const llvm::opt::ArgStringList *const CC1Args = getCC1Arguments( &Diagnostics, Compilation.get()); if (!CC1Args) { diff --git a/contrib/llvm/tools/clang/tools/driver/driver.cpp b/contrib/llvm/tools/clang/tools/driver/driver.cpp index 626d006ac0d..4bd3b228d0f 100644 --- a/contrib/llvm/tools/clang/tools/driver/driver.cpp +++ b/contrib/llvm/tools/clang/tools/driver/driver.cpp @@ -454,40 +454,41 @@ int main(int argc_, const char **argv_) { SetBackdoorDriverOutputsFromEnvVars(TheDriver); std::unique_ptr C(TheDriver.BuildCompilation(argv)); - int Res = 0; - SmallVector, 4> FailingCommands; - if (C.get()) + int Res = 1; + if (C.get()) { + SmallVector, 4> FailingCommands; Res = TheDriver.ExecuteCompilation(*C, FailingCommands); - // Force a crash to test the diagnostics. - if (TheDriver.GenReproducer) { - Diags.Report(diag::err_drv_force_crash) + // Force a crash to test the diagnostics. + if (TheDriver.GenReproducer) { + Diags.Report(diag::err_drv_force_crash) << !::getenv("FORCE_CLANG_DIAGNOSTICS_CRASH"); - // Pretend that every command failed. - FailingCommands.clear(); - for (const auto &J : C->getJobs()) - if (const Command *C = dyn_cast(&J)) - FailingCommands.push_back(std::make_pair(-1, C)); - } + // Pretend that every command failed. + FailingCommands.clear(); + for (const auto &J : C->getJobs()) + if (const Command *C = dyn_cast(&J)) + FailingCommands.push_back(std::make_pair(-1, C)); + } - for (const auto &P : FailingCommands) { - int CommandRes = P.first; - const Command *FailingCommand = P.second; - if (!Res) - Res = CommandRes; + for (const auto &P : FailingCommands) { + int CommandRes = P.first; + const Command *FailingCommand = P.second; + if (!Res) + Res = CommandRes; - // If result status is < 0, then the driver command signalled an error. - // If result status is 70, then the driver command reported a fatal error. - // On Windows, abort will return an exit code of 3. In these cases, - // generate additional diagnostic information if possible. - bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70; + // If result status is < 0, then the driver command signalled an error. + // If result status is 70, then the driver command reported a fatal error. + // On Windows, abort will return an exit code of 3. In these cases, + // generate additional diagnostic information if possible. + bool DiagnoseCrash = CommandRes < 0 || CommandRes == 70; #ifdef LLVM_ON_WIN32 - DiagnoseCrash |= CommandRes == 3; + DiagnoseCrash |= CommandRes == 3; #endif - if (DiagnoseCrash) { - TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); - break; + if (DiagnoseCrash) { + TheDriver.generateCompilationDiagnostics(*C, *FailingCommand); + break; + } } } diff --git a/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp b/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp index 7efc9bccdc5..edd3e38471b 100644 --- a/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp +++ b/contrib/llvm/tools/clang/utils/TableGen/ClangAttrEmitter.cpp @@ -312,7 +312,7 @@ namespace { } void writeDump(raw_ostream &OS) const override { - if (type == "FunctionDecl *") { + if (type == "FunctionDecl *" || type == "NamedDecl *") { OS << " OS << \" \";\n"; OS << " dumpBareDeclRef(SA->get" << getUpperName() << "());\n"; } else if (type == "IdentifierInfo *") { @@ -1181,6 +1181,8 @@ createArgument(const Record &Arg, StringRef Attr, Ptr = llvm::make_unique(Arg, Attr); else if (ArgName == "FunctionArgument") Ptr = llvm::make_unique(Arg, Attr, "FunctionDecl *"); + else if (ArgName == "NamedArgument") + Ptr = llvm::make_unique(Arg, Attr, "NamedDecl *"); else if (ArgName == "IdentifierArgument") Ptr = llvm::make_unique(Arg, Attr, "IdentifierInfo *"); else if (ArgName == "DefaultBoolArgument") @@ -3103,6 +3105,8 @@ static std::string CalculateDiagnostic(const Record &S) { " : ExpectedVariableOrFunction))"; case ObjCMethod | ObjCProp: return "ExpectedMethodOrProperty"; + case Func | ObjCMethod | ObjCProp: + return "ExpectedFunctionOrMethodOrProperty"; case ObjCProtocol | ObjCInterface: return "ExpectedObjectiveCInterfaceOrProtocol"; case Field | Var: return "ExpectedFieldOrGlobalVar"; diff --git a/contrib/llvm/tools/lld/COFF/ICF.cpp b/contrib/llvm/tools/lld/COFF/ICF.cpp index 3b7cc424f0a..da8ca360542 100644 --- a/contrib/llvm/tools/lld/COFF/ICF.cpp +++ b/contrib/llvm/tools/lld/COFF/ICF.cpp @@ -56,7 +56,6 @@ private: std::vector Chunks; int Cnt = 0; - std::atomic NextId = {1}; std::atomic Repeat = {false}; }; @@ -98,10 +97,10 @@ void ICF::segregate(size_t Begin, size_t End, bool Constant) { }); size_t Mid = Bound - Chunks.begin(); - // Split [Begin, End) into [Begin, Mid) and [Mid, End). - uint32_t Id = NextId++; + // Split [Begin, End) into [Begin, Mid) and [Mid, End). We use Mid as an + // equivalence class ID because every group ends with a unique index. for (size_t I = Begin; I < Mid; ++I) - Chunks[I]->Class[(Cnt + 1) % 2] = Id; + Chunks[I]->Class[(Cnt + 1) % 2] = Mid; // If we created a group, we need to iterate the main loop again. if (Mid != End) @@ -186,6 +185,7 @@ void ICF::forEachClass(std::function Fn) { // call Fn sequentially. if (Chunks.size() < 1024) { forEachClassRange(0, Chunks.size(), Fn); + ++Cnt; return; } @@ -193,9 +193,10 @@ void ICF::forEachClass(std::function Fn) { size_t NumShards = 256; size_t Step = Chunks.size() / NumShards; for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) { - forEachClassRange(I * Step, (I + 1) * Step, Fn); + size_t End = (I == NumShards - 1) ? Chunks.size() : (I + 1) * Step; + forEachClassRange(I * Step, End, Fn); }); - forEachClassRange(Step * NumShards, Chunks.size(), Fn); + ++Cnt; } // Merge identical COMDAT sections. @@ -203,22 +204,20 @@ void ICF::forEachClass(std::function Fn) { // contents and relocations are all the same. void ICF::run(const std::vector &Vec) { // Collect only mergeable sections and group by hash value. + uint32_t NextId = 1; for (Chunk *C : Vec) { - auto *SC = dyn_cast(C); - if (!SC) - continue; - - if (isEligible(SC)) { - // Set MSB to 1 to avoid collisions with non-hash classs. - SC->Class[0] = getHash(SC) | (1 << 31); - Chunks.push_back(SC); - } else { - SC->Class[0] = NextId++; + if (auto *SC = dyn_cast(C)) { + if (isEligible(SC)) + Chunks.push_back(SC); + else + SC->Class[0] = NextId++; } } - if (Chunks.empty()) - return; + // Initially, we use hash values to partition sections. + for (SectionChunk *SC : Chunks) + // Set MSB to 1 to avoid collisions with non-hash classs. + SC->Class[0] = getHash(SC) | (1 << 31); // From now on, sections in Chunks are ordered so that sections in // the same group are consecutive in the vector. @@ -229,14 +228,12 @@ void ICF::run(const std::vector &Vec) { // Compare static contents and assign unique IDs for each static content. forEachClass([&](size_t Begin, size_t End) { segregate(Begin, End, true); }); - ++Cnt; // Split groups by comparing relocations until convergence is obtained. do { Repeat = false; forEachClass( [&](size_t Begin, size_t End) { segregate(Begin, End, false); }); - ++Cnt; } while (Repeat); log("ICF needed " + Twine(Cnt) + " iterations"); diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.cpp b/contrib/llvm/tools/lld/COFF/InputFiles.cpp index 6e6465cd5d6..258d9fd74b4 100644 --- a/contrib/llvm/tools/lld/COFF/InputFiles.cpp +++ b/contrib/llvm/tools/lld/COFF/InputFiles.cpp @@ -48,13 +48,11 @@ namespace coff { /// alias to Target. static void checkAndSetWeakAlias(SymbolTable *Symtab, InputFile *F, SymbolBody *Source, SymbolBody *Target) { - auto *U = dyn_cast(Source); - if (!U) - return; - else if (!U->WeakAlias) + if (auto *U = dyn_cast(Source)) { + if (U->WeakAlias && U->WeakAlias != Target) + Symtab->reportDuplicate(Source->symbol(), F); U->WeakAlias = Target; - else if (U->WeakAlias != Target) - Symtab->reportDuplicate(Source->symbol(), F); + } } ArchiveFile::ArchiveFile(MemoryBufferRef M) : InputFile(ArchiveKind, M) {} @@ -153,8 +151,10 @@ void ObjectFile::initializeSymbols() { uint32_t NumSymbols = COFFObj->getNumberOfSymbols(); SymbolBodies.reserve(NumSymbols); SparseSymbolBodies.resize(NumSymbols); + SmallVector, 8> WeakAliases; int32_t LastSectionNumber = 0; + for (uint32_t I = 0; I < NumSymbols; ++I) { // Get a COFFSymbolRef object. ErrorOr SymOrErr = COFFObj->getSymbol(I); @@ -185,9 +185,12 @@ void ObjectFile::initializeSymbols() { I += Sym.getNumberOfAuxSymbols(); LastSectionNumber = Sym.getSectionNumber(); } - for (auto WeakAlias : WeakAliases) - checkAndSetWeakAlias(Symtab, this, WeakAlias.first, - SparseSymbolBodies[WeakAlias.second]); + + for (auto &KV : WeakAliases) { + SymbolBody *Sym = KV.first; + uint32_t Idx = KV.second; + checkAndSetWeakAlias(Symtab, this, Sym, SparseSymbolBodies[Idx]); + } } SymbolBody *ObjectFile::createUndefined(COFFSymbolRef Sym) { diff --git a/contrib/llvm/tools/lld/COFF/InputFiles.h b/contrib/llvm/tools/lld/COFF/InputFiles.h index 9e32b3b9f9d..9449f24ac24 100644 --- a/contrib/llvm/tools/lld/COFF/InputFiles.h +++ b/contrib/llvm/tools/lld/COFF/InputFiles.h @@ -10,6 +10,7 @@ #ifndef LLD_COFF_INPUT_FILES_H #define LLD_COFF_INPUT_FILES_H +#include "Config.h" #include "lld/Core/LLVM.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseSet.h" @@ -161,7 +162,9 @@ private: // for details about the format. class ImportFile : public InputFile { public: - explicit ImportFile(MemoryBufferRef M) : InputFile(ImportKind, M) {} + explicit ImportFile(MemoryBufferRef M) + : InputFile(ImportKind, M), Live(!Config->DoGC) {} + static bool classof(const InputFile *F) { return F->kind() == ImportKind; } DefinedImportData *ImpSym = nullptr; @@ -176,6 +179,14 @@ public: StringRef ExternalName; const coff_import_header *Hdr; Chunk *Location = nullptr; + + // We want to eliminate dllimported symbols if no one actually refers them. + // This "Live" bit is used to keep track of which import library members + // are actually in use. + // + // If the Live bit is turned off by MarkLive, Writer will ignore dllimported + // symbols provided by this import library member. + bool Live; }; // Used for LTO. diff --git a/contrib/llvm/tools/lld/COFF/MarkLive.cpp b/contrib/llvm/tools/lld/COFF/MarkLive.cpp index 0156d238b67..25e5cc35067 100644 --- a/contrib/llvm/tools/lld/COFF/MarkLive.cpp +++ b/contrib/llvm/tools/lld/COFF/MarkLive.cpp @@ -37,19 +37,26 @@ void markLive(const std::vector &Chunks) { Worklist.push_back(C); }; + auto AddSym = [&](SymbolBody *B) { + if (auto *Sym = dyn_cast(B)) + Enqueue(Sym->getChunk()); + else if (auto *Sym = dyn_cast(B)) + Sym->File->Live = true; + else if (auto *Sym = dyn_cast(B)) + Sym->WrappedSym->File->Live = true; + }; + // Add GC root chunks. for (SymbolBody *B : Config->GCRoot) - if (auto *D = dyn_cast(B)) - Enqueue(D->getChunk()); + AddSym(B); while (!Worklist.empty()) { SectionChunk *SC = Worklist.pop_back_val(); assert(SC->isLive() && "We mark as live when pushing onto the worklist!"); // Mark all symbols listed in the relocation table for this section. - for (SymbolBody *S : SC->symbols()) - if (auto *D = dyn_cast(S)) - Enqueue(D->getChunk()); + for (SymbolBody *B : SC->symbols()) + AddSym(B); // Mark associative sections if any. for (SectionChunk *C : SC->children()) diff --git a/contrib/llvm/tools/lld/COFF/PDB.cpp b/contrib/llvm/tools/lld/COFF/PDB.cpp index 0266148cc6c..a3b3ab7bbab 100644 --- a/contrib/llvm/tools/lld/COFF/PDB.cpp +++ b/contrib/llvm/tools/lld/COFF/PDB.cpp @@ -99,6 +99,12 @@ static void addTypeInfo(pdb::TpiStreamBuilder &TpiBuilder, static void mergeDebugT(SymbolTable *Symtab, pdb::PDBFileBuilder &Builder, codeview::TypeTableBuilder &TypeTable, codeview::TypeTableBuilder &IDTable) { + // Follow type servers. If the same type server is encountered more than + // once for this instance of `PDBTypeServerHandler` (for example if many + // object files reference the same TypeServer), the types from the + // TypeServer will only be visited once. + pdb::PDBTypeServerHandler Handler; + // Visit all .debug$T sections to add them to Builder. for (ObjectFile *File : Symtab->ObjectFiles) { ArrayRef Data = getDebugSection(File, ".debug$T"); @@ -109,16 +115,11 @@ static void mergeDebugT(SymbolTable *Symtab, pdb::PDBFileBuilder &Builder, codeview::CVTypeArray Types; BinaryStreamReader Reader(Stream); SmallVector SourceToDest; - // Follow type servers. If the same type server is encountered more than - // once for this instance of `PDBTypeServerHandler` (for example if many - // object files reference the same TypeServer), the types from the - // TypeServer will only be visited once. - pdb::PDBTypeServerHandler Handler; Handler.addSearchPath(llvm::sys::path::parent_path(File->getName())); if (auto EC = Reader.readArray(Types, Reader.getLength())) fatal(EC, "Reader::readArray failed"); - if (auto Err = codeview::mergeTypeStreams(IDTable, TypeTable, SourceToDest, - &Handler, Types)) + if (auto Err = codeview::mergeTypeAndIdRecords( + IDTable, TypeTable, SourceToDest, &Handler, Types)) fatal(Err, "codeview::mergeTypeStreams failed"); } diff --git a/contrib/llvm/tools/lld/COFF/Symbols.cpp b/contrib/llvm/tools/lld/COFF/Symbols.cpp index 993e920ce7f..5c185a511dd 100644 --- a/contrib/llvm/tools/lld/COFF/Symbols.cpp +++ b/contrib/llvm/tools/lld/COFF/Symbols.cpp @@ -61,16 +61,19 @@ COFFSymbolRef DefinedCOFF::getCOFFSymbol() { return COFFSymbolRef(reinterpret_cast(Sym)); } +static Chunk *makeImportThunk(DefinedImportData *S, uint16_t Machine) { + if (Machine == AMD64) + return make(S); + if (Machine == I386) + return make(S); + assert(Machine == ARMNT); + return make(S); +} + DefinedImportThunk::DefinedImportThunk(StringRef Name, DefinedImportData *S, uint16_t Machine) - : Defined(DefinedImportThunkKind, Name) { - switch (Machine) { - case AMD64: Data = make(S); return; - case I386: Data = make(S); return; - case ARMNT: Data = make(S); return; - default: llvm_unreachable("unknown machine type"); - } -} + : Defined(DefinedImportThunkKind, Name), WrappedSym(S), + Data(makeImportThunk(S, Machine)) {} Defined *Undefined::getWeakAlias() { // A weak alias may be a weak alias to another symbol, so check recursively. diff --git a/contrib/llvm/tools/lld/COFF/Symbols.h b/contrib/llvm/tools/lld/COFF/Symbols.h index 1b83f73ff20..801fc87f91d 100644 --- a/contrib/llvm/tools/lld/COFF/Symbols.h +++ b/contrib/llvm/tools/lld/COFF/Symbols.h @@ -300,7 +300,6 @@ public: void setLocation(Chunk *AddressTable) { File->Location = AddressTable; } uint16_t getOrdinal() { return File->Hdr->OrdinalHint; } -private: ImportFile *File; }; @@ -320,6 +319,8 @@ public: uint64_t getRVA() { return Data->getRVA(); } Chunk *getChunk() { return Data; } + DefinedImportData *WrappedSym; + private: Chunk *Data; }; diff --git a/contrib/llvm/tools/lld/COFF/Writer.cpp b/contrib/llvm/tools/lld/COFF/Writer.cpp index cf3ad7ef045..fb1f3cae5bb 100644 --- a/contrib/llvm/tools/lld/COFF/Writer.cpp +++ b/contrib/llvm/tools/lld/COFF/Writer.cpp @@ -365,6 +365,9 @@ void Writer::createImportTables() { // the same order as in the command line. (That affects DLL // initialization order, and this ordering is MSVC-compatible.) for (ImportFile *File : Symtab->ImportFiles) { + if (!File->Live) + continue; + std::string DLL = StringRef(File->DLLName).lower(); if (Config->DLLOrder.count(DLL) == 0) Config->DLLOrder[DLL] = Config->DLLOrder.size(); @@ -372,19 +375,28 @@ void Writer::createImportTables() { OutputSection *Text = createSection(".text"); for (ImportFile *File : Symtab->ImportFiles) { + if (!File->Live) + continue; + if (DefinedImportThunk *Thunk = File->ThunkSym) Text->addChunk(Thunk->getChunk()); + if (Config->DelayLoads.count(StringRef(File->DLLName).lower())) { + if (!File->ThunkSym) + fatal("cannot delay-load " + toString(File) + + " due to import of data: " + toString(*File->ImpSym)); DelayIdata.add(File->ImpSym); } else { Idata.add(File->ImpSym); } } + if (!Idata.empty()) { OutputSection *Sec = createSection(".idata"); for (Chunk *C : Idata.getChunks()) Sec->addChunk(C); } + if (!DelayIdata.empty()) { Defined *Helper = cast(Config->DelayLoadHelper); DelayIdata.create(Helper); @@ -437,6 +449,14 @@ Optional Writer::createSymbol(Defined *Def) { if (!D->getChunk()->isLive()) return None; + if (auto *Sym = dyn_cast(Def)) + if (!Sym->File->Live) + return None; + + if (auto *Sym = dyn_cast(Def)) + if (!Sym->WrappedSym->File->Live) + return None; + coff_symbol16 Sym; StringRef Name = Def->getName(); if (Name.size() > COFF::NameSize) { @@ -491,14 +511,17 @@ void Writer::createSymbolAndStringTable() { Sec->setStringTableOff(addEntryToStringTable(Name)); } - for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) - for (SymbolBody *B : File->getSymbols()) - if (auto *D = dyn_cast(B)) - if (!D->WrittenToSymtab) { - D->WrittenToSymtab = true; - if (Optional Sym = createSymbol(D)) - OutputSymtab.push_back(*Sym); - } + for (lld::coff::ObjectFile *File : Symtab->ObjectFiles) { + for (SymbolBody *B : File->getSymbols()) { + auto *D = dyn_cast(B); + if (!D || D->WrittenToSymtab) + continue; + D->WrittenToSymtab = true; + + if (Optional Sym = createSymbol(D)) + OutputSymtab.push_back(*Sym); + } + } OutputSection *LastSection = OutputSections.back(); // We position the symbol table to be adjacent to the end of the last section. @@ -782,19 +805,15 @@ void Writer::writeBuildId() { if (BuildId == nullptr) return; - MD5 Hash; - MD5::MD5Result Res; - - Hash.update(ArrayRef{Buffer->getBufferStart(), - Buffer->getBufferEnd()}); - Hash.final(Res); - assert(BuildId->DI->Signature.CVSignature == OMF::Signature::PDB70 && "only PDB 7.0 is supported"); - assert(sizeof(Res) == sizeof(BuildId->DI->PDB70.Signature) && + assert(sizeof(BuildId->DI->PDB70.Signature) == 16 && "signature size mismatch"); - memcpy(BuildId->DI->PDB70.Signature, Res.Bytes.data(), - sizeof(codeview::PDB70DebugInfo::Signature)); + + // Compute an MD5 hash. + ArrayRef Buf(Buffer->getBufferStart(), Buffer->getBufferEnd()); + memcpy(BuildId->DI->PDB70.Signature, MD5::hash(Buf).data(), 16); + // TODO(compnerd) track the Age BuildId->DI->PDB70.Age = 1; } diff --git a/contrib/llvm/tools/lld/ELF/Config.h b/contrib/llvm/tools/lld/ELF/Config.h index 57a0e5a5ec7..54f6dc2acc7 100644 --- a/contrib/llvm/tools/lld/ELF/Config.h +++ b/contrib/llvm/tools/lld/ELF/Config.h @@ -145,6 +145,7 @@ struct Configuration { bool ZNow; bool ZOrigin; bool ZRelro; + bool ZRodynamic; bool ZText; bool ExitEarly; bool ZWxneeded; diff --git a/contrib/llvm/tools/lld/ELF/Driver.cpp b/contrib/llvm/tools/lld/ELF/Driver.cpp index 737c6a6bf11..325404447b2 100644 --- a/contrib/llvm/tools/lld/ELF/Driver.cpp +++ b/contrib/llvm/tools/lld/ELF/Driver.cpp @@ -572,10 +572,14 @@ static std::pair getHashStyle(opt::InputArgList &Args) { // -build-id=sha1 are actually tree hashes for performance reasons. static std::pair> getBuildId(opt::InputArgList &Args) { - if (Args.hasArg(OPT_build_id)) + auto *Arg = Args.getLastArg(OPT_build_id, OPT_build_id_eq); + if (!Arg) + return {BuildIdKind::None, {}}; + + if (Arg->getOption().getID() == OPT_build_id) return {BuildIdKind::Fast, {}}; - StringRef S = getString(Args, OPT_build_id_eq, "none"); + StringRef S = Arg->getValue(); if (S == "md5") return {BuildIdKind::Md5, {}}; if (S == "sha1" || S == "tree") @@ -688,6 +692,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) { Config->ZNow = hasZOption(Args, "now"); Config->ZOrigin = hasZOption(Args, "origin"); Config->ZRelro = !hasZOption(Args, "norelro"); + Config->ZRodynamic = hasZOption(Args, "rodynamic"); Config->ZStackSize = getZOptionValue(Args, "stack-size", 0); Config->ZText = !hasZOption(Args, "notext"); Config->ZWxneeded = hasZOption(Args, "wxneeded"); diff --git a/contrib/llvm/tools/lld/ELF/ICF.cpp b/contrib/llvm/tools/lld/ELF/ICF.cpp index 3722d4e3ed2..419ae681632 100644 --- a/contrib/llvm/tools/lld/ELF/ICF.cpp +++ b/contrib/llvm/tools/lld/ELF/ICF.cpp @@ -326,9 +326,9 @@ void ICF::forEachClass(std::function Fn) { size_t NumShards = 256; size_t Step = Sections.size() / NumShards; parallelForEachN(0, NumShards, [&](size_t I) { - forEachClassRange(I * Step, (I + 1) * Step, Fn); + size_t End = (I == NumShards - 1) ? Sections.size() : (I + 1) * Step; + forEachClassRange(I * Step, End, Fn); }); - forEachClassRange(Step * NumShards, Sections.size(), Fn); ++Cnt; } diff --git a/contrib/llvm/tools/lld/ELF/InputFiles.cpp b/contrib/llvm/tools/lld/ELF/InputFiles.cpp index fe036a644f4..98189825ccb 100644 --- a/contrib/llvm/tools/lld/ELF/InputFiles.cpp +++ b/contrib/llvm/tools/lld/ELF/InputFiles.cpp @@ -281,18 +281,20 @@ bool elf::ObjectFile::shouldMerge(const Elf_Shdr &Sec) { template void elf::ObjectFile::initializeSections( DenseSet &ComdatGroups) { + const ELFFile &Obj = this->getObj(); + ArrayRef ObjSections = check(this->getObj().sections(), toString(this)); - const ELFFile &Obj = this->getObj(); uint64_t Size = ObjSections.size(); this->Sections.resize(Size); - unsigned I = -1; + StringRef SectionStringTable = check(Obj.getSectionStringTable(ObjSections), toString(this)); - for (const Elf_Shdr &Sec : ObjSections) { - ++I; + + for (size_t I = 0, E = ObjSections.size(); I < E; I++) { if (this->Sections[I] == &InputSection::Discarded) continue; + const Elf_Shdr &Sec = ObjSections[I]; // SHF_EXCLUDE'ed sections are discarded by the linker. However, // if -r is given, we'll let the final link discard such sections. @@ -303,13 +305,22 @@ void elf::ObjectFile::initializeSections( } switch (Sec.sh_type) { - case SHT_GROUP: - this->Sections[I] = &InputSection::Discarded; - if (ComdatGroups - .insert( - CachedHashStringRef(getShtGroupSignature(ObjSections, Sec))) - .second) + case SHT_GROUP: { + // We discard comdat sections usually. When -r we should not do that. We + // still do deduplication in this case to simplify implementation, because + // otherwise merging group sections together would requre additional + // regeneration of its contents. + bool New = ComdatGroups + .insert(CachedHashStringRef( + getShtGroupSignature(ObjSections, Sec))) + .second; + if (New && Config->Relocatable) + this->Sections[I] = createInputSection(Sec, SectionStringTable); + else + this->Sections[I] = &InputSection::Discarded; + if (New) continue; + for (uint32_t SecIndex : getShtGroupEntries(Sec)) { if (SecIndex >= Size) fatal(toString(this) + @@ -317,6 +328,7 @@ void elf::ObjectFile::initializeSections( this->Sections[SecIndex] = &InputSection::Discarded; } break; + } case SHT_SYMTAB: this->initSymtab(ObjSections, &Sec); break; diff --git a/contrib/llvm/tools/lld/ELF/InputSection.cpp b/contrib/llvm/tools/lld/ELF/InputSection.cpp index e8cfd21c4c4..466656efbf0 100644 --- a/contrib/llvm/tools/lld/ELF/InputSection.cpp +++ b/contrib/llvm/tools/lld/ELF/InputSection.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Threading.h" #include using namespace llvm; @@ -172,7 +173,8 @@ void InputSectionBase::uncompress() { if (Error E = Dec.decompress({OutputBuf, Size})) fatal(toString(this) + ": decompress failed: " + llvm::toString(std::move(E))); - Data = ArrayRef((uint8_t *)OutputBuf, Size); + this->Data = ArrayRef((uint8_t *)OutputBuf, Size); + this->Flags &= ~(uint64_t)SHF_COMPRESSED; } uint64_t SectionBase::getOffset(const DefinedRegular &Sym) const { @@ -293,6 +295,24 @@ bool InputSectionBase::classof(const SectionBase *S) { return S->kind() != Output; } +void InputSection::copyShtGroup(uint8_t *Buf) { + assert(this->Type == SHT_GROUP); + + ArrayRef From = getDataAs(); + uint32_t *To = reinterpret_cast(Buf); + + // First entry is a flag word, we leave it unchanged. + *To++ = From[0]; + + // Here we adjust indices of sections that belong to group as it + // might change during linking. + ArrayRef Sections = this->File->getSections(); + for (uint32_t Val : From.slice(1)) { + uint32_t Index = read32(&Val, Config->Endianness); + write32(To++, Sections[Index]->OutSec->SectionIndex, Config->Endianness); + } +} + InputSectionBase *InputSection::getRelocatedSection() { assert(this->Type == SHT_RELA || this->Type == SHT_REL); ArrayRef Sections = this->File->getSections(); @@ -678,6 +698,13 @@ template void InputSection::writeTo(uint8_t *Buf) { return; } + // If -r is given, linker should keep SHT_GROUP sections. We should fixup + // them, see copyShtGroup(). + if (this->Type == SHT_GROUP) { + copyShtGroup(Buf + OutSecOff); + return; + } + // Copy section contents from source object file to output file // and then apply relocations. memcpy(Buf + OutSecOff, Data.data(), Data.size()); @@ -866,7 +893,7 @@ const SectionPiece *MergeInputSection::getSectionPiece(uint64_t Offset) const { // it is not just an addition to a base output offset. uint64_t MergeInputSection::getOffset(uint64_t Offset) const { // Initialize OffsetMap lazily. - std::call_once(InitOffsetMap, [&] { + llvm::call_once(InitOffsetMap, [&] { OffsetMap.reserve(Pieces.size()); for (const SectionPiece &Piece : Pieces) OffsetMap[Piece.InputOff] = Piece.OutputOff; diff --git a/contrib/llvm/tools/lld/ELF/InputSection.h b/contrib/llvm/tools/lld/ELF/InputSection.h index 303c398b58c..4ef4328e8a5 100644 --- a/contrib/llvm/tools/lld/ELF/InputSection.h +++ b/contrib/llvm/tools/lld/ELF/InputSection.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/Threading.h" #include namespace lld { @@ -248,7 +249,7 @@ private: std::vector Hashes; mutable llvm::DenseMap OffsetMap; - mutable std::once_flag InitOffsetMap; + mutable llvm::once_flag InitOffsetMap; llvm::DenseSet LiveOffsets; }; @@ -318,6 +319,8 @@ public: private: template void copyRelocations(uint8_t *Buf, llvm::ArrayRef Rels); + + void copyShtGroup(uint8_t *Buf); }; // The list of all input sections. diff --git a/contrib/llvm/tools/lld/ELF/LTO.cpp b/contrib/llvm/tools/lld/ELF/LTO.cpp index dd435173101..6915d971389 100644 --- a/contrib/llvm/tools/lld/ELF/LTO.cpp +++ b/contrib/llvm/tools/lld/ELF/LTO.cpp @@ -73,7 +73,12 @@ static std::unique_ptr createLTO() { Conf.Options = InitTargetOptionsFromCodeGenFlags(); Conf.Options.RelaxELFRelocations = true; - Conf.RelocModel = Config->Pic ? Reloc::PIC_ : Reloc::Static; + if (Config->Relocatable) + Conf.RelocModel = None; + else if (Config->Pic) + Conf.RelocModel = Reloc::PIC_; + else + Conf.RelocModel = Reloc::Static; Conf.CodeModel = GetCodeModelFromCMModel(); Conf.DisableVerify = Config->DisableVerify; Conf.DiagHandler = diagnosticHandler; diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp index c303f0524ad..492b81c1fa7 100644 --- a/contrib/llvm/tools/lld/ELF/LinkerScript.cpp +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.cpp @@ -20,6 +20,8 @@ #include "SymbolTable.h" #include "Symbols.h" #include "SyntheticSections.h" +#include "Target.h" +#include "Threads.h" #include "Writer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -198,6 +200,15 @@ bool OutputSectionCommand::classof(const BaseCommand *C) { return C->Kind == OutputSectionKind; } +// Fill [Buf, Buf + Size) with Filler. +// This is used for linker script "=fillexp" command. +static void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { + size_t I = 0; + for (; I + 4 < Size; I += 4) + memcpy(Buf + I, &Filler, 4); + memcpy(Buf + I, &Filler, Size - I); +} + bool InputSectionDescription::classof(const BaseCommand *C) { return C->Kind == InputSectionKind; } @@ -263,16 +274,16 @@ static bool matchConstraints(ArrayRef Sections, (!IsRW && Kind == ConstraintKind::ReadOnly); } -static void sortSections(InputSectionBase **Begin, InputSectionBase **End, +static void sortSections(InputSection **Begin, InputSection **End, SortSectionPolicy K) { if (K != SortSectionPolicy::Default && K != SortSectionPolicy::None) std::stable_sort(Begin, End, getComparator(K)); } // Compute and remember which sections the InputSectionDescription matches. -std::vector +std::vector LinkerScript::computeInputSections(const InputSectionDescription *Cmd) { - std::vector Ret; + std::vector Ret; // Collects all sections that satisfy constraints of Cmd. for (const SectionPattern &Pat : Cmd->SectionPatterns) { @@ -294,7 +305,7 @@ LinkerScript::computeInputSections(const InputSectionDescription *Cmd) { !Pat.SectionPat.match(Sec->Name)) continue; - Ret.push_back(Sec); + Ret.push_back(cast(Sec)); Sec->Assigned = true; } @@ -309,8 +320,8 @@ LinkerScript::computeInputSections(const InputSectionDescription *Cmd) { // --sort-section is handled as an inner SORT command. // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. // 4. If no SORT command is given, sort according to --sort-section. - InputSectionBase **Begin = Ret.data() + SizeBefore; - InputSectionBase **End = Ret.data() + Ret.size(); + InputSection **Begin = Ret.data() + SizeBefore; + InputSection **End = Ret.data() + Ret.size(); if (Pat.SortOuter != SortSectionPolicy::None) { if (Pat.SortInner == SortSectionPolicy::Default) sortSections(Begin, End, Config->SortSection); @@ -493,7 +504,7 @@ void LinkerScript::addOrphanSections(OutputSectionFactory &Factory) { Sec->SectionIndex = Index; } auto *ISD = make(""); - ISD->Sections.push_back(S); + ISD->Sections.push_back(cast(S)); Cmd->Commands.push_back(ISD); } } @@ -684,7 +695,6 @@ void LinkerScript::adjustSectionsBeforeSorting() { // '.' is assigned to, but creating these section should not have any bad // consequeces and gives us a section to put the symbol in. uint64_t Flags = SHF_ALLOC; - uint32_t Type = SHT_PROGBITS; for (int I = 0, E = Opt.Commands.size(); I != E; ++I) { auto *Cmd = dyn_cast(Opt.Commands[I]); @@ -692,14 +702,13 @@ void LinkerScript::adjustSectionsBeforeSorting() { continue; if (OutputSection *Sec = Cmd->Sec) { Flags = Sec->Flags; - Type = Sec->Type; continue; } if (isAllSectionDescription(*Cmd)) continue; - auto *OutSec = make(Cmd->Name, Type, Flags); + auto *OutSec = make(Cmd->Name, SHT_PROGBITS, Flags); OutSec->SectionIndex = I; OutputSections->push_back(OutSec); Cmd->Sec = OutSec; @@ -875,20 +884,20 @@ void LinkerScript::synchronize() { if (!Cmd) continue; ArrayRef Sections = Cmd->Sec->Sections; - std::vector ScriptSections; - DenseSet ScriptSectionsSet; + std::vector ScriptSections; + DenseSet ScriptSectionsSet; for (BaseCommand *Base : Cmd->Commands) { auto *ISD = dyn_cast(Base); if (!ISD) continue; - for (InputSectionBase *&IS : ISD->Sections) { + for (InputSection *&IS : ISD->Sections) { if (IS->Live) { ScriptSections.push_back(&IS); ScriptSectionsSet.insert(IS); } } } - std::vector Missing; + std::vector Missing; for (InputSection *IS : Sections) if (!ScriptSectionsSet.count(IS)) Missing.push_back(IS); @@ -896,7 +905,7 @@ void LinkerScript::synchronize() { auto ISD = make(""); ISD->Sections = Missing; Cmd->Commands.push_back(ISD); - for (InputSectionBase *&IS : ISD->Sections) + for (InputSection *&IS : ISD->Sections) if (IS->Live) ScriptSections.push_back(&IS); } @@ -1034,10 +1043,12 @@ OutputSectionCommand *LinkerScript::getCmd(OutputSection *Sec) const { return I->second; } -Optional LinkerScript::getFiller(OutputSection *Sec) { - if (OutputSectionCommand *Cmd = getCmd(Sec)) - return Cmd->Filler; - return None; +uint32_t OutputSectionCommand::getFiller() { + if (Filler) + return *Filler; + if (Sec->Flags & SHF_EXECINSTR) + return Target->TrapInstr; + return 0; } static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { @@ -1053,11 +1064,45 @@ static void writeInt(uint8_t *Buf, uint64_t Data, uint64_t Size) { llvm_unreachable("unsupported Size argument"); } -void LinkerScript::writeDataBytes(OutputSection *Sec, uint8_t *Buf) { - if (OutputSectionCommand *Cmd = getCmd(Sec)) - for (BaseCommand *Base : Cmd->Commands) - if (auto *Data = dyn_cast(Base)) - writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size); +template void OutputSectionCommand::writeTo(uint8_t *Buf) { + Sec->Loc = Buf; + + // We may have already rendered compressed content when using + // -compress-debug-sections option. Write it together with header. + if (!Sec->CompressedData.empty()) { + memcpy(Buf, Sec->ZDebugHeader.data(), Sec->ZDebugHeader.size()); + memcpy(Buf + Sec->ZDebugHeader.size(), Sec->CompressedData.data(), + Sec->CompressedData.size()); + return; + } + + // Write leading padding. + ArrayRef Sections = Sec->Sections; + uint32_t Filler = getFiller(); + if (Filler) + fill(Buf, Sections.empty() ? Sec->Size : Sections[0]->OutSecOff, Filler); + + parallelForEachN(0, Sections.size(), [=](size_t I) { + InputSection *IS = Sections[I]; + IS->writeTo(Buf); + + // Fill gaps between sections. + if (Filler) { + uint8_t *Start = Buf + IS->OutSecOff + IS->getSize(); + uint8_t *End; + if (I + 1 == Sections.size()) + End = Buf + Sec->Size; + else + End = Buf + Sections[I + 1]->OutSecOff; + fill(Start, End - Start, Filler); + } + }); + + // Linker scripts may have BYTE()-family commands with which you + // can write arbitrary bytes to the output. Process them if any. + for (BaseCommand *Base : Commands) + if (auto *Data = dyn_cast(Base)) + writeInt(Buf + Data->Offset, Data->Expression().getValue(), Data->Size); } bool LinkerScript::hasLMA(OutputSection *Sec) { @@ -1104,3 +1149,8 @@ size_t LinkerScript::getPhdrIndex(const Twine &Loc, StringRef PhdrName) { error(Loc + ": section header '" + PhdrName + "' is not listed in PHDRS"); return 0; } + +template void OutputSectionCommand::writeTo(uint8_t *Buf); +template void OutputSectionCommand::writeTo(uint8_t *Buf); +template void OutputSectionCommand::writeTo(uint8_t *Buf); +template void OutputSectionCommand::writeTo(uint8_t *Buf); diff --git a/contrib/llvm/tools/lld/ELF/LinkerScript.h b/contrib/llvm/tools/lld/ELF/LinkerScript.h index d0a4d83d72b..e56e569d4e7 100644 --- a/contrib/llvm/tools/lld/ELF/LinkerScript.h +++ b/contrib/llvm/tools/lld/ELF/LinkerScript.h @@ -130,6 +130,9 @@ struct OutputSectionCommand : BaseCommand { ConstraintKind Constraint = ConstraintKind::NoConstraint; std::string Location; std::string MemoryRegionName; + + template void writeTo(uint8_t *Buf); + uint32_t getFiller(); }; // This struct represents one section match pattern in SECTIONS() command. @@ -157,7 +160,7 @@ struct InputSectionDescription : BaseCommand { // will be associated with this InputSectionDescription. std::vector SectionPatterns; - std::vector Sections; + std::vector Sections; }; // Represents an ASSERT(). @@ -213,11 +216,10 @@ struct ScriptConfiguration { class LinkerScript final { llvm::DenseMap SecToCommand; - OutputSectionCommand *getCmd(OutputSection *Sec) const; void assignSymbol(SymbolAssignment *Cmd, bool InSec); void setDot(Expr E, const Twine &Loc, bool InSec); - std::vector + std::vector computeInputSections(const InputSectionDescription *); std::vector @@ -244,6 +246,7 @@ class LinkerScript final { MemoryRegion *CurMemRegion = nullptr; public: + OutputSectionCommand *getCmd(OutputSection *Sec) const; bool hasPhdrsCommands() { return !Opt.PhdrsCommands.empty(); } uint64_t getDot() { return Dot; } OutputSection *getOutputSection(const Twine &Loc, StringRef S); @@ -263,7 +266,6 @@ public: std::vector createPhdrs(); bool ignoreInterpSection(); - llvm::Optional getFiller(OutputSection *Sec); bool hasLMA(OutputSection *Sec); bool shouldKeep(InputSectionBase *S); void assignOffsets(OutputSectionCommand *Cmd); @@ -272,7 +274,6 @@ public: void synchronize(); void assignAddresses(std::vector &Phdrs); - void writeDataBytes(OutputSection *Sec, uint8_t *Buf); void addSymbol(SymbolAssignment *Cmd); void processCommands(OutputSectionFactory &Factory); diff --git a/contrib/llvm/tools/lld/ELF/MapFile.cpp b/contrib/llvm/tools/lld/ELF/MapFile.cpp index 7b82eceba02..806e99e3d9d 100644 --- a/contrib/llvm/tools/lld/ELF/MapFile.cpp +++ b/contrib/llvm/tools/lld/ELF/MapFile.cpp @@ -132,12 +132,17 @@ void elf::writeMapFile(llvm::ArrayRef Script) { OS << OSec->Name << '\n'; // Dump symbols for each input section. - for (InputSection *IS : OSec->Sections) { - writeHeader(OS, OSec->Addr + IS->OutSecOff, IS->getSize(), - IS->Alignment); - OS << indent(1) << toString(IS) << '\n'; - for (DefinedRegular *Sym : SectionSyms[IS]) - OS << SymStr[Sym] << '\n'; + for (BaseCommand *Base : Cmd->Commands) { + auto *ISD = dyn_cast(Base); + if (!ISD) + continue; + for (InputSection *IS : ISD->Sections) { + writeHeader(OS, OSec->Addr + IS->OutSecOff, IS->getSize(), + IS->Alignment); + OS << indent(1) << toString(IS) << '\n'; + for (DefinedRegular *Sym : SectionSyms[IS]) + OS << SymStr[Sym] << '\n'; + } } } } diff --git a/contrib/llvm/tools/lld/ELF/Options.td b/contrib/llvm/tools/lld/ELF/Options.td index 65a0e72d232..335c7ade6db 100644 --- a/contrib/llvm/tools/lld/ELF/Options.td +++ b/contrib/llvm/tools/lld/ELF/Options.td @@ -313,6 +313,7 @@ def alias_o_output2 : Separate<["--"], "output">, Alias; def alias_pie_pic_executable: F<"pic-executable">, Alias; def alias_print_map_M: Flag<["-"], "M">, Alias; def alias_relocatable_r: Flag<["-"], "r">, Alias; +def alias_reproduce_eq: J<"reproduce=">, Alias; def alias_retain_symbols_file: S<"retain-symbols-file">, Alias; def alias_rpath_R: JoinedOrSeparate<["-"], "R">, Alias; def alias_rpath_rpath: J<"rpath=">, Alias; diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.cpp b/contrib/llvm/tools/lld/ELF/OutputSections.cpp index dcefd03766d..d82fdcdc31b 100644 --- a/contrib/llvm/tools/lld/ELF/OutputSections.cpp +++ b/contrib/llvm/tools/lld/ELF/OutputSections.cpp @@ -103,7 +103,7 @@ template void OutputSection::maybeCompress() { // Write section contents to a temporary buffer and compress it. std::vector Buf(Size); - writeTo(Buf.data()); + Script->getCmd(this)->writeTo(Buf.data()); if (Error E = zlib::compress(toStringRef(Buf), CompressedData)) fatal("compress failed: " + llvm::toString(std::move(E))); @@ -112,6 +112,19 @@ template void OutputSection::maybeCompress() { Flags |= SHF_COMPRESSED; } +template static void finalizeShtGroup(OutputSection *Sec) { + // sh_link field for SHT_GROUP sections should contain the section index of + // the symbol table. + Sec->Link = InX::SymTab->OutSec->SectionIndex; + + // sh_link then contain index of an entry in symbol table section which + // provides signature of the section group. + elf::ObjectFile *Obj = Sec->Sections[0]->getFile(); + assert(Config->Relocatable && Sec->Sections.size() == 1); + ArrayRef Symbols = Obj->getSymbols(); + Sec->Info = InX::SymTab->getSymbolIndex(Symbols[Sec->Sections[0]->Info - 1]); +} + template void OutputSection::finalize() { if ((this->Flags & SHF_LINK_ORDER) && !this->Sections.empty()) { std::sort(Sections.begin(), Sections.end(), compareByFilePosition); @@ -126,6 +139,11 @@ template void OutputSection::finalize() { } uint32_t Type = this->Type; + if (Type == SHT_GROUP) { + finalizeShtGroup(this); + return; + } + if (!Config->CopyRelocs || (Type != SHT_RELA && Type != SHT_REL)) return; @@ -259,69 +277,6 @@ void OutputSection::sortCtorsDtors() { std::stable_sort(Sections.begin(), Sections.end(), compCtors); } -// Fill [Buf, Buf + Size) with Filler. -// This is used for linker script "=fillexp" command. -static void fill(uint8_t *Buf, size_t Size, uint32_t Filler) { - size_t I = 0; - for (; I + 4 < Size; I += 4) - memcpy(Buf + I, &Filler, 4); - memcpy(Buf + I, &Filler, Size - I); -} - -uint32_t OutputSection::getFiller() { - // Determine what to fill gaps between InputSections with, as specified by the - // linker script. If nothing is specified and this is an executable section, - // fall back to trap instructions to prevent bad diassembly and detect invalid - // jumps to padding. - if (Optional Filler = Script->getFiller(this)) - return *Filler; - if (Flags & SHF_EXECINSTR) - return Target->TrapInstr; - return 0; -} - -template void OutputSection::writeTo(uint8_t *Buf) { - Loc = Buf; - - // We may have already rendered compressed content when using - // -compress-debug-sections option. Write it together with header. - if (!CompressedData.empty()) { - memcpy(Buf, ZDebugHeader.data(), ZDebugHeader.size()); - memcpy(Buf + ZDebugHeader.size(), CompressedData.data(), - CompressedData.size()); - return; - } - - // Write leading padding. - uint32_t Filler = getFiller(); - if (Filler) - fill(Buf, Sections.empty() ? Size : Sections[0]->OutSecOff, Filler); - - parallelForEachN(0, Sections.size(), [=](size_t I) { - InputSection *Sec = Sections[I]; - Sec->writeTo(Buf); - - // Fill gaps between sections. - if (Filler) { - uint8_t *Start = Buf + Sec->OutSecOff + Sec->getSize(); - uint8_t *End; - if (I + 1 == Sections.size()) - End = Buf + Size; - else - End = Buf + Sections[I + 1]->OutSecOff; - fill(Start, End - Start, Filler); - } - }); - - // Linker scripts may have BYTE()-family commands with which you - // can write arbitrary bytes to the output. Process them if any. - Script->writeDataBytes(this, Buf); -} - -static uint64_t getOutFlags(InputSectionBase *S) { - return S->Flags & ~SHF_GROUP & ~SHF_COMPRESSED; -} - static SectionKey createKey(InputSectionBase *C, StringRef OutsecName) { // The ELF spec just says // ---------------------------------------------------------------- @@ -418,7 +373,10 @@ void OutputSectionFactory::addInputSec(InputSectionBase *IS, return; } - uint64_t Flags = getOutFlags(IS); + uint64_t Flags = IS->Flags; + if (!Config->Relocatable) + Flags &= ~(uint64_t)SHF_GROUP; + if (Sec) { if (getIncompatibleFlags(Sec->Flags) != getIncompatibleFlags(IS->Flags)) error("incompatible section flags for " + Sec->Name + @@ -484,8 +442,3 @@ template void OutputSection::maybeCompress(); template void OutputSection::maybeCompress(); template void OutputSection::maybeCompress(); template void OutputSection::maybeCompress(); - -template void OutputSection::writeTo(uint8_t *Buf); -template void OutputSection::writeTo(uint8_t *Buf); -template void OutputSection::writeTo(uint8_t *Buf); -template void OutputSection::writeTo(uint8_t *Buf); diff --git a/contrib/llvm/tools/lld/ELF/OutputSections.h b/contrib/llvm/tools/lld/ELF/OutputSections.h index 413871b60cf..08655a9ed67 100644 --- a/contrib/llvm/tools/lld/ELF/OutputSections.h +++ b/contrib/llvm/tools/lld/ELF/OutputSections.h @@ -82,8 +82,6 @@ public: void sort(std::function Order); void sortInitFini(); void sortCtorsDtors(); - uint32_t getFiller(); - template void writeTo(uint8_t *Buf); template void finalize(); template void maybeCompress(); void assignOffsets(); diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp index 599f1441a47..d3db32613a8 100644 --- a/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp +++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.cpp @@ -1005,10 +1005,11 @@ DynamicSection::DynamicSection() ".dynamic") { this->Entsize = ELFT::Is64Bits ? 16 : 8; - // .dynamic section is not writable on MIPS. + // .dynamic section is not writable on MIPS and on Fuchsia OS + // which passes -z rodynamic. // See "Special Section" in Chapter 4 in the following document: // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf - if (Config->EMachine == EM_MIPS) + if (Config->EMachine == EM_MIPS || Config->ZRodynamic) this->Flags = SHF_ALLOC; addEntries(); @@ -1053,7 +1054,15 @@ template void DynamicSection::addEntries() { if (DtFlags1) add({DT_FLAGS_1, DtFlags1}); - if (!Config->Shared && !Config->Relocatable) + // DT_DEBUG is a pointer to debug informaion used by debuggers at runtime. We + // need it for each process, so we don't write it for DSOs. The loader writes + // the pointer into this entry. + // + // DT_DEBUG is the only .dynamic entry that needs to be written to. Some + // systems (currently only Fuchsia OS) provide other means to give the + // debugger this information. Such systems may choose make .dynamic read-only. + // If the target is such a system (used -z rodynamic) don't write DT_DEBUG. + if (!Config->Shared && !Config->Relocatable && !Config->ZRodynamic) add({DT_DEBUG, (uint64_t)0}); } @@ -1778,11 +1787,10 @@ void GdbIndexSection::readDwarf(InputSection *Sec) { std::tie(IsNew, Sym) = SymbolTable.add(Hash, Offset); if (IsNew) { Sym->CuVectorIndex = CuVectors.size(); - CuVectors.push_back({{CuId, Pair.second}}); - continue; + CuVectors.resize(CuVectors.size() + 1); } - CuVectors[Sym->CuVectorIndex].push_back({CuId, Pair.second}); + CuVectors[Sym->CuVectorIndex].insert((Pair.second << 24) | (uint32_t)CuId); } } @@ -1806,7 +1814,7 @@ void GdbIndexSection::finalizeContents() { ConstantPoolOffset = SymTabOffset + SymbolTable.getCapacity() * SymTabEntrySize; - for (std::vector> &CuVec : CuVectors) { + for (std::set &CuVec : CuVectors) { CuVectorsOffset.push_back(CuVectorsSize); CuVectorsSize += OffsetTypeSize * (CuVec.size() + 1); } @@ -1859,14 +1867,11 @@ void GdbIndexSection::writeTo(uint8_t *Buf) { } // Write the CU vectors into the constant pool. - for (std::vector> &CuVec : CuVectors) { + for (std::set &CuVec : CuVectors) { write32le(Buf, CuVec.size()); Buf += 4; - for (std::pair &P : CuVec) { - uint32_t Index = P.first; - uint8_t Flags = P.second; - Index |= Flags << 24; - write32le(Buf, Index); + for (uint32_t Val : CuVec) { + write32le(Buf, Val); Buf += 4; } } @@ -2173,17 +2178,6 @@ MipsRldMapSection::MipsRldMapSection() : SyntheticSection(SHF_ALLOC | SHF_WRITE, SHT_PROGBITS, Config->Wordsize, ".rld_map") {} -void MipsRldMapSection::writeTo(uint8_t *Buf) { - // Apply filler from linker script. - Optional Fill = Script->getFiller(this->OutSec); - if (!Fill || *Fill == 0) - return; - - uint64_t Filler = *Fill; - Filler = (Filler << 32) | Filler; - memcpy(Buf, &Filler, getSize()); -} - ARMExidxSentinelSection::ARMExidxSentinelSection() : SyntheticSection(SHF_ALLOC | SHF_LINK_ORDER, SHT_ARM_EXIDX, Config->Wordsize, ".ARM.exidx") {} @@ -2194,7 +2188,7 @@ ARMExidxSentinelSection::ARMExidxSentinelSection() // | PREL31 upper bound of code that has exception tables | EXIDX_CANTUNWIND | void ARMExidxSentinelSection::writeTo(uint8_t *Buf) { // Get the InputSection before us, we are by definition last - auto RI = cast(this->OutSec)->Sections.rbegin(); + auto RI = this->OutSec->Sections.rbegin(); InputSection *LE = *(++RI); InputSection *LC = cast(LE->getLinkOrderDep()); uint64_t S = LC->OutSec->Addr + LC->getOffset(LC->getSize()); diff --git a/contrib/llvm/tools/lld/ELF/SyntheticSections.h b/contrib/llvm/tools/lld/ELF/SyntheticSections.h index c5ffb88c136..61cc03de222 100644 --- a/contrib/llvm/tools/lld/ELF/SyntheticSections.h +++ b/contrib/llvm/tools/lld/ELF/SyntheticSections.h @@ -27,6 +27,8 @@ #include "llvm/ADT/MapVector.h" #include "llvm/MC/StringTableBuilder.h" +#include + namespace lld { namespace elf { @@ -515,7 +517,7 @@ public: GdbHashTab SymbolTable; // The CU vector portion of the constant pool. - std::vector>> CuVectors; + std::vector> CuVectors; std::vector AddressArea; @@ -709,7 +711,7 @@ class MipsRldMapSection : public SyntheticSection { public: MipsRldMapSection(); size_t getSize() const override { return Config->Wordsize; } - void writeTo(uint8_t *Buf) override; + void writeTo(uint8_t *Buf) override {} }; class ARMExidxSentinelSection : public SyntheticSection { diff --git a/contrib/llvm/tools/lld/ELF/Writer.cpp b/contrib/llvm/tools/lld/ELF/Writer.cpp index d66e3d8b38a..0764d159edb 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.cpp +++ b/contrib/llvm/tools/lld/ELF/Writer.cpp @@ -46,6 +46,7 @@ public: void run(); private: + void clearOutputSections(); void createSyntheticSections(); void copyLocalSymbols(); void addSectionSymbols(); @@ -80,6 +81,8 @@ private: void addStartStopSymbols(OutputSection *Sec); uint64_t getEntryAddr(); OutputSection *findSection(StringRef Name); + OutputSection *findSectionInScript(StringRef Name); + OutputSectionCommand *findSectionCommand(StringRef Name); std::vector Phdrs; @@ -161,7 +164,7 @@ static void combineMergableSections() { continue; StringRef OutsecName = getOutputSectionName(MS->Name); - uint64_t Flags = MS->Flags & ~(uint64_t)(SHF_GROUP | SHF_COMPRESSED); + uint64_t Flags = MS->Flags & ~(uint64_t)SHF_GROUP; uint32_t Alignment = std::max(MS->Alignment, MS->Entsize); auto I = llvm::find_if(MergeSections, [=](MergeSyntheticSection *Sec) { @@ -198,6 +201,15 @@ template static void combineEhFrameSections() { V.erase(std::remove(V.begin(), V.end(), nullptr), V.end()); } +template void Writer::clearOutputSections() { + // Clear the OutputSections to make sure it is not used anymore. Any + // code from this point on should be using the linker script + // commands. + for (OutputSection *Sec : OutputSections) + Sec->Sections.clear(); + OutputSections.clear(); +} + // The main function of the writer. template void Writer::run() { // Create linker-synthesized sections such as .got or .plt. @@ -244,13 +256,21 @@ template void Writer::run() { if (ErrorCount) return; + if (!Script->Opt.HasSections) { + if (!Config->Relocatable) + fixSectionAlignments(); + Script->fabricateDefaultCommands(); + } + + // If -compressed-debug-sections is specified, we need to compress + // .debug_* sections. Do it right now because it changes the size of + // output sections. + parallelForEach(OutputSections.begin(), OutputSections.end(), + [](OutputSection *S) { S->maybeCompress(); }); + if (Config->Relocatable) { assignFileOffsets(); } else { - if (!Script->Opt.HasSections) { - fixSectionAlignments(); - Script->fabricateDefaultCommands(); - } Script->synchronize(); Script->assignAddresses(Phdrs); @@ -281,6 +301,7 @@ template void Writer::run() { } else { writeSectionsBinary(); } + clearOutputSections(); // Backfill .note.gnu.build-id section content. This is done at last // because the content is usually a hash value of the entire output file. @@ -288,10 +309,6 @@ template void Writer::run() { if (ErrorCount) return; - // Clear the OutputSections to make sure it is not used anymore. Any - // code from this point on should be using the linker script - // commands. - OutputSections.clear(); // Handle -Map option. writeMapFile(Script->Opt.Commands); @@ -631,10 +648,11 @@ bool elf::isRelroSection(const OutputSection *Sec) { // * It is easy to check if a give branch was taken. // * It is easy two see how similar two ranks are (see getRankProximity). enum RankFlags { - RF_NOT_ADDR_SET = 1 << 15, - RF_NOT_INTERP = 1 << 14, - RF_NOT_ALLOC = 1 << 13, - RF_WRITE = 1 << 12, + RF_NOT_ADDR_SET = 1 << 16, + RF_NOT_INTERP = 1 << 15, + RF_NOT_ALLOC = 1 << 14, + RF_WRITE = 1 << 13, + RF_EXEC_WRITE = 1 << 12, RF_EXEC = 1 << 11, RF_NON_TLS_BSS = 1 << 10, RF_NON_TLS_BSS_RO = 1 << 9, @@ -669,19 +687,29 @@ static unsigned getSectionRank(const OutputSection *Sec) { if (!(Sec->Flags & SHF_ALLOC)) return Rank | RF_NOT_ALLOC; - // We want the read only sections first so that they go in the PT_LOAD - // covering the program headers at the start of the file. - if (Sec->Flags & SHF_WRITE) - Rank |= RF_WRITE; + // Sort sections based on their access permission in the following + // order: R, RX, RWX, RW. This order is based on the following + // considerations: + // * Read-only sections come first such that they go in the + // PT_LOAD covering the program headers at the start of the file. + // * Read-only, executable sections come next, unless the + // -no-rosegment option is used. + // * Writable, executable sections follow such that .plt on + // architectures where it needs to be writable will be placed + // between .text and .data. + // * Writable sections come last, such that .bss lands at the very + // end of the last PT_LOAD. + bool IsExec = Sec->Flags & SHF_EXECINSTR; + bool IsWrite = Sec->Flags & SHF_WRITE; - if (Sec->Flags & SHF_EXECINSTR) { - // For a corresponding reason, put non exec sections first (the program - // header PT_LOAD is not executable). - // We only do that if we are not using linker scripts, since with linker - // scripts ro and rx sections are in the same PT_LOAD, so their relative - // order is not important. The same applies for -no-rosegment. - if ((Rank & RF_WRITE) || !Config->SingleRoRx) + if (IsExec) { + if (IsWrite) + Rank |= RF_EXEC_WRITE; + else if (!Config->SingleRoRx) Rank |= RF_EXEC; + } else { + if (IsWrite) + Rank |= RF_WRITE; } // If we got here we know that both A and B are in the same PT_LOAD. @@ -778,12 +806,6 @@ static bool compareSections(const OutputSection *A, const OutputSection *B) { return compareSectionsNonScript(A, B); } -// Program header entry -PhdrEntry::PhdrEntry(unsigned Type, unsigned Flags) { - p_type = Type; - p_flags = Flags; -} - void PhdrEntry::add(OutputSection *Sec) { Last = Sec; if (!First) @@ -1239,12 +1261,6 @@ template void Writer::finalizeSections() { for (OutputSection *Sec : OutputSections) Sec->finalize(); - // If -compressed-debug-sections is specified, we need to compress - // .debug_* sections. Do it right now because it changes the size of - // output sections. - parallelForEach(OutputSections.begin(), OutputSections.end(), - [](OutputSection *S) { S->maybeCompress(); }); - // createThunks may have added local symbols to the static symbol table applySynthetic({InX::SymTab, InX::ShStrTab, InX::StrTab}, [](SyntheticSection *SS) { SS->postThunkContents(); }); @@ -1297,6 +1313,21 @@ void Writer::addStartStopSymbols(OutputSection *Sec) { addOptionalRegular(Saver.save("__stop_" + S), Sec, -1, STV_DEFAULT); } +template +OutputSectionCommand *Writer::findSectionCommand(StringRef Name) { + for (BaseCommand *Base : Script->Opt.Commands) + if (auto *Cmd = dyn_cast(Base)) + if (Cmd->Name == Name) + return Cmd; + return nullptr; +} + +template OutputSection *Writer::findSectionInScript(StringRef Name) { + if (OutputSectionCommand *Cmd = findSectionCommand(Name)) + return Cmd->Sec; + return nullptr; +} + template OutputSection *Writer::findSection(StringRef Name) { for (OutputSection *Sec : OutputSections) if (Sec->Name == Name) @@ -1578,7 +1609,7 @@ template uint64_t Writer::getEntryAddr() { return Addr; // Case 4 - if (OutputSection *Sec = findSection(".text")) { + if (OutputSection *Sec = findSectionInScript(".text")) { if (Config->WarnMissingEntry) warn("cannot find entry symbol " + Config->Entry + "; defaulting to 0x" + utohexstr(Sec->Addr)); @@ -1604,18 +1635,6 @@ static uint16_t getELFType() { // to each section. This function fixes some predefined // symbol values that depend on section address and size. template void Writer::fixPredefinedSymbols() { - auto Set = [](DefinedRegular *S1, DefinedRegular *S2, OutputSection *Sec, - uint64_t Value) { - if (S1) { - S1->Section = Sec; - S1->Value = Value; - } - if (S2) { - S2->Section = Sec; - S2->Value = Value; - } - }; - // _etext is the first location after the last read-only loadable segment. // _edata is the first location after the last read-write loadable segment. // _end is the first location after the uninitialized data region. @@ -1631,15 +1650,29 @@ template void Writer::fixPredefinedSymbols() { else LastRO = &P; } - if (Last) - Set(ElfSym::End1, ElfSym::End2, Last->First, Last->p_memsz); - if (LastRO) - Set(ElfSym::Etext1, ElfSym::Etext2, LastRO->First, LastRO->p_filesz); - if (LastRW) - Set(ElfSym::Edata1, ElfSym::Edata2, LastRW->First, LastRW->p_filesz); + + auto Set = [](DefinedRegular *S, OutputSection *Sec, uint64_t Value) { + if (S) { + S->Section = Sec; + S->Value = Value; + } + }; + + if (Last) { + Set(ElfSym::End1, Last->First, Last->p_memsz); + Set(ElfSym::End2, Last->First, Last->p_memsz); + } + if (LastRO) { + Set(ElfSym::Etext1, LastRO->First, LastRO->p_filesz); + Set(ElfSym::Etext2, LastRO->First, LastRO->p_filesz); + } + if (LastRW) { + Set(ElfSym::Edata1, LastRW->First, LastRW->p_filesz); + Set(ElfSym::Edata2, LastRW->First, LastRW->p_filesz); + } if (ElfSym::Bss) - ElfSym::Bss->Section = findSection(".bss"); + ElfSym::Bss->Section = findSectionInScript(".bss"); // Setup MIPS _gp_disp/__gnu_local_gp symbols which should // be equal to the _gp symbol's value. @@ -1731,9 +1764,14 @@ template void Writer::openFile() { template void Writer::writeSectionsBinary() { uint8_t *Buf = Buffer->getBufferStart(); - for (OutputSection *Sec : OutputSections) + for (BaseCommand *Base : Script->Opt.Commands) { + auto *Cmd = dyn_cast(Base); + if (!Cmd) + continue; + OutputSection *Sec = Cmd->Sec; if (Sec->Flags & SHF_ALLOC) - Sec->writeTo(Buf + Sec->Offset); + Cmd->writeTo(Buf + Sec->Offset); + } } // Write section contents to a mmap'ed file. @@ -1742,31 +1780,45 @@ template void Writer::writeSections() { // PPC64 needs to process relocations in the .opd section // before processing relocations in code-containing sections. - Out::Opd = findSection(".opd"); - if (Out::Opd) { + if (auto *OpdCmd = findSectionCommand(".opd")) { + Out::Opd = OpdCmd->Sec; Out::OpdBuf = Buf + Out::Opd->Offset; - Out::Opd->template writeTo(Buf + Out::Opd->Offset); + OpdCmd->template writeTo(Buf + Out::Opd->Offset); } OutputSection *EhFrameHdr = - In::EhFrameHdr ? In::EhFrameHdr->OutSec : nullptr; + (In::EhFrameHdr && !In::EhFrameHdr->empty()) + ? In::EhFrameHdr->OutSec + : nullptr; // In -r or -emit-relocs mode, write the relocation sections first as in // ELf_Rel targets we might find out that we need to modify the relocated // section while doing it. - for (OutputSection *Sec : OutputSections) + for (BaseCommand *Base : Script->Opt.Commands) { + auto *Cmd = dyn_cast(Base); + if (!Cmd) + continue; + OutputSection *Sec = Cmd->Sec; if (Sec->Type == SHT_REL || Sec->Type == SHT_RELA) - Sec->writeTo(Buf + Sec->Offset); + Cmd->writeTo(Buf + Sec->Offset); + } - for (OutputSection *Sec : OutputSections) + for (BaseCommand *Base : Script->Opt.Commands) { + auto *Cmd = dyn_cast(Base); + if (!Cmd) + continue; + OutputSection *Sec = Cmd->Sec; if (Sec != Out::Opd && Sec != EhFrameHdr && Sec->Type != SHT_REL && Sec->Type != SHT_RELA) - Sec->writeTo(Buf + Sec->Offset); + Cmd->writeTo(Buf + Sec->Offset); + } // The .eh_frame_hdr depends on .eh_frame section contents, therefore // it should be written after .eh_frame is written. - if (EhFrameHdr && !EhFrameHdr->Sections.empty()) - EhFrameHdr->writeTo(Buf + EhFrameHdr->Offset); + if (EhFrameHdr) { + OutputSectionCommand *Cmd = Script->getCmd(EhFrameHdr); + Cmd->writeTo(Buf + EhFrameHdr->Offset); + } } template void Writer::writeBuildId() { diff --git a/contrib/llvm/tools/lld/ELF/Writer.h b/contrib/llvm/tools/lld/ELF/Writer.h index 17fbda394a2..e935b6419de 100644 --- a/contrib/llvm/tools/lld/ELF/Writer.h +++ b/contrib/llvm/tools/lld/ELF/Writer.h @@ -30,7 +30,7 @@ bool isRelroSection(const OutputSection *Sec); // Each contains type, access flags and range of output sections that will be // placed in it. struct PhdrEntry { - PhdrEntry(unsigned Type, unsigned Flags); + PhdrEntry(unsigned Type, unsigned Flags) : p_type(Type), p_flags(Flags) {} void add(OutputSection *Sec); uint64_t p_paddr = 0; diff --git a/contrib/llvm/tools/lldb/include/lldb/API/SBStructuredData.h b/contrib/llvm/tools/lldb/include/lldb/API/SBStructuredData.h index 5fb5d3be65a..f7a6469bb8d 100644 --- a/contrib/llvm/tools/lldb/include/lldb/API/SBStructuredData.h +++ b/contrib/llvm/tools/lldb/include/lldb/API/SBStructuredData.h @@ -37,11 +37,70 @@ public: lldb::SBError GetDescription(lldb::SBStream &stream) const; + //------------------------------------------------------------------ + /// Return the type of data in this data structure + //------------------------------------------------------------------ + lldb::StructuredDataType GetType() const; + + //------------------------------------------------------------------ + /// Return the size (i.e. number of elements) in this data structure + /// if it is an array or dictionary type. For other types, 0 will be + // returned. + //------------------------------------------------------------------ + size_t GetSize() const; + + //------------------------------------------------------------------ + /// Return the value corresponding to a key if this data structure + /// is a dictionary type. + //------------------------------------------------------------------ + lldb::SBStructuredData GetValueForKey(const char *key) const; + + //------------------------------------------------------------------ + /// Return the value corresponding to an index if this data structure + /// is array. + //------------------------------------------------------------------ + lldb::SBStructuredData GetItemAtIndex(size_t idx) const; + + //------------------------------------------------------------------ + /// Return the integer value if this data structure is an integer type. + //------------------------------------------------------------------ + uint64_t GetIntegerValue(uint64_t fail_value = 0) const; + + //------------------------------------------------------------------ + /// Return the floating point value if this data structure is a floating + /// type. + //------------------------------------------------------------------ + double GetFloatValue(double fail_value = 0.0) const; + + //------------------------------------------------------------------ + /// Return the boolean value if this data structure is a boolean type. + //------------------------------------------------------------------ + bool GetBooleanValue(bool fail_value = false) const; + + //------------------------------------------------------------------ + /// Provides the string value if this data structure is a string type. + /// + /// @param[out] dst + /// pointer where the string value will be written. In case it is null, + /// nothing will be written at @dst. + /// + /// @param[in] dst_len + /// max number of characters that can be written at @dst. In case it is + /// zero, nothing will be written at @dst. If this length is not enough + /// to write the complete string value, (dst_len-1) bytes of the string + /// value will be written at @dst followed by a null character. + /// + /// @return + /// Returns the byte size needed to completely write the string value at + /// @dst in all cases. + //------------------------------------------------------------------ + size_t GetStringValue(char *dst, size_t dst_len) const; + protected: friend class SBTraceOptions; StructuredDataImplUP m_impl_up; }; -} +} // namespace lldb #endif /* SBStructuredData_h */ diff --git a/contrib/llvm/tools/lldb/include/lldb/API/SBTrace.h b/contrib/llvm/tools/lldb/include/lldb/API/SBTrace.h index e29a5db7cc4..244a01e5ce1 100644 --- a/contrib/llvm/tools/lldb/include/lldb/API/SBTrace.h +++ b/contrib/llvm/tools/lldb/include/lldb/API/SBTrace.h @@ -40,7 +40,7 @@ public: /// /// @param[in] thread_id /// Tracing could be started for the complete process or a - /// single thread, in the first case the uid obtained would + /// single thread, in the first case the traceid obtained would /// map to all the threads existing within the process and the /// ones spawning later. The thread_id parameter can be used in /// such a scenario to select the trace data for a specific @@ -68,16 +68,17 @@ public: /// An error explaining what went wrong. /// /// @param[in] thread_id - /// The user id could map to a tracing instance for a thread + /// The trace id could map to a tracing instance for a thread /// or could also map to a group of threads being traced with /// the same trace options. A thread_id is normally optional /// except in the case of tracing a complete process and tracing /// needs to switched off on a particular thread. /// A situation could occur where initially a thread (lets say - /// thread A) is being individually traced with a particular uid - /// and then tracing is started on the complete process, in this - /// case thread A will continue without any change. All newly - /// spawned threads would be traced with the uid of the process. + /// thread A) is being individually traced with a particular + /// trace id and then tracing is started on the complete + /// process, in this case thread A will continue without any + /// change. All newly spawned threads would be traced with the + /// trace id of the process. /// Now if the StopTrace API is called for the whole process, /// thread A will not be stopped and must be stopped separately. //------------------------------------------------------------------ diff --git a/contrib/llvm/tools/lldb/include/lldb/Core/StructuredData.h b/contrib/llvm/tools/lldb/include/lldb/Core/StructuredData.h index 6cb78dc48ab..39c2f04bb38 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Core/StructuredData.h +++ b/contrib/llvm/tools/lldb/include/lldb/Core/StructuredData.h @@ -13,7 +13,8 @@ #include "llvm/ADT/StringRef.h" #include "lldb/Utility/ConstString.h" -#include "lldb/Utility/FileSpec.h" // for FileSpec +#include "lldb/Utility/FileSpec.h" // for FileSpec +#include "lldb/lldb-enumerations.h" // for StructuredDataType #include #include @@ -71,46 +72,38 @@ public: typedef std::shared_ptr DictionarySP; typedef std::shared_ptr GenericSP; - enum class Type { - eTypeInvalid = -1, - eTypeNull = 0, - eTypeGeneric, - eTypeArray, - eTypeInteger, - eTypeFloat, - eTypeBoolean, - eTypeString, - eTypeDictionary - }; - class Object : public std::enable_shared_from_this { public: - Object(Type t = Type::eTypeInvalid) : m_type(t) {} + Object(lldb::StructuredDataType t = lldb::eStructuredDataTypeInvalid) + : m_type(t) {} virtual ~Object() = default; virtual bool IsValid() const { return true; } - virtual void Clear() { m_type = Type::eTypeInvalid; } + virtual void Clear() { m_type = lldb::eStructuredDataTypeInvalid; } - Type GetType() const { return m_type; } + lldb::StructuredDataType GetType() const { return m_type; } - void SetType(Type t) { m_type = t; } + void SetType(lldb::StructuredDataType t) { m_type = t; } Array *GetAsArray() { - return ((m_type == Type::eTypeArray) ? static_cast(this) - : nullptr); - } - - Dictionary *GetAsDictionary() { - return ((m_type == Type::eTypeDictionary) - ? static_cast(this) + return ((m_type == lldb::eStructuredDataTypeArray) + ? static_cast(this) : nullptr); } + Dictionary *GetAsDictionary() { + return ( + (m_type == lldb::eStructuredDataTypeDictionary) + ? static_cast(this) + : nullptr); + } + Integer *GetAsInteger() { - return ((m_type == Type::eTypeInteger) ? static_cast(this) - : nullptr); + return ((m_type == lldb::eStructuredDataTypeInteger) + ? static_cast(this) + : nullptr); } uint64_t GetIntegerValue(uint64_t fail_value = 0) { @@ -119,8 +112,9 @@ public: } Float *GetAsFloat() { - return ((m_type == Type::eTypeFloat) ? static_cast(this) - : nullptr); + return ((m_type == lldb::eStructuredDataTypeFloat) + ? static_cast(this) + : nullptr); } double GetFloatValue(double fail_value = 0.0) { @@ -129,8 +123,9 @@ public: } Boolean *GetAsBoolean() { - return ((m_type == Type::eTypeBoolean) ? static_cast(this) - : nullptr); + return ((m_type == lldb::eStructuredDataTypeBoolean) + ? static_cast(this) + : nullptr); } bool GetBooleanValue(bool fail_value = false) { @@ -139,8 +134,9 @@ public: } String *GetAsString() { - return ((m_type == Type::eTypeString) ? static_cast(this) - : nullptr); + return ((m_type == lldb::eStructuredDataTypeString) + ? static_cast(this) + : nullptr); } llvm::StringRef GetStringValue(const char *fail_value = nullptr) { @@ -152,8 +148,9 @@ public: } Generic *GetAsGeneric() { - return ((m_type == Type::eTypeGeneric) ? static_cast(this) - : nullptr); + return ((m_type == lldb::eStructuredDataTypeGeneric) + ? static_cast(this) + : nullptr); } ObjectSP GetObjectForDotSeparatedPath(llvm::StringRef path); @@ -163,12 +160,12 @@ public: virtual void Dump(Stream &s, bool pretty_print = true) const = 0; private: - Type m_type; + lldb::StructuredDataType m_type; }; class Array : public Object { public: - Array() : Object(Type::eTypeArray) {} + Array() : Object(lldb::eStructuredDataTypeArray) {} ~Array() override = default; @@ -288,7 +285,8 @@ public: class Integer : public Object { public: - Integer(uint64_t i = 0) : Object(Type::eTypeInteger), m_value(i) {} + Integer(uint64_t i = 0) + : Object(lldb::eStructuredDataTypeInteger), m_value(i) {} ~Integer() override = default; @@ -304,7 +302,8 @@ public: class Float : public Object { public: - Float(double d = 0.0) : Object(Type::eTypeFloat), m_value(d) {} + Float(double d = 0.0) : Object(lldb::eStructuredDataTypeFloat), + m_value(d) {} ~Float() override = default; @@ -320,7 +319,8 @@ public: class Boolean : public Object { public: - Boolean(bool b = false) : Object(Type::eTypeBoolean), m_value(b) {} + Boolean(bool b = false) : Object(lldb::eStructuredDataTypeBoolean), + m_value(b) {} ~Boolean() override = default; @@ -336,9 +336,10 @@ public: class String : public Object { public: - String() : Object(Type::eTypeString) {} + String() : Object(lldb::eStructuredDataTypeString) {} explicit String(llvm::StringRef S) - : Object(Type::eTypeString), m_value(S) {} + : Object(lldb::eStructuredDataTypeString), + m_value(S) {} void SetValue(llvm::StringRef S) { m_value = S; } @@ -352,7 +353,8 @@ public: class Dictionary : public Object { public: - Dictionary() : Object(Type::eTypeDictionary), m_dict() {} + Dictionary() : Object(lldb::eStructuredDataTypeDictionary), + m_dict() {} ~Dictionary() override = default; @@ -522,7 +524,7 @@ public: class Null : public Object { public: - Null() : Object(Type::eTypeNull) {} + Null() : Object(lldb::eStructuredDataTypeNull) {} ~Null() override = default; @@ -534,7 +536,7 @@ public: class Generic : public Object { public: explicit Generic(void *object = nullptr) - : Object(Type::eTypeGeneric), m_object(object) {} + : Object(lldb::eStructuredDataTypeGeneric), m_object(object) {} void SetValue(void *value) { m_object = value; } diff --git a/contrib/llvm/tools/lldb/include/lldb/Core/StructuredDataImpl.h b/contrib/llvm/tools/lldb/include/lldb/Core/StructuredDataImpl.h index 81d59f83ac3..92f0417b354 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Core/StructuredDataImpl.h +++ b/contrib/llvm/tools/lldb/include/lldb/Core/StructuredDataImpl.h @@ -15,7 +15,9 @@ #include "lldb/Target/StructuredDataPlugin.h" #include "lldb/Utility/Status.h" #include "lldb/Utility/Stream.h" +#include "lldb/lldb-enumerations.h" #include "lldb/lldb-forward.h" +#include "llvm/ADT/StringRef.h" #pragma mark-- #pragma mark StructuredDataImpl @@ -78,18 +80,77 @@ public: return plugin_sp->GetDescription(m_data_sp, stream); } - StructuredData::ObjectSP GetObjectSP() { - return m_data_sp; + StructuredData::ObjectSP GetObjectSP() { return m_data_sp; } + + void SetObjectSP(const StructuredData::ObjectSP &obj) { m_data_sp = obj; } + + lldb::StructuredDataType GetType() const { + return (m_data_sp ? m_data_sp->GetType() : + lldb::eStructuredDataTypeInvalid); } - void SetObjectSP(const StructuredData::ObjectSP &obj) { - m_data_sp = obj; + size_t GetSize() const { + if (!m_data_sp) + return 0; + + if (m_data_sp->GetType() == lldb::eStructuredDataTypeDictionary) { + auto dict = m_data_sp->GetAsDictionary(); + return (dict->GetSize()); + } else if (m_data_sp->GetType() == lldb::eStructuredDataTypeArray) { + auto array = m_data_sp->GetAsArray(); + return (array->GetSize()); + } else + return 0; + } + + StructuredData::ObjectSP GetValueForKey(const char *key) const { + if (m_data_sp) { + auto dict = m_data_sp->GetAsDictionary(); + if (dict) + return dict->GetValueForKey(llvm::StringRef(key)); + } + return StructuredData::ObjectSP(); + } + + StructuredData::ObjectSP GetItemAtIndex(size_t idx) const { + if (m_data_sp) { + auto array = m_data_sp->GetAsArray(); + if (array) + return array->GetItemAtIndex(idx); + } + return StructuredData::ObjectSP(); + } + + uint64_t GetIntegerValue(uint64_t fail_value = 0) const { + return (m_data_sp ? m_data_sp->GetIntegerValue(fail_value) : fail_value); + } + + double GetFloatValue(double fail_value = 0.0) const { + return (m_data_sp ? m_data_sp->GetFloatValue(fail_value) : fail_value); + } + + bool GetBooleanValue(bool fail_value = false) const { + return (m_data_sp ? m_data_sp->GetBooleanValue(fail_value) : fail_value); + } + + size_t GetStringValue(char *dst, size_t dst_len) const { + if (!m_data_sp) + return 0; + + llvm::StringRef result = m_data_sp->GetStringValue(); + if (result.empty()) + return 0; + + if (!dst || !dst_len) { + char s[1]; + return (::snprintf(s, 1, "%s", result.data())); + } + return (::snprintf(dst, dst_len, "%s", result.data())); } private: - lldb::StructuredDataPluginWP m_plugin_wp; StructuredData::ObjectSP m_data_sp; }; -} +} // namespace lldb_private #endif diff --git a/contrib/llvm/tools/lldb/include/lldb/Core/TraceOptions.h b/contrib/llvm/tools/lldb/include/lldb/Core/TraceOptions.h index e875a531e87..91f48915c94 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Core/TraceOptions.h +++ b/contrib/llvm/tools/lldb/include/lldb/Core/TraceOptions.h @@ -18,8 +18,7 @@ namespace lldb_private { class TraceOptions { public: - TraceOptions() - : m_trace_params(new StructuredData::Dictionary()) {} + TraceOptions() : m_trace_params(new StructuredData::Dictionary()) {} const StructuredData::DictionarySP &getTraceParams() const { return m_trace_params; @@ -43,7 +42,7 @@ public: void setThreadID(lldb::tid_t thread_id) { m_thread_id = thread_id; } - lldb::tid_t getThreadID() { return m_thread_id; } + lldb::tid_t getThreadID() const { return m_thread_id; } private: lldb::TraceType m_type; diff --git a/contrib/llvm/tools/lldb/include/lldb/Host/Editline.h b/contrib/llvm/tools/lldb/include/lldb/Host/Editline.h index 2b1a8e04726..0b75e9c923c 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Host/Editline.h +++ b/contrib/llvm/tools/lldb/include/lldb/Host/Editline.h @@ -82,8 +82,14 @@ using EditLineStringStreamType = std::stringstream; using EditLineCharType = char; #endif +#ifdef EL_CLIENTDATA /* editline with wide support + wide char read function */ +using EditLineGetCharType = wchar_t; +#else +using EditLineGetCharType = char; +#endif + typedef int (*EditlineGetCharCallbackType)(::EditLine *editline, - EditLineCharType *c); + EditLineGetCharType *c); typedef unsigned char (*EditlineCommandCallbackType)(::EditLine *editline, int ch); typedef const char *(*EditlinePromptCallbackType)(::EditLine *editline); @@ -270,7 +276,7 @@ private: /// Character reading implementation for EditLine that supports our multi-line /// editing trickery. - int GetCharacter(EditLineCharType *c); + int GetCharacter(EditLineGetCharType *c); /// Prompt implementation for EditLine. const char *Prompt(); @@ -323,7 +329,7 @@ private: /// single or multi-line editing. void ConfigureEditor(bool multiline); - bool CompleteCharacter(char ch, EditLineCharType &out); + bool CompleteCharacter(char ch, EditLineGetCharType &out); private: #if LLDB_EDITLINE_USE_WCHAR diff --git a/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h b/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h index 388edef0578..55eca0fa0b6 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h +++ b/contrib/llvm/tools/lldb/include/lldb/Host/common/NativeProcessProtocol.h @@ -10,6 +10,7 @@ #ifndef liblldb_NativeProcessProtocol_h_ #define liblldb_NativeProcessProtocol_h_ +#include "lldb/Core/TraceOptions.h" #include "lldb/Host/MainLoop.h" #include "lldb/Utility/Status.h" #include "lldb/lldb-private-forward.h" @@ -308,6 +309,108 @@ public: static Status Attach(lldb::pid_t pid, NativeDelegate &native_delegate, MainLoop &mainloop, NativeProcessProtocolSP &process_sp); + //------------------------------------------------------------------ + /// StartTracing API for starting a tracing instance with the + /// TraceOptions on a specific thread or process. + /// + /// @param[in] config + /// The configuration to use when starting tracing. + /// + /// @param[out] error + /// Status indicates what went wrong. + /// + /// @return + /// The API returns a user_id which can be used to get trace + /// data, trace configuration or stopping the trace instance. + /// The user_id is a key to identify and operate with a tracing + /// instance. It may refer to the complete process or a single + /// thread. + //------------------------------------------------------------------ + virtual lldb::user_id_t StartTrace(const TraceOptions &config, + Status &error) { + error.SetErrorString("Not implemented"); + return LLDB_INVALID_UID; + } + + //------------------------------------------------------------------ + /// StopTracing API as the name suggests stops a tracing instance. + /// + /// @param[in] uid + /// The user id of the trace intended to be stopped. Now a + /// user_id may map to multiple threads in which case this API + /// could be used to stop the tracing for a specific thread by + /// supplying its thread id. + /// + /// @param[in] thread + /// Thread is needed when the complete process is being traced + /// and the user wishes to stop tracing on a particular thread. + /// + /// @return + /// Status indicating what went wrong. + //------------------------------------------------------------------ + virtual Status StopTrace(lldb::user_id_t uid, + lldb::tid_t thread = LLDB_INVALID_THREAD_ID) { + return Status("Not implemented"); + } + + //------------------------------------------------------------------ + /// This API provides the trace data collected in the form of raw + /// data. + /// + /// @param[in] uid thread + /// The uid and thread provide the context for the trace + /// instance. + /// + /// @param[in] buffer + /// The buffer provides the destination buffer where the trace + /// data would be read to. The buffer should be truncated to the + /// filled length by this function. + /// + /// @param[in] offset + /// There is possibility to read partially the trace data from + /// a specified offset where in such cases the buffer provided + /// may be smaller than the internal trace collection container. + /// + /// @return + /// The size of the data actually read. + //------------------------------------------------------------------ + virtual Status GetData(lldb::user_id_t uid, lldb::tid_t thread, + llvm::MutableArrayRef &buffer, + size_t offset = 0) { + return Status("Not implemented"); + } + + //------------------------------------------------------------------ + /// Similar API as above except it aims to provide any extra data + /// useful for decoding the actual trace data. + //------------------------------------------------------------------ + virtual Status GetMetaData(lldb::user_id_t uid, lldb::tid_t thread, + llvm::MutableArrayRef &buffer, + size_t offset = 0) { + return Status("Not implemented"); + } + + //------------------------------------------------------------------ + /// API to query the TraceOptions for a given user id + /// + /// @param[in] uid + /// The user id of the tracing instance. + /// + /// @param[in] config + /// The thread id of the tracing instance, in case configuration + /// for a specific thread is needed should be specified in the + /// config. + /// + /// @param[out] error + /// Status indicates what went wrong. + /// + /// @param[out] config + /// The actual configuration being used for tracing. + //------------------------------------------------------------------ + virtual Status GetTraceConfig(lldb::user_id_t uid, TraceOptions &config) { + return Status("Not implemented"); + } + protected: lldb::pid_t m_pid; @@ -381,6 +484,6 @@ protected: private: void SynchronouslyNotifyProcessStateChanged(lldb::StateType state); }; -} +} // namespace lldb_private #endif // #ifndef liblldb_NativeProcessProtocol_h_ diff --git a/contrib/llvm/tools/lldb/include/lldb/Target/Process.h b/contrib/llvm/tools/lldb/include/lldb/Target/Process.h index d2ab85d1652..dbf6cba09f4 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Target/Process.h +++ b/contrib/llvm/tools/lldb/include/lldb/Target/Process.h @@ -2781,7 +2781,7 @@ public: /// GetTraceConfig should supply the actual used trace /// configuration. //------------------------------------------------------------------ - virtual lldb::user_id_t StartTrace(lldb::TraceOptionsSP &options, + virtual lldb::user_id_t StartTrace(const TraceOptions &options, Status &error) { error.SetErrorString("Not implemented"); return LLDB_INVALID_UID; @@ -2796,9 +2796,8 @@ public: /// In the other case that tracing on an individual thread needs /// to be stopped a thread_id can be supplied. //------------------------------------------------------------------ - virtual void StopTrace(lldb::user_id_t uid, lldb::tid_t thread_id, - Status &error) { - error.SetErrorString("Not implemented"); + virtual Status StopTrace(lldb::user_id_t uid, lldb::tid_t thread_id) { + return Status("Not implemented"); } //------------------------------------------------------------------ @@ -2809,21 +2808,19 @@ public: /// may not. The thread_id should be used to select a particular /// thread for trace extraction. //------------------------------------------------------------------ - virtual size_t GetData(lldb::user_id_t uid, lldb::tid_t thread_id, - Status &error, void *buf, size_t size, + virtual Status GetData(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, size_t offset = 0) { - error.SetErrorString("Not implemented"); - return 0; + return Status("Not implemented"); } //------------------------------------------------------------------ /// Similar API as above except for obtaining meta data //------------------------------------------------------------------ - virtual size_t GetMetaData(lldb::user_id_t uid, lldb::tid_t thread_id, - Status &error, void *buf, size_t size, + virtual Status GetMetaData(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, size_t offset = 0) { - error.SetErrorString("Not implemented"); - return 0; + return Status("Not implemented"); } //------------------------------------------------------------------ @@ -2835,10 +2832,8 @@ public: /// configuration used by a specific thread. The thread_id specified /// should also match the uid otherwise an error will be returned. //------------------------------------------------------------------ - virtual void GetTraceConfig(lldb::user_id_t uid, Status &error, - lldb::TraceOptionsSP &options) { - error.SetErrorString("Not implemented"); - return; + virtual Status GetTraceConfig(lldb::user_id_t uid, TraceOptions &options) { + return Status("Not implemented"); } protected: diff --git a/contrib/llvm/tools/lldb/include/lldb/Utility/StringExtractor.h b/contrib/llvm/tools/lldb/include/lldb/Utility/StringExtractor.h index 40c1ef79cff..311cec87e69 100644 --- a/contrib/llvm/tools/lldb/include/lldb/Utility/StringExtractor.h +++ b/contrib/llvm/tools/lldb/include/lldb/Utility/StringExtractor.h @@ -111,6 +111,8 @@ public: size_t GetHexByteStringTerminatedBy(std::string &str, char terminator); + bool ConsumeFront(const llvm::StringRef &str); + const char *Peek() { if (m_index < m_packet.size()) return m_packet.c_str() + m_index; diff --git a/contrib/llvm/tools/lldb/include/lldb/lldb-enumerations.h b/contrib/llvm/tools/lldb/include/lldb/lldb-enumerations.h index ad10bbba1a5..f62b3cc0b19 100644 --- a/contrib/llvm/tools/lldb/include/lldb/lldb-enumerations.h +++ b/contrib/llvm/tools/lldb/include/lldb/lldb-enumerations.h @@ -725,6 +725,18 @@ enum TraceType { eTraceTypeProcessorTrace }; +enum StructuredDataType { + eStructuredDataTypeInvalid = -1, + eStructuredDataTypeNull = 0, + eStructuredDataTypeGeneric, + eStructuredDataTypeArray, + eStructuredDataTypeInteger, + eStructuredDataTypeFloat, + eStructuredDataTypeBoolean, + eStructuredDataTypeString, + eStructuredDataTypeDictionary +}; + FLAGS_ENUM(TypeClass){ eTypeClassInvalid = (0u), eTypeClassArray = (1u << 0), eTypeClassBlockPointer = (1u << 1), eTypeClassBuiltin = (1u << 2), diff --git a/contrib/llvm/tools/lldb/source/API/SBProcess.cpp b/contrib/llvm/tools/lldb/source/API/SBProcess.cpp index 8b79e521a37..caf07dbe3ce 100644 --- a/contrib/llvm/tools/lldb/source/API/SBProcess.cpp +++ b/contrib/llvm/tools/lldb/source/API/SBProcess.cpp @@ -363,10 +363,9 @@ lldb::SBTrace SBProcess::StartTrace(SBTraceOptions &options, if (!process_sp) { error.SetErrorString("invalid process"); } else { - - uid = process_sp->StartTrace(options.m_traceoptions_sp, error.ref()); + uid = process_sp->StartTrace(*(options.m_traceoptions_sp), error.ref()); trace_instance.SetTraceUID(uid); - LLDB_LOG(log, "SBProcess::returned uid - %" PRIx64, uid); + LLDB_LOG(log, "SBProcess::returned uid - {0}", uid); } return trace_instance; } diff --git a/contrib/llvm/tools/lldb/source/API/SBStructuredData.cpp b/contrib/llvm/tools/lldb/source/API/SBStructuredData.cpp index 971c4ab2295..54022390b80 100644 --- a/contrib/llvm/tools/lldb/source/API/SBStructuredData.cpp +++ b/contrib/llvm/tools/lldb/source/API/SBStructuredData.cpp @@ -46,7 +46,7 @@ lldb::SBError SBStructuredData::SetFromJSON(lldb::SBStream &stream) { StructuredData::ObjectSP json_obj = StructuredData::ParseJSON(json_str); m_impl_up->SetObjectSP(json_obj); - if (!json_obj || json_obj->GetType() != StructuredData::Type::eTypeDictionary) + if (!json_obj || json_obj->GetType() != eStructuredDataTypeDictionary) error.SetErrorString("Invalid Syntax"); return error; } @@ -67,3 +67,45 @@ lldb::SBError SBStructuredData::GetDescription(lldb::SBStream &stream) const { sb_error.SetError(error); return sb_error; } + +StructuredDataType SBStructuredData::GetType() const { + return (m_impl_up ? m_impl_up->GetType() : eStructuredDataTypeInvalid); +} + +size_t SBStructuredData::GetSize() const { + return (m_impl_up ? m_impl_up->GetSize() : 0); +} + +lldb::SBStructuredData SBStructuredData::GetValueForKey(const char *key) const { + if (!m_impl_up) + return SBStructuredData(); + + SBStructuredData result; + result.m_impl_up->SetObjectSP(m_impl_up->GetValueForKey(key)); + return result; +} + +lldb::SBStructuredData SBStructuredData::GetItemAtIndex(size_t idx) const { + if (!m_impl_up) + return SBStructuredData(); + + SBStructuredData result; + result.m_impl_up->SetObjectSP(m_impl_up->GetItemAtIndex(idx)); + return result; +} + +uint64_t SBStructuredData::GetIntegerValue(uint64_t fail_value) const { + return (m_impl_up ? m_impl_up->GetIntegerValue(fail_value) : fail_value); +} + +double SBStructuredData::GetFloatValue(double fail_value) const { + return (m_impl_up ? m_impl_up->GetFloatValue(fail_value) : fail_value); +} + +bool SBStructuredData::GetBooleanValue(bool fail_value) const { + return (m_impl_up ? m_impl_up->GetBooleanValue(fail_value) : fail_value); +} + +size_t SBStructuredData::GetStringValue(char *dst, size_t dst_len) const { + return (m_impl_up ? m_impl_up->GetStringValue(dst, dst_len) : 0); +} diff --git a/contrib/llvm/tools/lldb/source/API/SBThread.cpp b/contrib/llvm/tools/lldb/source/API/SBThread.cpp index 2c82bc3bcdc..65ccb465c8d 100644 --- a/contrib/llvm/tools/lldb/source/API/SBThread.cpp +++ b/contrib/llvm/tools/lldb/source/API/SBThread.cpp @@ -43,6 +43,7 @@ #include "lldb/API/SBThreadCollection.h" #include "lldb/API/SBThreadPlan.h" #include "lldb/API/SBValue.h" +#include "lldb/lldb-enumerations.h" using namespace lldb; using namespace lldb_private; @@ -561,26 +562,26 @@ bool SBThread::GetInfoItemByPathAsString(const char *path, SBStream &strm) { StructuredData::ObjectSP node = info_root_sp->GetObjectForDotSeparatedPath(path); if (node) { - if (node->GetType() == StructuredData::Type::eTypeString) { + if (node->GetType() == eStructuredDataTypeString) { strm.Printf("%s", node->GetAsString()->GetValue().str().c_str()); success = true; } - if (node->GetType() == StructuredData::Type::eTypeInteger) { + if (node->GetType() == eStructuredDataTypeInteger) { strm.Printf("0x%" PRIx64, node->GetAsInteger()->GetValue()); success = true; } - if (node->GetType() == StructuredData::Type::eTypeFloat) { + if (node->GetType() == eStructuredDataTypeFloat) { strm.Printf("0x%f", node->GetAsFloat()->GetValue()); success = true; } - if (node->GetType() == StructuredData::Type::eTypeBoolean) { + if (node->GetType() == eStructuredDataTypeBoolean) { if (node->GetAsBoolean()->GetValue() == true) strm.Printf("true"); else strm.Printf("false"); success = true; } - if (node->GetType() == StructuredData::Type::eTypeNull) { + if (node->GetType() == eStructuredDataTypeNull) { strm.Printf("null"); success = true; } diff --git a/contrib/llvm/tools/lldb/source/API/SBTrace.cpp b/contrib/llvm/tools/lldb/source/API/SBTrace.cpp index 18f7d36e775..9a5fa4ed4f4 100644 --- a/contrib/llvm/tools/lldb/source/API/SBTrace.cpp +++ b/contrib/llvm/tools/lldb/source/API/SBTrace.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "lldb/Utility/Log.h" #include "lldb/Target/Process.h" +#include "lldb/Utility/Log.h" #include "lldb/API/SBTrace.h" #include "lldb/API/SBTraceOptions.h" @@ -25,37 +25,37 @@ lldb::ProcessSP SBTrace::GetSP() const { return m_opaque_wp.lock(); } size_t SBTrace::GetTraceData(SBError &error, void *buf, size_t size, size_t offset, lldb::tid_t thread_id) { - size_t bytes_read = 0; ProcessSP process_sp(GetSP()); Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); + llvm::MutableArrayRef buffer(static_cast(buf), size); error.Clear(); if (!process_sp) { error.SetErrorString("invalid process"); } else { - bytes_read = process_sp->GetData(GetTraceUID(), thread_id, error.ref(), buf, - size, offset); - LLDB_LOG(log, "SBTrace::bytes_read - %" PRIx64, bytes_read); + error.SetError( + process_sp->GetData(GetTraceUID(), thread_id, buffer, offset)); + LLDB_LOG(log, "SBTrace::bytes_read - {0}", buffer.size()); } - return bytes_read; + return buffer.size(); } size_t SBTrace::GetMetaData(SBError &error, void *buf, size_t size, size_t offset, lldb::tid_t thread_id) { - size_t bytes_read = 0; ProcessSP process_sp(GetSP()); Log *log(lldb_private::GetLogIfAllCategoriesSet(LIBLLDB_LOG_API)); + llvm::MutableArrayRef buffer(static_cast(buf), size); error.Clear(); if (!process_sp) { error.SetErrorString("invalid process"); } else { - bytes_read = process_sp->GetMetaData(GetTraceUID(), thread_id, error.ref(), - buf, size, offset); - LLDB_LOG(log, "SBTrace::bytes_read - %" PRIx64, bytes_read); + error.SetError( + process_sp->GetMetaData(GetTraceUID(), thread_id, buffer, offset)); + LLDB_LOG(log, "SBTrace::bytes_read - {0}", buffer.size()); } - return bytes_read; + return buffer.size(); } void SBTrace::StopTrace(SBError &error, lldb::tid_t thread_id) { @@ -66,7 +66,7 @@ void SBTrace::StopTrace(SBError &error, lldb::tid_t thread_id) { error.SetErrorString("invalid process"); return; } - process_sp->StopTrace(GetTraceUID(), thread_id, error.ref()); + error.SetError(process_sp->StopTrace(GetTraceUID(), thread_id)); } void SBTrace::GetTraceConfig(SBTraceOptions &options, SBError &error) { @@ -76,8 +76,8 @@ void SBTrace::GetTraceConfig(SBTraceOptions &options, SBError &error) { if (!process_sp) { error.SetErrorString("invalid process"); } else { - process_sp->GetTraceConfig(GetTraceUID(), error.ref(), - options.m_traceoptions_sp); + error.SetError(process_sp->GetTraceConfig(GetTraceUID(), + *(options.m_traceoptions_sp))); } } diff --git a/contrib/llvm/tools/lldb/source/Commands/CommandObjectThread.cpp b/contrib/llvm/tools/lldb/source/Commands/CommandObjectThread.cpp index 6b0f1b455bc..b585ef9ef8a 100644 --- a/contrib/llvm/tools/lldb/source/Commands/CommandObjectThread.cpp +++ b/contrib/llvm/tools/lldb/source/Commands/CommandObjectThread.cpp @@ -649,7 +649,7 @@ protected: new_plan_sp->SetOkayToDiscard(false); if (m_options.m_step_count > 1) { - if (new_plan_sp->SetIterationCount(m_options.m_step_count)) { + if (!new_plan_sp->SetIterationCount(m_options.m_step_count)) { result.AppendWarning( "step operation does not support iteration count."); } diff --git a/contrib/llvm/tools/lldb/source/Core/FormatEntity.cpp b/contrib/llvm/tools/lldb/source/Core/FormatEntity.cpp index 9fb294aad2f..e3c346f79d6 100644 --- a/contrib/llvm/tools/lldb/source/Core/FormatEntity.cpp +++ b/contrib/llvm/tools/lldb/source/Core/FormatEntity.cpp @@ -1040,24 +1040,24 @@ static bool FormatThreadExtendedInfoRecurse( thread_info_dictionary->GetObjectForDotSeparatedPath(path); if (value) { - if (value->GetType() == StructuredData::Type::eTypeInteger) { + if (value->GetType() == eStructuredDataTypeInteger) { const char *token_format = "0x%4.4" PRIx64; if (!entry.printf_format.empty()) token_format = entry.printf_format.c_str(); s.Printf(token_format, value->GetAsInteger()->GetValue()); return true; - } else if (value->GetType() == StructuredData::Type::eTypeFloat) { + } else if (value->GetType() == eStructuredDataTypeFloat) { s.Printf("%f", value->GetAsFloat()->GetValue()); return true; - } else if (value->GetType() == StructuredData::Type::eTypeString) { + } else if (value->GetType() == eStructuredDataTypeString) { s.Format("{0}", value->GetAsString()->GetValue()); return true; - } else if (value->GetType() == StructuredData::Type::eTypeArray) { + } else if (value->GetType() == eStructuredDataTypeArray) { if (value->GetAsArray()->GetSize() > 0) { s.Printf("%zu", value->GetAsArray()->GetSize()); return true; } - } else if (value->GetType() == StructuredData::Type::eTypeDictionary) { + } else if (value->GetType() == eStructuredDataTypeDictionary) { s.Printf("%zu", value->GetAsDictionary()->GetKeys()->GetAsArray()->GetSize()); return true; @@ -1346,7 +1346,7 @@ bool FormatEntity::Format(const Entry &entry, Stream &s, if (thread) { StructuredData::ObjectSP object_sp = thread->GetExtendedInfo(); if (object_sp && - object_sp->GetType() == StructuredData::Type::eTypeDictionary) { + object_sp->GetType() == eStructuredDataTypeDictionary) { if (FormatThreadExtendedInfoRecurse(entry, object_sp, sc, exe_ctx, s)) return true; } diff --git a/contrib/llvm/tools/lldb/source/Core/StructuredData.cpp b/contrib/llvm/tools/lldb/source/Core/StructuredData.cpp index d52b7730cc6..b03665ed348 100644 --- a/contrib/llvm/tools/lldb/source/Core/StructuredData.cpp +++ b/contrib/llvm/tools/lldb/source/Core/StructuredData.cpp @@ -184,7 +184,7 @@ StructuredData::ObjectSP StructuredData::ParseJSON(std::string json_text) { StructuredData::ObjectSP StructuredData::Object::GetObjectForDotSeparatedPath(llvm::StringRef path) { - if (this->GetType() == Type::eTypeDictionary) { + if (this->GetType() == lldb::eStructuredDataTypeDictionary) { std::pair match = path.split('.'); std::string key = match.first.str(); ObjectSP value = this->GetAsDictionary()->GetValueForKey(key); @@ -200,7 +200,7 @@ StructuredData::Object::GetObjectForDotSeparatedPath(llvm::StringRef path) { return ObjectSP(); } - if (this->GetType() == Type::eTypeArray) { + if (this->GetType() == lldb::eStructuredDataTypeArray) { std::pair match = path.split('['); if (match.second.size() == 0) { return this->shared_from_this(); diff --git a/contrib/llvm/tools/lldb/source/Host/common/Editline.cpp b/contrib/llvm/tools/lldb/source/Host/common/Editline.cpp index 7d4b398a171..7b580dde656 100644 --- a/contrib/llvm/tools/lldb/source/Host/common/Editline.cpp +++ b/contrib/llvm/tools/lldb/source/Host/common/Editline.cpp @@ -474,7 +474,7 @@ unsigned char Editline::RecallHistory(bool earlier) { return CC_NEWLINE; } -int Editline::GetCharacter(EditLineCharType *c) { +int Editline::GetCharacter(EditLineGetCharType *c) { const LineInfoW *info = el_wline(m_editline); // Paint a faint version of the desired prompt over the version libedit draws @@ -969,7 +969,7 @@ void Editline::ConfigureEditor(bool multiline) { })); el_wset(m_editline, EL_GETCFN, (EditlineGetCharCallbackType)([]( - EditLine *editline, EditLineCharType *c) { + EditLine *editline, EditLineGetCharType *c) { return Editline::InstanceFor(editline)->GetCharacter(c); })); @@ -1360,12 +1360,12 @@ void Editline::PrintAsync(Stream *stream, const char *s, size_t len) { } } -bool Editline::CompleteCharacter(char ch, EditLineCharType &out) { +bool Editline::CompleteCharacter(char ch, EditLineGetCharType &out) { #if !LLDB_EDITLINE_USE_WCHAR if (ch == (char)EOF) return false; - out = ch; + out = (unsigned char)ch; return true; #else std::codecvt_utf8 cvt; diff --git a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp index 3ceda5ff67e..439d372fade 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/RenderScriptx86ABIFixups.cpp @@ -254,11 +254,12 @@ bool fixupRSAllocationStructByValCalls(llvm::Module &module) { llvm::AttributeList call_attribs = call_inst->getAttributes(); // iterate over the argument attributes - for (size_t i = 1; i <= call_attribs.getNumSlots(); ++i) { + for (unsigned I = call_attribs.index_begin(); I != call_attribs.index_end(); + I++) { // if this argument is passed by val - if (call_attribs.hasAttribute(i, llvm::Attribute::ByVal)) { + if (call_attribs.hasAttribute(I, llvm::Attribute::ByVal)) { // strip away the byval attribute - call_inst->removeAttribute(i, llvm::Attribute::ByVal); + call_inst->removeAttribute(I, llvm::Attribute::ByVal); changed = true; } } diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp index 040f7b7dd34..34d99cd39de 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/FreeBSD/ProcessMonitor.cpp @@ -1132,8 +1132,10 @@ ProcessMessage ProcessMonitor::MonitorSIGTRAP(ProcessMonitor *monitor, case 0: case TRAP_TRACE: +#ifdef TRAP_CAP // Map TRAP_CAP to a trace trap in the absense of a more specific handler. case TRAP_CAP: +#endif if (log) log->Printf("ProcessMonitor::%s() received trace event, tid = %" PRIu64 " : si_code = %d", diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp index 80751147b4f..efb19fc414f 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp @@ -167,8 +167,6 @@ NativeProcessNetBSD::NativeProcessNetBSD() // Handles all waitpid events from the inferior process. void NativeProcessNetBSD::MonitorCallback(lldb::pid_t pid, int signal) { - Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS)); - switch (signal) { case SIGTRAP: return MonitorSIGTRAP(pid); @@ -196,7 +194,6 @@ void NativeProcessNetBSD::MonitorExited(lldb::pid_t pid, int signal, } void NativeProcessNetBSD::MonitorSIGSTOP(lldb::pid_t pid) { - Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS)); ptrace_siginfo_t info; const auto siginfo_err = @@ -305,8 +302,6 @@ void NativeProcessNetBSD::MonitorSIGTRAP(lldb::pid_t pid) { } void NativeProcessNetBSD::MonitorSignal(lldb::pid_t pid, int signal) { - Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS)); - ptrace_siginfo_t info; const auto siginfo_err = PtraceWrapper(PT_GET_SIGINFO, pid, &info, sizeof(info)); @@ -898,6 +893,19 @@ void NativeProcessNetBSD::SigchldHandler() { MonitorCallback(wait_pid, signal); } +bool NativeProcessNetBSD::HasThreadNoLock(lldb::tid_t thread_id) { + for (auto thread_sp : m_threads) { + assert(thread_sp && "thread list should not contain NULL threads"); + if (thread_sp->GetID() == thread_id) { + // We have this thread. + return true; + } + } + + // We don't have this thread. + return false; +} + NativeThreadNetBSDSP NativeProcessNetBSD::AddThread(lldb::tid_t thread_id) { Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD)); @@ -916,8 +924,6 @@ NativeThreadNetBSDSP NativeProcessNetBSD::AddThread(lldb::tid_t thread_id) { } ::pid_t NativeProcessNetBSD::Attach(lldb::pid_t pid, Status &error) { - Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PROCESS)); - if (pid <= 1) { error.SetErrorToGenericError(); error.SetErrorString("Attaching to process 1 is not allowed."); @@ -1006,7 +1012,7 @@ Status NativeProcessNetBSD::WriteMemory(lldb::addr_t addr, const void *buf, io.piod_len = size; do { - io.piod_addr = (void *)(src + bytes_written); + io.piod_addr = const_cast(static_cast(src + bytes_written)); io.piod_offs = (void *)(addr + bytes_written); Status error = NativeProcessNetBSD::PtraceWrapper(PT_IO, GetID(), &io); @@ -1034,10 +1040,11 @@ NativeProcessNetBSD::GetAuxvData() const { ErrorOr> buf = llvm::MemoryBuffer::getNewMemBuffer(auxv_size); - struct ptrace_io_desc io = {.piod_op = PIOD_READ_AUXV, - .piod_offs = 0, - .piod_addr = (void *)buf.get()->getBufferStart(), - .piod_len = auxv_size}; + struct ptrace_io_desc io; + io.piod_op = PIOD_READ_AUXV; + io.piod_offs = 0; + io.piod_addr = const_cast(static_cast(buf.get()->getBufferStart())); + io.piod_len = auxv_size; Status error = NativeProcessNetBSD::PtraceWrapper(PT_IO, GetID(), &io); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h index e18ba162e52..758956e3dca 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.h @@ -115,6 +115,8 @@ private: // --------------------------------------------------------------------- NativeProcessNetBSD(); + bool HasThreadNoLock(lldb::tid_t thread_id); + NativeThreadNetBSDSP AddThread(lldb::tid_t thread_id); Status LaunchInferior(MainLoop &mainloop, ProcessLaunchInfo &launch_info); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp index 550ec0ea499..33aed7a43c4 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp @@ -30,7 +30,6 @@ #include "lldb/Utility/JSON.h" #include "lldb/Utility/LLDBAssert.h" #include "lldb/Utility/Log.h" -#include "lldb/Utility/StreamGDBRemote.h" #include "lldb/Utility/StreamString.h" // Project includes @@ -3152,6 +3151,211 @@ bool GDBRemoteCommunicationClient::SyncThreadState(lldb::tid_t tid) { response.IsOKResponse(); } +lldb::user_id_t +GDBRemoteCommunicationClient::SendStartTracePacket(const TraceOptions &options, + Status &error) { + Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + lldb::user_id_t ret_uid = LLDB_INVALID_UID; + + StreamGDBRemote escaped_packet; + escaped_packet.PutCString("jTraceStart:"); + + StructuredData::Dictionary json_packet; + json_packet.AddIntegerItem("type", options.getType()); + json_packet.AddIntegerItem("buffersize", options.getTraceBufferSize()); + json_packet.AddIntegerItem("metabuffersize", options.getMetaDataBufferSize()); + + if (options.getThreadID() != LLDB_INVALID_THREAD_ID) + json_packet.AddIntegerItem("threadid", options.getThreadID()); + + StructuredData::DictionarySP custom_params = options.getTraceParams(); + if (custom_params) + json_packet.AddItem("params", custom_params); + + StreamString json_string; + json_packet.Dump(json_string, false); + escaped_packet.PutEscapedBytes(json_string.GetData(), json_string.GetSize()); + + StringExtractorGDBRemote response; + if (SendPacketAndWaitForResponse(escaped_packet.GetString(), response, + true) == + GDBRemoteCommunication::PacketResult::Success) { + if (!response.IsNormalResponse()) { + error.SetError(response.GetError(), eErrorTypeGeneric); + LLDB_LOG(log, "Target does not support Tracing"); + } else { + ret_uid = response.GetHexMaxU64(false, LLDB_INVALID_UID); + } + } else { + LLDB_LOG(log, "failed to send packet"); + error.SetErrorStringWithFormat("failed to send packet: '%s'", + escaped_packet.GetData()); + } + return ret_uid; +} + +Status +GDBRemoteCommunicationClient::SendStopTracePacket(lldb::user_id_t uid, + lldb::tid_t thread_id) { + Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + StringExtractorGDBRemote response; + Status error; + + StructuredData::Dictionary json_packet; + StreamGDBRemote escaped_packet; + StreamString json_string; + escaped_packet.PutCString("jTraceStop:"); + + json_packet.AddIntegerItem("traceid", uid); + + if (thread_id != LLDB_INVALID_THREAD_ID) + json_packet.AddIntegerItem("threadid", thread_id); + + json_packet.Dump(json_string, false); + + escaped_packet.PutEscapedBytes(json_string.GetData(), json_string.GetSize()); + + if (SendPacketAndWaitForResponse(escaped_packet.GetString(), response, + true) == + GDBRemoteCommunication::PacketResult::Success) { + if (!response.IsOKResponse()) { + error.SetError(response.GetError(), eErrorTypeGeneric); + LLDB_LOG(log, "stop tracing failed"); + } + } else { + LLDB_LOG(log, "failed to send packet"); + error.SetErrorStringWithFormat( + "failed to send packet: '%s' with error '%d'", escaped_packet.GetData(), + response.GetError()); + } + return error; +} + +Status GDBRemoteCommunicationClient::SendGetDataPacket( + lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, size_t offset) { + StreamGDBRemote escaped_packet; + escaped_packet.PutCString("jTraceBufferRead:"); + return SendGetTraceDataPacket(escaped_packet, uid, thread_id, buffer, offset); +} + +Status GDBRemoteCommunicationClient::SendGetMetaDataPacket( + lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, size_t offset) { + StreamGDBRemote escaped_packet; + escaped_packet.PutCString("jTraceMetaRead:"); + return SendGetTraceDataPacket(escaped_packet, uid, thread_id, buffer, offset); +} + +Status +GDBRemoteCommunicationClient::SendGetTraceConfigPacket(lldb::user_id_t uid, + TraceOptions &options) { + Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + StringExtractorGDBRemote response; + Status error; + + StreamString json_string; + StreamGDBRemote escaped_packet; + escaped_packet.PutCString("jTraceConfigRead:"); + + StructuredData::Dictionary json_packet; + json_packet.AddIntegerItem("traceid", uid); + + if (options.getThreadID() != LLDB_INVALID_THREAD_ID) + json_packet.AddIntegerItem("threadid", options.getThreadID()); + + json_packet.Dump(json_string, false); + escaped_packet.PutEscapedBytes(json_string.GetData(), json_string.GetSize()); + + if (SendPacketAndWaitForResponse(escaped_packet.GetString(), response, + true) == + GDBRemoteCommunication::PacketResult::Success) { + if (response.IsNormalResponse()) { + uint64_t type = std::numeric_limits::max(); + uint64_t buffersize = std::numeric_limits::max(); + uint64_t metabuffersize = std::numeric_limits::max(); + + auto json_object = StructuredData::ParseJSON(response.Peek()); + + if (!json_object || + json_object->GetType() != lldb::eStructuredDataTypeDictionary) { + error.SetErrorString("Invalid Configuration obtained"); + return error; + } + + auto json_dict = json_object->GetAsDictionary(); + + json_dict->GetValueForKeyAsInteger("metabuffersize", + metabuffersize); + options.setMetaDataBufferSize(metabuffersize); + + json_dict->GetValueForKeyAsInteger("buffersize", buffersize); + options.setTraceBufferSize(buffersize); + + json_dict->GetValueForKeyAsInteger("type", type); + options.setType(static_cast(type)); + + StructuredData::ObjectSP custom_params_sp = + json_dict->GetValueForKey("params"); + if (custom_params_sp) { + if (custom_params_sp->GetType() != + lldb::eStructuredDataTypeDictionary) { + error.SetErrorString("Invalid Configuration obtained"); + return error; + } else + options.setTraceParams( + static_pointer_cast( + custom_params_sp)); + } + } else { + error.SetError(response.GetError(), eErrorTypeGeneric); + } + } else { + LLDB_LOG(log, "failed to send packet"); + error.SetErrorStringWithFormat("failed to send packet: '%s'", + escaped_packet.GetData()); + } + return error; +} + +Status GDBRemoteCommunicationClient::SendGetTraceDataPacket( + StreamGDBRemote &packet, lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, size_t offset) { + Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); + Status error; + + StructuredData::Dictionary json_packet; + + json_packet.AddIntegerItem("traceid", uid); + json_packet.AddIntegerItem("offset", offset); + json_packet.AddIntegerItem("buffersize", buffer.size()); + + if (thread_id != LLDB_INVALID_THREAD_ID) + json_packet.AddIntegerItem("threadid", thread_id); + + StreamString json_string; + json_packet.Dump(json_string, false); + + packet.PutEscapedBytes(json_string.GetData(), json_string.GetSize()); + StringExtractorGDBRemote response; + if (SendPacketAndWaitForResponse(packet.GetString(), response, true) == + GDBRemoteCommunication::PacketResult::Success) { + if (response.IsNormalResponse()) { + size_t filled_size = response.GetHexBytesAvail(buffer); + buffer = llvm::MutableArrayRef(buffer.data(), filled_size); + } else { + error.SetError(response.GetError(), eErrorTypeGeneric); + buffer = buffer.slice(buffer.size()); + } + } else { + LLDB_LOG(log, "failed to send packet"); + error.SetErrorStringWithFormat("failed to send packet: '%s'", + packet.GetData()); + buffer = buffer.slice(buffer.size()); + } + return error; +} + bool GDBRemoteCommunicationClient::GetModuleInfo( const FileSpec &module_file_spec, const lldb_private::ArchSpec &arch_spec, ModuleSpec &module_spec) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h index 08d0bd5d690..6306651da7a 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h @@ -25,6 +25,7 @@ #include "lldb/Core/ArchSpec.h" #include "lldb/Core/StructuredData.h" #include "lldb/Target/Process.h" +#include "lldb/Utility/StreamGDBRemote.h" #include "llvm/ADT/Optional.h" @@ -499,6 +500,21 @@ public: ConfigureRemoteStructuredData(const ConstString &type_name, const StructuredData::ObjectSP &config_sp); + lldb::user_id_t SendStartTracePacket(const TraceOptions &options, + Status &error); + + Status SendStopTracePacket(lldb::user_id_t uid, lldb::tid_t thread_id); + + Status SendGetDataPacket(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, + size_t offset = 0); + + Status SendGetMetaDataPacket(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, + size_t offset = 0); + + Status SendGetTraceConfigPacket(lldb::user_id_t uid, TraceOptions &options); + protected: LazyBool m_supports_not_sending_acks; LazyBool m_supports_thread_suffix; @@ -587,6 +603,11 @@ protected: lldb::tid_t tid, StreamString &&payload, StringExtractorGDBRemote &response, bool send_async); + Status SendGetTraceDataPacket(StreamGDBRemote &packet, lldb::user_id_t uid, + lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, + size_t offset); + private: DISALLOW_COPY_AND_ASSIGN(GDBRemoteCommunicationClient); }; diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp index ec7c2f5330d..d318c35366f 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp @@ -183,6 +183,22 @@ void GDBRemoteCommunicationServerLLGS::RegisterPacketHandlers() { StringExtractorGDBRemote::eServerPacketType_QPassSignals, &GDBRemoteCommunicationServerLLGS::Handle_QPassSignals); + RegisterMemberFunctionHandler( + StringExtractorGDBRemote::eServerPacketType_jTraceStart, + &GDBRemoteCommunicationServerLLGS::Handle_jTraceStart); + RegisterMemberFunctionHandler( + StringExtractorGDBRemote::eServerPacketType_jTraceBufferRead, + &GDBRemoteCommunicationServerLLGS::Handle_jTraceRead); + RegisterMemberFunctionHandler( + StringExtractorGDBRemote::eServerPacketType_jTraceMetaRead, + &GDBRemoteCommunicationServerLLGS::Handle_jTraceRead); + RegisterMemberFunctionHandler( + StringExtractorGDBRemote::eServerPacketType_jTraceStop, + &GDBRemoteCommunicationServerLLGS::Handle_jTraceStop); + RegisterMemberFunctionHandler( + StringExtractorGDBRemote::eServerPacketType_jTraceConfigRead, + &GDBRemoteCommunicationServerLLGS::Handle_jTraceConfigRead); + RegisterPacketHandler(StringExtractorGDBRemote::eServerPacketType_k, [this](StringExtractorGDBRemote packet, Status &error, bool &interrupt, bool &quit) { @@ -1083,6 +1099,231 @@ void GDBRemoteCommunicationServerLLGS::SendProcessOutput() { } } +GDBRemoteCommunication::PacketResult +GDBRemoteCommunicationServerLLGS::Handle_jTraceStart( + StringExtractorGDBRemote &packet) { + Log *log(GetLogIfAnyCategoriesSet(LIBLLDB_LOG_PROCESS)); + // Fail if we don't have a current process. + if (!m_debugged_process_sp || + (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + return SendErrorResponse(68); + + if (!packet.ConsumeFront("jTraceStart:")) + return SendIllFormedResponse(packet, "jTraceStart: Ill formed packet "); + + TraceOptions options; + uint64_t type = std::numeric_limits::max(); + uint64_t buffersize = std::numeric_limits::max(); + lldb::tid_t tid = LLDB_INVALID_THREAD_ID; + uint64_t metabuffersize = std::numeric_limits::max(); + + auto json_object = StructuredData::ParseJSON(packet.Peek()); + + if (!json_object || + json_object->GetType() != lldb::eStructuredDataTypeDictionary) + return SendIllFormedResponse(packet, "jTraceStart: Ill formed packet "); + + auto json_dict = json_object->GetAsDictionary(); + + json_dict->GetValueForKeyAsInteger("metabuffersize", metabuffersize); + options.setMetaDataBufferSize(metabuffersize); + + json_dict->GetValueForKeyAsInteger("buffersize", buffersize); + options.setTraceBufferSize(buffersize); + + json_dict->GetValueForKeyAsInteger("type", type); + options.setType(static_cast(type)); + + json_dict->GetValueForKeyAsInteger("threadid", tid); + options.setThreadID(tid); + + StructuredData::ObjectSP custom_params_sp = + json_dict->GetValueForKey("params"); + if (custom_params_sp && + custom_params_sp->GetType() != lldb::eStructuredDataTypeDictionary) + return SendIllFormedResponse(packet, "jTraceStart: Ill formed packet "); + + options.setTraceParams( + static_pointer_cast(custom_params_sp)); + + if (buffersize == std::numeric_limits::max() || + type != lldb::TraceType::eTraceTypeProcessorTrace) { + LLDB_LOG(log, "Ill formed packet buffersize = {0} type = {1}", buffersize, + type); + return SendIllFormedResponse(packet, "JTrace:start: Ill formed packet "); + } + + Status error; + lldb::user_id_t uid = LLDB_INVALID_UID; + uid = m_debugged_process_sp->StartTrace(options, error); + LLDB_LOG(log, "uid is {0} , error is {1}", uid, error.GetError()); + if (error.Fail()) + return SendErrorResponse(error.GetError()); + + StreamGDBRemote response; + response.Printf("%" PRIx64, uid); + return SendPacketNoLock(response.GetString()); +} + +GDBRemoteCommunication::PacketResult +GDBRemoteCommunicationServerLLGS::Handle_jTraceStop( + StringExtractorGDBRemote &packet) { + // Fail if we don't have a current process. + if (!m_debugged_process_sp || + (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + return SendErrorResponse(68); + + if (!packet.ConsumeFront("jTraceStop:")) + return SendIllFormedResponse(packet, "jTraceStop: Ill formed packet "); + + lldb::user_id_t uid = LLDB_INVALID_UID; + lldb::tid_t tid = LLDB_INVALID_THREAD_ID; + + auto json_object = StructuredData::ParseJSON(packet.Peek()); + + if (!json_object || + json_object->GetType() != lldb::eStructuredDataTypeDictionary) + return SendIllFormedResponse(packet, "jTraceStop: Ill formed packet "); + + auto json_dict = json_object->GetAsDictionary(); + + if (!json_dict->GetValueForKeyAsInteger("traceid", uid)) + return SendIllFormedResponse(packet, "jTraceStop: Ill formed packet "); + + json_dict->GetValueForKeyAsInteger("threadid", tid); + + Status error = m_debugged_process_sp->StopTrace(uid, tid); + + if (error.Fail()) + return SendErrorResponse(error.GetError()); + + return SendOKResponse(); +} + +GDBRemoteCommunication::PacketResult +GDBRemoteCommunicationServerLLGS::Handle_jTraceConfigRead( + StringExtractorGDBRemote &packet) { + + // Fail if we don't have a current process. + if (!m_debugged_process_sp || + (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + return SendErrorResponse(68); + + if (!packet.ConsumeFront("jTraceConfigRead:")) + return SendIllFormedResponse(packet, + "jTraceConfigRead: Ill formed packet "); + + lldb::user_id_t uid = LLDB_INVALID_UID; + lldb::tid_t threadid = LLDB_INVALID_THREAD_ID; + + auto json_object = StructuredData::ParseJSON(packet.Peek()); + + if (!json_object || + json_object->GetType() != lldb::eStructuredDataTypeDictionary) + return SendIllFormedResponse(packet, + "jTraceConfigRead: Ill formed packet "); + + auto json_dict = json_object->GetAsDictionary(); + + if (!json_dict->GetValueForKeyAsInteger("traceid", uid)) + return SendIllFormedResponse(packet, + "jTraceConfigRead: Ill formed packet "); + + json_dict->GetValueForKeyAsInteger("threadid", threadid); + + TraceOptions options; + StreamGDBRemote response; + + options.setThreadID(threadid); + Status error = m_debugged_process_sp->GetTraceConfig(uid, options); + + if (error.Fail()) + return SendErrorResponse(error.GetError()); + + StreamGDBRemote escaped_response; + StructuredData::Dictionary json_packet; + + json_packet.AddIntegerItem("type", options.getType()); + json_packet.AddIntegerItem("buffersize", options.getTraceBufferSize()); + json_packet.AddIntegerItem("metabuffersize", options.getMetaDataBufferSize()); + + StructuredData::DictionarySP custom_params = options.getTraceParams(); + if (custom_params) + json_packet.AddItem("params", custom_params); + + StreamString json_string; + json_packet.Dump(json_string, false); + escaped_response.PutEscapedBytes(json_string.GetData(), + json_string.GetSize()); + return SendPacketNoLock(escaped_response.GetString()); +} + +GDBRemoteCommunication::PacketResult +GDBRemoteCommunicationServerLLGS::Handle_jTraceRead( + StringExtractorGDBRemote &packet) { + + // Fail if we don't have a current process. + if (!m_debugged_process_sp || + (m_debugged_process_sp->GetID() == LLDB_INVALID_PROCESS_ID)) + return SendErrorResponse(68); + + enum PacketType { MetaData, BufferData }; + PacketType tracetype = MetaData; + + if (packet.ConsumeFront("jTraceBufferRead:")) + tracetype = BufferData; + else if (packet.ConsumeFront("jTraceMetaRead:")) + tracetype = MetaData; + else { + return SendIllFormedResponse(packet, "jTrace: Ill formed packet "); + } + + lldb::user_id_t uid = LLDB_INVALID_UID; + + size_t byte_count = std::numeric_limits::max(); + lldb::tid_t tid = LLDB_INVALID_THREAD_ID; + size_t offset = std::numeric_limits::max(); + + auto json_object = StructuredData::ParseJSON(packet.Peek()); + + if (!json_object || + json_object->GetType() != lldb::eStructuredDataTypeDictionary) + return SendIllFormedResponse(packet, "jTrace: Ill formed packet "); + + auto json_dict = json_object->GetAsDictionary(); + + if (!json_dict->GetValueForKeyAsInteger("traceid", uid) || + !json_dict->GetValueForKeyAsInteger("offset", offset) || + !json_dict->GetValueForKeyAsInteger("buffersize", byte_count)) + return SendIllFormedResponse(packet, "jTrace: Ill formed packet "); + + json_dict->GetValueForKeyAsInteger("threadid", tid); + + // Allocate the response buffer. + std::unique_ptr buffer (new (std::nothrow) uint8_t[byte_count]); + if (!buffer) + return SendErrorResponse(0x78); + + StreamGDBRemote response; + Status error; + llvm::MutableArrayRef buf(buffer.get(), byte_count); + + if (tracetype == BufferData) + error = m_debugged_process_sp->GetData(uid, tid, buf, offset); + else if (tracetype == MetaData) + error = m_debugged_process_sp->GetMetaData(uid, tid, buf, offset); + + if (error.Fail()) + return SendErrorResponse(error.GetError()); + + for (size_t i = 0; i < buf.size(); ++i) + response.PutHex8(buf[i]); + + StreamGDBRemote escaped_response; + escaped_response.PutEscapedBytes(response.GetData(), response.GetSize()); + return SendPacketNoLock(escaped_response.GetString()); +} + GDBRemoteCommunication::PacketResult GDBRemoteCommunicationServerLLGS::Handle_qProcessInfo( StringExtractorGDBRemote &packet) { diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h index ebda9a911d3..a7d7850d454 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.h @@ -189,6 +189,14 @@ protected: PacketResult Handle_QSaveRegisterState(StringExtractorGDBRemote &packet); + PacketResult Handle_jTraceStart(StringExtractorGDBRemote &packet); + + PacketResult Handle_jTraceRead(StringExtractorGDBRemote &packet); + + PacketResult Handle_jTraceStop(StringExtractorGDBRemote &packet); + + PacketResult Handle_jTraceConfigRead(StringExtractorGDBRemote &packet); + PacketResult Handle_QRestoreRegisterState(StringExtractorGDBRemote &packet); PacketResult Handle_vAttach(StringExtractorGDBRemote &packet); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index fd4b5dae8cc..f3c7e77d9da 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -1236,6 +1236,32 @@ Status ProcessGDBRemote::DoAttachToProcessWithName( return error; } +lldb::user_id_t ProcessGDBRemote::StartTrace(const TraceOptions &options, + Status &error) { + return m_gdb_comm.SendStartTracePacket(options, error); +} + +Status ProcessGDBRemote::StopTrace(lldb::user_id_t uid, lldb::tid_t thread_id) { + return m_gdb_comm.SendStopTracePacket(uid, thread_id); +} + +Status ProcessGDBRemote::GetData(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, + size_t offset) { + return m_gdb_comm.SendGetDataPacket(uid, thread_id, buffer, offset); +} + +Status ProcessGDBRemote::GetMetaData(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, + size_t offset) { + return m_gdb_comm.SendGetMetaDataPacket(uid, thread_id, buffer, offset); +} + +Status ProcessGDBRemote::GetTraceConfig(lldb::user_id_t uid, + TraceOptions &options) { + return m_gdb_comm.SendGetTraceConfigPacket(uid, options); +} + void ProcessGDBRemote::DidExit() { // When we exit, disconnect from the GDB server communications m_gdb_comm.Disconnect(); diff --git a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 60f0464f86b..d7a4e961b54 100644 --- a/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/contrib/llvm/tools/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -31,6 +31,7 @@ #include "lldb/Target/Thread.h" #include "lldb/Utility/ConstString.h" #include "lldb/Utility/Status.h" +#include "lldb/Utility/StreamGDBRemote.h" #include "lldb/Utility/StreamString.h" #include "lldb/Utility/StringExtractor.h" #include "lldb/Utility/StringList.h" @@ -177,6 +178,21 @@ public: Status GetWatchpointSupportInfo(uint32_t &num) override; + lldb::user_id_t StartTrace(const TraceOptions &options, + Status &error) override; + + Status StopTrace(lldb::user_id_t uid, lldb::tid_t thread_id) override; + + Status GetData(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, + size_t offset = 0) override; + + Status GetMetaData(lldb::user_id_t uid, lldb::tid_t thread_id, + llvm::MutableArrayRef &buffer, + size_t offset = 0) override; + + Status GetTraceConfig(lldb::user_id_t uid, TraceOptions &options) override; + Status GetWatchpointSupportInfo(uint32_t &num, bool &after) override; bool StartNoticingNewThreads() override; diff --git a/contrib/llvm/tools/lldb/source/Target/Process.cpp b/contrib/llvm/tools/lldb/source/Target/Process.cpp index ff86b0dbe05..c6ad536cee1 100644 --- a/contrib/llvm/tools/lldb/source/Target/Process.cpp +++ b/contrib/llvm/tools/lldb/source/Target/Process.cpp @@ -4823,6 +4823,48 @@ GetExpressionTimeout(const EvaluateExpressionOptions &options, return *options.GetTimeout() - GetOneThreadExpressionTimeout(options); } +static llvm::Optional +HandleStoppedEvent(Thread &thread, const ThreadPlanSP &thread_plan_sp, + RestorePlanState &restorer, const EventSP &event_sp, + EventSP &event_to_broadcast_sp, + const EvaluateExpressionOptions &options, bool handle_interrupts) { + Log *log = GetLogIfAnyCategoriesSet(LIBLLDB_LOG_STEP | LIBLLDB_LOG_PROCESS); + + ThreadPlanSP plan = thread.GetCompletedPlan(); + if (plan == thread_plan_sp && plan->PlanSucceeded()) { + LLDB_LOG(log, "execution completed successfully"); + + // Restore the plan state so it will get reported as intended when we are + // done. + restorer.Clean(); + return eExpressionCompleted; + } + + StopInfoSP stop_info_sp = thread.GetStopInfo(); + if (stop_info_sp && stop_info_sp->GetStopReason() == eStopReasonBreakpoint && + stop_info_sp->ShouldNotify(event_sp.get())) { + LLDB_LOG(log, "stopped for breakpoint: {0}.", stop_info_sp->GetDescription()); + if (!options.DoesIgnoreBreakpoints()) { + // Restore the plan state and then force Private to false. We are going + // to stop because of this plan so we need it to become a public plan or + // it won't report correctly when we continue to its termination later on. + restorer.Clean(); + thread_plan_sp->SetPrivate(false); + event_to_broadcast_sp = event_sp; + } + return eExpressionHitBreakpoint; + } + + if (!handle_interrupts && + Process::ProcessEventData::GetInterruptedFromEvent(event_sp.get())) + return llvm::None; + + LLDB_LOG(log, "thread plan did not successfully complete"); + if (!options.DoesUnwindOnError()) + event_to_broadcast_sp = event_sp; + return eExpressionInterrupted; +} + ExpressionResults Process::RunThreadPlan(ExecutionContext &exe_ctx, lldb::ThreadPlanSP &thread_plan_sp, @@ -5228,65 +5270,22 @@ Process::RunThreadPlan(ExecutionContext &exe_ctx, "but our thread (index-id=%u) has vanished.", thread_idx_id); return_value = eExpressionInterrupted; - } else { + } else if (Process::ProcessEventData::GetRestartedFromEvent( + event_sp.get())) { // If we were restarted, we just need to go back up to fetch // another event. - if (Process::ProcessEventData::GetRestartedFromEvent( - event_sp.get())) { - if (log) { - log->Printf("Process::RunThreadPlan(): Got a stop and " - "restart, so we'll continue waiting."); - } - keep_going = true; - do_resume = false; - handle_running_event = true; - } else { - ThreadPlanSP plan = thread->GetCompletedPlan(); - if (plan == thread_plan_sp && plan->PlanSucceeded()) { - - if (log) - log->PutCString("Process::RunThreadPlan(): execution " - "completed successfully."); - - // Restore the plan state so it will get reported as - // intended when we are done. - thread_plan_restorer.Clean(); - - return_value = eExpressionCompleted; - } else { - StopInfoSP stop_info_sp = thread_sp->GetStopInfo(); - // Something restarted the target, so just wait for it to - // stop for real. - if (stop_info_sp && - stop_info_sp->GetStopReason() == eStopReasonBreakpoint) { - if (log) - log->Printf("Process::RunThreadPlan() stopped for " - "breakpoint: %s.", - stop_info_sp->GetDescription()); - return_value = eExpressionHitBreakpoint; - if (!options.DoesIgnoreBreakpoints()) { - // Restore the plan state and then force Private to - // false. We are - // going to stop because of this plan so we need it to - // become a public - // plan or it won't report correctly when we continue to - // its termination - // later on. - thread_plan_restorer.Clean(); - if (thread_plan_sp) - thread_plan_sp->SetPrivate(false); - event_to_broadcast_sp = event_sp; - } - } else { - if (log) - log->PutCString("Process::RunThreadPlan(): thread plan " - "didn't successfully complete."); - if (!options.DoesUnwindOnError()) - event_to_broadcast_sp = event_sp; - return_value = eExpressionInterrupted; - } - } + if (log) { + log->Printf("Process::RunThreadPlan(): Got a stop and " + "restart, so we'll continue waiting."); } + keep_going = true; + do_resume = false; + handle_running_event = true; + } else { + const bool handle_interrupts = true; + return_value = *HandleStoppedEvent( + *thread, thread_plan_sp, thread_plan_restorer, event_sp, + event_to_broadcast_sp, options, handle_interrupts); } } break; @@ -5392,20 +5391,6 @@ Process::RunThreadPlan(ExecutionContext &exe_ctx, } if (stop_state == lldb::eStateStopped) { - // Between the time we initiated the Halt and the time we - // delivered it, the process could have - // already finished its job. Check that here: - - if (thread->IsThreadPlanDone(thread_plan_sp.get())) { - if (log) - log->PutCString("Process::RunThreadPlan(): Even though we " - "timed out, the call plan was done. " - "Exiting wait loop."); - return_value = eExpressionCompleted; - back_to_top = false; - break; - } - if (Process::ProcessEventData::GetRestartedFromEvent( event_sp.get())) { if (log) @@ -5419,6 +5404,18 @@ Process::RunThreadPlan(ExecutionContext &exe_ctx, continue; } + // Between the time we initiated the Halt and the time we + // delivered it, the process could have + // already finished its job. Check that here: + const bool handle_interrupts = false; + if (auto result = HandleStoppedEvent( + *thread, thread_plan_sp, thread_plan_restorer, event_sp, + event_to_broadcast_sp, options, handle_interrupts)) { + return_value = *result; + back_to_top = false; + break; + } + if (!options.GetTryAllThreads()) { if (log) log->PutCString("Process::RunThreadPlan(): try_all_threads " diff --git a/contrib/llvm/tools/lldb/source/Target/Thread.cpp b/contrib/llvm/tools/lldb/source/Target/Thread.cpp index 43ae7b5413b..4aba30be5f7 100644 --- a/contrib/llvm/tools/lldb/source/Target/Thread.cpp +++ b/contrib/llvm/tools/lldb/source/Target/Thread.cpp @@ -51,6 +51,7 @@ #include "lldb/Utility/RegularExpression.h" #include "lldb/Utility/Stream.h" #include "lldb/Utility/StreamString.h" +#include "lldb/lldb-enumerations.h" using namespace lldb; using namespace lldb_private; @@ -397,7 +398,7 @@ lldb::StopInfoSP Thread::GetStopInfo() { bool plan_overrides_trace = have_valid_stop_info && have_valid_completed_plan && (m_stop_info_sp->GetStopReason() == eStopReasonTrace); - + if (have_valid_stop_info && !plan_overrides_trace) { return m_stop_info_sp; } else if (have_valid_completed_plan) { @@ -541,7 +542,7 @@ bool Thread::CheckpointThreadState(ThreadStateCheckpoint &saved_state) { saved_state.orig_stop_id = process_sp->GetStopID(); saved_state.current_inlined_depth = GetCurrentInlinedDepth(); saved_state.m_completed_plan_stack = m_completed_plan_stack; - + return true; } @@ -1994,13 +1995,12 @@ bool Thread::GetDescription(Stream &strm, lldb::DescriptionLevel level, thread_info->GetObjectForDotSeparatedPath("trace_messages"); bool printed_activity = false; - if (activity && - activity->GetType() == StructuredData::Type::eTypeDictionary) { + if (activity && activity->GetType() == eStructuredDataTypeDictionary) { StructuredData::Dictionary *activity_dict = activity->GetAsDictionary(); StructuredData::ObjectSP id = activity_dict->GetValueForKey("id"); StructuredData::ObjectSP name = activity_dict->GetValueForKey("name"); - if (name && name->GetType() == StructuredData::Type::eTypeString && id && - id->GetType() == StructuredData::Type::eTypeInteger) { + if (name && name->GetType() == eStructuredDataTypeString && id && + id->GetType() == eStructuredDataTypeInteger) { strm.Format(" Activity '{0}', {1:x}\n", name->GetAsString()->GetValue(), id->GetAsInteger()->GetValue()); @@ -2008,8 +2008,7 @@ bool Thread::GetDescription(Stream &strm, lldb::DescriptionLevel level, printed_activity = true; } bool printed_breadcrumb = false; - if (breadcrumb && - breadcrumb->GetType() == StructuredData::Type::eTypeDictionary) { + if (breadcrumb && breadcrumb->GetType() == eStructuredDataTypeDictionary) { if (printed_activity) strm.Printf("\n"); StructuredData::Dictionary *breadcrumb_dict = @@ -2017,13 +2016,13 @@ bool Thread::GetDescription(Stream &strm, lldb::DescriptionLevel level, StructuredData::ObjectSP breadcrumb_text = breadcrumb_dict->GetValueForKey("name"); if (breadcrumb_text && - breadcrumb_text->GetType() == StructuredData::Type::eTypeString) { + breadcrumb_text->GetType() == eStructuredDataTypeString) { strm.Format(" Current Breadcrumb: {0}\n", breadcrumb_text->GetAsString()->GetValue()); } printed_breadcrumb = true; } - if (messages && messages->GetType() == StructuredData::Type::eTypeArray) { + if (messages && messages->GetType() == eStructuredDataTypeArray) { if (printed_breadcrumb) strm.Printf("\n"); StructuredData::Array *messages_array = messages->GetAsArray(); @@ -2032,14 +2031,13 @@ bool Thread::GetDescription(Stream &strm, lldb::DescriptionLevel level, strm.Printf(" %zu trace messages:\n", msg_count); for (size_t i = 0; i < msg_count; i++) { StructuredData::ObjectSP message = messages_array->GetItemAtIndex(i); - if (message && - message->GetType() == StructuredData::Type::eTypeDictionary) { + if (message && message->GetType() == eStructuredDataTypeDictionary) { StructuredData::Dictionary *message_dict = message->GetAsDictionary(); StructuredData::ObjectSP message_text = message_dict->GetValueForKey("message"); if (message_text && - message_text->GetType() == StructuredData::Type::eTypeString) { + message_text->GetType() == eStructuredDataTypeString) { strm.Format(" {0}\n", message_text->GetAsString()->GetValue()); } } diff --git a/contrib/llvm/tools/lldb/source/Utility/StringExtractor.cpp b/contrib/llvm/tools/lldb/source/Utility/StringExtractor.cpp index a94f6bcd008..cf5c7e22744 100644 --- a/contrib/llvm/tools/lldb/source/Utility/StringExtractor.cpp +++ b/contrib/llvm/tools/lldb/source/Utility/StringExtractor.cpp @@ -280,6 +280,15 @@ uint64_t StringExtractor::GetHexMaxU64(bool little_endian, return result; } +bool StringExtractor::ConsumeFront(const llvm::StringRef &str) { + llvm::StringRef S = GetStringRef(); + if (!S.startswith(str)) + return false; + else + m_index += str.size(); + return true; +} + size_t StringExtractor::GetHexBytes(llvm::MutableArrayRef dest, uint8_t fail_fill_value) { size_t bytes_extracted = 0; diff --git a/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.cpp b/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.cpp index 08226f4c8f9..3473a9e9668 100644 --- a/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.cpp +++ b/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.cpp @@ -286,6 +286,16 @@ StringExtractorGDBRemote::GetServerPacketType() const { return eServerPacketType_jSignalsInfo; if (PACKET_MATCHES("jThreadsInfo")) return eServerPacketType_jThreadsInfo; + if (PACKET_STARTS_WITH("jTraceBufferRead:")) + return eServerPacketType_jTraceBufferRead; + if (PACKET_STARTS_WITH("jTraceConfigRead:")) + return eServerPacketType_jTraceConfigRead; + if (PACKET_STARTS_WITH("jTraceMetaRead:")) + return eServerPacketType_jTraceMetaRead; + if (PACKET_STARTS_WITH("jTraceStart:")) + return eServerPacketType_jTraceStart; + if (PACKET_STARTS_WITH("jTraceStop:")) + return eServerPacketType_jTraceStop; break; case 'v': diff --git a/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.h b/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.h index a5c0c8e803b..473cab04f80 100644 --- a/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.h +++ b/contrib/llvm/tools/lldb/source/Utility/StringExtractorGDBRemote.h @@ -164,6 +164,12 @@ public: eServerPacketType__M, eServerPacketType__m, eServerPacketType_notify, // '%' notification + + eServerPacketType_jTraceStart, + eServerPacketType_jTraceBufferRead, + eServerPacketType_jTraceMetaRead, + eServerPacketType_jTraceStop, + eServerPacketType_jTraceConfigRead, }; ServerPacketType GetServerPacketType() const; diff --git a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp index a07fcef35eb..b022c300756 100644 --- a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp +++ b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp @@ -24,6 +24,7 @@ #include "llvm/IR/Module.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" @@ -269,7 +270,7 @@ static bool compareSymbolName(const NMSymbol &A, const NMSymbol &B) { static char isSymbolList64Bit(SymbolicFile &Obj) { if (auto *IRObj = dyn_cast(&Obj)) return Triple(IRObj->getTargetTriple()).isArch64Bit(); - if (isa(Obj)) + if (isa(Obj) || isa(Obj)) return false; if (isa(Obj)) return false; @@ -849,6 +850,18 @@ static char getSymbolNMTypeChar(COFFObjectFile &Obj, symbol_iterator I) { return '?'; } +static char getSymbolNMTypeChar(COFFImportFile &Obj) { + switch (Obj.getCOFFImportHeader()->getType()) { + case COFF::IMPORT_CODE: + return 't'; + case COFF::IMPORT_DATA: + return 'd'; + case COFF::IMPORT_CONST: + return 'r'; + } + return '?'; +} + static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) { DataRefImpl Symb = I->getRawDataRefImpl(); uint8_t NType = Obj.is64Bit() ? Obj.getSymbol64TableEntry(Symb).n_type @@ -932,6 +945,8 @@ static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) { Ret = getSymbolNMTypeChar(*IR, I); else if (COFFObjectFile *COFF = dyn_cast(&Obj)) Ret = getSymbolNMTypeChar(*COFF, I); + else if (COFFImportFile *COFFImport = dyn_cast(&Obj)) + Ret = getSymbolNMTypeChar(*COFFImport); else if (MachOObjectFile *MachO = dyn_cast(&Obj)) Ret = getSymbolNMTypeChar(*MachO, I); else if (WasmObjectFile *Wasm = dyn_cast(&Obj)) diff --git a/contrib/llvm/tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.cpp b/contrib/llvm/tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.cpp index 16cec82f718..d1147267962 100644 --- a/contrib/llvm/tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.cpp +++ b/contrib/llvm/tools/llvm-pdbdump/PrettyClassLayoutGraphicalDumper.cpp @@ -80,7 +80,8 @@ bool PrettyClassLayoutGraphicalDumper::start(const UDTLayoutBase &Layout) { if (Item->getLayoutSize() > 0) { uint32_t Prev = RelativeOffset + Item->getLayoutSize() - 1; - NextPaddingByte = UseMap.find_next_unset(Prev); + if (Prev < UseMap.size()) + NextPaddingByte = UseMap.find_next_unset(Prev); } } diff --git a/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp index 0573b23cdc7..652182e8e9b 100644 --- a/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp +++ b/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp @@ -39,6 +39,20 @@ YAMLOutputStyle::YAMLOutputStyle(PDBFile &File) } Error YAMLOutputStyle::dump() { + if (opts::pdb2yaml::All) { + opts::pdb2yaml::StreamMetadata = true; + opts::pdb2yaml::StreamDirectory = true; + opts::pdb2yaml::PdbStream = true; + opts::pdb2yaml::StringTable = true; + opts::pdb2yaml::DbiStream = true; + opts::pdb2yaml::DbiModuleInfo = true; + opts::pdb2yaml::DbiModuleSyms = true; + opts::pdb2yaml::DbiModuleSourceFileInfo = true; + opts::pdb2yaml::DbiModuleSourceLineInfo = true; + opts::pdb2yaml::TpiStream = true; + opts::pdb2yaml::IpiStream = true; + } + if (opts::pdb2yaml::StreamDirectory) opts::pdb2yaml::StreamMetadata = true; if (opts::pdb2yaml::DbiModuleSyms) diff --git a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp index 1767c3cfda8..ff14c39cbaa 100644 --- a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -378,6 +378,9 @@ cl::opt InputFilename(cl::Positional, } namespace pdb2yaml { +cl::opt All("all", + cl::desc("Dump everything we know how to dump."), + cl::sub(PdbToYamlSubcommand), cl::init(false)); cl::opt NoFileHeaders("no-file-headers", cl::desc("Do not dump MSF file headers (you will not be able " @@ -500,6 +503,7 @@ static void yamlToPdb(StringRef Path) { pdb::yaml::PdbInfoStream DefaultInfoStream; pdb::yaml::PdbDbiStream DefaultDbiStream; pdb::yaml::PdbTpiStream DefaultTpiStream; + pdb::yaml::PdbTpiStream DefaultIpiStream; const auto &Info = YamlObj.PdbStream.getValueOr(DefaultInfoStream); @@ -524,12 +528,12 @@ static void yamlToPdb(StringRef Path) { DbiBuilder.setVersionHeader(Dbi.VerHeader); for (const auto &MI : Dbi.ModInfos) { auto &ModiBuilder = ExitOnErr(DbiBuilder.addModuleInfo(MI.Mod)); + ModiBuilder.setObjFileName(MI.Obj); for (auto S : MI.SourceFiles) ExitOnErr(DbiBuilder.addModuleSourceFile(MI.Mod, S)); if (MI.Modi.hasValue()) { const auto &ModiStream = *MI.Modi; - ModiBuilder.setObjFileName(MI.Obj); for (auto Symbol : ModiStream.Symbols) ModiBuilder.addSymbol(Symbol.Record); } @@ -601,11 +605,11 @@ static void yamlToPdb(StringRef Path) { for (const auto &R : Tpi.Records) TpiBuilder.addTypeRecord(R.Record.data(), R.Record.Hash); - const auto &Ipi = YamlObj.IpiStream.getValueOr(DefaultTpiStream); + const auto &Ipi = YamlObj.IpiStream.getValueOr(DefaultIpiStream); auto &IpiBuilder = Builder.getIpiBuilder(); IpiBuilder.setVersionHeader(Ipi.Version); for (const auto &R : Ipi.Records) - TpiBuilder.addTypeRecord(R.Record.data(), R.Record.Hash); + IpiBuilder.addTypeRecord(R.Record.data(), R.Record.Hash); ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile)); } @@ -852,18 +856,17 @@ static void mergePdbs() { for (const auto &Path : opts::merge::InputFilenames) { std::unique_ptr Session; auto &File = loadPDB(Path, Session); - SmallVector SourceToDest; + SmallVector TypeMap; + SmallVector IdMap; if (File.hasPDBTpiStream()) { - SourceToDest.clear(); auto &Tpi = ExitOnErr(File.getPDBTpiStream()); - ExitOnErr(codeview::mergeTypeStreams(MergedIpi, MergedTpi, SourceToDest, - nullptr, Tpi.typeArray())); + ExitOnErr(codeview::mergeTypeRecords(MergedTpi, TypeMap, nullptr, + Tpi.typeArray())); } if (File.hasPDBIpiStream()) { - SourceToDest.clear(); auto &Ipi = ExitOnErr(File.getPDBIpiStream()); - ExitOnErr(codeview::mergeTypeStreams(MergedIpi, MergedTpi, SourceToDest, - nullptr, Ipi.typeArray())); + ExitOnErr(codeview::mergeIdRecords(MergedIpi, TypeMap, IdMap, + Ipi.typeArray())); } } @@ -877,14 +880,12 @@ static void mergePdbs() { auto &DestTpi = Builder.getTpiBuilder(); auto &DestIpi = Builder.getIpiBuilder(); - MergedTpi.ForEachRecord( - [&DestTpi](TypeIndex TI, MutableArrayRef Data) { - DestTpi.addTypeRecord(Data, None); - }); - MergedIpi.ForEachRecord( - [&DestIpi](TypeIndex TI, MutableArrayRef Data) { - DestIpi.addTypeRecord(Data, None); - }); + MergedTpi.ForEachRecord([&DestTpi](TypeIndex TI, ArrayRef Data) { + DestTpi.addTypeRecord(Data, None); + }); + MergedIpi.ForEachRecord([&DestIpi](TypeIndex TI, ArrayRef Data) { + DestIpi.addTypeRecord(Data, None); + }); SmallString<64> OutFile(opts::merge::PdbOutputFile); if (OutFile.empty()) { diff --git a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h index e38b32c6a34..b344129d217 100644 --- a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h +++ b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h @@ -115,6 +115,7 @@ extern llvm::cl::opt Pedantic; } namespace pdb2yaml { +extern llvm::cl::opt All; extern llvm::cl::opt NoFileHeaders; extern llvm::cl::opt Minimal; extern llvm::cl::opt StreamMetadata; diff --git a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp index a257910ecf7..4867acf7098 100644 --- a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -572,7 +572,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts, } if (ShowMemOPSizes && NumMemOPCalls > 0) { - OS << " Memory Instrinsic Size Results:\n"; + OS << " Memory Intrinsic Size Results:\n"; traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS, nullptr); } diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp index 78bfa558e4a..a59caa351e7 100644 --- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp +++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp @@ -1073,8 +1073,8 @@ void COFFDumper::mergeCodeViewTypes(TypeTableBuilder &CVIDs, error(object_error::parse_failed); } SmallVector SourceToDest; - if (auto EC = - mergeTypeStreams(CVIDs, CVTypes, SourceToDest, nullptr, Types)) + if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, nullptr, + Types)) return error(std::move(EC)); } } diff --git a/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp index 7a500eaf411..185119c2ca4 100644 --- a/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -135,6 +135,9 @@ static Error isTrivialOperatorNode(const TreePatternNode *N) { std::string Explanation = ""; std::string Separator = ""; if (N->isLeaf()) { + if (isa(N->getLeafValue())) + return Error::success(); + Explanation = "Is a leaf"; Separator = ", "; } @@ -272,6 +275,7 @@ public: OPM_ComplexPattern, OPM_Instruction, OPM_Int, + OPM_LiteralInt, OPM_LLT, OPM_RegBank, OPM_MBB, @@ -406,13 +410,14 @@ public: } }; -/// Generates code to check that an operand is a particular int. -class IntOperandMatcher : public OperandPredicateMatcher { +/// Generates code to check that an operand is a G_CONSTANT with a particular +/// int. +class ConstantIntOperandMatcher : public OperandPredicateMatcher { protected: int64_t Value; public: - IntOperandMatcher(int64_t Value) + ConstantIntOperandMatcher(int64_t Value) : OperandPredicateMatcher(OPM_Int), Value(Value) {} static bool classof(const OperandPredicateMatcher *P) { @@ -425,6 +430,27 @@ public: } }; +/// Generates code to check that an operand is a raw int (where MO.isImm() or +/// MO.isCImm() is true). +class LiteralIntOperandMatcher : public OperandPredicateMatcher { +protected: + int64_t Value; + +public: + LiteralIntOperandMatcher(int64_t Value) + : OperandPredicateMatcher(OPM_LiteralInt), Value(Value) {} + + static bool classof(const OperandPredicateMatcher *P) { + return P->getKind() == OPM_LiteralInt; + } + + void emitCxxPredicateExpr(raw_ostream &OS, RuleMatcher &Rule, + StringRef OperandExpr) const override { + OS << OperandExpr << ".isCImm() && " << OperandExpr + << ".getCImm()->equalsInt(" << Value << ")"; + } +}; + /// Generates code to check that a set of predicates match for a particular /// operand. class OperandMatcher : public PredicateListMatcher { @@ -1121,14 +1147,21 @@ void RuleMatcher::emit(raw_ostream &OS, // We must also check if it's safe to fold the matched instructions. if (InsnVariableNames.size() >= 2) { + // Invert the map to create stable ordering (by var names) + SmallVector Names; for (const auto &Pair : InsnVariableNames) { // Skip the root node since it isn't moving anywhere. Everything else is // sinking to meet it. if (Pair.first == Matchers.front().get()) continue; + Names.push_back(Pair.second); + } + std::sort(Names.begin(), Names.end()); + + for (const auto &Name : Names) { // Reject the difficult cases until we have a more accurate check. - OS << " if (!isObviouslySafeToFold(" << Pair.second + OS << " if (!isObviouslySafeToFold(" << Name << ")) return false;\n"; // FIXME: Emit checks to determine it's _actually_ safe to fold and/or @@ -1236,7 +1269,7 @@ private: createAndImportSelDAGMatcher(InstructionMatcher &InsnMatcher, const TreePatternNode *Src) const; Error importChildMatcher(InstructionMatcher &InsnMatcher, - TreePatternNode *SrcChild, unsigned OpIdx, + const TreePatternNode *SrcChild, unsigned OpIdx, unsigned &TempOpIdx) const; Expected createAndImportInstructionRenderer( RuleMatcher &M, const TreePatternNode *Dst, @@ -1299,14 +1332,23 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( if (Src->getExtTypes().size() > 1) return failedImport("Src pattern has multiple results"); - auto SrcGIOrNull = findNodeEquiv(Src->getOperator()); - if (!SrcGIOrNull) - return failedImport("Pattern operator lacks an equivalent Instruction" + - explainOperator(Src->getOperator())); - auto &SrcGI = *SrcGIOrNull; + if (Src->isLeaf()) { + Init *SrcInit = Src->getLeafValue(); + if (isa(SrcInit)) { + InsnMatcher.addPredicate( + &Target.getInstruction(RK.getDef("G_CONSTANT"))); + } else + return failedImport("Unable to deduce gMIR opcode to handle Src (which is a leaf)"); + } else { + auto SrcGIOrNull = findNodeEquiv(Src->getOperator()); + if (!SrcGIOrNull) + return failedImport("Pattern operator lacks an equivalent Instruction" + + explainOperator(Src->getOperator())); + auto &SrcGI = *SrcGIOrNull; - // The operators look good: match the opcode and mutate it to the new one. - InsnMatcher.addPredicate(&SrcGI); + // The operators look good: match the opcode + InsnMatcher.addPredicate(&SrcGI); + } unsigned OpIdx = 0; unsigned TempOpIdx = 0; @@ -1323,18 +1365,27 @@ Expected GlobalISelEmitter::createAndImportSelDAGMatcher( OM.addPredicate(*OpTyOrNone); } - // Match the used operands (i.e. the children of the operator). - for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { - if (auto Error = importChildMatcher(InsnMatcher, Src->getChild(i), OpIdx++, - TempOpIdx)) - return std::move(Error); + if (Src->isLeaf()) { + Init *SrcInit = Src->getLeafValue(); + if (IntInit *SrcIntInit = dyn_cast(SrcInit)) { + OperandMatcher &OM = InsnMatcher.addOperand(OpIdx++, "", TempOpIdx); + OM.addPredicate(SrcIntInit->getValue()); + } else + return failedImport("Unable to deduce gMIR opcode to handle Src (which is a leaf)"); + } else { + // Match the used operands (i.e. the children of the operator). + for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) { + if (auto Error = importChildMatcher(InsnMatcher, Src->getChild(i), + OpIdx++, TempOpIdx)) + return std::move(Error); + } } return InsnMatcher; } Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher, - TreePatternNode *SrcChild, + const TreePatternNode *SrcChild, unsigned OpIdx, unsigned &TempOpIdx) const { OperandMatcher &OM = @@ -1379,7 +1430,7 @@ Error GlobalISelEmitter::importChildMatcher(InstructionMatcher &InsnMatcher, // Check for constant immediates. if (auto *ChildInt = dyn_cast(SrcChild->getLeafValue())) { - OM.addPredicate(ChildInt->getValue()); + OM.addPredicate(ChildInt->getValue()); return Error::success(); } @@ -1605,6 +1656,9 @@ Expected GlobalISelEmitter::runOnPattern(const PatternToMatch &P) { return failedImport("Src pattern root isn't a trivial operator (" + toString(std::move(Err)) + ")"); + if (Dst->isLeaf()) + return failedImport("Dst pattern root isn't a known leaf"); + // Start with the defined operands (i.e., the results of the root operator). Record *DstOp = Dst->getOperator(); if (!DstOp->isSubClassOf("Instruction")) diff --git a/contrib/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm/utils/TableGen/TableGen.cpp index 00d20f1df6c..329ce348727 100644 --- a/contrib/llvm/utils/TableGen/TableGen.cpp +++ b/contrib/llvm/utils/TableGen/TableGen.cpp @@ -46,6 +46,7 @@ enum ActionType { GenAttributes, GenSearchableTables, GenGlobalISel, + GenX86FoldTables, GenX86EVEX2VEXTables, GenRegisterBank, }; @@ -97,6 +98,8 @@ namespace { "Generate generic binary-searchable table"), clEnumValN(GenGlobalISel, "gen-global-isel", "Generate GlobalISel selector"), + clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", + "Generate X86 fold tables"), clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables"), clEnumValN(GenRegisterBank, "gen-register-bank", @@ -190,6 +193,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenGlobalISel: EmitGlobalISel(Records, OS); break; + case GenX86FoldTables: + EmitX86FoldTables(Records, OS); + break; case GenRegisterBank: EmitRegisterBank(Records, OS); break; diff --git a/contrib/llvm/utils/TableGen/TableGenBackends.h b/contrib/llvm/utils/TableGen/TableGenBackends.h index 2512997e27f..53614df27c4 100644 --- a/contrib/llvm/utils/TableGen/TableGenBackends.h +++ b/contrib/llvm/utils/TableGen/TableGenBackends.h @@ -81,6 +81,7 @@ void EmitCTags(RecordKeeper &RK, raw_ostream &OS); void EmitAttributes(RecordKeeper &RK, raw_ostream &OS); void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS); void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); +void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); diff --git a/contrib/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/contrib/llvm/utils/TableGen/X86FoldTablesEmitter.cpp new file mode 100644 index 00000000000..f211a8fab97 --- /dev/null +++ b/contrib/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -0,0 +1,720 @@ +//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This tablegen backend is responsible for emitting the memory fold tables of +// the X86 backend instructions. +// +//===----------------------------------------------------------------------===// + +#include "CodeGenDAGPatterns.h" +#include "CodeGenTarget.h" +#include "X86RecognizableInstr.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/TableGenBackend.h" + +using namespace llvm; + +namespace { + +// 3 possible strategies for the unfolding flag (TB_NO_REVERSE) of the +// manual added entries. +enum UnfoldStrategy { + UNFOLD, // Allow unfolding + NO_UNFOLD, // Prevent unfolding + NO_STRATEGY // Make decision according to operands' sizes +}; + +// Represents an entry in the manual mapped instructions set. +struct ManualMapEntry { + const char *RegInstStr; + const char *MemInstStr; + UnfoldStrategy Strategy; + + ManualMapEntry(const char *RegInstStr, const char *MemInstStr, + UnfoldStrategy Strategy = NO_STRATEGY) + : RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {} +}; + +class IsMatch; + +// List of instructions requiring explicitly aligned memory. +const char *const ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS", + "MOVNTPD", "MOVNTDQ", "MOVNTDQA"}; + +// List of instructions NOT requiring explicit memory alignment. +const char *const ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD"}; + +// For manually mapping instructions that do not match by their encoding. +const ManualMapEntry ManualMapSet[] = { + { "ADD16ri_DB", "ADD16mi", NO_UNFOLD }, + { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD }, + { "ADD16rr_DB", "ADD16mr", NO_UNFOLD }, + { "ADD32ri_DB", "ADD32mi", NO_UNFOLD }, + { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD }, + { "ADD32rr_DB", "ADD32mr", NO_UNFOLD }, + { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD }, + { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD }, + { "ADD64rr_DB", "ADD64mr", NO_UNFOLD }, + { "ADD16rr_DB", "ADD16rm", NO_UNFOLD }, + { "ADD32rr_DB", "ADD32rm", NO_UNFOLD }, + { "ADD64rr_DB", "ADD64rm", NO_UNFOLD }, + { "PUSH16r", "PUSH16rmm", NO_UNFOLD }, + { "PUSH32r", "PUSH32rmm", NO_UNFOLD }, + { "PUSH64r", "PUSH64rmm", NO_UNFOLD }, + { "TAILJMPr", "TAILJMPm", UNFOLD }, + { "TAILJMPr64", "TAILJMPm64", UNFOLD }, + { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD }, +}; + +// Do not add these instructions to any of the folding tables. +const char *const NoFoldSet[] = { + "TCRETURNri64", + "TCRETURNmi64", // Special dealing (in X86InstrCompiler.td under + "TCRETURNri", // "tailcall stuff" section). + "TCRETURNmi" + + // Different calculations of the folded operand between + // memory and register forms (folding is illegal). + // - In their register form, the second register operand's relevant + // bits are only the first 4/5/6 (depending on mode and reg size). + // - In their memory form, the second register operand's relevant + // bits are only the first 16/32/64 (depending on mode and reg size). + "BT16rr", "BT32rr", "BT64rr", + "BT16mr", "BT32mr", "BT64mr", + "BTC16rr", "BTC32rr", "BTC64rr", + "BTC16mr", "BTC32mr", "BTC64mr", + "BTR16rr", "BTR32rr", "BTR64rr", + "BTR16mr", "BTR32mr", "BTR64mr", + "BTS16rr", "BTS32rr", "BTS64rr", + "BTS16mr", "BTS32mr", "BTS64mr", + + // Memory folding is enabled only when optimizing for size by DAG + // patterns only. (issue detailed in D28744 review) + "VCVTSS2SDrm", "VCVTSS2SDrr", + "VCVTSS2SDZrm", "VCVTSS2SDZrr", + "VCVTSS2SDZrmk", "VCVTSS2SDZrrk", + "VCVTSS2SDZrmkz", "VCVTSS2SDZrrkz", + "VCVTSS2SDZrm_Int", "VCVTSS2SDZrr_Int", + "VCVTSS2SDZrm_Intk", "VCVTSS2SDZrr_Intk", + "VCVTSS2SDZrm_Intkz", "VCVTSS2SDZrr_Intkz", + "VCVTSD2SSrm", "VCVTSD2SSrr", + "VCVTSD2SSZrm", "VCVTSD2SSZrr", + "VCVTSD2SSZrmk", "VCVTSD2SSZrrk", + "VCVTSD2SSZrmkz", "VCVTSD2SSZrrkz", + "VCVTSD2SSZrm_Int", "VCVTSD2SSZrr_Int", + "VCVTSD2SSZrm_Intk", "VCVTSD2SSZrr_Intk", + "VCVTSD2SSZrm_Intkz", "VCVTSD2SSZrr_Intkz", + "VRCP14SSrm", "VRCP14SSrr", + "VRCP14SDrm", "VRCP14SDrr", + "VRSQRT14SSrm", "VRSQRT14SSrr", + "VRSQRT14SDrm", "VRSQRT14SDrr", + "VSQRTSSm", "VSQRTSSr", + "VSQRTSSm_Int", "VSQRTSSr_Int", + "VSQRTSSZm", "VSQRTSSZr", + "VSQRTSSZm_Int", "VSQRTSSZr_Int", + "VSQRTSSZm_Intk", "VSQRTSSZr_Intk", + "VSQRTSSZm_Intkz", "VSQRTSSZr_Intkz", + "VSQRTSDm", "VSQRTSDr", + "VSQRTSDm_Int", "VSQRTSDr_Int", + "VSQRTSDZm", "VSQRTSDZr", + "VSQRTSDZm_Int", "VSQRTSDZr_Int", + "VSQRTSDZm_Intk", "VSQRTSDZr_Intk", + "VSQRTSDZm_Intkz", "VSQRTSDZr_Intkz", +}; + +static bool isExplicitAlign(const CodeGenInstruction *Inst) { + return any_of(ExplicitAlign, [Inst](const char *InstStr) { + return Inst->TheDef->getName().find(InstStr) != StringRef::npos; + }); +} + +static bool isExplicitUnalign(const CodeGenInstruction *Inst) { + return any_of(ExplicitUnalign, [Inst](const char *InstStr) { + return Inst->TheDef->getName().find(InstStr) != StringRef::npos; + }); +} + +class X86FoldTablesEmitter { + RecordKeeper &Records; + CodeGenTarget Target; + + // Represents an entry in the folding table + class X86FoldTableEntry { + const CodeGenInstruction *RegInst; + const CodeGenInstruction *MemInst; + + public: + bool CannotUnfold = false; + bool IsLoad = false; + bool IsStore = false; + bool IsAligned = false; + unsigned int Alignment = 0; + + X86FoldTableEntry(const CodeGenInstruction *RegInst, + const CodeGenInstruction *MemInst) + : RegInst(RegInst), MemInst(MemInst) {} + + friend raw_ostream &operator<<(raw_ostream &OS, + const X86FoldTableEntry &E) { + OS << "{ X86::" << E.RegInst->TheDef->getName() + << ", X86::" << E.MemInst->TheDef->getName() << ", "; + + if (E.IsLoad) + OS << "TB_FOLDED_LOAD | "; + if (E.IsStore) + OS << "TB_FOLDED_STORE | "; + if (E.CannotUnfold) + OS << "TB_NO_REVERSE | "; + if (E.IsAligned) + OS << "TB_ALIGN_" << E.Alignment << " | "; + + OS << "0 },\n"; + + return OS; + } + }; + + typedef std::vector FoldTable; + // std::vector for each folding table. + // Table2Addr - Holds instructions which their memory form performs load+store + // Table#i - Holds instructions which the their memory form perform a load OR + // a store, and their #i'th operand is folded. + FoldTable Table2Addr; + FoldTable Table0; + FoldTable Table1; + FoldTable Table2; + FoldTable Table3; + FoldTable Table4; + +public: + X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} + + // run - Generate the 6 X86 memory fold tables. + void run(raw_ostream &OS); + +private: + // Decides to which table to add the entry with the given instructions. + // S sets the strategy of adding the TB_NO_REVERSE flag. + void updateTables(const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S = NO_STRATEGY); + + // Generates X86FoldTableEntry with the given instructions and fill it with + // the appropriate flags - then adds it to Table. + void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S, const unsigned int FoldedInd); + + // Print the given table as a static const C++ array of type + // X86MemoryFoldTableEntry. + void printTable(const FoldTable &Table, std::string TableName, + raw_ostream &OS) { + OS << "\nstatic const X86MemoryFoldTableEntry MemoryFold" << TableName + << "[] = {\n"; + + for (const X86FoldTableEntry &E : Table) + OS.indent(2) << E; + + OS << "};\n"; + } +}; + +// Return true if one of the instruction's operands is a RST register class +static bool hasRSTRegClass(const CodeGenInstruction *Inst) { + return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { + return OpIn.Rec->getName() == "RST"; + }); +} + +// Return true if one of the instruction's operands is a ptr_rc_tailcall +static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) { + return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) { + return OpIn.Rec->getName() == "ptr_rc_tailcall"; + }); +} + +// Calculates the integer value representing the BitsInit object +static inline uint64_t getValueFromBitsInit(const BitsInit *B) { + assert(B->getNumBits() <= sizeof(uint64_t) * CHAR_BIT && + "BitInits' too long!"); + + uint64_t Value = 0; + for (unsigned i = 0, e = B->getNumBits(); i != e; ++i) { + BitInit *Bit = cast(B->getBit(i)); + Value |= uint64_t(Bit->getValue()) << i; + } + return Value; +} + +// Returns true if the two given BitsInits represent the same integer value +static inline bool equalBitsInits(const BitsInit *B1, const BitsInit *B2) { + if (B1->getNumBits() != B2->getNumBits()) + PrintFatalError("Comparing two BitsInits with different sizes!"); + + for (unsigned i = 0, e = B1->getNumBits(); i != e; ++i) { + BitInit *Bit1 = cast(B1->getBit(i)); + BitInit *Bit2 = cast(B2->getBit(i)); + if (Bit1->getValue() != Bit2->getValue()) + return false; + } + return true; +} + +// Return the size of the register operand +static inline unsigned int getRegOperandSize(const Record *RegRec) { + if (RegRec->isSubClassOf("RegisterOperand")) + RegRec = RegRec->getValueAsDef("RegClass"); + if (RegRec->isSubClassOf("RegisterClass")) + return RegRec->getValueAsListOfDefs("RegTypes")[0]->getValueAsInt("Size"); + + llvm_unreachable("Register operand's size not known!"); +} + +// Return the size of the memory operand +static inline unsigned int +getMemOperandSize(const Record *MemRec, const bool IntrinsicSensitive = false) { + if (MemRec->isSubClassOf("Operand")) { + // Intrinsic memory instructions use ssmem/sdmem. + if (IntrinsicSensitive && + (MemRec->getName() == "sdmem" || MemRec->getName() == "ssmem")) + return 128; + + std::string Name = + MemRec->getValueAsDef("ParserMatchClass")->getValueAsString("Name"); + if (Name == "Mem8") + return 8; + if (Name == "Mem16") + return 16; + if (Name == "Mem32") + return 32; + if (Name == "Mem64") + return 64; + if (Name == "Mem80") + return 80; + if (Name == "Mem128") + return 128; + if (Name == "Mem256") + return 256; + if (Name == "Mem512") + return 512; + } + + llvm_unreachable("Memory operand's size not known!"); +} + +// Returns true if the record's list of defs includes the given def. +static inline bool hasDefInList(const Record *Rec, const StringRef List, + const StringRef Def) { + if (!Rec->isValueUnset(List)) { + return any_of(*(Rec->getValueAsListInit(List)), + [Def](const Init *I) { return I->getAsString() == Def; }); + } + return false; +} + +// Return true if the instruction defined as a register flavor. +static inline bool hasRegisterFormat(const Record *Inst) { + const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits"); + uint64_t FormBitsNum = getValueFromBitsInit(FormBits); + + // Values from X86Local namespace defined in X86RecognizableInstr.cpp + return FormBitsNum >= X86Local::MRMDestReg && FormBitsNum <= X86Local::MRM7r; +} + +// Return true if the instruction defined as a memory flavor. +static inline bool hasMemoryFormat(const Record *Inst) { + const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits"); + uint64_t FormBitsNum = getValueFromBitsInit(FormBits); + + // Values from X86Local namespace defined in X86RecognizableInstr.cpp + return FormBitsNum >= X86Local::MRMDestMem && FormBitsNum <= X86Local::MRM7m; +} + +static inline bool isNOREXRegClass(const Record *Op) { + return Op->getName().find("_NOREX") != StringRef::npos; +} + +static inline bool isRegisterOperand(const Record *Rec) { + return Rec->isSubClassOf("RegisterClass") || + Rec->isSubClassOf("RegisterOperand") || + Rec->isSubClassOf("PointerLikeRegClass"); +} + +static inline bool isMemoryOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_MEMORY"; +} + +static inline bool isImmediateOperand(const Record *Rec) { + return Rec->isSubClassOf("Operand") && + Rec->getValueAsString("OperandType") == "OPERAND_IMMEDIATE"; +} + +// Get the alternative instruction pointed by "FoldGenRegForm" field. +static inline const CodeGenInstruction * +getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records, + const CodeGenTarget &Target) { + + std::string AltRegInstStr = I->TheDef->getValueAsString("FoldGenRegForm"); + Record *AltRegInstRec = Records.getDef(AltRegInstStr); + assert(AltRegInstRec && + "Alternative register form instruction def not found"); + CodeGenInstruction &AltRegInst = Target.getInstruction(AltRegInstRec); + return &AltRegInst; +} + +// Function object - Operator() returns true if the given VEX instruction +// matches the EVEX instruction of this object. +class IsMatch { + const CodeGenInstruction *MemInst; + const RecordKeeper &Records; + +public: + IsMatch(const CodeGenInstruction *Inst, const RecordKeeper &Records) + : MemInst(Inst), Records(Records) {} + + bool operator()(const CodeGenInstruction *RegInst) { + Record *MemRec = MemInst->TheDef; + Record *RegRec = RegInst->TheDef; + + // Return false if one (at least) of the encoding fields of both + // instructions do not match. + if (RegRec->getValueAsDef("OpEnc") != MemRec->getValueAsDef("OpEnc") || + !equalBitsInits(RegRec->getValueAsBitsInit("Opcode"), + MemRec->getValueAsBitsInit("Opcode")) || + // VEX/EVEX fields + RegRec->getValueAsDef("OpPrefix") != + MemRec->getValueAsDef("OpPrefix") || + RegRec->getValueAsDef("OpMap") != MemRec->getValueAsDef("OpMap") || + RegRec->getValueAsDef("OpSize") != MemRec->getValueAsDef("OpSize") || + RegRec->getValueAsBit("hasVEX_4V") != + MemRec->getValueAsBit("hasVEX_4V") || + RegRec->getValueAsBit("hasEVEX_K") != + MemRec->getValueAsBit("hasEVEX_K") || + RegRec->getValueAsBit("hasEVEX_Z") != + MemRec->getValueAsBit("hasEVEX_Z") || + RegRec->getValueAsBit("hasEVEX_B") != + MemRec->getValueAsBit("hasEVEX_B") || + RegRec->getValueAsBit("hasEVEX_RC") != + MemRec->getValueAsBit("hasEVEX_RC") || + RegRec->getValueAsBit("hasREX_WPrefix") != + MemRec->getValueAsBit("hasREX_WPrefix") || + RegRec->getValueAsBit("hasLockPrefix") != + MemRec->getValueAsBit("hasLockPrefix") || + !equalBitsInits(RegRec->getValueAsBitsInit("EVEX_LL"), + MemRec->getValueAsBitsInit("EVEX_LL")) || + !equalBitsInits(RegRec->getValueAsBitsInit("VEX_WPrefix"), + MemRec->getValueAsBitsInit("VEX_WPrefix")) || + // Instruction's format - The register form's "Form" field should be + // the opposite of the memory form's "Form" field. + !areOppositeForms(RegRec->getValueAsBitsInit("FormBits"), + MemRec->getValueAsBitsInit("FormBits")) || + RegRec->getValueAsBit("isAsmParserOnly") != + MemRec->getValueAsBit("isAsmParserOnly")) + return false; + + // Make sure the sizes of the operands of both instructions suit each other. + // This is needed for instructions with intrinsic version (_Int). + // Where the only difference is the size of the operands. + // For example: VUCOMISDZrm and Int_VUCOMISDrm + // Also for instructions that their EVEX version was upgraded to work with + // k-registers. For example VPCMPEQBrm (xmm output register) and + // VPCMPEQBZ128rm (k register output register). + bool ArgFolded = false; + unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); + unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); + + // Instructions with one output in their memory form use the memory folded + // operand as source and destination (Read-Modify-Write). + unsigned RegStartIdx = + (MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0; + + for (unsigned i = 0, e = MemInst->Operands.size(); i < e; i++) { + Record *MemOpRec = MemInst->Operands[i].Rec; + Record *RegOpRec = RegInst->Operands[i + RegStartIdx].Rec; + + if (MemOpRec == RegOpRec) + continue; + + if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec)) { + if (getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec) || + isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec)) + return false; + } else if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec)) { + if (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)) + return false; + } else if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec)) { + if (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")) + return false; + } else { + // Only one operand can be folded. + if (ArgFolded) + return false; + + assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)); + ArgFolded = true; + } + } + + return true; + } + +private: + // Return true of the 2 given forms are the opposite of each other. + bool areOppositeForms(const BitsInit *RegFormBits, + const BitsInit *MemFormBits) { + uint64_t MemFormNum = getValueFromBitsInit(MemFormBits); + uint64_t RegFormNum = getValueFromBitsInit(RegFormBits); + + if ((MemFormNum == X86Local::MRM0m && RegFormNum == X86Local::MRM0r) || + (MemFormNum == X86Local::MRM1m && RegFormNum == X86Local::MRM1r) || + (MemFormNum == X86Local::MRM2m && RegFormNum == X86Local::MRM2r) || + (MemFormNum == X86Local::MRM3m && RegFormNum == X86Local::MRM3r) || + (MemFormNum == X86Local::MRM4m && RegFormNum == X86Local::MRM4r) || + (MemFormNum == X86Local::MRM5m && RegFormNum == X86Local::MRM5r) || + (MemFormNum == X86Local::MRM6m && RegFormNum == X86Local::MRM6r) || + (MemFormNum == X86Local::MRM7m && RegFormNum == X86Local::MRM7r) || + (MemFormNum == X86Local::MRMXm && RegFormNum == X86Local::MRMXr) || + (MemFormNum == X86Local::MRMDestMem && + RegFormNum == X86Local::MRMDestReg) || + (MemFormNum == X86Local::MRMSrcMem && + RegFormNum == X86Local::MRMSrcReg) || + (MemFormNum == X86Local::MRMSrcMem4VOp3 && + RegFormNum == X86Local::MRMSrcReg4VOp3) || + (MemFormNum == X86Local::MRMSrcMemOp4 && + RegFormNum == X86Local::MRMSrcRegOp4)) + return true; + + return false; + } +}; + +} // end anonymous namespace + +void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table, + const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S, + const unsigned int FoldedInd) { + + X86FoldTableEntry Result = X86FoldTableEntry(RegInstr, MemInstr); + Record *RegRec = RegInstr->TheDef; + Record *MemRec = MemInstr->TheDef; + + // Only table0 entries should explicitly specify a load or store flag. + if (&Table == &Table0) { + unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs(); + unsigned RegInOpsNum = RegRec->getValueAsDag("InOperandList")->getNumArgs(); + // If the instruction writes to the folded operand, it will appear as an + // output in the register form instruction and as an input in the memory + // form instruction. + // If the instruction reads from the folded operand, it well appear as in + // input in both forms. + if (MemInOpsNum == RegInOpsNum) + Result.IsLoad = true; + else + Result.IsStore = true; + } + + Record *RegOpRec = RegInstr->Operands[FoldedInd].Rec; + Record *MemOpRec = MemInstr->Operands[FoldedInd].Rec; + + // Unfolding code generates a load/store instruction according to the size of + // the register in the register form instruction. + // If the register's size is greater than the memory's operand size, do not + // allow unfolding. + if (S == UNFOLD) + Result.CannotUnfold = false; + else if (S == NO_UNFOLD) + Result.CannotUnfold = true; + else if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec)) + Result.CannotUnfold = true; // S == NO_STRATEGY + + uint64_t Enc = getValueFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits")); + if (isExplicitAlign(RegInstr)) { + // The instruction require explicitly aligned memory. + BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize"); + uint64_t Value = getValueFromBitsInit(VectSize); + Result.IsAligned = true; + Result.Alignment = Value; + } else if (Enc != X86Local::XOP && Enc != X86Local::VEX && + Enc != X86Local::EVEX) { + // Instructions with VEX encoding do not require alignment. + if (!isExplicitUnalign(RegInstr) && getMemOperandSize(MemOpRec) > 64) { + // SSE packed vector instructions require a 16 byte alignment. + Result.IsAligned = true; + Result.Alignment = 16; + } + } + + Table.push_back(Result); +} + +void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr, + const CodeGenInstruction *MemInstr, + const UnfoldStrategy S) { + + Record *RegRec = RegInstr->TheDef; + Record *MemRec = MemInstr->TheDef; + unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs(); + unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs(); + unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs(); + + // Instructions which have the WriteRMW value (Read-Modify-Write) should be + // added to Table2Addr. + if (hasDefInList(MemRec, "SchedRW", "WriteRMW") && MemOutSize != RegOutSize && + MemInSize == RegInSize) { + addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0); + return; + } + + if (MemInSize == RegInSize && MemOutSize == RegOutSize) { + // Load-Folding cases. + // If the i'th register form operand is a register and the i'th memory form + // operand is a memory operand, add instructions to Table#i. + for (unsigned i = RegOutSize, e = RegInstr->Operands.size(); i < e; i++) { + Record *RegOpRec = RegInstr->Operands[i].Rec; + Record *MemOpRec = MemInstr->Operands[i].Rec; + if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)) { + switch (i) { + default: llvm_unreachable("Unexpected operand count!"); + case 0: + addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0); + return; + case 1: + addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1); + return; + case 2: + addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2); + return; + case 3: + addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3); + return; + case 4: + addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4); + return; + } + } + } + } else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) { + // Store-Folding cases. + // If the memory form instruction performs performs a store, the *output* + // register of the register form instructions disappear and instead a + // memory *input* operand appears in the memory form instruction. + // For example: + // MOVAPSrr => (outs VR128:$dst), (ins VR128:$src) + // MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src) + Record *RegOpRec = RegInstr->Operands[RegOutSize - 1].Rec; + Record *MemOpRec = MemInstr->Operands[RegOutSize - 1].Rec; + if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec)) + addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0); + } + + return; +} + +void X86FoldTablesEmitter::run(raw_ostream &OS) { + emitSourceFileHeader("X86 fold tables", OS); + + // Holds all memory instructions + std::vector MemInsts; + // Holds all register instructions - divided according to opcode. + std::map> RegInsts; + + ArrayRef NumberedInstructions = + Target.getInstructionsByEnumValue(); + + for (const CodeGenInstruction *Inst : NumberedInstructions) { + if (!Inst->TheDef->getNameInit() || !Inst->TheDef->isSubClassOf("X86Inst")) + continue; + + const Record *Rec = Inst->TheDef; + + // - Do not proceed matching if the instruction in NoFoldSet. + // - Instructions including RST register class operands are not relevant + // for memory folding (for further details check the explanation in + // lib/Target/X86/X86InstrFPStack.td file). + // - Some instructions (listed in the manual map above) use the register + // class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure + // safe mapping of these instruction we manually map them and exclude + // them from the automation. + if (find(NoFoldSet, Rec->getName()) != std::end(NoFoldSet) || + hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst)) + continue; + + // Add all the memory form instructions to MemInsts, and all the register + // form instructions to RegInsts[Opc], where Opc in the opcode of each + // instructions. this helps reducing the runtime of the backend. + if (hasMemoryFormat(Rec)) + MemInsts.push_back(Inst); + else if (hasRegisterFormat(Rec)) { + uint8_t Opc = getValueFromBitsInit(Rec->getValueAsBitsInit("Opcode")); + RegInsts[Opc].push_back(Inst); + } + } + + // For each memory form instruction, try to find its register form + // instruction. + for (const CodeGenInstruction *MemInst : MemInsts) { + uint8_t Opc = + getValueFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode")); + + if (RegInsts.count(Opc) == 0) + continue; + + // Two forms (memory & register) of the same instruction must have the same + // opcode. try matching only with register form instructions with the same + // opcode. + std::vector &OpcRegInsts = + RegInsts.find(Opc)->second; + + auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Records)); + if (Match != OpcRegInsts.end()) { + const CodeGenInstruction *RegInst = *Match; + // If the matched instruction has it's "FoldGenRegForm" set, map the + // memory form instruction to the register form instruction pointed by + // this field + if (RegInst->TheDef->isValueUnset("FoldGenRegForm")) { + updateTables(RegInst, MemInst); + } else { + const CodeGenInstruction *AltRegInst = + getAltRegInst(RegInst, Records, Target); + updateTables(AltRegInst, MemInst); + } + OpcRegInsts.erase(Match); + } + } + + // Add the manually mapped instructions listed above. + for (const ManualMapEntry &Entry : ManualMapSet) { + Record *RegInstIter = Records.getDef(Entry.RegInstStr); + Record *MemInstIter = Records.getDef(Entry.MemInstStr); + + updateTables(&(Target.getInstruction(RegInstIter)), + &(Target.getInstruction(MemInstIter)), Entry.Strategy); + } + + // Print all tables to raw_ostream OS. + printTable(Table2Addr, "Table2Addr", OS); + printTable(Table0, "Table0", OS); + printTable(Table1, "Table1", OS); + printTable(Table2, "Table2", OS); + printTable(Table3, "Table3", OS); + printTable(Table4, "Table4", OS); +} + +namespace llvm { + +void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) { + X86FoldTablesEmitter(RK).run(OS); +} +} // namespace llvm diff --git a/lib/clang/headers/Makefile b/lib/clang/headers/Makefile index ab0f7dbb0e2..710e9f4a567 100644 --- a/lib/clang/headers/Makefile +++ b/lib/clang/headers/Makefile @@ -37,6 +37,7 @@ INCS+= avx512vlbwintrin.h INCS+= avx512vlcdintrin.h INCS+= avx512vldqintrin.h INCS+= avx512vlintrin.h +INCS+= avx512vpopcntdqintrin.h INCS+= avxintrin.h INCS+= bmi2intrin.h INCS+= bmiintrin.h diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc index 3dd67918217..8582fb63321 100644 --- a/lib/clang/include/clang/Basic/Version.inc +++ b/lib/clang/include/clang/Basic/Version.inc @@ -8,4 +8,4 @@ #define CLANG_VENDOR "FreeBSD " -#define SVN_REVISION "303571" +#define SVN_REVISION "304149" diff --git a/lib/clang/include/lld/Config/Version.inc b/lib/clang/include/lld/Config/Version.inc index 28108b02377..38c939fa0f0 100644 --- a/lib/clang/include/lld/Config/Version.inc +++ b/lib/clang/include/lld/Config/Version.inc @@ -4,5 +4,5 @@ #define LLD_VERSION_STRING "5.0.0" #define LLD_VERSION_MAJOR 5 #define LLD_VERSION_MINOR 0 -#define LLD_REVISION_STRING "303571" +#define LLD_REVISION_STRING "304149" #define LLD_REPOSITORY_STRING "FreeBSD" diff --git a/lib/clang/include/llvm/Support/VCSRevision.h b/lib/clang/include/llvm/Support/VCSRevision.h index 1418db0c70c..b1f0f34b4cb 100644 --- a/lib/clang/include/llvm/Support/VCSRevision.h +++ b/lib/clang/include/llvm/Support/VCSRevision.h @@ -1,2 +1,2 @@ /* $FreeBSD$ */ -#define LLVM_REVISION "svn-r303571" +#define LLVM_REVISION "svn-r304149" diff --git a/lib/clang/libclang/Makefile b/lib/clang/libclang/Makefile index 44735c463be..76730e156e2 100644 --- a/lib/clang/libclang/Makefile +++ b/lib/clang/libclang/Makefile @@ -234,6 +234,7 @@ SRCS_MIN+= Driver/ToolChains/Arch/PPC.cpp SRCS_MIN+= Driver/ToolChains/Arch/Sparc.cpp SRCS_MIN+= Driver/ToolChains/Arch/SystemZ.cpp SRCS_MIN+= Driver/ToolChains/Arch/X86.cpp +SRCS_MIN+= Driver/ToolChains/BareMetal.cpp SRCS_MIN+= Driver/ToolChains/Bitrig.cpp SRCS_MIN+= Driver/ToolChains/Clang.cpp SRCS_MIN+= Driver/ToolChains/CloudABI.cpp diff --git a/lib/clang/libllvm/Makefile b/lib/clang/libllvm/Makefile index 7c9fab9c259..74e96c9ce58 100644 --- a/lib/clang/libllvm/Makefile +++ b/lib/clang/libllvm/Makefile @@ -171,6 +171,7 @@ SRCS_MIN+= CodeGen/GlobalISel/InstructionSelector.cpp SRCS_MIN+= CodeGen/GlobalISel/Legalizer.cpp SRCS_MIN+= CodeGen/GlobalISel/LegalizerHelper.cpp SRCS_MIN+= CodeGen/GlobalISel/LegalizerInfo.cpp +SRCS_MIN+= CodeGen/GlobalISel/Localizer.cpp SRCS_MIN+= CodeGen/GlobalISel/MachineIRBuilder.cpp SRCS_MIN+= CodeGen/GlobalISel/RegBankSelect.cpp SRCS_MIN+= CodeGen/GlobalISel/RegisterBank.cpp @@ -346,6 +347,7 @@ SRCS_MIN+= DebugInfo/CodeView/TypeDatabase.cpp SRCS_MIN+= DebugInfo/CodeView/TypeDatabaseVisitor.cpp SRCS_MIN+= DebugInfo/CodeView/TypeDumpVisitor.cpp SRCS_MIN+= DebugInfo/CodeView/TypeIndex.cpp +#SRCS_MIN+= DebugInfo/CodeView/TypeIndexDiscovery.cpp SRCS_MIN+= DebugInfo/CodeView/TypeRecordMapping.cpp SRCS_MIN+= DebugInfo/CodeView/TypeSerializer.cpp SRCS_MIN+= DebugInfo/CodeView/TypeStreamMerger.cpp @@ -1104,6 +1106,7 @@ SRCS_MIN+= Transforms/Scalar/FlattenCFGPass.cpp SRCS_MIN+= Transforms/Scalar/Float2Int.cpp SRCS_MIN+= Transforms/Scalar/GVN.cpp SRCS_MIN+= Transforms/Scalar/GVNHoist.cpp +SRCS_MIN+= Transforms/Scalar/GVNSink.cpp SRCS_MIN+= Transforms/Scalar/GuardWidening.cpp SRCS_MIN+= Transforms/Scalar/IVUsersPrinter.cpp SRCS_MIN+= Transforms/Scalar/IndVarSimplify.cpp @@ -1278,6 +1281,7 @@ TGHDRS+= Options.inc DisassemblerTables/-gen-disassembler \ EVEX2VEXTables/-gen-x86-EVEX2VEX-tables \ FastISel/-gen-fast-isel \ + FoldTables/-gen-x86-fold-tables \ GlobalISel/-gen-global-isel \ InstrInfo/-gen-instr-info \ MCCodeEmitter/-gen-emitter \ @@ -1359,6 +1363,7 @@ TGHDRS+= X86GenDAGISel.inc TGHDRS+= X86GenDisassemblerTables.inc TGHDRS+= X86GenEVEX2VEXTables.inc TGHDRS+= X86GenFastISel.inc +TGHDRS+= X86GenFoldTables.inc TGHDRS+= X86GenGlobalISel.inc TGHDRS+= X86GenInstrInfo.inc TGHDRS+= X86GenRegisterBank.inc diff --git a/lib/libc++/Makefile b/lib/libc++/Makefile index 7cd79837abc..f7820233f8d 100644 --- a/lib/libc++/Makefile +++ b/lib/libc++/Makefile @@ -65,7 +65,12 @@ cxxrt_${_S}: ${_LIBCXXRTDIR}/${_S} .NOMETA .endfor WARNS= 0 -CFLAGS+= -isystem ${HDRDIR} -isystem ${_LIBCXXRTDIR} -nostdinc++ -nostdlib -D_LIBCPP_BUILDING_LIBRARY -DLIBCXXRT +CFLAGS+= -isystem ${HDRDIR} +CFLAGS+= -isystem ${_LIBCXXRTDIR} +CFLAGS+= -nostdinc++ +CFLAGS+= -nostdlib +CFLAGS+= -D_LIBCPP_BUILDING_LIBRARY +CFLAGS+= -DLIBCXXRT .if empty(CXXFLAGS:M-std=*) CXXFLAGS+= -std=c++11 .endif @@ -215,6 +220,7 @@ EXP_HEADERS+= __memory EXP_HEADERS+= algorithm EXP_HEADERS+= any EXP_HEADERS+= chrono +EXP_HEADERS+= coroutine EXP_HEADERS+= deque EXP_HEADERS+= dynarray EXP_HEADERS+= filesystem diff --git a/usr.bin/clang/llvm-tblgen/Makefile b/usr.bin/clang/llvm-tblgen/Makefile index 58c15a37d06..904cc62ad93 100644 --- a/usr.bin/clang/llvm-tblgen/Makefile +++ b/usr.bin/clang/llvm-tblgen/Makefile @@ -39,6 +39,7 @@ SRCS+= TableGen.cpp SRCS+= Types.cpp SRCS+= X86DisassemblerTables.cpp SRCS+= X86EVEX2VEXTablesEmitter.cpp +SRCS+= X86FoldTablesEmitter.cpp SRCS+= X86ModRMFilters.cpp SRCS+= X86RecognizableInstr.cpp