Commit 35cb3ee4 authored by Bryan Chan
Browse files

[AArch64][Builtins] Avoid unnecessary cache cleaning

Use new control bits CTR_EL0.DIC and CTR_EL0.IDC to discover the d-cache
cleaning and i-cache invalidation requirements for instruction-to-data
coherence. This matches the behavior in the latest libgcc.

Author: Shaokun Zhang <zhangshaokun@hisilicon.com>

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D69247
parent d2ec416c
Loading
Loading
Loading
Loading
+23 −13
Original line number Diff line number Diff line
@@ -93,24 +93,34 @@ void __clear_cache(void *start, void *end) {
#elif defined(__aarch64__) && !defined(__APPLE__)
  uint64_t xstart = (uint64_t)(uintptr_t)start;
  uint64_t xend = (uint64_t)(uintptr_t)end;
  uint64_t addr;

  // Get Cache Type Info
  uint64_t ctr_el0;
  // Get Cache Type Info.
  static uint64_t ctr_el0 = 0;
  if (ctr_el0 == 0)
    __asm __volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));

  // dc & ic instructions must use 64bit registers so we don't use
  // The DC and IC instructions must use 64-bit registers so we don't use
  // uintptr_t in case this runs in an IPL32 environment.
  uint64_t addr;

  // If CTR_EL0.IDC is set, data cache cleaning to the point of unification
  // is not required for instruction to data coherence.
  if (((ctr_el0 >> 28) & 0x1) == 0x0) {
    const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
    for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
         addr += dcache_line_size)
      __asm __volatile("dc cvau, %0" ::"r"(addr));
  }
  __asm __volatile("dsb ish");

  // If CTR_EL0.DIC is set, instruction cache invalidation to the point of
  // unification is not required for instruction to data coherence.
  if (((ctr_el0 >> 29) & 0x1) == 0x0) {
    const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
    for (addr = xstart & ~(icache_line_size - 1); addr < xend;
         addr += icache_line_size)
      __asm __volatile("ic ivau, %0" ::"r"(addr));
  }
  __asm __volatile("isb sy");
#elif defined(__powerpc64__)
  const size_t line_size = 32;