powerpc: Fix the NUMA domain list on powernv

Summary:
Consolidate the NUMA associativity handling into a platform function.
Non-NUMA platforms will just fall back to the default (0).  Currently
only implemented for powernv, which uses a lookup table to map the
device tree associativity into a system NUMA domain.

Fixes hangs on powernv after r356534, and corrects a fairly longstanding
bug in powernv's NUMA handling, which ended up using domains 1 and 2 for
devices and memory on power9, while CPUs were bound to domains 0 and 1.

Reviewed by:	bdragon, luporl
Differential Revision:	https://reviews.freebsd.org/D23220
This commit is contained in:
Justin Hibbits 2020-01-18 01:26:54 +00:00
parent 432ff6eead
commit 490ebb8f35
7 changed files with 79 additions and 42 deletions

View file

@ -37,6 +37,9 @@
#include <dev/ofw/openfirm.h>
#include <machine/platform.h>
struct mem_region;
struct numa_mem_region;
typedef uint32_t cell_t;
void OF_getetheraddr(device_t dev, u_char *addr);

View file

@ -37,6 +37,7 @@
#ifndef _MACHINE_PLATFORM_H_
#define _MACHINE_PLATFORM_H_
#include <machine/ofw_machdep.h>
#include <machine/smp.h>
#include <machine/pcpu.h>
@ -66,6 +67,7 @@ int platform_smp_start_cpu(struct pcpu *);
void platform_smp_timebase_sync(u_long tb, int ap);
void platform_smp_ap_init(void);
void platform_smp_probe_threads(void);
int platform_node_numa_domain(phandle_t);
const char *installed_platform(void);
void platform_probe_and_attach(void);

View file

@ -466,9 +466,8 @@ void
ofw_numa_mem_regions(struct numa_mem_region *memp, int *memsz)
{
phandle_t phandle;
int res, count, msz;
int count, msz;
char name[31];
cell_t associativity[5];
struct numa_mem_region *curmemp;
msz = 0;
@ -486,13 +485,8 @@ ofw_numa_mem_regions(struct numa_mem_region *memp, int *memsz)
if (count == 0)
continue;
curmemp = &memp[msz];
res = OF_getproplen(phandle, "ibm,associativity");
if (res <= 0)
continue;
MPASS(count == 1);
OF_getencprop(phandle, "ibm,associativity",
associativity, res);
curmemp->mr_domain = associativity[3];
curmemp->mr_domain = platform_node_numa_domain(phandle);
if (bootverbose)
printf("%s %#jx-%#jx domain(%ju)\n",
name, (uintmax_t)curmemp->mr_start,

View file

@ -385,39 +385,13 @@ ofw_pcibus_get_devinfo(device_t bus, device_t dev)
return (&dinfo->opd_obdinfo);
}
/*
 * Read the NUMA domain of a device from the "ibm,associativity" property of
 * its Open Firmware node.
 *
 * On success the domain is stored in *domain and 0 is returned.  ENXIO is
 * returned, and *domain is left untouched, when the device has no OFW node
 * or when the property is missing or too short to hold the cell that is
 * used as the domain (index 3).
 */
static int
ofw_pcibus_parse_associativity(device_t dev, int *domain)
{
	phandle_t node;
	cell_t associativity[5];
	int res;

	node = ofw_bus_get_node(dev);
	if (node == -1) {
		if (bootverbose)
			device_printf(dev, "no ofw node found\n");
		return (ENXIO);
	}

	/*
	 * Bound the copy by the size of the local buffer rather than by the
	 * property length, so an unexpectedly long property cannot overrun
	 * the stack, and require that the cell read below was actually
	 * filled in.
	 */
	res = OF_getencprop(node, "ibm,associativity", associativity,
	    sizeof(associativity));
	if (res < (int)(4 * sizeof(cell_t)))
		return (ENXIO);

	*domain = associativity[3];
	if (bootverbose)
		device_printf(dev, "domain(%d)\n", *domain);
	return (0);
}
int
ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsize,
cpuset_t *cpuset)
{
int d, error;
error = ofw_pcibus_parse_associativity(child, &d);
if (error)
return (bus_generic_get_cpus(dev, child, op, setsize, cpuset));
d = platform_node_numa_domain(ofw_bus_get_node(dev));
switch (op) {
case LOCAL_CPUS:
@ -450,12 +424,7 @@ ofw_pcibus_get_cpus(device_t dev, device_t child, enum cpu_sets op, size_t setsi
/*
 * Report the NUMA domain of `child` through *domain.
 *
 * NOTE(review): this span appears to show the pre-change and post-change
 * statements of the function interleaved (the platform_node_numa_domain()
 * assignment next to the older ofw_pcibus_parse_associativity() path).
 * Confirm which statements belong to the final version before relying on
 * the control flow as written here.
 */
int
ofw_pcibus_get_domain(device_t dev, device_t child, int *domain)
{
int d, error;
/* Ask the platform for the domain of the child's device tree node. */
*domain = platform_node_numa_domain(ofw_bus_get_node(child));
error = ofw_pcibus_parse_associativity(child, &d);
/* No ofw node; go up a level */
if (error)
return (bus_generic_get_domain(dev, child, domain));
*domain = d;
return (0);
}

View file

@ -79,6 +79,7 @@ static struct cpu_group *powernv_smp_topo(platform_t plat);
static void powernv_reset(platform_t);
static void powernv_cpu_idle(sbintime_t sbt);
static int powernv_cpuref_init(void);
static int powernv_node_numa_domain(platform_t platform, phandle_t node);
static platform_method_t powernv_methods[] = {
PLATFORMMETHOD(platform_probe, powernv_probe),
@ -96,6 +97,7 @@ static platform_method_t powernv_methods[] = {
PLATFORMMETHOD(platform_smp_probe_threads, powernv_smp_probe_threads),
PLATFORMMETHOD(platform_smp_topo, powernv_smp_topo),
#endif
PLATFORMMETHOD(platform_node_numa_domain, powernv_node_numa_domain),
PLATFORMMETHOD(platform_reset, powernv_reset),
@ -111,6 +113,7 @@ static platform_def_t powernv_platform = {
static struct cpuref platform_cpuref[MAXCPU];
static int platform_cpuref_cnt;
static int platform_cpuref_valid;
static int platform_associativity;
PLATFORM_DEF(powernv_platform);
@ -131,8 +134,10 @@ powernv_attach(platform_t plat)
uint32_t nptlp, shift = 0, slb_encoding = 0;
int32_t lp_size, lp_encoding;
char buf[255];
pcell_t refpoints[3];
pcell_t prop;
phandle_t cpu;
phandle_t opal;
int res, len, idx;
register_t msr;
@ -144,6 +149,13 @@ powernv_attach(platform_t plat)
#else
opal_call(OPAL_REINIT_CPUS, 1 /* Big endian */);
#endif
opal = OF_finddevice("/ibm,opal");
platform_associativity = 4; /* Skiboot default. */
if (OF_getencprop(opal, "ibm,associativity-reference-points", refpoints,
sizeof(refpoints)) > 0) {
platform_associativity = refpoints[0];
}
if (cpu_idle_hook == NULL)
cpu_idle_hook = powernv_cpu_idle;
@ -328,7 +340,8 @@ powernv_cpuref_init(void)
for (a = 0; a < res/sizeof(cell_t); a++) {
tmp_cpuref[tmp_cpuref_cnt].cr_hwref = interrupt_servers[a];
tmp_cpuref[tmp_cpuref_cnt].cr_cpuid = tmp_cpuref_cnt;
tmp_cpuref[tmp_cpuref_cnt].cr_domain = interrupt_servers[a] >> 11;
tmp_cpuref[tmp_cpuref_cnt].cr_domain =
powernv_node_numa_domain(NULL, cpu);
if (interrupt_servers[a] == (uint32_t)powernv_boot_pir)
bsp = tmp_cpuref_cnt;
@ -495,6 +508,40 @@ powernv_cpu_idle(sbintime_t sbt)
{
}
/*
 * Map a device tree node to a system NUMA domain.
 *
 * The node's "ibm,associativity" property is read and the cell selected by
 * platform_associativity is translated, through a table filled in first-seen
 * order, into a domain index.  A node without a usable property inherits the
 * domain of its nearest ancestor that has one.
 *
 * The platform argument is not consulted; callers may pass NULL.
 *
 * Always returns a valid domain.  Domain 0 is returned when the node handle
 * is invalid, when no ancestor provides enough associativity cells, when the
 * reference point does not fit the local buffer, or when MAXMEMDOM distinct
 * values have already been recorded.
 */
static int
powernv_node_numa_domain(platform_t platform, phandle_t node)
{
	/* XXX: Is locking necessary in here? */
	static int numa_domains[MAXMEMDOM];
	static int numa_max_domain;
	cell_t associativity[5];
	int i, res;

	/*
	 * The reference point comes from firmware and is used as an index
	 * into the buffer above; refuse values that cannot be satisfied.
	 */
	if (platform_associativity < 0 ||
	    platform_associativity >=
	    (int)(sizeof(associativity) / sizeof(associativity[0])))
		return (0);

	for (;;) {
		/*
		 * Callers hand in ofw_bus_get_node() results unchecked, and
		 * the walk below ends at the parent of the root (0).
		 */
		if (node == 0 || node == (phandle_t)-1)
			return (0);

		/*
		 * Bound the copy by the buffer size, not the property length,
		 * and only accept the property if it covers the cell that is
		 * about to be read.
		 */
		res = OF_getencprop(node, "ibm,associativity", associativity,
		    sizeof(associativity));
		if (res >= (int)(sizeof(cell_t) * (platform_associativity + 1)))
			break;

		/* Missing or too short: try the parent. */
		node = OF_parent(node);
	}

	for (i = 0; i < numa_max_domain; i++) {
		if (numa_domains[i] == associativity[platform_associativity])
			return (i);
	}

	/* First sighting: record it if there is room, else use domain 0. */
	if (i < MAXMEMDOM)
		numa_domains[numa_max_domain++] =
		    associativity[platform_associativity];
	else
		i = 0;

	return (i);
}
/* Set up the Nest MMU on POWER9 relatively early, but after pmap is setup. */
static void
powernv_setup_nmmu(void *unused)

View file

@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/ofw_machdep.h>
#include <machine/platform.h>
#include <machine/platformvar.h>
#include <machine/smp.h>
@ -310,6 +311,12 @@ cpu_topo(void)
}
#endif
/*
 * Return the NUMA domain for a device tree node by dispatching the
 * node_numa_domain platform method on plat_obj.
 */
int
platform_node_numa_domain(phandle_t node)
{
	int domain;

	domain = PLATFORM_NODE_NUMA_DOMAIN(plat_obj, node);
	return (domain);
}
/*
* Reset back to firmware.
*/

View file

@ -32,6 +32,7 @@
#include <sys/systm.h>
#include <sys/smp.h>
#include <machine/ofw_machdep.h>
#include <machine/platform.h>
#include <machine/platformvar.h>
#include <machine/smp.h>
@ -88,6 +89,11 @@ CODE {
{
return;
}
/* Default node_numa_domain method: every node is reported in domain 0. */
static int
platform_null_node_numa_domain(platform_t plat, phandle_t node)
{
	return (0);
}
};
/**
@ -255,3 +261,12 @@ METHOD void smp_timebase_sync {
int _ap;
};
/**
 * @brief Return the NUMA domain for the given device tree node.  Always
 * returns a valid domain.
 *
 * Platforms that do not implement this method use
 * platform_null_node_numa_domain, which reports domain 0 for every node.
 *
 * @param _plat		platform instance
 * @param _node		device tree node to look up
 */
METHOD int node_numa_domain {
platform_t _plat;
phandle_t _node;
} DEFAULT platform_null_node_numa_domain;