Fix boot crash on aarch64 Ampere eMAG systems (rhbz #1874117), a more complete fix that's headed to 5.10

This commit is contained in:
Peter Robinson 2020-10-07 09:32:51 +01:00
parent 768279ca4c
commit f86768e230
2 changed files with 71 additions and 58 deletions

View File

@ -1,45 +1,15 @@
From 68912566d659046b12b02e5a316af3760e08eab8 Mon Sep 17 00:00:00 2001
From: Fedora Kernel Team <kernel-team@fedoraproject.org>
Date: Tue, 1 Sep 2020 18:44:00 -0400
From b0697932d03bd78bd4db6466939680c0fbdd8589 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Tue, 15 Sep 2020 16:41:09 -0400
Subject: [PATCH] drivers/perf: xgene_pmu: Fix uninitialized resource struct
This splat was reported on newer Fedora kernels booting on certain
Ampere machines:
X-gene based machines:
xgene-pmu APMC0D83:00: X-Gene PMU version 3
Unable to handle kernel read from unreadable memory at virtual address 0000000000004006
Mem abort info:
ESR = 0x96000004
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
Data abort info:
ISV = 0, ISS = 0x00000004
CM = 0, WnR = 0
[0000000000004006] user address but active_mm is swapper
Internal error: Oops: 96000004 [#1] SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.8.3-300.fc33.aarch64 #1
Hardware name: Lenovo HR350A 7X35CTO1WW /HR350A , BIOS HVE104N-1.12 11/29/2019
pstate: 00400005 (nzcv daif +PAN -UAO BTYPE=--)
pc : string+0x50/0x100
lr : vsnprintf+0x160/0x750
sp : ffff800012b4b760
x29: ffff800012b4b760 x28: 000000000000000c
x27: ffff8000113610d5 x26: ffff8000113610d5
x25: 0000000000000020 x24: 0000000000000000
x23: 00000000ffffffe8 x22: ffff800010f8e628
x21: ffff800012b4b8f0 x20: 0000000000000000
x19: 0000000000000000 x18: 00000000fffffffc
x17: 000000000000002d x16: 0000000000000001
x15: 0000000000000020 x14: 0000000000000000
x13: 0000000000000000 x12: 071c71c71c71c71c
x11: 00000000ffffff76 x10: ffff800012b4b8f0
x9 : ffff8000109e97d8 x8 : 00000000ffffffff
x7 : 000000000000000b x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000000
x3 : ffff0a00ffffff04 x2 : 0000000000004006
x1 : ffffffffffffffff x0 : 000000000000000c
Unable to handle kernel read from unreadable memory at virtual \
address 0000000000004006
...
Call trace:
string+0x50/0x100
vsnprintf+0x160/0x750
@ -72,38 +42,80 @@ Ampere machines:
ret_from_fork+0x10/0x18
Code: 91000400 110004e1 eb08009f 540000c0 (38646846)
---[ end trace f08c10566496a703 ]---
Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
SMP: stopping secondary CPUs
Kernel Offset: 0x40000 from 0xffff800010000000
PHYS_OFFSET: 0x80000000
CPU features: 0x240002,20802008
Memory Limit: none
This was due to a local struct resource variable in acpi_get_pmu_hw_inf().
A pointer to that struct makes its way to __devm_ioremap_resource()
where the name field is passed to devm_kasprintf() and dereferenced.
The struct was never initialized, so the name pointer is whatever
happened to be on the stack. This has been the case since the original
checkin of xgene_pmu.c, but it was a recent change which added the
use of the name field.
This is due to use of an uninitialized local resource struct in the xgene
pmu driver. The thunderx2_pmu driver avoids this by using the resource list
constructed by acpi_dev_get_resources() rather than using a callback from
that function. The callback in the xgene driver didn't fully initialize
the resource. So get rid of the callback and search the resource list as
done by thunderx2.
Fixes: 832c927d119b ("perf: xgene: Add APM X-Gene SoC Performance Monitoring Unit driver")
Signed-off-by: Mark Salter <msalter@redhat.com>
Link: https://lore.kernel.org/r/20200915204110.326138-1-msalter@redhat.com
Signed-off-by: Will Deacon <will@kernel.org>
---
drivers/perf/xgene_pmu.c | 1 +
1 file changed, 1 insertion(+)
drivers/perf/xgene_pmu.c | 32 +++++++++++++++++---------------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c
index edac28c..fdbbd08 100644
index edac28cd25dd..633cf07ba672 100644
--- a/drivers/perf/xgene_pmu.c
+++ b/drivers/perf/xgene_pmu.c
@@ -1483,6 +1483,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
@@ -1453,17 +1453,6 @@ static char *xgene_pmu_dev_name(struct device *dev, u32 type, int id)
}
#if defined(CONFIG_ACPI)
-static int acpi_pmu_dev_add_resource(struct acpi_resource *ares, void *data)
-{
- struct resource *res = data;
-
- if (ares->type == ACPI_RESOURCE_TYPE_FIXED_MEMORY32)
- acpi_dev_resource_memory(ares, res);
-
- /* Always tell the ACPI core to skip this resource */
- return 1;
-}
-
static struct
xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
struct acpi_device *adev, u32 type)
@@ -1475,6 +1464,7 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
struct hw_pmu_info *inf;
void __iomem *dev_csr;
struct resource res;
+ struct resource_entry *rentry;
int enable_bit;
int rc;
@@ -1483,11 +1473,23 @@ xgene_pmu_dev_ctx *acpi_get_pmu_hw_inf(struct xgene_pmu *xgene_pmu,
return NULL;
INIT_LIST_HEAD(&resource_list);
+ memset(&res, 0, sizeof(res));
rc = acpi_dev_get_resources(adev, &resource_list,
acpi_pmu_dev_add_resource, &res);
- rc = acpi_dev_get_resources(adev, &resource_list,
- acpi_pmu_dev_add_resource, &res);
+ rc = acpi_dev_get_resources(adev, &resource_list, NULL, NULL);
+ if (rc <= 0) {
+ dev_err(dev, "PMU type %d: No resources found\n", type);
+ return NULL;
+ }
+
+ list_for_each_entry(rentry, &resource_list, node) {
+ if (resource_type(rentry->res) == IORESOURCE_MEM) {
+ res = *rentry->res;
+ rentry = NULL;
+ break;
+ }
+ }
acpi_dev_free_resource_list(&resource_list);
- if (rc < 0) {
- dev_err(dev, "PMU type %d: No resource address found\n", type);
+
+ if (rentry) {
+ dev_err(dev, "PMU type %d: No memory resource found\n", type);
return NULL;
}
--
2.26.0
2.26.2

View File

@ -3011,6 +3011,7 @@ fi
%changelog
* Wed Oct 7 2020 Peter Robinson <pbrobinson@fedoraproject.org>
- Fix aarch64 boot crash on BTI capable systems
- Fix boot crash on aarch64 Ampere eMAG systems (rhbz #1874117)
* Thu Oct 1 12:09:16 CDT 2020 Justin M. Forbes <jforbes@fedoraproject.org> - 5.8.13-300
- Linux v5.8.13