From 6d5fda683bafa6d3933d1d2499a3fffc16f1beed Mon Sep 17 00:00:00 2001 From: Peter Robinson Date: Mon, 7 Oct 2019 12:50:12 +0100 Subject: [PATCH] drop nouveau on Tegra210/GP10B as it breaks other platforms --- drm-nouveau-Enable-GP10B-by-default.patch | 1275 --------------------- kernel.spec | 4 +- 2 files changed, 1 insertion(+), 1278 deletions(-) delete mode 100644 drm-nouveau-Enable-GP10B-by-default.patch diff --git a/drm-nouveau-Enable-GP10B-by-default.patch b/drm-nouveau-Enable-GP10B-by-default.patch deleted file mode 100644 index e44a5b339..000000000 --- a/drm-nouveau-Enable-GP10B-by-default.patch +++ /dev/null @@ -1,1275 +0,0 @@ -From patchwork Mon Sep 16 15:04:02 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [01/11] drm/nouveau: tegra: Avoid pulsing reset twice -From: Thierry Reding -X-Patchwork-Id: 331044 -Message-Id: <20190916150412.10025-2-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:02 +0200 - -From: Thierry Reding - -When the GPU powergate is controlled by a generic power domain provider, -the reset will automatically be asserted and deasserted as part of the -power-ungating procedure. - -On some Jetson TX2 boards, doing an additional assert and deassert of -the GPU outside of the power-ungate procedure can cause the GPU to go -into a bad state where the memory interface can no longer access system -memory. 
- -Signed-off-by: Thierry Reding ---- - drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++------ - 1 file changed, 6 insertions(+), 6 deletions(-) - -diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -index 0e372a190d3f..747a775121cf 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -@@ -52,18 +52,18 @@ nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) - clk_set_rate(tdev->clk_pwr, 204000000); - udelay(10); - -- reset_control_assert(tdev->rst); -- udelay(10); -- - if (!tdev->pdev->dev.pm_domain) { -+ reset_control_assert(tdev->rst); -+ udelay(10); -+ - ret = tegra_powergate_remove_clamping(TEGRA_POWERGATE_3D); - if (ret) - goto err_clamp; - udelay(10); -- } - -- reset_control_deassert(tdev->rst); -- udelay(10); -+ reset_control_deassert(tdev->rst); -+ udelay(10); -+ } - - return 0; - - -From patchwork Mon Sep 16 15:04:03 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [02/11] drm/nouveau: tegra: Set clock rate if not set -From: Thierry Reding -X-Patchwork-Id: 331046 -Message-Id: <20190916150412.10025-3-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:03 +0200 - -From: Thierry Reding - -If the GPU clock has not had a rate set, initialize it to the maximum -clock rate to make sure it does run. 
- -Signed-off-by: Thierry Reding ---- - drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c | 12 ++++++++++++ - 1 file changed, 12 insertions(+) - -diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -index 747a775121cf..d0d52c1d4aee 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -@@ -279,6 +279,7 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, - struct nvkm_device **pdevice) - { - struct nvkm_device_tegra *tdev; -+ unsigned long rate; - int ret; - - if (!(tdev = kzalloc(sizeof(*tdev), GFP_KERNEL))) -@@ -307,6 +308,17 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, - goto free; - } - -+ rate = clk_get_rate(tdev->clk); -+ if (rate == 0) { -+ ret = clk_set_rate(tdev->clk, ULONG_MAX); -+ if (ret < 0) -+ goto free; -+ -+ rate = clk_get_rate(tdev->clk); -+ -+ dev_dbg(&pdev->dev, "GPU clock set to %lu\n", rate); -+ } -+ - if (func->require_ref_clk) - tdev->clk_ref = devm_clk_get(&pdev->dev, "ref"); - if (IS_ERR(tdev->clk_ref)) { - -From patchwork Mon Sep 16 15:04:04 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [03/11] drm/nouveau: secboot: Read WPR configuration from GPU - registers -From: Thierry Reding -X-Patchwork-Id: 331048 -Message-Id: <20190916150412.10025-4-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:04 +0200 - -From: Thierry Reding - -The GPUs found on Tegra SoCs have registers that can be used to read the -WPR configuration. Use these registers instead of reaching into the -memory controller's register space to read the same information. 
- -Signed-off-by: Thierry Reding ---- - .../drm/nouveau/nvkm/subdev/secboot/gm200.h | 2 +- - .../drm/nouveau/nvkm/subdev/secboot/gm20b.c | 81 ++++++++++++------- - .../drm/nouveau/nvkm/subdev/secboot/gp10b.c | 4 +- - 3 files changed, 53 insertions(+), 34 deletions(-) - -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h -index 62c5e162099a..280b1448df88 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h -@@ -41,6 +41,6 @@ int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *, - struct nvkm_falcon *); - - /* Tegra-only */ --int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *, u32); -+int gm20b_secboot_tegra_read_wpr(struct gm200_secboot *); - - #endif -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c -index df8b919dcf09..f8a543122219 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c -@@ -23,39 +23,65 @@ - #include "acr.h" - #include "gm200.h" - --#define TEGRA210_MC_BASE 0x70019000 -- - #ifdef CONFIG_ARCH_TEGRA --#define MC_SECURITY_CARVEOUT2_CFG0 0xc58 --#define MC_SECURITY_CARVEOUT2_BOM_0 0xc5c --#define MC_SECURITY_CARVEOUT2_BOM_HI_0 0xc60 --#define MC_SECURITY_CARVEOUT2_SIZE_128K 0xc64 --#define TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED (1 << 1) - /** - * gm20b_secboot_tegra_read_wpr() - read the WPR registers on Tegra - * -- * On dGPU, we can manage the WPR region ourselves, but on Tegra the WPR region -- * is reserved from system memory by the bootloader and irreversibly locked. -- * This function reads the address and size of the pre-configured WPR region. -+ * On dGPU, we can manage the WPR region ourselves, but on Tegra this region -+ * is allocated from system memory by the secure firmware. 
The region is then -+ * marked as a "secure carveout" and irreversibly locked. Furthermore, the WPR -+ * secure carveout is also configured to be sent to the GPU via a dedicated -+ * serial bus between the memory controller and the GPU. The GPU requests this -+ * information upon leaving reset and exposes it through a FIFO register at -+ * offset 0x100cd4. -+ * -+ * The FIFO register's lower 4 bits can be used to set the read index into the -+ * FIFO. After each read of the FIFO register, the read index is incremented. -+ * -+ * Indices 2 and 3 contain the lower and upper addresses of the WPR. These are -+ * stored in units of 256 B. The WPR is inclusive of both addresses. -+ * -+ * Unfortunately, for some reason the WPR info register doesn't contain the -+ * correct values for the secure carveout. It seems like the upper address is -+ * always too small by 128 KiB - 1. Given that the secure carvout size in the -+ * memory controller configuration is specified in units of 128 KiB, it's -+ * possible that the computation of the upper address of the WPR is wrong and -+ * causes this difference. 
- */ - int --gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base) -+gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb) - { -+ struct nvkm_device *device = gsb->base.subdev.device; - struct nvkm_secboot *sb = &gsb->base; -- void __iomem *mc; -- u32 cfg; -+ u64 base, limit; -+ u32 value; - -- mc = ioremap(mc_base, 0xd00); -- if (!mc) { -- nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n"); -- return -ENOMEM; -- } -- sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) | -- ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32); -- sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K) -- << 17; -- cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0); -- iounmap(mc); -+ /* set WPR info register to point at WPR base address register */ -+ value = nvkm_rd32(device, 0x100cd4); -+ value &= ~0xf; -+ value |= 0x2; -+ nvkm_wr32(device, 0x100cd4, value); -+ -+ /* read base address */ -+ value = nvkm_rd32(device, 0x100cd4); -+ base = (u64)(value >> 4) << 12; -+ -+ /* read limit */ -+ value = nvkm_rd32(device, 0x100cd4); -+ limit = (u64)(value >> 4) << 12; -+ -+ /* -+ * The upper address of the WPR seems to be computed wrongly and is -+ * actually SZ_128K - 1 bytes lower than it should be. Adjust the -+ * value accordingly. 
-+ */ -+ limit += SZ_128K - 1; -+ -+ sb->wpr_size = limit - base + 1; -+ sb->wpr_addr = base; -+ -+ nvkm_info(&sb->subdev, "WPR: %016llx-%016llx\n", sb->wpr_addr, -+ sb->wpr_addr + sb->wpr_size - 1); - - /* Check that WPR settings are valid */ - if (sb->wpr_size == 0) { -@@ -63,11 +89,6 @@ gm20b_secboot_tegra_read_wpr(struct gm200_secboot *gsb, u32 mc_base) - return -EINVAL; - } - -- if (!(cfg & TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED)) { -- nvkm_error(&sb->subdev, "WPR region not locked\n"); -- return -EINVAL; -- } -- - return 0; - } - #else -@@ -85,7 +106,7 @@ gm20b_secboot_oneinit(struct nvkm_secboot *sb) - struct gm200_secboot *gsb = gm200_secboot(sb); - int ret; - -- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA210_MC_BASE); -+ ret = gm20b_secboot_tegra_read_wpr(gsb); - if (ret) - return ret; - -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c -index 28ca29d0eeee..d84e85825995 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp10b.c -@@ -23,15 +23,13 @@ - #include "acr.h" - #include "gm200.h" - --#define TEGRA186_MC_BASE 0x02c10000 -- - static int - gp10b_secboot_oneinit(struct nvkm_secboot *sb) - { - struct gm200_secboot *gsb = gm200_secboot(sb); - int ret; - -- ret = gm20b_secboot_tegra_read_wpr(gsb, TEGRA186_MC_BASE); -+ ret = gm20b_secboot_tegra_read_wpr(gsb); - if (ret) - return ret; - - -From patchwork Mon Sep 16 15:04:05 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [04/11] drm/nouveau: gp10b: Add custom L2 cache implementation -From: Thierry Reding -X-Patchwork-Id: 331049 -Message-Id: <20190916150412.10025-5-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:05 +0200 - -From: Thierry Reding - -There are extra registers 
that need to be programmed to make the level 2 -cache work on GP10B, such as the stream ID register that is used when an -SMMU is used to translate memory addresses. - -Signed-off-by: Thierry Reding ---- - .../gpu/drm/nouveau/include/nvkm/subdev/ltc.h | 1 + - .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- - .../gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild | 1 + - .../gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c | 69 +++++++++++++++++++ - .../gpu/drm/nouveau/nvkm/subdev/ltc/priv.h | 2 + - 5 files changed, 74 insertions(+), 1 deletion(-) - create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c - -diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h -index 644d527c3b96..d76f60d7d29a 100644 ---- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h -+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h -@@ -40,4 +40,5 @@ int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); - int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); - int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); - int gp102_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); -+int gp10b_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **); - #endif -diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c -index c3c7159f3411..d2d6d5f4028a 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c -+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c -@@ -2380,7 +2380,7 @@ nv13b_chipset = { - .fuse = gm107_fuse_new, - .ibus = gp10b_ibus_new, - .imem = gk20a_instmem_new, -- .ltc = gp102_ltc_new, -+ .ltc = gp10b_ltc_new, - .mc = gp10b_mc_new, - .mmu = gp10b_mmu_new, - .secboot = gp10b_secboot_new, -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild -index 2b6d36ea7067..728d75010847 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild -+++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild -@@ -6,3 +6,4 @@ nvkm-y += nvkm/subdev/ltc/gm107.o - nvkm-y += nvkm/subdev/ltc/gm200.o - nvkm-y += nvkm/subdev/ltc/gp100.o - nvkm-y += nvkm/subdev/ltc/gp102.o -+nvkm-y += nvkm/subdev/ltc/gp10b.o -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c -new file mode 100644 -index 000000000000..4d27c6ea1552 ---- /dev/null -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (c) 2019 NVIDIA Corporation. -+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -+ * OTHER DEALINGS IN THE SOFTWARE. 
-+ * -+ * Authors: Thierry Reding -+ */ -+ -+#include "priv.h" -+ -+static void -+gp10b_ltc_init(struct nvkm_ltc *ltc) -+{ -+ struct nvkm_device *device = ltc->subdev.device; -+#ifdef CONFIG_IOMMU_API -+ struct iommu_fwspec *spec; -+#endif -+ -+ nvkm_wr32(device, 0x17e27c, ltc->ltc_nr); -+ nvkm_wr32(device, 0x17e000, ltc->ltc_nr); -+ nvkm_wr32(device, 0x100800, ltc->ltc_nr); -+ -+#ifdef CONFIG_IOMMU_API -+ spec = dev_iommu_fwspec_get(device->dev); -+ if (spec) { -+ u32 sid = spec->ids[0] & 0xffff; -+ -+ /* stream ID */ -+ nvkm_wr32(device, 0x160000, sid << 2); -+ } -+#endif -+} -+ -+static const struct nvkm_ltc_func -+gp10b_ltc = { -+ .oneinit = gp100_ltc_oneinit, -+ .init = gp10b_ltc_init, -+ .intr = gp100_ltc_intr, -+ .cbc_clear = gm107_ltc_cbc_clear, -+ .cbc_wait = gm107_ltc_cbc_wait, -+ .zbc = 16, -+ .zbc_clear_color = gm107_ltc_zbc_clear_color, -+ .zbc_clear_depth = gm107_ltc_zbc_clear_depth, -+ .zbc_clear_stencil = gp102_ltc_zbc_clear_stencil, -+ .invalidate = gf100_ltc_invalidate, -+ .flush = gf100_ltc_flush, -+}; -+ -+int -+gp10b_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc) -+{ -+ return nvkm_ltc_new_(&gp10b_ltc, device, index, pltc); -+} -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h -index 2fcf18e46ce3..eca5a711b1b8 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h -@@ -46,4 +46,6 @@ void gm107_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32); - int gp100_ltc_oneinit(struct nvkm_ltc *); - void gp100_ltc_init(struct nvkm_ltc *); - void gp100_ltc_intr(struct nvkm_ltc *); -+ -+void gp102_ltc_zbc_clear_stencil(struct nvkm_ltc *, int, const u32); - #endif - -From patchwork Mon Sep 16 15:04:06 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [05/11] drm/nouveau: gp10b: Use correct copy engine -From: Thierry Reding -X-Patchwork-Id: 331052 
-Message-Id: <20190916150412.10025-6-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:06 +0200 - -From: Thierry Reding - -gp10b uses the new engine enumeration mechanism introduced in the Pascal -architecture. As a result, the copy engine, which used to be at index 2 -for prior Tegra GPU instantiations, has now moved to index 0. Fix up the -index and also use the gp100 variant of the copy engine class because on -gp10b the PASCAL_DMA_COPY_B class is not supported. - -Signed-off-by: Thierry Reding ---- - drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c -index d2d6d5f4028a..99d3fa3fad89 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c -+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c -@@ -2387,7 +2387,7 @@ nv13b_chipset = { - .pmu = gm20b_pmu_new, - .timer = gk20a_timer_new, - .top = gk104_top_new, -- .ce[2] = gp102_ce_new, -+ .ce[0] = gp100_ce_new, - .dma = gf119_dma_new, - .fifo = gp10b_fifo_new, - .gr = gp10b_gr_new, - -From patchwork Mon Sep 16 15:04:07 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [06/11] drm/nouveau: gk20a: Set IOMMU bit for DMA API if appropriate -From: Thierry Reding -X-Patchwork-Id: 331053 -Message-Id: <20190916150412.10025-7-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:07 +0200 - -From: Thierry Reding - -Detect if the DMA API is backed by an IOMMU and set the IOMMU bit if so. -This is needed to make sure IOMMU addresses are properly translated even -the explicit IOMMU API is not used. 
- -Signed-off-by: Thierry Reding ---- - .../drm/nouveau/nvkm/subdev/instmem/gk20a.c | 35 +++++++++++++------ - 1 file changed, 25 insertions(+), 10 deletions(-) - -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c -index b0493f8df1fe..1120a2a7d5f1 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c -@@ -100,12 +100,14 @@ struct gk20a_instmem { - unsigned int vaddr_max; - struct list_head vaddr_lru; - -+ /* IOMMU mapping */ -+ unsigned int page_shift; -+ u64 iommu_mask; -+ - /* Only used if IOMMU if present */ - struct mutex *mm_mutex; - struct nvkm_mm *mm; - struct iommu_domain *domain; -- unsigned long iommu_pgshift; -- u16 iommu_bit; - - /* Only used by DMA API */ - unsigned long attrs; -@@ -357,12 +359,12 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory) - mutex_unlock(&imem->lock); - - /* clear IOMMU bit to unmap pages */ -- r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift); -+ r->offset &= ~imem->iommu_mask; - - /* Unmap pages from GPU address space and free them */ - for (i = 0; i < node->base.mn->length; i++) { - iommu_unmap(imem->domain, -- (r->offset + i) << imem->iommu_pgshift, PAGE_SIZE); -+ (r->offset + i) << imem->page_shift, PAGE_SIZE); - dma_unmap_page(dev, node->dma_addrs[i], PAGE_SIZE, - DMA_BIDIRECTIONAL); - __free_page(node->pages[i]); -@@ -440,7 +442,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, - - /* present memory for being mapped using small pages */ - node->r.type = 12; -- node->r.offset = node->handle >> 12; -+ node->r.offset = imem->iommu_mask | node->handle >> 12; - node->r.length = (npages << PAGE_SHIFT) >> 12; - - node->base.mn = &node->r; -@@ -493,7 +495,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, - mutex_lock(imem->mm_mutex); - /* Reserve area from GPU address space */ - ret = nvkm_mm_head(imem->mm, 0, 1, npages, 
npages, -- align >> imem->iommu_pgshift, &r); -+ align >> imem->page_shift, &r); - mutex_unlock(imem->mm_mutex); - if (ret) { - nvkm_error(subdev, "IOMMU space is full!\n"); -@@ -502,7 +504,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, - - /* Map into GPU address space */ - for (i = 0; i < npages; i++) { -- u32 offset = (r->offset + i) << imem->iommu_pgshift; -+ u32 offset = (r->offset + i) << imem->page_shift; - - ret = iommu_map(imem->domain, offset, node->dma_addrs[i], - PAGE_SIZE, IOMMU_READ | IOMMU_WRITE); -@@ -518,7 +520,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, - } - - /* IOMMU bit tells that an address is to be resolved through the IOMMU */ -- r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift); -+ r->offset |= imem->iommu_mask; - - node->base.mn = r; - return 0; -@@ -619,11 +621,12 @@ gk20a_instmem_new(struct nvkm_device *device, int index, - imem->mm_mutex = &tdev->iommu.mutex; - imem->mm = &tdev->iommu.mm; - imem->domain = tdev->iommu.domain; -- imem->iommu_pgshift = tdev->iommu.pgshift; -- imem->iommu_bit = tdev->func->iommu_bit; -+ imem->page_shift = tdev->iommu.pgshift; - - nvkm_info(&imem->base.subdev, "using IOMMU\n"); - } else { -+ imem->page_shift = PAGE_SHIFT; -+ - imem->attrs = DMA_ATTR_NON_CONSISTENT | - DMA_ATTR_WEAK_ORDERING | - DMA_ATTR_WRITE_COMBINE; -@@ -631,5 +634,17 @@ gk20a_instmem_new(struct nvkm_device *device, int index, - nvkm_info(&imem->base.subdev, "using DMA API\n"); - } - -+ /* -+ * The IOMMU mask needs to be set if an IOMMU is used explicitly (via -+ * direct IOMMU API usage) or implicitly (via the DMA API). In both -+ * cases the device will have been attached to an IOMMU domain. 
-+ */ -+ if (iommu_get_domain_for_dev(device->dev)) { -+ imem->iommu_mask = BIT_ULL(tdev->func->iommu_bit - -+ imem->page_shift); -+ nvkm_debug(&imem->base.subdev, "IOMMU mask: %016llx\n", -+ imem->iommu_mask); -+ } -+ - return 0; - } - -From patchwork Mon Sep 16 15:04:08 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [07/11] drm/nouveau: gk20a: Implement custom MMU class -From: Thierry Reding -X-Patchwork-Id: 331057 -Message-Id: <20190916150412.10025-8-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:08 +0200 - -From: Thierry Reding - -The GPU integrated in NVIDIA Tegra SoCs is connected to system memory -via two paths: one direct path to the memory controller and another path -that goes through a system MMU first. It's not typically necessary to go -through the system MMU because the GPU's MMU can already map buffers so -that they appear contiguous to the GPU. - -However, in order to support big pages, the system MMU has to be used to -combine multiple small pages into one virtually contiguous chunk so that -the GPU can then treat that as a single big page. - -In order to prepare for big page support, implement a custom MMU class -that takes care of setting the IOMMU bit when writing page tables and -when appropriate. - -This is also necessary to make sure that Nouveau works correctly on -Tegra devices where the GPU is connected to a system MMU and that IOMMU -is used to back the DMA API. Currently Nouveau assumes that the DMA API -is never backed by an IOMMU, so access to DMA-mapped buffers fault when -suddenly this assumption is no longer true. - -One situation where this can happen is on 32-bit Tegra SoCs where the -ARM architecture code automatically attaches the GPU with a DMA/IOMMU -domain. 
This is currently worked around by detaching the GPU from the -IOMMU domain at probe time. However, with Tegra186 and later this can -now also happen, but unfortunately no mechanism exists to detach from -the domain in the 64-bit ARM architecture code. - -Using this Tegra-specific MMU class ensures that DMA-mapped buffers are -properly mapped (with the IOMMU bit set) if the DMA API is backed by an -IOMMU domain. - -Signed-off-by: Thierry Reding ---- - .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c | 50 ++++++++++++++++++- - .../gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h | 44 ++++++++++++++++ - .../gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c | 6 ++- - .../gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c | 4 +- - drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h | 1 + - .../drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c | 22 +++++++- - .../drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c | 4 +- - .../drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c | 20 +++++++- - 8 files changed, 142 insertions(+), 9 deletions(-) - create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h - -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c -index ac74965a60d4..d9a5e05b7dc7 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.c -@@ -19,11 +19,59 @@ - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -+ -+#include "gk20a.h" - #include "mem.h" - #include "vmm.h" - -+#include - #include - -+static void -+gk20a_mmu_ctor(const struct nvkm_mmu_func *func, struct nvkm_device *device, -+ int index, struct gk20a_mmu *mmu) -+{ -+ struct iommu_domain *domain = iommu_get_domain_for_dev(device->dev); -+ struct nvkm_device_tegra *tegra = device->func->tegra(device); -+ -+ nvkm_mmu_ctor(func, device, index, &mmu->base); -+ -+ /* -+ * If the DMA API is backed by an IOMMU, make sure the IOMMU bit is -+ * set for all buffer accesses. 
If the IOMMU is explicitly used, it -+ * is only used for instance blocks and the MMU doesn't care, since -+ * buffer objects are only mapped through the MMU, not through the -+ * IOMMU. -+ * -+ * Big page support could be implemented using explicit IOMMU usage, -+ * but the DMA API already provides that for free, so we don't worry -+ * about it for now. -+ */ -+ if (domain && !tegra->iommu.domain) { -+ mmu->iommu_mask = BIT_ULL(tegra->func->iommu_bit); -+ nvkm_debug(&mmu->base.subdev, "IOMMU mask: %llx\n", -+ mmu->iommu_mask); -+ } -+} -+ -+int -+gk20a_mmu_new_(const struct nvkm_mmu_func *func, struct nvkm_device *device, -+ int index, struct nvkm_mmu **pmmu) -+{ -+ struct gk20a_mmu *mmu; -+ -+ mmu = kzalloc(sizeof(*mmu), GFP_KERNEL); -+ if (!mmu) -+ return -ENOMEM; -+ -+ gk20a_mmu_ctor(func, device, index, mmu); -+ -+ if (pmmu) -+ *pmmu = &mmu->base; -+ -+ return 0; -+} -+ - static const struct nvkm_mmu_func - gk20a_mmu = { - .dma_bits = 40, -@@ -37,5 +85,5 @@ gk20a_mmu = { - int - gk20a_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) - { -- return nvkm_mmu_new_(&gk20a_mmu, device, index, pmmu); -+ return gk20a_mmu_new_(&gk20a_mmu, device, index, pmmu); - } -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h -new file mode 100644 -index 000000000000..bb81fc62509c ---- /dev/null -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gk20a.h -@@ -0,0 +1,44 @@ -+/* -+ * Copyright (c) 2019 NVIDIA Corporation. 
-+ * -+ * Permission is hereby granted, free of charge, to any person obtaining a -+ * copy of this software and associated documentation files (the "Software"), -+ * to deal in the Software without restriction, including without limitation -+ * the rights to use, copy, modify, merge, publish, distribute, sublicense, -+ * and/or sell copies of the Software, and to permit persons to whom the -+ * Software is furnished to do so, subject to the following conditions: -+ * -+ * The above copyright notice and this permission notice shall be included in -+ * all copies or substantial portions of the Software. -+ * -+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -+ * OTHER DEALINGS IN THE SOFTWARE. -+ */ -+ -+#ifndef __NVKM_MMU_GK20A_H__ -+#define __NVKM_MMU_GK20A_H__ -+ -+#include "priv.h" -+ -+struct gk20a_mmu { -+ struct nvkm_mmu base; -+ -+ /* -+ * If an IOMMU is used, indicates which address bit will trigger an -+ * IOMMU translation when set (when this bit is not set, the IOMMU is -+ * bypassed). A value of 0 means an IOMMU is never used. 
-+ */ -+ u64 iommu_mask; -+}; -+ -+#define gk20a_mmu(mmu) container_of(mmu, struct gk20a_mmu, base) -+ -+int gk20a_mmu_new_(const struct nvkm_mmu_func *, struct nvkm_device *, -+ int index, struct nvkm_mmu **); -+ -+#endif -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c -index 7353a94b4091..7fccd4df52a8 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gm20b.c -@@ -19,6 +19,8 @@ - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -+ -+#include "gk20a.h" - #include "mem.h" - #include "vmm.h" - -@@ -50,6 +52,6 @@ int - gm20b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) - { - if (device->fb->page) -- return nvkm_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); -- return nvkm_mmu_new_(&gm20b_mmu, device, index, pmmu); -+ return gk20a_mmu_new_(&gm20b_mmu_fixed, device, index, pmmu); -+ return gk20a_mmu_new_(&gm20b_mmu, device, index, pmmu); - } -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c -index 0a50be9a785a..ae3cb47be3d8 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/gp10b.c -@@ -19,6 +19,8 @@ - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ -+ -+#include "gk20a.h" - #include "mem.h" - #include "vmm.h" - -@@ -41,5 +43,5 @@ gp10b_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu) - { - if (!nvkm_boolopt(device->cfgopt, "GP100MmuLayout", true)) - return gm20b_mmu_new(device, index, pmmu); -- return nvkm_mmu_new_(&gp10b_mmu, device, index, pmmu); -+ return gk20a_mmu_new_(&gp10b_mmu, device, index, pmmu); - } -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h -index 5e55ecbd8005..fb3a9e8bb9cd 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h -@@ -213,6 +213,7 @@ void gf100_vmm_invalidate(struct nvkm_vmm *, u32 type); - void gf100_vmm_invalidate_pdb(struct nvkm_vmm *, u64 addr); - - int gk20a_vmm_aper(enum nvkm_memory_target); -+int gk20a_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *); - - int gm200_vmm_new_(const struct nvkm_vmm_func *, const struct nvkm_vmm_func *, - struct nvkm_mmu *, bool, u64, u64, void *, u32, -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c -index 5a9582dce970..16d7bf727292 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgk20a.c -@@ -19,6 +19,8 @@ - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- */ -+ -+#include "gk20a.h" - #include "vmm.h" - - #include -@@ -33,12 +35,28 @@ gk20a_vmm_aper(enum nvkm_memory_target target) - } - } - -+int -+gk20a_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, -+ struct nvkm_vmm_map *map) -+{ -+ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu); -+ int ret; -+ -+ ret = gf100_vmm_valid(vmm, argv, argc, map); -+ if (ret < 0) -+ return ret; -+ -+ map->type |= mmu->iommu_mask >> 8; -+ -+ return 0; -+} -+ - static const struct nvkm_vmm_func - gk20a_vmm_17 = { - .join = gf100_vmm_join, - .part = gf100_vmm_part, - .aper = gf100_vmm_aper, -- .valid = gf100_vmm_valid, -+ .valid = gk20a_vmm_valid, - .flush = gf100_vmm_flush, - .invalidate_pdb = gf100_vmm_invalidate_pdb, - .page = { -@@ -53,7 +71,7 @@ gk20a_vmm_16 = { - .join = gf100_vmm_join, - .part = gf100_vmm_part, - .aper = gf100_vmm_aper, -- .valid = gf100_vmm_valid, -+ .valid = gk20a_vmm_valid, - .flush = gf100_vmm_flush, - .invalidate_pdb = gf100_vmm_invalidate_pdb, - .page = { -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c -index 96b759695dd8..7a6066d886cd 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c -+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgm20b.c -@@ -26,7 +26,7 @@ gm20b_vmm_17 = { - .join = gm200_vmm_join, - .part = gf100_vmm_part, - .aper = gk20a_vmm_aper, -- .valid = gf100_vmm_valid, -+ .valid = gk20a_vmm_valid, - .flush = gf100_vmm_flush, - .invalidate_pdb = gf100_vmm_invalidate_pdb, - .page = { -@@ -42,7 +42,7 @@ gm20b_vmm_16 = { - .join = gm200_vmm_join, - .part = gf100_vmm_part, - .aper = gk20a_vmm_aper, -- .valid = gf100_vmm_valid, -+ .valid = gk20a_vmm_valid, - .flush = gf100_vmm_flush, - .invalidate_pdb = gf100_vmm_invalidate_pdb, - .page = { -diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c -index e081239afe58..180c8f006e32 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c -+++ 
b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp10b.c -@@ -19,14 +19,32 @@ - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ -+ -+#include "gk20a.h" - #include "vmm.h" - -+static int -+gp10b_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc, -+ struct nvkm_vmm_map *map) -+{ -+ struct gk20a_mmu *mmu = gk20a_mmu(vmm->mmu); -+ int ret; -+ -+ ret = gp100_vmm_valid(vmm, argv, argc, map); -+ if (ret < 0) -+ return ret; -+ -+ map->type |= mmu->iommu_mask >> 4; -+ -+ return 0; -+} -+ - static const struct nvkm_vmm_func - gp10b_vmm = { - .join = gp100_vmm_join, - .part = gf100_vmm_part, - .aper = gk20a_vmm_aper, -- .valid = gp100_vmm_valid, -+ .valid = gp10b_vmm_valid, - .flush = gp100_vmm_flush, - .mthd = gp100_vmm_mthd, - .invalidate_pdb = gp100_vmm_invalidate_pdb, - -From patchwork Mon Sep 16 15:04:09 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [08/11] drm/nouveau: tegra: Skip IOMMU initialization if already - attached -From: Thierry Reding -X-Patchwork-Id: 331060 -Message-Id: <20190916150412.10025-9-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:09 +0200 - -From: Thierry Reding - -If the GPU is already attached to an IOMMU, don't detach it and setup an -explicit IOMMU domain. Since Nouveau can now properly handle the case of -the DMA API being backed by an IOMMU, just continue using the DMA API. 
- -Signed-off-by: Thierry Reding ---- - .../drm/nouveau/nvkm/engine/device/tegra.c | 19 +++++++------------ - 1 file changed, 7 insertions(+), 12 deletions(-) - -diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -index d0d52c1d4aee..fc652aaa41c7 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -@@ -23,10 +23,6 @@ - #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER - #include "priv.h" - --#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) --#include --#endif -- - static int - nvkm_device_tegra_power_up(struct nvkm_device_tegra *tdev) - { -@@ -109,14 +105,13 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) - unsigned long pgsize_bitmap; - int ret; - --#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) -- if (dev->archdata.mapping) { -- struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev); -- -- arm_iommu_detach_device(dev); -- arm_iommu_release_mapping(mapping); -- } --#endif -+ /* -+ * Skip explicit IOMMU initialization if the GPU is already attached -+ * to an IOMMU domain. This can happen if the DMA API is backed by an -+ * IOMMU. -+ */ -+ if (iommu_get_domain_for_dev(dev)) -+ return; - - if (!tdev->func->iommu_bit) - return; - -From patchwork Mon Sep 16 15:04:10 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [09/11] drm/nouveau: tegra: Fall back to 32-bit DMA mask without IOMMU -From: Thierry Reding -X-Patchwork-Id: 331061 -Message-Id: <20190916150412.10025-10-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:10 +0200 - -From: Thierry Reding - -The GPU can usually address more than 32-bit, even without being -attached to an IOMMU. 
However, if the GPU is not attached to an IOMMU, -it's likely that there is no IOMMU in the system, in which case any -buffers allocated by Nouveau will likely end up in a region of memory -that cannot be accessed by host1x. - -Signed-off-by: Thierry Reding ---- - .../drm/nouveau/nvkm/engine/device/tegra.c | 111 +++++++++++------- - 1 file changed, 70 insertions(+), 41 deletions(-) - -diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -index fc652aaa41c7..221238a2cf53 100644 ---- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c -@@ -97,7 +97,7 @@ nvkm_device_tegra_power_down(struct nvkm_device_tegra *tdev) - return 0; - } - --static void -+static int - nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) - { - #if IS_ENABLED(CONFIG_IOMMU_API) -@@ -111,47 +111,65 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) - * IOMMU. - */ - if (iommu_get_domain_for_dev(dev)) -- return; -+ return -ENODEV; - - if (!tdev->func->iommu_bit) -- return; -+ return -ENODEV; -+ -+ if (!iommu_present(&platform_bus_type)) -+ return -ENODEV; - - mutex_init(&tdev->iommu.mutex); - -- if (iommu_present(&platform_bus_type)) { -- tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); -- if (!tdev->iommu.domain) -- goto error; -+ tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type); -+ if (!tdev->iommu.domain) -+ return -ENOMEM; - -- /* -- * A IOMMU is only usable if it supports page sizes smaller -- * or equal to the system's PAGE_SIZE, with a preference if -- * both are equal. 
-- */ -- pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; -- if (pgsize_bitmap & PAGE_SIZE) { -- tdev->iommu.pgshift = PAGE_SHIFT; -- } else { -- tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK); -- if (tdev->iommu.pgshift == 0) { -- dev_warn(dev, "unsupported IOMMU page size\n"); -- goto free_domain; -- } -- tdev->iommu.pgshift -= 1; -+ /* -+ * An IOMMU is only usable if it supports page sizes smaller or equal -+ * to the system's PAGE_SIZE, with a preference if both are equal. -+ */ -+ pgsize_bitmap = tdev->iommu.domain->ops->pgsize_bitmap; -+ if (pgsize_bitmap & PAGE_SIZE) { -+ tdev->iommu.pgshift = PAGE_SHIFT; -+ } else { -+ tdev->iommu.pgshift = fls(pgsize_bitmap & ~PAGE_MASK); -+ if (tdev->iommu.pgshift == 0) { -+ dev_warn(dev, "unsupported IOMMU page size\n"); -+ ret = -ENOTSUPP; -+ goto free_domain; - } - -- ret = iommu_attach_device(tdev->iommu.domain, dev); -- if (ret) -- goto free_domain; -+ tdev->iommu.pgshift -= 1; -+ } - -- ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, -- (1ULL << tdev->func->iommu_bit) >> -- tdev->iommu.pgshift, 1); -- if (ret) -- goto detach_device; -+ ret = iommu_attach_device(tdev->iommu.domain, dev); -+ if (ret) { -+ dev_warn(dev, "failed to attach to IOMMU: %d\n", ret); -+ goto free_domain; -+ } -+ -+ ret = nvkm_mm_init(&tdev->iommu.mm, 0, 0, -+ (1ULL << tdev->func->iommu_bit) >> -+ tdev->iommu.pgshift, 1); -+ if (ret) { -+ dev_warn(dev, "failed to initialize IOVA space: %d\n", ret); -+ goto detach_device; -+ } -+ -+ /* -+ * The IOMMU bit defines the upper limit of the GPU-addressable space. 
-+ */ -+ ret = dma_set_mask(dev, DMA_BIT_MASK(tdev->func->iommu_bit)); -+ if (ret) { -+ dev_warn(dev, "failed to set DMA mask: %d\n", ret); -+ goto fini_mm; - } - -- return; -+ return 0; -+ -+fini_mm: -+ nvkm_mm_fini(&tdev->iommu.mm); - - detach_device: - iommu_detach_device(tdev->iommu.domain, dev); -@@ -159,10 +177,15 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev) - free_domain: - iommu_domain_free(tdev->iommu.domain); - --error: -+ /* reset these so that the DMA API code paths are executed */ - tdev->iommu.domain = NULL; - tdev->iommu.pgshift = 0; -- dev_err(dev, "cannot initialize IOMMU MM\n"); -+ -+ dev_warn(dev, "cannot initialize IOMMU MM\n"); -+ -+ return ret; -+#else -+ return -ENOTSUPP; - #endif - } - -@@ -327,14 +350,20 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func, - goto free; - } - -- /** -- * The IOMMU bit defines the upper limit of the GPU-addressable space. -- */ -- ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(tdev->func->iommu_bit)); -- if (ret) -- goto free; -- -- nvkm_device_tegra_probe_iommu(tdev); -+ ret = nvkm_device_tegra_probe_iommu(tdev); -+ if (ret) { -+ /* -+ * If we fail to set up an IOMMU, fall back to a 32-bit DMA -+ * mask. This is not necessary for the GPU to work because it -+ * can usually address all of system memory. However, if the -+ * buffers allocated by Nouveau are meant to be shared with -+ * the display controller, we need to restrict where they can -+ * come from. 
-+ */ -+ ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32)); -+ if (ret) -+ goto free; -+ } - - ret = nvkm_device_tegra_power_up(tdev); - if (ret) - -From patchwork Mon Sep 16 15:04:11 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [10/11] arm64: tegra: Enable GPU on Jetson TX2 -From: Thierry Reding -X-Patchwork-Id: 331063 -Message-Id: <20190916150412.10025-11-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:11 +0200 - -From: Alexandre Courbot - -Enable the GPU node for the Jetson TX2 board. - -Signed-off-by: Alexandre Courbot -Signed-off-by: Thierry Reding ---- - arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts -index bdace01561ba..6f7c7c4c5c29 100644 ---- a/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts -+++ b/arch/arm64/boot/dts/nvidia/tegra186-p2771-0000.dts -@@ -276,6 +276,10 @@ - }; - }; - -+ gpu@17000000 { -+ status = "okay"; -+ }; -+ - gpio-keys { - compatible = "gpio-keys"; - - -From patchwork Mon Sep 16 15:04:12 2019 -Content-Type: text/plain; charset="utf-8" -MIME-Version: 1.0 -Content-Transfer-Encoding: 7bit -Subject: [11/11] arm64: tegra: Enable SMMU for GPU on Tegra186 -From: Thierry Reding -X-Patchwork-Id: 331062 -Message-Id: <20190916150412.10025-12-thierry.reding@gmail.com> -To: Ben Skeggs , Thierry Reding -Cc: linux-tegra@vger.kernel.org, nouveau@lists.freedesktop.org, - dri-devel@lists.freedesktop.org -Date: Mon, 16 Sep 2019 17:04:12 +0200 - -From: Thierry Reding - -The GPU has a connection to the ARM SMMU found on Tegra186, which can be -used to support large pages. Make sure the GPU is attached to the SMMU -to take advantage of its capabilities. 
- -Signed-off-by: Thierry Reding ---- - arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi -index 47cd831fcf44..171fd4dfa58d 100644 ---- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi -+++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi -@@ -1172,6 +1172,7 @@ - status = "disabled"; - - power-domains = <&bpmp TEGRA186_POWER_DOMAIN_GPU>; -+ iommus = <&smmu TEGRA186_SID_GPU>; - }; - - sysram@30000000 { diff --git a/kernel.spec b/kernel.spec index 28023a419..a63725d9c 100644 --- a/kernel.spec +++ b/kernel.spec @@ -542,10 +542,8 @@ Patch321: arm64-tegra-Jetson-TX2-Allow-bootloader-to-configure.patch Patch322: mfd-max77620-Do-not-allocate-IRQs-upfront.patch # https://patchwork.ozlabs.org/patch/1170631/ Patch323: gpio-max77620-Use-correct-unit-for-debounce-times.patch -# https://patchwork.freedesktop.org/series/66762/ -Patch324: drm-nouveau-Enable-GP10B-by-default.patch # https://www.spinics.net/lists/linux-tegra/msg44216.html -Patch325: arm64-tegra186-enable-USB-on-Jetson-TX2.patch +Patch324: arm64-tegra186-enable-USB-on-Jetson-TX2.patch # 400 - IBM (ppc/s390x) patches