diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 597d563d636a..b822e484b7e5 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -155,8 +155,7 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) order = __ffs(tegra->domain->pgsize_bitmap); init_iova_domain(&tegra->carveout.domain, 1UL << order, - carveout_start >> order, - carveout_end >> order); + carveout_start >> order); tegra->carveout.shift = iova_shift(&tegra->carveout.domain); tegra->carveout.limit = carveout_end >> tegra->carveout.shift; diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index 7f22c5c37660..5267c62e8896 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -198,8 +198,7 @@ static int host1x_probe(struct platform_device *pdev) order = __ffs(host->domain->pgsize_bitmap); init_iova_domain(&host->iova, 1UL << order, - geometry->aperture_start >> order, - geometry->aperture_end >> order); + geometry->aperture_start >> order); host->iova_end = geometry->aperture_end; } diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 8e8874d23717..9dc7facfd2e5 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -63,7 +63,6 @@ /* IO virtual address start page frame number */ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) /* Reserved IOVA ranges */ #define MSI_RANGE_START (0xfee00000) @@ -1547,10 +1546,11 @@ static unsigned long dma_ops_alloc_iova(struct device *dev, if (dma_mask > DMA_BIT_MASK(32)) pfn = alloc_iova_fast(&dma_dom->iovad, pages, - IOVA_PFN(DMA_BIT_MASK(32))); + IOVA_PFN(DMA_BIT_MASK(32)), false); if (!pfn) - pfn = alloc_iova_fast(&dma_dom->iovad, pages, IOVA_PFN(dma_mask)); + pfn = alloc_iova_fast(&dma_dom->iovad, pages, + IOVA_PFN(dma_mask), true); return (pfn << PAGE_SHIFT); } @@ -1788,8 +1788,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void) if (!dma_dom->domain.pt_root) goto free_dma_dom; - init_iova_domain(&dma_dom->iovad, PAGE_SIZE, - IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&dma_dom->iovad, PAGE_SIZE, IOVA_START_PFN); if (init_iova_flush_queue(&dma_dom->iovad, iova_domain_flush_tlb, NULL)) goto free_dma_dom; @@ -2696,8 +2695,7 @@ static int init_reserved_iova_ranges(void) struct pci_dev *pdev = NULL; struct iova *val; - init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, - IOVA_START_PFN, DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_ranges, PAGE_SIZE, IOVA_START_PFN); lockdep_set_class(&reserved_iova_ranges.iova_rbtree_lock, &reserved_rbtree_key); @@ -3663,11 +3661,11 @@ out_unlock: return table; } -static int alloc_irq_index(u16 devid, int count) +static int alloc_irq_index(u16 devid, int count, bool align) { struct irq_remap_table *table; + int index, c, alignment = 1; unsigned long flags; - int index, c; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -3677,16 +3675,22 @@ static int alloc_irq_index(u16 devid, int count) if (!table) return -ENODEV; + if (align) + alignment = roundup_pow_of_two(count); + spin_lock_irqsave(&table->lock, flags); /* Scan table for free entries */ - for (c = 0, index = table->min_index; + for (index = ALIGN(table->min_index, alignment), c = 0; index < MAX_IRQS_PER_TABLE; - ++index) { - if (!iommu->irte_ops->is_allocated(table, index)) + index++) { + if (!iommu->irte_ops->is_allocated(table, index)) { c += 1; - else - c = 0; + } else { + c = 0; + index = ALIGN(index, alignment); + continue; + } if (c == count) { 
for (; c != 0; --c) @@ -4099,7 +4103,9 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, else ret = -ENOMEM; } else { - index = alloc_irq_index(devid, nr_irqs); + bool align = (info->type == X86_IRQ_ALLOC_TYPE_MSI); + + index = alloc_irq_index(devid, nr_irqs, align); } if (index < 0) { pr_warn("Failed to allocate IRTE\n"); diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index e67ba6c40faf..ee0c7b73cff7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1743,6 +1743,14 @@ arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) return ops->unmap(ops, iova, size); } +static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +{ + struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; + + if (smmu) + __arm_smmu_tlb_sync(smmu); +} + static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -1963,6 +1971,8 @@ static struct iommu_ops arm_smmu_ops = { .map = arm_smmu_map, .unmap = arm_smmu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = arm_smmu_iotlb_sync, + .iotlb_sync = arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 3bdb799d3b4b..e4a82d70d446 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -250,6 +250,7 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; + const struct iommu_gather_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; struct mutex init_mutex; /* Protects smmu pointer */ @@ -735,7 +736,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - const struct iommu_gather_ops *tlb_ops; mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -813,7 +813,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 32UL); oas = min(oas, 32UL); } - tlb_ops = &arm_smmu_s1_tlb_ops; + smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops; break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -833,9 +833,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, oas = min(oas, 40UL); } if (smmu->version == ARM_SMMU_V2) - tlb_ops = &arm_smmu_s2_tlb_ops_v2; + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2; else - tlb_ops = &arm_smmu_s2_tlb_ops_v1; + smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1; break; default: ret = -EINVAL; @@ -863,7 +863,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, - .tlb = tlb_ops, + .tlb = smmu_domain->tlb_ops, .iommu_dev = smmu->dev, }; @@ -1259,6 +1259,14 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return ops->unmap(ops, iova, size); } +static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +{ + struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + + if (smmu_domain->tlb_ops) + smmu_domain->tlb_ops->tlb_sync(smmu_domain); +} + static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, dma_addr_t iova) { @@ -1562,6 +1570,8 @@ static struct iommu_ops arm_smmu_ops = { .map = arm_smmu_map, .unmap = arm_smmu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = arm_smmu_iotlb_sync, + .iotlb_sync = 
arm_smmu_iotlb_sync, .iova_to_phys = arm_smmu_iova_to_phys, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 9d1cebe7f6cb..25914d36c5ac 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -292,18 +292,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* ...then finally give it a kicking to make sure it fits */ base_pfn = max_t(unsigned long, base_pfn, domain->geometry.aperture_start >> order); - end_pfn = min_t(unsigned long, end_pfn, - domain->geometry.aperture_end >> order); } - /* - * PCI devices may have larger DMA masks, but still prefer allocating - * within a 32-bit mask to avoid DAC addressing. Such limitations don't - * apply to the typical platform device, so for those we may as well - * leave the cache limit at the top of their range to save an rb_last() - * traversal on every allocation. - */ - if (dev && dev_is_pci(dev)) - end_pfn &= DMA_BIT_MASK(32) >> order; /* start_pfn is always nonzero for an already-initialised domain */ if (iovad->start_pfn) { @@ -312,16 +301,11 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, pr_warn("Incompatible range for DMA domain\n"); return -EFAULT; } - /* - * If we have devices with different DMA masks, move the free - * area cache limit down for the benefit of the smaller one. - */ - iovad->dma_32bit_pfn = min(end_pfn + 1, iovad->dma_32bit_pfn); return 0; } - init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + init_iova_domain(iovad, 1UL << order, base_pfn); if (!dev) return 0; @@ -386,10 +370,12 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, /* Try to get PCI devices a SAC address */ if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) - iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); + iova = alloc_iova_fast(iovad, iova_len, + DMA_BIT_MASK(32) >> shift, false); if (!iova) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, + true); return (dma_addr_t)iova << shift; } diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 57c920c1372d..1ea7cd537873 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -801,13 +801,16 @@ int __init dmar_dev_scope_init(void) dmar_free_pci_notify_info(info); } } - - bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); } return dmar_dev_scope_status; } +void dmar_register_bus_notifier(void) +{ + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); +} + int __init dmar_table_init(void) { diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 25c2c75f5332..79c45650f8de 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -263,6 +263,7 @@ struct exynos_iommu_domain { struct sysmmu_drvdata { struct device *sysmmu; /* SYSMMU controller device */ struct device *master; /* master device (owner) */ + struct device_link *link; /* runtime PM link to master */ void __iomem *sfrbase; /* our registers */ struct clk *clk; /* SYSMMU's clock */ struct clk *aclk; /* SYSMMU's aclk clock */ @@ -1250,6 +1251,8 @@ static struct iommu_group *get_device_iommu_group(struct device *dev) static int exynos_iommu_add_device(struct device *dev) { + struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; struct iommu_group *group; if (!has_sysmmu(dev)) @@ -1260,6 +1263,15 @@ static int exynos_iommu_add_device(struct device *dev) if 
(IS_ERR(group)) return PTR_ERR(group); + list_for_each_entry(data, &owner->controllers, owner_node) { + /* + * SYSMMU will be runtime activated via device link + * (dependency) to its master device, so there are no + * direct calls to pm_runtime_get/put in this driver. + */ + data->link = device_link_add(dev, data->sysmmu, + DL_FLAG_PM_RUNTIME); + } iommu_group_put(group); return 0; @@ -1268,6 +1280,7 @@ static int exynos_iommu_add_device(struct device *dev) static void exynos_iommu_remove_device(struct device *dev) { struct exynos_iommu_owner *owner = dev->archdata.iommu; + struct sysmmu_drvdata *data; if (!has_sysmmu(dev)) return; @@ -1283,6 +1296,9 @@ static void exynos_iommu_remove_device(struct device *dev) } } iommu_group_remove_device(dev); + + list_for_each_entry(data, &owner->controllers, owner_node) + device_link_del(data->link); } static int exynos_iommu_of_xlate(struct device *dev, @@ -1316,13 +1332,6 @@ static int exynos_iommu_of_xlate(struct device *dev, list_add_tail(&data->owner_node, &owner->controllers); data->master = dev; - /* - * SYSMMU will be runtime activated via device link (dependency) to its - * master device, so there are no direct calls to pm_runtime_get/put - * in this driver. - */ - device_link_add(dev, data->sysmmu, DL_FLAG_PM_RUNTIME); - return 0; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6784a05dd6b2..a0babdbf7146 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -82,8 +82,6 @@ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) -#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32)) -#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64)) /* page table handling */ #define LEVEL_STRIDE (9) @@ -1878,8 +1876,7 @@ static int dmar_init_reserved_ranges(void) struct iova *iova; int i; - init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&reserved_iova_list, VTD_PAGE_SIZE, IOVA_START_PFN); lockdep_set_class(&reserved_iova_list.iova_rbtree_lock, &reserved_rbtree_key); @@ -1938,8 +1935,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, unsigned long sagaw; int err; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); err = init_iova_flush_queue(&domain->iovad, iommu_flush_iova, iova_entry_free); @@ -2058,7 +2054,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, if (context_copied(context)) { u16 did_old = context_domain_id(context); - if (did_old >= 0 && did_old < cap_ndoms(iommu->cap)) { + if (did_old < cap_ndoms(iommu->cap)) { iommu->flush.flush_context(iommu, did_old, (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, @@ -3473,11 +3469,12 @@ static unsigned long intel_alloc_iova(struct device *dev, * from higher range */ iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, - IOVA_PFN(DMA_BIT_MASK(32))); + IOVA_PFN(DMA_BIT_MASK(32)), false); if (iova_pfn) return iova_pfn; } - iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask)); + iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, + IOVA_PFN(dma_mask), true); if (unlikely(!iova_pfn)) { pr_err("Allocating %ld-page iova for %s failed", nrpages, dev_name(dev)); @@ -4752,6 +4749,16 @@ int __init intel_iommu_init(void) goto out_free_dmar; } + up_write(&dmar_global_lock); + + /* + * The bus notifier takes the dmar_global_lock, so lockdep will + * complain later when we register it under the lock. 
+ */ + dmar_register_bus_notifier(); + + down_write(&dmar_global_lock); + if (no_iommu || dmar_disabled) { /* * We exit the function here to ensure IOMMU's remapping and @@ -4897,8 +4904,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; - init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN, - DMA_32BIT_PFN); + init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN); domain_reserve_special_ranges(domain); /* calculate AGAW */ diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 6961fc393f0b..2ca08dc9331c 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -660,16 +660,11 @@ static int arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, size_t size) { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); - size_t unmapped; if (WARN_ON(upper_32_bits(iova))) return 0; - unmapped = __arm_v7s_unmap(data, iova, size, 1, data->pgd); - if (unmapped) - io_pgtable_tlb_sync(&data->iop); - - return unmapped; + return __arm_v7s_unmap(data, iova, size, 1, data->pgd); } static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index e8018a308868..51e5c43caed1 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -609,7 +609,6 @@ static int __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, size_t size) { - size_t unmapped; struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); arm_lpae_iopte *ptep = data->pgd; int lvl = ARM_LPAE_START_LVL(data); @@ -617,11 +616,7 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; - unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep); - if (unmapped) - io_pgtable_tlb_sync(&data->iop); - - return unmapped; + return __arm_lpae_unmap(data, iova, size, lvl, ptep); } static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 33edfa794ae9..84bda3a4dafc 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -24,6 +24,9 @@ #include #include +/* The anchor node sits above the top of the usable address space */ +#define IOVA_ANCHOR ~0UL + static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, unsigned long size); @@ -37,7 +40,7 @@ static void fq_flush_timeout(unsigned long data); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit) + unsigned long start_pfn) { /* * IOVA granularity will normally be equal to the smallest @@ -48,12 +51,16 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; - iovad->cached32_node = NULL; + iovad->cached_node = &iovad->anchor.node; + iovad->cached32_node = &iovad->anchor.node; iovad->granule = granule; iovad->start_pfn = start_pfn; - iovad->dma_32bit_pfn = pfn_32bit + 1; + iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); iovad->flush_cb = NULL; iovad->fq = NULL; + iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; + rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); + rb_insert_color(&iovad->anchor.node, &iovad->rbroot); init_iova_rcaches(iovad); } EXPORT_SYMBOL_GPL(init_iova_domain); @@ -108,50 +115,36 @@ int 
init_iova_flush_queue(struct iova_domain *iovad, EXPORT_SYMBOL_GPL(init_iova_flush_queue); static struct rb_node * -__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) +__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) { - if ((*limit_pfn > iovad->dma_32bit_pfn) || - (iovad->cached32_node == NULL)) - return rb_last(&iovad->rbroot); - else { - struct rb_node *prev_node = rb_prev(iovad->cached32_node); - struct iova *curr_iova = - rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo; - return prev_node; - } + if (limit_pfn <= iovad->dma_32bit_pfn) + return iovad->cached32_node; + + return iovad->cached_node; } static void -__cached_rbnode_insert_update(struct iova_domain *iovad, - unsigned long limit_pfn, struct iova *new) +__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) { - if (limit_pfn != iovad->dma_32bit_pfn) - return; - iovad->cached32_node = &new->node; + if (new->pfn_hi < iovad->dma_32bit_pfn) + iovad->cached32_node = &new->node; + else + iovad->cached_node = &new->node; } static void __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) { struct iova *cached_iova; - struct rb_node *curr; - if (!iovad->cached32_node) - return; - curr = iovad->cached32_node; - cached_iova = rb_entry(curr, struct iova, node); + cached_iova = rb_entry(iovad->cached32_node, struct iova, node); + if (free->pfn_hi < iovad->dma_32bit_pfn && + free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached32_node = rb_next(&free->node); - if (free->pfn_lo >= cached_iova->pfn_lo) { - struct rb_node *node = rb_next(&free->node); - struct iova *iova = rb_entry(node, struct iova, node); - - /* only cache if it's below 32bit pfn */ - if (node && iova->pfn_lo < iovad->dma_32bit_pfn) - iovad->cached32_node = node; - else - iovad->cached32_node = NULL; - } + cached_iova = rb_entry(iovad->cached_node, struct iova, node); + if (free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached_node = rb_next(&free->node); } /* Insert the iova into domain rbtree by holding writer lock */ @@ -182,63 +175,43 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, rb_insert_color(&iova->node, root); } -/* - * Computes the padding size required, to make the start address - * naturally aligned on the power-of-two order of its size - */ -static unsigned int -iova_get_pad_size(unsigned int size, unsigned int limit_pfn) -{ - return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); -} - static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, struct iova *new, bool size_aligned) { - struct rb_node *prev, *curr = NULL; + struct rb_node *curr, *prev; + struct iova *curr_iova; unsigned long flags; - unsigned long saved_pfn; - unsigned int pad_size = 0; + unsigned long new_pfn; + unsigned long align_mask = ~0UL; + + if (size_aligned) + align_mask <<= fls_long(size - 1); /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - saved_pfn = limit_pfn; - curr = __get_cached_rbnode(iovad, &limit_pfn); - prev = curr; - while (curr) { - struct iova *curr_iova = rb_entry(curr, struct iova, node); - - if (limit_pfn <= curr_iova->pfn_lo) { - goto move_left; - } else if (limit_pfn > curr_iova->pfn_hi) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) - break; /* found a free slot */ - } - limit_pfn = curr_iova->pfn_lo; -move_left: + curr = __get_cached_rbnode(iovad, limit_pfn); 
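/*
 * (The rewritten __alloc_and_insert_iova_range() walk continues in the
 * hunk just below.)  As a side note, here is a minimal userspace
 * sketch -- not kernel code -- of the top-down, size-aligned gap
 * search that this rewrite performs: start from the cached node
 * (initially the IOVA_ANCHOR entry), clamp the limit to each busy
 * range's pfn_lo, mask the candidate down to the required alignment,
 * and stop once the candidate clears the next lower busy range.  A
 * sorted array stands in for the rbtree; all demo_* names are
 * illustrative only.
 */
#include <stdio.h>
#include <stdbool.h>

struct demo_range { unsigned long lo, hi; };

/* busy ranges, sorted ascending; the last entry mimics the anchor node */
static struct demo_range busy[] = {
	{ 0x100, 0x1ff },
	{ 0x400, 0x47f },
	{ ~0UL, ~0UL },			/* anchor above the usable space */
};
#define NBUSY ((int)(sizeof(busy) / sizeof(busy[0])))

static unsigned long demo_fls_long(unsigned long x)
{
	unsigned long r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

/* returns the allocated base pfn, or 0 on failure */
static unsigned long demo_alloc(unsigned long size, unsigned long limit_pfn,
				unsigned long start_pfn, bool size_aligned)
{
	unsigned long align_mask = ~0UL;
	unsigned long new_pfn;
	int i = NBUSY - 1;		/* the cached/anchor entry */

	if (size_aligned)
		align_mask <<= demo_fls_long(size - 1);

	do {
		if (busy[i].lo < limit_pfn)
			limit_pfn = busy[i].lo;
		new_pfn = (limit_pfn - size) & align_mask;
		i--;			/* step to the next lower busy range */
	} while (i >= 0 && new_pfn <= busy[i].hi);

	if (limit_pfn < size || new_pfn < start_pfn)
		return 0;		/* no hole large enough below limit_pfn */

	return new_pfn;
}

int main(void)
{
	/* 0x80 pfns, naturally aligned, below pfn 0x500, above pfn 0x1 */
	printf("allocated pfn 0x%lx\n", demo_alloc(0x80, 0x500, 0x1, true));
	return 0;
}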
+ curr_iova = rb_entry(curr, struct iova, node); + do { + limit_pfn = min(limit_pfn, curr_iova->pfn_lo); + new_pfn = (limit_pfn - size) & align_mask; prev = curr; curr = rb_prev(curr); - } + curr_iova = rb_entry(curr, struct iova, node); + } while (curr && new_pfn <= curr_iova->pfn_hi); - if (!curr) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((iovad->start_pfn + size + pad_size) > limit_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; - } + if (limit_pfn < size || new_pfn < iovad->start_pfn) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size); + new->pfn_lo = new_pfn; new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. */ iova_insert_rbtree(&iovad->rbroot, new, prev); - __cached_rbnode_insert_update(iovad, saved_pfn, new); + __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); @@ -258,7 +231,8 @@ EXPORT_SYMBOL(alloc_iova_mem); void free_iova_mem(struct iova *iova) { - kmem_cache_free(iova_cache, iova); + if (iova->pfn_lo != IOVA_ANCHOR) + kmem_cache_free(iova_cache, iova); } EXPORT_SYMBOL(free_iova_mem); @@ -342,15 +316,12 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) while (node) { struct iova *iova = rb_entry(node, struct iova, node); - /* If pfn falls within iova's range, return iova */ - if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { - return iova; - } - if (pfn < iova->pfn_lo) node = node->rb_left; - else if (pfn > iova->pfn_lo) + else if (pfn > iova->pfn_hi) node = node->rb_right; + else + return iova; /* pfn falls within iova's range */ } return NULL; @@ -424,18 +395,19 @@ EXPORT_SYMBOL_GPL(free_iova); * @iovad: - iova domain in question * @size: - size of page frames to allocate * @limit_pfn: - max limit address + * @flush_rcache: - set to flush rcache on regular allocation failure * This function tries to satisfy an iova allocation from the rcache, - * and falls back to regular allocation on failure. + * and falls back to regular allocation on failure. If regular allocation + * fails too and the flush_rcache flag is set then the rcache will be flushed. */ unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, bool flush_rcache) { - bool flushed_rcache = false; unsigned long iova_pfn; struct iova *new_iova; - iova_pfn = iova_rcache_get(iovad, size, limit_pfn); + iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); if (iova_pfn) return iova_pfn; @@ -444,11 +416,11 @@ retry: if (!new_iova) { unsigned int cpu; - if (flushed_rcache) + if (!flush_rcache) return 0; /* Try replenishing IOVAs by flushing rcache. 
*/ - flushed_rcache = true; + flush_rcache = false; for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); goto retry; @@ -612,21 +584,12 @@ EXPORT_SYMBOL_GPL(queue_iova); */ void put_iova_domain(struct iova_domain *iovad) { - struct rb_node *node; - unsigned long flags; + struct iova *iova, *tmp; free_iova_flush_queue(iovad); free_iova_rcaches(iovad); - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - node = rb_first(&iovad->rbroot); - while (node) { - struct iova *iova = rb_entry(node, struct iova, node); - - rb_erase(node, &iovad->rbroot); + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) free_iova_mem(iova); - node = rb_first(&iovad->rbroot); - } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } EXPORT_SYMBOL_GPL(put_iova_domain); @@ -695,6 +658,10 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; + /* Don't allow nonsensical pfns */ + if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad)))) + return NULL; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { @@ -738,6 +705,9 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) struct iova *iova = rb_entry(node, struct iova, node); struct iova *new_iova; + if (iova->pfn_lo == IOVA_ANCHOR) + continue; + new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); if (!new_iova) printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", @@ -855,12 +825,21 @@ static bool iova_magazine_empty(struct iova_magazine *mag) static unsigned long iova_magazine_pop(struct iova_magazine *mag, unsigned long limit_pfn) { + int i; + unsigned long pfn; + BUG_ON(iova_magazine_empty(mag)); - if (mag->pfns[mag->size - 1] >= limit_pfn) - return 0; + /* Only fall back to the rbtree if we have no suitable pfns at all */ + for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) + if (i == 0) + return 0; - return mag->pfns[--mag->size]; + /* Swap it to pop it */ + pfn = mag->pfns[i]; + mag->pfns[i] = mag->pfns[--mag->size]; + + return pfn; } static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) @@ -1011,27 +990,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) return 0; - return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); -} - -/* - * Free a cpu's rcache. 
- */ -static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, - struct iova_rcache *rcache) -{ - struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); - unsigned long flags; - - spin_lock_irqsave(&cpu_rcache->lock, flags); - - iova_magazine_free_pfns(cpu_rcache->loaded, iovad); - iova_magazine_free(cpu_rcache->loaded); - - iova_magazine_free_pfns(cpu_rcache->prev, iovad); - iova_magazine_free(cpu_rcache->prev); - - spin_unlock_irqrestore(&cpu_rcache->lock, flags); + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); } /* @@ -1040,21 +999,20 @@ static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, static void free_iova_rcaches(struct iova_domain *iovad) { struct iova_rcache *rcache; - unsigned long flags; + struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; int i, j; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; - for_each_possible_cpu(cpu) - free_cpu_iova_rcache(cpu, iovad, rcache); - spin_lock_irqsave(&rcache->lock, flags); - free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + for_each_possible_cpu(cpu) { + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + iova_magazine_free(cpu_rcache->loaded); + iova_magazine_free(cpu_rcache->prev); } - spin_unlock_irqrestore(&rcache->lock, flags); + free_percpu(rcache->cpu_rcaches); + for (j = 0; j < rcache->depot_size; ++j) + iova_magazine_free(rcache->depot[j]); } } diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 195d6e93ac71..af8140054273 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -619,6 +619,14 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, return domain->iop->unmap(domain->iop, iova, size); } +static void ipmmu_iotlb_sync(struct iommu_domain *io_domain) +{ + struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); + + if (domain->mmu) + ipmmu_tlb_flush_all(domain); +} + static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, dma_addr_t iova) { @@ -876,6 +884,8 @@ static const struct iommu_ops ipmmu_ops = { .detach_dev = ipmmu_detach_device, .map = ipmmu_map, .unmap = ipmmu_unmap, + .flush_iotlb_all = ipmmu_iotlb_sync, + .iotlb_sync = ipmmu_iotlb_sync, .map_sg = default_iommu_map_sg, .iova_to_phys = ipmmu_iova_to_phys, .add_device = ipmmu_add_device_dma, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 16d33ac19db0..f227d73e7bf6 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -392,6 +392,11 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, return unmapsz; } +static void mtk_iommu_iotlb_sync(struct iommu_domain *domain) +{ + mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); +} + static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -491,6 +496,8 @@ static struct iommu_ops mtk_iommu_ops = { .map = mtk_iommu_map, .unmap = mtk_iommu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = mtk_iommu_iotlb_sync, + .iotlb_sync = mtk_iommu_iotlb_sync, .iova_to_phys = mtk_iommu_iova_to_phys, .add_device = mtk_iommu_add_device, .remove_device = mtk_iommu_remove_device, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bd67e1b2c64e..e135ab830ebf 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -2,6 +2,7 @@ * omap iommu: tlb and pagetable primitives * * 
Copyright (C) 2008-2010 Nokia Corporation + * Copyright (C) 2013-2017 Texas Instruments Incorporated - http://www.ti.com/ * * Written by Hiroshi DOYU , * Paul Mundt and Toshihiro Kobayashi @@ -71,13 +72,23 @@ static struct omap_iommu_domain *to_omap_domain(struct iommu_domain *dom) **/ void omap_iommu_save_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - p[i] = iommu_read_reg(obj, i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while (arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + p[i] = iommu_read_reg(obj, i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); @@ -88,13 +99,23 @@ EXPORT_SYMBOL_GPL(omap_iommu_save_ctx); **/ void omap_iommu_restore_ctx(struct device *dev) { - struct omap_iommu *obj = dev_to_omap_iommu(dev); - u32 *p = obj->ctx; + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *obj; + u32 *p; int i; - for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { - iommu_write_reg(obj, p[i], i * sizeof(u32)); - dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, p[i]); + if (!arch_data) + return; + + while (arch_data->iommu_dev) { + obj = arch_data->iommu_dev; + p = obj->ctx; + for (i = 0; i < (MMU_REG_SIZE / sizeof(u32)); i++) { + iommu_write_reg(obj, p[i], i * sizeof(u32)); + dev_dbg(obj->dev, "%s\t[%02d] %08x\n", __func__, i, + p[i]); + } + arch_data++; } } EXPORT_SYMBOL_GPL(omap_iommu_restore_ctx); @@ -805,7 +826,7 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) struct iommu_domain *domain = obj->domain; struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - if (!omap_domain->iommu_dev) + if (!omap_domain->dev) return IRQ_NONE; errs = iommu_report_fault(obj, &da); @@ -893,6 +914,24 @@ static void omap_iommu_detach(struct omap_iommu *obj) dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); } +static bool omap_iommu_can_register(struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + + if (!of_device_is_compatible(np, "ti,dra7-dsp-iommu")) + return true; + + /* + * restrict IOMMU core registration only for processor-port MDMA MMUs + * on DRA7 DSPs + */ + if ((!strcmp(dev_name(&pdev->dev), "40d01000.mmu")) || + (!strcmp(dev_name(&pdev->dev), "41501000.mmu"))) + return true; + + return false; +} + static int omap_iommu_dra7_get_dsp_system_cfg(struct platform_device *pdev, struct omap_iommu *obj) { @@ -984,19 +1023,22 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; platform_set_drvdata(pdev, obj); - obj->group = iommu_group_alloc(); - if (IS_ERR(obj->group)) - return PTR_ERR(obj->group); + if (omap_iommu_can_register(pdev)) { + obj->group = iommu_group_alloc(); + if (IS_ERR(obj->group)) + return PTR_ERR(obj->group); - err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); - if (err) - goto out_group; + err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, + obj->name); + if (err) + goto out_group; - iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); + iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); - err = iommu_device_register(&obj->iommu); - if (err) - goto out_sysfs; + err = iommu_device_register(&obj->iommu); + 
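/*
 * (The registration error handling of omap_iommu_probe() continues in
 * the hunk just below.)  As a side note, the sentinel-terminated
 * arch_data array that this rework relies on -- allocated with
 * num_iommus + 1 zeroed entries in omap_iommu_add_device() and walked
 * until iommu_dev is NULL by the context save/restore helpers above
 * and by omap_iommu_count() further below -- can be illustrated with a
 * small userspace sketch.  This is not kernel code; the demo_* names
 * are made up, only the walk pattern is taken from the patch.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_mmu {
	const char *name;
};

struct demo_arch_data {
	struct demo_mmu *iommu_dev;
};

/* mirrors omap_iommu_count(): advance until the zeroed sentinel entry */
static int demo_count(const struct demo_arch_data *arch_data)
{
	int count = 0;

	while (arch_data->iommu_dev) {
		count++;
		arch_data++;
	}
	return count;
}

int main(void)
{
	static struct demo_mmu mmu0 = { "40d01000.mmu" };
	static struct demo_mmu mmu1 = { "41501000.mmu" };
	struct demo_arch_data *arch_data;
	int num_iommus = 2, i;

	/* num_iommus + 1 entries; calloc() zeroes the terminating sentinel */
	arch_data = calloc(num_iommus + 1, sizeof(*arch_data));
	if (!arch_data)
		return 1;

	arch_data[0].iommu_dev = &mmu0;
	arch_data[1].iommu_dev = &mmu1;

	printf("device sees %d IOMMUs\n", demo_count(arch_data));

	/* the same walk the save/restore-context helpers perform */
	for (i = 0; arch_data[i].iommu_dev; i++)
		printf("would save context of %s\n", arch_data[i].iommu_dev->name);

	free(arch_data);
	return 0;
}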
if (err) + goto out_sysfs; + } pm_runtime_irq_safe(obj->dev); pm_runtime_enable(obj->dev); @@ -1018,11 +1060,13 @@ static int omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); - iommu_group_put(obj->group); - obj->group = NULL; + if (obj->group) { + iommu_group_put(obj->group); + obj->group = NULL; - iommu_device_sysfs_remove(&obj->iommu); - iommu_device_unregister(&obj->iommu); + iommu_device_sysfs_remove(&obj->iommu); + iommu_device_unregister(&obj->iommu); + } omap_iommu_debugfs_remove(obj); @@ -1068,11 +1112,13 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, phys_addr_t pa, size_t bytes, int prot) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; - struct device *dev = oiommu->dev; + struct device *dev = omap_domain->dev; + struct omap_iommu_device *iommu; + struct omap_iommu *oiommu; struct iotlb_entry e; int omap_pgsz; - u32 ret; + u32 ret = -EINVAL; + int i; omap_pgsz = bytes_to_iopgsz(bytes); if (omap_pgsz < 0) { @@ -1084,9 +1130,24 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, iotlb_init_entry(&e, da, pa, omap_pgsz); - ret = omap_iopgtable_store_entry(oiommu, &e); - if (ret) - dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", ret); + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { + oiommu = iommu->iommu_dev; + ret = omap_iopgtable_store_entry(oiommu, &e); + if (ret) { + dev_err(dev, "omap_iopgtable_store_entry failed: %d\n", + ret); + break; + } + } + + if (ret) { + while (i--) { + iommu--; + oiommu = iommu->iommu_dev; + iopgtable_clear_entry(oiommu, da); + } + } return ret; } @@ -1095,12 +1156,90 @@ static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, size_t size) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; - struct device *dev = oiommu->dev; + struct device *dev = omap_domain->dev; + struct omap_iommu_device *iommu; + struct omap_iommu *oiommu; + bool error = false; + size_t bytes = 0; + int i; dev_dbg(dev, "unmapping da 0x%lx size %u\n", da, size); - return iopgtable_clear_entry(oiommu, da); + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++) { + oiommu = iommu->iommu_dev; + bytes = iopgtable_clear_entry(oiommu, da); + if (!bytes) + error = true; + } + + /* + * simplify return - we are only checking if any of the iommus + * reported an error, but not if all of them are unmapping the + * same number of entries. This should not occur due to the + * mirror programming. + */ + return error ? 
0 : bytes; +} + +static int omap_iommu_count(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + int count = 0; + + while (arch_data->iommu_dev) { + count++; + arch_data++; + } + + return count; +} + +/* caller should call cleanup if this function fails */ +static int omap_iommu_attach_init(struct device *dev, + struct omap_iommu_domain *odomain) +{ + struct omap_iommu_device *iommu; + int i; + + odomain->num_iommus = omap_iommu_count(dev); + if (!odomain->num_iommus) + return -EINVAL; + + odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu), + GFP_ATOMIC); + if (!odomain->iommus) + return -ENOMEM; + + iommu = odomain->iommus; + for (i = 0; i < odomain->num_iommus; i++, iommu++) { + iommu->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_ATOMIC); + if (!iommu->pgtable) + return -ENOMEM; + + /* + * should never fail, but please keep this around to ensure + * we keep the hardware happy + */ + if (WARN_ON(!IS_ALIGNED((long)iommu->pgtable, + IOPGD_TABLE_SIZE))) + return -EINVAL; + } + + return 0; +} + +static void omap_iommu_detach_fini(struct omap_iommu_domain *odomain) +{ + int i; + struct omap_iommu_device *iommu = odomain->iommus; + + for (i = 0; iommu && i < odomain->num_iommus; i++, iommu++) + kfree(iommu->pgtable); + + kfree(odomain->iommus); + odomain->num_iommus = 0; + odomain->iommus = NULL; } static int @@ -1108,8 +1247,10 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu_device *iommu; struct omap_iommu *oiommu; int ret = 0; + int i; if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); @@ -1118,26 +1259,49 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) spin_lock(&omap_domain->lock); - /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev) { + /* only a single client device can be attached to a domain */ + if (omap_domain->dev) { dev_err(dev, "iommu domain is already attached\n"); ret = -EBUSY; goto out; } - oiommu = arch_data->iommu_dev; - - /* get a handle to and enable the omap iommu */ - ret = omap_iommu_attach(oiommu, omap_domain->pgtable); + ret = omap_iommu_attach_init(dev, omap_domain); if (ret) { - dev_err(dev, "can't get omap iommu: %d\n", ret); - goto out; + dev_err(dev, "failed to allocate required iommu data %d\n", + ret); + goto init_fail; } - omap_domain->iommu_dev = oiommu; - omap_domain->dev = dev; - oiommu->domain = domain; + iommu = omap_domain->iommus; + for (i = 0; i < omap_domain->num_iommus; i++, iommu++, arch_data++) { + /* configure and enable the omap iommu */ + oiommu = arch_data->iommu_dev; + ret = omap_iommu_attach(oiommu, iommu->pgtable); + if (ret) { + dev_err(dev, "can't get omap iommu: %d\n", ret); + goto attach_fail; + } + oiommu->domain = domain; + iommu->iommu_dev = oiommu; + } + + omap_domain->dev = dev; + + goto out; + +attach_fail: + while (i--) { + iommu--; + arch_data--; + oiommu = iommu->iommu_dev; + omap_iommu_detach(oiommu); + iommu->iommu_dev = NULL; + oiommu->domain = NULL; + } +init_fail: + omap_iommu_detach_fini(omap_domain); out: spin_unlock(&omap_domain->lock); return ret; @@ -1146,21 +1310,40 @@ out: static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, struct device *dev) { - struct omap_iommu *oiommu = dev_to_omap_iommu(dev); + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct 
omap_iommu_device *iommu = omap_domain->iommus; + struct omap_iommu *oiommu; + int i; - /* only a single device is supported per domain for now */ - if (omap_domain->iommu_dev != oiommu) { - dev_err(dev, "invalid iommu device\n"); + if (!omap_domain->dev) { + dev_err(dev, "domain has no attached device\n"); return; } - iopgtable_clear_entry_all(oiommu); + /* only a single device is supported per domain for now */ + if (omap_domain->dev != dev) { + dev_err(dev, "invalid attached device\n"); + return; + } - omap_iommu_detach(oiommu); + /* + * cleanup in the reverse order of attachment - this addresses + * any h/w dependencies between multiple instances, if any + */ + iommu += (omap_domain->num_iommus - 1); + arch_data += (omap_domain->num_iommus - 1); + for (i = 0; i < omap_domain->num_iommus; i++, iommu--, arch_data--) { + oiommu = iommu->iommu_dev; + iopgtable_clear_entry_all(oiommu); + + omap_iommu_detach(oiommu); + iommu->iommu_dev = NULL; + oiommu->domain = NULL; + } + + omap_iommu_detach_fini(omap_domain); - omap_domain->iommu_dev = NULL; omap_domain->dev = NULL; - oiommu->domain = NULL; } static void omap_iommu_detach_dev(struct iommu_domain *domain, @@ -1182,18 +1365,7 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL); if (!omap_domain) - goto out; - - omap_domain->pgtable = kzalloc(IOPGD_TABLE_SIZE, GFP_KERNEL); - if (!omap_domain->pgtable) - goto fail_nomem; - - /* - * should never fail, but please keep this around to ensure - * we keep the hardware happy - */ - if (WARN_ON(!IS_ALIGNED((long)omap_domain->pgtable, IOPGD_TABLE_SIZE))) - goto fail_align; + return NULL; spin_lock_init(&omap_domain->lock); @@ -1202,13 +1374,6 @@ static struct iommu_domain *omap_iommu_domain_alloc(unsigned type) omap_domain->domain.geometry.force_aperture = true; return &omap_domain->domain; - -fail_align: - kfree(omap_domain->pgtable); -fail_nomem: - kfree(omap_domain); -out: - return NULL; } static void omap_iommu_domain_free(struct iommu_domain *domain) @@ -1219,10 +1384,9 @@ static void omap_iommu_domain_free(struct iommu_domain *domain) * An iommu device is still attached * (currently, only one device can be attached) ? 
*/ - if (omap_domain->iommu_dev) + if (omap_domain->dev) _omap_iommu_detach_dev(omap_domain, omap_domain->dev); - kfree(omap_domain->pgtable); kfree(omap_domain); } @@ -1230,11 +1394,16 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t da) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu = omap_domain->iommu_dev; + struct omap_iommu_device *iommu = omap_domain->iommus; + struct omap_iommu *oiommu = iommu->iommu_dev; struct device *dev = oiommu->dev; u32 *pgd, *pte; phys_addr_t ret = 0; + /* + * all the iommus within the domain will have identical programming, + * so perform the lookup using just the first iommu + */ iopgtable_lookup_entry(oiommu, da, &pgd, &pte); if (pte) { @@ -1260,11 +1429,12 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, static int omap_iommu_add_device(struct device *dev) { - struct omap_iommu_arch_data *arch_data; + struct omap_iommu_arch_data *arch_data, *tmp; struct omap_iommu *oiommu; struct iommu_group *group; struct device_node *np; struct platform_device *pdev; + int num_iommus, i; int ret; /* @@ -1276,36 +1446,57 @@ static int omap_iommu_add_device(struct device *dev) if (!dev->of_node) return 0; - np = of_parse_phandle(dev->of_node, "iommus", 0); - if (!np) + /* + * retrieve the count of IOMMU nodes using phandle size as element size + * since #iommu-cells = 0 for OMAP + */ + num_iommus = of_property_count_elems_of_size(dev->of_node, "iommus", + sizeof(phandle)); + if (num_iommus < 0) return 0; - pdev = of_find_device_by_node(np); - if (WARN_ON(!pdev)) { - of_node_put(np); - return -EINVAL; - } - - oiommu = platform_get_drvdata(pdev); - if (!oiommu) { - of_node_put(np); - return -EINVAL; - } - - arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); - if (!arch_data) { - of_node_put(np); + arch_data = kzalloc((num_iommus + 1) * sizeof(*arch_data), GFP_KERNEL); + if (!arch_data) return -ENOMEM; + + for (i = 0, tmp = arch_data; i < num_iommus; i++, tmp++) { + np = of_parse_phandle(dev->of_node, "iommus", i); + if (!np) { + kfree(arch_data); + return -EINVAL; + } + + pdev = of_find_device_by_node(np); + if (WARN_ON(!pdev)) { + of_node_put(np); + kfree(arch_data); + return -EINVAL; + } + + oiommu = platform_get_drvdata(pdev); + if (!oiommu) { + of_node_put(np); + kfree(arch_data); + return -EINVAL; + } + + tmp->iommu_dev = oiommu; + + of_node_put(np); } + /* + * use the first IOMMU alone for the sysfs device linking. 
+ * TODO: Evaluate if a single iommu_group needs to be + * maintained for both IOMMUs + */ + oiommu = arch_data->iommu_dev; ret = iommu_device_link(&oiommu->iommu, dev); if (ret) { kfree(arch_data); - of_node_put(np); return ret; } - arch_data->iommu_dev = oiommu; dev->archdata.iommu = arch_data; /* @@ -1321,8 +1512,6 @@ static int omap_iommu_add_device(struct device *dev) } iommu_group_put(group); - of_node_put(np); - return 0; } diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index a675af29a6ec..1703159ef5af 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -28,18 +28,27 @@ struct iotlb_entry { u32 endian, elsz, mixed; }; +/** + * struct omap_iommu_device - omap iommu device data + * @pgtable: page table used by an omap iommu attached to a domain + * @iommu_dev: pointer to store an omap iommu instance attached to a domain + */ +struct omap_iommu_device { + u32 *pgtable; + struct omap_iommu *iommu_dev; +}; + /** * struct omap_iommu_domain - omap iommu domain - * @pgtable: the page table - * @iommu_dev: an omap iommu device attached to this domain. only a single - * iommu device can be attached for now. + * @num_iommus: number of iommus in this domain + * @iommus: omap iommu device data for all iommus in this domain * @dev: Device using this domain. * @lock: domain lock, should be taken when attaching/detaching * @domain: generic domain handle used by iommu core code */ struct omap_iommu_domain { - u32 *pgtable; - struct omap_iommu *iommu_dev; + u32 num_iommus; + struct omap_iommu_device *iommus; struct device *dev; spinlock_t lock; struct iommu_domain domain; @@ -97,17 +106,6 @@ struct iotlb_lock { short vict; }; -/** - * dev_to_omap_iommu() - retrieves an omap iommu object from a user device - * @dev: iommu client device - */ -static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) -{ - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; - - return arch_data->iommu_dev; -} - /* * MMU Register offsets */ diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index c8a587d034b0..4a2c4378b3db 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -443,6 +443,19 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return ret; } +static void qcom_iommu_iotlb_sync(struct iommu_domain *domain) +{ + struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); + struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops, + struct io_pgtable, ops); + if (!qcom_domain->pgtbl_ops) + return; + + pm_runtime_get_sync(qcom_domain->iommu->dev); + qcom_iommu_tlb_sync(pgtable->cookie); + pm_runtime_put_sync(qcom_domain->iommu->dev); +} + static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -570,6 +583,8 @@ static const struct iommu_ops qcom_iommu_ops = { .map = qcom_iommu_map, .unmap = qcom_iommu_unmap, .map_sg = default_iommu_map_sg, + .flush_iotlb_all = qcom_iommu_iotlb_sync, + .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, .add_device = qcom_iommu_add_device, .remove_device = qcom_iommu_remove_device, diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index 329727e00e97..c824329f7012 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -39,8 +39,7 @@ void scif_rma_ep_init(struct scif_endpt *ep) struct scif_endpt_rma_info *rma = &ep->rma_info; mutex_init(&rma->rma_lock); - init_iova_domain(&rma->iovad, PAGE_SIZE, 
SCIF_IOVA_START_PFN, - SCIF_DMA_64BIT_PFN); + init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN); spin_lock_init(&rma->tc_lock); mutex_init(&rma->mmn_lock); INIT_LIST_HEAD(&rma->reg_list); diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e8ffba1052d3..e2433bc50210 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); +extern void dmar_register_bus_notifier(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); diff --git a/include/linux/iova.h b/include/linux/iova.h index d179b9bf7814..928442dda565 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -70,10 +70,12 @@ struct iova_fq { struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ - struct rb_node *cached32_node; /* Save last alloced node */ + struct rb_node *cached_node; /* Save last alloced node */ + struct rb_node *cached32_node; /* Save last 32-bit alloced node */ unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU @@ -148,12 +150,12 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn); + unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit); + unsigned long start_pfn); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -210,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad, static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, + bool flush_rcache) { return 0; } @@ -229,8 +232,7 @@ static inline void copy_reserved_iova(struct iova_domain *from, static inline void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, - unsigned long pfn_32bit) + unsigned long start_pfn) { }
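Taken together, the amd_iommu, intel-iommu and dma-iommu hunks above converge on one calling convention for the extended alloc_iova_fast(): a device whose DMA mask is wider than 32 bits first tries for a SAC address below 4GB without permission to flush the rcaches, and only the final attempt with the full mask passes flush_rcache = true. The sketch below is a userspace illustration of that two-step fallback in isolation; fake_alloc() and demo_alloc_iova() are stand-ins rather than kernel interfaces, and only DMA_BIT_MASK() mirrors its kernel definition.

/*
 * Userspace sketch of the caller-side pattern; not kernel code.
 */
#include <stdio.h>
#include <stdbool.h>

#define DMA_BIT_MASK(n)	(((n) == 64) ? ~0ULL : ((1ULL << (n)) - 1))
#define PAGE_SHIFT	12
#define IOVA_PFN(addr)	((unsigned long)((addr) >> PAGE_SHIFT))

/* stand-in for alloc_iova_fast(iovad, size, limit_pfn, flush_rcache) */
static unsigned long fake_alloc(unsigned long size, unsigned long limit_pfn,
				bool flush_rcache)
{
	printf("alloc %lu pfns below %#lx%s\n", size, limit_pfn,
	       flush_rcache ? " (may flush per-CPU rcaches)" : "");
	/* pretend the 32-bit space is exhausted */
	return limit_pfn > IOVA_PFN(DMA_BIT_MASK(32)) ? limit_pfn - size : 0;
}

static unsigned long demo_alloc_iova(unsigned long pages,
				     unsigned long long dma_mask)
{
	unsigned long pfn = 0;

	/* try to keep PCI devices below 4GB first, without flushing */
	if (dma_mask > DMA_BIT_MASK(32))
		pfn = fake_alloc(pages, IOVA_PFN(DMA_BIT_MASK(32)), false);

	/* last resort: full mask, rcache flushing allowed */
	if (!pfn)
		pfn = fake_alloc(pages, IOVA_PFN(dma_mask), true);

	return pfn;
}

int main(void)
{
	unsigned long pfn = demo_alloc_iova(8, DMA_BIT_MASK(48));

	printf("got pfn %#lx\n", pfn);
	return 0;
}

The point of the new flag is that a routine failure to find space below 4GB no longer empties every CPU's cached IOVAs; only the genuinely last-resort attempt is allowed to pay that cost.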