Red Hat Bugzilla – Attachment 313699 Details for Bug 455813
Under heavy memory usage dma_alloc_coherent does not return aligned address
Description: RHEL5 fix for this issue
Filename:    455813-v3.patch
MIME Type:   text/plain
Creator:     Prarit Bhargava
Created:     2008-08-07 15:03:07 UTC
Size:        10.50 KB
Flags:       patch, obsolete
pci_alloc_consistent/dma_alloc_coherent is supposed to return size aligned
addresses.

From Documentation/DMA-mapping.txt:

"pci_alloc_consistent returns two values: the virtual address which you
can use to access it from the CPU and dma_handle which you pass to the
card.

The cpu return address and the DMA bus master address are both
guaranteed to be aligned to the smallest PAGE_SIZE order which
is greater than or equal to the requested size. This invariant
exists (for example) to guarantee that if you allocate a chunk
which is smaller than or equal to 64 kilobytes, the extent of the
buffer you receive will not cross a 64K boundary."

1. Backport upstream iommu-helper code into RHEL5.

While #1 was being done, two bugs were noted:

2. It is possible that alloc_iommu()'s boundary_size overflows as
dma_get_seg_boundary can return 0xffffffff. In that case, further usage of
boundary_size triggers a BUG_ON() in the iommu code.

3. Fix the GART's alloc_iommu code to return a size aligned address.

(Patch for 2 & 3 was sent upstream and is in jbarnes' for-linus PCI tree)

Patch was tested with a similar test module provided in BZ 298811.

Resolves BZ 455813.

diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index 565ee9b..1c4c523 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -22,6 +22,7 @@
 #include <linux/topology.h>
 #include <linux/interrupt.h>
 #include <linux/bitops.h>
+#include <linux/iommu-helper.h>
 #include <asm/atomic.h>
 #include <asm/io.h>
 #include <asm/mtrr.h>
@@ -76,37 +77,50 @@ AGPEXTERN __u32 *agp_gatt_table;
 static unsigned long next_bit; /* protected by iommu_bitmap_lock */
 static int need_flush; /* global flush state. set for each gart wrap */
 
-static unsigned long alloc_iommu(int size)
-{
+static unsigned long alloc_iommu(struct device *dev, int size,
+				 unsigned long mask)
+{
 	unsigned long offset, flags;
+	unsigned long boundary_size;
+	unsigned long base_index;
+
+	base_index = ALIGN(iommu_bus_base & 0xffffffff,
+			   PAGE_SIZE) >> PAGE_SHIFT;
+	boundary_size = ALIGN((unsigned long long) 0x100000000,
+			      PAGE_SIZE) >> PAGE_SHIFT;
 
-	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	offset = find_next_zero_string(iommu_gart_bitmap,next_bit,iommu_pages,size);
+	spin_lock_irqsave(&iommu_bitmap_lock, flags);
+	offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit,
+				  size, base_index, boundary_size, mask);
 	if (offset == -1) {
 		need_flush = 1;
-		offset = find_next_zero_string(iommu_gart_bitmap,0,iommu_pages,size);
+		offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, 0,
+					  size, base_index, boundary_size,
+					  mask);
 	}
-	if (offset != -1) {
-		set_bit_string(iommu_gart_bitmap, offset, size);
-		next_bit = offset+size;
-		if (next_bit >= iommu_pages) {
+	if (offset != -1) {
+		set_bit_string(iommu_gart_bitmap, offset, size);
+		next_bit = offset+size;
+		if (next_bit >= iommu_pages) {
 			next_bit = 0;
 			need_flush = 1;
-		}
-	}
+		}
+	}
 	if (iommu_fullflush)
 		need_flush = 1;
-	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
+
 	return offset;
-}
+}
 
 static void free_iommu(unsigned long offset, int size)
-{
+{
 	unsigned long flags;
+
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
-	__clear_bit_string(iommu_gart_bitmap, offset, size);
+	iommu_area_free(iommu_gart_bitmap, offset, size);
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
-}
+}
 
 /*
  * Use global flush state to avoid races with multiple flushers.
@@ -204,10 +218,11 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t
  * Caller needs to check if the iommu is needed and flush.
  */
 static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
-			       size_t size, int dir)
+			       size_t size, int dir, u64 align_mask)
 {
 	unsigned long npages = to_pages(phys_mem, size);
-	unsigned long iommu_page = alloc_iommu(npages);
+	unsigned long palign_mask = align_mask >> PAGE_SHIFT;
+	unsigned long iommu_page = alloc_iommu(dev, npages, palign_mask);
 	int i;
 	if (iommu_page == -1) {
 		if (!nonforced_iommu(dev, phys_mem, size))
@@ -223,13 +238,15 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
 		SET_LEAK(iommu_page + i);
 		phys_mem += PAGE_SIZE;
 	}
+
 	return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK);
 }
 
 static dma_addr_t gart_map_simple(struct device *dev, char *buf,
 				  size_t size, int dir)
 {
-	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir);
+	dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir,
+				      size - 1);
 	flush_gart();
 	return map;
 }
@@ -248,7 +265,9 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
 	if (!need_iommu(dev, phys_mem, size))
 		return phys_mem;
 
-	bus = gart_map_simple(dev, addr, size, dir);
+	bus = dma_map_area(dev, virt_to_bus(addr), size, dir, 0);
+	flush_gart();
+
 	return bus;
 }
 
@@ -303,7 +322,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 		struct scatterlist *s = &sg[i];
 		unsigned long addr = page_to_phys(s->page) + s->offset;
 		if (nonforced_iommu(dev, addr, s->length)) {
-			addr = dma_map_area(dev, addr, s->length, dir);
+			addr = dma_map_area(dev, addr, s->length, dir, 0);
 			if (addr == bad_dma_address) {
 				if (i > 0)
 					gart_unmap_sg(dev, sg, i, dir);
@@ -320,10 +339,11 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg,
 }
 
 /* Map multiple scatterlist entries continuous into the first. */
-static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
-			  struct scatterlist *sout, unsigned long pages)
+static int __dma_map_cont(struct device *dev, struct scatterlist *sg, int start,
+			  int stopat, struct scatterlist *sout,
+			  unsigned long pages)
 {
-	unsigned long iommu_start = alloc_iommu(pages);
+	unsigned long iommu_start = alloc_iommu(dev, pages, 0);
 	unsigned long iommu_page = iommu_start;
 	int i;
 
@@ -358,9 +378,10 @@ static int __dma_map_cont(struct scatterlist *sg, int start, int stopat,
 	return 0;
 }
 
-static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
-			       struct scatterlist *sout,
-			       unsigned long pages, int need)
+static inline int dma_map_cont(struct device *dev, struct scatterlist *sg,
+			       int start, int stopat,
+			       struct scatterlist *sout,
+			       unsigned long pages, int need)
 {
 	if (!need) {
 		BUG_ON(stopat - start != 1);
@@ -368,7 +389,7 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat,
 		sout->dma_length = sg[start].length;
 		return 0;
 	}
-	return __dma_map_cont(sg, start, stopat, sout, pages);
+	return __dma_map_cont(dev, sg, start, stopat, sout, pages);
 }
 
 /*
@@ -407,8 +428,8 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 		   boundary and the new one doesn't have an offset. */
 		if (!iommu_merge || !nextneed || !need || s->offset ||
 		    (ps->offset + ps->length) % PAGE_SIZE) {
-			if (dma_map_cont(sg, start, i, sg+out, pages,
-					 need) < 0)
+			if (dma_map_cont(dev, sg, start, i, sg+out,
+					 pages, need) < 0)
 				goto error;
 			out++;
 			pages = 0;
@@ -419,7 +440,7 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
 		need = nextneed;
 		pages += to_pages(s->offset, s->length);
 	}
-	if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0)
+	if (dma_map_cont(dev, sg, start, i, sg+out, pages, need) < 0)
 		goto error;
 	out++;
 	flush_gart();
diff --git a/lib/Makefile b/lib/Makefile
index 09891f6..7f0cdc7 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -55,6 +55,8 @@ obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 
 obj-$(CONFIG_SWIOTLB) += swiotlb.o
+obj-$(CONFIG_IOMMU) += iommu-helper.o
+
 ifeq ($(CONFIG_X86),y)
 swiotlb-$(CONFIG_XEN) := ../arch/i386/kernel/swiotlb.o
 endif
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
new file mode 100644
index 0000000..c975caf
--- /dev/null
+++ b/include/linux/iommu-helper.h
@@ -0,0 +1,10 @@
+extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
+				  unsigned long shift,
+				  unsigned long boundary_size);
+extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
+				      unsigned long start, unsigned int nr,
+				      unsigned long shift,
+				      unsigned long boundary_size,
+				      unsigned long align_mask);
+extern void iommu_area_free(unsigned long *map, unsigned long start,
+			    unsigned int nr);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
new file mode 100644
index 0000000..e0c0e76
--- /dev/null
+++ b/lib/iommu-helper.c
@@ -0,0 +1,81 @@
+/*
+ * IOMMU helper functions for the free area management
+ */
+
+#include <linux/module.h>
+#include <linux/bitops.h>
+
+static unsigned long find_next_zero_area(unsigned long *map,
+					 unsigned long size,
+					 unsigned long start,
+					 unsigned int nr,
+					 unsigned long align_mask)
+{
+	unsigned long index, end, i;
+again:
+	index = find_next_zero_bit(map, size, start);
+
+	/* Align allocation */
+	index = (index + align_mask) & ~align_mask;
+	end = index + nr;
+	if (end >= size)
+		return -1;
+	for (i = index; i < end; i++) {
+		if (test_bit(i, map)) {
+			start = i+1;
+			goto again;
+		}
+	}
+	return index;
+}
+
+static inline void set_bit_area(unsigned long *map, unsigned long i,
+				int len)
+{
+	unsigned long end = i + len;
+	while (i < end) {
+		__set_bit(i, map);
+		i++;
+	}
+}
+
+int iommu_is_span_boundary(unsigned int index, unsigned int nr,
+			   unsigned long shift,
+			   unsigned long boundary_size)
+{
+	BUG_ON(!is_power_of_2(boundary_size));
+
+	shift = (shift + index) & (boundary_size - 1);
+	return shift + nr > boundary_size;
+}
+
+unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
+			       unsigned long start, unsigned int nr,
+			       unsigned long shift, unsigned long boundary_size,
+			       unsigned long align_mask)
+{
+	unsigned long index = 0;
+again:
+	index = find_next_zero_area(map, size, start, nr, align_mask);
+	if (index != -1) {
+		if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
+			/* we could do more effectively */
+			start = index + 1;
+			goto again;
+		}
+		set_bit_area(map, index, nr);
+	}
+	return index;
+}
+EXPORT_SYMBOL(iommu_area_alloc);
+
+void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
+{
+	unsigned long end = start + nr;
+
+	while (start < end) {
+		__clear_bit(start, map);
+		start++;
+	}
+}
+EXPORT_SYMBOL(iommu_area_free);
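The description above says the patch was verified with a test module similar to the one attached to BZ 298811; that module is not reproduced in this attachment. As a rough sketch only, a minimal check of the documented invariant could look like the module below. The module name, the use of pci_get_device() to borrow an arbitrary struct device, and the 16 KB request size are illustrative assumptions, not details taken from the actual test.

/*
 * Illustrative sketch only: allocate a power-of-two sized coherent
 * buffer and check the documented invariant that the returned bus
 * address is aligned to the smallest PAGE_SIZE order >= the request.
 * For a power-of-two size, that reduces to plain size alignment.
 */
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>

static int __init dma_align_test_init(void)
{
	size_t size = 16 * 1024;	/* power of two, so alignment == size */
	struct pci_dev *pdev;
	dma_addr_t handle;
	void *cpu_addr;

	/* Borrow any PCI device just to have a struct device to map against. */
	pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL);
	if (!pdev)
		return -ENODEV;

	cpu_addr = dma_alloc_coherent(&pdev->dev, size, &handle, GFP_KERNEL);
	if (!cpu_addr) {
		pci_dev_put(pdev);
		return -ENOMEM;
	}

	/* Any low bit set below the buffer size means the invariant broke. */
	if (handle & (size - 1))
		printk(KERN_ERR "dma_align_test: unaligned: handle=0x%llx size=%zu\n",
		       (unsigned long long)handle, size);
	else
		printk(KERN_INFO "dma_align_test: aligned: handle=0x%llx\n",
		       (unsigned long long)handle);

	dma_free_coherent(&pdev->dev, size, cpu_addr, handle);
	pci_dev_put(pdev);
	return 0;
}

static void __exit dma_align_test_exit(void)
{
}

module_init(dma_align_test_init);
module_exit(dma_align_test_exit);
MODULE_LICENSE("GPL");

Note that a single allocation like this will usually be satisfied directly by the page allocator and pass even on an unpatched kernel; per the bug title, the misalignment only shows up once heavy memory usage pushes dma_alloc_coherent through the GART remapping path that alloc_iommu() serves.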