Bug 434684 - Promise SATA Controller Data corruption
Promise SATA Controller Data corruption
Status: CLOSED NEXTRELEASE
Product: Red Hat Enterprise Linux 5
Classification: Red Hat
Component: kernel (Show other bugs)
5.1
All Linux
low Severity high
: rc
: ---
Assigned To: David Milburn
Red Hat Kernel QE team
:
Depends On:
Blocks:
  Show dependency treegraph
 
Reported: 2008-02-24 05:24 EST by Morey Roof
Modified: 2010-05-07 12:38 EDT (History)
2 users (show)

See Also:
Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of:
Environment:
Last Closed: 2010-05-07 12:38:47 EDT
Type: ---
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:
Category: ---
oVirt Team: ---
RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: ---


Attachments (Terms of Use)
Patch to correct Promise SATA PRD issue (4.05 KB, text/plain)
2008-02-24 05:26 EST, Morey Roof
no flags Details

  None (edit)
Description Morey Roof 2008-02-24 05:24:17 EST
The Promise controllers supported by the sata_promise module can have
an ASIC bug involving the PRD entries.  This bug often shows up as messages on
the console and in the event log that look like this:

ata3.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x2
ata3.00: (port_status 0x20080000)
ata3.00: cmd 25/00:08:85:31:b3/00:00:14:00:00/e0 tag 0 cdb 0x0 data 4096 in
         res 50/00:00:8c:31:b3/00:00:14:00:00/e0 Emask 0x2 (HSM violation)
ata3: soft resetting port
ata3: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
ata3.00: configured for UDMA/133
ata3: EH complete
SCSI device sdc: 586072368 512-byte hdwr sectors (300069 MB)
sdc: Write Protect is off
sdc: Mode Sense: 00 3a 00 00
SCSI device sdc: drive cache: write back


My testing shows that it can cause data corruption problems under high load. 
The Linux IDE mailing list has gone over the problems with these controllers and
a corrective patch was made.  I took the patch that emerged and put this one
together so that it applies to the currently released kernel for RHEL 5.1
(2.6.18-53.1.13.el5).  The patch is shown at the end of this report, but it would
be really nice to have this in the package from Red Hat so that others don't have
to go searching for the fix.

----------------------------------------------
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -45,12 +45,13 @@
 #include "sata_promise.h"

 #define DRV_NAME       "sata_promise"
-#define DRV_VERSION    "2.07"
+#define DRV_VERSION    "2.07asicfix"


 enum {
        PDC_MAX_PORTS           = 4,
        PDC_MMIO_BAR            = 3,
+       PDC_MAX_PRD             = LIBATA_MAX_PRD - 1, /* -1 for ASIC PRD bug
workaround */

        /* register offsets */
        PDC_FEATURE             = 0x04, /* Feature/Error reg (per port) */
@@ -157,7 +158,7 @@ static struct scsi_host_template pdc_ata
        .queuecommand           = ata_scsi_queuecmd,
        .can_queue              = ATA_DEF_QUEUE,
        .this_id                = ATA_SHT_THIS_ID,
-       .sg_tablesize           = LIBATA_MAX_PRD,
+       .sg_tablesize           = PDC_MAX_PRD,
        .cmd_per_lun            = ATA_SHT_CMD_PER_LUN,
        .emulated               = ATA_SHT_EMULATED,
        .use_clustering         = ATA_SHT_USE_CLUSTERING,
@@ -531,6 +532,84 @@ static void pdc_atapi_pkt(struct ata_que
        memcpy(buf+31, cdb, cdb_len);
 }

+/**
+ *     pdc_fill_sg - Fill PCI IDE PRD table
+ *     @qc: Metadata associated with taskfile to be transferred
+ *
+ *     Fill PCI IDE PRD (scatter-gather) table with segments
+ *     associated with the current disk command.
+ *     Make sure hardware does not choke on it.
+ *
+ *     LOCKING:
+ *     spin_lock_irqsave(host lock)
+ *
+ */
+static void pdc_fill_sg(struct ata_queued_cmd *qc)
+{
+        struct ata_port *ap = qc->ap;
+        struct scatterlist *sg;
+        unsigned int idx;
+        const u32 SG_COUNT_ASIC_BUG = 41*4;
+
+        if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+                return;
+
+        WARN_ON(qc->__sg == NULL);
+        WARN_ON(qc->n_elem == 0 && qc->pad_len == 0);
+
+        idx = 0;
+        ata_for_each_sg(sg, qc) {
+                u32 addr, offset;
+                u32 sg_len, len;
+
+                /* determine if physical DMA addr spans 64K boundary.
+                 * Note h/w doesn't support 64-bit, so we unconditionally
+                 * truncate dma_addr_t to u32.
+                 */
+                addr = (u32) sg_dma_address(sg);
+                sg_len = sg_dma_len(sg);
+
+                while (sg_len) {
+                        offset = addr & 0xffff;
+                        len = sg_len;
+                        if ((offset + sg_len) > 0x10000)
+                                len = 0x10000 - offset;
+
+                        ap->prd[idx].addr = cpu_to_le32(addr);
+                        ap->prd[idx].flags_len = cpu_to_le32(len & 0xffff);
+                        VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
+
+                        idx++;
+                        sg_len -= len;
+                        addr += len;
+                }
+        }
+
+        if (idx) {
+                u32 len = le32_to_cpu(ap->prd[idx - 1].flags_len);
+
+                if (len > SG_COUNT_ASIC_BUG) {
+                        u32 addr;
+
+                        VPRINTK("Splitting last PRD.\n");
+
+                        addr = le32_to_cpu(ap->prd[idx - 1].addr);
+                        ap->prd[idx - 1].flags_len = cpu_to_le32(len -
SG_COUNT_ASIC_BUG);
+                        VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx - 1, addr,
SG_COUNT_ASIC_BUG);
+
+                        addr = addr + len - SG_COUNT_ASIC_BUG;
+                        len = SG_COUNT_ASIC_BUG;
+                        ap->prd[idx].addr = cpu_to_le32(addr);
+                        ap->prd[idx].flags_len = cpu_to_le32(len);
+                        VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
+
+                        idx++;
+                }
+
+                ap->prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+        }
+}
+
 static void pdc_qc_prep(struct ata_queued_cmd *qc)
 {
        struct pdc_port_priv *pp = qc->ap->private_data;
@@ -540,7 +619,7 @@ static void pdc_qc_prep(struct ata_queue

        switch (qc->tf.protocol) {
        case ATA_PROT_DMA:
-               ata_qc_prep(qc);
+               pdc_fill_sg(qc);
                /* fall through */

        case ATA_PROT_NODATA:
@@ -556,11 +635,11 @@ static void pdc_qc_prep(struct ata_queue
                break;

        case ATA_PROT_ATAPI:
-               ata_qc_prep(qc);
+               pdc_fill_sg(qc);
                break;

        case ATA_PROT_ATAPI_DMA:
-               ata_qc_prep(qc);
+               pdc_fill_sg(qc);
                /*FALLTHROUGH*/
        case ATA_PROT_ATAPI_NODATA:
                pdc_atapi_pkt(qc);
Comment 1 Morey Roof 2008-02-24 05:26:54 EST
Created attachment 295736 [details]
Patch to correct Promise SATA PRD issue
Comment 2 David Milburn 2010-05-07 12:38:47 EDT
Verified fix is in the current RHEL5

commit b9ccd4a90bbb964506f01b4bdcff4f50f8d5d334
Author: Mikael Pettersson <mikpe@it.uu.se>
Date:   Tue Oct 30 14:20:49 2007 +0100

    sata_promise: ASIC PRD table bug workaround, take 2
    
    Second-generation Promise SATA controllers have an ASIC bug
    which can trigger if the last PRD entry is larger than 164 bytes,
    resulting in intermittent errors and possible data corruption.
    
    Work around this by replacing calls to ata_qc_prep() with a
    private version that fills the PRD, checks the size of the
    last entry, and if necessary splits it to avoid the bug.
    Also reduce sg_tablesize by 1 to accommodate the new entry.

Note You need to log in before you can comment on or make changes to this bug.