Bug 434684

Summary: Promise SATA Controller Data corruption
Product: Red Hat Enterprise Linux 5 Reporter: Morey Roof <mroof>
Component: kernel Assignee: David Milburn <dmilburn>
Status: CLOSED NEXTRELEASE QA Contact: Red Hat Kernel QE team <kernel-qe>
Severity: high Docs Contact:
Priority: low    
Version: 5.1 CC: dmilburn, dzickus
Target Milestone: rc   
Target Release: ---   
Hardware: All   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: 2010-05-07 16:38:47 UTC Type: ---
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Embargoed:
Attachments:
Description Flags
Patch to correct Promise SATA PRD issue none

Description Morey Roof 2008-02-24 10:24:17 UTC
The Promise controllers supported by the sata_promise module can have
an ASIC bug with the PRD entries.  This bug often appears with messages on the
console and in the event log that look like this:

ata3.00: exception Emask 0x0 SAct 0x0 SErr 0x0 action 0x2
ata3.00: (port_status 0x20080000)
ata3.00: cmd 25/00:08:85:31:b3/00:00:14:00:00/e0 tag 0 cdb 0x0 data 4096 in
         res 50/00:00:8c:31:b3/00:00:14:00:00/e0 Emask 0x2 (HSM violation)
ata3: soft resetting port
ata3: SATA link up 3.0 Gbps (SStatus 123 SControl 300)
ata3.00: configured for UDMA/133
ata3: EH complete
SCSI device sdc: 586072368 512-byte hdwr sectors (300069 MB)
sdc: Write Protect is off
sdc: Mode Sense: 00 3a 00 00
SCSI device sdc: drive cache: write back


My testing shows that it can cause data corruption problems under high load. 
The Linux IDE mailing list has gone over the problems with these controllers and a
corrective patch was made.  I took the patch that emerged and put this one
together so that it applies to the currently released kernel for RHEL 5.1
(2.6.18-53.1.13.el5).  The patch is shown at the end of this report, but it would be
really nice to have this in the package from Red Hat so that others don't have to
go searching for the fix.

----------------------------------------------
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -45,12 +45,13 @@
 #include "sata_promise.h"

 #define DRV_NAME       "sata_promise"
-#define DRV_VERSION    "2.07"
+#define DRV_VERSION    "2.07asicfix"


 enum {
        PDC_MAX_PORTS           = 4,
        PDC_MMIO_BAR            = 3,
+       PDC_MAX_PRD             = LIBATA_MAX_PRD - 1, /* -1 for ASIC PRD bug workaround */

        /* register offsets */
        PDC_FEATURE             = 0x04, /* Feature/Error reg (per port) */
@@ -157,7 +158,7 @@ static struct scsi_host_template pdc_ata
        .queuecommand           = ata_scsi_queuecmd,
        .can_queue              = ATA_DEF_QUEUE,
        .this_id                = ATA_SHT_THIS_ID,
-       .sg_tablesize           = LIBATA_MAX_PRD,
+       .sg_tablesize           = PDC_MAX_PRD,
        .cmd_per_lun            = ATA_SHT_CMD_PER_LUN,
        .emulated               = ATA_SHT_EMULATED,
        .use_clustering         = ATA_SHT_USE_CLUSTERING,
@@ -531,6 +532,84 @@ static void pdc_atapi_pkt(struct ata_que
        memcpy(buf+31, cdb, cdb_len);
 }

+/**
+ *     pdc_fill_sg - Fill PCI IDE PRD table
+ *     @qc: Metadata associated with taskfile to be transferred
+ *
+ *     Fill PCI IDE PRD (scatter-gather) table with segments
+ *     associated with the current disk command.
+ *     Make sure hardware does not choke on it.
+ *
+ *     LOCKING:
+ *     spin_lock_irqsave(host lock)
+ *
+ */
+static void pdc_fill_sg(struct ata_queued_cmd *qc)
+{
+        struct ata_port *ap = qc->ap;
+        struct scatterlist *sg;
+        unsigned int idx;
+        const u32 SG_COUNT_ASIC_BUG = 41*4;
+
+        if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+                return;
+
+        WARN_ON(qc->__sg == NULL);
+        WARN_ON(qc->n_elem == 0 && qc->pad_len == 0);
+
+        idx = 0;
+        ata_for_each_sg(sg, qc) {
+                u32 addr, offset;
+                u32 sg_len, len;
+
+                /* determine if physical DMA addr spans 64K boundary.
+                 * Note h/w doesn't support 64-bit, so we unconditionally
+                 * truncate dma_addr_t to u32.
+                 */
+                addr = (u32) sg_dma_address(sg);
+                sg_len = sg_dma_len(sg);
+
+                while (sg_len) {
+                        offset = addr & 0xffff;
+                        len = sg_len;
+                        if ((offset + sg_len) > 0x10000)
+                                len = 0x10000 - offset;
+
+                        ap->prd[idx].addr = cpu_to_le32(addr);
+                        ap->prd[idx].flags_len = cpu_to_le32(len & 0xffff);
+                        VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
+
+                        idx++;
+                        sg_len -= len;
+                        addr += len;
+                }
+        }
+
+        if (idx) {
+                u32 len = le32_to_cpu(ap->prd[idx - 1].flags_len);
+
+                if (len > SG_COUNT_ASIC_BUG) {
+                        u32 addr;
+
+                        VPRINTK("Splitting last PRD.\n");
+
+                        addr = le32_to_cpu(ap->prd[idx - 1].addr);
+                        ap->prd[idx - 1].flags_len = cpu_to_le32(len - SG_COUNT_ASIC_BUG);
+                        VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx - 1, addr, SG_COUNT_ASIC_BUG);
+
+                        addr = addr + len - SG_COUNT_ASIC_BUG;
+                        len = SG_COUNT_ASIC_BUG;
+                        ap->prd[idx].addr = cpu_to_le32(addr);
+                        ap->prd[idx].flags_len = cpu_to_le32(len);
+                        VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
+
+                        idx++;
+                }
+
+                ap->prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+        }
+}
+
 static void pdc_qc_prep(struct ata_queued_cmd *qc)
 {
        struct pdc_port_priv *pp = qc->ap->private_data;
@@ -540,7 +619,7 @@ static void pdc_qc_prep(struct ata_queue

        switch (qc->tf.protocol) {
        case ATA_PROT_DMA:
-               ata_qc_prep(qc);
+               pdc_fill_sg(qc);
                /* fall through */

        case ATA_PROT_NODATA:
@@ -556,11 +635,11 @@ static void pdc_qc_prep(struct ata_queue
                break;

        case ATA_PROT_ATAPI:
-               ata_qc_prep(qc);
+               pdc_fill_sg(qc);
                break;

        case ATA_PROT_ATAPI_DMA:
-               ata_qc_prep(qc);
+               pdc_fill_sg(qc);
                /*FALLTHROUGH*/
        case ATA_PROT_ATAPI_NODATA:
                pdc_atapi_pkt(qc);

Comment 1 Morey Roof 2008-02-24 10:26:54 UTC
Created attachment 295736 [details]
Patch to correct Promise SATA PRD issue

Comment 2 David Milburn 2010-05-07 16:38:47 UTC
Verified fix is in the current RHEL5

commit b9ccd4a90bbb964506f01b4bdcff4f50f8d5d334
Author: Mikael Pettersson <mikpe.se>
Date:   Tue Oct 30 14:20:49 2007 +0100

    sata_promise: ASIC PRD table bug workaround, take 2
    
    Second-generation Promise SATA controllers have an ASIC bug
    which can trigger if the last PRD entry is larger than 164 bytes,
    resulting in intermittent errors and possible data corruption.
    
    Work around this by replacing calls to ata_qc_prep() with a
    private version that fills the PRD, checks the size of the
    last entry, and if necessary splits it to avoid the bug.
    Also reduce sg_tablesize by 1 to accommodate the new entry.