Procházet zdrojové kódy

RP2040: Make accelerated SCSI routines work for odd number of bytes

Primarily this fixes issues with synchronous mode transfers of
ModeSense data. The previous fix in #89 (commit f11c0d4750) was a
workaround that didn't fully implement the sync mode behavior.

This commit unifies the behavior so that all writes and reads go
through the same code path.

As-is, this commit causes a small performance degradation for
transfer rates above 7 MB/s.
It will be optimized in a following commit.
Petteri Aimonen před 2 roky
rodič
revize
6c0b08cf55

+ 2 - 32
lib/ZuluSCSI_platform_RP2040/scsiPhy.cpp

@@ -297,22 +297,7 @@ extern "C" void scsiWrite(const uint8_t* data, uint32_t count)
 extern "C" void scsiStartWrite(const uint8_t* data, uint32_t count)
 {
     scsiLogDataIn(data, count);
-
-    if ((count & 1) != 0 || ((uint32_t)data & 1) != 0)
-    {
-        // Unaligned write, do it byte-by-byte
-        scsiFinishWrite();
-        for (uint32_t i = 0; i < count; i++)
-        {
-            if (scsiDev.resetFlag) break;
-            scsiWriteOneByte(data[i]);
-        }
-    }
-    else
-    {
-        // Use accelerated routine
-        scsi_accel_rp2040_startWrite(data, count, &scsiDev.resetFlag);
-    }
+    scsi_accel_rp2040_startWrite(data, count, &scsiDev.resetFlag);
 }
 
 extern "C" bool scsiIsWriteFinished(const uint8_t *data)
@@ -358,21 +343,6 @@ extern "C" uint8_t scsiReadByte(void)
 extern "C" void scsiRead(uint8_t* data, uint32_t count, int* parityError)
 {
     *parityError = 0;
-
-    if ((count & 1) != 0 || ((uint32_t)data & 1) != 0)
-    {
-        // Unaligned transfer, do byte by byte
-        for (uint32_t i = 0; i < count; i++)
-        {
-            if (scsiDev.resetFlag) break;
-            data[i] = scsiReadOneByte(parityError);
-        }
-    }
-    else
-    {
-        // Use accelerated routine
-        scsi_accel_rp2040_read(data, count, parityError, &scsiDev.resetFlag);
-    }
-
+    scsi_accel_rp2040_read(data, count, parityError, &scsiDev.resetFlag);
     scsiLogDataOut(data, count);
 }

+ 7 - 7
lib/ZuluSCSI_platform_RP2040/scsi_accel.pio

@@ -14,21 +14,21 @@
 .define REQ_DLY 7
 
 ; Write to SCSI bus using asynchronous handshake.
-; Data is written as 16-bit words that contain the 8 data bits + 1 parity bit.
-; 7 bits in each word are discarded.
+; Data is written as 32-bit words that contain the 8 data bits + 1 parity bit.
+; 23 bits in each word are discarded.
 ; Number of bytes to send must be multiple of 2.
 .program scsi_accel_async_write
     .side_set 1
 
     pull ifempty block          side 1  ; Get data from TX FIFO
     out pins, 9                 side 1  ; Write data and parity bit
-    out null, 7 [REQ_DLY-2]     side 1  ; Discard unused bits, wait for data preset time
+    out null, 23 [REQ_DLY-2]    side 1  ; Discard unused bits, wait for data preset time
     wait 1 gpio ACK             side 1  ; Wait for ACK to be inactive
     wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
 
 ; Read from SCSI bus using asynchronous handshake.
 ; Also works for synchronous mode down to 50 ns transfer period.
-; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
+; Data is returned as 32-bit words that contain the 8 data bits + 1 parity bit.
 ; Number of bytes to receive minus 1 should be written to TX fifo.
 ; Number of bytes to receive must be divisible by 2.
 .program scsi_accel_async_read
@@ -41,11 +41,11 @@ start:
     wait 1 gpio ACK             side 1  ; Wait for ACK high
     wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
     in pins, 9                  side 1  ; Deassert REQ, read GPIO
-    in null, 7                  side 1  ; Padding bits
+    in null, 23                 side 1  ; Padding bits
     jmp x-- start               side 1  ; Decrement byte count and jump to start
 
 ; Data state machine for synchronous writes.
-; Takes the lowest 9 bits of each 16 bit word and writes them to bus with REQ pulse.
+; Takes the lowest 9 bits of each 32 bit word and writes them to bus with REQ pulse.
 ; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
 ;
 ; Shifts one bit to ISR per every byte transmitted. This is used to control the transfer
@@ -55,7 +55,7 @@ start:
     .side_set 1
 
     out pins, 9     [0]         side 1  ; Write data and parity bit, wait for deskew delay
-    out null, 7     [0]         side 0  ; Assert REQ, wait for assert time
+    out null, 23     [0]        side 0  ; Assert REQ, wait for assert time
     in null, 1      [0]         side 1  ; Deassert REQ, wait for transfer period, wait for space in ACK buffer
 
 ; Data pacing state machine for synchronous writes.

+ 10 - 9
lib/ZuluSCSI_platform_RP2040/scsi_accel.pio.h

@@ -19,7 +19,7 @@ static const uint16_t scsi_accel_async_write_program_instructions[] = {
             //     .wrap_target
     0x90e0, //  0: pull   ifempty block   side 1     
     0x7009, //  1: out    pins, 9         side 1     
-    0x7567, //  2: out    null, 7         side 1 [5] 
+    0x7577, //  2: out    null, 23        side 1 [5] 
     0x308a, //  3: wait   1 gpio, 10      side 1     
     0x200a, //  4: wait   0 gpio, 10      side 0     
             //     .wrap
@@ -54,8 +54,8 @@ static const uint16_t scsi_accel_async_read_program_instructions[] = {
     0x308a, //  2: wait   1 gpio, 10      side 1     
     0x200a, //  3: wait   0 gpio, 10      side 0     
     0x5009, //  4: in     pins, 9         side 1     
-    0x5067, //  5: in     null, 7         side 1     
-    0x1042, //  6: jmp    x--, 2          side 1
+    0x5077, //  5: in     null, 23        side 1     
+    0x1042, //  6: jmp    x--, 2          side 1     
             //     .wrap
 };
 
@@ -83,9 +83,9 @@ static inline pio_sm_config scsi_accel_async_read_program_get_default_config(uin
 
 static const uint16_t scsi_sync_write_program_instructions[] = {
             //     .wrap_target
-    0x7009, //  0: out    pins, 9         side 1
-    0x6067, //  1: out    null, 7         side 0
-    0x5061, //  2: in     null, 1         side 1
+    0x7009, //  0: out    pins, 9         side 1     
+    0x6077, //  1: out    null, 23        side 0     
+    0x5061, //  2: in     null, 1         side 1     
             //     .wrap
 };
 
@@ -113,9 +113,9 @@ static inline pio_sm_config scsi_sync_write_program_get_default_config(uint offs
 
 static const uint16_t scsi_sync_write_pacer_program_instructions[] = {
             //     .wrap_target
-    0x208a, //  0: wait   1 gpio, 10
-    0x200a, //  1: wait   0 gpio, 10
-    0x6061, //  2: out    null, 1
+    0x208a, //  0: wait   1 gpio, 10                 
+    0x200a, //  1: wait   0 gpio, 10                 
+    0x6061, //  2: out    null, 1                    
             //     .wrap
 };
 
@@ -132,3 +132,4 @@ static inline pio_sm_config scsi_sync_write_pacer_program_get_default_config(uin
     return c;
 }
 #endif
+

+ 14 - 22
lib/ZuluSCSI_platform_RP2040/scsi_accel_rp2040.cpp

@@ -25,7 +25,7 @@
 
 enum scsidma_buf_sel_t { SCSIBUF_NONE = 0, SCSIBUF_A = 1, SCSIBUF_B = 2 };
 
-#define DMA_BUF_SIZE 128
+#define DMA_BUF_SIZE 256
 static struct {
     uint8_t *app_buf; // Buffer provided by application
     uint32_t app_bytes; // Bytes available in application buffer
@@ -59,8 +59,8 @@ static struct {
     scsidma_buf_sel_t dma_current_buf;
     uint32_t dma_countA;
     uint32_t dma_countB;
-    uint32_t dma_bufA[DMA_BUF_SIZE];
-    uint32_t dma_bufB[DMA_BUF_SIZE];
+    uint16_t dma_bufA[DMA_BUF_SIZE];
+    uint16_t dma_bufB[DMA_BUF_SIZE];
 
     // Try to offload SCSI DMA interrupts to second core if possible
     volatile bool core1_active;
@@ -74,7 +74,7 @@ static volatile scsidma_state_t g_scsi_dma_state;
 static bool g_channels_claimed = false;
 
 // Fill DMA buffer and return number of words ready to be transferred
-static uint32_t refill_dmabuf(uint32_t *buf)
+static uint32_t refill_dmabuf(uint16_t *buf)
 {
     if (g_scsi_dma.app_bytes == 0 && g_scsi_dma.next_app_bytes > 0)
     {
@@ -85,20 +85,18 @@ static uint32_t refill_dmabuf(uint32_t *buf)
         g_scsi_dma.next_app_bytes = 0;
     }
 
-    uint32_t count = (g_scsi_dma.app_bytes - g_scsi_dma.dma_bytes) / 2;
+    uint32_t count = (g_scsi_dma.app_bytes - g_scsi_dma.dma_bytes);
     if (count > DMA_BUF_SIZE) count = DMA_BUF_SIZE;
 
-    uint16_t *src = (uint16_t*)&g_scsi_dma.app_buf[g_scsi_dma.dma_bytes];
-    uint16_t *end = src + count;
-    uint32_t *dst = buf;
+    uint8_t *src = &g_scsi_dma.app_buf[g_scsi_dma.dma_bytes];
+    uint8_t *end = src + count;
+    uint16_t *dst = buf;
     while (src < end)
     {
-        uint16_t input = *src++;
-        *dst++ = (g_scsi_parity_lookup[input & 0xFF])
-               | ((g_scsi_parity_lookup[input >> 8]) << 16);
+        *dst++ = (g_scsi_parity_lookup[*src++]);
     }
 
-    g_scsi_dma.dma_bytes += count * 2;
+    g_scsi_dma.dma_bytes += count;
 
     // Check if this buffer has been fully processed
     if (g_scsi_dma.dma_bytes >= g_scsi_dma.app_bytes)
@@ -309,9 +307,6 @@ static void scsi_dma_unblock_irqs()
 
 void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag)
 {
-    // Number of bytes should always be divisible by 2.
-    assert((count & 1) == 0);
-
     scsi_dma_block_irqs();
     if (g_scsi_dma_state == SCSIDMA_WRITE)
     {
@@ -523,8 +518,7 @@ void scsi_accel_rp2040_read(uint8_t *buf, uint32_t count, int *parityError, vola
     scsidma_config_gpio();
     pio_sm_set_enabled(SCSI_DMA_PIO, SCSI_DMA_SM, true);
 
-    // Set the number of bytes to read, must be divisible by 2.
-    assert((count & 1) == 0);
+    // Set the number of bytes to read
     pio_sm_put(SCSI_DMA_PIO, SCSI_DMA_SM, count - 1);
 
     // Read results from PIO RX FIFO
@@ -547,17 +541,15 @@ void scsi_accel_rp2040_read(uint8_t *buf, uint32_t count, int *parityError, vola
             paritycheck ^= word;
             word = ~word;
             *dst++ = word & 0xFF;
-            *dst++ = word >> 16;
         }
     }
 
     // Check parity errors in whole block
     // This doesn't detect if there is even number of parity errors in block.
     uint8_t byte0 = ~(paritycheck & 0xFF);
-    uint8_t byte1 = ~(paritycheck >> 16);
-    if (paritycheck != ((g_scsi_parity_lookup[byte1] << 16) | g_scsi_parity_lookup[byte0]))
+    if (paritycheck != g_scsi_parity_lookup[byte0])
     {
-        azlog("Parity error in scsi_accel_rp2040_read(): ", paritycheck);
+        azdbg("Parity error in scsi_accel_rp2040_read(): ", paritycheck);
         *parityError = 1;
     }
 
@@ -615,7 +607,7 @@ void scsi_accel_rp2040_init()
 
     // Create DMA channel configuration so it can be applied quickly later
     dma_channel_config cfg = dma_channel_get_default_config(SCSI_DMA_CH);
-    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_32);
+    channel_config_set_transfer_data_size(&cfg, DMA_SIZE_16);
     channel_config_set_read_increment(&cfg, true);
     channel_config_set_write_increment(&cfg, false);
     channel_config_set_dreq(&cfg, pio_get_dreq(SCSI_DMA_PIO, SCSI_DMA_SM, true));