소스 검색

RP2040: Initiator mode speed optimization

Petteri Aimonen 3 년 전
부모
커밋
7c5b0622bf

+ 1 - 1
lib/ZuluSCSI_platform_RP2040/ZuluSCSI_platform.h

@@ -17,7 +17,7 @@ extern const char *g_azplatform_name;
 #define PLATFORM_REVISION "2.0"
 #define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_SYNC_10
 #define PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE 4096
-#define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 65536
+#define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 32768
 #define PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE 8192
 #define SD_USE_SDIO 1
 #define PLATFORM_HAS_INITIATOR_MODE 1

+ 31 - 8
lib/ZuluSCSI_platform_RP2040/scsiHostPhy.cpp

@@ -2,6 +2,7 @@
 #include "ZuluSCSI_platform.h"
 #include "ZuluSCSI_log.h"
 #include "ZuluSCSI_log_trace.h"
+#include "scsi_accel_host.h"
 #include <assert.h>
 
 #include <scsi2sd.h>
@@ -9,7 +10,7 @@ extern "C" {
 #include <scsi.h>
 }
 
-volatile bool g_scsiHostPhyReset;
+volatile int g_scsiHostPhyReset;
 
 // Release bus and pulse RST signal, initialize PHY to host mode.
 void scsiHostPhyReset(void)
@@ -17,6 +18,8 @@ void scsiHostPhyReset(void)
     SCSI_RELEASE_OUTPUTS();
     SCSI_ENABLE_INITIATOR();
 
+    scsi_accel_host_init();
+
     SCSI_OUT(RST, 1);
     delay(2);
     SCSI_OUT(RST, 0);
@@ -113,9 +116,12 @@ int scsiHostPhyGetPhase()
 
         // Still online, re-enable OUT_BSY to enable IO buffers
         SCSI_OUT(BSY, 1);
+        last_online_time = get_absolute_time();
+    }
+    else if (phase != 0)
+    {
+        last_online_time = get_absolute_time();
     }
-
-    last_online_time = get_absolute_time();
 
     if (!req_in)
     {
@@ -196,15 +202,32 @@ bool scsiHostWrite(const uint8_t *data, uint32_t count)
 
 bool scsiHostRead(uint8_t *data, uint32_t count)
 {
-    for (uint32_t i = 0; i < count; i++)
+    int parityError = 0;
+
+    if ((count & 1) == 0)
     {
-        if (g_scsiHostPhyReset) return false;
+        // Even number of bytes, use accelerated routine
+        scsi_accel_host_read(data, count, &parityError, &g_scsiHostPhyReset);
+    }
+    else
+    {
+        for (uint32_t i = 0; i < count; i++)
+        {
+            if (g_scsiHostPhyReset) return false;
 
-        data[i] = scsiHostReadOneByte(NULL);
+            data[i] = scsiHostReadOneByte(&parityError);
+        }
     }
 
-    scsiLogDataIn(data, count);
-    return true;
+    if (parityError || g_scsiHostPhyReset)
+    {
+        return false;
+    }
+    else
+    {
+        scsiLogDataIn(data, count);
+        return true;
+    }
 }
 
 // Release all bus signals

+ 1 - 1
lib/ZuluSCSI_platform_RP2040/scsiHostPhy.h

@@ -7,7 +7,7 @@
 #include <stdbool.h>
 
 // Request to stop activity and reset the bus
-extern volatile bool g_scsiHostPhyReset;
+extern volatile int g_scsiHostPhyReset;
 
 // Release bus and pulse RST signal, initialize PHY to host mode.
 void scsiHostPhyReset(void);

+ 131 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel_host.cpp

@@ -0,0 +1,131 @@
+// Accelerated SCSI subroutines for SCSI initiator/host side communication
+
+#include "scsi_accel_host.h"
+#include "ZuluSCSI_platform.h"
+#include "ZuluSCSI_log.h"
+#include "scsi_accel_host.pio.h"
+#include <hardware/pio.h>
+#include <hardware/dma.h>
+#include <hardware/irq.h>
+#include <hardware/structs/iobank0.h>
+#include <hardware/sync.h>
+
+#define SCSI_PIO pio0
+#define SCSI_SM 0
+
+static struct {
+    // PIO configuration, filled in by scsi_accel_host_init()
+    uint32_t pio_offset_async_read; // Instruction memory offset returned by pio_add_program()
+    pio_sm_config pio_cfg_async_read; // State machine config passed to pio_sm_init() at the start of each read
+} g_scsi_host;
+
+enum scsidma_state_t { SCSIHOST_IDLE = 0, // ACK pin is routed to SIO
+                       SCSIHOST_READ };   // ACK pin is routed to PIO0 while a read is running
+static volatile scsidma_state_t g_scsi_host_state; // Selects which pin setup scsi_accel_host_config_gpio() applies
+
+static void scsi_accel_host_config_gpio() // Apply the pin function selection that matches g_scsi_host_state
+{
+    if (g_scsi_host_state == SCSIHOST_IDLE)
+    {
+        iobank0_hw->io[SCSI_IO_DB0].ctrl  = GPIO_FUNC_SIO; // Idle: data lines, parity and ACK are all routed to SIO
+        iobank0_hw->io[SCSI_IO_DB1].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB2].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB3].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB4].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB5].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB6].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB7].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DBP].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_OUT_ACK].ctrl = GPIO_FUNC_SIO;
+    }
+    else if (g_scsi_host_state == SCSIHOST_READ)
+    {
+        // Data bus and REQ as input, ACK pin as output
+        // NOTE(review): the three calls below use literal GPIO numbers 0-10 (the mapping documented in scsi_accel_host.pio); confirm they match the SCSI_IO_* / SCSI_OUT_ACK macros
+        pio_sm_set_pins(SCSI_PIO, SCSI_SM, 0x7FF); // Preset the PIO output value of GPIO 0-10 to 1
+        pio_sm_set_consecutive_pindirs(SCSI_PIO, SCSI_SM, 0, 10, false); // GPIO 0-9 as inputs from the PIO's point of view
+        pio_sm_set_consecutive_pindirs(SCSI_PIO, SCSI_SM, 10, 1, true); // GPIO 10 as PIO output
+
+        iobank0_hw->io[SCSI_IO_DB0].ctrl  = GPIO_FUNC_SIO; // Data lines and parity keep the SIO function during a read
+        iobank0_hw->io[SCSI_IO_DB1].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB2].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB3].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB4].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB5].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB6].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DB7].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_IO_DBP].ctrl  = GPIO_FUNC_SIO;
+        iobank0_hw->io[SCSI_OUT_ACK].ctrl = GPIO_FUNC_PIO0; // Only ACK is handed over to PIO0 (it is the program's side-set pin)
+    }
+}
+
+// Read 'count' bytes from the SCSI bus into 'buf' using the scsi_host_async_read PIO program.
+//
+// buf:         destination buffer with room for 'count' bytes
+// count:       number of bytes to read, must be divisible by 2; zero is accepted and does nothing
+// parityError: set to 1 when the block-wide parity check fails, otherwise left untouched
+// resetFlag:   polled while waiting for data; a non-zero value aborts the transfer early
+void scsi_accel_host_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag)
+{
+    // Validate the request before any hardware is reconfigured.
+    assert((count & 1) == 0);
+
+    if (count == 0)
+    {
+        // Nothing to transfer. Without this check 'count - 1' below would wrap around to
+        // 0xFFFFFFFF and be handed to the running state machine as its byte counter.
+        return;
+    }
+
+    // Currently this method just reads from the PIO RX fifo directly in software loop.
+    // The SD card access is parallelized using DMA, so there is limited benefit from using DMA here.
+    g_scsi_host_state = SCSIHOST_READ;
+
+    pio_sm_init(SCSI_PIO, SCSI_SM, g_scsi_host.pio_offset_async_read, &g_scsi_host.pio_cfg_async_read);
+    scsi_accel_host_config_gpio();
+    pio_sm_set_enabled(SCSI_PIO, SCSI_SM, true);
+
+    // The PIO program expects the number of bytes to receive minus 1 in its TX FIFO.
+    pio_sm_put(SCSI_PIO, SCSI_SM, count - 1);
+
+    // Read results from PIO RX FIFO.
+    // Each 32-bit word holds two bus samples of 8 data bits + 1 parity bit:
+    // the first in bits 0-8 and the second in bits 16-24.
+    uint8_t *dst = buf;
+    uint8_t *end = buf + count;
+    uint32_t paritycheck = 0;
+    while (dst < end)
+    {
+        if (*resetFlag)
+        {
+            break;
+        }
+
+        uint32_t available = pio_sm_get_rx_fifo_level(SCSI_PIO, SCSI_SM);
+
+        while (available > 0)
+        {
+            available--;
+            uint32_t word = pio_sm_get(SCSI_PIO, SCSI_SM);
+            paritycheck ^= word; // Accumulate the raw samples for the block-wide check below
+            word = ~word;        // Stored bytes are the bitwise inverse of the sampled pin levels
+            *dst++ = word & 0xFF;
+            *dst++ = word >> 16; // Truncated to the low 8 bits by the uint8_t store
+        }
+    }
+
+    // Check parity errors in whole block
+    // This doesn't detect if there is even number of parity errors in block.
+    // If the loop above was aborted through resetFlag, only the words received so far are covered.
+    uint8_t byte0 = ~(paritycheck & 0xFF);
+    uint8_t byte1 = ~(paritycheck >> 16);
+    if (paritycheck != ((g_scsi_parity_lookup[byte1] << 16) | g_scsi_parity_lookup[byte0]))
+    {
+        azlog("Parity error in scsi_accel_host_read(): ", paritycheck);
+        *parityError = 1;
+    }
+
+    // Return the pins to their idle setup before stopping the state machine.
+    g_scsi_host_state = SCSIHOST_IDLE;
+    SCSI_RELEASE_DATA_REQ();
+    scsi_accel_host_config_gpio();
+    pio_sm_set_enabled(SCSI_PIO, SCSI_SM, false);
+}
+
+
+void scsi_accel_host_init() // Put the pins into idle setup and (re)load the host-side PIO program; called from scsiHostPhyReset()
+{
+    g_scsi_host_state = SCSIHOST_IDLE;
+    scsi_accel_host_config_gpio();
+
+    // Load PIO programs
+    pio_clear_instruction_memory(SCSI_PIO); // Drops everything previously loaded into this PIO block's instruction memory
+
+    // SCSI read program (scsi_host_async_read) and the state machine configuration used to run it
+    g_scsi_host.pio_offset_async_read = pio_add_program(SCSI_PIO, &scsi_host_async_read_program);
+    g_scsi_host.pio_cfg_async_read = scsi_host_async_read_program_get_default_config(g_scsi_host.pio_offset_async_read);
+    sm_config_set_in_pins(&g_scsi_host.pio_cfg_async_read, SCSI_IO_DB0); // 'in pins' samples starting from DB0
+    sm_config_set_sideset_pins(&g_scsi_host.pio_cfg_async_read, SCSI_OUT_ACK); // Side-set drives the ACK pin
+    sm_config_set_out_shift(&g_scsi_host.pio_cfg_async_read, true, false, 32); // OSR: shift right, autopull off
+    sm_config_set_in_shift(&g_scsi_host.pio_cfg_async_read, true, true, 32); // ISR: shift right, autopush after 32 bits (two 16-bit samples)
+}

+ 11 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel_host.h

@@ -0,0 +1,11 @@
+// Accelerated SCSI subroutines for SCSI initiator/host side communication
+
+#pragma once
+
+#include <stdint.h>
+
+void scsi_accel_host_init(); // Sets idle pin configuration and loads the PIO read program
+
+// Read data from SCSI bus.
+// Number of bytes to read must be divisible by two.
+void scsi_accel_host_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag); // *parityError is set to 1 on a parity mismatch; a non-zero *resetFlag aborts the read

+ 26 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel_host.pio

@@ -0,0 +1,26 @@
+; RP2040 PIO program for accelerating SCSI initiator / host function
+; Run "pioasm scsi_accel_host.pio scsi_accel_host.pio.h" to regenerate the C header from this.
+; GPIO mapping:
+; - 0-7: DB0-DB7
+; -   8: DBP
+; Side set is ACK pin
+
+.define REQ 9
+.define ACK 10
+
+; Read from SCSI bus using asynchronous handshake.
+; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
+; Number of bytes to receive minus 1 should be written to TX fifo.
+; Number of bytes to receive must be divisible by 2.
+.program scsi_host_async_read
+    .side_set 1
+
+    pull block                  side 1  ; Get number of bytes to receive, stalls until a value is written to TX fifo
+    mov x, osr                  side 1  ; Store to counter X
+
+start:
+    wait 0 gpio REQ             side 1  ; Wait for REQ low
+    in pins, 9                  side 0  ; Assert ACK, read GPIO
+    in null, 7                  side 0  ; Padding bits, extends each sample to 16 bits
+    wait 1 gpio REQ             side 0  ; Wait for REQ high
+    jmp x-- start               side 1  ; Deassert ACK, decrement byte count and jump to start; once X was 0 execution wraps back to 'pull'

+ 44 - 0
lib/ZuluSCSI_platform_RP2040/scsi_accel_host.pio.h

@@ -0,0 +1,44 @@
+// -------------------------------------------------- //
+// This file is autogenerated by pioasm; do not edit! //
+// -------------------------------------------------- //
+
+#pragma once
+
+#if !PICO_NO_HARDWARE
+#include "hardware/pio.h"
+#endif
+
+// -------------------- //
+// scsi_host_async_read //
+// -------------------- //
+
+#define scsi_host_async_read_wrap_target 0
+#define scsi_host_async_read_wrap 6
+
+static const uint16_t scsi_host_async_read_program_instructions[] = {
+            //     .wrap_target
+    0x90a0, //  0: pull   block           side 1     
+    0xb027, //  1: mov    x, osr          side 1     
+    0x3009, //  2: wait   0 gpio, 9       side 1     
+    0x4009, //  3: in     pins, 9         side 0     
+    0x4067, //  4: in     null, 7         side 0     
+    0x2089, //  5: wait   1 gpio, 9       side 0     
+    0x1042, //  6: jmp    x--, 2          side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_host_async_read_program = {
+    .instructions = scsi_host_async_read_program_instructions,
+    .length = 7,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_host_async_read_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_host_async_read_wrap_target, offset + scsi_host_async_read_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+

+ 40 - 14
src/ZuluSCSI_initiator.cpp

@@ -132,7 +132,8 @@ void scsiInitiatorMainLoop()
         }
 
         // Update status indicator, the led blinks every 5 seconds and is on the longer the more data has been transferred
-        int phase = (millis() % 5000);
+        uint32_t time_start = millis();
+        int phase = (time_start % 5000);
         int duty = g_initiator_state.sectors_done * 5000 / g_initiator_state.sectorcount;
         if (duty < 100) duty = 100;
         if (phase <= duty)
@@ -148,20 +149,24 @@ void scsiInitiatorMainLoop()
         int numtoread = g_initiator_state.sectorcount - g_initiator_state.sectors_done;
         if (numtoread > 512) numtoread = 512;
 
+        // Retry sector-by-sector
+        if (g_initiator_state.retrycount > 1)
+            numtoread = 1;
+
         bool status = scsiInitiatorReadDataToFile(g_initiator_state.target_id,
             g_initiator_state.sectors_done, numtoread, g_initiator_state.sectorsize,
             g_initiator_state.target_file);
 
         if (!status)
         {
-            azlog("Failed to transfer starting at sector ", (int)g_initiator_state.sectors_done);
+            azlog("Failed to transfer ", numtoread, " sectors starting at ", (int)g_initiator_state.sectors_done);
 
             if (g_initiator_state.retrycount < 5)
             {
                 azlog("Retrying.. ", g_initiator_state.retrycount, "/5");
-                delay(1000);
+                delay(200);
                 scsiHostPhyReset();
-                delay(1000);
+                delay(200);
 
                 g_initiator_state.retrycount++;
                 g_initiator_state.target_file.seek((uint64_t)g_initiator_state.sectors_done * g_initiator_state.sectorsize);
@@ -179,7 +184,11 @@ void scsiInitiatorMainLoop()
             g_initiator_state.retrycount = 0;
             g_initiator_state.sectors_done += numtoread;
             g_initiator_state.target_file.flush();
-            azlog("SCSI read succeeded, sectors done: ", (int)g_initiator_state.sectors_done, " / ", (int)g_initiator_state.sectorcount);
+
+            int speed_kbps = numtoread * g_initiator_state.sectorsize / (millis() - time_start);
+            azlog("SCSI read succeeded, sectors done: ",
+                  (int)g_initiator_state.sectors_done, " / ", (int)g_initiator_state.sectorcount,
+                  " speed ", speed_kbps, " kB/s");
         }
     }
 }
@@ -301,12 +310,12 @@ bool scsiInitiatorReadCapacity(int target_id, uint32_t *sectorcount, uint32_t *s
 
 // This uses callbacks to run SD and SCSI transfers in parallel
 static struct {
-    uint32_t bytes_sd; // Number of bytes that have been scheduled for transfer on SD card side
+    uint32_t bytes_sd; // Number of bytes that have been transferred on SD card side
+    uint32_t bytes_sd_scheduled; // Number of bytes scheduled for transfer on SD card side
     uint32_t bytes_scsi; // Number of bytes that have been scheduled for transfer on SCSI side
-
+    uint32_t bytes_scsi_done; // Number of bytes that have been transferred on SCSI side
+    
     uint32_t bytes_per_sector;
-    uint32_t bytes_scsi_done;
-    uint32_t sd_transfer_start;
     bool all_ok;
 } g_initiator_transfer;
 
@@ -344,6 +353,14 @@ static void initiatorReadSDCallback(uint32_t bytes_complete)
         uint32_t sd_ready_cnt = g_initiator_transfer.bytes_sd + bytes_complete;
         if (g_initiator_transfer.bytes_scsi_done + len > sd_ready_cnt + bufsize)
             len = sd_ready_cnt + bufsize - g_initiator_transfer.bytes_scsi_done;
+        
+        if (sd_ready_cnt == g_initiator_transfer.bytes_sd_scheduled &&
+            g_initiator_transfer.bytes_sd_scheduled + bytesPerSector <= g_initiator_transfer.bytes_scsi_done)
+        {
+            // Current SD transfer is complete, it is better we return now and offer a chance for the next
+            // transfer to begin.
+            return;
+        }
 
         // Keep transfers a multiple of sector size.
         if (remain >= bytesPerSector && len % bytesPerSector != 0)
@@ -385,6 +402,7 @@ static void scsiInitiatorWriteDataToSd(FsFile &file, bool use_callback)
         azplatform_set_sd_callback(&initiatorReadSDCallback, buf);
     }
 
+    g_initiator_transfer.bytes_sd_scheduled = g_initiator_transfer.bytes_sd + len;
     if (file.write(buf, len) != len)
     {
         azlog("scsiInitiatorReadDataToFile: SD card write failed");
@@ -420,20 +438,28 @@ bool scsiInitiatorReadDataToFile(int target_id, uint32_t start_sector, uint32_t
     g_initiator_transfer.bytes_scsi = sectorcount * sectorsize;
     g_initiator_transfer.bytes_per_sector = sectorsize;
     g_initiator_transfer.bytes_sd = 0;
+    g_initiator_transfer.bytes_sd_scheduled = 0;
     g_initiator_transfer.bytes_scsi_done = 0;
-    g_initiator_transfer.sd_transfer_start = 0;
     g_initiator_transfer.all_ok = true;
 
-    while ((phase = (SCSI_PHASE)scsiHostPhyGetPhase()) == DATA_IN)
+    while (true)
     {
+        phase = (SCSI_PHASE)scsiHostPhyGetPhase();
+        if (phase != DATA_IN && phase != BUS_BUSY)
+        {
+            break;
+        }
+
         // Read next block from SCSI bus if buffer empty
         if (g_initiator_transfer.bytes_sd == g_initiator_transfer.bytes_scsi_done)
         {
             initiatorReadSDCallback(0);
         }
-
-        // Write data to SD card and simultaneously read more from SCSI
-        scsiInitiatorWriteDataToSd(file, true);
+        else
+        {
+            // Write data to SD card and simultaneously read more from SCSI
+            scsiInitiatorWriteDataToSd(file, true);
+        }
     }
 
     // Write any remaining buffered data