Browse Source

Merge pull request #126 from ZuluSCSI/rp2040_perf_improvements

Rp2040 write performance improvements and bug fixes
Alex Perez 2 years ago
parent
commit
2e4a5fcd00

+ 54 - 1
lib/ZuluSCSI_platform_RP2040/ZuluSCSI_platform.cpp

@@ -9,6 +9,7 @@
 #include <hardware/spi.h>
 #include <hardware/structs/xip_ctrl.h>
 #include <platform/mbed_error.h>
+#include <multicore.h>
 
 extern "C" {
 
@@ -44,6 +45,9 @@ static void gpio_conf(uint gpio, enum gpio_function fn, bool pullup, bool pulldo
 
 void azplatform_init()
 {
+    // Make sure second core is stopped
+    multicore_reset_core1();
+
     /* First configure the pins that affect external buffer directions.
      * RP2040 defaults to pulldowns, while these pins have external pull-ups.
      */
@@ -554,6 +558,8 @@ bool azplatform_write_romdrive(const uint8_t *data, uint32_t start, uint32_t cou
 
 /* A lookup table is the fastest way to calculate parity and convert the IO pin mapping for data bus.
  * For RP2040 we expect that the bits are consecutive and in order.
+ * The PIO-based parity scheme also requires that the lookup table is aligned to a 512-byte boundary.
+ * The parity table is placed into SRAM4 area to reduce bus contention.
  */
 
 #define PARITY(n) ((1 ^ (n) ^ ((n)>>1) ^ ((n)>>2) ^ ((n)>>3) ^ ((n)>>4) ^ ((n)>>5) ^ ((n)>>6) ^ ((n)>>7)) & 1)
@@ -569,7 +575,7 @@ bool azplatform_write_romdrive(const uint8_t *data, uint32_t start, uint32_t cou
     (PARITY(n)  ? 0 : (1 << SCSI_IO_DBP)) \
 )
 
-const uint32_t g_scsi_parity_lookup[256] =
+const uint16_t g_scsi_parity_lookup[256] __attribute__((aligned(512), section(".scratch_x.parity"))) =
 {
     X(0x00), X(0x01), X(0x02), X(0x03), X(0x04), X(0x05), X(0x06), X(0x07), X(0x08), X(0x09), X(0x0a), X(0x0b), X(0x0c), X(0x0d), X(0x0e), X(0x0f),
     X(0x10), X(0x11), X(0x12), X(0x13), X(0x14), X(0x15), X(0x16), X(0x17), X(0x18), X(0x19), X(0x1a), X(0x1b), X(0x1c), X(0x1d), X(0x1e), X(0x1f),
@@ -591,6 +597,53 @@ const uint32_t g_scsi_parity_lookup[256] =
 
 #undef X
 
+/* Similarly, another lookup table is used to verify parity of received data.
+ * This table is indexed by the 8 data bits + 1 parity bit from SCSI bus (active low)
+ * Each entry holds the data byte (inverted to active-high) in bits 0-7 and a parity-valid flag in bit 8 (1 = valid).
+ */
+#define X(n) (\
+    ((n & 0xFF) ^ 0xFF) | \
+    (((PARITY(n & 0xFF) ^ (n >> 8)) & 1) << 8) \
+)
+
+const uint16_t g_scsi_parity_check_lookup[512] __attribute__((aligned(1024), section(".scratch_x.parity"))) =
+{
+    X(0x000), X(0x001), X(0x002), X(0x003), X(0x004), X(0x005), X(0x006), X(0x007), X(0x008), X(0x009), X(0x00a), X(0x00b), X(0x00c), X(0x00d), X(0x00e), X(0x00f),
+    X(0x010), X(0x011), X(0x012), X(0x013), X(0x014), X(0x015), X(0x016), X(0x017), X(0x018), X(0x019), X(0x01a), X(0x01b), X(0x01c), X(0x01d), X(0x01e), X(0x01f),
+    X(0x020), X(0x021), X(0x022), X(0x023), X(0x024), X(0x025), X(0x026), X(0x027), X(0x028), X(0x029), X(0x02a), X(0x02b), X(0x02c), X(0x02d), X(0x02e), X(0x02f),
+    X(0x030), X(0x031), X(0x032), X(0x033), X(0x034), X(0x035), X(0x036), X(0x037), X(0x038), X(0x039), X(0x03a), X(0x03b), X(0x03c), X(0x03d), X(0x03e), X(0x03f),
+    X(0x040), X(0x041), X(0x042), X(0x043), X(0x044), X(0x045), X(0x046), X(0x047), X(0x048), X(0x049), X(0x04a), X(0x04b), X(0x04c), X(0x04d), X(0x04e), X(0x04f),
+    X(0x050), X(0x051), X(0x052), X(0x053), X(0x054), X(0x055), X(0x056), X(0x057), X(0x058), X(0x059), X(0x05a), X(0x05b), X(0x05c), X(0x05d), X(0x05e), X(0x05f),
+    X(0x060), X(0x061), X(0x062), X(0x063), X(0x064), X(0x065), X(0x066), X(0x067), X(0x068), X(0x069), X(0x06a), X(0x06b), X(0x06c), X(0x06d), X(0x06e), X(0x06f),
+    X(0x070), X(0x071), X(0x072), X(0x073), X(0x074), X(0x075), X(0x076), X(0x077), X(0x078), X(0x079), X(0x07a), X(0x07b), X(0x07c), X(0x07d), X(0x07e), X(0x07f),
+    X(0x080), X(0x081), X(0x082), X(0x083), X(0x084), X(0x085), X(0x086), X(0x087), X(0x088), X(0x089), X(0x08a), X(0x08b), X(0x08c), X(0x08d), X(0x08e), X(0x08f),
+    X(0x090), X(0x091), X(0x092), X(0x093), X(0x094), X(0x095), X(0x096), X(0x097), X(0x098), X(0x099), X(0x09a), X(0x09b), X(0x09c), X(0x09d), X(0x09e), X(0x09f),
+    X(0x0a0), X(0x0a1), X(0x0a2), X(0x0a3), X(0x0a4), X(0x0a5), X(0x0a6), X(0x0a7), X(0x0a8), X(0x0a9), X(0x0aa), X(0x0ab), X(0x0ac), X(0x0ad), X(0x0ae), X(0x0af),
+    X(0x0b0), X(0x0b1), X(0x0b2), X(0x0b3), X(0x0b4), X(0x0b5), X(0x0b6), X(0x0b7), X(0x0b8), X(0x0b9), X(0x0ba), X(0x0bb), X(0x0bc), X(0x0bd), X(0x0be), X(0x0bf),
+    X(0x0c0), X(0x0c1), X(0x0c2), X(0x0c3), X(0x0c4), X(0x0c5), X(0x0c6), X(0x0c7), X(0x0c8), X(0x0c9), X(0x0ca), X(0x0cb), X(0x0cc), X(0x0cd), X(0x0ce), X(0x0cf),
+    X(0x0d0), X(0x0d1), X(0x0d2), X(0x0d3), X(0x0d4), X(0x0d5), X(0x0d6), X(0x0d7), X(0x0d8), X(0x0d9), X(0x0da), X(0x0db), X(0x0dc), X(0x0dd), X(0x0de), X(0x0df),
+    X(0x0e0), X(0x0e1), X(0x0e2), X(0x0e3), X(0x0e4), X(0x0e5), X(0x0e6), X(0x0e7), X(0x0e8), X(0x0e9), X(0x0ea), X(0x0eb), X(0x0ec), X(0x0ed), X(0x0ee), X(0x0ef),
+    X(0x0f0), X(0x0f1), X(0x0f2), X(0x0f3), X(0x0f4), X(0x0f5), X(0x0f6), X(0x0f7), X(0x0f8), X(0x0f9), X(0x0fa), X(0x0fb), X(0x0fc), X(0x0fd), X(0x0fe), X(0x0ff),
+    X(0x100), X(0x101), X(0x102), X(0x103), X(0x104), X(0x105), X(0x106), X(0x107), X(0x108), X(0x109), X(0x10a), X(0x10b), X(0x10c), X(0x10d), X(0x10e), X(0x10f),
+    X(0x110), X(0x111), X(0x112), X(0x113), X(0x114), X(0x115), X(0x116), X(0x117), X(0x118), X(0x119), X(0x11a), X(0x11b), X(0x11c), X(0x11d), X(0x11e), X(0x11f),
+    X(0x120), X(0x121), X(0x122), X(0x123), X(0x124), X(0x125), X(0x126), X(0x127), X(0x128), X(0x129), X(0x12a), X(0x12b), X(0x12c), X(0x12d), X(0x12e), X(0x12f),
+    X(0x130), X(0x131), X(0x132), X(0x133), X(0x134), X(0x135), X(0x136), X(0x137), X(0x138), X(0x139), X(0x13a), X(0x13b), X(0x13c), X(0x13d), X(0x13e), X(0x13f),
+    X(0x140), X(0x141), X(0x142), X(0x143), X(0x144), X(0x145), X(0x146), X(0x147), X(0x148), X(0x149), X(0x14a), X(0x14b), X(0x14c), X(0x14d), X(0x14e), X(0x14f),
+    X(0x150), X(0x151), X(0x152), X(0x153), X(0x154), X(0x155), X(0x156), X(0x157), X(0x158), X(0x159), X(0x15a), X(0x15b), X(0x15c), X(0x15d), X(0x15e), X(0x15f),
+    X(0x160), X(0x161), X(0x162), X(0x163), X(0x164), X(0x165), X(0x166), X(0x167), X(0x168), X(0x169), X(0x16a), X(0x16b), X(0x16c), X(0x16d), X(0x16e), X(0x16f),
+    X(0x170), X(0x171), X(0x172), X(0x173), X(0x174), X(0x175), X(0x176), X(0x177), X(0x178), X(0x179), X(0x17a), X(0x17b), X(0x17c), X(0x17d), X(0x17e), X(0x17f),
+    X(0x180), X(0x181), X(0x182), X(0x183), X(0x184), X(0x185), X(0x186), X(0x187), X(0x188), X(0x189), X(0x18a), X(0x18b), X(0x18c), X(0x18d), X(0x18e), X(0x18f),
+    X(0x190), X(0x191), X(0x192), X(0x193), X(0x194), X(0x195), X(0x196), X(0x197), X(0x198), X(0x199), X(0x19a), X(0x19b), X(0x19c), X(0x19d), X(0x19e), X(0x19f),
+    X(0x1a0), X(0x1a1), X(0x1a2), X(0x1a3), X(0x1a4), X(0x1a5), X(0x1a6), X(0x1a7), X(0x1a8), X(0x1a9), X(0x1aa), X(0x1ab), X(0x1ac), X(0x1ad), X(0x1ae), X(0x1af),
+    X(0x1b0), X(0x1b1), X(0x1b2), X(0x1b3), X(0x1b4), X(0x1b5), X(0x1b6), X(0x1b7), X(0x1b8), X(0x1b9), X(0x1ba), X(0x1bb), X(0x1bc), X(0x1bd), X(0x1be), X(0x1bf),
+    X(0x1c0), X(0x1c1), X(0x1c2), X(0x1c3), X(0x1c4), X(0x1c5), X(0x1c6), X(0x1c7), X(0x1c8), X(0x1c9), X(0x1ca), X(0x1cb), X(0x1cc), X(0x1cd), X(0x1ce), X(0x1cf),
+    X(0x1d0), X(0x1d1), X(0x1d2), X(0x1d3), X(0x1d4), X(0x1d5), X(0x1d6), X(0x1d7), X(0x1d8), X(0x1d9), X(0x1da), X(0x1db), X(0x1dc), X(0x1dd), X(0x1de), X(0x1df),
+    X(0x1e0), X(0x1e1), X(0x1e2), X(0x1e3), X(0x1e4), X(0x1e5), X(0x1e6), X(0x1e7), X(0x1e8), X(0x1e9), X(0x1ea), X(0x1eb), X(0x1ec), X(0x1ed), X(0x1ee), X(0x1ef),
+    X(0x1f0), X(0x1f1), X(0x1f2), X(0x1f3), X(0x1f4), X(0x1f5), X(0x1f6), X(0x1f7), X(0x1f8), X(0x1f9), X(0x1fa), X(0x1fb), X(0x1fc), X(0x1fd), X(0x1fe), X(0x1ff),
+};
+
+#undef X
+
 } /* extern "C" */
 
 /* Logging from mbed */

+ 7 - 3
lib/ZuluSCSI_platform_RP2040/ZuluSCSI_platform.h

@@ -16,8 +16,8 @@ extern const char *g_azplatform_name;
 #define PLATFORM_NAME "ZuluSCSI RP2040"
 #define PLATFORM_REVISION "2.0"
 #define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_SYNC_10
-#define PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE 4096
-#define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 32768
+#define PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE 32768
+#define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 65536
 #define PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE 8192
 #define SD_USE_SDIO 1
 #define PLATFORM_HAS_INITIATOR_MODE 1
@@ -91,6 +91,11 @@ bool azplatform_read_romdrive(uint8_t *dest, uint32_t start, uint32_t count);
 bool azplatform_write_romdrive(const uint8_t *data, uint32_t start, uint32_t count);
 #endif
 
+// Parity lookup tables for write and read from SCSI bus.
+// These are used by macros below and the code in scsi_accel_rp2040.cpp
+extern const uint16_t g_scsi_parity_lookup[256];
+extern const uint16_t g_scsi_parity_check_lookup[512];
+
 // Below are GPIO access definitions that are used from scsiPhy.cpp.
 
 // Write a single SCSI pin.
@@ -123,7 +128,6 @@ bool azplatform_write_romdrive(const uint8_t *data, uint32_t start, uint32_t cou
      sio_hw->gpio_oe_set = SCSI_IO_DATA_MASK)
 
 // Write SCSI data bus, also sets REQ to inactive.
-extern const uint32_t g_scsi_parity_lookup[256];
 #define SCSI_OUT_DATA(data) \
     gpio_put_masked(SCSI_IO_DATA_MASK | (1 << SCSI_OUT_REQ), \
                     g_scsi_parity_lookup[(uint8_t)(data)] | (1 << SCSI_OUT_REQ)), \

+ 2 - 2
lib/ZuluSCSI_platform_RP2040/rp2040_sdio.cpp

@@ -19,8 +19,8 @@
 #define SDIO_PIO pio1
 #define SDIO_CMD_SM 0
 #define SDIO_DATA_SM 1
-#define SDIO_DMA_CH 2
-#define SDIO_DMA_CHB 3
+#define SDIO_DMA_CH 4
+#define SDIO_DMA_CHB 5
 
 // Maximum number of 512 byte blocks to transfer in one request
 #define SDIO_MAX_BLOCKS 256

+ 19 - 33
lib/ZuluSCSI_platform_RP2040/scsiPhy.cpp

@@ -188,13 +188,13 @@ extern "C" uint32_t scsiEnterPhaseImmediate(int phase)
         scsiLogPhaseChange(phase);
 
         // Select between synchronous vs. asynchronous SCSI writes
-        if (g_scsi_phase == DATA_IN && scsiDev.target->syncOffset > 0)
+        if (scsiDev.target->syncOffset > 0 && (g_scsi_phase == DATA_IN || g_scsi_phase == DATA_OUT))
         {
-            scsi_accel_rp2040_setWriteMode(scsiDev.target->syncOffset, scsiDev.target->syncPeriod);
+            scsi_accel_rp2040_setSyncMode(scsiDev.target->syncOffset, scsiDev.target->syncPeriod);
         }
         else
         {
-            scsi_accel_rp2040_setWriteMode(0, 0);
+            scsi_accel_rp2040_setSyncMode(0, 0);
         }
 
         if (phase < 0)
@@ -297,22 +297,7 @@ extern "C" void scsiWrite(const uint8_t* data, uint32_t count)
 extern "C" void scsiStartWrite(const uint8_t* data, uint32_t count)
 {
     scsiLogDataIn(data, count);
-
-    if ((count & 1) != 0 || ((uint32_t)data & 1) != 0)
-    {
-        // Unaligned write, do it byte-by-byte
-        scsiFinishWrite();
-        for (uint32_t i = 0; i < count; i++)
-        {
-            if (scsiDev.resetFlag) break;
-            scsiWriteOneByte(data[i]);
-        }
-    }
-    else
-    {
-        // Use accelerated routine
-        scsi_accel_rp2040_startWrite(data, count, &scsiDev.resetFlag);
-    }
+    scsi_accel_rp2040_startWrite(data, count, &scsiDev.resetFlag);
 }
 
 extern "C" bool scsiIsWriteFinished(const uint8_t *data)
@@ -358,21 +343,22 @@ extern "C" uint8_t scsiReadByte(void)
 extern "C" void scsiRead(uint8_t* data, uint32_t count, int* parityError)
 {
     *parityError = 0;
+    scsiStartRead(data, count, parityError);
+    scsiFinishRead(data, count, parityError);
+}
 
-    if ((count & 1) != 0 || ((uint32_t)data & 1) != 0)
-    {
-        // Unaligned transfer, do byte by byte
-        for (uint32_t i = 0; i < count; i++)
-        {
-            if (scsiDev.resetFlag) break;
-            data[i] = scsiReadOneByte(parityError);
-        }
-    }
-    else
-    {
-        // Use accelerated routine
-        scsi_accel_rp2040_read(data, count, parityError, &scsiDev.resetFlag);
-    }
+extern "C" void scsiStartRead(uint8_t* data, uint32_t count, int *parityError)
+{
+    scsi_accel_rp2040_startRead(data, count, parityError, &scsiDev.resetFlag);
+}
 
+extern "C" void scsiFinishRead(uint8_t* data, uint32_t count, int *parityError)
+{
+    scsi_accel_rp2040_finishRead(data, count, parityError, &scsiDev.resetFlag);
     scsiLogDataOut(data, count);
 }
+
+extern "C" bool scsiIsReadFinished(const uint8_t *data)
+{
+    return scsi_accel_rp2040_isReadFinished(data);
+}

+ 7 - 0
lib/ZuluSCSI_platform_RP2040/scsiPhy.h

@@ -54,11 +54,18 @@ uint8_t scsiReadByte(void);
 // either combine transfers or block until previous transfer completes.
 void scsiStartWrite(const uint8_t* data, uint32_t count);
 void scsiFinishWrite();
+void scsiStartRead(uint8_t* data, uint32_t count, int *parityError);
+void scsiFinishRead(uint8_t* data, uint32_t count, int *parityError);
 
 // Query whether the data at pointer has already been read, i.e. buffer can be reused.
 // If data is NULL, checks if all writes have completed.
 bool scsiIsWriteFinished(const uint8_t *data);
 
+// Query whether the data at pointer has already been received into the buffer, i.e. can be processed.
+// If data is NULL, checks if all reads have completed.
+bool scsiIsReadFinished(const uint8_t *data);
+
+#define PLATFORM_SCSIPHY_HAS_NONBLOCKING_READ 1
 
 #define s2s_getScsiRateKBs() 0
 

+ 55 - 19
lib/ZuluSCSI_platform_RP2040/scsi_accel.pio

@@ -13,39 +13,48 @@
 ; One clock cycle is 8 ns => delay 7 clocks
 .define REQ_DLY 7
 
+; Adds parity to data that is to be written to SCSI
+; This works by generating addresses for DMA to fetch data from.
+; Register X should be initialized to the upper 23 bits of the lookup table base address (address >> 9).
+.program scsi_parity
+    pull block
+    in NULL, 1
+    in OSR, 8
+    in X, 23
+
 ; Write to SCSI bus using asynchronous handshake.
-; Data is written as 16-bit words that contain the 8 data bits + 1 parity bit.
-; 7 bits in each word are discarded.
+; Data is written as 32-bit words that contain the 8 data bits + 1 parity bit.
+; 23 bits in each word are discarded.
 ; Number of bytes to send must be multiple of 2.
 .program scsi_accel_async_write
     .side_set 1
 
     pull ifempty block          side 1  ; Get data from TX FIFO
     out pins, 9                 side 1  ; Write data and parity bit
-    out null, 7 [REQ_DLY-2]     side 1  ; Discard unused bits, wait for data preset time
+    out null, 23 [REQ_DLY-2]    side 1  ; Discard unused bits, wait for data preset time
     wait 1 gpio ACK             side 1  ; Wait for ACK to be inactive
     wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
 
-; Read from SCSI bus using asynchronous handshake.
-; Also works for synchronous mode down to 50 ns transfer period.
-; Data is returned as 16-bit words that contain the 8 data bits + 1 parity bit.
-; Number of bytes to receive minus 1 should be written to TX fifo.
-; Number of bytes to receive must be divisible by 2.
-.program scsi_accel_async_read
+; Read from SCSI bus using sync or async handshake.
+; Data is returned as 32-bit words:
+; - bit  0: always zero
+; - bits 1-8: data byte
+; - bit  9: parity bit
+; - bits 10-31: lookup table address
+; The upper 22 bits of the lookup table address (address >> 10) should be loaded into register Y.
+; One dummy word should be written to TX fifo for every byte to receive.
+.program scsi_accel_read
     .side_set 1
 
-    pull block                  side 1  ; Get number of bytes to receive
-    mov x, osr                  side 1  ; Store to counter X
-
-start:
+    pull block                  side 1  ; Pull from TX fifo for counting bytes and pacing sync mode
     wait 1 gpio ACK             side 1  ; Wait for ACK high
+    in null, 1                  side 0  ; Zero bit because lookup table entries are 16-bit
     wait 0 gpio ACK             side 0  ; Assert REQ, wait for ACK low
     in pins, 9                  side 1  ; Deassert REQ, read GPIO
-    in null, 7                  side 1  ; Padding bits
-    jmp x-- start               side 1  ; Decrement byte count and jump to start
+    in y, 22                    side 1  ; Copy parity lookup table address
 
 ; Data state machine for synchronous writes.
-; Takes the lowest 9 bits of each 16 bit word and writes them to bus with REQ pulse.
+; Takes the lowest 9 bits of each 32 bit word and writes them to bus with REQ pulse.
 ; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
 ;
 ; Shifts one bit to ISR per every byte transmitted. This is used to control the transfer
@@ -54,9 +63,9 @@ start:
 .program scsi_sync_write
     .side_set 1
 
-    out pins, 9     [0]         side 1  ; Write data and parity bit, wait for deskew delay
-    out null, 7     [0]         side 0  ; Assert REQ, wait for assert time
-    in null, 1      [0]         side 1  ; Deassert REQ, wait for transfer period, wait for space in ACK buffer
+    out pins, 9      [0]        side 1  ; Write data and parity bit, wait for deskew delay
+    out null, 23     [0]        side 0  ; Assert REQ, wait for assert time
+    in null, 1       [0]        side 1  ; Deassert REQ, wait for transfer period, wait for space in ACK buffer
 
 ; Data pacing state machine for synchronous writes.
 ; Takes one bit from ISR on every falling edge of ACK.
@@ -66,3 +75,30 @@ start:
     wait 1 gpio ACK
     wait 0 gpio ACK   ; Wait for falling edge on ACK
     out null, 1       ; Let scsi_sync_write send one more byte
+
+; Data pacing state machine for synchronous reads.
+; The delay times will be rewritten by C code to match the negotiated SCSI sync speed.
+; Number of bytes to receive minus one should be loaded into register X.
+; In synchronous mode this generates the REQ pulses and dummy words.
+; In asynchronous mode it just generates dummy words to feed to scsi_accel_read.
+.program scsi_sync_read_pacer
+    .side_set 1
+
+start:
+    push block      [0]      side 1  ; Send dummy word to scsi_accel_read, wait for transfer period
+    jmp x-- start   [0]      side 0  ; Assert REQ, wait for assert time
+
+finish:
+    jmp finish      [0]      side 1
+
+; Parity checker for reads from SCSI bus.
+; Receives 16-bit words from g_scsi_parity_check_lookup
+; Bottom 8 bits are the data byte, which is passed to output FIFO
+; The 9th bit is parity valid bit, which is 1 for valid and 0 for parity error.
+.program scsi_read_parity
+parity_valid:
+    out isr, 8                ; Take the 8 data bits for passing to RX fifo
+    push block                ; Push the data to RX fifo
+    out x, 24                 ; Take the parity valid bit, and the rest of 32-bit word
+    jmp x-- parity_valid      ; If parity valid bit is 1, repeat from start
+    irq set 0                 ; Parity error, set interrupt flag

+ 113 - 22
lib/ZuluSCSI_platform_RP2040/scsi_accel.pio.h

@@ -8,6 +8,36 @@
 #include "hardware/pio.h"
 #endif
 
+// ----------- //
+// scsi_parity //
+// ----------- //
+
+#define scsi_parity_wrap_target 0
+#define scsi_parity_wrap 3
+
+static const uint16_t scsi_parity_program_instructions[] = {
+            //     .wrap_target
+    0x80a0, //  0: pull   block                      
+    0x4061, //  1: in     null, 1                    
+    0x40e8, //  2: in     osr, 8                     
+    0x4037, //  3: in     x, 23                      
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_parity_program = {
+    .instructions = scsi_parity_program_instructions,
+    .length = 4,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_parity_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_parity_wrap_target, offset + scsi_parity_wrap);
+    return c;
+}
+#endif
+
 // ---------------------- //
 // scsi_accel_async_write //
 // ---------------------- //
@@ -19,7 +49,7 @@ static const uint16_t scsi_accel_async_write_program_instructions[] = {
             //     .wrap_target
     0x90e0, //  0: pull   ifempty block   side 1     
     0x7009, //  1: out    pins, 9         side 1     
-    0x7567, //  2: out    null, 7         side 1 [5] 
+    0x7577, //  2: out    null, 23        side 1 [5] 
     0x308a, //  3: wait   1 gpio, 10      side 1     
     0x200a, //  4: wait   0 gpio, 10      side 0     
             //     .wrap
@@ -40,35 +70,34 @@ static inline pio_sm_config scsi_accel_async_write_program_get_default_config(ui
 }
 #endif
 
-// --------------------- //
-// scsi_accel_async_read //
-// --------------------- //
+// --------------- //
+// scsi_accel_read //
+// --------------- //
 
-#define scsi_accel_async_read_wrap_target 0
-#define scsi_accel_async_read_wrap 6
+#define scsi_accel_read_wrap_target 0
+#define scsi_accel_read_wrap 5
 
-static const uint16_t scsi_accel_async_read_program_instructions[] = {
+static const uint16_t scsi_accel_read_program_instructions[] = {
             //     .wrap_target
     0x90a0, //  0: pull   block           side 1     
-    0xb027, //  1: mov    x, osr          side 1     
-    0x308a, //  2: wait   1 gpio, 10      side 1     
+    0x308a, //  1: wait   1 gpio, 10      side 1     
+    0x4061, //  2: in     null, 1         side 0     
     0x200a, //  3: wait   0 gpio, 10      side 0     
     0x5009, //  4: in     pins, 9         side 1     
-    0x5067, //  5: in     null, 7         side 1     
-    0x1042, //  6: jmp    x--, 2          side 1
+    0x5056, //  5: in     y, 22           side 1     
             //     .wrap
 };
 
 #if !PICO_NO_HARDWARE
-static const struct pio_program scsi_accel_async_read_program = {
-    .instructions = scsi_accel_async_read_program_instructions,
-    .length = 7,
+static const struct pio_program scsi_accel_read_program = {
+    .instructions = scsi_accel_read_program_instructions,
+    .length = 6,
     .origin = -1,
 };
 
-static inline pio_sm_config scsi_accel_async_read_program_get_default_config(uint offset) {
+static inline pio_sm_config scsi_accel_read_program_get_default_config(uint offset) {
     pio_sm_config c = pio_get_default_sm_config();
-    sm_config_set_wrap(&c, offset + scsi_accel_async_read_wrap_target, offset + scsi_accel_async_read_wrap);
+    sm_config_set_wrap(&c, offset + scsi_accel_read_wrap_target, offset + scsi_accel_read_wrap);
     sm_config_set_sideset(&c, 1, false, false);
     return c;
 }
@@ -83,9 +112,9 @@ static inline pio_sm_config scsi_accel_async_read_program_get_default_config(uin
 
 static const uint16_t scsi_sync_write_program_instructions[] = {
             //     .wrap_target
-    0x7009, //  0: out    pins, 9         side 1
-    0x6067, //  1: out    null, 7         side 0
-    0x5061, //  2: in     null, 1         side 1
+    0x7009, //  0: out    pins, 9         side 1     
+    0x6077, //  1: out    null, 23        side 0     
+    0x5061, //  2: in     null, 1         side 1     
             //     .wrap
 };
 
@@ -113,9 +142,9 @@ static inline pio_sm_config scsi_sync_write_program_get_default_config(uint offs
 
 static const uint16_t scsi_sync_write_pacer_program_instructions[] = {
             //     .wrap_target
-    0x208a, //  0: wait   1 gpio, 10
-    0x200a, //  1: wait   0 gpio, 10
-    0x6061, //  2: out    null, 1
+    0x208a, //  0: wait   1 gpio, 10                 
+    0x200a, //  1: wait   0 gpio, 10                 
+    0x6061, //  2: out    null, 1                    
             //     .wrap
 };
 
@@ -132,3 +161,65 @@ static inline pio_sm_config scsi_sync_write_pacer_program_get_default_config(uin
     return c;
 }
 #endif
+
+// -------------------- //
+// scsi_sync_read_pacer //
+// -------------------- //
+
+#define scsi_sync_read_pacer_wrap_target 0
+#define scsi_sync_read_pacer_wrap 2
+
+static const uint16_t scsi_sync_read_pacer_program_instructions[] = {
+            //     .wrap_target
+    0x9020, //  0: push   block           side 1     
+    0x0040, //  1: jmp    x--, 0          side 0     
+    0x1002, //  2: jmp    2               side 1     
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_sync_read_pacer_program = {
+    .instructions = scsi_sync_read_pacer_program_instructions,
+    .length = 3,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_sync_read_pacer_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_sync_read_pacer_wrap_target, offset + scsi_sync_read_pacer_wrap);
+    sm_config_set_sideset(&c, 1, false, false);
+    return c;
+}
+#endif
+
+// ---------------- //
+// scsi_read_parity //
+// ---------------- //
+
+#define scsi_read_parity_wrap_target 0
+#define scsi_read_parity_wrap 4
+
+static const uint16_t scsi_read_parity_program_instructions[] = {
+            //     .wrap_target
+    0x60c8, //  0: out    isr, 8                     
+    0x8020, //  1: push   block                      
+    0x6038, //  2: out    x, 24                      
+    0x0040, //  3: jmp    x--, 0                     
+    0xc000, //  4: irq    nowait 0                   
+            //     .wrap
+};
+
+#if !PICO_NO_HARDWARE
+static const struct pio_program scsi_read_parity_program = {
+    .instructions = scsi_read_parity_program_instructions,
+    .length = 5,
+    .origin = -1,
+};
+
+static inline pio_sm_config scsi_read_parity_program_get_default_config(uint offset) {
+    pio_sm_config c = pio_get_default_sm_config();
+    sm_config_set_wrap(&c, offset + scsi_read_parity_wrap_target, offset + scsi_read_parity_wrap);
+    return c;
+}
+#endif
+

File diff suppressed because it is too large
+ 523 - 314
lib/ZuluSCSI_platform_RP2040/scsi_accel_rp2040.cpp


+ 27 - 7
lib/ZuluSCSI_platform_RP2040/scsi_accel_rp2040.h

@@ -6,19 +6,39 @@
 
 void scsi_accel_rp2040_init();
 
-// Set SCSI access mode for write requests.
+// Select between synchronous and asynchronous SCSI transfer mode.
 // Setting syncOffset = 0 enables asynchronous SCSI.
 // Setting syncOffset > 0 enables synchronous SCSI.
-void scsi_accel_rp2040_setWriteMode(int syncOffset, int syncPeriod);
+void scsi_accel_rp2040_setSyncMode(int syncOffset, int syncPeriod);
 
+// Queue a request to write data from the buffer to SCSI bus.
+// This function typically returns immediately and the request will complete in background.
+// If there are too many queued requests, this function will block until previous request finishes.
 void scsi_accel_rp2040_startWrite(const uint8_t* data, uint32_t count, volatile int *resetFlag);
-void scsi_accel_rp2040_stopWrite(volatile int *resetFlag);
-void scsi_accel_rp2040_finishWrite(volatile int *resetFlag);
 
 // Query whether the data at pointer has already been read, i.e. buffer can be reused.
 // If data is NULL, checks if all writes have completed.
 bool scsi_accel_rp2040_isWriteFinished(const uint8_t* data);
 
-// Read data from SCSI bus.
-// Works for both asynchronous and synchronous modes.
-void scsi_accel_rp2040_read(uint8_t *buf, uint32_t count, int *parityError, volatile int *resetFlag);
+// Wait for all write requests to finish and release the bus.
+// If resetFlag is non-zero, aborts write immediately.
+void scsi_accel_rp2040_finishWrite(volatile int *resetFlag);
+
+// Queue a request to read data from SCSI bus to the buffer.
+// This function typically returns immediately and the request will complete in background.
+// If there are too many queued requests, this function will block until previous request finishes.
+void scsi_accel_rp2040_startRead(uint8_t *data, uint32_t count, int *parityError, volatile int *resetFlag);
+
+// Query whether data at address is part of a queued read request.
+// Returns true if there is no outstanding request.
+// If data is NULL, checks if all reads have completed.
+bool scsi_accel_rp2040_isReadFinished(const uint8_t* data);
+
+// Wait for a read request to complete.
+// If data is not NULL, waits only until the data at data[0] .. data[count-1] is valid.
+// If data is NULL, waits for all read requests to complete.
+// If there are no further read requests, releases the bus.
+// If resetFlag is non-zero, aborts read immediately.
+// If a parity error has been noticed in any buffer since starting the read, parityError is set to 1.
+void scsi_accel_rp2040_finishRead(const uint8_t *data, uint32_t count, int *parityError, volatile int *resetFlag);
+

+ 130 - 58
src/ZuluSCSI_disk.cpp

@@ -44,6 +44,17 @@ extern "C" {
 #define PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE 512
 #endif
 
+// Optimal size for read block from SCSI bus
+// For platforms with nonblocking transfer, this can be large.
+// For Akai MPC60 compatibility this has to be at least 5120
+#ifndef PLATFORM_OPTIMAL_SCSI_READ_BLOCK_SIZE
+#ifdef PLATFORM_SCSIPHY_HAS_NONBLOCKING_READ
+#define PLATFORM_OPTIMAL_SCSI_READ_BLOCK_SIZE 65536
+#else
+#define PLATFORM_OPTIMAL_SCSI_READ_BLOCK_SIZE 8192
+#endif
+#endif
+
 #ifndef PLATFORM_HAS_ROM_DRIVE
 // Dummy defines for platforms without ROM drive support
 #define AZPLATFORM_ROMDRIVE_PAGE_SIZE 1024
@@ -52,6 +63,22 @@ bool azplatform_read_romdrive(uint8_t *dest, uint32_t start, uint32_t count) { r
 bool azplatform_write_romdrive(const uint8_t *data, uint32_t start, uint32_t count) { return false; }
 #endif
 
+#ifndef PLATFORM_SCSIPHY_HAS_NONBLOCKING_READ
+// For platforms that do not have non-blocking read from SCSI bus
+void scsiStartRead(uint8_t* data, uint32_t count, int *parityError)
+{
+    scsiRead(data, count, parityError);
+}
+void scsiFinishRead(uint8_t* data, uint32_t count, int *parityError)
+{
+    
+}
+bool scsiIsReadFinished(const uint8_t *data)
+{
+    return true;
+}
+#endif
+
 // SD card sector size is always 512 bytes
 #define SD_SECTOR_SIZE 512
 
@@ -1268,8 +1295,9 @@ static struct {
     uint32_t bytes_sd; // Number of bytes that have been scheduled for transfer on SD card side
     uint32_t bytes_scsi; // Number of bytes that have been scheduled for transfer on SCSI side
 
-    uint32_t bytes_scsi_done;
+    uint32_t bytes_scsi_started;
     uint32_t sd_transfer_start;
+    int parityError;
 } g_disk_transfer;
 
 #ifdef PREFETCH_BUFFER_SIZE
@@ -1355,42 +1383,33 @@ void diskDataOut_callback(uint32_t bytes_complete)
     // For best performance, do SCSI reads in blocks of 4 or more bytes
     bytes_complete &= ~3;
 
-    if (g_disk_transfer.bytes_scsi_done < g_disk_transfer.bytes_scsi)
+    if (g_disk_transfer.bytes_scsi_started < g_disk_transfer.bytes_scsi)
     {
         // How many bytes remaining in the transfer?
-        uint32_t remain = g_disk_transfer.bytes_scsi - g_disk_transfer.bytes_scsi_done;
+        uint32_t remain = g_disk_transfer.bytes_scsi - g_disk_transfer.bytes_scsi_started;
         uint32_t len = remain;
         
-        // Limit maximum amount of data transferred at one go, to give enough callbacks to SD driver.
-        // Select the limit based on total bytes in the transfer.
-        // Transfer size is reduced towards the end of transfer to reduce the dead time between
-        // end of SCSI transfer and the SD write completing.
-        uint32_t limit = g_disk_transfer.bytes_scsi / 8;
-        uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;
-        if (limit < PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE) limit = PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE;
-        if (limit > PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE) limit = PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE;
-        if (limit > len) limit = PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE;
-        if (limit < bytesPerSector) limit = bytesPerSector;
-
-        if (len > limit)
-        {
-            len = limit;
-        }
-
         // Split read so that it doesn't wrap around buffer edge
         uint32_t bufsize = sizeof(scsiDev.data);
-        uint32_t start = (g_disk_transfer.bytes_scsi_done % bufsize);
+        uint32_t start = (g_disk_transfer.bytes_scsi_started % bufsize);
         if (start + len > bufsize)
             len = bufsize - start;
 
+        // Apply platform-specific optimized transfer sizes
+        if (len > PLATFORM_OPTIMAL_SCSI_READ_BLOCK_SIZE)
+        {
+            len = PLATFORM_OPTIMAL_SCSI_READ_BLOCK_SIZE;
+        }
+
         // Don't overwrite data that has not yet been written to SD card
         uint32_t sd_ready_cnt = g_disk_transfer.bytes_sd + bytes_complete;
-        if (g_disk_transfer.bytes_scsi_done + len > sd_ready_cnt + bufsize)
-            len = sd_ready_cnt + bufsize - g_disk_transfer.bytes_scsi_done;
+        if (g_disk_transfer.bytes_scsi_started + len > sd_ready_cnt + bufsize)
+            len = sd_ready_cnt + bufsize - g_disk_transfer.bytes_scsi_started;
 
         // Keep transfers a multiple of sector size.
         // Macintosh SCSI driver seems to get confused if we have a delay
         // in middle of a sector.
+        uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;
         if (remain >= bytesPerSector && len % bytesPerSector != 0)
         {
             len -= len % bytesPerSector;
@@ -1400,17 +1419,8 @@ void diskDataOut_callback(uint32_t bytes_complete)
             return;
 
         // azdbg("SCSI read ", (int)start, " + ", (int)len);
-        int parityError = 0;
-        scsiRead(&scsiDev.data[start], len, &parityError);
-        g_disk_transfer.bytes_scsi_done += len;
-
-        if (parityError)
-        {
-            scsiDev.status = CHECK_CONDITION;
-            scsiDev.target->sense.code = ABORTED_COMMAND;
-            scsiDev.target->sense.asc = SCSI_PARITY_ERROR;
-            scsiDev.phase = STATUS;
-        }
+        scsiStartRead(&scsiDev.data[start], len, &g_disk_transfer.parityError);
+        g_disk_transfer.bytes_scsi_started += len;
     }
 }
 
@@ -1424,46 +1434,108 @@ void diskDataOut()
     g_disk_transfer.buffer = scsiDev.data;
     g_disk_transfer.bytes_scsi = blockcount * bytesPerSector;
     g_disk_transfer.bytes_sd = 0;
-    g_disk_transfer.bytes_scsi_done = 0;
+    g_disk_transfer.bytes_scsi_started = 0;
     g_disk_transfer.sd_transfer_start = 0;
+    g_disk_transfer.parityError = 0;
 
     while (g_disk_transfer.bytes_sd < g_disk_transfer.bytes_scsi
            && scsiDev.phase == DATA_OUT
            && !scsiDev.resetFlag)
     {
-        // Read next block from SCSI bus
-        if (g_disk_transfer.bytes_sd == g_disk_transfer.bytes_scsi_done)
+        // Figure out how many contiguous bytes are available for writing to SD card.
+        uint32_t bufsize = sizeof(scsiDev.data);
+        uint32_t start = g_disk_transfer.bytes_sd % bufsize;
+        uint32_t len = 0;
+
+        // How much data until buffer edge wrap?
+        uint32_t available = g_disk_transfer.bytes_scsi_started - g_disk_transfer.bytes_sd;
+        if (start + available > bufsize)
+            available = bufsize - start;
+
+        // Count number of finished sectors
+        if (scsiIsReadFinished(&scsiDev.data[start + available - 1]))
         {
-            diskDataOut_callback(0);
+            len = available;
+        }
+        else
+        {
+            while (len < available && scsiIsReadFinished(&scsiDev.data[start + len + SD_SECTOR_SIZE - 1]))
+            {
+                len += SD_SECTOR_SIZE;
+            }
         }
 
-        // Figure out longest continuous block in buffer
-        uint32_t bufsize = sizeof(scsiDev.data);
-        uint32_t start = g_disk_transfer.bytes_sd % bufsize;
-        uint32_t len = g_disk_transfer.bytes_scsi_done - g_disk_transfer.bytes_sd;
-        if (start + len > bufsize) len = bufsize - start;
+        // In case the last sector is partial (256 byte SCSI sectors)
+        if (len > available)
+        {
+            len = available;
+        }
 
-        // Try to do writes in multiple of 512 bytes
-        // This allows better performance for SD card access.
-        if (len >= 512) len &= ~511;
+        // Apply platform-specific write size blocks for optimization
+        if (len > PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE)
+        {
+            len = PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE;
+        }
 
-        // Start writing to SD card and simultaneously reading more from SCSI bus
-        uint8_t *buf = &scsiDev.data[start];
-        g_disk_transfer.sd_transfer_start = start;
-        // azdbg("SD write ", (int)start, " + ", (int)len);
-        azplatform_set_sd_callback(&diskDataOut_callback, buf);
-        if (img.file.write(buf, len) != len)
+        uint32_t remain_in_transfer = g_disk_transfer.bytes_scsi - g_disk_transfer.bytes_sd;
+        if (len < bufsize - start && len < remain_in_transfer)
         {
-            azlog("SD card write failed: ", SD.sdErrorCode());
-            scsiDev.status = CHECK_CONDITION;
-            scsiDev.target->sense.code = MEDIUM_ERROR;
-            scsiDev.target->sense.asc = WRITE_ERROR_AUTO_REALLOCATION_FAILED;
-            scsiDev.phase = STATUS;
+            // Use large write blocks in middle of transfer and smaller at the end of transfer.
+            // This improves performance for large writes and reduces latency at end of request.
+            uint32_t min_write_size = PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE;
+            if (remain_in_transfer <= PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE)
+            {
+                min_write_size = PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE;
+            }
+
+            if (len < min_write_size)
+            {                
+                len = 0;
+            }
+        }
+
+        if (len == 0)
+        {
+            // Nothing ready to transfer, check if we can read more from SCSI bus
+            diskDataOut_callback(0);
+        }
+        else
+        {
+            // Finalize transfer on SCSI side
+            scsiFinishRead(&scsiDev.data[start], len, &g_disk_transfer.parityError);
+
+            // Check parity error status before writing to SD card
+            if (g_disk_transfer.parityError)
+            {
+                scsiDev.status = CHECK_CONDITION;
+                scsiDev.target->sense.code = ABORTED_COMMAND;
+                scsiDev.target->sense.asc = SCSI_PARITY_ERROR;
+                scsiDev.phase = STATUS;
+                break;
+            }
+
+            // Start writing to SD card and simultaneously start new SCSI transfers
+            // when buffer space is freed.
+            uint8_t *buf = &scsiDev.data[start];
+            g_disk_transfer.sd_transfer_start = start;
+            // azdbg("SD write ", (int)start, " + ", (int)len, " ", bytearray(buf, len));
+            azplatform_set_sd_callback(&diskDataOut_callback, buf);
+            if (img.file.write(buf, len) != len)
+            {
+                azlog("SD card write failed: ", SD.sdErrorCode());
+                scsiDev.status = CHECK_CONDITION;
+                scsiDev.target->sense.code = MEDIUM_ERROR;
+                scsiDev.target->sense.asc = WRITE_ERROR_AUTO_REALLOCATION_FAILED;
+                scsiDev.phase = STATUS;
+            }
+            azplatform_set_sd_callback(NULL, NULL);
+            g_disk_transfer.bytes_sd += len;
         }
-        g_disk_transfer.bytes_sd += len;
     }
 
-    azplatform_set_sd_callback(NULL, NULL);
+    // Release SCSI bus
+    scsiFinishRead(NULL, 0, &g_disk_transfer.parityError);
+
     transfer.currentBlock += blockcount;
     scsiDev.dataPtr = scsiDev.dataLen = 0;
 

+ 85 - 0
utils/speed_tester.py

@@ -0,0 +1,85 @@
+#!/usr/bin/python3
+
+'''This script writes pseudo-random data to a block device in power-of-two sized requests
+(1 to 2048 sectors), reads it back to verify it, and reports median read and write speeds.
+It will destroy the contents of the block device.'''
+
+import sys
+import os
+import mmap
+import random
+import time
+
+class BlockDevice:
+    def __init__(self, path, sectorsize = 512):
+        self.path = path
+        self.dev = os.fdopen(os.open(path, os.O_RDWR | os.O_DIRECT | os.O_SYNC), "rb+", 0)
+        self.sectorsize = sectorsize
+
+    def write_block(self, first_sector, sector_count, seed):
+        rnd = random.Random(seed)
+        buffer = mmap.mmap(-1, sector_count * self.sectorsize)
+        buffer.write(rnd.randbytes(sector_count * self.sectorsize))
+        
+        start = time.time()
+        self.dev.seek(first_sector * self.sectorsize)
+        self.dev.write(buffer)
+        elapsed = time.time() - start
+        speed = sector_count * self.sectorsize / elapsed / 1e6
+
+        print("Wrote  %16s, %8d, %8d, %8d, %8.3f MB/s" % (self.path, first_sector, sector_count, seed, speed))
+        return speed
+
+    def verify_block(self, first_sector, sector_count, seed):
+        rnd = random.Random(seed)
+        buffer = mmap.mmap(-1, sector_count * self.sectorsize)
+
+        start = time.time()
+        self.dev.seek(first_sector * self.sectorsize)
+        self.dev.readinto(buffer)
+        elapsed = time.time() - start
+        speed = sector_count * self.sectorsize / elapsed / 1e6
+
+        print("Verify %16s, %8d, %8d, %8d, %8.3f MB/s" % (self.path, first_sector, sector_count, seed, speed))
+
+        buffer.seek(0)
+        actual = buffer.read(sector_count * self.sectorsize)
+        expected = rnd.randbytes(sector_count * self.sectorsize)
+        if expected != actual:
+            print("Compare error, device = %s, sectorsize = %d, first_sector = %d, sector_count = %d, seed = %d"
+                % (self.path, self.sectorsize, first_sector, sector_count, seed))
+            fname = "%d" % time.time()
+            open(fname + ".expected", "wb").write(expected)
+            open(fname + ".actual", "wb").write(actual)
+            print("Saved data to %s.expected/actual" % fname)
+            raise Exception("Compare error")
+        
+        return speed
+
+if __name__ == "__main__":
+    blockdev = BlockDevice(sys.argv[1])
+    
+    seed = 1
+    
+    results = '# ReqSize(B)  RdSpeed(MB/s)  WrSpeed(MB/s)\n'
+    for i in range(12):
+        seed += 1
+        seccount = 2**i
+        wr_speeds = []
+        rd_speeds = []
+        samplecount = 8
+        for i in range(samplecount):
+            wr_speeds.append(blockdev.write_block(0, seccount, seed))
+            time.sleep(0.2)
+            rd_speeds.append(blockdev.verify_block(0, seccount, seed))
+            time.sleep(0.2)
+
+        # Get median
+        wr_speeds.sort()
+        rd_speeds.sort()
+        wr_speed = wr_speeds[samplecount//2]
+        rd_speed = rd_speeds[samplecount//2]
+        
+        results += '%8d %8.3f %8.3f\n' % (seccount * 512, rd_speed, wr_speed)
+    
+    print(results)
+

Some files were not shown because too many files changed in this diff