Просмотр исходного кода

Merge remote-tracking branch 'private/main' into pullreq_performance

Petteri Aimonen 3 года назад
Родитель
Коммит
85413a4b1f

+ 1 - 0
azulscsi.ini

@@ -12,6 +12,7 @@ EnableUnitAttention = 0 # Post UNIT_ATTENTION status on power-on or SD card hotp
 EnableSCSI2 = 1 # Enable faster speeds of SCSI2
 EnableSelLatch = 0 # For Philips P2000C and other devices that release SEL signal before BSY
 MapLunsToIDs = 0 # For Philips P2000C simulate multiple LUNs
+MaxSyncSpeed = 10 # Set to 5 or 10 to enable synchronous SCSI mode, 0 to disable
 
 # Settings that can be specified either per-device or for all devices.
 #Vendor = "QUANTUM"

+ 42 - 32
greenpak/SCSI_Accelerator_SLG46824.gp6

@@ -1,9 +1,9 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<GPDProject version="22" oldestCompatibleVersion="22" GPDVersion="6.27.001" lastChange="3.3.2022 14.33">
+<GPDProject version="22" oldestCompatibleVersion="22" GPDVersion="6.27.001" lastChange="11.4.2022 12.26">
     <generalProjectSettings/>
     <chip family="04" type="06" friendlyName="GreenPAK 6" partNumber="40" package="16">
-        <nvmData registerLenght="2048">0 0 0 0 0 0 8D 30 49 C2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 D2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 30 70 0 30 30 30 30 0 0 30 30 30 0 30 30 30 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 22 30 C 0 0 0 0 0 0 0 0 0 0 0 D7 0 0 0 0 0 40 0 0 0 0 0 0 0 0 20 0 1 0 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 A5</nvmData>
-        <checksum crc32="0xE41C926D" version="5"/>
+        <nvmData registerLenght="2048">C4 9 0 0 0 0 0 0 38 C2 0 0 0 0 0 0 0 0 0 0 0 0 D0 8 13 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 E7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 30 70 0 30 20 30 30 0 0 30 30 30 0 30 30 30 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 22 30 C 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 40 0 0 0 0 0 0 D7 14 20 0 1 0 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 A5</nvmData>
+        <checksum crc32="0xB44E9FD4" version="5"/>
         <VDDItem id="0">
             <item id="0" caption="VDD (PIN 20)">
                 <graphics pos="(-590.00,30.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
@@ -17,20 +17,20 @@
         </IOPad>
         <IOPad id="2" useCaseMode="1">
             <item id="2" caption="PIN 18 (IO1)">
-                <graphics pos="(-171.00,277.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(-248.00,278.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-45.56,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO1 / DATA_TOGGLE</textLabel>
             </item>
         </IOPad>
         <IOPad id="3" useCaseMode="1">
             <item id="3" caption="PIN 17 (IO2)">
-                <graphics pos="(-174.00,502.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(-251.00,504.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-29.23,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO2 / ENABLE</textLabel>
             </item>
         </IOPad>
-        <IOPad id="4" useCaseMode="0">
+        <IOPad id="4" useCaseMode="1">
             <item id="4" caption="PIN 16 (IO3)">
-                <graphics pos="(-590.00,310.00)" angle="0" flipping="1" hidden="0" tOrigin="(20.00,10.00)"/>
-                <textLabel pos="(-4.71,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO3</textLabel>
+                <graphics pos="(-249.00,197.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <textLabel pos="(-25.28,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO3 / RDWR</textLabel>
             </item>
         </IOPad>
         <IOPad id="5" useCaseMode="0">
@@ -102,13 +102,13 @@
         </IOPad>
         <IOPad id="18" useCaseMode="4">
             <item id="18" caption="PIN 2 (IO13)">
-                <graphics pos="(314.00,320.00)" angle="0" flipping="1" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(411.00,320.00)" angle="0" flipping="1" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-7.69,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">OUT_REQ</textLabel>
             </item>
         </IOPad>
         <IOPad id="19" useCaseMode="1">
             <item id="19" caption="PIN 1 (IO14)">
-                <graphics pos="(-175.00,390.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(-250.00,387.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-6.55,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">ACK_BUF</textLabel>
             </item>
         </IOPad>
@@ -147,7 +147,7 @@
         </item>
         <LUT id="35" regularShape="0" mode="0">
             <item id="35" caption="2-bit LUT0/DFF/LATCH0">
-                <graphics pos="(17.00,28.00)" angle="90" flipping="0" hidden="1" tOrigin="(20.00,15.00)"/>
+                <graphics pos="(-88.00,177.00)" angle="90" flipping="0" hidden="0" tOrigin="(20.00,15.00)"/>
             </item>
         </LUT>
         <LUT id="36" regularShape="0" mode="0">
@@ -167,8 +167,7 @@
         </LUT>
         <LUT id="39" regularShape="0" mode="0">
             <item id="39" caption="3-bit LUT0/DFF/LATCH3">
-                <graphics pos="(130.00,310.00)" angle="90" flipping="1" hidden="0" tOrigin="(20.00,15.00)"/>
-                <textLabel pos="(-28.00,68.36)" angle="-90.00" textColor="#ffff00ff" backgroundColor="#14141464">!(IN0 &amp; (IN1 ^ IN2))</textLabel>
+                <graphics pos="(130.00,310.00)" angle="90" flipping="1" hidden="1" tOrigin="(20.00,15.00)"/>
             </item>
         </LUT>
         <item id="40" caption="3-bit LUT1/DFF/LATCH4">
@@ -202,7 +201,7 @@ DATA_TOGGLE</textLabel>
             </item>
         </LUT>
         <item id="46" caption="MF0 (4-bit LUT0, DFF/LATCH9, 16-bit CNT0/DLY0/FSM0)">
-            <graphics pos="(-500.00,-20.00)" angle="0" flipping="0" hidden="1" tOrigin="(155.00,60.00)"/>
+            <graphics pos="(-399.00,35.00)" angle="0" flipping="0" hidden="1" tOrigin="(155.00,60.00)"/>
         </item>
         <item id="47" caption="MF1 (3-bit LUT7, DFF/LATCH10, 8-bit CNT1/DLY1)">
             <graphics pos="(-500.00,140.00)" angle="0" flipping="0" hidden="1" tOrigin="(155.00,60.00)"/>
@@ -227,15 +226,17 @@ DATA_TOGGLE</textLabel>
         </item>
         <LUT id="54" regularShape="0" mode="0">
             <item id="54" caption="4-bit LUT0 (MF0)">
-                <graphics pos="(-365.00,25.00)" angle="90" flipping="0" hidden="1" tOrigin="(20.00,15.00)"/>
+                <graphics pos="(136.00,310.00)" angle="90" flipping="0" hidden="0" tOrigin="(20.00,15.00)"/>
+                <textLabel pos="(46.00,52.19)" angle="-90.00" textColor="#ffff00ff" backgroundColor="#14141464">IN3 = 0 (Write to SCSI bus): !(IN0 &amp; (IN1 ^ IN2))
+IN3 = 1 (Read from SCSI bus): (!IN0) &amp; (IN1 ^ IN2)</textLabel>
             </item>
         </LUT>
         <item id="55" caption="DFF/LATCH9 (MF0)">
-            <graphics pos="(-450.00,-20.00)" angle="0" flipping="0" hidden="1" tOrigin="(25.00,10.00)"/>
+            <graphics pos="(-357.50,94.50)" angle="0" flipping="0" hidden="1" tOrigin="(25.00,10.00)"/>
         </item>
         <CNTDLY id="56" externalClockFrequence="0" externalClockFreqUnit="kHz">
             <item id="56" caption="16-bit CNT0/DLY0/FSM0 (MF0)">
-                <graphics pos="(-450.00,-20.00)" angle="0" flipping="0" hidden="1" tOrigin="(35.00,16.00)"/>
+                <graphics pos="(-208.50,87.50)" angle="0" flipping="0" hidden="1" tOrigin="(35.00,16.00)"/>
             </item>
         </CNTDLY>
         <LUT id="57" regularShape="0" mode="0">
@@ -335,29 +336,38 @@ DATA_TOGGLE</textLabel>
         <wire output="148" input="112" autoRouting="1" pen="#ff8000ff;1.00;1;32;128" lineType="2" protected="1" CWLid="1" wireText="NET1" wireState="0">
             <points>(-702.00,712.00); (-695.00,712.00)</points>
         </wire>
-        <wire output="142" input="549" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="8" wireText="NET8" wireState="0">
-            <points>(-109.00,293.00); (-31.00,293.00); (-31.00,326.00); (114.00,326.00)</points>
-        </wire>
-        <wire output="161" input="551" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
-            <points>(185.00,326.00); (191.00,326.00); (191.00,227.00); (-8.00,227.00); (-8.00,192.00); (-2.00,192.00)</points>
-        </wire>
         <wire output="143" input="537" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="10" wireText="NET10" wireState="0">
-            <points>(-112.00,518.00); (335.00,518.00); (335.00,360.00)</points>
+            <points>(-189.00,520.00); (432.00,520.00); (432.00,360.00)</points>
         </wire>
         <wire output="143" input="553" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="10" wireText="NET10" wireState="0">
-            <points>(-112.00,518.00); (45.00,518.00); (45.00,216.00)</points>
+            <points>(-189.00,520.00); (45.00,520.00); (45.00,216.00)</points>
         </wire>
         <wire output="142" input="552" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="8" wireText="NET8" wireState="0">
-            <points>(-109.00,293.00); (-31.00,293.00); (-31.00,182.00); (-2.00,182.00)</points>
+            <points>(-186.00,294.00); (-13.00,294.00); (-13.00,182.00); (-2.00,182.00)</points>
+        </wire>
+        <wire output="157" input="621" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="7" wireText="NET7" wireState="0">
+            <points>(-188.00,403.00); (105.00,403.00); (105.00,341.00); (120.00,341.00)</points>
+        </wire>
+        <wire output="142" input="622" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="8" wireText="NET8" wireState="0">
+            <points>(-186.00,294.00); (105.00,294.00); (105.00,331.00); (120.00,331.00)</points>
+        </wire>
+        <wire output="162" input="623" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="11" wireText="NET11" wireState="0">
+            <points>(89.00,186.00); (114.00,186.00); (114.00,321.00); (120.00,321.00)</points>
+        </wire>
+        <wire output="191" input="536" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
+            <points>(191.00,326.00); (388.00,326.00)</points>
+        </wire>
+        <wire output="144" input="624" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="12" wireText="NET12" wireState="0">
+            <points>(-187.00,213.00); (-127.00,213.00); (-127.00,311.00); (120.00,311.00)</points>
         </wire>
-        <wire output="157" input="548" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="7" wireText="NET7" wireState="0">
-            <points>(-113.00,406.00); (93.00,406.00); (93.00,339.00); (114.00,339.00)</points>
+        <wire output="84" input="551" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="13" wireText="NET13" wireState="0">
+            <points>(-30.00,192.00); (-2.00,192.00)</points>
         </wire>
-        <wire output="162" input="550" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="11" wireText="NET11" wireState="0">
-            <points>(89.00,186.00); (95.00,186.00); (95.00,312.00); (114.00,312.00)</points>
+        <wire output="191" input="262" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
+            <points>(191.00,326.00); (213.00,326.00); (213.00,113.00); (-123.00,113.00); (-123.00,184.00); (-107.00,184.00)</points>
         </wire>
-        <wire output="161" input="536" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
-            <points>(185.00,326.00); (291.00,326.00)</points>
+        <wire output="144" input="261" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="12" wireText="NET12" wireState="0">
+            <points>(-187.00,213.00); (-126.00,213.00); (-126.00,200.00); (-107.00,200.00)</points>
         </wire>
     </chip>
     <emulatorConfiguration version="1">

+ 6 - 6
greenpak/SCSI_Accelerator_SLG46824.hex

@@ -1,14 +1,14 @@
-:100000000000000000008D3049C200000000000028
-:1000100000000000000000000000000000000000E0
+:10000000C40900000000000038C200000000000029
+:10001000000000000000D0081301000000000000F4
 :1000200000000000000000000000000000000000D0
 :1000300000000000000000000000000000000000C0
-:100040000000D200000000000000000000000000DE
+:100040000000E700000000000000000000000000C9
 :1000500000000000000000000000000000000000A0
-:100060000030700030303030000030303000303040
+:100060000030700030203030000030303000303050
 :100070003030000000000000000000000000000020
 :1000800000000000001422300C00000000000000FE
-:1000900000000000D7000000000040000000000049
-:1000A0000000002000010000000201000002000129
+:10009000060000000000000000004000000000001A
+:1000A00000D714200001000000020100000200013E
 :1000B0000000020100000200010000020100000235
 :1000C0000001000002000100000001010100000029
 :1000D0000000000000000000000000000000000020

BIN
greenpak/SCSI_Accelerator_SLG46824.png


+ 4 - 1
lib/AzulSCSI_platform_GD32F205/AzulSCSI_platform.h

@@ -22,7 +22,10 @@ extern const char *g_azplatform_name;
 #elif defined(AZULSCSI_V1_1)
 #   define PLATFORM_NAME "AzulSCSI v1.1"
 #   define PLATFORM_REVISION "1.1"
-#   define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_ASYNC_50
+#   define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_SYNC_10
+#   define PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE 4096
+#   define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 65536
+#   define PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE 8192
 #   include "AzulSCSI_v1_1_gpio.h"
 #endif
 

+ 13 - 0
lib/AzulSCSI_platform_GD32F205/AzulSCSI_v1_1_gpio.h

@@ -95,6 +95,19 @@
 #define SCSI_ATN_PIN  GPIO_PIN_12
 #define SCSI_IN_ACK_IDX 0
 
+// Extra signals used with EXMC for synchronous mode
+#define SCSI_IN_ACK_EXMC_NWAIT_PORT GPIOD
+#define SCSI_IN_ACK_EXMC_NWAIT_PIN  GPIO_PIN_6
+#define SCSI_OUT_REQ_EXMC_NOE_PORT  GPIOD
+#define SCSI_OUT_REQ_EXMC_NOE_PIN   GPIO_PIN_4
+#define SCSI_OUT_REQ_EXMC_NOE_IDX   4
+#define SCSI_EXMC_DATA_SHIFT 5
+#define SCSI_EXMC_DMA DMA0
+#define SCSI_EXMC_DMA_RCU RCU_DMA0
+#define SCSI_EXMC_DMACH DMA_CH0
+#define SCSI_SYNC_TIMER TIMER1
+#define SCSI_SYNC_TIMER_RCU RCU_TIMER1
+
 // BSY pin uses EXTI interrupt
 #define SCSI_BSY_PORT GPIOB
 #define SCSI_BSY_PIN  GPIO_PIN_10

+ 1 - 1
lib/AzulSCSI_platform_GD32F205/greenpak.cpp

@@ -34,7 +34,7 @@ static void greenpak_gpio_init()
     gpio_init(GREENPAK_I2C_PORT, GPIO_MODE_OUT_PP, GPIO_OSPEED_2MHZ, GREENPAK_I2C_SCL);
 
     // Data bits used for communication
-    uint32_t greenpak_io = GREENPAK_PLD_IO1 | GREENPAK_PLD_IO2;
+    uint32_t greenpak_io = GREENPAK_PLD_IO1 | GREENPAK_PLD_IO2 | GREENPAK_PLD_IO3;
     gpio_bit_reset(SCSI_OUT_PORT, greenpak_io);
     gpio_init(SCSI_OUT_PORT, GPIO_MODE_OUT_PP, GPIO_OSPEED_50MHZ, greenpak_io);
 }

+ 7 - 7
lib/AzulSCSI_platform_GD32F205/greenpak_fw.h

@@ -1,18 +1,18 @@
 const uint8_t g_greenpak_fw[] = {
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8d, 0x30, 0x49, 0xc2, 0x00, 0x00,
+  0xc4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xc2, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x08,
+  0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe7, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x30, 0x70, 0x00, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30,
+  0x00, 0x30, 0x70, 0x00, 0x30, 0x20, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30,
   0x30, 0x00, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x14, 0x22, 0x30, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xd7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00,
+  0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0xd7, 0x14, 0x20, 0x00, 0x01, 0x00, 0x00,
   0x00, 0x02, 0x01, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x02, 0x01,
   0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x02,
   0x00, 0x01, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01,

+ 61 - 27
lib/AzulSCSI_platform_GD32F205/scsiPhy.cpp

@@ -6,6 +6,7 @@
 #include "scsi_accel_asm.h"
 #include "scsi_accel_dma.h"
 #include "scsi_accel_greenpak.h"
+#include "scsi_accel_sync.h"
 #include "AzulSCSI_log.h"
 #include "AzulSCSI_log_trace.h"
 #include "AzulSCSI_config.h"
@@ -29,6 +30,13 @@ static const char *g_scsi_phy_mode_names[] = {
     "Unknown", "PIO", "DMA_TIMER", "GREENPAK_PIO", "GREENPAK_DMA"
 };
 
+// State of polling write request
+static struct {
+    const uint8_t *data;
+    uint32_t count;
+    bool use_sync_mode;
+} g_scsi_writereq;
+
 static void init_irqs();
 
 /***********************/
@@ -169,8 +177,13 @@ extern "C" void scsiPhyReset(void)
 
     g_scsi_sts_selection = 0;
     g_scsi_ctrl_bsy = 0;
+    g_scsi_writereq.count = 0;
     init_irqs();
 
+#ifdef SCSI_SYNC_MODE_AVAILABLE
+    scsi_accel_sync_init();
+#endif
+
     selectPhyMode();
 
     if (g_scsi_phy_mode == PHY_MODE_DMA_TIMER)
@@ -295,16 +308,15 @@ extern "C" void scsiWrite(const uint8_t* data, uint32_t count)
     scsiFinishWrite();
 }
 
-static struct {
-    const uint8_t *data;
-    uint32_t count;
-} g_scsi_writereq;
-
 extern "C" void scsiStartWrite(const uint8_t* data, uint32_t count)
 {
     scsiLogDataIn(data, count);
 
-    if (g_scsi_phy_mode == PHY_MODE_PIO || g_scsi_phy_mode == PHY_MODE_GREENPAK_PIO)
+    g_scsi_writereq.use_sync_mode = (g_scsi_phase == DATA_IN && scsiDev.target->syncOffset > 0);
+
+    if (g_scsi_phy_mode == PHY_MODE_PIO
+        || g_scsi_phy_mode == PHY_MODE_GREENPAK_PIO
+        || g_scsi_writereq.use_sync_mode)
     {
         // Software based bit-banging.
         // Write requests are queued and then executed in isWriteFinished() callback.
@@ -346,20 +358,28 @@ static void processPollingWrite(uint32_t count)
     
     const uint8_t *data = g_scsi_writereq.data;
     uint32_t count_words = count / 4;
-    if (count_words * 4 == count)
+
+    if (g_scsi_writereq.use_sync_mode)
+    {
+        // Synchronous mode transfer
+        scsi_accel_sync_send(data, count, &scsiDev.resetFlag);
+    }
+    else if (count_words * 4 == count)
     {
-        // Use accelerated subroutine
         if (g_scsi_phy_mode == PHY_MODE_GREENPAK_PIO)
         {
+            // GreenPAK PIO accelerated asynchronous transfer
             scsi_accel_greenpak_send((const uint32_t*)data, count_words, &scsiDev.resetFlag);
         }
         else
         {
+            // Assembler optimized asynchronous transfer
             scsi_accel_asm_send((const uint32_t*)data, count_words, &scsiDev.resetFlag);
         }
     }
     else
     {
+        // Use simple loop for unaligned transfers
         for (uint32_t i = 0; i < count; i++)
         {
             if (scsiDev.resetFlag) break;
@@ -397,38 +417,39 @@ static bool isPollingWriteFinished(const uint8_t *data)
 
 extern "C" bool scsiIsWriteFinished(const uint8_t *data)
 {
+    // Check if there is still a polling transfer in progress
+    if (!isPollingWriteFinished(data))
+    {
+        // Process the transfer piece-by-piece while waiting
+        // for SD card to react.
+        int max_count = g_scsi_writereq.count / 8;
+        max_count &= ~255;
+        if (max_count < 256) max_count = 256;
+        processPollingWrite(max_count);
+        return isPollingWriteFinished(data);
+    }
+    
     if (g_scsi_phy_mode == PHY_MODE_DMA_TIMER || g_scsi_phy_mode == PHY_MODE_GREENPAK_DMA)
     {
         return scsi_accel_dma_isWriteFinished(data);
     }
     else
     {
-        // Check if there is still a polling transfer in progress
-        if (!isPollingWriteFinished(data))
-        {
-            // Process the transfer piece-by-piece while waiting
-            // for SD card to react.
-            processPollingWrite(256);
-            return isPollingWriteFinished(data);
-        }
-
         return true;
     }
 }
 
 extern "C" void scsiFinishWrite()
 {
-    if (g_scsi_phy_mode == PHY_MODE_DMA_TIMER || g_scsi_phy_mode == PHY_MODE_GREENPAK_DMA)
+    if (g_scsi_writereq.count)
     {
-        scsi_accel_dma_finishWrite(&scsiDev.resetFlag);
+        // Finish previously started polling write request.
+        processPollingWrite(g_scsi_writereq.count);
     }
-    else
+
+    if (g_scsi_phy_mode == PHY_MODE_DMA_TIMER || g_scsi_phy_mode == PHY_MODE_GREENPAK_DMA)
     {
-        // Finish previously started polling write request.
-        if (g_scsi_writereq.count)
-        {
-            processPollingWrite(g_scsi_writereq.count);
-        }
+        scsi_accel_dma_finishWrite(&scsiDev.resetFlag);
     }
 }
 
@@ -460,13 +481,26 @@ extern "C" void scsiRead(uint8_t* data, uint32_t count, int* parityError)
     *parityError = 0;
 
     uint32_t count_words = count / 4;
-    if (count_words * 4 == count)
+    bool use_greenpak = (g_scsi_phy_mode == PHY_MODE_GREENPAK_DMA || g_scsi_phy_mode == PHY_MODE_GREENPAK_PIO);
+
+    if (g_scsi_phase == DATA_OUT && scsiDev.target->syncOffset > 0)
+    {
+        // Synchronous data transfer
+        scsi_accel_sync_recv(data, count, parityError, &scsiDev.resetFlag);
+    }
+    else if (count_words * 4 == count && count_words >= 2 && use_greenpak)
+    {
+        // GreenPAK accelerated receive can handle a multiple of 4 bytes with a minimum of 8 bytes.
+        scsi_accel_greenpak_recv((uint32_t*)data, count_words, &scsiDev.resetFlag);
+    }
+    else if (count_words * 4 == count && count_words >= 1)
     {
-        // Use accelerated subroutine
+        // Optimized ASM subroutine can handle a multiple of 4 bytes with a minimum of 4 bytes.
         scsi_accel_asm_recv((uint32_t*)data, count_words, &scsiDev.resetFlag);
     }
     else
     {
+        // Use a simple loop for short and unaligned transfers
         for (uint32_t i = 0; i < count; i++)
         {
             if (scsiDev.resetFlag) break;

+ 0 - 3
lib/AzulSCSI_platform_GD32F205/scsiPhy.h

@@ -42,9 +42,6 @@ uint32_t scsiEnterPhaseImmediate(int phase);
 // Release all signals
 void scsiEnterBusFree(void);
 
-//void scsiSetDataCount(uint32_t count);
-//int scsiFifoReady(void);
-
 // Blocking data transfer
 void scsiWrite(const uint8_t* data, uint32_t count);
 void scsiRead(uint8_t* data, uint32_t count, int* parityError);

+ 174 - 0
lib/AzulSCSI_platform_GD32F205/scsi_accel_greenpak.cpp

@@ -10,8 +10,17 @@ void scsi_accel_greenpak_send(const uint32_t *buf, uint32_t num_words, volatile
     assert(false);
 }
 
+void scsi_accel_greenpak_recv(uint32_t *buf, uint32_t num_words, volatile int *resetFlag)
+{
+    assert(false);
+}
+
 #else
 
+/*********************************************************/
+/* Optimized writes to SCSI bus in GREENPAK_PIO mode     */
+/*********************************************************/
+
 extern const uint32_t g_scsi_out_byte_to_bop_pld1hi[256];
 extern const uint32_t g_scsi_out_byte_to_bop_pld1lo[256];
 
@@ -188,4 +197,169 @@ const uint32_t g_scsi_out_byte_to_bop_pld1lo[256] =
 
 #undef X
 
+/*********************************************************/
+/* Optimized reads from SCSI bus in GREENPAK_PIO mode    */
+/*********************************************************/
+
+// Wait for ACK to go high and back low.
+// This indicates that there is a byte ready to be read.
+// If an interrupt occurs in the middle, we may miss ACK going high.
+// In that case, verify that REQ is already low.
+#define ASM_WAIT_ACK(x) \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"    wait_req_low_" x "_%=: \n" \
+"        cpsid   i \n" \
+"        ldr     %[tmp2], [%[req_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[reset_flag]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        cpsie   i \n" \
+"        b.n     wait_req_low_" x "_%= \n" \
+"    ack_is_high_" x "_%=: \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"    wait_ack_low_" x "_%=: \n" \
+"        cpsid   i \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[reset_flag]] \n" \
+"        cbnz    %[tmp2], ack_is_low_" x "_%= \n" \
+"        cpsie   i \n" \
+"        b.n     wait_ack_low_" x "_%= \n" \
+"    ack_is_low_" x "_%=: \n"
+
+// Prepare for reception of data by loading the next PLD_IO1 value
+// and disabling interrupts.
+#define ASM_PREP_RECV(l) \
+"        mov    %[tmp1], %[" l "] \n" \
+"        cpsid  i \n"
+
+// Read GPIO bus, take the data byte and toggle PLD_IO1.
+// Note that the PLD_IO1 write is done first to reduce latency, but
+// the istat value that is read by next instruction is still the old
+// one due to IO port delays. Interrupts must be disabled for this
+// sequence to work correctly.
+//
+// d is the name of register where data is to be stored
+// b is the bit offset to store the byte at
+// x is unique label
+#define ASM_RECV_DATA(d, b, x) \
+"    read_data_" x "_%=: \n" \
+"        str    %[tmp1], [%[out_port_bop]] \n" \
+"        ldr    %[tmp1], [%[in_port_istat]] \n" \
+"        ubfx   %[tmp1], %[tmp1], %[data_in_shift], #8 \n" \
+"        bfi    %[" d "], %[tmp1], #" b ", #8 \n" \
+"        cpsie  i \n"
+
+// Read bytes from SCSI bus using asynchronous handshake mechanism.
+// Takes 4 bytes at a time; num_words must be at least 2.
+void scsi_accel_greenpak_recv(uint32_t *buf, uint32_t num_words, volatile int *resetFlag)
+{
+    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
+    volatile uint32_t *in_port_istat = (volatile uint32_t*)&GPIO_ISTAT(SCSI_IN_PORT);
+    uint32_t ack_pin_bb = PERIPH_BB_BASE + (((uint32_t)&GPIO_ISTAT(SCSI_ACK_PORT)) - APB1_BUS_BASE) * 32 + SCSI_IN_ACK_IDX * 4;
+    uint32_t req_pin_bb = PERIPH_BB_BASE + (((uint32_t)&GPIO_ISTAT(SCSI_OUT_PORT)) - APB1_BUS_BASE) * 32 + SCSI_OUT_REQ_IDX * 4;
+    register uint32_t tmp1 = 0;
+    register uint32_t tmp2 = 0;
+    register uint32_t data = 0;
+
+    // Last word requires special handling so that hardware doesn't issue new REQ pulse.
+    assert(num_words >= 2);
+    num_words -= 1;
+
+    // Set PLD_IO3 high to enable read from SCSI bus
+    GPIO_BOP(SCSI_OUT_PORT) = GREENPAK_PLD_IO3;
+
+    // Make sure that the previous access has fully completed.
+    // E.g. Macintosh can hold ACK low for long time after last byte of block.
+    while (SCSI_IN(ACK) && !*resetFlag);
+
+    // Clear PLD_IO1, set REQ pin as input and PLD_IO2 high to enable logic
+    GPIO_BC(SCSI_OUT_PORT) = GREENPAK_PLD_IO1;
+    gpio_init(SCSI_OUT_PORT, GPIO_MODE_IPU, 0, SCSI_OUT_REQ);
+    GPIO_BOP(SCSI_OUT_PORT) = GREENPAK_PLD_IO2;
+
+    asm volatile (
+    "inner_loop_%=: \n"
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("0")
+        ASM_RECV_DATA("data", "0", "0")
+        
+        ASM_PREP_RECV("pld1_lo")
+        ASM_WAIT_ACK("8")
+        ASM_RECV_DATA("data", "8", "8")
+        
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("16")
+        ASM_RECV_DATA("data", "16", "16")
+        
+        ASM_PREP_RECV("pld1_lo")
+        ASM_WAIT_ACK("24")
+        ASM_RECV_DATA("data", "24", "24")
+
+    "   mvn      %[data], %[data] \n"
+    "   str      %[data], [%[buf]], #4 \n"
+    "   subs     %[num_words], %[num_words], #1 \n"
+    "   bne     inner_loop_%= \n"
+
+    // Process last word separately to avoid issuing extra REQ pulse at end (its final byte loads pld1_hi again, not pld1_lo).
+    "recv_last_word_%=: \n"
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("0b")
+        ASM_RECV_DATA("data", "0", "0b")
+        
+        ASM_PREP_RECV("pld1_lo")
+        ASM_WAIT_ACK("8b")
+        ASM_RECV_DATA("data", "8", "8b")
+        
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("16b")
+        ASM_RECV_DATA("data", "16", "16b")
+        
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("24b")
+        ASM_RECV_DATA("data", "24", "24b")
+
+    "   mvn      %[data], %[data] \n"
+    "   str      %[data], [%[buf]], #4 \n"
+
+    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
+                   [buf] "+r" (buf), [num_words] "+r" (num_words)
+    : /* Input */ [ack_pin_bb] "r" (ack_pin_bb),
+                  [req_pin_bb] "r" (req_pin_bb),
+                  [out_port_bop] "r"(out_port_bop),
+                  [in_port_istat] "r" (in_port_istat),
+                  [reset_flag] "r" (resetFlag),
+                  [data_in_shift] "I" (SCSI_IN_SHIFT),
+                  [pld1_lo] "I" (SCSI_OUT_PLD1 << 16),
+                  [pld1_hi] "I" (SCSI_OUT_PLD1)
+    : /* Clobber */ );
+
+    SCSI_RELEASE_DATA_REQ();
+
+    // Disable external logic and set REQ pin as output
+    GPIO_BC(SCSI_OUT_PORT) = GREENPAK_PLD_IO2;
+    gpio_init(SCSI_OUT_PORT, GPIO_MODE_OUT_PP, GPIO_OSPEED_50MHZ, SCSI_OUT_REQ);
+    GPIO_BC(SCSI_OUT_PORT) = GREENPAK_PLD_IO3;
+}
+
 #endif

+ 2 - 1
lib/AzulSCSI_platform_GD32F205/scsi_accel_greenpak.h

@@ -5,4 +5,5 @@
 #include <stdint.h>
 #include "greenpak.h"
 
-void scsi_accel_greenpak_send(const uint32_t *buf, uint32_t num_words, volatile int *resetFlag);
+void scsi_accel_greenpak_send(const uint32_t *buf, uint32_t num_words, volatile int *resetFlag);
+void scsi_accel_greenpak_recv(uint32_t *buf, uint32_t num_words, volatile int *resetFlag);

+ 475 - 0
lib/AzulSCSI_platform_GD32F205/scsi_accel_sync.cpp

@@ -0,0 +1,475 @@
+/* Synchronous mode SCSI implementation.
+ *
+ * In synchronous mode, the handshake mechanism is not used. Instead
+ * either end of the communication will just send a bunch of bytes
+ * and only afterwards checks that the number of acknowledgement
+ * pulses matches.
+ * 
+ * The receiving end should latch in the data at the falling edge of
+ * the request pulse (on either REQ or ACK pin). We use the GD32 EXMC
+ * peripheral to implement this latching with the NWAIT pin when
+ * reading data from the host. NOE is used to generate the REQ pulses.
+ * 
+ * Writing data to the host is simpler, as we can just write it out
+ * from the GPIO port at our own pace. The REQ pulses are generated by
+ * the CPU, and a timer is used for counting the ACK pulses from the host.
+ */
+
+#include "scsi_accel_sync.h"
+#include <AzulSCSI_log.h>
+#include <gd32f20x_exmc.h>
+#include <scsi.h>
+
+#ifndef SCSI_SYNC_MODE_AVAILABLE
+
+// SCSI_SYNC_MODE_AVAILABLE is not defined on this platform, so the
+// synchronous mode entry points are provided as empty no-op functions
+// (presumably so that callers can be built unchanged - confirm).
+void scsi_accel_sync_init() {}
+
+void scsi_accel_sync_recv(uint8_t *data, uint32_t count, int* parityError, volatile int *resetFlag) {}
+void scsi_accel_sync_send(const uint8_t* data, uint32_t count, volatile int *resetFlag) {}
+
+#else
+
+/********************************/
+/* Transfer from host to device */
+/********************************/
+
+// Size of the DMA landing buffer in 32-bit words. The receive routine reads
+// the buffer as 16-bit samples, one sample per received byte, so a single
+// DMA block carries at most SYNC_DMA_BUFSIZE * 2 bytes of SCSI data.
+#define SYNC_DMA_BUFSIZE 512
+static uint32_t g_sync_dma_buf[SYNC_DMA_BUFSIZE];
+
+// One-time setup of the peripherals used by synchronous mode:
+//  - enables the clocks of EXMC, the EXMC DMA controller and the sync timer,
+//  - configures EXMC NOR/SRAM region 0 (16-bit bus, NWAIT enabled),
+//  - prepares the DMA channel that copies EXMC reads into g_sync_dma_buf,
+//  - configures the ACK/timer input pins as floating inputs,
+//  - sets up the sync timer to be clocked from its CH0 input.
+// No transfer is started here.
+void scsi_accel_sync_init()
+{
+    rcu_periph_clock_enable(RCU_EXMC);
+    rcu_periph_clock_enable(SCSI_EXMC_DMA_RCU);
+    rcu_periph_clock_enable(SCSI_SYNC_TIMER_RCU);
+
+    // Bus timing for the EXMC region.
+    // NOTE(review): the values are presumably tuned for the SCSI REQ/ACK
+    // timing on this board - confirm before changing.
+    exmc_norsram_timing_parameter_struct timing_param = {
+        .asyn_access_mode = EXMC_ACCESS_MODE_A,
+        .syn_data_latency = EXMC_DATALAT_2_CLK,
+        .syn_clk_division = EXMC_SYN_CLOCK_RATIO_2_CLK,
+        .bus_latency = 1,
+        .asyn_data_setuptime = 2,
+        .asyn_address_holdtime = 2,
+        .asyn_address_setuptime = 16
+    };
+
+    // Region 0 as 16-bit wide SRAM with the NWAIT signal enabled, also for
+    // asynchronous accesses. Writes to the memory are disabled.
+    exmc_norsram_parameter_struct sram_param = {
+        .norsram_region = EXMC_BANK0_NORSRAM_REGION0,
+        .write_mode = EXMC_ASYN_WRITE,
+        .extended_mode = DISABLE,
+        .asyn_wait = ENABLE,
+        .nwait_signal = ENABLE,
+        .memory_write = DISABLE,
+        .nwait_config = EXMC_NWAIT_CONFIG_DURING,
+        .wrap_burst_mode = DISABLE,
+        .nwait_polarity = EXMC_NWAIT_POLARITY_HIGH,
+        .burst_mode = DISABLE,
+        .databus_width = EXMC_NOR_DATABUS_WIDTH_16B,
+        .memory_type = EXMC_MEMORY_TYPE_SRAM,
+        .address_data_mux = DISABLE,
+        .read_write_timing = &timing_param
+    };
+
+    // Clear the bank enable bit before (re)configuring the region.
+    // scsi_accel_sync_recv() sets the bit when a transfer starts.
+    EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) &= ~EXMC_SNCTL_NRBKEN;
+    exmc_norsram_init(&sram_param);
+
+    // DMA used to transfer data from EXMC to RAM
+    // DMA is used so that if data transfer fails, we can at least abort by resetting CPU.
+    // Accessing EXMC from the CPU directly hangs it totally if ACK pulses are not received.
+    dma_parameter_struct exmc_dma_config =
+    {
+        .periph_addr = EXMC_NOR_PSRAM,
+        .periph_width = DMA_PERIPHERAL_WIDTH_16BIT,
+        .memory_addr = (uint32_t)g_sync_dma_buf,
+        .memory_width = DMA_MEMORY_WIDTH_16BIT,
+        .number = 0, // Filled before transfer
+        .priority = DMA_PRIORITY_MEDIUM,
+        .periph_inc = DMA_PERIPH_INCREASE_DISABLE,
+        .memory_inc = DMA_MEMORY_INCREASE_ENABLE,
+        .direction = DMA_PERIPHERAL_TO_MEMORY
+    };
+    dma_init(SCSI_EXMC_DMA, SCSI_EXMC_DMACH, &exmc_dma_config);
+    // NOTE(review): memory-to-memory mode is enabled although the direction is
+    // peripheral-to-memory; presumably so that the channel runs without
+    // waiting for a peripheral DMA request - confirm.
+    dma_memory_to_memory_enable(SCSI_EXMC_DMA, SCSI_EXMC_DMACH);
+
+    gpio_init(SCSI_IN_ACK_EXMC_NWAIT_PORT, GPIO_MODE_IN_FLOATING, 0, SCSI_IN_ACK_EXMC_NWAIT_PIN);
+    gpio_init(SCSI_TIMER_IN_PORT, GPIO_MODE_IN_FLOATING, 0, SCSI_TIMER_IN_PIN);
+
+    // TIMER1 is used to count ACK pulses
+    // The counter is left stopped here (CTL0 = 0); scsi_accel_sync_send()
+    // loads and starts it for each transfer.
+    TIMER_CTL0(SCSI_SYNC_TIMER) = 0;
+    TIMER_SMCFG(SCSI_SYNC_TIMER) = TIMER_SLAVE_MODE_EXTERNAL0 | TIMER_SMCFG_TRGSEL_CI0FE0;
+    TIMER_CAR(SCSI_SYNC_TIMER) = 65535;
+    TIMER_PSC(SCSI_SYNC_TIMER) = 0;
+    TIMER_CHCTL0(SCSI_SYNC_TIMER) = 0x0001; // CH0 as input
+}
+
+// Receive `count` bytes from the host into `data` using the EXMC + DMA path.
+//
+// The DMA copies 16-bit EXMC reads into g_sync_dma_buf while this function
+// converts the samples that have already arrived: each sample is inverted and
+// shifted right by SCSI_EXMC_DATA_SHIFT to obtain the data byte.
+// The transfer is split into blocks of at most SYNC_DMA_BUFSIZE * 2 bytes.
+//
+// parityError is not written by this function.
+// After each conversion pass the elapsed time and *resetFlag are checked; if
+// more than 500 ms have passed since the block started, or *resetFlag is
+// nonzero, the whole system is reset with NVIC_SystemReset().
+void scsi_accel_sync_recv(uint8_t *data, uint32_t count, int* parityError, volatile int *resetFlag)
+{
+    // Enable EXMC to drive REQ from EXMC_NOE pin
+    EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) |= EXMC_SNCTL_NRBKEN;
+    // Replace the 4-bit mode field of the REQ pin in GPIO_CTL0 with 0xB and
+    // remember the previous register value so it can be restored at the end.
+    // NOTE(review): 0xB is presumably "alternate function push-pull, 50 MHz" - confirm.
+    uint32_t oldmode = GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT);
+    uint32_t newmode = oldmode & ~(0xF << (SCSI_OUT_REQ_EXMC_NOE_IDX * 4));
+    newmode |= 0xB << (SCSI_OUT_REQ_EXMC_NOE_IDX * 4);
+    GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT) = newmode;
+    
+    while (count > 0)
+    {
+        // One block fills at most the whole DMA buffer (16-bit sample per byte).
+        uint32_t blocksize = (count > SYNC_DMA_BUFSIZE * 2) ? (SYNC_DMA_BUFSIZE * 2) : count;
+        count -= blocksize;
+
+        // Load the transfer count and start the DMA channel.
+        DMA_CHCNT(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) = blocksize;
+        DMA_CHCTL(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) |= DMA_CHXCTL_CHEN;
+
+        uint16_t *src = (uint16_t*)g_sync_dma_buf;
+        uint8_t *dst = data;
+        uint8_t *end = data + blocksize;
+        uint32_t start = millis();
+        while (dst < end)
+        {
+            // Number of DMA transfers still pending in this block.
+            uint32_t remain = DMA_CHCNT(SCSI_EXMC_DMA, SCSI_EXMC_DMACH);
+
+            // Convert every sample that the DMA has delivered so far.
+            while (dst < end - remain)
+            {
+                *dst++ = ~(*src++) >> SCSI_EXMC_DATA_SHIFT;
+            }
+
+            if ((uint32_t)(millis() - start) > 500 || *resetFlag)
+            {
+                // We are in a pinch here: without ACK pulses coming, the EXMC and DMA peripherals
+                // are locked up. The only way out is a whole system reset.
+                azlog("SCSI Synchronous read timeout: resetting system");
+                NVIC_SystemReset();
+            }
+        }
+
+        // Block complete: stop the channel and continue after the received bytes.
+        DMA_CHCTL(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) &= ~DMA_CHXCTL_CHEN;
+        data = end;
+    }
+
+    // Restore the original pin mode and disable the EXMC bank again.
+    GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT) = oldmode;
+    EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) &= ~EXMC_SNCTL_NRBKEN;
+}
+
+/********************************/
+/* Transfer from device to host */
+/********************************/
+
+// Simple delay, about 10 ns.
+// This is less likely to get optimized away by CPU pipeline than nop.
+// Implemented as a dummy load of the reset flag word into %[tmp2]; the loaded
+// value is not used, so %[tmp2] must not hold live data across this macro.
+#define ASM_DELAY()  \
+"   ldr     %[tmp2], [%[reset_flag]] \n"
+
+// Take 8 bits from %[data] and format them for writing.
+// b is the bit offset of the byte inside %[data], given as a string literal.
+// The extracted byte indexes the table of 32-bit words at %[byte_lookup];
+// the looked-up word is left in %[tmp1].
+#define ASM_LOAD_DATA(b) \
+"        ubfx    %[tmp1], %[data], #" b ", #8 \n" \
+"        ldr     %[tmp1], [%[byte_lookup], %[tmp1], lsl #2] \n"
+
+// Write data to SCSI port and set REQ high
+// Stores the word prepared by ASM_LOAD_DATA in %[tmp1] to the register
+// addressed by %[out_port_bop].
+#define ASM_SEND_DATA() \
+"        str     %[tmp1], [%[out_port_bop]] \n"
+
+// Set REQ low
+// Writes the immediate %[bop_req_low] to the register addressed by
+// %[out_port_bop]. Uses %[tmp2] as scratch; %[tmp1] is preserved.
+#define ASM_SET_REQ_LOW() \
+"        mov     %[tmp2], %[bop_req_low] \n" \
+"        str     %[tmp2], [%[out_port_bop]] \n"
+
+// Wait for ACK_TIMER - n to be at most num_bytes (signed comparison).
+// n is given as a string literal and is also used to build the label names,
+// so each value of n can be used only once per asm statement.
+// While waiting, the reset flag is polled; if it is nonzero the code
+// branches to the all_done label of the enclosing asm statement.
+// Uses %[tmp2] as scratch.
+#define ASM_WAIT_ACK_TIMER(n) \
+    "wait_acks_" n "_%=: \n" \
+        "   ldr     %[tmp2], [%[ack_timer]] \n" \
+        "   sub     %[tmp2], # " n " \n" \
+        "   cmp     %[tmp2], %[num_bytes] \n" \
+        "   ble     got_acks_" n "_%= \n" \
+        "   ldr     %[tmp2], [%[reset_flag]] \n" \
+        "   cmp     %[tmp2], #0 \n" \
+        "   bne     all_done_%= \n" \
+        "   b       wait_acks_" n "_%= \n" \
+    "got_acks_" n "_%=: \n"
+
+// Send 4 bytes
+// The bytes are taken from %[data] starting from the lowest 8 bits.
+// ASM_DELAY1() and ASM_DELAY2() must be defined by the function that uses
+// this macro: ASM_DELAY1 is inserted between writing the data and setting
+// REQ low, ASM_DELAY2 between setting REQ low and the next byte.
+// There is no ASM_DELAY2 after the last byte; the caller adds it if needed.
+#define ASM_SEND_4BYTES() \
+ASM_LOAD_DATA("0") \
+ASM_SEND_DATA() \
+ASM_DELAY1() \
+ASM_SET_REQ_LOW() \
+ASM_DELAY2() \
+ASM_LOAD_DATA("8") \
+ASM_SEND_DATA() \
+ASM_DELAY1() \
+ASM_SET_REQ_LOW() \
+ASM_DELAY2() \
+ASM_LOAD_DATA("16") \
+ASM_SEND_DATA() \
+ASM_DELAY1() \
+ASM_SET_REQ_LOW() \
+ASM_DELAY2() \
+ASM_LOAD_DATA("24") \
+ASM_SEND_DATA() \
+ASM_DELAY1() \
+ASM_SET_REQ_LOW()
+
+// Send 1 byte, wait for ACK_TIMER to be at most num_bytes + n and send 3 bytes more
+// This interleaving minimizes the delay caused by WAIT_ACK_TIMER.
+// The timer is sampled before the second byte is written and compared right
+// after it; only if the condition is not yet met does execution fall into the
+// ASM_WAIT_ACK_TIMER polling loop. The second byte is written before the
+// wait and its REQ is set low only after the wait has finished.
+// The got_acks label targeted by the first "ble" is the one defined inside
+// ASM_WAIT_ACK_TIMER(n).
+// Note: the last line ends with a line continuation, so the blank line that
+// follows the macro is part of the definition and must be kept.
+#define ASM_SEND_4BYTES_WAIT(n) \
+ASM_LOAD_DATA("0") \
+ASM_SEND_DATA() \
+ASM_DELAY2() \
+ASM_LOAD_DATA("8") \
+ASM_SET_REQ_LOW() \
+ASM_DELAY2() \
+"   ldr     %[tmp2], [%[ack_timer]] \n" \
+"   sub     %[tmp2], # " n " \n" \
+ASM_SEND_DATA() \
+"   cmp     %[tmp2], %[num_bytes] \n" \
+"   ble     got_acks_" n "_%= \n" \
+ASM_WAIT_ACK_TIMER(n) \
+ASM_DELAY2() \
+ASM_SET_REQ_LOW() \
+ASM_DELAY2() \
+ASM_LOAD_DATA("16") \
+ASM_SEND_DATA() \
+ASM_DELAY1() \
+ASM_SET_REQ_LOW() \
+ASM_DELAY2() \
+ASM_LOAD_DATA("24") \
+ASM_SEND_DATA() \
+ASM_DELAY1() \
+ASM_SET_REQ_LOW() \
+
+// Specialized routine for settings:
+// <=100 ns period, >=15 outstanding REQs
+//
+// Sends num_bytes bytes from buf to the SCSI data port.
+// The main loop handles 16 bytes (four 32-bit loads) per iteration and
+// checks the ACK timer before the first and the third word. Any remaining
+// bytes (fewer than 16) are sent one at a time with an ACK check per byte.
+// If *resetFlag becomes nonzero while waiting for ACKs, sending is aborted.
+// Data and REQ lines are released before returning.
+static void sync_send_100ns_15off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag)
+{
+    // With num_bytes == 0 the tail loop below would still send one byte and
+    // then wrap its counter around, so handle the empty transfer here.
+    if (num_bytes == 0)
+    {
+        SCSI_RELEASE_DATA_REQ();
+        return;
+    }
+
+    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
+    volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);
+    const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;
+    register uint32_t tmp1 = 0;
+    register uint32_t tmp2 = 0;
+    register uint32_t data = 0;
+
+// Delays used by ASM_SEND_4BYTES / ASM_SEND_4BYTES_WAIT for this speed.
+#define ASM_DELAY1()
+#define ASM_DELAY2() ASM_DELAY()
+
+    asm volatile (
+    "main_loop_%=: \n"
+        "   subs  %[num_bytes], %[num_bytes], #16 \n"
+        "   bmi     last_bytes_%= \n"
+
+        /* At each point make sure there is at most 15 bytes in flight */
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES_WAIT("22")
+        ASM_DELAY2()
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES()
+        ASM_DELAY2()
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES_WAIT("14")
+        ASM_DELAY2()
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES()
+
+        "   cbz   %[num_bytes], all_done_%= \n"
+        "   b     main_loop_%= \n"
+
+    "last_bytes_%=: \n"
+        "   add  %[num_bytes], %[num_bytes], #16 \n"
+    "last_bytes_loop_%=: \n"
+        "   ldrb    %[data], [%[buf]], #1 \n"
+        ASM_LOAD_DATA("0")
+
+        ASM_WAIT_ACK_TIMER("15")
+        ASM_SEND_DATA()
+        ASM_DELAY1()
+        ASM_SET_REQ_LOW()
+        ASM_DELAY2()
+
+        "   subs %[num_bytes], %[num_bytes], #1 \n"
+        "   bne  last_bytes_loop_%= \n"
+    "all_done_%=: \n"
+        ASM_DELAY1()
+
+    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
+                   [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)
+    : /* Input */ [ack_timer] "r" (ack_timer),
+                  [bop_req_low] "I" (SCSI_OUT_REQ << 16),
+                  [out_port_bop] "r"(out_port_bop),
+                  [byte_lookup] "r" (byte_lookup),
+                  [reset_flag] "r" (resetFlag)
+    : /* Clobber */ "cc",      // subs/cmp modify the condition flags
+                    "memory"); // reads buf/lookup table, writes GPIO through pointers
+
+#undef ASM_DELAY1
+#undef ASM_DELAY2
+
+    SCSI_RELEASE_DATA_REQ();
+}
+
+// Specialized routine for settings:
+// <=200 ns period, >=15 outstanding REQs
+//
+// Sends num_bytes bytes from buf to the SCSI data port.
+// The main loop handles 16 bytes (four 32-bit loads) per iteration and
+// checks the ACK timer before the first and the third word. Any remaining
+// bytes (fewer than 16) are sent one at a time with an ACK check per byte.
+// If *resetFlag becomes nonzero while waiting for ACKs, sending is aborted.
+// Data and REQ lines are released before returning.
+static void sync_send_200ns_15off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag)
+{
+    // With num_bytes == 0 the tail loop below would still send one byte and
+    // then wrap its counter around, so handle the empty transfer here.
+    if (num_bytes == 0)
+    {
+        SCSI_RELEASE_DATA_REQ();
+        return;
+    }
+
+    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
+    volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);
+    const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;
+    register uint32_t tmp1 = 0;
+    register uint32_t tmp2 = 0;
+    register uint32_t data = 0;
+
+// Delays used by ASM_SEND_4BYTES / ASM_SEND_4BYTES_WAIT for this speed.
+#define ASM_DELAY1() ASM_DELAY() ASM_DELAY() ASM_DELAY()
+#define ASM_DELAY2() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY()
+
+    asm volatile (
+    "main_loop_%=: \n"
+        "   subs  %[num_bytes], %[num_bytes], #16 \n"
+        "   bmi     last_bytes_%= \n"
+
+        /* At each point make sure there is at most 15 bytes in flight */
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES_WAIT("22")
+        ASM_DELAY2()
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES()
+        ASM_DELAY2()
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES_WAIT("14")
+        ASM_DELAY2()
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES()
+
+        "   cbz   %[num_bytes], all_done_%= \n"
+        "   b     main_loop_%= \n"
+
+    "last_bytes_%=: \n"
+        "   add  %[num_bytes], %[num_bytes], #16 \n"
+    "last_bytes_loop_%=: \n"
+        "   ldrb    %[data], [%[buf]], #1 \n"
+        ASM_LOAD_DATA("0")
+
+        ASM_WAIT_ACK_TIMER("15")
+        ASM_SEND_DATA()
+        ASM_DELAY1()
+        ASM_SET_REQ_LOW()
+        ASM_DELAY2()
+
+        "   subs %[num_bytes], %[num_bytes], #1 \n"
+        "   bne  last_bytes_loop_%= \n"
+    "all_done_%=: \n"
+        ASM_DELAY1()
+
+    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
+                   [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)
+    : /* Input */ [ack_timer] "r" (ack_timer),
+                  [bop_req_low] "I" (SCSI_OUT_REQ << 16),
+                  [out_port_bop] "r"(out_port_bop),
+                  [byte_lookup] "r" (byte_lookup),
+                  [reset_flag] "r" (resetFlag)
+    : /* Clobber */ "cc",      // subs/cmp modify the condition flags
+                    "memory"); // reads buf/lookup table, writes GPIO through pointers
+
+#undef ASM_DELAY1
+#undef ASM_DELAY2
+
+    SCSI_RELEASE_DATA_REQ();
+}
+
+// Specialized routine for settings:
+// <=260 ns period, >=7 outstanding REQs
+//
+// Sends num_bytes bytes from buf to the SCSI data port.
+// The main loop handles 4 bytes (one 32-bit load) per iteration with an ACK
+// timer check in each iteration. Any remaining bytes (fewer than 4) are sent
+// one at a time with an ACK check per byte.
+// If *resetFlag becomes nonzero while waiting for ACKs, sending is aborted.
+// Data and REQ lines are released before returning.
+static void sync_send_260ns_7off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag)
+{
+    // With num_bytes == 0 the tail loop below would still send one byte and
+    // then wrap its counter around, so handle the empty transfer here.
+    if (num_bytes == 0)
+    {
+        SCSI_RELEASE_DATA_REQ();
+        return;
+    }
+
+    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
+    volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);
+    const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;
+    register uint32_t tmp1 = 0;
+    register uint32_t tmp2 = 0;
+    register uint32_t data = 0;
+
+// Delays used by ASM_SEND_4BYTES_WAIT for this speed.
+#define ASM_DELAY1() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY() \
+                     ASM_DELAY() ASM_DELAY()
+#define ASM_DELAY2() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY() \
+                     ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY()
+
+    asm volatile (
+    "main_loop_%=: \n"
+        "   subs  %[num_bytes], %[num_bytes], #4 \n"
+        "   bmi     last_bytes_%= \n"
+
+        /* At each point make sure there is at most 3 bytes in flight */
+        "   ldr   %[data], [%[buf]], #4 \n"
+        ASM_SEND_4BYTES_WAIT("7")
+
+        "   cbz   %[num_bytes], all_done_%= \n"
+        "   b     main_loop_%= \n"
+
+    "last_bytes_%=: \n"
+        "   add  %[num_bytes], %[num_bytes], #4 \n"
+    "last_bytes_loop_%=: \n"
+        "   ldrb    %[data], [%[buf]], #1 \n"
+        ASM_LOAD_DATA("0")
+
+        ASM_WAIT_ACK_TIMER("5")
+        ASM_SEND_DATA()
+        ASM_DELAY1()
+        ASM_SET_REQ_LOW()
+        ASM_DELAY2()
+
+        "   subs %[num_bytes], %[num_bytes], #1 \n"
+        "   bne  last_bytes_loop_%= \n"
+    "all_done_%=: \n"
+        ASM_DELAY1()
+
+    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
+                   [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)
+    : /* Input */ [ack_timer] "r" (ack_timer),
+                  [bop_req_low] "I" (SCSI_OUT_REQ << 16),
+                  [out_port_bop] "r"(out_port_bop),
+                  [byte_lookup] "r" (byte_lookup),
+                  [reset_flag] "r" (resetFlag)
+    : /* Clobber */ "cc",      // subs/cmp modify the condition flags
+                    "memory"); // reads buf/lookup table, writes GPIO through pointers
+
+#undef ASM_DELAY1
+#undef ASM_DELAY2
+
+    SCSI_RELEASE_DATA_REQ();
+}
+
+// Send `count` bytes from `data` to the host in synchronous mode.
+//
+// The sync timer is loaded with `count` and counts down on ACK pulses, so its
+// value is the number of bytes not yet acknowledged by the host.
+// An optimized assembler routine is selected based on the negotiated
+// syncOffset / syncPeriod of the current target; other settings use a slower
+// generic loop. syncPeriod appears to be in units of 4 ns (25 selects the
+// "100 ns" routine), so syncPeriod * 2 ns is half of the transfer period.
+// After sending, waits until all bytes are acknowledged or *resetFlag becomes
+// nonzero, then stops the timer.
+//
+// NOTE(review): the timer auto-reload value is set to 65535 in
+// scsi_accel_sync_init(), so count is presumably expected to fit in 16 bits - confirm.
+void scsi_accel_sync_send(const uint8_t* data, uint32_t count, volatile int *resetFlag)
+{
+    // Timer counts down from the initial number of bytes.
+    TIMER_CNT(SCSI_SYNC_TIMER) = count;
+    TIMER_CTL0(SCSI_SYNC_TIMER) = TIMER_CTL0_CEN | TIMER_CTL0_DIR;
+
+    int syncOffset = scsiDev.target->syncOffset;
+    int syncPeriod = scsiDev.target->syncPeriod;
+
+    if (syncOffset >= 15 && syncPeriod <= 25)
+    {
+        sync_send_100ns_15off(data, count, resetFlag);
+    }
+    else if (syncOffset >= 15 && syncPeriod <= 50)
+    {
+        sync_send_200ns_15off(data, count, resetFlag);
+    }
+    else if (syncOffset >= 7 && syncPeriod <= 65)
+    {
+        sync_send_260ns_7off(data, count, resetFlag);
+    }
+    else
+    {
+        azdbg("No optimized routine for syncOffset=", syncOffset, " syncPeriod=", syncPeriod, ", using fallback");
+        while (count-- > 0)
+        {
+            // Wait until fewer than syncOffset bytes are unacknowledged.
+            while (TIMER_CNT(SCSI_SYNC_TIMER) > count + syncOffset && !*resetFlag);
+
+            // Stop sending on bus reset, like the optimized routines do,
+            // instead of pulsing REQ for all the remaining bytes.
+            if (*resetFlag)
+                break;
+
+            SCSI_OUT_DATA(*data++);
+            delay_ns(syncPeriod * 2);
+            SCSI_OUT(REQ, 0);
+            delay_ns(syncPeriod * 2);
+        }
+        delay_ns(syncPeriod * 2);
+        SCSI_RELEASE_DATA_REQ();
+    }
+
+    // Wait for the host to acknowledge everything that was sent.
+    while (TIMER_CNT(SCSI_SYNC_TIMER) > 0 && !*resetFlag);
+
+    TIMER_CTL0(SCSI_SYNC_TIMER) = 0;
+}
+
+
+#endif

+ 17 - 0
lib/AzulSCSI_platform_GD32F205/scsi_accel_sync.h

@@ -0,0 +1,17 @@
+// SCSI subroutines that implement synchronous mode SCSI.
+// Uses EXMC with DMA for data input, and a GD32 timer for
+// counting the ACK pulses when sending data.
+
+#pragma once
+
+#include <stdint.h>
+#include "AzulSCSI_platform.h"
+
+#ifdef SCSI_IN_ACK_EXMC_NWAIT_PORT
+#define SCSI_SYNC_MODE_AVAILABLE
+#endif
+
+// Configure the EXMC, DMA and timer peripherals used by synchronous mode.
+// All three functions are empty stubs when SCSI_SYNC_MODE_AVAILABLE is not defined.
+void scsi_accel_sync_init();
+
+// scsi_accel_sync_recv: receive `count` bytes from the host into `data`.
+//   The current implementation does not write to `parityError`, and resets
+//   the whole system if the transfer stalls or *resetFlag becomes nonzero.
+// scsi_accel_sync_send: send `count` bytes from `data` to the host, using the
+//   syncOffset / syncPeriod negotiated for the current target.
+//   Stops waiting for acknowledgements when *resetFlag becomes nonzero.
+void scsi_accel_sync_recv(uint8_t *data, uint32_t count, int* parityError, volatile int *resetFlag);
+void scsi_accel_sync_send(const uint8_t* data, uint32_t count, volatile int *resetFlag);

+ 1 - 1
lib/SCSI2SD/include/scsi2sd.h

@@ -39,7 +39,7 @@ extern "C" {
 
 #include "stdint.h"
 
-#define S2S_MAX_TARGETS 7
+#define S2S_MAX_TARGETS 8
 #define S2S_CFG_SIZE (S2S_MAX_TARGETS * sizeof(S2S_TargetCfg) + sizeof(S2S_BoardCfg))
 
 typedef enum

+ 1 - 1
src/AzulSCSI_config.h

@@ -26,7 +26,7 @@
 #define MAX_FILE_PATH 64                // Maximum file name length
 
 // SCSI config
-#define NUM_SCSIID  7          // Maximum number of supported SCSI-IDs (The minimum is 0)
+#define NUM_SCSIID  8          // Maximum number of supported SCSI-IDs (The minimum is 0)
 #define NUM_SCSILUN 1          // Maximum number of LUNs supported     (Currently has to be 1)
 #define READ_PARITY_CHECK 0    // Perform read parity check (unverified)
 

+ 108 - 25
src/AzulSCSI_disk.cpp

@@ -22,6 +22,26 @@ extern "C" {
 #define PLATFORM_MAX_SCSI_SPEED S2S_CFG_SPEED_ASYNC_50
 #endif
 
+// This can be overridden in platform file to set the size of the transfers
+// used when reading from SCSI bus and writing to SD card.
+// When SD card access is fast, these are usually better increased.
+// If SD card access is roughly same speed as SCSI bus, these can be left at 512
+#ifndef PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE
+#define PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE 512
+#endif
+
+#ifndef PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE
+#define PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE 1024
+#endif
+
+// Optimal size for the last write in a write request.
+// This is often better a bit smaller than PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE
+// to reduce the dead time between end of SCSI transfer and finishing of SD write.
+#ifndef PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE
+#define PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE 512
+#endif
+
+
 /***********************/
 /* Backing image files */
 /***********************/
@@ -342,6 +362,12 @@ void s2s_configInit(S2S_BoardCfg* config)
     config->selectionDelay = ini_getl("SCSI", "SelectionDelay", 255, CONFIGFILE);
     config->flags6 = 0;
     config->scsiSpeed = PLATFORM_MAX_SCSI_SPEED;
+
+    int maxSyncSpeed = ini_getl("SCSI", "MaxSyncSpeed", 10, CONFIGFILE);
+    if (maxSyncSpeed < 5 && config->scsiSpeed > S2S_CFG_SPEED_ASYNC_50)
+        config->scsiSpeed = S2S_CFG_SPEED_ASYNC_50;
+    else if (maxSyncSpeed < 10 && config->scsiSpeed > S2S_CFG_SPEED_SYNC_5)
+        config->scsiSpeed = S2S_CFG_SPEED_SYNC_5;
     
     azlog("-- SelectionDelay: ", (int)config->selectionDelay);
 
@@ -701,6 +727,9 @@ static struct {
     uint8_t *buffer;
     uint32_t bytes_sd; // Number of bytes that have been scheduled for transfer on SD card side
     uint32_t bytes_scsi; // Number of bytes that have been scheduled for transfer on SCSI side
+
+    uint32_t bytes_scsi_done;
+    uint32_t sd_transfer_start;
 } g_disk_transfer;
 
 #ifdef PREFETCH_BUFFER_SIZE
@@ -778,18 +807,63 @@ static void doWrite(uint32_t lba, uint32_t blocks)
     }
 }
 
+// Called to transfer next block from SCSI bus.
+// Usually called from SD card driver during waiting for SD card access.
 void diskDataOut_callback(uint32_t bytes_complete)
 {
-    if (scsiDev.dataPtr < scsiDev.dataLen)
+    // For best performance, do SCSI reads in blocks of 4 or more bytes
+    bytes_complete &= ~3;
+
+    if (g_disk_transfer.bytes_scsi_done < g_disk_transfer.bytes_scsi)
     {
-        // DMA is now writing to SD card.
-        // We can use this time to transfer next block from SCSI bus.
-        uint32_t len = scsiDev.dataLen - scsiDev.dataPtr;
-        if (len > 512) len = 512;
+        // How many bytes remaining in the transfer?
+        uint32_t remain = g_disk_transfer.bytes_scsi - g_disk_transfer.bytes_scsi_done;
+        uint32_t len = remain;
         
+        // Limit maximum amount of data transferred at one go, to give enough callbacks to SD driver.
+        // Select the limit based on total bytes in the transfer.
+        // Transfer size is reduced towards the end of transfer to reduce the dead time between
+        // end of SCSI transfer and the SD write completing.
+        uint32_t limit = g_disk_transfer.bytes_scsi / 8;
+        if (limit < PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE) limit = PLATFORM_OPTIMAL_MIN_SD_WRITE_SIZE;
+        if (limit > PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE) limit = PLATFORM_OPTIMAL_MAX_SD_WRITE_SIZE;
+
+        if (len > limit)
+        {
+            len = limit;
+        }
+        else if (len > PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE)
+        {
+            len = len - PLATFORM_OPTIMAL_LAST_SD_WRITE_SIZE;
+        }
+
+        // Split read so that it doesn't wrap around buffer edge
+        uint32_t bufsize = sizeof(scsiDev.data);
+        uint32_t start = (g_disk_transfer.bytes_scsi_done % bufsize);
+        if (start + len > bufsize)
+            len = bufsize - start;
+
+        // Don't overwrite data that has not yet been written to SD card
+        uint32_t sd_ready_cnt = g_disk_transfer.bytes_sd + bytes_complete;
+        if (g_disk_transfer.bytes_scsi_done + len > sd_ready_cnt + bufsize)
+            len = sd_ready_cnt + bufsize - g_disk_transfer.bytes_scsi_done;
+
+        // Keep transfers a multiple of sector size.
+        // Macintosh SCSI driver seems to get confused if we have a delay
+        // in middle of a sector.
+        uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;
+        if (remain >= bytesPerSector && len % bytesPerSector != 0)
+        {
+            len -= len % bytesPerSector;
+        }
+
+        if (len == 0)
+            return;
+
+        // azdbg("SCSI read ", (int)start, " + ", (int)len);
         int parityError = 0;
-        scsiRead(scsiDev.data + scsiDev.dataPtr, len, &parityError);
-        scsiDev.dataPtr += len;
+        scsiRead(&scsiDev.data[start], len, &parityError);
+        g_disk_transfer.bytes_scsi_done += len;
 
         if (parityError)
         {
@@ -805,31 +879,41 @@ void diskDataOut()
 {
     scsiEnterPhase(DATA_OUT);
 
-    // Figure out how many blocks we can fit in buffer
+    image_config_t &img = *(image_config_t*)scsiDev.target->cfg;
     uint32_t blockcount = (transfer.blocks - transfer.currentBlock);
     uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;
-    uint32_t maxblocks = sizeof(scsiDev.data) / bytesPerSector;
-    if (blockcount > maxblocks) blockcount = maxblocks;
-    uint32_t transferlen = blockcount * bytesPerSector;
-    scsiDev.dataLen = transferlen;
-    scsiDev.dataPtr = 0;
-    
-    image_config_t &img = *(image_config_t*)scsiDev.target->cfg;
-    uint32_t written = 0;
-    while (written < transferlen)
+    g_disk_transfer.buffer = scsiDev.data;
+    g_disk_transfer.bytes_scsi = blockcount * bytesPerSector;
+    g_disk_transfer.bytes_sd = 0;
+    g_disk_transfer.bytes_scsi_done = 0;
+    g_disk_transfer.sd_transfer_start = 0;
+
+    while (g_disk_transfer.bytes_sd < g_disk_transfer.bytes_scsi
+           && scsiDev.phase == DATA_OUT
+           && !scsiDev.resetFlag)
     {
         // Read next block from SCSI bus
-        if (scsiDev.dataPtr == written)
+        if (g_disk_transfer.bytes_sd == g_disk_transfer.bytes_scsi_done)
         {
             diskDataOut_callback(0);
         }
 
-        // Start writing blocks to SD card.
-        // The callback will simultaneously read the next block from SCSI bus.    
-        uint8_t *buf = scsiDev.data + written;
-        uint32_t buflen = scsiDev.dataPtr - written;
+        // Figure out longest continuous block in buffer
+        uint32_t bufsize = sizeof(scsiDev.data);
+        uint32_t start = g_disk_transfer.bytes_sd % bufsize;
+        uint32_t len = g_disk_transfer.bytes_scsi_done - g_disk_transfer.bytes_sd;
+        if (start + len > bufsize) len = bufsize - start;
+
+        // Try to do writes in multiple of 512 bytes
+        // This allows better performance for SD card access.
+        if (len >= 512) len &= ~511;
+
+        // Start writing to SD card and simultaneously reading more from SCSI bus
+        uint8_t *buf = &scsiDev.data[start];
+        g_disk_transfer.sd_transfer_start = start;
+        // azdbg("SD write ", (int)start, " + ", (int)len);
         azplatform_set_sd_callback(&diskDataOut_callback, buf);
-        if (img.file.write(buf, buflen) != buflen)
+        if (img.file.write(buf, len) != len)
         {
             azlog("SD card write failed: ", SD.sdErrorCode());
             scsiDev.status = CHECK_CONDITION;
@@ -837,8 +921,7 @@ void diskDataOut()
             scsiDev.target->sense.asc = WRITE_ERROR_AUTO_REALLOCATION_FAILED;
             scsiDev.phase = STATUS;
         }
-
-        written += buflen;
+        g_disk_transfer.bytes_sd += len;
     }
 
     azplatform_set_sd_callback(NULL, NULL);

+ 36 - 16
src/AzulSCSI_log_trace.cpp

@@ -18,31 +18,43 @@ static const char *getCommandName(uint8_t cmd)
     switch (cmd)
     {
         case 0x00: return "TestUnitReady";
-        case 0x1A: return "ModeSense";
-        case 0x5A: return "ModeSense10";
-        case 0x0A: return "Write6";
-        case 0x2A: return "Write10";
+        case 0x01: return "RezeroUnit";
+        case 0x03: return "RequestSense";
+        case 0x04: return "FormatUnit";
         case 0x08: return "Read6";
-        case 0x28: return "Read10";
+        case 0x0A: return "Write6";
+        case 0x0B: return "Seek6";
+        case 0x0F: return "WriteSectorBuffer";
         case 0x12: return "Inquiry";
-        case 0x25: return "ReadCapacity";
-        case 0x43: return "CDROM Read TOC";
-        case 0x44: return "CDROM Read Header";
-        case 0x2C: return "Erase10";
-        case 0xAC: return "Erase12";
         case 0x15: return "ModeSelect6";
-        case 0x55: return "ModeSelect10";
-        case 0x03: return "RequestSense";
         case 0x16: return "Reserve";
         case 0x17: return "Release";
+        case 0x1A: return "ModeSense";
+        case 0x1B: return "StartStopUnit";
         case 0x1C: return "ReceiveDiagnostic";
         case 0x1D: return "SendDiagnostic";
+        case 0x1E: return "PreventAllowMediumRemoval";
+        case 0x25: return "ReadCapacity";
+        case 0x28: return "Read10";
+        case 0x2A: return "Write10";
+        case 0x2B: return "Seek10";
+        case 0x2C: return "Erase10";
+        case 0x2E: return "WriteVerify";
+        case 0x2F: return "Verify";
+        case 0x34: return "PreFetch";
+        case 0x35: return "SynchronizeCache";
+        case 0x36: return "LockUnlockCache";
+        case 0x37: return "ReadDefectData";
         case 0x3B: return "WriteBuffer";
-        case 0x0F: return "WriteSectorBuffer";
         case 0x3C: return "ReadBuffer";
+        case 0x43: return "CDROM Read TOC";
+        case 0x44: return "CDROM Read Header";
+        case 0x4A: return "GetEventStatusNotification";
+        case 0x55: return "ModeSelect10";
+        case 0x5A: return "ModeSense10";
+        case 0xAC: return "Erase12";
         case 0xC0: return "OMTI-5204 DefineFlexibleDiskFormat";
         case 0xC2: return "OMTI-5204 AssignDiskParameters";
-        case 0x4A: return "GetEventStatusNotification";
         default:   return "Unknown";
     }
 }
@@ -97,11 +109,19 @@ static void printNewPhase(int phase)
             break;
         
         case DATA_IN:
-            azdbg("---- DATA_IN");
+            if (scsiDev.target->syncOffset > 0)
+                azdbg("---- DATA_IN, syncOffset ", (int)scsiDev.target->syncOffset,
+                                   " syncPeriod ", (int)scsiDev.target->syncPeriod);
+            else
+                azdbg("---- DATA_IN");
             break;
         
         case DATA_OUT:
-            azdbg("---- DATA_OUT");
+            if (scsiDev.target->syncOffset > 0)
+                azdbg("---- DATA_OUT, syncOffset ", (int)scsiDev.target->syncOffset,
+                                    " syncPeriod ", (int)scsiDev.target->syncPeriod);
+            else
+                azdbg("---- DATA_OUT");
             break;
         
         case MESSAGE_IN: