瀏覽代碼

Use GreenPAK for accelerating disk writes also.

Petteri Aimonen 3 年之前
父節點
當前提交
fdf4d1a073

+ 42 - 32
greenpak/SCSI_Accelerator_SLG46824.gp6

@@ -1,9 +1,9 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<GPDProject version="22" oldestCompatibleVersion="22" GPDVersion="6.27.001" lastChange="3.3.2022 14.33">
+<GPDProject version="22" oldestCompatibleVersion="22" GPDVersion="6.27.001" lastChange="11.4.2022 12.26">
     <generalProjectSettings/>
     <chip family="04" type="06" friendlyName="GreenPAK 6" partNumber="40" package="16">
-        <nvmData registerLenght="2048">0 0 0 0 0 0 8D 30 49 C2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 D2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 30 70 0 30 30 30 30 0 0 30 30 30 0 30 30 30 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 22 30 C 0 0 0 0 0 0 0 0 0 0 0 D7 0 0 0 0 0 40 0 0 0 0 0 0 0 0 20 0 1 0 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 A5</nvmData>
-        <checksum crc32="0xE41C926D" version="5"/>
+        <nvmData registerLenght="2048">C4 9 0 0 0 0 0 0 38 C2 0 0 0 0 0 0 0 0 0 0 0 0 D0 8 13 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 E7 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 30 70 0 30 20 30 30 0 0 30 30 30 0 30 30 30 30 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 22 30 C 0 0 0 0 0 0 0 6 0 0 0 0 0 0 0 0 0 40 0 0 0 0 0 0 D7 14 20 0 1 0 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 1 0 0 2 0 1 0 0 2 0 1 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 A5</nvmData>
+        <checksum crc32="0xB44E9FD4" version="5"/>
         <VDDItem id="0">
             <item id="0" caption="VDD (PIN 20)">
                 <graphics pos="(-590.00,30.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
@@ -17,20 +17,20 @@
         </IOPad>
         <IOPad id="2" useCaseMode="1">
             <item id="2" caption="PIN 18 (IO1)">
-                <graphics pos="(-171.00,277.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(-248.00,278.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-45.56,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO1 / DATA_TOGGLE</textLabel>
             </item>
         </IOPad>
         <IOPad id="3" useCaseMode="1">
             <item id="3" caption="PIN 17 (IO2)">
-                <graphics pos="(-174.00,502.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(-251.00,504.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-29.23,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO2 / ENABLE</textLabel>
             </item>
         </IOPad>
-        <IOPad id="4" useCaseMode="0">
+        <IOPad id="4" useCaseMode="1">
             <item id="4" caption="PIN 16 (IO3)">
-                <graphics pos="(-590.00,310.00)" angle="0" flipping="1" hidden="0" tOrigin="(20.00,10.00)"/>
-                <textLabel pos="(-4.71,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO3</textLabel>
+                <graphics pos="(-249.00,197.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <textLabel pos="(-25.28,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">PLD_IO3 / RDWR</textLabel>
             </item>
         </IOPad>
         <IOPad id="5" useCaseMode="0">
@@ -102,13 +102,13 @@
         </IOPad>
         <IOPad id="18" useCaseMode="4">
             <item id="18" caption="PIN 2 (IO13)">
-                <graphics pos="(314.00,320.00)" angle="0" flipping="1" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(411.00,320.00)" angle="0" flipping="1" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-7.69,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">OUT_REQ</textLabel>
             </item>
         </IOPad>
         <IOPad id="19" useCaseMode="1">
             <item id="19" caption="PIN 1 (IO14)">
-                <graphics pos="(-175.00,390.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
+                <graphics pos="(-250.00,387.00)" angle="0" flipping="0" hidden="0" tOrigin="(20.00,10.00)"/>
                 <textLabel pos="(-6.55,-27.00)" angle="0.00" textColor="#ffff00ff" backgroundColor="#14141464">ACK_BUF</textLabel>
             </item>
         </IOPad>
@@ -147,7 +147,7 @@
         </item>
         <LUT id="35" regularShape="0" mode="0">
             <item id="35" caption="2-bit LUT0/DFF/LATCH0">
-                <graphics pos="(17.00,28.00)" angle="90" flipping="0" hidden="1" tOrigin="(20.00,15.00)"/>
+                <graphics pos="(-88.00,177.00)" angle="90" flipping="0" hidden="0" tOrigin="(20.00,15.00)"/>
             </item>
         </LUT>
         <LUT id="36" regularShape="0" mode="0">
@@ -167,8 +167,7 @@
         </LUT>
         <LUT id="39" regularShape="0" mode="0">
             <item id="39" caption="3-bit LUT0/DFF/LATCH3">
-                <graphics pos="(130.00,310.00)" angle="90" flipping="1" hidden="0" tOrigin="(20.00,15.00)"/>
-                <textLabel pos="(-28.00,68.36)" angle="-90.00" textColor="#ffff00ff" backgroundColor="#14141464">!(IN0 &amp; (IN1 ^ IN2))</textLabel>
+                <graphics pos="(130.00,310.00)" angle="90" flipping="1" hidden="1" tOrigin="(20.00,15.00)"/>
             </item>
         </LUT>
         <item id="40" caption="3-bit LUT1/DFF/LATCH4">
@@ -202,7 +201,7 @@ DATA_TOGGLE</textLabel>
             </item>
         </LUT>
         <item id="46" caption="MF0 (4-bit LUT0, DFF/LATCH9, 16-bit CNT0/DLY0/FSM0)">
-            <graphics pos="(-500.00,-20.00)" angle="0" flipping="0" hidden="1" tOrigin="(155.00,60.00)"/>
+            <graphics pos="(-399.00,35.00)" angle="0" flipping="0" hidden="1" tOrigin="(155.00,60.00)"/>
         </item>
         <item id="47" caption="MF1 (3-bit LUT7, DFF/LATCH10, 8-bit CNT1/DLY1)">
             <graphics pos="(-500.00,140.00)" angle="0" flipping="0" hidden="1" tOrigin="(155.00,60.00)"/>
@@ -227,15 +226,17 @@ DATA_TOGGLE</textLabel>
         </item>
         <LUT id="54" regularShape="0" mode="0">
             <item id="54" caption="4-bit LUT0 (MF0)">
-                <graphics pos="(-365.00,25.00)" angle="90" flipping="0" hidden="1" tOrigin="(20.00,15.00)"/>
+                <graphics pos="(136.00,310.00)" angle="90" flipping="0" hidden="0" tOrigin="(20.00,15.00)"/>
+                <textLabel pos="(46.00,52.19)" angle="-90.00" textColor="#ffff00ff" backgroundColor="#14141464">IN3 = 0 (Write to SCSI bus): !(IN0 &amp; (IN1 ^ IN2))
+IN3 = 1 (Read from SCSI bus): (!IN0) &amp; (IN1 ^ IN2)</textLabel>
             </item>
         </LUT>
         <item id="55" caption="DFF/LATCH9 (MF0)">
-            <graphics pos="(-450.00,-20.00)" angle="0" flipping="0" hidden="1" tOrigin="(25.00,10.00)"/>
+            <graphics pos="(-357.50,94.50)" angle="0" flipping="0" hidden="1" tOrigin="(25.00,10.00)"/>
         </item>
         <CNTDLY id="56" externalClockFrequence="0" externalClockFreqUnit="kHz">
             <item id="56" caption="16-bit CNT0/DLY0/FSM0 (MF0)">
-                <graphics pos="(-450.00,-20.00)" angle="0" flipping="0" hidden="1" tOrigin="(35.00,16.00)"/>
+                <graphics pos="(-208.50,87.50)" angle="0" flipping="0" hidden="1" tOrigin="(35.00,16.00)"/>
             </item>
         </CNTDLY>
         <LUT id="57" regularShape="0" mode="0">
@@ -335,29 +336,38 @@ DATA_TOGGLE</textLabel>
         <wire output="148" input="112" autoRouting="1" pen="#ff8000ff;1.00;1;32;128" lineType="2" protected="1" CWLid="1" wireText="NET1" wireState="0">
             <points>(-702.00,712.00); (-695.00,712.00)</points>
         </wire>
-        <wire output="142" input="549" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="8" wireText="NET8" wireState="0">
-            <points>(-109.00,293.00); (-31.00,293.00); (-31.00,326.00); (114.00,326.00)</points>
-        </wire>
-        <wire output="161" input="551" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
-            <points>(185.00,326.00); (191.00,326.00); (191.00,227.00); (-8.00,227.00); (-8.00,192.00); (-2.00,192.00)</points>
-        </wire>
         <wire output="143" input="537" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="10" wireText="NET10" wireState="0">
-            <points>(-112.00,518.00); (335.00,518.00); (335.00,360.00)</points>
+            <points>(-189.00,520.00); (432.00,520.00); (432.00,360.00)</points>
         </wire>
         <wire output="143" input="553" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="10" wireText="NET10" wireState="0">
-            <points>(-112.00,518.00); (45.00,518.00); (45.00,216.00)</points>
+            <points>(-189.00,520.00); (45.00,520.00); (45.00,216.00)</points>
         </wire>
         <wire output="142" input="552" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="8" wireText="NET8" wireState="0">
-            <points>(-109.00,293.00); (-31.00,293.00); (-31.00,182.00); (-2.00,182.00)</points>
+            <points>(-186.00,294.00); (-13.00,294.00); (-13.00,182.00); (-2.00,182.00)</points>
+        </wire>
+        <wire output="157" input="621" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="7" wireText="NET7" wireState="0">
+            <points>(-188.00,403.00); (105.00,403.00); (105.00,341.00); (120.00,341.00)</points>
+        </wire>
+        <wire output="142" input="622" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="8" wireText="NET8" wireState="0">
+            <points>(-186.00,294.00); (105.00,294.00); (105.00,331.00); (120.00,331.00)</points>
+        </wire>
+        <wire output="162" input="623" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="11" wireText="NET11" wireState="0">
+            <points>(89.00,186.00); (114.00,186.00); (114.00,321.00); (120.00,321.00)</points>
+        </wire>
+        <wire output="191" input="536" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
+            <points>(191.00,326.00); (388.00,326.00)</points>
+        </wire>
+        <wire output="144" input="624" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="12" wireText="NET12" wireState="0">
+            <points>(-187.00,213.00); (-127.00,213.00); (-127.00,311.00); (120.00,311.00)</points>
         </wire>
-        <wire output="157" input="548" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="7" wireText="NET7" wireState="0">
-            <points>(-113.00,406.00); (93.00,406.00); (93.00,339.00); (114.00,339.00)</points>
+        <wire output="84" input="551" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="13" wireText="NET13" wireState="0">
+            <points>(-30.00,192.00); (-2.00,192.00)</points>
         </wire>
-        <wire output="162" input="550" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="11" wireText="NET11" wireState="0">
-            <points>(89.00,186.00); (95.00,186.00); (95.00,312.00); (114.00,312.00)</points>
+        <wire output="191" input="262" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
+            <points>(191.00,326.00); (213.00,326.00); (213.00,113.00); (-123.00,113.00); (-123.00,184.00); (-107.00,184.00)</points>
         </wire>
-        <wire output="161" input="536" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="9" wireText="NET9" wireState="0">
-            <points>(185.00,326.00); (291.00,326.00)</points>
+        <wire output="144" input="261" autoRouting="1" pen="#00ff00ff;1.00;1;32;128" lineType="1" protected="0" CWLid="12" wireText="NET12" wireState="0">
+            <points>(-187.00,213.00); (-126.00,213.00); (-126.00,200.00); (-107.00,200.00)</points>
         </wire>
     </chip>
     <emulatorConfiguration version="1">

+ 6 - 6
greenpak/SCSI_Accelerator_SLG46824.hex

@@ -1,14 +1,14 @@
-:100000000000000000008D3049C200000000000028
-:1000100000000000000000000000000000000000E0
+:10000000C40900000000000038C200000000000029
+:10001000000000000000D0081301000000000000F4
 :1000200000000000000000000000000000000000D0
 :1000300000000000000000000000000000000000C0
-:100040000000D200000000000000000000000000DE
+:100040000000E700000000000000000000000000C9
 :1000500000000000000000000000000000000000A0
-:100060000030700030303030000030303000303040
+:100060000030700030203030000030303000303050
 :100070003030000000000000000000000000000020
 :1000800000000000001422300C00000000000000FE
-:1000900000000000D7000000000040000000000049
-:1000A0000000002000010000000201000002000129
+:10009000060000000000000000004000000000001A
+:1000A00000D714200001000000020100000200013E
 :1000B0000000020100000200010000020100000235
 :1000C0000001000002000100000001010100000029
 :1000D0000000000000000000000000000000000020

二進制
greenpak/SCSI_Accelerator_SLG46824.png


+ 1 - 1
lib/AzulSCSI_platform_GD32F205/greenpak.cpp

@@ -34,7 +34,7 @@ static void greenpak_gpio_init()
     gpio_init(GREENPAK_I2C_PORT, GPIO_MODE_OUT_PP, GPIO_OSPEED_2MHZ, GREENPAK_I2C_SCL);
 
     // Data bits used for communication
-    uint32_t greenpak_io = GREENPAK_PLD_IO1 | GREENPAK_PLD_IO2;
+    uint32_t greenpak_io = GREENPAK_PLD_IO1 | GREENPAK_PLD_IO2 | GREENPAK_PLD_IO3;
     gpio_bit_reset(SCSI_OUT_PORT, greenpak_io);
     gpio_init(SCSI_OUT_PORT, GPIO_MODE_OUT_PP, GPIO_OSPEED_50MHZ, greenpak_io);
 }

+ 7 - 7
lib/AzulSCSI_platform_GD32F205/greenpak_fw.h

@@ -1,18 +1,18 @@
 const uint8_t g_greenpak_fw[] = {
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x8d, 0x30, 0x49, 0xc2, 0x00, 0x00,
+  0xc4, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x38, 0xc2, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd0, 0x08,
+  0x13, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe7, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x30, 0x70, 0x00, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30,
+  0x00, 0x30, 0x70, 0x00, 0x30, 0x20, 0x30, 0x30, 0x00, 0x00, 0x30, 0x30,
   0x30, 0x00, 0x30, 0x30, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x00, 0x14, 0x22, 0x30, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0xd7, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
-  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00,
+  0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00,
+  0x00, 0x00, 0x00, 0x00, 0x00, 0xd7, 0x14, 0x20, 0x00, 0x01, 0x00, 0x00,
   0x00, 0x02, 0x01, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x02, 0x01,
   0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x02, 0x01, 0x00, 0x00, 0x02,
   0x00, 0x01, 0x00, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x01,

+ 10 - 2
lib/AzulSCSI_platform_GD32F205/scsiPhy.cpp

@@ -460,13 +460,21 @@ extern "C" void scsiRead(uint8_t* data, uint32_t count, int* parityError)
     *parityError = 0;
 
     uint32_t count_words = count / 4;
-    if (count_words * 4 == count)
+    bool use_greenpak = (g_scsi_phy_mode == PHY_MODE_GREENPAK_DMA || g_scsi_phy_mode == PHY_MODE_GREENPAK_PIO);
+
+    if (count_words * 4 == count && count_words >= 2 && use_greenpak)
     {
-        // Use accelerated subroutine
+        // GreenPAK accelerated receive can handle a multiple of 4 bytes with minimum of 8 bytes.
+        scsi_accel_greenpak_recv((uint32_t*)data, count_words, &scsiDev.resetFlag);
+    }
+    else if (count_words * 4 == count && count_words >= 1)
+    {
+        // Optimized ASM subroutine can handle multiple of 4 bytes with minimum of 4 bytes.
         scsi_accel_asm_recv((uint32_t*)data, count_words, &scsiDev.resetFlag);
     }
     else
     {
+        // Use a simple loop for short and unaligned transfers
         for (uint32_t i = 0; i < count; i++)
         {
             if (scsiDev.resetFlag) break;

+ 174 - 0
lib/AzulSCSI_platform_GD32F205/scsi_accel_greenpak.cpp

@@ -10,8 +10,17 @@ void scsi_accel_greenpak_send(const uint32_t *buf, uint32_t num_words, volatile
     assert(false);
 }
 
+void scsi_accel_greenpak_recv(uint32_t *buf, uint32_t num_words, volatile int *resetFlag)
+{
+    assert(false); // Stub for the preprocessor branch before #else; it must never be called in this configuration.
+}
+
 #else
 
+/*********************************************************/
+/* Optimized writes to SCSI bus in GREENPAK_PIO mode     */
+/*********************************************************/
+
 extern const uint32_t g_scsi_out_byte_to_bop_pld1hi[256];
 extern const uint32_t g_scsi_out_byte_to_bop_pld1lo[256];
 
@@ -188,4 +197,169 @@ const uint32_t g_scsi_out_byte_to_bop_pld1lo[256] =
 
 #undef X
 
+/*********************************************************/
+/* Optimized reads from SCSI bus in GREENPAK_PIO mode    */
+/*********************************************************/
+
+// Wait for ACK to go high and then back low.
+// This indicates that there is a byte ready to be read.
+// If an interrupt occurs in the middle, we may miss ACK going high.
+// In that case, check whether REQ is already low. Both wait loops also exit when the reset flag is nonzero.
+#define ASM_WAIT_ACK(x) \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"    wait_req_low_" x "_%=: \n" \
+"        cpsid   i \n" \
+"        ldr     %[tmp2], [%[req_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_high_" x "_%= \n" \
+"        ldr     %[tmp2], [%[reset_flag]] \n" \
+"        cbnz    %[tmp2], ack_is_high_" x "_%= \n" \
+"        cpsie   i \n" \
+"        b.n     wait_req_low_" x "_%= \n" \
+"    ack_is_high_" x "_%=: \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"    wait_ack_low_" x "_%=: \n" \
+"        cpsid   i \n" \
+"        ldr     %[tmp2], [%[ack_pin_bb]] \n" \
+"        cbz     %[tmp2], ack_is_low_" x "_%= \n" \
+"        ldr     %[tmp2], [%[reset_flag]] \n" \
+"        cbnz    %[tmp2], ack_is_low_" x "_%= \n" \
+"        cpsie   i \n" \
+"        b.n     wait_ack_low_" x "_%= \n" \
+"    ack_is_low_" x "_%=: \n"
+
+// Prepare for reception of a data byte by loading the next PLD_IO1 value
+// (the asm operand named by l) into tmp1 and disabling interrupts.
+#define ASM_PREP_RECV(l) \
+"        mov    %[tmp1], %[" l "] \n" \
+"        cpsid  i \n"
+
+// Read the GPIO bus, take the data byte and toggle PLD_IO1.
+// Note that the PLD_IO1 write is done first to reduce latency, but
+// the istat value that is read by the next instruction is still the old
+// one due to IO port delays. Interrupts must be disabled for this
+// sequence to work correctly; they are re-enabled at the end of it.
+//
+// d is the name of the register operand where the data is to be stored
+// b is the bit offset to store the byte at
+// x is a unique label suffix
+#define ASM_RECV_DATA(d, b, x) \
+"    read_data_" x "_%=: \n" \
+"        str    %[tmp1], [%[out_port_bop]] \n" \
+"        ldr    %[tmp1], [%[in_port_istat]] \n" \
+"        ubfx   %[tmp1], %[tmp1], %[data_in_shift], #8 \n" \
+"        bfi    %[" d "], %[tmp1], #" b ", #8 \n" \
+"        cpsie  i \n"
+
+// Read bytes from the SCSI bus using the GreenPAK-assisted asynchronous handshake.
+// Takes 4 bytes at a time, packs them into one 32-bit word (first byte in the
+// lowest bits), inverts the word and stores it to buf.
+//
+// buf:       destination buffer, receives num_words 32-bit words
+// num_words: number of words to receive, must be at least 2 because the
+//            last word is handled outside of the main loop
+// resetFlag: polled while waiting for the bus; a nonzero value ends the waits
+void scsi_accel_greenpak_recv(uint32_t *buf, uint32_t num_words, volatile int *resetFlag)
+{
+    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);
+    volatile uint32_t *in_port_istat = (volatile uint32_t*)&GPIO_ISTAT(SCSI_IN_PORT);
+
+    // Bit-band alias addresses of the ACK and REQ pin input bits, so that a
+    // single word load in the assembly code yields just the state of that pin.
+    uint32_t ack_pin_bb = PERIPH_BB_BASE + (((uint32_t)&GPIO_ISTAT(SCSI_ACK_PORT)) - APB1_BUS_BASE) * 32 + SCSI_IN_ACK_IDX * 4;
+    uint32_t req_pin_bb = PERIPH_BB_BASE + (((uint32_t)&GPIO_ISTAT(SCSI_OUT_PORT)) - APB1_BUS_BASE) * 32 + SCSI_OUT_REQ_IDX * 4;
+
+    // Scratch registers for the assembly code. The "+l" constraints below
+    // already place tmp1/tmp2 in low registers as needed by cbz/cbnz.
+    uint32_t tmp1 = 0;
+    uint32_t tmp2 = 0;
+    uint32_t data = 0;
+
+    // Last word requires special handling so that hardware doesn't issue new REQ pulse.
+    assert(num_words >= 2);
+    num_words -= 1;
+
+    // Set PLD_IO3 high to enable read from SCSI bus
+    GPIO_BOP(SCSI_OUT_PORT) = GREENPAK_PLD_IO3;
+
+    // Make sure that the previous access has fully completed.
+    // E.g. Macintosh can hold ACK low for long time after last byte of block.
+    while (SCSI_IN(ACK) && !*resetFlag);
+
+    // Start with PLD_IO1 low, set REQ pin as input and PLD_IO2 high to enable logic
+    GPIO_BC(SCSI_OUT_PORT) = GREENPAK_PLD_IO1;
+    gpio_init(SCSI_OUT_PORT, GPIO_MODE_IPU, 0, SCSI_OUT_REQ);
+    GPIO_BOP(SCSI_OUT_PORT) = GREENPAK_PLD_IO2;
+
+    asm volatile (
+    "inner_loop_%=: \n"
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("0")
+        ASM_RECV_DATA("data", "0", "0")
+
+        ASM_PREP_RECV("pld1_lo")
+        ASM_WAIT_ACK("8")
+        ASM_RECV_DATA("data", "8", "8")
+
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("16")
+        ASM_RECV_DATA("data", "16", "16")
+
+        ASM_PREP_RECV("pld1_lo")
+        ASM_WAIT_ACK("24")
+        ASM_RECV_DATA("data", "24", "24")
+
+    "   mvn      %[data], %[data] \n"
+    "   str      %[data], [%[buf]], #4 \n"
+    "   subs     %[num_words], %[num_words], #1 \n"
+    "   bne     inner_loop_%= \n"
+
+    // Process last word separately to avoid issuing extra REQ pulse at end.
+    // NOTE(review): the fourth byte deliberately writes pld1_hi again, so
+    // PLD_IO1 is left unchanged after the final byte; presumably this is what
+    // keeps the external logic from requesting one more byte - confirm
+    // against the GreenPAK design.
+    "recv_last_word_%=: \n"
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("0b")
+        ASM_RECV_DATA("data", "0", "0b")
+
+        ASM_PREP_RECV("pld1_lo")
+        ASM_WAIT_ACK("8b")
+        ASM_RECV_DATA("data", "8", "8b")
+
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("16b")
+        ASM_RECV_DATA("data", "16", "16b")
+
+        ASM_PREP_RECV("pld1_hi")
+        ASM_WAIT_ACK("24b")
+        ASM_RECV_DATA("data", "24", "24b")
+
+    "   mvn      %[data], %[data] \n"
+    "   str      %[data], [%[buf]], #4 \n"
+
+    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),
+                   [buf] "+r" (buf), [num_words] "+r" (num_words)
+    : /* Input */ [ack_pin_bb] "r" (ack_pin_bb),
+                  [req_pin_bb] "r" (req_pin_bb),
+                  [out_port_bop] "r"(out_port_bop),
+                  [in_port_istat] "r" (in_port_istat),
+                  [reset_flag] "r" (resetFlag),
+                  [data_in_shift] "I" (SCSI_IN_SHIFT),
+                  [pld1_lo] "I" (SCSI_OUT_PLD1 << 16),
+                  [pld1_hi] "I" (SCSI_OUT_PLD1)
+    : /* Clobber */
+                  "cc",     // subs modifies the condition flags
+                  "memory"  // stores through buf, reads I/O registers and *resetFlag
+    );
+
+    SCSI_RELEASE_DATA_REQ();
+
+    // Disable external logic and set REQ pin as output
+    GPIO_BC(SCSI_OUT_PORT) = GREENPAK_PLD_IO2;
+    gpio_init(SCSI_OUT_PORT, GPIO_MODE_OUT_PP, GPIO_OSPEED_50MHZ, SCSI_OUT_REQ);
+    GPIO_BC(SCSI_OUT_PORT) = GREENPAK_PLD_IO3;
+}
+
 #endif

+ 2 - 1
lib/AzulSCSI_platform_GD32F205/scsi_accel_greenpak.h

@@ -5,4 +5,5 @@
 #include <stdint.h>
 #include "greenpak.h"
 
-void scsi_accel_greenpak_send(const uint32_t *buf, uint32_t num_words, volatile int *resetFlag);
+void scsi_accel_greenpak_send(const uint32_t *buf, uint32_t num_words, volatile int *resetFlag);
+void scsi_accel_greenpak_recv(uint32_t *buf, uint32_t num_words, volatile int *resetFlag);