浏览代码

Improved SCSI signal noise rejection, fixed write performance, fixed a bug with multiple devices on the bus, and fixed bugs with non-512-byte sectors.

Michael McMaster 7 年之前
父节点
当前提交
2ee15b4b61

+ 5 - 0
lib/SCSI2SD/CHANGELOG

@@ -1,3 +1,8 @@
+2018XXXX        6.2.1
+	- Fix bug with non-512 byte sectors.
+	- Fix bug when writing with multiple SCSI devices on the chain
+	- Performance improvements to write speeds.
+
 20180430		6.1.4
 	- Fix bug in self-test function
 

二进制
lib/SCSI2SD/rtl/fpga_bitmap.o


+ 12 - 0
lib/SCSI2SD/src/firmware/bsp.c

@@ -23,6 +23,18 @@ static int usingFastClock = 0;
 
 // TODO keep clock routines consistent with those in STM32Cubemx main.c
 
+uint32_t s2s_getSdRateMBs() // Returns the nominal SD card transfer rate in MB/s for the clock mode currently recorded in usingFastClock.
+{
+	if (usingFastClock) // File-scope flag (static int, initialised to 0); presumably set by s2s_setFastClock() - TODO confirm.
+	{
+		return 18; // ((72MHz / 2) / 8bits) * 4bitparallel
+	}
+	else
+	{
+		return 12; // ((48MHz / 2) / 8bits) * 4bitparallel
+	}
+}
+
 // The standard clock is 108MHz with 48MHz SDIO clock
 void s2s_setNormalClock()
 {

+ 4 - 0
lib/SCSI2SD/src/firmware/bsp.h

@@ -18,6 +18,8 @@
 #ifndef S2S_BSP_h
 #define S2S_BSP_h
 
+#include <stdint.h>
+
 // For the STM32F205, DMA bursts may not cross 1KB address boundaries.
 // The maximum burst is 16 bytes.
 #define S2S_DMA_ALIGN __attribute__((aligned(1024)))
@@ -25,5 +27,7 @@
 void s2s_setNormalClock();
 void s2s_setFastClock();
 
+uint32_t s2s_getSdRateMBs();
+
 #endif
 

+ 1 - 1
lib/SCSI2SD/src/firmware/config.c

@@ -38,7 +38,7 @@
 
 #include <string.h>
 
-static const uint16_t FIRMWARE_VERSION = 0x0614;
+static const uint16_t FIRMWARE_VERSION = 0x0620;
 
 // 1 flash row
 static const uint8_t DEFAULT_CONFIG[128] =

+ 160 - 163
lib/SCSI2SD/src/firmware/disk.c

@@ -18,12 +18,18 @@
 
 #include "stm32f2xx.h"
 
+// For SD write direct routines
+#include "sdio.h"
+#include "bsp_driver_sd.h"
+
+
 #include "scsi.h"
 #include "scsiPhy.h"
 #include "config.h"
 #include "disk.h"
 #include "sd.h"
 #include "time.h"
+#include "bsp.h"
 
 #include <string.h>
 
@@ -533,7 +539,6 @@ void scsiDiskPoll()
 	if (scsiDev.phase == DATA_IN &&
 		transfer.currentBlock != transfer.blocks)
 	{
-		scsiEnterPhase(DATA_IN);
 
 		int totalSDSectors =
 			transfer.blocks * SDSectorsPerSCSISector(bytesPerSector);
@@ -549,8 +554,15 @@ void scsiDiskPoll()
 		int i = 0;
 		int scsiActive __attribute__((unused)) = 0; // unused if DMA disabled
 		int sdActive = 0;
+
+		uint32_t partialScsiChunk = 0;
+
+		// Start reading from the SD card FIRST, because we change state and
+		// wait for SCSI signals
+		int dataInStarted = 0;
+
 		while ((i < totalSDSectors) &&
-			likely(scsiDev.phase == DATA_IN) &&
+			(!dataInStarted || likely(scsiDev.phase == DATA_IN)) &&
 			likely(!scsiDev.resetFlag))
 		{
 			int completedDmaSectors;
@@ -594,9 +606,16 @@ void scsiDiskPoll()
 				sdReadDMA(sdLBA + prep, sectors, &scsiDev.data[SD_SECTOR_SIZE * startBuffer]);
 
 				sdActive = sectors;
+
+				if (!dataInStarted)
+				{
+					dataInStarted = 1;
+					scsiEnterPhase(DATA_IN); // Will wait a few microseconds.
+				}
 			}
 
 #ifdef SCSI_FSMC_DMA
+			#error this code not updated for 256 max bytes in scsi fifo
 			if (scsiActive && scsiPhyComplete() && scsiWriteDMAPoll())
 			{
 				scsiActive = 0;
@@ -624,33 +643,49 @@ void scsiDiskPoll()
 					if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;
 				}
 
-				uint16_t* scsiDmaData = (uint16_t*) &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]);
 				// Manually unrolled loop for performance.
 				// -Os won't unroll this for us automatically,
 				// especially since scsiPhyTx does volatile stuff.
 				// Reduces bus utilisation by making the fsmc split
 				// 32bits into 2 16 bit writes.
+
+				uint16_t* scsiDmaData = (uint16_t*) &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers) + partialScsiChunk]);
+
+				uint32_t chunk = ((dmaBytes - partialScsiChunk) > SCSI_FIFO_DEPTH)
+					? SCSI_FIFO_DEPTH : (dmaBytes - partialScsiChunk);
+
 				int k = 0;
-				for (; k + 4 < (dmaBytes + 1) / 2; k += 4)
+				for (; k + 4 < (chunk + 1) / 2; k += 4)
 				{
 					scsiPhyTx32(scsiDmaData[k], scsiDmaData[k+1]);
 					scsiPhyTx32(scsiDmaData[k+2], scsiDmaData[k+3]);
 				}
-				for (; k < (dmaBytes + 1) / 2; ++k)
+				for (; k < (chunk + 1) / 2; ++k)
 				{
 					scsiPhyTx(scsiDmaData[k]);
 				}
-				i++;
 				while (!scsiPhyComplete() && !scsiDev.resetFlag)
 				{
 					__WFE(); // Wait for event
 				}
 				scsiPhyFifoFlip();
-				scsiSetDataCount(dmaBytes);
+				scsiSetDataCount(chunk);
+
+				partialScsiChunk += chunk;
+				if (partialScsiChunk == dmaBytes)
+				{
+					partialScsiChunk = 0;
+					++i;
+				}
 			}
 #endif
 		}
 
+		if (!dataInStarted && !scsiDev.resetFlag) // zero bytes ?
+		{
+			scsiEnterPhase(DATA_IN); // Will wait a few microseconds.
+		}
+
 		// We've finished transferring the data to the FPGA, now wait until it's
 		// written to he SCSI bus.
 		while (!scsiPhyComplete() &&
@@ -679,22 +714,16 @@ void scsiDiskPoll()
 				scsiDev.target->cfg->sdSectorStart,
 				bytesPerSector,
 				transfer.lba);
-		// int buffers = sizeof(scsiDev.data) / SD_SECTOR_SIZE;
-		// int prep = 0;
 		int i = 0;
-		// int scsiDisconnected = 0;
-		int scsiComplete = 0;
-		//uint32_t lastActivityTime = s2s_getTime_ms();
-		// int scsiActive = 0;
-		// int sdActive = 0;
 		int clearBSY = 0;
 
 		int parityError = 0;
+		int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;
+
 		while ((i < totalSDSectors) &&
-			(likely(scsiDev.phase == DATA_OUT) || // scsiDisconnect keeps our phase.
-				scsiComplete) &&
+			likely(scsiDev.phase == DATA_OUT) &&
 			likely(!scsiDev.resetFlag) &&
-			likely(!parityError))
+			likely(!parityError || !enableParity))
 		{
 			// Well, until we have some proper non-blocking SD code, we must
 			// do this in a half-duplex fashion. We need to write as much as
@@ -703,178 +732,146 @@ void scsiDiskPoll()
 			uint32_t rem = totalSDSectors - i;
 			uint32_t sectors =
 				rem < maxSectors ? rem : maxSectors;
-			scsiRead(&scsiDev.data[0], sectors * SD_SECTOR_SIZE, &parityError);
 
-			if (i + sectors >= totalSDSectors)
+			if (bytesPerSector == SD_SECTOR_SIZE)
 			{
-				// We're transferring over the SCSI bus faster than the SD card
-				// can write.  All data is buffered, and we're just waiting for
-				// the SD card to complete. The host won't let us disconnect.
-				// Some drivers set a 250ms timeout on transfers to complete.
-				// SD card writes are supposed to complete
-				// within 200ms, but sometimes they don'to.
-				// Just pretend we're finished.
-				process_Status();
-				process_MessageIn(); // Will go to BUS_FREE state
-
-				// Try and prevent anyone else using the SCSI bus while we're not ready.
-				if (*SCSI_CTRL_BSY == 0) // Could be busy for a linked command
+				// We assume the SD card is faster than the SCSI interface, but has
+				// no flow control. This can be handled if a) the scsi interface
+				// doesn't block and b) we read enough SCSI sectors first so that
+				// the SD interface cannot catch up.
+				uint32_t totalBytes = sectors * SD_SECTOR_SIZE;
+				uint32_t readAheadBytes = sectors * SD_SECTOR_SIZE;
+				uint32_t sdSpeed = s2s_getSdRateMBs() + (scsiDev.sdUnderrunCount / 2);
+				uint32_t scsiSpeed = s2s_getScsiRateMBs();
+				// if (have blind writes)
+				if (scsiSpeed > 0 && scsiDev.sdUnderrunCount < 16)
 				{
-					*SCSI_CTRL_BSY = 1;
-					clearBSY = 1;
+					// readAhead = sectors * (sd / scsi - 1 + 0.1);
+					readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes + SCSI_FIFO_DEPTH;
+					if (readAheadBytes < SCSI_FIFO_DEPTH)
+					{
+						readAheadBytes = SCSI_FIFO_DEPTH;
+					}
+
+					if (readAheadBytes > totalBytes)
+					{
+						readAheadBytes = totalBytes;
+					}
 				}
-			}
 
+				uint32_t chunk = (readAheadBytes > SCSI_FIFO_DEPTH) ? SCSI_FIFO_DEPTH : readAheadBytes;
+				scsiSetDataCount(chunk);
 
-			if (!parityError)
-			{
-				sdTmpWrite(&scsiDev.data[0], i + sdLBA, sectors);
-			}
-			i += sectors;
+				uint32_t scsiBytesRead = 0;
+				while (scsiBytesRead < readAheadBytes)
+				{
+					while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))
+					{
+						__WFE(); // Wait for event
+					}
+					parityError |= scsiParityError();
+					scsiPhyFifoFlip();
+					uint32_t nextChunk = ((totalBytes - scsiBytesRead - chunk) > SCSI_FIFO_DEPTH)
+						? SCSI_FIFO_DEPTH : (totalBytes - scsiBytesRead - chunk);
+
+					if (nextChunk > 0) scsiSetDataCount(nextChunk);
+					scsiReadPIO(&scsiDev.data[scsiBytesRead], chunk);
+					scsiBytesRead += chunk;
+					chunk = nextChunk;
+				}
 
-#if 0
-			// Wait for the next DMA interrupt. It's beneficial to halt the
-			// processor to give the DMA controller more memory bandwidth to
-			// work with.
-			int scsiBusy = 1;
-			int sdBusy = 1;
-			while (scsiBusy && sdBusy)
-			{
-				uint8_t intr = CyEnterCriticalSection();
-				scsiBusy = scsiDMABusy();
-				sdBusy = sdDMABusy();
-				if (scsiBusy && sdBusy)
+				HAL_SD_WriteBlocks_DMA(&hsd, (uint32_t*) (&scsiDev.data[0]), (i + sdLBA) * 512ll, SD_SECTOR_SIZE, sectors);
+
+				while (scsiBytesRead < totalBytes)
 				{
-					__WFI();
+					while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))
+					{
+						__WFE(); // Wait for event
+					}
+					parityError |= scsiParityError();
+					scsiPhyFifoFlip();
+					uint32_t nextChunk = ((totalBytes - scsiBytesRead - chunk) > SCSI_FIFO_DEPTH)
+						? SCSI_FIFO_DEPTH : (totalBytes - scsiBytesRead - chunk);
+
+					if (nextChunk > 0) scsiSetDataCount(nextChunk);
+					scsiReadPIO(&scsiDev.data[scsiBytesRead], chunk);
+					scsiBytesRead += chunk;
+					chunk = nextChunk;
 				}
-				CyExitCriticalSection(intr);
-			}
 
-			if (sdActive && !sdBusy && sdWriteSectorDMAPoll())
-			{
-				sdActive = 0;
-				i++;
-			}
-			if (!sdActive && ((prep - i) > 0))
-			{
-				// Start an SD transfer if we have space.
-				sdWriteMultiSectorDMA(&scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]);
-				sdActive = 1;
-			}
+				// Oh dear, SD finished first.
+				int underrun = totalBytes > readAheadBytes && hsd.DmaTransferCplt;
 
-			uint32_t now = getTime_ms();
+				uint32_t dmaFinishTime = s2s_getTime_ms();
+				while (!hsd.SdTransferCplt &&
+					s2s_elapsedTime_ms(dmaFinishTime) < 180)
+				{
+					// Wait while keeping BSY.
+				}
+				while((__HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) &&
+					s2s_elapsedTime_ms(dmaFinishTime) < 180)
+				{
+					// Wait for SD card while keeping BSY.
+				}
 
-			if (scsiActive && !scsiBusy && scsiReadDMAPoll())
-			{
-				scsiActive = 0;
-				++prep;
-				lastActivityTime = now;
-			}
-			if (!scsiActive &&
-				((prep - i) < buffers) &&
-				(prep < totalSDSectors) &&
-				likely(!scsiDisconnected))
-			{
-				int dmaBytes = SD_SECTOR_SIZE;
-				if ((prep % sdPerScsi) == (sdPerScsi - 1))
+				if (i + sectors >= totalSDSectors &&
+					!underrun &&
+					(!parityError || !enableParity))
 				{
-					dmaBytes = bytesPerSector % SD_SECTOR_SIZE;
-					if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;
+					// We're transferring over the SCSI bus faster than the SD card
+					// can write.  All data is buffered, and we're just waiting for
+					// the SD card to complete. The host won't let us disconnect.
+					// Some drivers set a 250ms timeout on transfers to complete.
+					// SD card writes are supposed to complete
+					// within 200ms, but sometimes they don't.
+					// Just pretend we're finished.
+					process_Status();
+					clearBSY = process_MessageIn(0); // Will go to BUS_FREE state but keep BSY asserted.
 				}
-				scsiReadDMA(&scsiDev.data[SD_SECTOR_SIZE * (prep % buffers)], dmaBytes);
-				scsiActive = 1;
-			}
-			else if (
-				(scsiDev.boardCfg.flags & CONFIG_ENABLE_DISCONNECT) &&
-				(scsiActive == 0) &&
-				likely(!scsiDisconnected) &&
-				unlikely(scsiDev.discPriv) &&
-				unlikely(diffTime_ms(lastActivityTime, now) >= 20) &&
-				likely(scsiDev.phase == DATA_OUT))
-			{
-				// We're transferring over the SCSI bus faster than the SD card
-				// can write.  There is no more buffer space once we've finished
-				// this SCSI transfer.
-				// The NCR 53C700 interface chips have a 250ms "byte-to-byte"
-				// timeout buffer. SD card writes are supposed to complete
-				// within 200ms, but sometimes they don't.
-				// The NCR 53C700 series is used on HP 9000 workstations.
-				scsiDisconnect();
-				scsiDisconnected = 1;
-				lastActivityTime = getTime_ms();
+
+				HAL_SD_CheckWriteOperation(&hsd, (uint32_t)SD_DATATIMEOUT);
+
+				if (underrun)
+				{
+					// Try again. Data is still in memory.
+					sdTmpWrite(&scsiDev.data[0], i + sdLBA, sectors);
+					scsiDev.sdUnderrunCount++;
+				}
+				i += sectors;
+
 			}
-			else if (unlikely(scsiDisconnected) &&
-				(
-					(prep == i) || // Buffers empty.
-					// Send some messages every 100ms so we don't timeout.
-					// At a minimum, a reselection involves an IDENTIFY message.
-					unlikely(diffTime_ms(lastActivityTime, now) >= 100)
-				))
+			else
 			{
-				int reconnected = scsiReconnect();
-				if (reconnected)
+				// Well, until we have some proper non-blocking SD code, we must
+				// do this in a half-duplex fashion. We need to write as much as
+				// possible in each SD card transaction.
+				// use sg_dd from sg_utils3 tools to test.
+				uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;
+				uint32_t rem = totalSDSectors - i;
+				uint32_t sectors = rem < maxSectors ? rem : maxSectors;
+				int scsiSector;
+				for (scsiSector = i; scsiSector < i + sectors; ++scsiSector)
 				{
-					scsiDisconnected = 0;
-					lastActivityTime = getTime_ms(); // Don't disconnect immediately.
+					int dmaBytes = SD_SECTOR_SIZE;
+					if ((scsiSector % sdPerScsi) == (sdPerScsi - 1))
+					{
+						dmaBytes = bytesPerSector % SD_SECTOR_SIZE;
+						if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;
+					}
+					scsiRead(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, &parityError);
 				}
-				else if (diffTime_ms(lastActivityTime, getTime_ms()) >= 10000)
+				if (!parityError)
 				{
-					// Give up after 10 seconds of trying to reconnect.
-					scsiDev.resetFlag = 1;
+					sdTmpWrite(&scsiDev.data[0], i + sdLBA, sectors);
 				}
+				i += sectors;
 			}
-			else if (
-				likely(!scsiComplete) &&
-				(sdActive == 1) &&
-				(prep == totalSDSectors) && // All scsi data read and buffered
-				likely(!scsiDev.discPriv) && // Prefer disconnect where possible.
-				unlikely(diffTime_ms(lastActivityTime, now) >= 150) &&
-
-				likely(scsiDev.phase == DATA_OUT) &&
-				!(scsiDev.cdb[scsiDev.cdbLen - 1] & 0x01) // Not linked command
-				)
-			{
-				// We're transferring over the SCSI bus faster than the SD card
-				// can write.  All data is buffered, and we're just waiting for
-				// the SD card to complete. The host won't let us disconnect.
-				// Some drivers set a 250ms timeout on transfers to complete.
-				// SD card writes are supposed to complete
-				// within 200ms, but sometimes they don'to.
-				// Just pretend we're finished.
-				scsiComplete = 1;
-
-				process_Status();
-				process_MessageIn(); // Will go to BUS_FREE state
-
-				// Try and prevent anyone else using the SCSI bus while we're not ready.
-				SCSI_SetPin(SCSI_Out_BSY); 
-			}
-#endif
 		}
 
 		if (clearBSY)
 		{
-			*SCSI_CTRL_BSY = 0;
-		}
-
-#if 0
-		if (scsiComplete)
-		{
-			SCSI_ClearPin(SCSI_Out_BSY);
-		}
-		while (
-			!scsiDev.resetFlag &&
-			unlikely(scsiDisconnected) &&
-			(s2s_elapsedTime_ms(lastActivityTime) <= 10000))
-		{
-			scsiDisconnected = !scsiReconnect();
-		}
-		if (scsiDisconnected)
-		{
-			// Failed to reconnect
-			scsiDev.resetFlag = 1;
+			enter_BusFree();
 		}
-#endif
 
 		if (scsiDev.phase == DATA_OUT)
 		{

+ 13 - 5
lib/SCSI2SD/src/firmware/scsi.c

@@ -38,7 +38,6 @@ ScsiDevice scsiDev S2S_DMA_ALIGN;
 
 static void enter_SelectionPhase(void);
 static void process_SelectionPhase(void);
-static void enter_BusFree(void);
 static void enter_MessageIn(uint8_t message);
 static void enter_Status(uint8_t status);
 static void enter_DataIn(int len);
@@ -48,7 +47,7 @@ static void process_Command(void);
 
 static void doReserveRelease(void);
 
-static void enter_BusFree()
+void enter_BusFree()
 {
 	// This delay probably isn't needed for most SCSI hosts, but it won't
 	// hurt either. It's possible some of the samplers needed this delay.
@@ -84,7 +83,7 @@ static void enter_MessageIn(uint8_t message)
 	scsiDev.phase = MESSAGE_IN;
 }
 
-void process_MessageIn()
+int process_MessageIn(int releaseBusFree)
 {
 	scsiEnterPhase(MESSAGE_IN);
 	scsiWriteByte(scsiDev.msgIn);
@@ -94,6 +93,7 @@ void process_MessageIn()
 		// If there was a parity error, we go
 		// back to MESSAGE_OUT first, get out parity error message, then come
 		// back here.
+		return 0;
 	}
 	else if ((scsiDev.msgIn == MSG_LINKED_COMMAND_COMPLETE) ||
 		(scsiDev.msgIn == MSG_LINKED_COMMAND_COMPLETE_WITH_FLAG))
@@ -106,10 +106,16 @@ void process_MessageIn()
 		scsiDev.status = GOOD;
 		transfer.blocks = 0;
 		transfer.currentBlock = 0;
+		return 0;
 	}
-	else /*if (scsiDev.msgIn == MSG_COMMAND_COMPLETE)*/
+	else if (releaseBusFree) /*if (scsiDev.msgIn == MSG_COMMAND_COMPLETE)*/
 	{
 		enter_BusFree();
+		return 1;
+	}
+	else
+	{
+		return 1;
 	}
 }
 
@@ -517,6 +523,7 @@ static void scsiReset()
 
 	scsiDev.postDataOutHook = NULL;
 
+	scsiDev.sdUnderrunCount = 0;
 
 	// Sleep to allow the bus to settle down a bit.
 	// We must be ready again within the "Reset to selection time" of
@@ -865,6 +872,7 @@ static void process_MessageOut()
 				uint8_t SDTR[] = {0x01, 0x03, 0x01, scsiDev.target->syncPeriod, scsiDev.target->syncOffset};
 				scsiWrite(SDTR, sizeof(SDTR));
 				scsiDev.needSyncNegotiationAck = 1; // Check if this message is rejected.
+				scsiDev.sdUnderrunCount = 0;  // reset counter, may work now.
 			}
 		}
 		else
@@ -1004,7 +1012,7 @@ void scsiPoll(void)
 		}
 		else
 		{
-			process_MessageIn();
+			process_MessageIn(1);
 		}
 
 	break;

+ 3 - 1
lib/SCSI2SD/src/firmware/scsi.h

@@ -162,12 +162,14 @@ typedef struct
 	uint8_t minSyncPeriod; // Debug use only.
 
 	int needSyncNegotiationAck;
+	int sdUnderrunCount;
 } ScsiDevice;
 
 extern ScsiDevice scsiDev;
 
 void process_Status(void);
-void process_MessageIn(void);
+int process_MessageIn(int releaseBusFree);
+void enter_BusFree(void);
 
 void scsiInit(void);
 void scsiPoll(void);

+ 24 - 1
lib/SCSI2SD/src/firmware/scsiPhy.c

@@ -187,7 +187,7 @@ scsiReadByte(void)
 }
 
 
-static void
+void
 scsiReadPIO(uint8_t* data, uint32_t count)
 {
 	uint16_t* fifoData = (uint16_t*)data;
@@ -598,6 +598,29 @@ void scsiEnterPhase(int newPhase)
 	}
 }
 
+uint32_t s2s_getScsiRateMBs() // Returns an estimated SCSI transfer rate in MB/s for the current target, or 0 when its syncOffset is zero.
+{
+	if (scsiDev.target->syncOffset) // Non-zero offset: presumably synchronous transfers were negotiated - TODO confirm.
+	{
+		if (scsiDev.target->syncPeriod < 23) // Shortest periods are reported as a flat 20.
+		{
+			return 20;
+		}
+		else if (scsiDev.target->syncPeriod <= 25) // Periods 23..25 are reported as a flat 10.
+		{
+			return 10;
+		}
+		else
+		{
+			return 1000 / (scsiDev.target->syncPeriod * 4); // Integer division, so the result truncates (and can reach 0); assumes syncPeriod is in 4ns units - TODO confirm.
+		}
+	}
+	else
+	{
+		return 0; // No rate estimate without a sync offset.
+	}
+}
+
 void scsiPhyReset()
 {
 	trace(trace_scsiPhyReset);

+ 6 - 1
lib/SCSI2SD/src/firmware/scsiPhy.h

@@ -47,7 +47,7 @@
 #define SCSI_STS_PARITY_ERR ((volatile uint8_t*)0x6000002C)
 
 #define SCSI_FIFO_DATA ((volatile uint16_t*)0x60000040)
-#define SCSI_FIFO_DEPTH 512
+#define SCSI_FIFO_DEPTH 256
 
 
 #define scsiPhyFifoFull() ((*SCSI_STS_FIFO & 0x01) == 0x01)
@@ -109,9 +109,14 @@ extern volatile uint8_t scsiTxDMAComplete;
 void scsiReadDMA(uint8_t* data, uint32_t count);
 int scsiReadDMAPoll();
 
+// Low-level.
+void scsiReadPIO(uint8_t* data, uint32_t count);
+
 void scsiWriteDMA(const uint8_t* data, uint32_t count);
 int scsiWriteDMAPoll();
 
 int scsiSelfTest(void);
 
+uint32_t s2s_getScsiRateMBs();
+
 #endif

+ 18 - 2
lib/SCSI2SD/src/firmware/usb_device/usbd_msc_scsi.c

@@ -662,8 +662,24 @@ static int8_t SCSI_CheckAddressRange (USBD_HandleTypeDef  *pdev, uint8_t lun , u
 {
 	USBD_CompositeClassData *classData = (USBD_CompositeClassData*) pdev->pClassData;
 	USBD_MSC_BOT_HandleTypeDef *hmsc = &(classData->msc);
-  
-  if ((blk_offset + blk_nbr) > hmsc->scsi_blk_nbr )
+
+	// michael@codesrc.com: Re-check block limits in case we have different values
+	// for different LUNs.
+    uint32_t blkNbr;
+	uint16_t blkSize;
+	if(((USBD_StorageTypeDef *)pdev->pUserData)->GetCapacity(lun, &blkNbr, &blkSize) != 0)
+	{
+    SCSI_SenseCode(pdev,
+                   lun,
+                   NOT_READY, 
+                   MEDIUM_NOT_PRESENT);
+    return -1;
+  } 
+	// global variables. wooo
+	hmsc->scsi_blk_size = blkSize;
+	hmsc->scsi_blk_nbr = blkNbr;
+
+  if ((blk_offset + blk_nbr) > blkNbr )
   {
     SCSI_SenseCode(pdev,
                    lun, 

+ 2 - 2
lib/SCSI2SD/src/firmware/usb_device/usbd_msc_storage_sd.c

@@ -205,6 +205,8 @@ int8_t s2s_usbd_storage_Write (uint8_t lun,
 			for (int i = 0; i < SDSectorsPerSCSISector(cfg->bytesPerSector); ++i)
 			{
 				uint8_t partial[512] S2S_DMA_ALIGN;
+				memcpy(partial, buf, 512);
+
 				BSP_SD_WriteBlocks_DMA(
 					(uint32_t*) partial,
 					sdSectorNum * 512LL,
@@ -215,8 +217,6 @@ int8_t s2s_usbd_storage_Write (uint8_t lun,
 				int validBytes = cfg->bytesPerSector % SD_SECTOR_SIZE;
 				if (validBytes == 0) validBytes = SD_SECTOR_SIZE;
 
-				memcpy(buf, partial, validBytes);
-
 				buf += validBytes;
 			}