浏览代码

Fix terrible performance

Michael McMaster 9 年之前
父节点
当前提交
ed9e9ca1c1

+ 21 - 0
lib/SCSI2SD/STM32CubeMX/SCSI2SD-V6/Drivers/STM32F2xx_HAL_Driver/Src/stm32f2xx_hal_sd.c

@@ -1048,6 +1048,27 @@ HAL_SD_ErrorTypedef HAL_SD_WriteBlocks_DMA(SD_HandleTypeDef *hsd, uint32_t *pWri
   }
   else
   {
+    /* MM: Prepare for write */
+    /* Set the block count for the card: send APP_CMD, then SET_BLOCK_COUNT with NumberOfBlocks */
+    sdio_cmdinitstructure.Argument         = (uint32_t)(hsd->RCA << 16);
+    sdio_cmdinitstructure.CmdIndex         = SD_CMD_APP_CMD;
+    SDIO_SendCommand(hsd->Instance, &sdio_cmdinitstructure);
+    errorstate = SD_CmdResp1Error(hsd, SD_CMD_APP_CMD);
+    if (errorstate != SD_OK)
+    {
+      return errorstate;
+    }
+    sdio_cmdinitstructure.Argument         = (uint32_t)NumberOfBlocks;
+    sdio_cmdinitstructure.CmdIndex         = SD_CMD_SET_BLOCK_COUNT;
+    SDIO_SendCommand(hsd->Instance, &sdio_cmdinitstructure);
+    errorstate = SD_CmdResp1Error(hsd, SD_CMD_SET_BLOCK_COUNT);
+    if (errorstate != SD_OK)
+    {
+      return errorstate;
+    }
+  
+    /* /MM */
+
     /* Send CMD25 WRITE_MULT_BLOCK with argument data address */
     sdio_cmdinitstructure.CmdIndex = SD_CMD_WRITE_MULT_BLOCK;
   }

+ 1 - 1
lib/SCSI2SD/src/firmware/config.c

@@ -37,7 +37,7 @@
 
 #include <string.h>
 
-static const uint16_t FIRMWARE_VERSION = 0x0605;
+static const uint16_t FIRMWARE_VERSION = 0x0606;
 
 // 1 flash row
 static const uint8_t DEFAULT_CONFIG[128] =

+ 2 - 9
lib/SCSI2SD/src/firmware/disk.c

@@ -553,14 +553,6 @@ void scsiDiskPoll()
 			likely(scsiDev.phase == DATA_IN) &&
 			likely(!scsiDev.resetFlag))
 		{
-			// Wait for the next DMA interrupt. It's beneficial to halt the
-			// processor to give the DMA controller more memory bandwidth to
-			// work with.
-			if (sdActive && scsiActive)
-			{
-				__WFI();
-			}
-
 			if (sdActive && sdReadDMAPoll())
 			{
 				prep += sdActive;
@@ -574,11 +566,13 @@ void scsiDiskPoll()
 				// Start an SD transfer if we have space.
 				uint32_t startBuffer = prep % buffers;
 				uint32_t sectors = totalSDSectors - prep;
+#if 0
 				if (!scsiActive && prep == i)
 				{
 					sectors = 1; // We need to get some data to send ASAP !
 				}
 				else
+#endif
 				{
 					uint32_t freeBuffers = buffers - (prep - i);
 					uint32_t contiguousBuffers = buffers - startBuffer;
@@ -616,7 +610,6 @@ void scsiDiskPoll()
 			likely(scsiDev.phase == DATA_IN) &&
 			likely(!scsiDev.resetFlag))
 		{
-			__WFI();
 		}
 
 

+ 5 - 1
lib/SCSI2SD/src/firmware/main.c

@@ -106,6 +106,9 @@ void mainLoop()
 		}
 		else
 		{
+			// TODO: the __WFI() wait below is disabled because it hurts performance
+			// significantly. Work out why __WFI() doesn't wake up immediately!
+#if 0
 			// Wait for our 1ms timer to save some power.
 			// There's an interrupt on the SEL signal to ensure we respond
 			// quickly to any SCSI commands. The selection abort time is
@@ -116,12 +119,13 @@ void mainLoop()
 
 			if (!*SCSI_STS_SELECTED)
 			{
-				__WFI(); // Will wake on interrupt, regardless of mask
+				//__WFI(); // Will wake on interrupt, regardless of mask
 			}
 			if (!interruptState)
 			{
 				__enable_irq();
 			}
+#endif
 		}
 	}
 	else if (scsiDev.phase >= 0)

+ 2 - 1
lib/SCSI2SD/src/firmware/scsi.h

@@ -97,7 +97,8 @@ typedef struct
 
 typedef struct
 {
-	uint8_t data[MAX_SECTOR_SIZE * 2]; // Must be aligned for DMA
+	// TODO reduce this buffer size and add a proper cache
+	uint8_t data[MAX_SECTOR_SIZE * 8]; // Must be aligned for DMA
 
 	TargetState targets[S2S_MAX_TARGETS];
 	TargetState* target;

+ 22 - 31
lib/SCSI2SD/src/firmware/scsiPhy.c

@@ -176,30 +176,32 @@ void
 scsiRead(uint8_t* data, uint32_t count)
 {
 	int i = 0;
+
+
+	uint32_t chunk = ((count - i) > SCSI_FIFO_DEPTH)
+		? SCSI_FIFO_DEPTH : (count - i);
+	if (chunk >= 16)
+	{
+		// DMA is doing 32bit transfers.
+		chunk = chunk & 0xFFFFFFF8;
+	}
+	startScsiRx(chunk);
+
 	while (i < count && likely(!scsiDev.resetFlag))
 	{
-		uint32_t chunk = ((count - i) > SCSI_FIFO_DEPTH)
-			? SCSI_FIFO_DEPTH : (count - i);
+		while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) {}
+		scsiPhyFifoFlip();
 
-		if (chunk >= 16)
+		uint32_t nextChunk = ((count - i - chunk) > SCSI_FIFO_DEPTH)
+			? SCSI_FIFO_DEPTH : (count - i - chunk);
+		if (nextChunk >= 16)
 		{
-			// DMA is doing 32bit transfers.
-			chunk = chunk & 0xFFFFFFF8;
+			nextChunk = nextChunk & 0xFFFFFFF8;
 		}
-
-#if FIFODEBUG
-		if (!scsiPhyFifoAltEmpty()) {
-			// Force a lock-up.
-			assertFail();
+		if (nextChunk > 0)
+		{
+			startScsiRx(nextChunk);
 		}
-#endif
-
-		startScsiRx(chunk);
-		// Wait for the next scsi interrupt (or the 1ms systick)
-		__WFI();
-
-		while (!scsiPhyComplete() && likely(!scsiDev.resetFlag)) {}
-		scsiPhyFifoFlip();
 
 		if (chunk < 16)
 		{
@@ -209,15 +211,10 @@ scsiRead(uint8_t* data, uint32_t count)
 		{
 			scsiReadDMA(data + i, chunk);
 
-			// Wait for the next DMA interrupt (or the 1ms systick)
-			// It's beneficial to halt the processor to
-			// give the DMA controller more memory bandwidth to work with.
-			__WFI();
 			trace(trace_spinReadDMAPoll);
 
 			while (!scsiReadDMAPoll() && likely(!scsiDev.resetFlag))
 			{
-				__WFI();
 			};
 		}
 
@@ -230,6 +227,7 @@ scsiRead(uint8_t* data, uint32_t count)
 		}
 #endif
 		i += chunk;
+		chunk = nextChunk;
 	}
 }
 
@@ -328,21 +326,15 @@ scsiWrite(const uint8_t* data, uint32_t count)
 			chunk = chunk & 0xFFFFFFF8;
 			scsiWriteDMA(data + i, chunk);
 
-			// Wait for the next DMA interrupt (or the 1ms systick)
-			// It's beneficial to halt the processor to
-			// give the DMA controller more memory bandwidth to work with.
-			__WFI();
 			trace(trace_spinReadDMAPoll);
 
 			while (!scsiWriteDMAPoll() && likely(!scsiDev.resetFlag))
 			{
-				__WFI();
-			};
+			}
 		}
 
 		while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))
 		{
-			__WFI();
 		}
 
 #if FIFODEBUG
@@ -357,7 +349,6 @@ scsiWrite(const uint8_t* data, uint32_t count)
 	}
 	while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))
 	{
-		__WFI();
 	}
 
 #if FIFODEBUG