| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479 | /* Synchronous mode SCSI implementation. * * In synchronous mode, the handshake mechanism is not used. Instead * either end of the communication will just send a bunch of bytes * and only afterwards checks that the number of acknowledgement * pulses matches. * * The receiving end should latch in the data at the falling edge of * the request pulse (on either REQ or ACK pin). We use the GD32 EXMC * peripheral to implement this latching with the NWAIT pin when * reading data from the host. NOE is used to generate the REQ pulses. * * Writing data to the host is simpler, as we can just write it out * from the GPIO port at our own pace. A timer is used for generating * the output pulses on REQ pin. */#include "scsi_accel_sync.h"#include <BlueSCSI_log.h>#include <gd32f20x_exmc.h>#include <scsi.h>#ifndef SCSI_SYNC_MODE_AVAILABLEvoid scsi_accel_sync_init() {}void scsi_accel_sync_recv(uint8_t *data, uint32_t count, int* parityError, volatile int *resetFlag) {}void scsi_accel_sync_send(const uint8_t* data, uint32_t count, volatile int *resetFlag) {}#else/********************************//* Transfer from host to device *//********************************/#define SYNC_DMA_BUFSIZE 512static uint32_t g_sync_dma_buf[SYNC_DMA_BUFSIZE];void scsi_accel_sync_init(){    rcu_periph_clock_enable(RCU_EXMC);    rcu_periph_clock_enable(SCSI_EXMC_DMA_RCU);    rcu_periph_clock_enable(SCSI_SYNC_TIMER_RCU);    exmc_norsram_timing_parameter_struct timing_param = {        .asyn_access_mode = EXMC_ACCESS_MODE_A,        .syn_data_latency = EXMC_DATALAT_2_CLK,        .syn_clk_division = EXMC_SYN_CLOCK_RATIO_2_CLK,        .bus_latency = 1,        .asyn_data_setuptime = 2,        .asyn_address_holdtime = 2,        .asyn_address_setuptime = 16    };    exmc_norsram_parameter_struct sram_param = {        .norsram_region = EXMC_BANK0_NORSRAM_REGION0,        .write_mode = EXMC_ASYN_WRITE,        .extended_mode = DISABLE,        .asyn_wait = ENABLE,        .nwait_signal = ENABLE,        .memory_write = DISABLE,        .nwait_config = EXMC_NWAIT_CONFIG_DURING,        .wrap_burst_mode = DISABLE,        .nwait_polarity = EXMC_NWAIT_POLARITY_HIGH,        .burst_mode = DISABLE,        .databus_width = EXMC_NOR_DATABUS_WIDTH_16B,        .memory_type = EXMC_MEMORY_TYPE_SRAM,        .address_data_mux = DISABLE,        .read_write_timing = &timing_param    };    EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) &= ~EXMC_SNCTL_NRBKEN;    exmc_norsram_init(&sram_param);    // DMA used to transfer data from EXMC to RAM    // DMA is used so that if data transfer fails, we can at least abort by resetting CPU.    // Accessing EXMC from the CPU directly hangs it totally if ACK pulses are not received.    dma_parameter_struct exmc_dma_config =    {        .periph_addr = EXMC_NOR_PSRAM,        .periph_width = DMA_PERIPHERAL_WIDTH_16BIT,        .memory_addr = (uint32_t)g_sync_dma_buf,        .memory_width = DMA_MEMORY_WIDTH_16BIT,        .number = 0, // Filled before transfer        .priority = DMA_PRIORITY_MEDIUM,        .periph_inc = DMA_PERIPH_INCREASE_DISABLE,        .memory_inc = DMA_MEMORY_INCREASE_ENABLE,        .direction = DMA_PERIPHERAL_TO_MEMORY    };    dma_init(SCSI_EXMC_DMA, SCSI_EXMC_DMACH, &exmc_dma_config);    dma_memory_to_memory_enable(SCSI_EXMC_DMA, SCSI_EXMC_DMACH);    gpio_init(SCSI_IN_ACK_EXMC_NWAIT_PORT, GPIO_MODE_IN_FLOATING, 0, SCSI_IN_ACK_EXMC_NWAIT_PIN);    gpio_init(SCSI_TIMER_IN_PORT, GPIO_MODE_IN_FLOATING, 0, SCSI_TIMER_IN_PIN);    // TIMER1 is used to count ACK pulses    TIMER_CTL0(SCSI_SYNC_TIMER) = 0;    TIMER_SMCFG(SCSI_SYNC_TIMER) = TIMER_SLAVE_MODE_EXTERNAL0 | TIMER_SMCFG_TRGSEL_CI0FE0;    TIMER_CAR(SCSI_SYNC_TIMER) = 65535;    TIMER_PSC(SCSI_SYNC_TIMER) = 0;    TIMER_CHCTL0(SCSI_SYNC_TIMER) = 0x0001; // CH0 as input}void scsi_accel_sync_recv(uint8_t *data, uint32_t count, int* parityError, volatile int *resetFlag){    // Enable EXMC to drive REQ from EXMC_NOE pin    EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) |= EXMC_SNCTL_NRBKEN;    uint32_t oldmode = GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT);    uint32_t newmode = oldmode & ~(0xF << (SCSI_OUT_REQ_EXMC_NOE_IDX * 4));    newmode |= 0xB << (SCSI_OUT_REQ_EXMC_NOE_IDX * 4);    GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT) = newmode;    while (count > 0)    {        uint32_t blocksize = (count > SYNC_DMA_BUFSIZE * 2) ? (SYNC_DMA_BUFSIZE * 2) : count;        count -= blocksize;        DMA_CHCNT(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) = blocksize;        DMA_CHCTL(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) |= DMA_CHXCTL_CHEN;        uint16_t *src = (uint16_t*)g_sync_dma_buf;        uint8_t *dst = data;        uint8_t *end = data + blocksize;        uint32_t start = millis();        while (dst < end)        {            uint32_t remain = DMA_CHCNT(SCSI_EXMC_DMA, SCSI_EXMC_DMACH);            while (dst < end - remain)            {                *dst++ = ~(*src++) >> SCSI_EXMC_DATA_SHIFT;            }            if ((uint32_t)(millis() - start) > 500 || *resetFlag)            {                // We are in a pinch here: without ACK pulses coming, the EXMC and DMA peripherals                // are locked up. The only way out is a whole system reset.                bluelog("SCSI Synchronous read timeout: resetting system");                NVIC_SystemReset();            }        }        DMA_CHCTL(SCSI_EXMC_DMA, SCSI_EXMC_DMACH) &= ~DMA_CHXCTL_CHEN;        data = end;    }    GPIO_CTL0(SCSI_OUT_REQ_EXMC_NOE_PORT) = oldmode;    EXMC_SNCTL(EXMC_BANK0_NORSRAM_REGION0) &= ~EXMC_SNCTL_NRBKEN;}/********************************//* Transfer from device to host *//********************************/// Simple delay, about 20 ns.// This is less likely to get optimized away by CPU pipeline than nop#define ASM_DELAY()  \"   ldr     %[tmp2], [%[reset_flag]] \n"// Take 8 bits from d and format them for writing// d is name of data operand, b is bit offset#define ASM_LOAD_DATA(b) \"        ubfx    %[tmp1], %[data], #" b ", #8 \n" \"        ldr     %[tmp1], [%[byte_lookup], %[tmp1], lsl #2] \n"// Write data to SCSI port and set REQ high#define ASM_SEND_DATA() \"        str     %[tmp1], [%[out_port_bop]] \n"// Set REQ low#define ASM_SET_REQ_LOW() \"        mov     %[tmp2], %[bop_req_low] \n" \"        str     %[tmp2], [%[out_port_bop]] \n"// Wait for ACK_TIMER - n to be less than num_bytes#define ASM_WAIT_ACK_TIMER(n) \    "wait_acks_" n "_%=: \n" \        "   ldr     %[tmp2], [%[ack_timer]] \n" \        "   sub     %[tmp2], # " n " \n" \        "   cmp     %[tmp2], %[num_bytes] \n" \        "   ble     got_acks_" n "_%= \n" \        "   ldr     %[tmp2], [%[reset_flag]] \n" \        "   cmp     %[tmp2], #0 \n" \        "   bne     all_done_%= \n" \        "   b       wait_acks_" n "_%= \n" \    "got_acks_" n "_%=: \n"// Send 4 bytes#define ASM_SEND_4BYTES() \ASM_LOAD_DATA("0") \ASM_SEND_DATA() \ASM_DELAY1() \ASM_SET_REQ_LOW() \ASM_DELAY2() \ASM_LOAD_DATA("8") \ASM_SEND_DATA() \ASM_DELAY1() \ASM_SET_REQ_LOW() \ASM_DELAY2() \ASM_LOAD_DATA("16") \ASM_SEND_DATA() \ASM_DELAY1() \ASM_SET_REQ_LOW() \ASM_DELAY2() \ASM_LOAD_DATA("24") \ASM_SEND_DATA() \ASM_DELAY1() \ASM_SET_REQ_LOW()// Send 1 byte, wait for ACK_TIMER to be less than num_bytes + n and send 3 bytes more// This interleaving minimizes the delay caused by WAIT_ACK_TIMER.#define ASM_SEND_4BYTES_WAIT(n) \ASM_LOAD_DATA("0") \ASM_SEND_DATA() \ASM_DELAY2() \ASM_LOAD_DATA("8") \ASM_SET_REQ_LOW() \ASM_DELAY2() \"   ldr     %[tmp2], [%[ack_timer]] \n" \"   sub     %[tmp2], # " n " \n" \ASM_SEND_DATA() \"   cmp     %[tmp2], %[num_bytes] \n" \"   ble     got_acks_" n "_%= \n" \ASM_WAIT_ACK_TIMER(n) \ASM_DELAY2() \ASM_SET_REQ_LOW() \ASM_DELAY2() \ASM_LOAD_DATA("16") \ASM_SEND_DATA() \ASM_DELAY1() \ASM_SET_REQ_LOW() \ASM_DELAY2() \ASM_LOAD_DATA("24") \ASM_SEND_DATA() \ASM_DELAY1() \ASM_SET_REQ_LOW() \// Specialized routine for settings:// <=100 ns period, >=15 outstanding REQsstatic void sync_send_100ns_15off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag){    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);    volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);    const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;    register uint32_t tmp1 = 0;    register uint32_t tmp2 = 0;    register uint32_t data = 0;// Delay 1 is typically longest and delay 2 shortest.// Tuning these is just trial and error.#define ASM_DELAY1() "    nop\n   nop\n   nop\n"#define ASM_DELAY2() "    nop\n   nop\n"    asm volatile (    "main_loop_%=: \n"        "   subs  %[num_bytes], %[num_bytes], #16 \n"        "   bmi     last_bytes_%= \n"        /* At each point make sure there is at most 15 bytes in flight */        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES_WAIT("22")        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES()        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES_WAIT("14")        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES()        "   cbz   %[num_bytes], all_done_%= \n"        "   b     main_loop_%= \n"    "last_bytes_%=: \n"        "   add  %[num_bytes], %[num_bytes], #16 \n"    "last_bytes_loop_%=: \n"        "   ldrb    %[data], [%[buf]], #1 \n"        ASM_LOAD_DATA("0")        ASM_WAIT_ACK_TIMER("15")        ASM_SEND_DATA()        ASM_DELAY1()        ASM_SET_REQ_LOW()        "   subs %[num_bytes], %[num_bytes], #1 \n"        "   bne  last_bytes_loop_%= \n"    "all_done_%=: \n"        ASM_DELAY()    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),                   [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)    : /* Input */ [ack_timer] "r" (ack_timer),                  [bop_req_low] "I" (SCSI_OUT_REQ << 16),                  [out_port_bop] "r"(out_port_bop),                  [byte_lookup] "r" (byte_lookup),                  [reset_flag] "r" (resetFlag)    : /* Clobber */);#undef ASM_DELAY1#undef ASM_DELAY2    SCSI_RELEASE_DATA_REQ();}// Specialized routine for settings:// <=200 ns period, >=15 outstanding REQsstatic void sync_send_200ns_15off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag){    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);    volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);    const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;    register uint32_t tmp1 = 0;    register uint32_t tmp2 = 0;    register uint32_t data = 0;#define ASM_DELAY1() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY()#define ASM_DELAY2() ASM_DELAY() ASM_DELAY() ASM_DELAY()    asm volatile (    "main_loop_%=: \n"        "   subs  %[num_bytes], %[num_bytes], #16 \n"        "   bmi     last_bytes_%= \n"        /* At each point make sure there is at most 15 bytes in flight */        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES_WAIT("22")        ASM_DELAY2()        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES()        ASM_DELAY2()        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES_WAIT("14")        ASM_DELAY2()        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES()        "   cbz   %[num_bytes], all_done_%= \n"        "   b     main_loop_%= \n"    "last_bytes_%=: \n"        "   add  %[num_bytes], %[num_bytes], #16 \n"    "last_bytes_loop_%=: \n"        "   ldrb    %[data], [%[buf]], #1 \n"        ASM_LOAD_DATA("0")        ASM_WAIT_ACK_TIMER("15")        ASM_SEND_DATA()        ASM_DELAY1()        ASM_SET_REQ_LOW()        ASM_DELAY2()        "   subs %[num_bytes], %[num_bytes], #1 \n"        "   bne  last_bytes_loop_%= \n"    "all_done_%=: \n"        ASM_DELAY1()    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),                   [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)    : /* Input */ [ack_timer] "r" (ack_timer),                  [bop_req_low] "I" (SCSI_OUT_REQ << 16),                  [out_port_bop] "r"(out_port_bop),                  [byte_lookup] "r" (byte_lookup),                  [reset_flag] "r" (resetFlag)    : /* Clobber */);#undef ASM_DELAY1#undef ASM_DELAY2    SCSI_RELEASE_DATA_REQ();}// Specialized routine for settings:// <=260 ns period, >=7 outstanding REQsstatic void sync_send_260ns_7off(const uint8_t *buf, uint32_t num_bytes, volatile int *resetFlag){    volatile uint32_t *out_port_bop = (volatile uint32_t*)&GPIO_BOP(SCSI_OUT_PORT);    volatile uint32_t *ack_timer = &TIMER_CNT(SCSI_SYNC_TIMER);    const uint32_t *byte_lookup = g_scsi_out_byte_to_bop;    register uint32_t tmp1 = 0;    register uint32_t tmp2 = 0;    register uint32_t data = 0;#define ASM_DELAY1() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY() \                     ASM_DELAY() ASM_DELAY()#define ASM_DELAY2() ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY() \                     ASM_DELAY() ASM_DELAY() ASM_DELAY() ASM_DELAY()    asm volatile (    "main_loop_%=: \n"        "   subs  %[num_bytes], %[num_bytes], #4 \n"        "   bmi     last_bytes_%= \n"        /* At each point make sure there is at most 3 bytes in flight */        "   ldr   %[data], [%[buf]], #4 \n"        ASM_SEND_4BYTES_WAIT("7")        "   cbz   %[num_bytes], all_done_%= \n"        "   b     main_loop_%= \n"    "last_bytes_%=: \n"        "   add  %[num_bytes], %[num_bytes], #4 \n"    "last_bytes_loop_%=: \n"        "   ldrb    %[data], [%[buf]], #1 \n"        ASM_LOAD_DATA("0")        ASM_WAIT_ACK_TIMER("5")        ASM_SEND_DATA()        ASM_DELAY1()        ASM_SET_REQ_LOW()        ASM_DELAY2()        "   subs %[num_bytes], %[num_bytes], #1 \n"        "   bne  last_bytes_loop_%= \n"    "all_done_%=: \n"        ASM_DELAY1()    : /* Output */ [tmp1] "+l" (tmp1), [tmp2] "+l" (tmp2), [data] "+r" (data),                   [buf] "+r" (buf), [num_bytes] "+r" (num_bytes)    : /* Input */ [ack_timer] "r" (ack_timer),                  [bop_req_low] "I" (SCSI_OUT_REQ << 16),                  [out_port_bop] "r"(out_port_bop),                  [byte_lookup] "r" (byte_lookup),                  [reset_flag] "r" (resetFlag)    : /* Clobber */);#undef ASM_DELAY1#undef ASM_DELAY2    SCSI_RELEASE_DATA_REQ();}void scsi_accel_sync_send(const uint8_t* data, uint32_t count, volatile int *resetFlag){    // Timer counts down from the initial number of bytes.    TIMER_CNT(SCSI_SYNC_TIMER) = count;    TIMER_CTL0(SCSI_SYNC_TIMER) = TIMER_CTL0_CEN | TIMER_CTL0_DIR;    int syncOffset = scsiDev.target->syncOffset;    int syncPeriod = scsiDev.target->syncPeriod;    if (syncOffset >= 15 && syncPeriod <= 25)    {        sync_send_100ns_15off(data, count, resetFlag);    }    else if (syncOffset >= 15 && syncPeriod <= 50)    {        sync_send_200ns_15off(data, count, resetFlag);    }    else if (syncOffset >= 7 && syncPeriod <= 65)    {        sync_send_260ns_7off(data, count, resetFlag);    }    else    {        bluedbg("No optimized routine for syncOffset=", syncOffset, " syndPeriod=", syncPeriod, ", using fallback");        while (count-- > 0)        {            while (TIMER_CNT(SCSI_SYNC_TIMER) > count + syncOffset && !*resetFlag);            SCSI_OUT_DATA(*data++);            delay_ns(syncPeriod * 2);            SCSI_OUT(REQ, 0);            delay_ns(syncPeriod * 2);        }        delay_ns(syncPeriod * 2);        SCSI_RELEASE_DATA_REQ();    }    while (TIMER_CNT(SCSI_SYNC_TIMER) > 0 && !*resetFlag);    if (*resetFlag)    {        bluedbg("Bus reset during sync transfer, total ", (int)count,              " bytes, remaining ACK count ", (int)TIMER_CNT(SCSI_SYNC_TIMER));    }    TIMER_CTL0(SCSI_SYNC_TIMER) = 0;}#endif
 |