4 anos atrás · 17607c3b5c
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,6 @@
 
				 *.scp
			
 
				 *.pyc
			
 
				 scripts/greaseweazle/version.py
			
 
				+scripts/greaseweazle/optimised/optimised*
			
 
				+scripts/c_ext/build/
			
 
				 Greaseweazle-*
			
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 
				 export FW_MAJOR := 0
			
 
				 export FW_MINOR := 23
			
 
				 
			
 
				-TARGETS := all blinky clean dist windist mrproper ocd flash start serial
			
 
				+TARGETS := all blinky clean dist windist mrproper ocd flash start serial pysetup
			
 
				 .PHONY: $(TARGETS)
			
 
				 
			
 
				 ifneq ($(RULES_MK),y)
			
@@ -33,6 +33,7 @@ blinky:
 
				 		Blinky.elf Blinky.bin Blinky.hex
			
 
				 
			
 
				 clean::
			
 
				+	rm -rf scripts/greaseweazle/optimised/optimised* scripts/c_ext/build
			
 
				 	rm -f *.hex *.upd scripts/greaseweazle/*.pyc
			
 
				 	rm -f scripts/greaseweazle/version.py
			
 
				 	find . -name __pycache__ | xargs rm -rf
			
@@ -52,8 +53,10 @@ dist:
 
				 	cp -a README.md $(PROJ)-$(VER)/
			
 
				 	cp -a gw $(PROJ)-$(VER)/
			
 
				 	cp -a scripts/49-greaseweazle.rules $(PROJ)-$(VER)/scripts/
			
 
				+	cp -a scripts/setup.sh $(PROJ)-$(VER)/scripts/
			
 
				 	cp -a scripts/gw.py $(PROJ)-$(VER)/scripts/
			
 
				 	cp -a scripts/greaseweazle $(PROJ)-$(VER)/scripts
			
 
				+	cp -a scripts/c_ext $(PROJ)-$(VER)/scripts
			
 
				 	cp -a scripts/misc/*.py $(PROJ)-$(VER)/scripts/misc/
			
 
				 	cp -a RELEASE_NOTES $(PROJ)-$(VER)/
			
 
				 	$(MAKE) clean
			
@@ -64,12 +67,13 @@ dist:
 
				 	$(MAKE) clean
			
 
				 	$(ZIP) $(PROJ)-$(VER).zip $(PROJ)-$(VER)
			
 
				 
			
 
				-windist:
			
 
				+windist: pysetup
			
 
				 	rm -rf $(PROJ)-$(VER) ipf ipf.zip
			
 
				 	[ -e $(PROJ)-$(VER).zip ] || \
			
 
				 	curl -L https://github.com/keirf/Greaseweazle/releases/download/$(VER)/$(PROJ)-$(VER).zip --output $(PROJ)-$(VER).zip
			
 
				 	$(UNZIP) $(PROJ)-$(VER).zip
			
 
				 	cp -a scripts/setup.py $(PROJ)-$(VER)/scripts
			
 
				+	cp -a scripts/greaseweazle/optimised/optimised* $(PROJ)-$(VER)/scripts/greaseweazle/optimised
			
 
				 	cd $(PROJ)-$(VER)/scripts && $(PYTHON) setup.py build
			
 
				 	cp -a $(PROJ)-$(VER)/scripts/build/exe.win*/* $(PROJ)-$(VER)/
			
 
				 	cp -a $(PROJ)-$(VER)/lib/bitarray/VCRUNTIME140.DLL $(PROJ)-$(VER)/
			
@@ -87,6 +91,9 @@ scripts/greaseweazle/version.py: Makefile
 
				 	echo "major = $(FW_MAJOR)" >$@
			
 
				 	echo "minor = $(FW_MINOR)" >>$@
			
 
				 
			
 
				+# Install the Python dependencies and build the optimised C extension.
				+# setup.sh is sourced by the recipe shell and reads PYTHON from its
				+# environment; the value is quoted so that a multi-word interpreter
				+# command (e.g. "py -3") is passed through as a single assignment.
				+pysetup:
				+	PYTHON="$(PYTHON)" . ./scripts/setup.sh
			
 
				+
			
 
				 BAUD=115200
			
 
				 DEV=/dev/ttyUSB0
			
 
				 
			
--- a/scripts/c_ext/optimised.c
+++ b/scripts/c_ext/optimised.c
@@ -0,0 +1,247 @@
 
				+
			
 
				+#define PY_SSIZE_T_CLEAN
			
 
				+#include "Python.h"
			
 
				+#include <stdio.h>
			
 
				+#include <stdint.h>
			
 
				+
			
 
				+#define FLUXOP_INDEX   1
			
 
				+#define FLUXOP_SPACE   2
			
 
				+#define FLUXOP_ASTABLE 3
			
 
				+
			
 
				+/* Python string "append", created once by PyInit_optimised(). */
				+static PyObject *append_s;
				+
				+/* Performs the Python call bitarray.append(value).
				+ * Returns 1 on success, or 0 if the call failed (exception is set). */
				+static int bitarray_append(PyObject *bitarray, PyObject *value)
				+{
				+    PyObject *ret;
				+
				+    ret = PyObject_CallMethodObjArgs(bitarray, append_s, value, NULL);
				+    if (ret != NULL) {
				+        /* The call's return value is not needed: drop it. */
				+        Py_DECREF(ret);
				+        return 1;
				+    }
				+
				+    return 0;
				+}
			
 
				+
			
 
				+/* Append a NEW reference to a list and then release it. PyList_Append()
				+ * takes its own reference, so the caller's reference must be dropped.
				+ * Returns 0 on success, -1 with an exception set (also if item is NULL). */
				+static int list_append_owned(PyObject *list, PyObject *item)
				+{
				+    int rc;
				+
				+    if (item == NULL)
				+        return -1;
				+    rc = PyList_Append(list, item);
				+    Py_DECREF(item);
				+    return rc;
				+}
				+
				+/* *val = float(next(iter)).
				+ * Returns 0 on success, -1 with an exception set. An exhausted iterator
				+ * raises StopIteration, mirroring Python's next(). */
				+static int iter_next_double(PyObject *iter, double *val)
				+{
				+    PyObject *item = PyIter_Next(iter);
				+
				+    if (item == NULL) {
				+        /* NULL without an exception means the iterator is exhausted. */
				+        if (!PyErr_Occurred())
				+            PyErr_SetNone(PyExc_StopIteration);
				+        return -1;
				+    }
				+    *val = PyFloat_AsDouble(item);
				+    Py_DECREF(item);
				+    return PyErr_Occurred() ? -1 : 0;
				+}
				+
				+/*
				+ * flux_to_bitcells(bit_array, time_array, revolutions,
				+ *                  index_iter, flux_iter,
				+ *                  freq, clock_centre, clock_min, clock_max,
				+ *                  pll_period_adj, pll_phase_adj)
				+ *
				+ * Runs a software PLL over the values yielded by flux_iter (each divided
				+ * by freq) and clocks out bitcells:
				+ *  - one bool per bitcell is appended to bit_array via its append() method;
				+ *  - the duration of each bitcell is appended to the list time_array;
				+ *  - each time the running index time (from index_iter, already scaled
				+ *    by the caller) is crossed, the number of bitcells generated since
				+ *    the previous crossing is appended to the list revolutions.
				+ *
				+ * Returns None on success, or NULL with an exception set.
				+ */
				+static PyObject *
				+flux_to_bitcells(PyObject *self, PyObject *args)
				+{
				+    /* Parameters */
				+    PyObject *bit_array, *time_array, *revolutions;
				+    PyObject *index_iter, *flux_iter;
				+    double freq, clock_centre, clock_min, clock_max;
				+    double pll_period_adj, pll_phase_adj;
				+
				+    /* Local variables */
				+    PyObject *item;
				+    double clock, new_ticks, ticks, to_index, next_index;
				+    double last_time = 0.0; /* value most recently appended to time_array */
				+    int zeros, nbits;
				+
				+    if (!PyArg_ParseTuple(args, "OOOOOdddddd",
				+                          &bit_array, &time_array, &revolutions,
				+                          &index_iter, &flux_iter,
				+                          &freq, &clock_centre, &clock_min, &clock_max,
				+                          &pll_period_adj, &pll_phase_adj))
				+        return NULL;
				+
				+    /* Checked at run time: assert() is compiled out in NDEBUG builds. */
				+    if (!PyIter_Check(index_iter) || !PyIter_Check(flux_iter)) {
				+        PyErr_SetString(PyExc_TypeError,
				+                        "index_iter and flux_iter must be iterators");
				+        return NULL;
				+    }
				+
				+    nbits = 0;
				+    ticks = 0.0;
				+    clock = clock_centre;
				+
				+    /* to_index = next(index_iter) */
				+    if (iter_next_double(index_iter, &to_index) < 0)
				+        return NULL;
				+
				+    /* for x in flux_iter: */
				+    while ((item = PyIter_Next(flux_iter)) != NULL) {
				+
				+        double x = PyFloat_AsDouble(item);
				+        Py_DECREF(item);
				+        if (PyErr_Occurred())
				+            return NULL;
				+
				+        /* Gather enough ticks to generate at least one bitcell. */
				+        ticks += x / freq;
				+        if (ticks < clock/2)
				+            continue;
				+
				+        /* Clock out zero or more 0s, followed by a 1. */
				+        for (zeros = 0; ; zeros++) {
				+
				+            /* Check if we cross the index mark. */
				+            to_index -= clock;
				+            if (to_index < 0) {
				+                if (list_append_owned(revolutions,
				+                                      PyLong_FromLong(nbits)) < 0)
				+                    return NULL;
				+                nbits = 0;
				+                /* to_index += next(index_iter) */
				+                if (iter_next_double(index_iter, &next_index) < 0)
				+                    return NULL;
				+                to_index += next_index;
				+            }
				+
				+            nbits += 1;
				+            ticks -= clock;
				+            last_time = clock;
				+            if (list_append_owned(time_array,
				+                                  PyFloat_FromDouble(clock)) < 0)
				+                return NULL;
				+            if (ticks < clock/2) {
				+                if (!bitarray_append(bit_array, Py_True))
				+                    return NULL;
				+                break;
				+            }
				+
				+            if (!bitarray_append(bit_array, Py_False))
				+                return NULL;
				+
				+        }
				+
				+        /* PLL: Adjust clock frequency according to phase mismatch. */
				+        if (zeros <= 3) {
				+            /* In sync: adjust clock by a fraction of the phase mismatch. */
				+            clock += ticks * pll_period_adj;
				+        } else {
				+            /* Out of sync: adjust clock towards centre. */
				+            clock += (clock_centre - clock) * pll_period_adj;
				+        }
				+        /* Clamp the clock's adjustment range. */
				+        if (clock < clock_min)
				+            clock = clock_min;
				+        else if (clock > clock_max)
				+            clock = clock_max;
				+        /* PLL: Adjust clock phase according to mismatch. */
				+        new_ticks = ticks * (1.0 - pll_phase_adj);
				+        /* time_array[-1] += ticks - new_ticks: the phase correction is
				+         * added to the last bitcell's time, it does not replace it. */
				+        item = PyFloat_FromDouble(last_time + (ticks - new_ticks));
				+        if (item == NULL)
				+            return NULL;
				+        /* PyList_SetItem() steals the reference to item, even on failure. */
				+        if (PyList_SetItem(time_array, PyList_Size(time_array)-1, item) < 0)
				+            return NULL;
				+        ticks = new_ticks;
				+
				+    }
				+
				+    /* PyIter_Next() returns NULL on exhaustion and on error alike. */
				+    if (PyErr_Occurred())
				+        return NULL;
				+
				+    Py_RETURN_NONE;
				+}
			
 
				+
			
 
				+
			
 
				+/* Decode a 28-bit value packed into four bytes: bits 7..1 of each byte
				+ * carry seven payload bits, least-significant group first. Bit 0 of
				+ * every byte is ignored. */
				+static int _read_28bit(uint8_t *p)
				+{
				+    return ((p[0] & 0xfe) >>  1)
				+         | ((p[1] & 0xfe) <<  6)
				+         | ((p[2] & 0xfe) << 13)
				+         | ((p[3] & 0xfe) << 20);
				+}
			
 
				+
			
 
				+/* Append int(value) to a list without leaking the temporary object:
				+ * PyList_Append() takes its own reference to the item.
				+ * Returns 0 on success, -1 with an exception set. */
				+static int list_append_long(PyObject *list, long value)
				+{
				+    PyObject *o = PyLong_FromLong(value);
				+    int rc;
				+
				+    if (o == NULL)
				+        return -1;
				+    rc = PyList_Append(list, o);
				+    Py_DECREF(o);
				+    return rc;
				+}
				+
				+/*
				+ * decode_flux(dat) -> (flux, index)
				+ *
				+ * Decodes a NUL-terminated flux byte stream into two lists of ints.
				+ * Stream encoding, as implemented below:
				+ *  - byte 0..249: a tick count of that value;
				+ *  - byte 250..254 followed by byte b: 250 + (byte-250)*255 + b - 1 ticks;
				+ *  - byte 255 followed by an opcode byte and a 28-bit operand:
				+ *      FLUXOP_INDEX: appends an entry to the index list;
				+ *      FLUXOP_SPACE: adds the operand to the pending tick count.
				+ * Every non-opcode value appends the pending tick count to the flux list.
				+ *
				+ * Raises ValueError if the data is not NUL-terminated, ends in the
				+ * middle of a multi-byte item, or contains any other opcode.
				+ */
				+static PyObject *
				+decode_flux(PyObject *self, PyObject *args)
				+{
				+    /* Parameters */
				+    Py_buffer bytearray;
				+    PyObject *res = NULL;
				+
				+    /* bytearray buffer */
				+    uint8_t *p;
				+    Py_ssize_t l;
				+
				+    /* Local variables. The lists start as NULL so that the common exit
				+     * path can release them safely at any point. */
				+    PyObject *flux = NULL, *index = NULL;
				+    long val, ticks, ticks_since_index;
				+    int i, opcode;
				+
				+    if (!PyArg_ParseTuple(args, "y*", &bytearray))
				+        return NULL;
				+    p = bytearray.buf;
				+    l = bytearray.len;
				+
				+    /* assert dat[-1] == 0 */
				+    if ((l == 0) || (p[l-1] != 0)) {
				+        PyErr_SetString(PyExc_ValueError, "Flux is not NUL-terminated");
				+        goto out;
				+    }
				+    /* len(dat) -= 1 */
				+    l -= 1;
				+
				+    /* flux, index = [], [] */
				+    flux = PyList_New(0);
				+    index = PyList_New(0);
				+    if ((flux == NULL) || (index == NULL))
				+        goto out;
				+    /* ticks, ticks_since_index = 0, 0 */
				+    ticks = 0;
				+    ticks_since_index = 0;
				+
				+    while (l != 0) {
				+        i = *p++;
				+        if (i == 255) {
				+            /* Account for the 255 marker and the opcode byte. */
				+            if ((l -= 2) < 0)
				+                goto oos;
				+            opcode = *p++;
				+            switch (opcode) {
				+            case FLUXOP_INDEX:
				+                if ((l -= 4) < 0)
				+                    goto oos;
				+                val = _read_28bit(p);
				+                p += 4;
				+                if (list_append_long(
				+                        index, ticks_since_index + ticks + val) < 0)
				+                    goto out;
				+                ticks_since_index = -(ticks + val);
				+                break;
				+            case FLUXOP_SPACE:
				+                if ((l -= 4) < 0)
				+                    goto oos;
				+                ticks += _read_28bit(p);
				+                p += 4;
				+                break;
				+            default:
				+                PyErr_Format(PyExc_ValueError,
				+                             "Bad opcode in flux stream (%d)", opcode);
				+                goto out;
				+            }
				+        } else {
				+            if (i < 250) {
				+                l -= 1;
				+                val = i;
				+            } else {
				+                if ((l -= 2) < 0)
				+                    goto oos;
				+                val = 250 + (i - 250) * 255;
				+                val += *p++ - 1;
				+            }
				+            ticks += val;
				+            if (list_append_long(flux, ticks) < 0)
				+                goto out;
				+            ticks_since_index += ticks;
				+            ticks = 0;
				+        }
				+    }
				+
				+    /* "OO" takes new references, so ours are still released below. */
				+    res = Py_BuildValue("OO", flux, index);
				+
				+out:
				+    PyBuffer_Release(&bytearray);
				+    Py_XDECREF(flux);
				+    Py_XDECREF(index);
				+    return res;
				+
				+oos:
				+    PyErr_SetString(PyExc_ValueError, "Unexpected end of flux");
				+    goto out;
				+}
			
 
				+
			
 
				+
			
 
				+/* Functions exported by the module; the table ends with a NULL entry. */
				+static PyMethodDef modulefuncs[] = {
				+    { "flux_to_bitcells", flux_to_bitcells, METH_VARARGS, NULL },
				+    { "decode_flux", decode_flux, METH_VARARGS, NULL },
				+    { NULL }
				+};
				+
				+/* Module definition: named "optimised", no docstring, and a per-module
				+ * state size of -1. */
				+static PyModuleDef moduledef = {
				+    PyModuleDef_HEAD_INIT, "optimised", 0, -1, modulefuncs,
				+};
				+
				+/* Module initialisation: creates the cached "append" method name used
				+ * by bitarray_append() and then creates the module object.
				+ * Returns NULL with an exception set on failure. */
				+PyMODINIT_FUNC PyInit_optimised(void)
				+{
				+    append_s = Py_BuildValue("s", "append");
				+    if (append_s == NULL)
				+        return NULL;
				+    return PyModule_Create(&moduledef);
				+}
			
--- a/scripts/c_ext/setup.py
+++ b/scripts/c_ext/setup.py
@@ -0,0 +1,6 @@
 
				+from distutils.core import setup, Extension
			
 
				+
			
 
				+module1 = Extension('optimised', sources = ['optimised.c'])
			
 
				+
			
 
				+setup(name = 'optimised',
			
 
				+      ext_modules = [module1])
			
--- a/scripts/greaseweazle/optimised/__init__.py
+++ b/scripts/greaseweazle/optimised/__init__.py
@@ -0,0 +1,25 @@
 
				+# greaseweazle/optimised/__init__.py
			
 
				+#
			
 
				+# Written & released by Keir Fraser <keir.xen@gmail.com>
			
 
				+#
			
 
				+# This is free and unencumbered software released into the public domain.
			
 
				+# See the file COPYING for more details, or visit <http://unlicense.org>.
			
 
				+
			
 
				+import os
			
 
				+
			
 
				+gw_opt = os.environ.get('GW_OPT')
			
 
				+enabled = gw_opt is None or gw_opt.lower().startswith('y')
			
 
				+if enabled:
			
 
				+    try:
			
 
				+        from .optimised import *
			
 
				+    except ModuleNotFoundError:
			
 
				+        enabled = False
			
 
				+        print('*** WARNING: Optimised data routines not found: '
			
 
				+              'Run scripts/setup.sh')
			
 
				+else:
			
 
				+    print('*** WARNING: Optimised data routines disabled (GW_OPT=%s)'
			
 
				+          % gw_opt)
			
 
				+
			
 
				+# Local variables:
			
 
				+# python-indent: 4
			
 
				+# End:
			
--- a/scripts/greaseweazle/track.py
+++ b/scripts/greaseweazle/track.py
@@ -9,6 +9,7 @@ import binascii
 
				 import itertools as it
			
 
				 from bitarray import bitarray
			
 
				 from greaseweazle.flux import WriteoutFlux
			
 
				+from greaseweazle import optimised
			
 
				 
			
 
				 # A pristine representation of a track, from a codec and/or a perfect image.
			
 
				 class MasterTrack:
			
@@ -205,65 +206,80 @@ class RawTrack:
 
				         clock = self.clock
			
 
				         clock_min = self.clock * (1 - self.clock_max_adj)
			
 
				         clock_max = self.clock * (1 + self.clock_max_adj)
			
 
				-        ticks = 0.0
			
 
				 
			
 
				         index_iter = it.chain(iter(map(lambda x: x/freq, flux.index_list)),
			
 
				                               [float('inf')])
			
 
				 
			
 
				-        bits, times = bitarray(endian='big'), []
			
 
				-        to_index = next(index_iter)
			
 
				-
			
 
				         # Make sure there's enough time in the flux list to cover all
			
 
				         # revolutions by appending a "large enough" final flux value.
			
 
				         tail = max(0, sum(flux.index_list) - sum(flux.list) + clock*freq*2)
			
 
				-        for x in it.chain(flux.list, [tail]):
			
 
				-
			
 
				-            # Gather enough ticks to generate at least one bitcell.
			
 
				-            ticks += x / freq
			
 
				-            if ticks < clock/2:
			
 
				-                continue
			
 
				-
			
 
				-            # Clock out zero or more 0s, followed by a 1.
			
 
				-            zeros = 0
			
 
				-            while True:
			
 
				-
			
 
				-                # Check if we cross the index mark.
			
 
				-                to_index -= clock
			
 
				-                if to_index < 0:
			
 
				-                    self.bitarray += bits
			
 
				-                    self.timearray += times
			
 
				-                    self.revolutions.append(len(times))
			
 
				-                    assert len(times) == len(bits)
			
 
				-                    to_index += next(index_iter)
			
 
				-                    bits, times = bitarray(endian='big'), []
			
 
				-
			
 
				-                ticks -= clock
			
 
				-                times.append(clock)
			
 
				-                if ticks >= clock/2:
			
 
				-                    zeros += 1
			
 
				-                    bits.append(False)
			
 
				-                else:
			
 
				-                    bits.append(True)
			
 
				-                    break
			
 
				-
			
 
				-            # PLL: Adjust clock frequency according to phase mismatch.
			
 
				-            if zeros <= 3:
			
 
				-                # In sync: adjust clock by a fraction of the phase mismatch.
			
 
				-                clock += ticks * self.pll_period_adj
			
 
				+        flux_iter = it.chain(flux.list, [tail])
			
 
				+
			
 
				+        try:
			
 
				+            optimised.flux_to_bitcells(
			
 
				+                self.bitarray, self.timearray, self.revolutions,
			
 
				+                index_iter, flux_iter,
			
 
				+                freq, clock, clock_min, clock_max,
			
 
				+                self.pll_period_adj, self.pll_phase_adj)
			
 
				+        except AttributeError:
			
 
				+            flux_to_bitcells(
			
 
				+                self.bitarray, self.timearray, self.revolutions,
			
 
				+                index_iter, flux_iter,
			
 
				+                freq, clock, clock_min, clock_max,
			
 
				+                self.pll_period_adj, self.pll_phase_adj)
			
 
				+
			
 
				+            
			
 
				+def flux_to_bitcells(bit_array, time_array, revolutions,
			
 
				+                     index_iter, flux_iter,
			
 
				+                     freq, clock_centre, clock_min, clock_max,
			
 
				+                     pll_period_adj, pll_phase_adj):
			
 
				+
			
 
				+    nbits = 0
			
 
				+    ticks = 0.0
			
 
				+    clock = clock_centre
			
 
				+    to_index = next(index_iter)
			
 
				+
			
 
				+    for x in flux_iter:
			
 
				+
			
 
				+        # Gather enough ticks to generate at least one bitcell.
			
 
				+        ticks += x / freq
			
 
				+        if ticks < clock/2:
			
 
				+            continue
			
 
				+
			
 
				+        # Clock out zero or more 0s, followed by a 1.
			
 
				+        zeros = 0
			
 
				+        while True:
			
 
				+
			
 
				+            # Check if we cross the index mark.
			
 
				+            to_index -= clock
			
 
				+            if to_index < 0:
			
 
				+                revolutions.append(nbits)
			
 
				+                nbits = 0
			
 
				+                to_index += next(index_iter)
			
 
				+
			
 
				+            nbits += 1
			
 
				+            ticks -= clock
			
 
				+            time_array.append(clock)
			
 
				+            if ticks >= clock/2:
			
 
				+                zeros += 1
			
 
				+                bit_array.append(False)
			
 
				             else:
			
 
				-                # Out of sync: adjust clock towards centre.
			
 
				-                clock += (self.clock - clock) * self.pll_period_adj
			
 
				-            # Clamp the clock's adjustment range.
			
 
				-            clock = min(max(clock, clock_min), clock_max)
			
 
				-            # PLL: Adjust clock phase according to mismatch.
			
 
				-            new_ticks = ticks * (1 - self.pll_phase_adj)
			
 
				-            times[-1] += ticks - new_ticks
			
 
				-            ticks = new_ticks
			
 
				-
			
 
				-        # Append trailing bits.
			
 
				-        self.bitarray += bits
			
 
				-        self.timearray += times
			
 
				+                bit_array.append(True)
			
 
				+                break
			
 
				 
			
 
				+        # PLL: Adjust clock frequency according to phase mismatch.
			
 
				+        if zeros <= 3:
			
 
				+            # In sync: adjust clock by a fraction of the phase mismatch.
			
 
				+            clock += ticks * pll_period_adj
			
 
				+        else:
			
 
				+            # Out of sync: adjust clock towards centre.
			
 
				+            clock += (clock_centre - clock) * pll_period_adj
			
 
				+        # Clamp the clock's adjustment range.
			
 
				+        clock = min(max(clock, clock_min), clock_max)
			
 
				+        # PLL: Adjust clock phase according to mismatch.
			
 
				+        new_ticks = ticks * (1 - pll_phase_adj)
			
 
				+        time_array[-1] += ticks - new_ticks
			
 
				+        ticks = new_ticks
			
 
				 
			
 
				 # Local variables:
			
 
				 # python-indent: 4
			
--- a/scripts/greaseweazle/usb.py
+++ b/scripts/greaseweazle/usb.py
@@ -10,6 +10,7 @@ import itertools as it
 
				 from greaseweazle import version
			
 
				 from greaseweazle import error
			
 
				 from greaseweazle.flux import Flux
			
 
				+from greaseweazle import optimised
			
 
				 
			
 
				 ## Control-Path command set
			
 
				 class ControlCmd:
			
@@ -391,8 +392,11 @@ class Unit:
 
				                 # Success!
			
 
				                 break
			
 
				 
			
 
				-        # Decode the flux list and read the index-times list.
			
 
				-        flux_list, index_list = self._decode_flux(dat)
			
 
				+        try:
			
 
				+            # Decode the flux list and read the index-times list.
			
 
				+            flux_list, index_list = optimised.decode_flux(dat)
			
 
				+        except AttributeError:
			
 
				+            flux_list, index_list = self._decode_flux(dat)
			
 
				 
			
 
				         # Success: Return the requested full index-to-index revolutions.
			
 
				         return Flux(index_list, flux_list, self.sample_freq, index_cued=False)
			
--- a/scripts/setup.sh
+++ b/scripts/setup.sh
@@ -0,0 +1,4 @@
 
				+#!/bin/bash
				+# Installs the Python packages used by the host tools and builds the
				+# 'optimised' C extension. Paths are relative, so this must be run from
				+# the directory that contains scripts/.
				+# Interpreter to use: taken from $PYTHON, defaulting to python3 when
				+# that is unset or empty.
			
 
				+PYTHON="${PYTHON:-python3}"
				+# Per-user install of the required packages.
			
 
				+$PYTHON -m pip install --user bitarray crcmod pyserial
				+# Build the extension in a subshell (so the caller's working directory
				+# is unchanged) and install it into scripts/greaseweazle/optimised.
			
 
				+(cd ./scripts/c_ext && $PYTHON setup.py install --install-platlib=../greaseweazle/optimised)