浏览代码

prepare FW upload

Christian Herzog 5 年之前
父节点
当前提交
603c846823
共有 100 个文件被更改,包括 2498 次插入39922 次删除
  1. 4 0
      TODO
  2. 1 1
      build-scripts/16M-sdkconfig.defaults
  3. 3 3
      build-scripts/I2S-16MFlash-sdkconfig.defaults
  4. 3 3
      build-scripts/I2S-4MFlash-sdkconfig.defaults
  5. 1 1
      build-scripts/NonOTA-16M-sdkconfig.defaults
  6. 3 3
      build-scripts/NonOTA-I2S-16MFlash-sdkconfig.defaults
  7. 3 3
      build-scripts/NonOTA-I2S-4MFlash-sdkconfig.defaults
  8. 3 3
      build-scripts/NonOTA-SqueezeAmp-sdkconfig.defaults
  9. 3 3
      build-scripts/SqueezeAmp4MBFlash-sdkconfig.defaults
  10. 6 561
      build-scripts/SqueezeAmp8MBFlash-sdkconfig.defaults
  11. 1 1
      build-scripts/squeezelite-esp32-16M-sdkconfig.defaults
  12. 3 3
      build-scripts/squeezelite-esp32-I2S-16MFlash-sdkconfig.defaults
  13. 3 3
      build-scripts/squeezelite-esp32-I2S-4MFlash-NOAirplay-sdkconfig.defaults
  14. 33 3
      build-scripts/squeezelite-esp32-I2S-4MFlash-sdkconfig.defaults
  15. 3 3
      build-scripts/squeezelite-esp32-SqueezeAmp-sdkconfig.defaults
  16. 0 21
      components/bootloader/CMakeLists.txt
  17. 0 520
      components/bootloader/Kconfig.projbuild
  18. 0 133
      components/bootloader/Makefile.projbuild
  19. 0 7
      components/bootloader/component.mk
  20. 0 4
      components/bootloader/flash_bootloader_args.in
  21. 0 127
      components/bootloader/project_include.cmake
  22. 0 22
      components/bootloader/sdkconfig.rename
  23. 0 2
      components/bootloader/subproject/.gitignore
  24. 0 128
      components/bootloader/subproject/CMakeLists.txt
  25. 0 35
      components/bootloader/subproject/Makefile
  26. 0 3
      components/bootloader/subproject/components/micro-ecc/CMakeLists.txt
  27. 0 8
      components/bootloader/subproject/components/micro-ecc/component.mk
  28. 0 8
      components/bootloader/subproject/components/micro-ecc/micro-ecc/.gitignore
  29. 0 21
      components/bootloader/subproject/components/micro-ecc/micro-ecc/LICENSE.txt
  30. 0 41
      components/bootloader/subproject/components/micro-ecc/micro-ecc/README.md
  31. 0 820
      components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm.inc
  32. 0 2311
      components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square.inc
  33. 0 1202
      components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square_umaal.inc
  34. 0 1089
      components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr.inc
  35. 0 26311
      components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr_mult_square.inc
  36. 0 1248
      components/bootloader/subproject/components/micro-ecc/micro-ecc/curve-specific.inc
  37. 0 127
      components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_project.py
  38. 0 3
      components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_rules.py
  39. 0 85
      components/bootloader/subproject/components/micro-ecc/micro-ecc/examples/ecc_test/ecc_test.ino
  40. 0 9
      components/bootloader/subproject/components/micro-ecc/micro-ecc/library.properties
  41. 0 71
      components/bootloader/subproject/components/micro-ecc/micro-ecc/platform-specific.inc
  42. 0 188
      components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_arm.py
  43. 0 203
      components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr.py
  44. 0 143
      components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr_extra.py
  45. 0 242
      components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_arm.py
  46. 0 327
      components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_avr.py
  47. 0 4
      components/bootloader/subproject/components/micro-ecc/micro-ecc/test/emk_rules.py
  48. 0 79
      components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compress.c
  49. 0 81
      components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compute.c
  50. 0 90
      components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdh.c
  51. 0 59
      components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa.c
  52. 0 93
      components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa_deterministic.c.example
  53. 0 108
      components/bootloader/subproject/components/micro-ecc/micro-ecc/types.h
  54. 0 1634
      components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.c
  55. 0 365
      components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.h
  56. 0 172
      components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC_vli.h
  57. 0 8
      components/bootloader/subproject/main/CMakeLists.txt
  58. 0 4
      components/bootloader/subproject/main/Makefile.projbuild
  59. 0 126
      components/bootloader/subproject/main/bootloader_start.c
  60. 0 21
      components/bootloader/subproject/main/component.mk
  61. 0 167
      components/bootloader/subproject/main/esp32.bootloader.ld
  62. 0 9
      components/bootloader/subproject/main/esp32.bootloader.rom.ld
  63. 4 52
      components/cmd_nvs/cmd_nvs.c
  64. 1 0
      components/cmd_nvs/cmd_nvs.h
  65. 82 16
      components/cmd_system/cmd_system.c
  66. 1 0
      components/cmd_system/cmd_system.h
  67. 1 0
      components/cmd_system/component.mk
  68. 29 8
      components/driver_bt/bt_app_sink.c
  69. 2 2
      components/driver_bt/bt_app_source.c
  70. 2 1
      components/driver_i2s/component.mk
  71. 24 6
      components/io/led.c
  72. 2 4
      components/raop/raop.c
  73. 1 1
      components/raop/raop_sink.c
  74. 2 1
      components/raop/rtp.c
  75. 2 2
      components/squeezelite-ota/cmd_ota.c
  76. 2 2
      components/squeezelite-ota/component.mk
  77. 364 106
      components/squeezelite-ota/squeezelite-ota.c
  78. 23 1
      components/squeezelite-ota/squeezelite-ota.h
  79. 5 0
      components/squeezelite/decode_external.c
  80. 3 5
      components/squeezelite/embedded.h
  81. 6 5
      components/squeezelite/helix-aac.c
  82. 23 8
      components/squeezelite/output_i2s.c
  83. 4 3
      components/squeezelite/slimproto.c
  84. 4 0
      components/squeezelite/squeezelite.h
  85. 3 2
      components/wifi-manager/CMakeLists.txt
  86. 40 16
      components/wifi-manager/code.js
  87. 2 1
      components/wifi-manager/component.mk
  88. 162 159
      components/wifi-manager/http_server.c
  89. 12 3
      components/wifi-manager/index.html
  90. 0 144
      components/wifi-manager/json.c
  91. 0 47
      components/wifi-manager/json.h
  92. 617 214
      components/wifi-manager/wifi_manager.c
  93. 37 25
      components/wifi-manager/wifi_manager.h
  94. 1 1
      main/CMakeLists.txt
  95. 7 7
      main/cmd_squeezelite.c
  96. 190 0
      main/cmd_wifi.c
  97. 3 5
      main/cmd_wifi.h
  98. 4 2
      main/component.mk
  99. 719 0
      main/config.c
  100. 41 0
      main/config.h

+ 4 - 0
TODO

@@ -0,0 +1,4 @@
+- in squeezelite some buffers (stream, output, header, recv) are allocated 
+although they are almost static (except output). This creates a risk of 
+memory fragmentation, especially because the large output is re-allocated for
+AirPlay

+ 1 - 1
build-scripts/16M-sdkconfig.defaults

@@ -143,4 +143,4 @@ CONFIG_DEFAULT_AP_PASSWORD="squeezelite"
 CONFIG_DEFAULT_AP_IP="192.168.4.1"
 CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "

+ 3 - 3
build-scripts/I2S-16MFlash-sdkconfig.defaults

@@ -116,7 +116,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -648,7 +648,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1124,7 +1124,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 3 - 3
build-scripts/I2S-4MFlash-sdkconfig.defaults

@@ -116,7 +116,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -647,7 +647,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1123,7 +1123,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 1 - 1
build-scripts/NonOTA-16M-sdkconfig.defaults

@@ -139,4 +139,4 @@ CONFIG_DEFAULT_AP_PASSWORD="squeezelite"
 CONFIG_DEFAULT_AP_IP="192.168.4.1"
 CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "

+ 3 - 3
build-scripts/NonOTA-I2S-16MFlash-sdkconfig.defaults

@@ -116,7 +116,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -648,7 +648,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1123,7 +1123,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 3 - 3
build-scripts/NonOTA-I2S-4MFlash-sdkconfig.defaults

@@ -116,7 +116,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -647,7 +647,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1122,7 +1122,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 3 - 3
build-scripts/NonOTA-SqueezeAmp-sdkconfig.defaults

@@ -104,7 +104,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -636,7 +636,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1111,7 +1111,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 3 - 3
build-scripts/SqueezeAmp4MBFlash-sdkconfig.defaults

@@ -104,7 +104,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -636,7 +636,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1112,7 +1112,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

文件差异内容过多而无法显示
+ 6 - 561
build-scripts/SqueezeAmp8MBFlash-sdkconfig.defaults


+ 1 - 1
build-scripts/squeezelite-esp32-16M-sdkconfig.defaults

@@ -137,4 +137,4 @@ CONFIG_DEFAULT_AP_PASSWORD="squeezelite"
 CONFIG_DEFAULT_AP_IP="192.168.4.1"
 CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "

+ 3 - 3
build-scripts/squeezelite-esp32-I2S-16MFlash-sdkconfig.defaults

@@ -115,7 +115,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -647,7 +647,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1122,7 +1122,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 3 - 3
build-scripts/squeezelite-esp32-I2S-4MFlash-NOAirplay-sdkconfig.defaults

@@ -112,7 +112,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -642,7 +642,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1117,7 +1117,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 33 - 3
build-scripts/squeezelite-esp32-I2S-4MFlash-sdkconfig.defaults

@@ -9,6 +9,7 @@ CONFIG_IDF_TARGET="esp32"
 # SDK tool configuration
 #
 CONFIG_SDK_TOOLPREFIX="xtensa-esp32-elf-"
+						  
 CONFIG_SDK_MAKE_WARN_UNDEFINED_VARIABLES=y
 CONFIG_APP_COMPILE_TIME_DATE=y
 
@@ -31,7 +32,15 @@ CONFIG_BOOTLOADER_WDT_TIME_MS=9000
 
 
 
+CONFIG_ESPTOOLPY_PORT="com6"					
+CONFIG_ESPTOOLPY_BAUD_2MB=y
+										  
+										  
+						   
+										
 CONFIG_ESPTOOLPY_BAUD_OTHER_VAL=115200
+CONFIG_ESPTOOLPY_BAUD=2000000
+CONFIG_ESPTOOLPY_COMPRESSED=y
 CONFIG_ESPTOOLPY_FLASHMODE_QIO=y
 
 
@@ -48,6 +57,7 @@ CONFIG_ESPTOOLPY_FLASHSIZE_4MB=y
 
 
 CONFIG_ESPTOOLPY_FLASHSIZE="4MB"
+								
 CONFIG_ESPTOOLPY_FLASHSIZE_DETECT=y
 CONFIG_ESPTOOLPY_BEFORE_RESET=y
 
@@ -105,6 +115,13 @@ CONFIG_BT_SINK_PIN=1234
 CONFIG_AIRPLAY_SINK=y
 CONFIG_AIRPLAY_NAME="ESP32-AirPlay"
 CONFIG_AIRPLAY_PORT="5000"
+											  
+										   
+							   
+													   
+												
+									
+							
 CONFIG_WIFI_MANAGER_TASK_PRIORITY=5
 CONFIG_WIFI_MANAGER_MAX_RETRY=2
 CONFIG_DEFAULT_AP_SSID="squeezelite"
@@ -115,7 +132,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -386,7 +403,9 @@ CONFIG_SPIRAM_SIZE=-1
 CONFIG_SPIRAM_SPEED_80M=y
 CONFIG_SPIRAM_MEMTEST=y
 CONFIG_SPIRAM_CACHE_WORKAROUND=y
+											
 CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL=256
+												 
 CONFIG_SPIRAM_MALLOC_RESERVE_INTERNAL=65536
 CONFIG_SPIRAM_ALLOW_BSS_SEG_EXTERNAL_MEMORY=y
 CONFIG_SPIRAM_ALLOW_STACK_EXTERNAL_MEMORY=y
@@ -503,6 +522,8 @@ CONFIG_ESP32_WIFI_STATIC_TX_BUFFER_NUM=12
 
 
 
+									
+							 
 CONFIG_ESP32_WIFI_NVS_ENABLED=y
 CONFIG_ESP32_WIFI_TASK_PINNED_TO_CORE_0=y
 
@@ -519,6 +540,10 @@ CONFIG_ESP32_PHY_MAX_TX_POWER=20
 CONFIG_ESP32_ENABLE_COREDUMP_TO_NONE=y
 
 
+							  
+									   
+								   
+										  
 CONFIG_FATFS_CODEPAGE_437=y
 
 
@@ -646,7 +671,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -793,12 +818,17 @@ CONFIG_SEMIHOSTFS_HOST_PATH_MAX_LEN=128
 
 CONFIG_WL_SECTOR_SIZE_4096=y
 CONFIG_WL_SECTOR_SIZE=4096
+						 
+									   
+							
+					   
 CONFIG_WIFI_PROV_SCAN_MAX_ENTRIES=16
 
 
 
 # Deprecated options for backward compatibility
 CONFIG_TOOLPREFIX="xtensa-esp32-elf-"
+					  
 CONFIG_MAKE_WARN_UNDEFINED_VARIABLES=y
 
 
@@ -1121,7 +1151,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 3 - 3
build-scripts/squeezelite-esp32-SqueezeAmp-sdkconfig.defaults

@@ -103,7 +103,7 @@ CONFIG_DEFAULT_AP_GATEWAY="192.168.4.1"
 CONFIG_DEFAULT_AP_NETMASK="255.255.255.0"
 CONFIG_DEFAULT_AP_MAX_CONNECTIONS=4
 CONFIG_DEFAULT_AP_BEACON_INTERVAL=100
-CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info"
+CONFIG_DEFAULT_COMMAND_LINE="squeezelite -o I2S -b 500:2000 -d all=info "
 
 CONFIG_COMPILER_OPTIMIZATION_LEVEL_RELEASE=y
 CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y
@@ -635,7 +635,7 @@ CONFIG_LWIP_TCP_OVERSIZE_MSS=y
 
 
 CONFIG_LWIP_MAX_UDP_PCBS=16
-CONFIG_LWIP_UDP_RECVMBOX_SIZE=6
+CONFIG_LWIP_UDP_RECVMBOX_SIZE=32
 CONFIG_LWIP_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_LWIP_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 
@@ -1110,7 +1110,7 @@ CONFIG_TCP_QUEUE_OOSEQ=y
 CONFIG_TCP_OVERSIZE_MSS=y
 
 
-CONFIG_UDP_RECVMBOX_SIZE=6
+CONFIG_UDP_RECVMBOX_SIZE=32
 CONFIG_TCPIP_TASK_STACK_SIZE=3072
 CONFIG_TCPIP_TASK_AFFINITY_NO_AFFINITY=y
 

+ 0 - 21
components/bootloader/CMakeLists.txt

@@ -1,21 +0,0 @@
-idf_component_register(PRIV_REQUIRES partition_table)
-
-# Do not generate flash file when building bootloader or is in early expansion of the build
-if(BOOTLOADER_BUILD)
-    return()
-endif()
-
-# When secure boot is enabled, do not flash bootloader along with invocation of `idf.py flash`
-if(NOT CONFIG_SECURE_BOOT_ENABLED)
-    set(flash_bootloader FLASH_IN_PROJECT)
-endif()
-
-# Set values used in flash_bootloader_args.in and generate flash file
-# for bootloader
-esptool_py_flash_project_args(bootloader 0x1000
-                            ${BOOTLOADER_BUILD_DIR}/bootloader.bin
-                            ${flash_bootloader}
-                            FLASH_FILE_TEMPLATE flash_bootloader_args.in)
-
-esptool_py_custom_target(bootloader-flash bootloader "bootloader")
-add_dependencies(bootloader partition_table)

+ 0 - 520
components/bootloader/Kconfig.projbuild

@@ -1,520 +0,0 @@
-menu "Bootloader config"
-    choice BOOTLOADER_LOG_LEVEL
-        bool "Bootloader log verbosity"
-        default BOOTLOADER_LOG_LEVEL_INFO
-        help
-            Specify how much output to see in bootloader logs.
-
-        config BOOTLOADER_LOG_LEVEL_NONE
-            bool "No output"
-        config BOOTLOADER_LOG_LEVEL_ERROR
-            bool "Error"
-        config BOOTLOADER_LOG_LEVEL_WARN
-            bool "Warning"
-        config BOOTLOADER_LOG_LEVEL_INFO
-            bool "Info"
-        config BOOTLOADER_LOG_LEVEL_DEBUG
-            bool "Debug"
-        config BOOTLOADER_LOG_LEVEL_VERBOSE
-            bool "Verbose"
-    endchoice
-
-    config BOOTLOADER_LOG_LEVEL
-        int
-        default 0 if BOOTLOADER_LOG_LEVEL_NONE
-        default 1 if BOOTLOADER_LOG_LEVEL_ERROR
-        default 2 if BOOTLOADER_LOG_LEVEL_WARN
-        default 3 if BOOTLOADER_LOG_LEVEL_INFO
-        default 4 if BOOTLOADER_LOG_LEVEL_DEBUG
-        default 5 if BOOTLOADER_LOG_LEVEL_VERBOSE
-
-    config BOOTLOADER_SPI_WP_PIN
-        int "SPI Flash WP Pin when customising pins via eFuse (read help)"
-        range 0 33
-        default 7
-        depends on ESPTOOLPY_FLASHMODE_QIO || ESPTOOLPY_FLASHMODE_QOUT
-        help
-            This value is ignored unless flash mode is set to QIO or QOUT *and* the SPI flash pins have been
-            overridden by setting the eFuses SPI_PAD_CONFIG_xxx.
-
-            When this is the case, the eFuse config only defines 3 of the 4 Quad I/O data pins. The WP pin (aka ESP32
-            pin "SD_DATA_3" or SPI flash pin "IO2") is not specified in eFuse. That pin number is compiled into the
-            bootloader instead.
-
-            The default value (GPIO 7) is correct for WP pin on ESP32-D2WD integrated flash.
-
-    choice BOOTLOADER_VDDSDIO_BOOST
-        bool "VDDSDIO LDO voltage"
-        default BOOTLOADER_VDDSDIO_BOOST_1_9V
-        help
-            If this option is enabled, and VDDSDIO LDO is set to 1.8V (using eFuse
-            or MTDI bootstrapping pin), bootloader will change LDO settings to
-            output 1.9V instead. This helps prevent flash chip from browning out
-            during flash programming operations.
-
-            This option has no effect if VDDSDIO is set to 3.3V, or if the internal
-            VDDSDIO regulator is disabled via eFuse.
-
-        config BOOTLOADER_VDDSDIO_BOOST_1_8V
-            bool "1.8V"
-            depends on !ESPTOOLPY_FLASHFREQ_80M
-        config BOOTLOADER_VDDSDIO_BOOST_1_9V
-            bool "1.9V"
-    endchoice
-
-    config BOOTLOADER_FACTORY_RESET
-        bool "GPIO triggers factory reset"
-        default N
-        help
-            Allows to reset the device to factory settings:
-            - clear one or more data partitions;
-            - boot from "factory" partition.
-            The factory reset will occur if there is a GPIO input pulled low while device starts up.
-            See settings below.
-
-    config BOOTLOADER_NUM_PIN_FACTORY_RESET
-        int "Number of the GPIO input for factory reset"
-        depends on BOOTLOADER_FACTORY_RESET
-        range 0 39
-        default 4
-        help
-            The selected GPIO will be configured as an input with internal pull-up enabled.
-            To trigger a factory reset, this GPIO must be pulled low on reset.
-            Note that GPIO34-39 do not have an internal pullup and an external one must be provided.
-
-    config BOOTLOADER_OTA_DATA_ERASE
-        bool "Clear OTA data on factory reset (select factory partition)"
-        depends on BOOTLOADER_FACTORY_RESET
-        help
-            The device will boot from "factory" partition (or OTA slot 0 if no factory partition is present) after a
-            factory reset.
-
-    config BOOTLOADER_DATA_FACTORY_RESET
-        string "Comma-separated names of partitions to clear on factory reset"
-        depends on BOOTLOADER_FACTORY_RESET
-        default "nvs"
-        help
-            Allows customers to select which data partitions will be erased while factory reset.
-
-            Specify the names of partitions as a comma-delimited with optional spaces for readability. (Like this:
-            "nvs, phy_init, ...")
-            Make sure that the name specified in the partition table and here are the same.
-            Partitions of type "app" cannot be specified here.
-
-    config BOOTLOADER_APP_TEST
-        bool "GPIO triggers boot from test app partition"
-        default N
-        help
-            Allows to run the test app from "TEST" partition.
-            A boot from "test" partition will occur if there is a GPIO input pulled low while device starts up.
-            See settings below.
-
-    config BOOTLOADER_NUM_PIN_APP_TEST
-        int "Number of the GPIO input to boot TEST partition"
-        depends on BOOTLOADER_APP_TEST
-        range 0 39
-        default 18
-        help
-            The selected GPIO will be configured as an input with internal pull-up enabled.
-            To trigger a test app, this GPIO must be pulled low on reset.
-            After the GPIO input is deactivated and the device reboots, the old application will boot.
-            (factory or OTA[x]).
-            Note that GPIO34-39 do not have an internal pullup and an external one must be provided.
-
-    config BOOTLOADER_HOLD_TIME_GPIO
-        int "Hold time of GPIO for reset/test mode (seconds)"
-        depends on BOOTLOADER_FACTORY_RESET || BOOTLOADER_APP_TEST
-        default 5
-        help
-            The GPIO must be held low continuously for this period of time after reset
-            before a factory reset or test partition boot (as applicable) is performed.
-
-    config BOOTLOADER_WDT_ENABLE
-        bool "Use RTC watchdog in start code"
-        default y
-        help
-            Tracks the execution time of startup code.
-            If the execution time is exceeded, the RTC_WDT will restart system.
-            It is also useful to prevent a lock up in start code caused by an unstable power source.
-            NOTE: Tracking of the execution time starts in the bootloader code - where the timeout is re-set while
-            selecting the source for slow_clk - and ends when app_main is called.
-            The timeout must be re-set because the WDT uses the SLOW_CLK clock source: after the slow_clk
-            frequency changes, the WDT time needs to be re-set for the new frequency.
-            slow_clk depends on ESP32_RTC_CLK_SRC (INTERNAL_RC or EXTERNAL_CRYSTAL).
-
-    config BOOTLOADER_WDT_DISABLE_IN_USER_CODE
-        bool "Allows RTC watchdog disable in user code"
-        depends on BOOTLOADER_WDT_ENABLE
-        default n
-        help
-            If it is set, the client must itself reset or disable rtc_wdt in their code (app_main()).
-            Otherwise rtc_wdt will be disabled before calling app_main function.
-            Use function rtc_wdt_feed() for resetting counter of rtc_wdt.
-            Use function rtc_wdt_disable() for disabling rtc_wdt.
-
-    config BOOTLOADER_WDT_TIME_MS
-        int "Timeout for RTC watchdog (ms)"
-        depends on BOOTLOADER_WDT_ENABLE
-        default 9000
-        range 0 120000
-        help
-            Verify that this parameter is correct and greater than the execution time.
-            Pay attention to options such as reset to factory, trigger test partition and encryption on boot
-            - these options can increase the execution time.
-            Note: RTC_WDT will reset while encryption operations will be performed.
-
-    config BOOTLOADER_APP_ROLLBACK_ENABLE
-        bool "Enable app rollback support"
-        default n
-        help
-            After updating the app, the bootloader runs a new app with the "ESP_OTA_IMG_PENDING_VERIFY" state set.
-            This state prevents the re-run of this app. After the first boot of the new app in the user code, the
-            function should be called to confirm the operability of the app or vice versa about its non-operability.
-            If the app is working, then it is marked as valid. Otherwise, it is marked as not valid and rolls back to
-            the previous working app. A reboot is performed, and the app is booted before the software update.
-            Note: If during the first boot a new app the power goes out or the WDT works, then roll back will happen.
-            Rollback is possible only between the apps with the same security versions.
-
-    config BOOTLOADER_APP_ANTI_ROLLBACK
-        bool "Enable app anti-rollback support"
-        depends on BOOTLOADER_APP_ROLLBACK_ENABLE
-        default n
-        help
-            This option prevents rollback to previous firmware/application image with lower security version.
-
-    config BOOTLOADER_APP_SECURE_VERSION
-        int "eFuse secure version of app"
-        depends on BOOTLOADER_APP_ANTI_ROLLBACK
-        default 0
-        help
-            The secure version is the sequence number stored in the header of each firmware.
-            The security version is set in the bootloader, version is recorded in the eFuse field
-            as the number of set ones. The allocated number of bits in the efuse field
-            for storing the security version is limited (see BOOTLOADER_APP_SEC_VER_SIZE_EFUSE_FIELD option).
-
-            Bootloader: When bootloader selects an app to boot, an app is selected that has
-            a security version greater than or equal to that recorded in the eFuse field.
-            The app is booted with a higher (or equal) secure version.
-
-            The security version is worth increasing if in previous versions there is
-            a significant vulnerability and their use is not acceptable.
-
-            Your partition table should have a scheme with ota_0 + ota_1 (without factory).
-
-    config BOOTLOADER_APP_SEC_VER_SIZE_EFUSE_FIELD
-        int "Size of the efuse secure version field"
-        depends on BOOTLOADER_APP_ANTI_ROLLBACK
-        range 1 32
-        default 32
-        help
-            The size of the efuse secure version field. Its length is limited to 32 bits.
-            This determines how many times the security version can be increased.
-
-    config BOOTLOADER_EFUSE_SECURE_VERSION_EMULATE
-        bool "Emulate operations with efuse secure version(only test)"
-        default n
-        depends on BOOTLOADER_APP_ANTI_ROLLBACK
-        help
-            This option allows emulating read/write operations with the efuse secure version.
-            It allows testing the anti-rollback implementation without permanently writing eFuse bits.
-            The partition table must contain this partition: `emul_efuse, data, 5, , 0x2000`.
-
-endmenu  # Bootloader
-
-
-menu "Security features"
-
-    # These three are the actual options to check in code,
-    # selected by the displayed options
-    config SECURE_SIGNED_ON_BOOT
-        bool
-        default y
-        depends on SECURE_BOOT_ENABLED || SECURE_SIGNED_ON_BOOT_NO_SECURE_BOOT
-
-    config SECURE_SIGNED_ON_UPDATE
-        bool
-        default y
-        select MBEDTLS_ECP_DP_SECP256R1_ENABLED
-        depends on SECURE_BOOT_ENABLED || SECURE_SIGNED_ON_UPDATE_NO_SECURE_BOOT
-
-    config SECURE_SIGNED_APPS
-        bool
-        default y
-        depends on SECURE_SIGNED_ON_BOOT || SECURE_SIGNED_ON_UPDATE
-
-
-    config SECURE_SIGNED_APPS_NO_SECURE_BOOT
-        bool "Require signed app images"
-        default n
-        depends on !SECURE_BOOT_ENABLED
-        help
-            Require apps to be signed to verify their integrity.
-
-            This option uses the same app signature scheme as hardware secure boot, but unlike hardware secure boot it
-            does not prevent the bootloader from being physically updated. This means that the device can be secured
-            against remote network access, but not physical access. Compared to using hardware Secure Boot this option
-            is much simpler to implement.
-
-    config SECURE_SIGNED_ON_BOOT_NO_SECURE_BOOT
-        bool "Bootloader verifies app signatures"
-        default n
-        depends on SECURE_SIGNED_APPS_NO_SECURE_BOOT
-        help
-            If this option is set, the bootloader will be compiled with code to verify that an app is signed before
-            booting it.
-
-            If hardware secure boot is enabled, this option is always enabled and cannot be disabled.
-            If hardware secure boot is not enabled, this option doesn't add significant security by itself so most
-            users will want to leave it disabled.
-
-    config SECURE_SIGNED_ON_UPDATE_NO_SECURE_BOOT
-        bool "Verify app signature on update"
-        default y
-        depends on SECURE_SIGNED_APPS_NO_SECURE_BOOT
-        help
-            If this option is set, any OTA updated apps will have the signature verified before being considered valid.
-
-            When enabled, the signature is automatically checked whenever the esp_ota_ops.h APIs are used for OTA
-            updates, or esp_image_format.h APIs are used to verify apps.
-
-            If hardware secure boot is enabled, this option is always enabled and cannot be disabled.
-            If hardware secure boot is not enabled, this option still adds significant security against network-based
-            attackers by preventing spoofing of OTA updates.
-
-    config SECURE_BOOT_ENABLED
-        bool "Enable hardware secure boot in bootloader (READ DOCS FIRST)"
-        default n
-        help
-            Build a bootloader which enables secure boot on first boot.
-
-            Once enabled, secure boot will not boot a modified bootloader. The bootloader will only load a partition
-            table or boot an app if the data has a verified digital signature. There are implications for reflashing
-            updated apps once secure boot is enabled.
-
-            When enabling secure boot, JTAG and ROM BASIC Interpreter are permanently disabled by default.
-
-            Refer to https://docs.espressif.com/projects/esp-idf/en/latest/security/secure-boot.html before enabling.
-
-    choice SECURE_BOOTLOADER_MODE
-        bool "Secure bootloader mode"
-        depends on SECURE_BOOT_ENABLED
-        default SECURE_BOOTLOADER_ONE_TIME_FLASH
-
-        config SECURE_BOOTLOADER_ONE_TIME_FLASH
-            bool "One-time flash"
-            help
-                On first boot, the bootloader will generate a key which is not readable externally or by software. A
-                digest is generated from the bootloader image itself. This digest will be verified on each subsequent
-                boot.
-
-                Enabling this option means that the bootloader cannot be changed after the first time it is booted.
-
-        config SECURE_BOOTLOADER_REFLASHABLE
-            bool "Reflashable"
-            help
-                Generate a reusable secure bootloader key, derived (via SHA-256) from the secure boot signing key.
-
-                This allows the secure bootloader to be re-flashed by anyone with access to the secure boot signing
-                key.
-
-                This option is less secure than one-time flash, because a leak of the digest key from one device
-                allows reflashing of any device that uses it.
-
-    endchoice
-
-    config SECURE_BOOT_BUILD_SIGNED_BINARIES
-        bool "Sign binaries during build"
-        depends on SECURE_SIGNED_APPS
-        default y
-        help
-            Once secure boot or signed app requirement is enabled, app images are required to be signed.
-
-            If enabled (default), these binary files are signed as part of the build process. The file named in
-            "Secure boot private signing key" will be used to sign the image.
-
-            If disabled, unsigned app/partition data will be built. They must be signed manually using espsecure.py
-            (for example, on a remote signing server.)
-
-    config SECURE_BOOT_SIGNING_KEY
-        string "Secure boot private signing key"
-        depends on SECURE_BOOT_BUILD_SIGNED_BINARIES
-        default secure_boot_signing_key.pem
-        help
-            Path to the key file used to sign app images.
-
-            Key file is an ECDSA private key (NIST256p curve) in PEM format.
-
-            Path is evaluated relative to the project directory.
-
-            You can generate a new signing key by running the following command:
-            espsecure.py generate_signing_key secure_boot_signing_key.pem
-
-            See https://docs.espressif.com/projects/esp-idf/en/latest/security/secure-boot.html for details.
-
-    config SECURE_BOOT_VERIFICATION_KEY
-        string "Secure boot public signature verification key"
-        depends on SECURE_SIGNED_APPS && !SECURE_BOOT_BUILD_SIGNED_BINARIES
-        default signature_verification_key.bin
-        help
-            Path to a public key file used to verify signed images. This key is compiled into the bootloader and/or
-            app, to verify app images.
-
-            Key file is in raw binary format, and can be extracted from a
-            PEM formatted private key using the espsecure.py
-            extract_public_key command.
-
-            Refer to https://docs.espressif.com/projects/esp-idf/en/latest/security/secure-boot.html before enabling.
-
-    choice SECURE_BOOTLOADER_KEY_ENCODING
-        bool "Hardware Key Encoding"
-        depends on SECURE_BOOTLOADER_REFLASHABLE
-        default SECURE_BOOTLOADER_NO_ENCODING
-        help
-
-            In reflashable secure bootloader mode, a hardware key is derived from the signing key (with SHA-256) and
-            can be written to eFuse with espefuse.py.
-
-            Normally this is a 256-bit key, but if 3/4 Coding Scheme is used on the device then the eFuse key is
-            truncated to 192 bits.
-
-            This configuration item doesn't change any firmware code, it only changes the size of key binary which is
-            generated at build time.
-
-        config SECURE_BOOTLOADER_KEY_ENCODING_256BIT
-            bool "No encoding (256 bit key)"
-
-        config SECURE_BOOTLOADER_KEY_ENCODING_192BIT
-            bool "3/4 encoding (192 bit key)"
-
-    endchoice
-
-    config SECURE_BOOT_INSECURE
-        bool "Allow potentially insecure options"
-        depends on SECURE_BOOT_ENABLED
-        default N
-        help
-            You can disable some of the default protections offered by secure boot, in order to enable testing or a
-            custom combination of security features.
-
-            Only enable these options if you are very sure.
-
-            Refer to https://docs.espressif.com/projects/esp-idf/en/latest/security/secure-boot.html before enabling.
-
-    config SECURE_FLASH_ENC_ENABLED
-        bool "Enable flash encryption on boot (READ DOCS FIRST)"
-        default N
-        help
-            If this option is set, flash contents will be encrypted by the bootloader on first boot.
-
-            Note: After first boot, the system will be permanently encrypted. Re-flashing an encrypted
-            system is complicated and not always possible.
-
-            Read https://docs.espressif.com/projects/esp-idf/en/latest/security/flash-encryption.html
-            before enabling.
-
-    choice SECURE_FLASH_ENCRYPTION_MODE
-        bool "Enable usage mode"
-        depends on SECURE_FLASH_ENC_ENABLED
-        default SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-        help
-            By default, Development mode is enabled, which allows the UART bootloader to perform flash encryption operations.
-
-            Select Release mode only for production or manufacturing. Once enabled, you cannot reflash using the UART
-            bootloader.
-
-            Refer to https://docs.espressif.com/projects/esp-idf/en/latest/security/secure-boot.html and
-            https://docs.espressif.com/projects/esp-idf/en/latest/security/flash-encryption.html for details.
-
-        config SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-            bool "Development(NOT SECURE)"
-            select SECURE_FLASH_UART_BOOTLOADER_ALLOW_ENC
-
-        config SECURE_FLASH_ENCRYPTION_MODE_RELEASE
-            bool "Release"
-
-    endchoice
-
-    menu "Potentially insecure options"
-        visible if SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT || SECURE_BOOT_INSECURE
-
-        # NOTE: Options in this menu NEED to have SECURE_BOOT_INSECURE
-        # and/or SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT in "depends on", as the menu
-        # itself doesn't enable/disable its children (if it's not set,
-        # it's possible for the insecure menu to be disabled but the insecure option
-        # to remain on which is very bad.)
-
-        config SECURE_BOOT_ALLOW_ROM_BASIC
-            bool "Leave ROM BASIC Interpreter available on reset"
-            depends on SECURE_BOOT_INSECURE || SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-            default N
-            help
-                By default, the BASIC ROM Console starts on reset if no valid bootloader is
-                read from the flash.
-
-                When either flash encryption or secure boot are enabled, the default is to
-                disable this BASIC fallback mode permanently via eFuse.
-
-                If this option is set, this eFuse is not burned and the BASIC ROM Console may
-                remain accessible.  Only set this option in testing environments.
-
-        config SECURE_BOOT_ALLOW_JTAG
-            bool "Allow JTAG Debugging"
-            depends on SECURE_BOOT_INSECURE || SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-            default N
-            help
-                If not set (default), the bootloader will permanently disable JTAG (across entire chip) on first boot
-                when either secure boot or flash encryption is enabled.
-
-                Setting this option leaves JTAG on for debugging, which negates all protections of flash encryption
-                and some of the protections of secure boot.
-
-                Only set this option in testing environments.
-
-        config SECURE_BOOT_ALLOW_SHORT_APP_PARTITION
-            bool "Allow app partition length not 64KB aligned"
-            depends on SECURE_BOOT_INSECURE
-            help
-                If not set (default), app partition size must be a multiple of 64KB. App images are padded to 64KB
-                length, and the bootloader checks any trailing bytes after the signature (before the next 64KB
-                boundary) have not been written. This is because flash cache maps entire 64KB pages into the address
-                space. This prevents an attacker from appending unverified data after the app image in the flash,
-                causing it to be mapped into the address space.
-
-                Setting this option allows the app partition length to be unaligned, and disables padding of the app
-                image to this length. It is generally not recommended to set this option, unless you have a legacy
-                partitioning scheme which doesn't support 64KB aligned partition lengths.
-
-        config SECURE_FLASH_UART_BOOTLOADER_ALLOW_ENC
-            bool "Leave UART bootloader encryption enabled"
-            depends on SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-            default N
-            help
-                If not set (default), the bootloader will permanently disable UART bootloader encryption access on
-                first boot. If set, the UART bootloader will still be able to access hardware encryption.
-
-                It is recommended to only set this option in testing environments.
-
-        config SECURE_FLASH_UART_BOOTLOADER_ALLOW_DEC
-            bool "Leave UART bootloader decryption enabled"
-            depends on SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-            default N
-            help
-                If not set (default), the bootloader will permanently disable UART bootloader decryption access on
-                first boot. If set, the UART bootloader will still be able to access hardware decryption.
-
-                Only set this option in testing environments. Setting this option allows complete bypass of flash
-                encryption.
-
-        config SECURE_FLASH_UART_BOOTLOADER_ALLOW_CACHE
-            bool "Leave UART bootloader flash cache enabled"
-            depends on SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-            default N
-            help
-                If not set (default), the bootloader will permanently disable UART bootloader flash cache access on
-                first boot. If set, the UART bootloader will still be able to access the flash cache.
-
-                Only set this option in testing environments.
-
-    endmenu  # Potentially Insecure
-endmenu  # Security features
-

+ 0 - 133
components/bootloader/Makefile.projbuild

@@ -1,133 +0,0 @@
-# Bootloader component (top-level project parts)
-#
-# The bootloader is not a real component that gets linked into the project.
-# Instead it is an entire standalone project (in subproject/) that gets
-# built in the upper project's build directory. This Makefile.projbuild provides
-# the glue to build the bootloader project from the original project. It
-# basically runs Make in the subproject/ directory but it needs to
-# zero some variables the ESP-IDF project.mk makefile exports first, to not
-# let them interfere.
-#
-BOOTLOADER_COMPONENT_PATH := $(COMPONENT_PATH)
-BOOTLOADER_BUILD_DIR=$(abspath $(BUILD_DIR_BASE)/bootloader)
-BOOTLOADER_BIN=$(BOOTLOADER_BUILD_DIR)/bootloader.bin
-
-# signing key path is resolved relative to the project directory
-CONFIG_SECURE_BOOT_SIGNING_KEY ?=
-SECURE_BOOT_SIGNING_KEY=$(abspath $(call dequote,$(CONFIG_SECURE_BOOT_SIGNING_KEY)))
-export SECURE_BOOT_SIGNING_KEY  # used by bootloader_support component
-
-# Has a matching value in bootloader_support esp_flash_partitions.h
-BOOTLOADER_OFFSET := 0x1000
-
-# Custom recursive make for bootloader sub-project
-#
-# NB: Some variables are cleared in the environment, not
-# overridden, because they need to be re-defined in the child
-# project.
-BOOTLOADER_MAKE= +\
-	PROJECT_PATH= \
-	COMPONENT_DIRS= \
-	$(MAKE) -C $(BOOTLOADER_COMPONENT_PATH)/subproject \
-	V=$(V) \
-	BUILD_DIR_BASE=$(BOOTLOADER_BUILD_DIR) \
-	TEST_COMPONENTS= \
-	TESTS_ALL= \
-	EXCLUDE_COMPONENTS=
-
-.PHONY: bootloader-clean bootloader-flash bootloader-list-components bootloader $(BOOTLOADER_BIN)
-
-$(BOOTLOADER_BIN): $(SDKCONFIG_MAKEFILE)
-	$(BOOTLOADER_MAKE) $@
-
-clean: bootloader-clean
-
-bootloader-list-components:
-	$(BOOTLOADER_MAKE) list-components
-
-ifndef CONFIG_SECURE_BOOT_ENABLED
-# If secure boot disabled, bootloader flashing is integrated
-# with 'make flash' and no warnings are printed.
-
-bootloader: $(BOOTLOADER_BIN) | check_python_dependencies
-	@echo $(SEPARATOR)
-	@echo "Bootloader built. Default flash command is:"
-	@echo "$(ESPTOOLPY_WRITE_FLASH) $(BOOTLOADER_OFFSET) $^"
-
-ESPTOOL_ALL_FLASH_ARGS += $(BOOTLOADER_OFFSET) $(BOOTLOADER_BIN)
-
-bootloader-flash: $(BOOTLOADER_BIN) $(call prereq_if_explicit,erase_flash) | check_python_dependencies
-	$(ESPTOOLPY_WRITE_FLASH) 0x1000 $^
-
-else ifdef CONFIG_SECURE_BOOTLOADER_ONE_TIME_FLASH
-
-# One time flashing requires user to run esptool.py command themselves,
-# and warning is printed about inability to reflash.
-#
-# The flashing command is deliberately printed without an auto-reset
-# step, so the device doesn't immediately reset to flash itself.
-
-bootloader: $(BOOTLOADER_BIN) | check_python_dependencies
-	@echo $(SEPARATOR)
-	@echo "Bootloader built. One-time flash command is:"
-	@echo "$(subst hard_reset,no_reset,$(ESPTOOLPY_WRITE_FLASH)) $(BOOTLOADER_OFFSET) $(BOOTLOADER_BIN)"
-	@echo $(SEPARATOR)
-	@echo "* IMPORTANT: After first boot, BOOTLOADER CANNOT BE RE-FLASHED on same device"
-
-else ifdef CONFIG_SECURE_BOOTLOADER_REFLASHABLE
-# Reflashable secure bootloader
-# generates a digest binary (bootloader + digest)
-
-ifdef CONFIG_SECURE_BOOTLOADER_KEY_ENCODING_192BIT
-KEY_DIGEST_LEN=192
-else
-KEY_DIGEST_LEN=256
-endif
-
-BOOTLOADER_DIGEST_BIN := $(BOOTLOADER_BUILD_DIR)/bootloader-reflash-digest.bin
-SECURE_BOOTLOADER_KEY := $(BOOTLOADER_BUILD_DIR)/secure-bootloader-key-$(KEY_DIGEST_LEN).bin
-
-ifdef CONFIG_SECURE_BOOT_BUILD_SIGNED_BINARIES
-$(SECURE_BOOTLOADER_KEY): $(SECURE_BOOT_SIGNING_KEY) | check_python_dependencies
-	$(ESPSECUREPY) digest_private_key --keylen $(KEY_DIGEST_LEN) -k $< $@
-else
-$(SECURE_BOOTLOADER_KEY):
-	@echo "No pre-generated key for a reflashable secure bootloader is available, due to signing configuration."
-	@echo "To generate one, you can use this command:"
-	@echo "espsecure.py generate_flash_encryption_key $@"
-	@echo "then re-run make."
-	exit 1
-endif
-
-bootloader: $(BOOTLOADER_DIGEST_BIN)
-	@echo $(SEPARATOR)
-	@echo "Bootloader built and secure digest generated. First time flash command is:"
-	@echo "$(ESPEFUSEPY) burn_key secure_boot $(SECURE_BOOTLOADER_KEY)"
-	@echo "$(ESPTOOLPY_WRITE_FLASH) $(BOOTLOADER_OFFSET) $(BOOTLOADER_BIN)"
-	@echo $(SEPARATOR)
-	@echo "To reflash the bootloader after initial flash:"
-	@echo "$(ESPTOOLPY_WRITE_FLASH) 0x0 $(BOOTLOADER_DIGEST_BIN)"
-	@echo $(SEPARATOR)
-	@echo "* After first boot, only re-flashes of this kind (with same key) will be accepted."
-	@echo "* Not recommended to re-use the same secure boot keyfile on multiple production devices."
-
-$(BOOTLOADER_DIGEST_BIN): $(BOOTLOADER_BIN) $(SECURE_BOOTLOADER_KEY) | check_python_dependencies
-	@echo "DIGEST $(notdir $@)"
-	$(ESPSECUREPY) digest_secure_bootloader -k $(SECURE_BOOTLOADER_KEY) -o $@ $<
-
-else # CONFIG_SECURE_BOOT_ENABLED && !CONFIG_SECURE_BOOTLOADER_REFLASHABLE && !CONFIG_SECURE_BOOTLOADER_ONE_TIME_FLASH
-bootloader:
-	@echo "Invalid bootloader target: bad sdkconfig?"
-	@exit 1
-endif
-
-ifndef CONFIG_SECURE_BOOT_ENABLED
-# don't build bootloader by default if secure boot is enabled
-all_binaries: $(BOOTLOADER_BIN)
-endif
-
-bootloader-clean: $(SDKCONFIG_MAKEFILE)
-	$(BOOTLOADER_MAKE) app-clean
-ifdef CONFIG_SECURE_BOOTLOADER_REFLASHABLE
-	rm -f $(SECURE_BOOTLOADER_KEY) $(BOOTLOADER_DIGEST_BIN)
-endif

+ 0 - 7
components/bootloader/component.mk

@@ -1,7 +0,0 @@
-# bootloader component is special, as bootloader is also a project.
-#
-# This top-level component is only configuration files for the IDF project.
-#
-# See Makefile.projbuild for the targets which actually build the bootloader.
-COMPONENT_CONFIG_ONLY := 1
-

+ 0 - 4
components/bootloader/flash_bootloader_args.in

@@ -1,4 +0,0 @@
---flash_mode ${ESPFLASHMODE}
---flash_size ${ESPFLASHSIZE}
---flash_freq ${ESPFLASHFREQ}
-${OFFSET} ${IMAGE}

+ 0 - 127
components/bootloader/project_include.cmake

@@ -1,127 +0,0 @@
-set(BOOTLOADER_OFFSET 0x1000)
-
-# Do not generate flash file when building bootloader
-if(BOOTLOADER_BUILD)
-    return()
-endif()
-
-# Glue to build the bootloader subproject binary as an external
-# cmake project under this one
-#
-#
-idf_build_get_property(build_dir BUILD_DIR)
-set(BOOTLOADER_BUILD_DIR "${build_dir}/bootloader")
-set(bootloader_binary_files
-    "${BOOTLOADER_BUILD_DIR}/bootloader.elf"
-    "${BOOTLOADER_BUILD_DIR}/bootloader.bin"
-    "${BOOTLOADER_BUILD_DIR}/bootloader.map"
-    )
-
-idf_build_get_property(project_dir PROJECT_DIR)
-
-# There is some additional processing when CONFIG_SECURE_SIGNED_APPS is set. This happens
-# when either CONFIG_SECURE_BOOT_ENABLED or SECURE_BOOT_BUILD_SIGNED_BINARIES.
-# For both cases, the user either sets binaries to be signed during build or not
-# using CONFIG_SECURE_BOOT_BUILD_SIGNED_BINARIES.
-#
-# Regardless, pass the main project's keys (signing/verification) to the bootloader subproject
-# via config.
-if(CONFIG_SECURE_SIGNED_APPS)
-    add_custom_target(gen_secure_boot_keys)
-
-    if(CONFIG_SECURE_BOOT_ENABLED)
-        # Check that the configuration is sane
-        if((CONFIG_SECURE_BOOTLOADER_REFLASHABLE AND CONFIG_SECURE_BOOTLOADER_ONE_TIME_FLASH) OR
-            (NOT CONFIG_SECURE_BOOTLOADER_REFLASHABLE AND NOT CONFIG_SECURE_BOOTLOADER_ONE_TIME_FLASH))
-            fail_at_build_time(bootloader "Invalid bootloader target: bad sdkconfig?")
-        endif()
-
-        if(CONFIG_SECURE_BOOTLOADER_REFLASHABLE)
-            set(bootloader_binary_files
-                ${bootloader_binary_files}
-                "${BOOTLOADER_BUILD_DIR}/bootloader-reflash-digest.bin"
-                "${BOOTLOADER_BUILD_DIR}/secure-bootloader-key-192.bin"
-                "${BOOTLOADER_BUILD_DIR}/secure-bootloader-key-256.bin"
-                )
-        endif()
-    endif()
-
-    # Since keys are usually given relative to main project dir, get the absolute paths to the keys
-    # for use by the bootloader subproject. Replace the values in config with these absolute paths,
-    # so that bootloader subproject does not need to assume main project dir to obtain path to the keys.
-    if(CONFIG_SECURE_BOOT_BUILD_SIGNED_BINARIES)
-        get_filename_component(secure_boot_signing_key
-            "${CONFIG_SECURE_BOOT_SIGNING_KEY}"
-            ABSOLUTE BASE_DIR "${project_dir}")
-
-        if(NOT EXISTS ${secure_boot_signing_key})
-            # If the signing key is not found, create a phony gen_secure_boot_signing_key target that
-            # fails the build. fail_at_build_time causes a cmake run next time
-            # (to pick up a new signing key if one exists, etc.)
-            fail_at_build_time(gen_secure_boot_signing_key
-                "Secure Boot Signing Key ${CONFIG_SECURE_BOOT_SIGNING_KEY} does not exist. Generate using:"
-                "\tespsecure.py generate_signing_key ${CONFIG_SECURE_BOOT_SIGNING_KEY}")
-        else()
-            add_custom_target(gen_secure_boot_signing_key)
-        endif()
-
-        set(SECURE_BOOT_SIGNING_KEY ${secure_boot_signing_key}) # needed by some other components
-        set(sign_key_arg "-DSECURE_BOOT_SIGNING_KEY=${secure_boot_signing_key}")
-
-        add_dependencies(gen_secure_boot_keys gen_secure_boot_signing_key)
-    else()
-
-        get_filename_component(secure_boot_verification_key
-            ${CONFIG_SECURE_BOOT_VERIFICATION_KEY}
-            ABSOLUTE BASE_DIR "${project_dir}")
-
-        if(NOT EXISTS ${secure_boot_verification_key})
-            # If the verification key is not found, create a phony gen_secure_boot_verification_key target that
-            # fails the build. fail_at_build_time causes a cmake run next time
-            # (to pick up a new verification key if one exists, etc.)
-            fail_at_build_time(gen_secure_boot_verification_key
-                "Secure Boot Verification Public Key ${CONFIG_SECURE_BOOT_VERIFICATION_KEY} does not exist."
-                "\tThis can be extracted from the private signing key."
-                "\tSee docs/security/secure-boot.rst for details.")
-        else()
-            add_custom_target(gen_secure_boot_verification_key)
-        endif()
-
-        set(ver_key_arg "-DSECURE_BOOT_VERIFICATION_KEY=${secure_boot_verification_key}")
-
-        add_dependencies(gen_secure_boot_keys gen_secure_boot_verification_key)
-    endif()
-endif()
-
-idf_build_get_property(idf_path IDF_PATH)
-idf_build_get_property(idf_target IDF_TARGET)
-idf_build_get_property(sdkconfig SDKCONFIG)
-
-externalproject_add(bootloader
-    SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}/subproject"
-    BINARY_DIR "${BOOTLOADER_BUILD_DIR}"
-    CMAKE_ARGS  -DSDKCONFIG=${sdkconfig} -DIDF_PATH=${idf_path} -DIDF_TARGET=${idf_target}
-                -DPYTHON_DEPS_CHECKED=1
-                -DEXTRA_COMPONENT_DIRS=${CMAKE_CURRENT_LIST_DIR}
-                ${sign_key_arg} ${ver_key_arg}
-                # LEGACY_INCLUDE_COMMON_HEADERS has to be passed in via cache variable since
-                # the bootloader common component requirements depends on this and
-                # config variables are not available before project() call.
-                -DLEGACY_INCLUDE_COMMON_HEADERS=${CONFIG_LEGACY_INCLUDE_COMMON_HEADERS}
-    INSTALL_COMMAND ""
-    BUILD_ALWAYS 1  # no easy way around this...
-    BUILD_BYPRODUCTS ${bootloader_binary_files}
-    )
-
-if(CONFIG_SECURE_SIGNED_APPS)
-    add_dependencies(bootloader gen_secure_boot_keys)
-endif()
-
-# this is a hack due to an (annoying) shortcoming in cmake, it can't
-# extend the 'clean' target to the external project
-# see thread: https://cmake.org/pipermail/cmake/2016-December/064660.html
-#
-# So for now we just have the top-level build remove the final build products...
-set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY
-    ADDITIONAL_MAKE_CLEAN_FILES
-    ${bootloader_binary_files})

+ 0 - 22
components/bootloader/sdkconfig.rename

@@ -1,22 +0,0 @@
-# sdkconfig replacement configurations for deprecated options formatted as
-# CONFIG_DEPRECATED_OPTION CONFIG_NEW_OPTION
-
-CONFIG_LOG_BOOTLOADER_LEVEL                                 CONFIG_BOOTLOADER_LOG_LEVEL
-CONFIG_LOG_BOOTLOADER_LEVEL_NONE                            CONFIG_BOOTLOADER_LOG_LEVEL_NONE
-CONFIG_LOG_BOOTLOADER_LEVEL_ERROR                           CONFIG_BOOTLOADER_LOG_LEVEL_ERROR
-CONFIG_LOG_BOOTLOADER_LEVEL_WARN                            CONFIG_BOOTLOADER_LOG_LEVEL_WARN
-CONFIG_LOG_BOOTLOADER_LEVEL_INFO                            CONFIG_BOOTLOADER_LOG_LEVEL_INFO
-CONFIG_LOG_BOOTLOADER_LEVEL_DEBUG                           CONFIG_BOOTLOADER_LOG_LEVEL_DEBUG
-CONFIG_LOG_BOOTLOADER_LEVEL_VERBOSE                         CONFIG_BOOTLOADER_LOG_LEVEL_VERBOSE
-
-CONFIG_APP_ROLLBACK_ENABLE                                  CONFIG_BOOTLOADER_APP_ROLLBACK_ENABLE
-CONFIG_APP_ANTI_ROLLBACK                                    CONFIG_BOOTLOADER_APP_ANTI_ROLLBACK
-CONFIG_APP_SECURE_VERSION                                   CONFIG_BOOTLOADER_APP_SECURE_VERSION
-CONFIG_APP_SECURE_VERSION_SIZE_EFUSE_FIELD                  CONFIG_BOOTLOADER_APP_SEC_VER_SIZE_EFUSE_FIELD
-CONFIG_EFUSE_SECURE_VERSION_EMULATE                         CONFIG_BOOTLOADER_EFUSE_SECURE_VERSION_EMULATE
-
-CONFIG_FLASH_ENCRYPTION_ENABLED                             CONFIG_SECURE_FLASH_ENC_ENABLED
-CONFIG_FLASH_ENCRYPTION_INSECURE                            CONFIG_SECURE_FLASH_ENCRYPTION_MODE_DEVELOPMENT
-CONFIG_FLASH_ENCRYPTION_UART_BOOTLOADER_ALLOW_ENCRYPT       CONFIG_SECURE_FLASH_UART_BOOTLOADER_ALLOW_ENC
-CONFIG_FLASH_ENCRYPTION_UART_BOOTLOADER_ALLOW_DECRYPT       CONFIG_SECURE_FLASH_UART_BOOTLOADER_ALLOW_DEC
-CONFIG_FLASH_ENCRYPTION_UART_BOOTLOADER_ALLOW_CACHE         CONFIG_SECURE_FLASH_UART_BOOTLOADER_ALLOW_CACHE

+ 0 - 2
components/bootloader/subproject/.gitignore

@@ -1,2 +0,0 @@
-build
-sdkconfig

+ 0 - 128
components/bootloader/subproject/CMakeLists.txt

@@ -1,128 +0,0 @@
-cmake_minimum_required(VERSION 3.5)
-
-if(NOT SDKCONFIG)
-    message(FATAL_ERROR "Bootloader subproject expects the SDKCONFIG variable to be passed "
-        "in by the parent build process.")
-endif()
-
-if(NOT IDF_PATH)
-    message(FATAL_ERROR "Bootloader subproject expects the IDF_PATH variable to be passed "
-        "in by the parent build process.")
-endif()
-
-if(NOT IDF_TARGET)
-    message(FATAL_ERROR "Bootloader subproject expects the IDF_TARGET variable to be passed "
-        "in by the parent build process.")
-endif()
-
-set(COMPONENTS bootloader esptool_py partition_table soc bootloader_support log spi_flash micro-ecc main efuse)
-set(BOOTLOADER_BUILD 1)
-include("${IDF_PATH}/tools/cmake/project.cmake")
-set(common_req log esp_rom esp_common xtensa)
-if(LEGACY_INCLUDE_COMMON_HEADERS)
-    list(APPEND common_req soc)
-endif()
-idf_build_set_property(__COMPONENT_REQUIRES_COMMON "${common_req}")
-idf_build_set_property(__OUTPUT_SDKCONFIG 0)
-project(bootloader)
-
-idf_build_set_property(COMPILE_DEFINITIONS "-DBOOTLOADER_BUILD=1" APPEND)
-idf_build_set_property(COMPILE_OPTIONS "-fno-stack-protector" APPEND)
-
-string(REPLACE ";" " " espsecurepy "${ESPSECUREPY}")
-string(REPLACE ";" " " espefusepy "${ESPEFUSEPY}")
-set(esptoolpy_write_flash "${ESPTOOLPY_WRITE_FLASH_STR}")
-
-if(CONFIG_SECURE_BOOTLOADER_REFLASHABLE)
-    if(CONFIG_SECURE_BOOTLOADER_KEY_ENCODING_192BIT)
-        set(key_digest_len 192)
-    else()
-        set(key_digest_len 256)
-    endif()
-
-    get_filename_component(bootloader_digest_bin
-        "bootloader-reflash-digest.bin"
-        ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}")
-
-    get_filename_component(secure_bootloader_key
-        "secure-bootloader-key-${key_digest_len}.bin"
-        ABSOLUTE BASE_DIR "${CMAKE_BINARY_DIR}")
-
-    add_custom_command(OUTPUT "${secure_bootloader_key}"
-        COMMAND ${ESPSECUREPY} digest_private_key
-            --keylen "${key_digest_len}"
-            --keyfile "${SECURE_BOOT_SIGNING_KEY}"
-            "${secure_bootloader_key}"
-        VERBATIM)
-
-    if(CONFIG_SECURE_BOOT_BUILD_SIGNED_BINARIES)
-        add_custom_target(gen_secure_bootloader_key ALL DEPENDS "${secure_bootloader_key}")
-    else()
-        if(NOT EXISTS "${secure_bootloader_key}")
-            message(FATAL_ERROR
-                "No pre-generated key for a reflashable secure bootloader is available, "
-                "due to signing configuration."
-                "\nTo generate one, you can use this command:"
-                "\n\t${espsecurepy} generate_flash_encryption_key ${secure_bootloader_key}"
-                "\nIf a signing key is present, then instead use:"
-                "\n\t${espsecurepy} digest_private_key "
-                "--keylen (192/256) --keyfile KEYFILE "
-                "${secure_bootloader_key}")
-        endif()
-        add_custom_target(gen_secure_bootloader_key)
-    endif()
-
-    add_custom_command(OUTPUT "${bootloader_digest_bin}"
-        COMMAND ${CMAKE_COMMAND} -E echo "DIGEST ${bootloader_digest_bin}"
-        COMMAND ${ESPSECUREPY} digest_secure_bootloader --keyfile "${secure_bootloader_key}"
-            -o "${bootloader_digest_bin}" "${CMAKE_BINARY_DIR}/bootloader.bin"
-        DEPENDS gen_secure_bootloader_key gen_project_binary
-        VERBATIM)
-
-    add_custom_target (gen_bootloader_digest_bin ALL DEPENDS "${bootloader_digest_bin}")
-endif()
-
-if(CONFIG_SECURE_BOOTLOADER_ONE_TIME_FLASH)
-    add_custom_command(TARGET bootloader.elf POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "=============================================================================="
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "Bootloader built. Secure boot enabled, so bootloader not flashed automatically."
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "One-time flash command is:"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "\t${esptoolpy_write_flash} ${BOOTLOADER_OFFSET} ${CMAKE_BINARY_DIR}/bootloader.bin"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "* IMPORTANT: After first boot, BOOTLOADER CANNOT BE RE-FLASHED on same device"
-        VERBATIM)
-elseif(CONFIG_SECURE_BOOTLOADER_REFLASHABLE)
-    add_custom_command(TARGET bootloader.elf POST_BUILD
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "=============================================================================="
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "Bootloader built and secure digest generated."
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "Secure boot enabled, so bootloader not flashed automatically."
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "Burn secure boot key to efuse using:"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "\t${espefusepy} burn_key secure_boot ${secure_bootloader_key}"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "First time flash command is:"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "\t${esptoolpy_write_flash} ${BOOTLOADER_OFFSET} ${CMAKE_BINARY_DIR}/bootloader.bin"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "=============================================================================="
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "To reflash the bootloader after initial flash:"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "\t${esptoolpy_write_flash} 0x0 ${bootloader_digest_bin}"
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "=============================================================================="
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "* After first boot, only re-flashes of this kind (with same key) will be accepted."
-        COMMAND ${CMAKE_COMMAND} -E echo
-            "* Not recommended to re-use the same secure boot keyfile on multiple production devices."
-        DEPENDS gen_secure_bootloader_key gen_bootloader_digest_bin
-        VERBATIM)
-endif()

+ 0 - 35
components/bootloader/subproject/Makefile

@@ -1,35 +0,0 @@
-#
-# This is a project Makefile. It is assumed the directory this Makefile resides in is a
-# project subdirectory.
-# It aborts when run directly (MAKELEVEL 0) instead of from a parent make.
-ifeq ("$(MAKELEVEL)","0")
-$(error Bootloader makefile expects to be run as part of 'make bootloader' from a top-level project.)
-endif
-
-PROJECT_NAME := bootloader
-
-COMPONENTS := esptool_py bootloader_support log spi_flash micro-ecc soc main efuse
-
-# Clear the CFLAGS and CXXFLAGS inherited from the top-level project
-CFLAGS =
-CXXFLAGS =
-
-# The idf_target, esp_rom and esp_common components cannot be included directly, but their include directories are needed.
-CFLAGS += -I $(IDF_PATH)/components/$(IDF_TARGET)/include
-CFLAGS += -I $(IDF_PATH)/components/esp_rom/include
-CFLAGS += -I $(IDF_PATH)/components/esp_common/include
-CFLAGS += -I $(IDF_PATH)/components/xtensa/include -I $(IDF_PATH)/components/xtensa/$(IDF_TARGET)/include
-
-# The bootloader pseudo-component is also included in this build, for its Kconfig.projbuild to be included.
-#
-# IS_BOOTLOADER_BUILD tells the component Makefile.projbuild to be a no-op
-IS_BOOTLOADER_BUILD := 1
-export IS_BOOTLOADER_BUILD
-
-# The BOOTLOADER_BUILD preprocessor macro serves the same purpose inside source files
-CFLAGS += -D BOOTLOADER_BUILD=1
-
-# include the top-level "project" include directory, for sdkconfig.h
-CFLAGS += -I$(BUILD_DIR_BASE)/../include
-
-include $(IDF_PATH)/make/project.mk

+ 0 - 3
components/bootloader/subproject/components/micro-ecc/CMakeLists.txt

@@ -1,3 +0,0 @@
-# only compile the "micro-ecc/uECC.c" source file; the micro-ecc directory is the include directory
-idf_component_register(SRCS "micro-ecc/uECC.c"
-                    INCLUDE_DIRS micro-ecc)

+ 0 - 8
components/bootloader/subproject/components/micro-ecc/component.mk

@@ -1,8 +0,0 @@
-# only compile the micro-ecc/uECC.c source file
-# (SRCDIRS is needed so build system can find the source file)
-COMPONENT_SRCDIRS := micro-ecc
-COMPONENT_OBJS := micro-ecc/uECC.o
-# headers are taken from the micro-ecc directory
-COMPONENT_ADD_INCLUDEDIRS := micro-ecc
-# NOTE(review): micro-ecc is presumably a git submodule of this component - confirm
-COMPONENT_SUBMODULES := micro-ecc

+ 0 - 8
components/bootloader/subproject/components/micro-ecc/micro-ecc/.gitignore

@@ -1,8 +0,0 @@
-__build__/
-__pycache__
-*.pyc
-*.pyo
-*.pyd
-*.pyz
-*.egg-info/
-.DS_Store

+ 0 - 21
components/bootloader/subproject/components/micro-ecc/micro-ecc/LICENSE.txt

@@ -1,21 +0,0 @@
-Copyright (c) 2014, Kenneth MacKay
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice, this
-   list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright notice,
-   this list of conditions and the following disclaimer in the documentation
-   and/or other materials provided with the distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 0 - 41
components/bootloader/subproject/components/micro-ecc/micro-ecc/README.md

@@ -1,41 +0,0 @@
-micro-ecc
-==========
-
-A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors.
-
-The static version of micro-ecc (ie, where the curve was selected at compile-time) can be found in the "static" branch.
-
-Features
---------
-
- * Resistant to known side-channel attacks.
- * Written in C, with optional GCC inline assembly for AVR, ARM and Thumb platforms.
- * Supports 8, 32, and 64-bit architectures.
- * Small code size.
- * No dynamic memory allocation.
- * Support for 5 standard curves: secp160r1, secp192r1, secp224r1, secp256r1, and secp256k1.
- * BSD 2-clause license.
-
-Usage Notes
------------
-### Point Representation ###
-Compressed points are represented in the standard format as defined in http://www.secg.org/collateral/sec1_final.pdf; uncompressed points are represented in standard format, but without the `0x04` prefix. All functions except `uECC_compress()` only accept uncompressed points; use `uECC_compress()` and `uECC_decompress()` to convert between compressed and uncompressed point representations.
-
-Private keys are represented in the standard format.
-
-### Using the Code ###
-
-I recommend just copying (or symlinking) the uECC files into your project. Then just `#include "uECC.h"` to use the micro-ecc functions.
-
-For use with Arduino, you can use the Library Manager to download micro-ecc (**Sketch**=>**Include Library**=>**Manage Libraries**). You can then use uECC just like any other Arduino library (uECC should show up in the **Sketch**=>**Import Library** submenu).
-
-See uECC.h for documentation for each function.
-
-### Compilation Notes ###
-
- * Should compile with any C/C++ compiler that supports stdint.h (this includes Visual Studio 2013).
- * If you want to change the defaults for any of the uECC compile-time options (such as `uECC_OPTIMIZATION_LEVEL`), you must change them in your Makefile or similar so that uECC.c is compiled with the desired values (ie, compile uECC.c with `-DuECC_OPTIMIZATION_LEVEL=3` or whatever).
- * When compiling for a Thumb-1 platform, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher).
- * When compiling for an ARM/Thumb-2 platform with `uECC_OPTIMIZATION_LEVEL` >= 3, you must use the `-fomit-frame-pointer` GCC option (this is enabled by default when compiling with `-O1` or higher).
- * When compiling for AVR, you must have optimizations enabled (compile with `-O1` or higher).
- * When building for Windows, you will need to link in the `advapi32.lib` system library.

+ 0 - 820
components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm.inc

@@ -1,820 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_ASM_ARM_H_
-#define _UECC_ASM_ARM_H_
-
-#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
-    #define uECC_MIN_WORDS 8
-#endif
-#if uECC_SUPPORTS_secp224r1
-    #undef uECC_MIN_WORDS
-    #define uECC_MIN_WORDS 7
-#endif
-#if uECC_SUPPORTS_secp192r1
-    #undef uECC_MIN_WORDS
-    #define uECC_MIN_WORDS 6
-#endif
-#if uECC_SUPPORTS_secp160r1
-    #undef uECC_MIN_WORDS
-    #define uECC_MIN_WORDS 5
-#endif
-/* uECC_MIN_WORDS above ends up as the smallest word count among the enabled curves: each later block overrides the earlier value. */
-#if (uECC_PLATFORM == uECC_arm_thumb) /* asm operand constraints: "l" on Thumb-1, "r" otherwise */
-    #define REG_RW "+l"
-    #define REG_WRITE "=l"
-#else
-    #define REG_RW "+r"
-    #define REG_WRITE "=r"
-#endif
-/* The _LO variants use "l" on both Thumb-1 and Thumb-2 and "r" otherwise. */
-#if (uECC_PLATFORM == uECC_arm_thumb || uECC_PLATFORM == uECC_arm_thumb2)
-    #define REG_RW_LO "+l"
-    #define REG_WRITE_LO "=l"
-#else
-    #define REG_RW_LO "+r"
-    #define REG_WRITE_LO "=r"
-#endif
-/* RESUME_SYNTAX closes asm blocks that open with ".syntax unified": empty on Thumb-2, switches back to ".syntax divided" otherwise. */
-#if (uECC_PLATFORM == uECC_arm_thumb2)
-    #define RESUME_SYNTAX
-#else
-    #define RESUME_SYNTAX ".syntax divided \n\t"
-#endif
-
-#if (uECC_OPTIMIZATION_LEVEL >= 2)
-
-uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, /* result = left + right over num_words words; returns the carry out */
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-#if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-  #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
-    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; /* bytes to skip: unused groups x 4 instructions x 2 (presumably bytes per instruction); +1 is presumably the Thumb bit for bx - confirm */
-  #else /* ARM */
-    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; /* bytes to skip: unused groups x 4 instructions x 4 (presumably bytes per instruction) */
-  #endif
-#endif
-    uint32_t carry;
-    uint32_t left_word;
-    uint32_t right_word;
-    /* The first word is added with a plain adds; the rest is an unrolled adcs chain of (uECC_MAX_WORDS - 1) groups. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        "movs %[carry], #0 \n\t"
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "adr %[left], 1f \n\t"
-        ".align 4 \n\t"
-        "adds %[jump], %[left] \n\t"
-    #endif
-        /* jump now holds the address of label 1 plus the skip offset; %[left] is reused as scratch from here on. */
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "adds %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        /* Enter the unrolled chain part-way so that num_words words are processed in total. */
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "bx %[jump] \n\t"
-    #endif
-        "1: \n\t"
-        REPEAT(DEC(uECC_MAX_WORDS),
-            "ldmia %[lptr]!, {%[left]} \n\t"
-            "ldmia %[rptr]!, {%[right]} \n\t"
-            "adcs %[left], %[right] \n\t"
-            "stmia %[dptr]!, {%[left]} \n\t")
-        /* carry was zeroed at the top, so carry + carry + C leaves just the carry out of the last word. */
-        "adcs %[carry], %[carry] \n\t"
-        RESUME_SYNTAX
-        : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-          [jump] REG_RW_LO (jump),
-    #endif
-          [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
-          [right] REG_WRITE_LO (right_word)
-        :
-        : "cc", "memory"
-    );
-    return carry;
-}
-#define asm_add 1
-
-uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, /* result = left - right over num_words words; returns 1 on borrow out, else 0 */
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-#if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-  #if (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
-    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 2 + 1; /* bytes to skip: unused groups x 4 instructions x 2 (presumably bytes per instruction); +1 is presumably the Thumb bit for bx - confirm */
-  #else /* ARM */
-    uint32_t jump = (uECC_MAX_WORDS - num_words) * 4 * 4; /* bytes to skip: unused groups x 4 instructions x 4 (presumably bytes per instruction) */
-  #endif
-#endif
-    uint32_t carry;
-    uint32_t left_word;
-    uint32_t right_word;
-    /* The first word is subtracted with a plain subs; the rest is an unrolled sbcs chain of (uECC_MAX_WORDS - 1) groups. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        "movs %[carry], #0 \n\t"
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "adr %[left], 1f \n\t"
-        ".align 4 \n\t"
-        "adds %[jump], %[left] \n\t"
-    #endif
-        /* jump now holds the address of label 1 plus the skip offset; %[left] is reused as scratch from here on. */
-        "ldmia %[lptr]!, {%[left]} \n\t"
-        "ldmia %[rptr]!, {%[right]} \n\t"
-        "subs %[left], %[right] \n\t"
-        "stmia %[dptr]!, {%[left]} \n\t"
-        /* Enter the unrolled chain part-way so that num_words words are processed in total. */
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "bx %[jump] \n\t"
-    #endif
-        "1: \n\t"
-        REPEAT(DEC(uECC_MAX_WORDS),
-            "ldmia %[lptr]!, {%[left]} \n\t"
-            "ldmia %[rptr]!, {%[right]} \n\t"
-            "sbcs %[left], %[right] \n\t"
-            "stmia %[dptr]!, {%[left]} \n\t")
-        /* carry was zeroed at the top, so carry + carry + C leaves just the final C flag. */
-        "adcs %[carry], %[carry] \n\t"
-        RESUME_SYNTAX
-        : [dptr] REG_RW_LO (result), [lptr] REG_RW_LO (left), [rptr] REG_RW_LO (right),
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-          [jump] REG_RW_LO (jump),
-    #endif
-          [carry] REG_WRITE_LO (carry), [left] REG_WRITE_LO (left_word),
-          [right] REG_WRITE_LO (right_word)
-        :
-        : "cc", "memory"
-    );
-    return !carry; /* Note that on ARM, carry flag set means "no borrow" when subtracting
-                      (for some reason...), so the captured flag is inverted to report a borrow. */
-}
-#define asm_sub 1
-
-#endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */
-
-#if (uECC_OPTIMIZATION_LEVEL >= 3)
-
-#if (uECC_PLATFORM != uECC_arm_thumb)
-
-#if uECC_ARM_USE_UMAAL
-    #include "asm_arm_mult_square_umaal.inc"
-#else
-    #include "asm_arm_mult_square.inc"
-#endif
-
-#if (uECC_OPTIMIZATION_LEVEL == 3)
-
-uECC_VLI_API void uECC_vli_mult(uint32_t *result, /* optimization level 3: one asm body built from the FAST_MULT_ASM_* macros */
-                                const uint32_t *left,
-                                const uint32_t *right,
-                                wordcount_t num_words) {
-    register uint32_t *r0 __asm__("r0") = result;
-    register const uint32_t *r1 __asm__("r1") = left;
-    register const uint32_t *r2 __asm__("r2") = right;
-    register uint32_t r3 __asm__("r3") = num_words;
-    /* Arguments are pinned to r0-r3. The FAST_MULT_ASM_* bodies are not visible here; presumably they come from the asm_arm_mult_square*.inc include and expect exactly these registers - confirm. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-#if (uECC_MIN_WORDS == 5)
-        FAST_MULT_ASM_5
-    #if (uECC_MAX_WORDS > 5)
-        FAST_MULT_ASM_5_TO_6
-    #endif
-    #if (uECC_MAX_WORDS > 6)
-        FAST_MULT_ASM_6_TO_7
-    #endif
-    #if (uECC_MAX_WORDS > 7)
-        FAST_MULT_ASM_7_TO_8
-    #endif
-#elif (uECC_MIN_WORDS == 6)
-        FAST_MULT_ASM_6
-    #if (uECC_MAX_WORDS > 6)
-        FAST_MULT_ASM_6_TO_7
-    #endif
-    #if (uECC_MAX_WORDS > 7)
-        FAST_MULT_ASM_7_TO_8
-    #endif
-#elif (uECC_MIN_WORDS == 7)
-        FAST_MULT_ASM_7
-    #if (uECC_MAX_WORDS > 7)
-        FAST_MULT_ASM_7_TO_8
-    #endif
-#elif (uECC_MIN_WORDS == 8)
-        FAST_MULT_ASM_8
-#endif
-        "1: \n\t" /* NOTE(review): presumably the exit label targeted from inside the FAST_MULT_ASM_* sequences - confirm */
-        RESUME_SYNTAX
-        : "+r" (r0), "+r" (r1), "+r" (r2)
-        : "r" (r3)
-        : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-    );
-}
-#define asm_mult 1
-
-#if uECC_SQUARE_FUNC
-uECC_VLI_API void uECC_vli_square(uECC_word_t *result, /* optimization level 3: one asm body built from the FAST_SQUARE_ASM_* macros */
-                                  const uECC_word_t *left,
-                                  wordcount_t num_words) {
-    register uint32_t *r0 __asm__("r0") = result;
-    register const uint32_t *r1 __asm__("r1") = left;
-    register uint32_t r2 __asm__("r2") = num_words;
-    /* Arguments are pinned to r0-r2. The FAST_SQUARE_ASM_* bodies are not visible here; presumably they come from the asm_arm_mult_square*.inc include and expect exactly these registers - confirm. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-#if (uECC_MIN_WORDS == 5)
-        FAST_SQUARE_ASM_5
-    #if (uECC_MAX_WORDS > 5)
-        FAST_SQUARE_ASM_5_TO_6
-    #endif
-    #if (uECC_MAX_WORDS > 6)
-        FAST_SQUARE_ASM_6_TO_7
-    #endif
-    #if (uECC_MAX_WORDS > 7)
-        FAST_SQUARE_ASM_7_TO_8
-    #endif
-#elif (uECC_MIN_WORDS == 6)
-        FAST_SQUARE_ASM_6
-    #if (uECC_MAX_WORDS > 6)
-        FAST_SQUARE_ASM_6_TO_7
-    #endif
-    #if (uECC_MAX_WORDS > 7)
-        FAST_SQUARE_ASM_7_TO_8
-    #endif
-#elif (uECC_MIN_WORDS == 7)
-        FAST_SQUARE_ASM_7
-    #if (uECC_MAX_WORDS > 7)
-        FAST_SQUARE_ASM_7_TO_8
-    #endif
-#elif (uECC_MIN_WORDS == 8)
-        FAST_SQUARE_ASM_8
-#endif
-
-        "1: \n\t" /* NOTE(review): presumably the exit label targeted from inside the FAST_SQUARE_ASM_* sequences - confirm */
-        RESUME_SYNTAX
-        : "+r" (r0), "+r" (r1)
-        : "r" (r2)
-        : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-    );
-}
-#define asm_square 1
-#endif /* uECC_SQUARE_FUNC */
-
-#else /* (uECC_OPTIMIZATION_LEVEL > 3) */
-
-uECC_VLI_API void uECC_vli_mult(uint32_t *result, /* optimization level > 3: a separate fixed-size asm body per enabled curve size */
-                                const uint32_t *left,
-                                const uint32_t *right,
-                                wordcount_t num_words) {
-    register uint32_t *r0 __asm__("r0") = result;
-    register const uint32_t *r1 __asm__("r1") = left;
-    register const uint32_t *r2 __asm__("r2") = right;
-    register uint32_t r3 __asm__("r3") = num_words;
-    /* Dispatch on num_words. NOTE(review): a num_words that matches no enabled size falls through every branch and returns without writing result. */
-#if uECC_SUPPORTS_secp160r1
-    if (num_words == 5) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_MULT_ASM_5
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1), "+r" (r2)
-            : "r" (r3)
-            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-#if uECC_SUPPORTS_secp192r1
-    if (num_words == 6) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_MULT_ASM_6
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1), "+r" (r2)
-            : "r" (r3)
-            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-#if uECC_SUPPORTS_secp224r1
-    if (num_words == 7) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_MULT_ASM_7
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1), "+r" (r2)
-            : "r" (r3)
-            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
-    if (num_words == 8) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_MULT_ASM_8
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1), "+r" (r2)
-            : "r" (r3)
-            : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-}
-#define asm_mult 1
-
-#if uECC_SQUARE_FUNC
-uECC_VLI_API void uECC_vli_square(uECC_word_t *result, /* optimization level > 3: a separate fixed-size asm body per enabled curve size */
-                                  const uECC_word_t *left,
-                                  wordcount_t num_words) {
-    register uint32_t *r0 __asm__("r0") = result;
-    register const uint32_t *r1 __asm__("r1") = left;
-    register uint32_t r2 __asm__("r2") = num_words;
-    /* Dispatch on num_words. NOTE(review): a num_words that matches no enabled size falls through every branch and returns without writing result. */
-#if uECC_SUPPORTS_secp160r1
-    if (num_words == 5) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_SQUARE_ASM_5
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1)
-            : "r" (r2)
-            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-#if uECC_SUPPORTS_secp192r1
-    if (num_words == 6) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_SQUARE_ASM_6
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1)
-            : "r" (r2)
-            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-#if uECC_SUPPORTS_secp224r1
-    if (num_words == 7) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_SQUARE_ASM_7
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1)
-            : "r" (r2)
-            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
-    if (num_words == 8) {
-        __asm__ volatile (
-            ".syntax unified \n\t"
-            FAST_SQUARE_ASM_8
-            RESUME_SYNTAX
-            : "+r" (r0), "+r" (r1)
-            : "r" (r2)
-            : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-        );
-        return;
-    }
-#endif
-}
-#define asm_square 1
-#endif /* uECC_SQUARE_FUNC */
-
-#endif /* (uECC_OPTIMIZATION_LEVEL > 3) */
-
-#endif /* uECC_PLATFORM != uECC_arm_thumb */
-
-#endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */
-
-/* ---- "Small" implementations ---- */
-
-#if !asm_add /* generic loop version, compiled only when no asm_add implementation was defined earlier */
-uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, /* result = left + right over num_words words; returns the carry out */
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    uint32_t carry = 0;
-    uint32_t left_word;
-    uint32_t right_word;
-    /* The carry bit is kept in a register between iterations because the loop-counter subs overwrites the flags. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        "1: \n\t"
-        "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
-        "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
-        "lsrs %[carry], #1 \n\t"          /* Set up carry flag (carry = 0 after this). */
-        "adcs %[left], %[left], %[right] \n\t"   /* Add with carry. */
-        "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
-        "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
-        "subs %[ctr], #1 \n\t"            /* Decrement counter. */
-        "bne 1b \n\t"                     /* Loop until counter == 0. */
-        RESUME_SYNTAX
-        : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
-          [ctr] REG_RW (num_words), [carry] REG_RW (carry),
-          [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
-        :
-        : "cc", "memory"
-    );
-    return carry;
-}
-#define asm_add 1
-#endif
-
-#if !asm_sub /* generic loop version, compiled only when no asm_sub implementation was defined earlier */
-uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, /* result = left - right over num_words words; returns 1 on borrow out, else 0 */
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    uint32_t carry = 1; /* carry = 1 initially (means don't borrow) */
-    uint32_t left_word;
-    uint32_t right_word;
-    /* The carry register holds the "no borrow" bit between iterations, hence the initial 1 and the inverted return value. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        "1: \n\t"
-        "ldmia %[lptr]!, {%[left]} \n\t"  /* Load left word. */
-        "ldmia %[rptr]!, {%[right]} \n\t" /* Load right word. */
-        "lsrs %[carry], #1 \n\t"          /* Set up carry flag (carry = 0 after this). */
-        "sbcs %[left], %[left], %[right] \n\t"   /* Subtract with borrow. */
-        "adcs %[carry], %[carry], %[carry] \n\t" /* Store carry bit. */
-        "stmia %[dptr]!, {%[left]} \n\t"  /* Store result word. */
-        "subs %[ctr], #1 \n\t"            /* Decrement counter. */
-        "bne 1b \n\t"                     /* Loop until counter == 0. */
-        RESUME_SYNTAX
-        : [dptr] REG_RW (result), [lptr] REG_RW (left), [rptr] REG_RW (right),
-          [ctr] REG_RW (num_words), [carry] REG_RW (carry),
-          [left] REG_WRITE (left_word), [right] REG_WRITE (right_word)
-        :
-        : "cc", "memory"
-    );
-    return !carry;
-}
-#define asm_sub 1
-#endif
-
-#if !asm_mult /* generic loop version, compiled only when no asm_mult implementation was defined earlier */
-uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, /* writes the 2 * num_words word product of left and right to result */
-                                const uECC_word_t *left,
-                                const uECC_word_t *right,
-                                wordcount_t num_words) {
-#if (uECC_PLATFORM != uECC_arm_thumb)
-    uint32_t c0 = 0;
-    uint32_t c1 = 0;
-    uint32_t c2 = 0;
-    uint32_t k = 0;
-    uint32_t i;
-    uint32_t t0, t1;
-    /* For each result word k, all products left[i] * right[k - i] are summed into the accumulator (c2:c1:c0); c0 is then stored and the accumulator shifted down one word. k and i are byte offsets, hence the steps of 4. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        
-        "1: \n\t" /* outer loop (k < num_words) */
-        "movs %[i], #0 \n\t" /* i = 0 */
-        "b 3f \n\t"
-        
-        "2: \n\t" /* outer loop (k >= num_words) */
-        "movs %[i], %[k] \n\t"         /* i = k */
-        "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
-        
-        "3: \n\t" /* inner loop */
-        "subs %[t0], %[k], %[i] \n\t" /* t0 = k-i */
-        
-        "ldr %[t1], [%[right], %[t0]] \n\t" /* t1 = right[k - i] */
-        "ldr %[t0], [%[left], %[i]] \n\t"   /* t0 = left[i] */
-        
-        "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * right[k - i] */
-        
-        "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
-        "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
-        "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
-
-        "adds %[i], #4 \n\t"          /* i += 4 */
-        "cmp %[i], %[last_word] \n\t" /* i > (num_words - 1) (times 4)? */
-        "bgt 4f \n\t"                 /*   if so, exit the loop */
-        "cmp %[i], %[k] \n\t"         /* i <= k? */
-        "ble 3b \n\t"                 /*   if so, continue looping */
-        
-        "4: \n\t" /* end inner loop */
-        
-        "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
-        "mov %[c0], %[c1] \n\t"       /* c0 = c1 */
-        "mov %[c1], %[c2] \n\t"       /* c1 = c2 */
-        "movs %[c2], #0 \n\t"         /* c2 = 0 */
-        "adds %[k], #4 \n\t"          /* k += 4 */
-        "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
-        "ble 1b \n\t"                 /*   if so, loop back, start with i = 0 */
-        "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
-        "ble 2b \n\t"                 /*   if so, loop back, start with i = (k + 1) - num_words */
-        /* end outer loop */
-        
-        "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
-        RESUME_SYNTAX
-        : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
-          [k] "+r" (k), [i] "=&r" (i), [t0] "=&r" (t0), [t1] "=&r" (t1)
-        : [result] "r" (result), [left] "r" (left), [right] "r" (right),
-          [last_word] "r" ((num_words - 1) * 4)
-        : "cc", "memory"
-    );
-    
-#else /* Thumb-1 */
-    uint32_t r4, r5, r6, r7;
-/* Same algorithm as the branch above, but each 32x32 product is assembled from four muls on 16-bit halves; r8/r9 hold the loop bounds and r10-r12/r14 save the accumulator and k while the product is formed. */
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        "subs %[r3], #1 \n\t" /* r3 = num_words - 1 */
-        "lsls %[r3], #2 \n\t" /* r3 = (num_words - 1) * 4 */
-        "mov r8, %[r3] \n\t"  /* r8 = (num_words - 1) * 4 */
-        "lsls %[r3], #1 \n\t" /* r3 = (num_words - 1) * 8 */
-        "mov r9, %[r3] \n\t"  /* r9 = (num_words - 1) * 8 */
-        "movs %[r3], #0 \n\t" /* c0 = 0 */
-        "movs %[r4], #0 \n\t" /* c1 = 0 */
-        "movs %[r5], #0 \n\t" /* c2 = 0 */
-        "movs %[r6], #0 \n\t" /* k = 0 */
-        /* NOTE(review): %[r0] is declared as an input operand yet is used as scratch below; it is reloaded from the stack before each store and popped at the end. */
-        "push {%[r0]} \n\t" /* keep result on the stack */
-        
-        "1: \n\t" /* outer loop (k < num_words) */
-        "movs %[r7], #0 \n\t" /* r7 = i = 0 */
-        "b 3f \n\t"
-        
-        "2: \n\t" /* outer loop (k >= num_words) */
-        "movs %[r7], %[r6] \n\t" /* r7 = k */
-        "mov %[r0], r8 \n\t"     /* r0 = (num_words - 1) * 4 */
-        "subs %[r7], %[r0] \n\t" /* r7 = i = k - (num_words - 1) (times 4) */
-        
-        "3: \n\t" /* inner loop */
-        "mov r10, %[r3] \n\t"
-        "mov r11, %[r4] \n\t"
-        "mov r12, %[r5] \n\t"
-        "mov r14, %[r6] \n\t"
-        "subs %[r0], %[r6], %[r7] \n\t"          /* r0 = k - i */
-        
-        "ldr %[r4], [%[r2], %[r0]] \n\t" /* r4 = right[k - i] */
-        "ldr %[r0], [%[r1], %[r7]] \n\t" /* r0 = left[i] */
-        
-        "lsrs %[r3], %[r0], #16 \n\t" /* r3 = a1 */
-        "uxth %[r0], %[r0] \n\t"      /* r0 = a0 */
-        
-        "lsrs %[r5], %[r4], #16 \n\t" /* r5 = b1 */
-        "uxth %[r4], %[r4] \n\t"      /* r4 = b0 */
-        
-        "movs %[r6], %[r3] \n\t"        /* r6 = a1 */
-        "muls %[r6], %[r5], %[r6] \n\t" /* r6 = a1 * b1 */
-        "muls %[r3], %[r4], %[r3] \n\t" /* r3 = b0 * a1 */
-        "muls %[r5], %[r0], %[r5] \n\t" /* r5 = a0 * b1 */
-        "muls %[r0], %[r4], %[r0] \n\t" /* r0 = a0 * b0 */
-        
-        /* Add middle terms */
-        "lsls %[r4], %[r3], #16 \n\t"
-        "lsrs %[r3], %[r3], #16 \n\t"
-        "adds %[r0], %[r4] \n\t"
-        "adcs %[r6], %[r3] \n\t"
-        
-        "lsls %[r4], %[r5], #16 \n\t"
-        "lsrs %[r5], %[r5], #16 \n\t"
-        "adds %[r0], %[r4] \n\t"
-        "adcs %[r6], %[r5] \n\t"
-        
-        "mov %[r3], r10\n\t"
-        "mov %[r4], r11\n\t"
-        "mov %[r5], r12\n\t"
-        "adds %[r3], %[r0] \n\t"         /* add low word to c0 */
-        "adcs %[r4], %[r6] \n\t"         /* add high word to c1, including carry */
-        "movs %[r0], #0 \n\t"            /* r0 = 0 (does not affect carry bit) */
-        "adcs %[r5], %[r0] \n\t"         /* add carry to c2 */
-        
-        "mov %[r6], r14\n\t" /* r6 = k */
-
-        "adds %[r7], #4 \n\t"   /* i += 4 */
-        "cmp %[r7], r8 \n\t"    /* i > (num_words - 1) (times 4)? */
-        "bgt 4f \n\t"           /*   if so, exit the loop */
-        "cmp %[r7], %[r6] \n\t" /* i <= k? */
-        "ble 3b \n\t"           /*   if so, continue looping */
-        
-        "4: \n\t" /* end inner loop */
-        
-        "ldr %[r0], [sp, #0] \n\t" /* r0 = result */
-        
-        "str %[r3], [%[r0], %[r6]] \n\t" /* result[k] = c0 */
-        "mov %[r3], %[r4] \n\t"          /* c0 = c1 */
-        "mov %[r4], %[r5] \n\t"          /* c1 = c2 */
-        "movs %[r5], #0 \n\t"            /* c2 = 0 */
-        "adds %[r6], #4 \n\t"            /* k += 4 */
-        "cmp %[r6], r8 \n\t"             /* k <= (num_words - 1) (times 4) ? */
-        "ble 1b \n\t"                    /*   if so, loop back, start with i = 0 */
-        "cmp %[r6], r9 \n\t"             /* k <= (num_words * 2 - 2) (times 4) ? */
-        "ble 2b \n\t"                    /*   if so, loop back, with i = (k + 1) - num_words */
-        /* end outer loop */
-        
-        "str %[r3], [%[r0], %[r6]] \n\t" /* result[num_words * 2 - 1] = c0 */
-        "pop {%[r0]} \n\t"               /* pop result off the stack */
-        
-        ".syntax divided \n\t"
-        : [r3] "+l" (num_words), [r4] "=&l" (r4),
-          [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
-        : [r0] "l" (result), [r1] "l" (left), [r2] "l" (right)
-        : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-    );
-#endif
-}
-#define asm_mult 1
-#endif
-
-#if uECC_SQUARE_FUNC
-#if !asm_square
-uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
-                                  const uECC_word_t *left,
-                                  wordcount_t num_words) {
-#if (uECC_PLATFORM != uECC_arm_thumb)
-    uint32_t c0 = 0;
-    uint32_t c1 = 0;
-    uint32_t c2 = 0;
-    uint32_t k = 0;
-    uint32_t i, tt;
-    uint32_t t0, t1;
-    
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        
-        "1: \n\t" /* outer loop (k < num_words) */
-        "movs %[i], #0 \n\t" /* i = 0 */
-        "b 3f \n\t"
-        
-        "2: \n\t" /* outer loop (k >= num_words) */
-        "movs %[i], %[k] \n\t"         /* i = k */
-        "subs %[i], %[last_word] \n\t" /* i = k - (num_words - 1) (times 4) */
-        
-        "3: \n\t" /* inner loop */
-        "subs %[tt], %[k], %[i] \n\t" /* tt = k-i */
-        
-        "ldr %[t1], [%[left], %[tt]] \n\t" /* t1 = left[k - i] */
-        "ldr %[t0], [%[left], %[i]] \n\t"  /* t0 = left[i] */
-        
-        "umull %[t0], %[t1], %[t0], %[t1] \n\t" /* (t0, t1) = left[i] * left[k - i] */
-        
-        "cmp %[i], %[tt] \n\t"      /* (i < k - i) ? */
-        "bge 4f \n\t"               /*   if i >= k - i, skip */
-        "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
-        "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
-        "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
-        
-        "4: \n\t"
-        "adds %[c0], %[c0], %[t0] \n\t" /* add low word to c0 */
-        "adcs %[c1], %[c1], %[t1] \n\t" /* add high word to c1, including carry */
-        "adcs %[c2], %[c2], #0 \n\t"    /* add carry to c2 */
-        
-        "adds %[i], #4 \n\t"          /* i += 4 */
-        "cmp %[i], %[k] \n\t"         /* i >= k? */
-        "bge 5f \n\t"                 /*   if so, exit the loop */
-        "subs %[tt], %[k], %[i] \n\t" /* tt = k - i */
-        "cmp %[i], %[tt] \n\t"        /* i <= k - i? */
-        "ble 3b \n\t"                 /*   if so, continue looping */
-        
-        "5: \n\t" /* end inner loop */
-        
-        "str %[c0], [%[result], %[k]] \n\t" /* result[k] = c0 */
-        "mov %[c0], %[c1] \n\t"       /* c0 = c1 */
-        "mov %[c1], %[c2] \n\t"       /* c1 = c2 */
-        "movs %[c2], #0 \n\t"         /* c2 = 0 */
-        "adds %[k], #4 \n\t"          /* k += 4 */
-        "cmp %[k], %[last_word] \n\t" /* k <= (num_words - 1) (times 4) ? */
-        "ble 1b \n\t"                 /*   if so, loop back, start with i = 0 */
-        "cmp %[k], %[last_word], lsl #1 \n\t" /* k <= (num_words * 2 - 2) (times 4) ? */
-        "ble 2b \n\t"                 /*   if so, loop back, start with i = (k + 1) - num_words */
-        /* end outer loop */
-        
-        "str %[c0], [%[result], %[k]] \n\t" /* result[num_words * 2 - 1] = c0 */
-        RESUME_SYNTAX
-        : [c0] "+r" (c0), [c1] "+r" (c1), [c2] "+r" (c2),
-          [k] "+r" (k), [i] "=&r" (i), [tt] "=&r" (tt), [t0] "=&r" (t0), [t1] "=&r" (t1)
-        : [result] "r" (result), [left] "r" (left), [last_word] "r" ((num_words - 1) * 4)
-        : "cc", "memory"
-    );
-    
-#else
-    uint32_t r3, r4, r5, r6, r7;
-
-    __asm__ volatile (
-        ".syntax unified \n\t"
-        "subs %[r2], #1 \n\t" /* r2 = num_words - 1 */
-        "lsls %[r2], #2 \n\t" /* r2 = (num_words - 1) * 4 */
-        "mov r8, %[r2] \n\t"  /* r8 = (num_words - 1) * 4 */
-        "lsls %[r2], #1 \n\t" /* r2 = (num_words - 1) * 8 */
-        "mov r9, %[r2] \n\t"  /* r9 = (num_words - 1) * 8 */
-        "movs %[r2], #0 \n\t" /* c0 = 0 */
-        "movs %[r3], #0 \n\t" /* c1 = 0 */
-        "movs %[r4], #0 \n\t" /* c2 = 0 */
-        "movs %[r5], #0 \n\t" /* k = 0 */
-        
-        "push {%[r0]} \n\t" /* keep result on the stack */
-        
-        "1: \n\t" /* outer loop (k < num_words) */
-        "movs %[r6], #0 \n\t" /* r6 = i = 0 */
-        "b 3f \n\t"
-        
-        "2: \n\t" /* outer loop (k >= num_words) */
-        "movs %[r6], %[r5] \n\t" /* r6 = k */
-        "mov %[r0], r8 \n\t"     /* r0 = (num_words - 1) * 4 */
-        "subs %[r6], %[r0] \n\t" /* r6 = i = k - (num_words - 1) (times 4) */
-        
-        "3: \n\t" /* inner loop */
-        "mov r10, %[r2] \n\t"
-        "mov r11, %[r3] \n\t"
-        "mov r12, %[r4] \n\t"
-        "mov r14, %[r5] \n\t"
-        "subs %[r7], %[r5], %[r6] \n\t"  /* r7 = k - i */
-        
-        "ldr %[r3], [%[r1], %[r7]] \n\t" /* r3 = left[k - i] */
-        "ldr %[r0], [%[r1], %[r6]] \n\t" /* r0 = left[i] */
-        
-        "lsrs %[r2], %[r0], #16 \n\t" /* r2 = a1 */
-        "uxth %[r0], %[r0] \n\t"      /* r0 = a0 */
-        
-        "lsrs %[r4], %[r3], #16 \n\t" /* r4 = b1 */
-        "uxth %[r3], %[r3] \n\t"      /* r3 = b0 */
-        
-        "movs %[r5], %[r2] \n\t"        /* r5 = a1 */
-        "muls %[r5], %[r4], %[r5] \n\t" /* r5 = a1 * b1 */
-        "muls %[r2], %[r3], %[r2] \n\t" /* r2 = b0 * a1 */
-        "muls %[r4], %[r0], %[r4] \n\t" /* r4 = a0 * b1 */
-        "muls %[r0], %[r3], %[r0] \n\t" /* r0 = a0 * b0 */
-        
-        /* Add middle terms */
-        "lsls %[r3], %[r2], #16 \n\t"
-        "lsrs %[r2], %[r2], #16 \n\t"
-        "adds %[r0], %[r3] \n\t"
-        "adcs %[r5], %[r2] \n\t"
-        
-        "lsls %[r3], %[r4], #16 \n\t"
-        "lsrs %[r4], %[r4], #16 \n\t"
-        "adds %[r0], %[r3] \n\t"
-        "adcs %[r5], %[r4] \n\t"
-        
-        /* Add to acc, doubling if necessary */
-        "mov %[r2], r10\n\t"
-        "mov %[r3], r11\n\t"
-        "mov %[r4], r12\n\t"
-        
-        "cmp %[r6], %[r7] \n\t"    /* (i < k - i) ? */
-        "bge 4f \n\t"            /*   if i >= k - i, skip */
-        "movs %[r7], #0 \n\t"    /* r7 = 0 */
-        "adds %[r2], %[r0] \n\t" /* add low word to c0 */
-        "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
-        "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
-        "4: \n\t"
-        "movs %[r7], #0 \n\t"    /* r7 = 0 */
-        "adds %[r2], %[r0] \n\t" /* add low word to c0 */
-        "adcs %[r3], %[r5] \n\t" /* add high word to c1, including carry */
-        "adcs %[r4], %[r7] \n\t" /* add carry to c2 */
-        
-        "mov %[r5], r14\n\t" /* r5 = k */
-        
-        "adds %[r6], #4 \n\t"           /* i += 4 */
-        "cmp %[r6], %[r5] \n\t"         /* i >= k? */
-        "bge 5f \n\t"                   /*   if so, exit the loop */
-        "subs %[r7], %[r5], %[r6] \n\t" /* r7 = k - i */
-        "cmp %[r6], %[r7] \n\t"         /* i <= k - i? */
-        "ble 3b \n\t"                   /*   if so, continue looping */
-        
-        "5: \n\t" /* end inner loop */
-        
-        "ldr %[r0], [sp, #0] \n\t" /* r0 = result */
-        
-        "str %[r2], [%[r0], %[r5]] \n\t" /* result[k] = c0 */
-        "mov %[r2], %[r3] \n\t"          /* c0 = c1 */
-        "mov %[r3], %[r4] \n\t"          /* c1 = c2 */
-        "movs %[r4], #0 \n\t"            /* c2 = 0 */
-        "adds %[r5], #4 \n\t"            /* k += 4 */
-        "cmp %[r5], r8 \n\t"             /* k <= (num_words - 1) (times 4) ? */
-        "ble 1b \n\t"                    /*   if so, loop back, start with i = 0 */
-        "cmp %[r5], r9 \n\t"             /* k <= (num_words * 2 - 2) (times 4) ? */
-        "ble 2b \n\t"                    /*   if so, loop back, with i = (k + 1) - num_words */
-        /* end outer loop */
-        
-        "str %[r2], [%[r0], %[r5]] \n\t" /* result[num_words * 2 - 1] = c0 */
-        "pop {%[r0]} \n\t"               /* pop result off the stack */
-
-        ".syntax divided \n\t"
-        : [r2] "+l" (num_words), [r3] "=&l" (r3), [r4] "=&l" (r4),
-          [r5] "=&l" (r5), [r6] "=&l" (r6), [r7] "=&l" (r7)
-        : [r0] "l" (result), [r1] "l" (left)
-        : "r8", "r9", "r10", "r11", "r12", "r14", "cc", "memory"
-    );
-#endif
-}
-#define asm_square 1
-#endif
-#endif /* uECC_SQUARE_FUNC */
-
-#endif /* _UECC_ASM_ARM_H_ */

+ 0 - 2311
components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square.inc

@@ -1,2311 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_ASM_ARM_MULT_SQUARE_H_
-#define _UECC_ASM_ARM_MULT_SQUARE_H_
-
-#define FAST_MULT_ASM_5 /* Fully unrolled 5x5-word multiply built on umull.
- * Notation: a[i], b[j] and out[k] are the 32-bit words found at the values
- * that r1, r2 and r0 hold on entry. Writes out[0..9]. On exit r0 has
- * advanced by 40 bytes and r1, r2 by 20 bytes each. r3-r12 and r14 are used
- * as scratch; only r3 is saved and restored by this macro.
- * NOTE(review): presumably r0 = result, r1 = left, r2 = right -- confirm
- * against the asm statement that expands this macro. */ \
-    "push {r3} \n\t"                   \
-    "add r0, 12 \n\t"                  \
-    "add r2, 12 \n\t"                  \
-    "ldmia r1!, {r3,r4} \n\t"          \
-    "ldmia r2!, {r6,r7} \n\t"          \
-    /* Part 1: a[0..1] * b[3..4] -> out[3..6]. out[3] = low word of a[0]*b[3] */ \
-    "umull r11, r12, r3, r6 \n\t"      \
-    "stmia r0!, {r11} \n\t"            \
-    /* out[4]: a[0]*b[4] + a[1]*b[3] + carry-in */ \
-    "mov r10, #0 \n\t"                 \
-    "umull r11, r9, r3, r7 \n\t"       \
-    "adds r12, r12, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r11, r14, r4, r6 \n\t"      \
-    "adds r12, r12, r11 \n\t"          \
-    "adcs r9, r9, r14 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "stmia r0!, {r12} \n\t"            \
-    /* out[5..6]: a[1]*b[4] + carry-in */ \
-    "umull r12, r14, r4, r7 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adc r10, r10, r14 \n\t"           \
-    "stmia r0!, {r9, r10} \n\t"        \
-    /* Part 2: rewind r0 to out[0] and r2 to b[0]; load b[0..2] and a[2] */ \
-    "sub r0, 28 \n\t"                  \
-    "sub r2, 20 \n\t"                  \
-    "ldmia r2!, {r6,r7,r8} \n\t"       \
-    "ldmia r1!, {r5} \n\t"             \
-    /* out[0] = low word of a[0]*b[0] */ \
-    "umull r11, r12, r3, r6 \n\t"      \
-    "stmia r0!, {r11} \n\t"            \
-    /* out[1]: a[0]*b[1] + a[1]*b[0] + carry-in */ \
-    "mov r10, #0 \n\t"                 \
-    "umull r11, r9, r3, r7 \n\t"       \
-    "adds r12, r12, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r11, r14, r4, r6 \n\t"      \
-    "adds r12, r12, r11 \n\t"          \
-    "adcs r9, r9, r14 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "stmia r0!, {r12} \n\t"            \
-    /* out[2]: a[0]*b[2] + a[1]*b[1] + a[2]*b[0] + carry-in */ \
-    "mov r11, #0 \n\t"                 \
-    "umull r12, r14, r3, r8 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r4, r7 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r5, r6 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "stmia r0!, {r9} \n\t"             \
-    /* out[3]: a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + word stored in part 1 (r3 := a[3]) */ \
-    "ldmia r1!, {r3} \n\t"             \
-    "mov r12, #0 \n\t"                 \
-    "umull r14, r9, r4, r8 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r14, r9, r5, r7 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r14, r9, r3, r6 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "ldr r14, [r0] \n\t"               \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, #0 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "stmia r0!, {r10} \n\t"            \
-    /* out[4]: a[2]*b[2] + a[3]*b[1] + a[4]*b[0] + word stored in part 1 (r4 := a[4]) */ \
-    "ldmia r1!, {r4} \n\t"             \
-    "mov r14, #0 \n\t"                 \
-    "umull r9, r10, r5, r8 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "umull r9, r10, r3, r7 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "umull r9, r10, r4, r6 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "ldr r9, [r0] \n\t"                \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, #0 \n\t"           \
-    "adc r14, r14, #0 \n\t"            \
-    "stmia r0!, {r11} \n\t"            \
-    /* out[5]: a[2]*b[3] + a[3]*b[2] + a[4]*b[1] + word stored in part 1 (r6 := b[3]) */ \
-    "ldmia r2!, {r6} \n\t"             \
-    "mov r9, #0 \n\t"                  \
-    "umull r10, r11, r5, r6 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r3, r8 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r4, r7 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "ldr r10, [r0] \n\t"               \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, #0 \n\t"           \
-    "adc r9, r9, #0 \n\t"              \
-    "stmia r0!, {r12} \n\t"            \
-    /* out[6]: a[2]*b[4] + a[3]*b[3] + a[4]*b[2] + word stored in part 1 (r7 := b[4]) */ \
-    "ldmia r2!, {r7} \n\t"             \
-    "mov r10, #0 \n\t"                 \
-    "umull r11, r12, r5, r7 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r3, r6 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r4, r8 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "ldr r11, [r0] \n\t"               \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, #0 \n\t"             \
-    "adc r10, r10, #0 \n\t"            \
-    "stmia r0!, {r14} \n\t"            \
-    /* out[7]: a[3]*b[4] + a[4]*b[3] + carry-in */ \
-    "mov r11, #0 \n\t"                 \
-    "umull r12, r14, r3, r7 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r4, r6 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "stmia r0!, {r9} \n\t"             \
-    /* out[8..9]: a[4]*b[4] + carry-in */ \
-    "umull r14, r9, r4, r7 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adc r11, r11, r9 \n\t"            \
-    "stmia r0!, {r10, r11} \n\t"       \
-    "pop {r3} \n\t"
-
-#define FAST_MULT_ASM_5_TO_6 /* Extends a product by one word per operand.
- * When r3 == 5 the macro body is skipped by branching forward to local
- * label 1, which must be defined by the code that follows this macro.
- * Otherwise, let ah / bh be the words at the entry values of r1 / r2, and
- * A, B, R the five words immediately below the entry values of r1, r2, r0:
- *   for j = 0..4, ah*B[j] + bh*A[j] is added into R[j] with the carry
- *   propagated into the next word; finally ah*bh plus the remaining carry
- *   is stored in the two words starting at the entry value of r0.
- * On exit r0 has advanced by 8 bytes and r1, r2 by 4 bytes each.
- * Clobbers r4-r12 and r14.
- * NOTE(review): presumably expanded right after FAST_MULT_ASM_5 to turn a
- * 5x5 product into a 6x6 product, with r3 holding the word count --
- * confirm at the expansion site. */ \
-    "cmp r3, #5 \n\t"                        \
-    "beq 1f \n\t"                            \
-                                             \
-    /* r4 = left high, r5 = right high */    \
-    "ldr r4, [r1] \n\t"                      \
-    "ldr r5, [r2] \n\t"                      \
-    /* step all three pointers back by five words (20 bytes) */ \
-    "sub r0, #20 \n\t"                       \
-    "sub r1, #20 \n\t"                       \
-    "sub r2, #20 \n\t"                       \
-    /* j = 0: R[0] += ah*B[0] + bh*A[0]; carry kept in r10:r14 */ \
-    "ldr r6, [r0] \n\t"                      \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r14, #0 \n\t"                       \
-    "umull r9, r10, r4, r8 \n\t"             \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r9, r9, r6 \n\t"                   \
-    "adc r10, r10, #0 \n\t"                  \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "str r9, [r0], #4 \n\t"                  \
-    /* j = 1: R[1] += ah*B[1] + bh*A[1] + carry; carry kept in r14:r9 */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r10, r10, r6 \n\t"                 \
-    "adcs r14, r14, #0 \n\t"                 \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r9, #0 \n\t"                        \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "str r10, [r0], #4 \n\t"                 \
-    /* j = 2: R[2] += ah*B[2] + bh*A[2] + carry; carry kept in r9:r10 */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r14, r14, r6 \n\t"                 \
-    "adcs r9, r9, #0 \n\t"                   \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r10, #0 \n\t"                       \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r14, r14, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                  \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r14, r14, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                  \
-    "str r14, [r0], #4 \n\t"                 \
-    /* j = 3: R[3] += ah*B[3] + bh*A[3] + carry; carry kept in r10:r14 */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r9, r9, r6 \n\t"                   \
-    "adcs r10, r10, #0 \n\t"                 \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r14, #0 \n\t"                       \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "str r9, [r0], #4 \n\t"                  \
-    /* j = 4: R[4] += ah*B[4] + bh*A[4] + carry; carry kept in r14:r9 */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r10, r10, r6 \n\t"                 \
-    "adcs r14, r14, #0 \n\t"                 \
-    /* skip past already-loaded (r4, r5) */  \
-    "ldr r7, [r1], #8 \n\t"                  \
-    "ldr r8, [r2], #8 \n\t"                  \
-    "mov r9, #0 \n\t"                        \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "str r10, [r0], #4 \n\t"                 \
-    /* top two words: ah*bh + remaining carry (r14:r9) */ \
-    "umull r11, r12, r4, r5 \n\t"            \
-    "adds r11, r11, r14 \n\t"                \
-    "adc r12, r12, r9 \n\t"                  \
-    "stmia r0!, {r11, r12} \n\t"
-
-#define FAST_MULT_ASM_6 /* Fully unrolled 6x6-word multiply built on umull.
- * Notation: a[i], b[j] and out[k] are the 32-bit words found at the values
- * that r1, r2 and r0 hold on entry. Writes out[0..11]. On exit r0 has
- * advanced by 48 bytes and r1, r2 by 24 bytes each. r3-r12 and r14 are used
- * as scratch; only r3 is saved and restored by this macro.
- * NOTE(review): presumably r0 = result, r1 = left, r2 = right -- confirm
- * against the asm statement that expands this macro. */ \
-    "push {r3} \n\t"                \
-    "add r0, 12 \n\t"               \
-    "add r2, 12 \n\t"               \
-    "ldmia r1!, {r3,r4,r5} \n\t"    \
-    "ldmia r2!, {r6,r7,r8} \n\t"    \
-    /* Part 1: a[0..2] * b[3..5] -> out[3..8]. out[3] = low word of a[0]*b[3] */ \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "stmia r0!, {r11} \n\t"         \
-    /* out[4]: a[0]*b[4] + a[1]*b[3] + carry-in */ \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r12, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r12, r11 \n\t"       \
-    "adcs r9, r9, r14 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r12} \n\t"         \
-    /* out[5]: a[0]*b[5] + a[1]*b[4] + a[2]*b[3] + carry-in */ \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* out[6]: a[1]*b[5] + a[2]*b[4] + carry-in */ \
-    "mov r12, #0 \n\t"              \
-    "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "stmia r0!, {r10} \n\t"         \
-    /* out[7..8]: a[2]*b[5] + carry-in */ \
-    "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adc r12, r12, r10 \n\t"        \
-    "stmia r0!, {r11, r12} \n\t"    \
-    /* Part 2: rewind r0 to out[0] and r2 to b[0]; reload r6-r8 = b[0..2] */ \
-    "sub r0, 36 \n\t"               \
-    "sub r2, 24 \n\t"               \
-    "ldmia r2!, {r6,r7,r8} \n\t"    \
-    /* out[0] = low word of a[0]*b[0] */ \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "stmia r0!, {r11} \n\t"         \
-    /* out[1]: a[0]*b[1] + a[1]*b[0] + carry-in */ \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r12, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r12, r11 \n\t"       \
-    "adcs r9, r9, r14 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r12} \n\t"         \
-    /* out[2]: a[0]*b[2] + a[1]*b[1] + a[2]*b[0] + carry-in */ \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* out[3]: a[1]*b[2] + a[2]*b[1] + a[3]*b[0] + word stored in part 1 (r3 := a[3]) */ \
-    "ldmia r1!, {r3} \n\t"          \
-    "mov r12, #0 \n\t"              \
-    "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r3, r6 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "ldr r14, [r0] \n\t"            \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, #0 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "stmia r0!, {r10} \n\t"         \
-    /* out[4]: a[2]*b[2] + a[3]*b[1] + a[4]*b[0] + word stored in part 1 (r4 := a[4]) */ \
-    "ldmia r1!, {r4} \n\t"          \
-    "mov r14, #0 \n\t"              \
-    "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r3, r7 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r4, r6 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "ldr r9, [r0] \n\t"             \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, #0 \n\t"        \
-    "adc r14, r14, #0 \n\t"         \
-    "stmia r0!, {r11} \n\t"         \
-    /* out[5]: a[3]*b[2] + a[4]*b[1] + a[5]*b[0] + word stored in part 1 (r5 := a[5]) */ \
-    "ldmia r1!, {r5} \n\t"          \
-    "mov r9, #0 \n\t"               \
-    "umull r10, r11, r3, r8 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r4, r7 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r5, r6 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "ldr r10, [r0] \n\t"            \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, #0 \n\t"        \
-    "adc r9, r9, #0 \n\t"           \
-    "stmia r0!, {r12} \n\t"         \
-    /* out[6]: a[3]*b[3] + a[4]*b[2] + a[5]*b[1] + word stored in part 1 (r6 := b[3]) */ \
-    "ldmia r2!, {r6} \n\t"          \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r4, r8 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r5, r7 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "ldr r11, [r0] \n\t"            \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, #0 \n\t"          \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r14} \n\t"         \
-    /* out[7]: a[3]*b[4] + a[4]*b[3] + a[5]*b[2] + word stored in part 1 (r7 := b[4]) */ \
-    "ldmia r2!, {r7} \n\t"          \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r3, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r5, r8 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "ldr r12, [r0] \n\t"            \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, #0 \n\t"        \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* out[8]: a[3]*b[5] + a[4]*b[4] + a[5]*b[3] + word stored in part 1 (r8 := b[5]) */ \
-    "ldmia r2!, {r8} \n\t"          \
-    "mov r12, #0 \n\t"              \
-    "umull r14, r9, r3, r8 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r4, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r5, r6 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "ldr r14, [r0] \n\t"            \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, #0 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "stmia r0!, {r10} \n\t"         \
-    /* out[9]: a[4]*b[5] + a[5]*b[4] + carry-in */ \
-    "mov r14, #0 \n\t"              \
-    "umull r9, r10, r4, r8 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r5, r7 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "stmia r0!, {r11} \n\t"         \
-    /* out[10..11]: a[5]*b[5] + carry-in */ \
-    "umull r10, r11, r5, r8 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adc r14, r14, r11 \n\t"        \
-    "stmia r0!, {r12, r14} \n\t"    \
-    "pop {r3} \n\t"
-
-#define FAST_MULT_ASM_6_TO_7 /* Extends a product by one word per operand.
- * When r3 == 6 the macro body is skipped by branching forward to local
- * label 1, which must be defined by the code that follows this macro.
- * Otherwise, let ah / bh be the words at the entry values of r1 / r2, and
- * A, B, R the six words immediately below the entry values of r1, r2, r0:
- *   for j = 0..5, ah*B[j] + bh*A[j] is added into R[j] with the carry
- *   propagated into the next word; finally ah*bh plus the remaining carry
- *   is stored in the two words starting at the entry value of r0.
- * On exit r0 has advanced by 8 bytes and r1, r2 by 4 bytes each.
- * Clobbers r4-r12 and r14.
- * NOTE(review): presumably expanded right after FAST_MULT_ASM_6 to turn a
- * 6x6 product into a 7x7 product, with r3 holding the word count --
- * confirm at the expansion site. */ \
-    "cmp r3, #6 \n\t"                           \
-    "beq 1f \n\t"                               \
-                                                \
-    /* r4 = left high, r5 = right high */       \
-    "ldr r4, [r1] \n\t"                         \
-    "ldr r5, [r2] \n\t"                         \
-    /* step all three pointers back by six words (24 bytes) */ \
-    "sub r0, #24 \n\t"                          \
-    "sub r1, #24 \n\t"                          \
-    "sub r2, #24 \n\t"                          \
-    /* j = 0: R[0] += ah*B[0] + bh*A[0]; carry kept in r10:r14 */ \
-    "ldr r6, [r0] \n\t"                         \
-    "ldr r7, [r1], #4 \n\t"                     \
-    "ldr r8, [r2], #4 \n\t"                     \
-    "mov r14, #0 \n\t"                          \
-    "umull r9, r10, r4, r8 \n\t"                \
-    "umull r11, r12, r5, r7 \n\t"               \
-    "adds r9, r9, r6 \n\t"                      \
-    "adc r10, r10, #0 \n\t"                     \
-    "adds r9, r9, r11 \n\t"                     \
-    "adcs r10, r10, r12 \n\t"                   \
-    "adc r14, r14, #0 \n\t"                     \
-    "str r9, [r0], #4 \n\t"                     \
-    /* j = 1: R[1] += ah*B[1] + bh*A[1] + carry; carry kept in r14:r9 */ \
-    "ldr r6, [r0] \n\t"                         \
-    "adds r10, r10, r6 \n\t"                    \
-    "adcs r14, r14, #0 \n\t"                    \
-    "ldr r7, [r1], #4 \n\t"                     \
-    "ldr r8, [r2], #4 \n\t"                     \
-    "mov r9, #0 \n\t"                           \
-    "umull r11, r12, r4, r8 \n\t"               \
-    "adds r10, r10, r11 \n\t"                   \
-    "adcs r14, r14, r12 \n\t"                   \
-    "adc r9, r9, #0 \n\t"                       \
-    "umull r11, r12, r5, r7 \n\t"               \
-    "adds r10, r10, r11 \n\t"                   \
-    "adcs r14, r14, r12 \n\t"                   \
-    "adc r9, r9, #0 \n\t"                       \
-    "str r10, [r0], #4 \n\t"                    \
-    /* j = 2: R[2] += ah*B[2] + bh*A[2] + carry; carry kept in r9:r10 */ \
-    "ldr r6, [r0] \n\t"                         \
-    "adds r14, r14, r6 \n\t"                    \
-    "adcs r9, r9, #0 \n\t"                      \
-    "ldr r7, [r1], #4 \n\t"                     \
-    "ldr r8, [r2], #4 \n\t"                     \
-    "mov r10, #0 \n\t"                          \
-    "umull r11, r12, r4, r8 \n\t"               \
-    "adds r14, r14, r11 \n\t"                   \
-    "adcs r9, r9, r12 \n\t"                     \
-    "adc r10, r10, #0 \n\t"                     \
-    "umull r11, r12, r5, r7 \n\t"               \
-    "adds r14, r14, r11 \n\t"                   \
-    "adcs r9, r9, r12 \n\t"                     \
-    "adc r10, r10, #0 \n\t"                     \
-    "str r14, [r0], #4 \n\t"                    \
-    /* j = 3: R[3] += ah*B[3] + bh*A[3] + carry; carry kept in r10:r14 */ \
-    "ldr r6, [r0] \n\t"                         \
-    "adds r9, r9, r6 \n\t"                      \
-    "adcs r10, r10, #0 \n\t"                    \
-    "ldr r7, [r1], #4 \n\t"                     \
-    "ldr r8, [r2], #4 \n\t"                     \
-    "mov r14, #0 \n\t"                          \
-    "umull r11, r12, r4, r8 \n\t"               \
-    "adds r9, r9, r11 \n\t"                     \
-    "adcs r10, r10, r12 \n\t"                   \
-    "adc r14, r14, #0 \n\t"                     \
-    "umull r11, r12, r5, r7 \n\t"               \
-    "adds r9, r9, r11 \n\t"                     \
-    "adcs r10, r10, r12 \n\t"                   \
-    "adc r14, r14, #0 \n\t"                     \
-    "str r9, [r0], #4 \n\t"                     \
-    /* j = 4: R[4] += ah*B[4] + bh*A[4] + carry; carry kept in r14:r9 */ \
-    "ldr r6, [r0] \n\t"                         \
-    "adds r10, r10, r6 \n\t"                    \
-    "adcs r14, r14, #0 \n\t"                    \
-    "ldr r7, [r1], #4 \n\t"                     \
-    "ldr r8, [r2], #4 \n\t"                     \
-    "mov r9, #0 \n\t"                           \
-    "umull r11, r12, r4, r8 \n\t"               \
-    "adds r10, r10, r11 \n\t"                   \
-    "adcs r14, r14, r12 \n\t"                   \
-    "adc r9, r9, #0 \n\t"                       \
-    "umull r11, r12, r5, r7 \n\t"               \
-    "adds r10, r10, r11 \n\t"                   \
-    "adcs r14, r14, r12 \n\t"                   \
-    "adc r9, r9, #0 \n\t"                       \
-    "str r10, [r0], #4 \n\t"                    \
-    /* j = 5: R[5] += ah*B[5] + bh*A[5] + carry; carry kept in r9:r10 */ \
-    "ldr r6, [r0] \n\t"                         \
-    "adds r14, r14, r6 \n\t"                    \
-    "adcs r9, r9, #0 \n\t"                      \
-    /* skip past already-loaded (r4, r5) */     \
-    "ldr r7, [r1], #8 \n\t"                     \
-    "ldr r8, [r2], #8 \n\t"                     \
-    "mov r10, #0 \n\t"                          \
-    "umull r11, r12, r4, r8 \n\t"               \
-    "adds r14, r14, r11 \n\t"                   \
-    "adcs r9, r9, r12 \n\t"                     \
-    "adc r10, r10, #0 \n\t"                     \
-    "umull r11, r12, r5, r7 \n\t"               \
-    "adds r14, r14, r11 \n\t"                   \
-    "adcs r9, r9, r12 \n\t"                     \
-    "adc r10, r10, #0 \n\t"                     \
-    "str r14, [r0], #4 \n\t"                    \
-    /* top two words: ah*bh + remaining carry (r9:r10) */ \
-    "umull r11, r12, r4, r5 \n\t"               \
-    "adds r11, r11, r9 \n\t"                    \
-    "adc r12, r12, r10 \n\t"                    \
-    "stmia r0!, {r11, r12} \n\t"
-
-#define FAST_MULT_ASM_7                \
-    "push {r3} \n\t"                   \
-    "add r0, 24 \n\t"                  \
-    "add r2, 24 \n\t"                  \
-    "ldmia r1!, {r3} \n\t"             \
-    "ldmia r2!, {r6} \n\t"             \
-                                       \
-    "umull r9, r10, r3, r6 \n\t"       \
-    "stmia r0!, {r9, r10} \n\t"        \
-                                       \
-    "sub r0, 20 \n\t"                  \
-    "sub r2, 16 \n\t"                  \
-    "ldmia r2!, {r6, r7, r8} \n\t"     \
-    "ldmia r1!, {r4, r5} \n\t"         \
-                                       \
-    "umull r9, r10, r3, r6 \n\t"       \
-    "stmia r0!, {r9} \n\t"             \
-                                       \
-    "mov r14, #0 \n\t"                 \
-    "umull r9, r12, r3, r7 \n\t"       \
-    "adds r10, r10, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r9, r11, r4, r6 \n\t"       \
-    "adds r10, r10, r9 \n\t"           \
-    "adcs r12, r12, r11 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "stmia r0!, {r10} \n\t"            \
-                                       \
-    "mov r9, #0 \n\t"                  \
-    "umull r10, r11, r3, r8 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r4, r7 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r5, r6 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "stmia r0!, {r12} \n\t"            \
-                                       \
-    "ldmia r1!, {r3} \n\t"             \
-    "mov r10, #0 \n\t"                 \
-    "umull r11, r12, r4, r8 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r5, r7 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r3, r6 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "ldr r11, [r0] \n\t"               \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, #0 \n\t"             \
-    "adc r10, r10, #0 \n\t"            \
-    "stmia r0!, {r14} \n\t"            \
-                                       \
-    "ldmia r2!, {r6} \n\t"             \
-    "mov r11, #0 \n\t"                 \
-    "umull r12, r14, r4, r6 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r5, r8 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r3, r7 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "ldr r12, [r0] \n\t"               \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, #0 \n\t"           \
-    "adc r11, r11, #0 \n\t"            \
-    "stmia r0!, {r9} \n\t"             \
-                                       \
-    "mov r12, #0 \n\t"                 \
-    "umull r14, r9, r5, r6 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r14, r9, r3, r8 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "stmia r0!, {r10} \n\t"            \
-                                       \
-    "umull r9, r10, r3, r6 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adc r12, r12, r10 \n\t"           \
-    "stmia r0!, {r11, r12} \n\t"       \
-                                       \
-    "sub r0, 44 \n\t"                  \
-    "sub r1, 16 \n\t"                  \
-    "sub r2, 28 \n\t"                  \
-    "ldmia r1!, {r3,r4,r5} \n\t"       \
-    "ldmia r2!, {r6,r7,r8} \n\t"       \
-                                       \
-    "umull r9, r10, r3, r6 \n\t"       \
-    "stmia r0!, {r9} \n\t"             \
-                                       \
-    "mov r14, #0 \n\t"                 \
-    "umull r9, r12, r3, r7 \n\t"       \
-    "adds r10, r10, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r9, r11, r4, r6 \n\t"       \
-    "adds r10, r10, r9 \n\t"           \
-    "adcs r12, r12, r11 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "stmia r0!, {r10} \n\t"            \
-                                       \
-    "mov r9, #0 \n\t"                  \
-    "umull r10, r11, r3, r8 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r4, r7 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r5, r6 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "stmia r0!, {r12} \n\t"            \
-                                       \
-    "ldmia r1!, {r3} \n\t"             \
-    "mov r10, #0 \n\t"                 \
-    "umull r11, r12, r4, r8 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r5, r7 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r3, r6 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "ldr r11, [r0] \n\t"               \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, #0 \n\t"             \
-    "adc r10, r10, #0 \n\t"            \
-    "stmia r0!, {r14} \n\t"            \
-                                       \
-    "ldmia r1!, {r4} \n\t"             \
-    "mov r11, #0 \n\t"                 \
-    "umull r12, r14, r5, r8 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r3, r7 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r4, r6 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "ldr r12, [r0] \n\t"               \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, #0 \n\t"           \
-    "adc r11, r11, #0 \n\t"            \
-    "stmia r0!, {r9} \n\t"             \
-                                       \
-    "ldmia r1!, {r5} \n\t"             \
-    "mov r12, #0 \n\t"                 \
-    "umull r14, r9, r3, r8 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r14, r9, r4, r7 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r14, r9, r5, r6 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "ldr r14, [r0] \n\t"               \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, #0 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "stmia r0!, {r10} \n\t"            \
-                                       \
-    "ldmia r1!, {r3} \n\t"             \
-    "mov r14, #0 \n\t"                 \
-    "umull r9, r10, r4, r8 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "umull r9, r10, r5, r7 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "umull r9, r10, r3, r6 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "ldr r9, [r0] \n\t"                \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, #0 \n\t"           \
-    "adc r14, r14, #0 \n\t"            \
-    "stmia r0!, {r11} \n\t"            \
-                                       \
-    "ldmia r2!, {r6} \n\t"             \
-    "mov r9, #0 \n\t"                  \
-    "umull r10, r11, r4, r6 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r5, r8 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "umull r10, r11, r3, r7 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, r11 \n\t"          \
-    "adc r9, r9, #0 \n\t"              \
-    "ldr r10, [r0] \n\t"               \
-    "adds r12, r12, r10 \n\t"          \
-    "adcs r14, r14, #0 \n\t"           \
-    "adc r9, r9, #0 \n\t"              \
-    "stmia r0!, {r12} \n\t"            \
-                                       \
-    "ldmia r2!, {r7} \n\t"             \
-    "mov r10, #0 \n\t"                 \
-    "umull r11, r12, r4, r7 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r5, r6 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "umull r11, r12, r3, r8 \n\t"      \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, r12 \n\t"            \
-    "adc r10, r10, #0 \n\t"            \
-    "ldr r11, [r0] \n\t"               \
-    "adds r14, r14, r11 \n\t"          \
-    "adcs r9, r9, #0 \n\t"             \
-    "adc r10, r10, #0 \n\t"            \
-    "stmia r0!, {r14} \n\t"            \
-                                       \
-    "ldmia r2!, {r8} \n\t"             \
-    "mov r11, #0 \n\t"                 \
-    "umull r12, r14, r4, r8 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r5, r7 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "umull r12, r14, r3, r6 \n\t"      \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, r14 \n\t"          \
-    "adc r11, r11, #0 \n\t"            \
-    "ldr r12, [r0] \n\t"               \
-    "adds r9, r9, r12 \n\t"            \
-    "adcs r10, r10, #0 \n\t"           \
-    "adc r11, r11, #0 \n\t"            \
-    "stmia r0!, {r9} \n\t"             \
-                                       \
-    "ldmia r2!, {r6} \n\t"             \
-    "mov r12, #0 \n\t"                 \
-    "umull r14, r9, r4, r6 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r14, r9, r5, r8 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "umull r14, r9, r3, r7 \n\t"       \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, r9 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "ldr r14, [r0] \n\t"               \
-    "adds r10, r10, r14 \n\t"          \
-    "adcs r11, r11, #0 \n\t"           \
-    "adc r12, r12, #0 \n\t"            \
-    "stmia r0!, {r10} \n\t"            \
-                                       \
-    "mov r14, #0 \n\t"                 \
-    "umull r9, r10, r5, r6 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "umull r9, r10, r3, r8 \n\t"       \
-    "adds r11, r11, r9 \n\t"           \
-    "adcs r12, r12, r10 \n\t"          \
-    "adc r14, r14, #0 \n\t"            \
-    "stmia r0!, {r11} \n\t"            \
-                                       \
-    "umull r10, r11, r3, r6 \n\t"      \
-    "adds r12, r12, r10 \n\t"          \
-    "adc r14, r14, r11 \n\t"           \
-    "stmia r0!, {r12, r14} \n\t"       \
-    "pop {r3} \n\t"
-
-#define FAST_MULT_ASM_7_TO_8 /* ARM asm text: adds the cross terms of two high operand words (r4, r5) to seven words at r0, then stores r4*r5 above them; skipped when r3 == 7. */ \
-    "cmp r3, #7 \n\t"                        \
-    "beq 1f \n\t"                            \
-    /* NOTE(review): label 1 is not defined in this macro, and r3 is presumably the word count - confirm at the use site. */ \
-    /* r4 = left high, r5 = right high */    \
-    "ldr r4, [r1] \n\t"                      \
-    "ldr r5, [r2] \n\t"                      \
-    /* Step r0, r1 and r2 back by 28 bytes (7 words). */ \
-    "sub r0, #28 \n\t"                       \
-    "sub r1, #28 \n\t"                       \
-    "sub r2, #28 \n\t"                       \
-    /* Round 1 of 7: [r0] += r4*r8 + r5*r7, with r7/r8 the next words at r1/r2; sum held in (r9, r10, r14), low to high. */ \
-    "ldr r6, [r0] \n\t"                      \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r14, #0 \n\t"                       \
-    "umull r9, r10, r4, r8 \n\t"             \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r9, r9, r6 \n\t"                   \
-    "adc r10, r10, #0 \n\t"                  \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "str r9, [r0], #4 \n\t"                  \
-    /* Round 2: same step on the next word; sum held in (r10, r14, r9). */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r10, r10, r6 \n\t"                 \
-    "adcs r14, r14, #0 \n\t"                 \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r9, #0 \n\t"                        \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "str r10, [r0], #4 \n\t"                 \
-    /* Round 3: sum held in (r14, r9, r10). */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r14, r14, r6 \n\t"                 \
-    "adcs r9, r9, #0 \n\t"                   \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r10, #0 \n\t"                       \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r14, r14, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                  \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r14, r14, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                  \
-    "str r14, [r0], #4 \n\t"                 \
-    /* Round 4: sum held in (r9, r10, r14). */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r9, r9, r6 \n\t"                   \
-    "adcs r10, r10, #0 \n\t"                 \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r14, #0 \n\t"                       \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "str r9, [r0], #4 \n\t"                  \
-    /* Round 5: sum held in (r10, r14, r9). */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r10, r10, r6 \n\t"                 \
-    "adcs r14, r14, #0 \n\t"                 \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r9, #0 \n\t"                        \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r10, r10, r11 \n\t"                \
-    "adcs r14, r14, r12 \n\t"                \
-    "adc r9, r9, #0 \n\t"                    \
-    "str r10, [r0], #4 \n\t"                 \
-    /* Round 6: sum held in (r14, r9, r10). */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r14, r14, r6 \n\t"                 \
-    "adcs r9, r9, #0 \n\t"                   \
-    "ldr r7, [r1], #4 \n\t"                  \
-    "ldr r8, [r2], #4 \n\t"                  \
-    "mov r10, #0 \n\t"                       \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r14, r14, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                  \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r14, r14, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                  \
-    "str r14, [r0], #4 \n\t"                 \
-    /* Round 7: sum held in (r9, r10, r14); r1/r2 advance by 8 here instead of 4. */ \
-    "ldr r6, [r0] \n\t"                      \
-    "adds r9, r9, r6 \n\t"                   \
-    "adcs r10, r10, #0 \n\t"                 \
-    /* skip past already-loaded (r4, r5) */  \
-    "ldr r7, [r1], #8 \n\t"                  \
-    "ldr r8, [r2], #8 \n\t"                  \
-    "mov r14, #0 \n\t"                       \
-    "umull r11, r12, r4, r8 \n\t"            \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "umull r11, r12, r5, r7 \n\t"            \
-    "adds r9, r9, r11 \n\t"                  \
-    "adcs r10, r10, r12 \n\t"                \
-    "adc r14, r14, #0 \n\t"                  \
-    "str r9, [r0], #4 \n\t"                  \
-    /* Top two words: r4*r5 plus the carries left in (r10, r14). */ \
-    "umull r11, r12, r4, r5 \n\t"            \
-    "adds r11, r11, r10 \n\t"                \
-    "adc r12, r12, r14 \n\t"                 \
-    "stmia r0!, {r11, r12} \n\t"
-
-#define FAST_MULT_ASM_8 /* ARM asm text: P[0..15] = A[0..7] * B[0..7] on 32-bit words, in three passes. NOTE(review): index comments assume r0 = &P[0], r1 = &A[0], r2 = &B[0] on entry - confirm at the use site. */ \
-    "push {r3} \n\t"                /* r3 is saved here and restored by the closing pop. */ \
-    "add r0, 24 \n\t"               /* Pass 1: A[0..1] x B[6..7] -> P[6..9]. */ \
-    "add r2, 24 \n\t"               /* In passes 2 and 3, "+=" means the word already stored at [r0] is added in; every column also absorbs the previous column's carry. */ \
-    "ldmia r1!, {r3,r4} \n\t"       \
-    "ldmia r2!, {r6,r7} \n\t"       \
-    /* P[6]: A0*B6 */               \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "stmia r0!, {r11} \n\t"         \
-    /* P[7]: A0*B7 + A1*B6 */       \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r12, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r12, r11 \n\t"       \
-    "adcs r9, r9, r14 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r12} \n\t"         \
-    /* P[8..9]: A1*B7 + carry */    \
-    "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adc r10, r10, r14 \n\t"        \
-    "stmia r0!, {r9, r10} \n\t"     \
-    /* Pass 2: remaining products of A[0..4] x B[3..7] -> P[3..12]; r0 and r2 are rewound to word 3. */ \
-    "sub r0, 28 \n\t"               \
-    "sub r2, 20 \n\t"               \
-    "ldmia r2!, {r6,r7,r8} \n\t"    \
-    "ldmia r1!, {r5} \n\t"          \
-    /* P[3]: A0*B3 */               \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "stmia r0!, {r11} \n\t"         \
-    /* P[4]: A0*B4 + A1*B3 */       \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r12, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r12, r11 \n\t"       \
-    "adcs r9, r9, r14 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r12} \n\t"         \
-    /* P[5]: A0*B5 + A1*B4 + A2*B3 */ \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* P[6] += A1*B5 + A2*B4 + A3*B3 */ \
-    "ldmia r1!, {r3} \n\t"          \
-    "mov r12, #0 \n\t"              \
-    "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r3, r6 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "ldr r14, [r0] \n\t"            \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, #0 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "stmia r0!, {r10} \n\t"         \
-    /* P[7] += A2*B5 + A3*B4 + A4*B3 */ \
-    "ldmia r1!, {r4} \n\t"          \
-    "mov r14, #0 \n\t"              \
-    "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r3, r7 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r4, r6 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "ldr r9, [r0] \n\t"             \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, #0 \n\t"        \
-    "adc r14, r14, #0 \n\t"         \
-    "stmia r0!, {r11} \n\t"         \
-    /* P[8] += A2*B6 + A3*B5 + A4*B4 */ \
-    "ldmia r2!, {r6} \n\t"          \
-    "mov r9, #0 \n\t"               \
-    "umull r10, r11, r5, r6 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r3, r8 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r4, r7 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "ldr r10, [r0] \n\t"            \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, #0 \n\t"        \
-    "adc r9, r9, #0 \n\t"           \
-    "stmia r0!, {r12} \n\t"         \
-    /* P[9] += A2*B7 + A3*B6 + A4*B5 */ \
-    "ldmia r2!, {r7} \n\t"          \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r12, r5, r7 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r4, r8 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "ldr r11, [r0] \n\t"            \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, #0 \n\t"          \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r14} \n\t"         \
-    /* P[10]: A3*B7 + A4*B6 */      \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r3, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* P[11..12]: A4*B7 + carry */  \
-    "umull r14, r9, r4, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adc r11, r11, r9 \n\t"         \
-    "stmia r0!, {r10, r11} \n\t"    \
-    /* Pass 3: all remaining products -> P[0..15]; r0, r1 and r2 are rewound to word 0. */ \
-    "sub r0, 52 \n\t"               \
-    "sub r1, 20 \n\t"               \
-    "sub r2, 32 \n\t"               \
-    "ldmia r1!, {r3,r4,r5} \n\t"    \
-    "ldmia r2!, {r6,r7,r8} \n\t"    \
-    /* P[0]: A0*B0 */               \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "stmia r0!, {r11} \n\t"         \
-    /* P[1]: A0*B1 + A1*B0 */       \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r9, r3, r7 \n\t"    \
-    "adds r12, r12, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r11, r14, r4, r6 \n\t"   \
-    "adds r12, r12, r11 \n\t"       \
-    "adcs r9, r9, r14 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r12} \n\t"         \
-    /* P[2]: A0*B2 + A1*B1 + A2*B0 */ \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r3, r8 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r5, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* P[3] += A1*B2 + A2*B1 + A3*B0 */ \
-    "ldmia r1!, {r3} \n\t"          \
-    "mov r12, #0 \n\t"              \
-    "umull r14, r9, r4, r8 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r5, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r3, r6 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "ldr r14, [r0] \n\t"            \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, #0 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "stmia r0!, {r10} \n\t"         \
-    /* P[4] += A2*B2 + A3*B1 + A4*B0 */ \
-    "ldmia r1!, {r4} \n\t"          \
-    "mov r14, #0 \n\t"              \
-    "umull r9, r10, r5, r8 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r3, r7 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r4, r6 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "ldr r9, [r0] \n\t"             \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, #0 \n\t"        \
-    "adc r14, r14, #0 \n\t"         \
-    "stmia r0!, {r11} \n\t"         \
-    /* P[5] += A3*B2 + A4*B1 + A5*B0 */ \
-    "ldmia r1!, {r5} \n\t"          \
-    "mov r9, #0 \n\t"               \
-    "umull r10, r11, r3, r8 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r4, r7 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r5, r6 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "ldr r10, [r0] \n\t"            \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, #0 \n\t"        \
-    "adc r9, r9, #0 \n\t"           \
-    "stmia r0!, {r12} \n\t"         \
-    /* P[6] += A4*B2 + A5*B1 + A6*B0 */ \
-    "ldmia r1!, {r3} \n\t"          \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r12, r4, r8 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r5, r7 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r3, r6 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "ldr r11, [r0] \n\t"            \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, #0 \n\t"          \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r14} \n\t"         \
-    /* P[7] += A5*B2 + A6*B1 + A7*B0 */ \
-    "ldmia r1!, {r4} \n\t"          \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r5, r8 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r3, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "ldr r12, [r0] \n\t"            \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, #0 \n\t"        \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* P[8] += A5*B3 + A6*B2 + A7*B1 */ \
-    "ldmia r2!, {r6} \n\t"          \
-    "mov r12, #0 \n\t"              \
-    "umull r14, r9, r5, r6 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r3, r8 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r4, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "ldr r14, [r0] \n\t"            \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, #0 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "stmia r0!, {r10} \n\t"         \
-    /* P[9] += A5*B4 + A6*B3 + A7*B2 */ \
-    "ldmia r2!, {r7} \n\t"          \
-    "mov r14, #0 \n\t"              \
-    "umull r9, r10, r5, r7 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r3, r6 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "umull r9, r10, r4, r8 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, r10 \n\t"       \
-    "adc r14, r14, #0 \n\t"         \
-    "ldr r9, [r0] \n\t"             \
-    "adds r11, r11, r9 \n\t"        \
-    "adcs r12, r12, #0 \n\t"        \
-    "adc r14, r14, #0 \n\t"         \
-    "stmia r0!, {r11} \n\t"         \
-    /* P[10] += A5*B5 + A6*B4 + A7*B3 */ \
-    "ldmia r2!, {r8} \n\t"          \
-    "mov r9, #0 \n\t"               \
-    "umull r10, r11, r5, r8 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r3, r7 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "umull r10, r11, r4, r6 \n\t"   \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, r11 \n\t"       \
-    "adc r9, r9, #0 \n\t"           \
-    "ldr r10, [r0] \n\t"            \
-    "adds r12, r12, r10 \n\t"       \
-    "adcs r14, r14, #0 \n\t"        \
-    "adc r9, r9, #0 \n\t"           \
-    "stmia r0!, {r12} \n\t"         \
-    /* P[11] += A5*B6 + A6*B5 + A7*B4 */ \
-    "ldmia r2!, {r6} \n\t"          \
-    "mov r10, #0 \n\t"              \
-    "umull r11, r12, r5, r6 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r3, r8 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "umull r11, r12, r4, r7 \n\t"   \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, r12 \n\t"         \
-    "adc r10, r10, #0 \n\t"         \
-    "ldr r11, [r0] \n\t"            \
-    "adds r14, r14, r11 \n\t"       \
-    "adcs r9, r9, #0 \n\t"          \
-    "adc r10, r10, #0 \n\t"         \
-    "stmia r0!, {r14} \n\t"         \
-    /* P[12] += A5*B7 + A6*B6 + A7*B5 */ \
-    "ldmia r2!, {r7} \n\t"          \
-    "mov r11, #0 \n\t"              \
-    "umull r12, r14, r5, r7 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r3, r6 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "umull r12, r14, r4, r8 \n\t"   \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, r14 \n\t"       \
-    "adc r11, r11, #0 \n\t"         \
-    "ldr r12, [r0] \n\t"            \
-    "adds r9, r9, r12 \n\t"         \
-    "adcs r10, r10, #0 \n\t"        \
-    "adc r11, r11, #0 \n\t"         \
-    "stmia r0!, {r9} \n\t"          \
-    /* P[13]: A6*B7 + A7*B6 */      \
-    "mov r12, #0 \n\t"              \
-    "umull r14, r9, r3, r7 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "umull r14, r9, r4, r6 \n\t"    \
-    "adds r10, r10, r14 \n\t"       \
-    "adcs r11, r11, r9 \n\t"        \
-    "adc r12, r12, #0 \n\t"         \
-    "stmia r0!, {r10} \n\t"         \
-    /* P[14..15]: A7*B7 + carry */  \
-    "umull r9, r10, r4, r7 \n\t"    \
-    "adds r11, r11, r9 \n\t"        \
-    "adc r12, r12, r10 \n\t"        \
-    "stmia r0!, {r11, r12} \n\t"    \
-    "pop {r3} \n\t"
-
-#define FAST_SQUARE_ASM_5 /* Square the 5 words read from [r1]; write the 10-word result through r0. */ \
-    "push   {r2} \n\t"                  \
-    "ldmia r1!, {r2,r3,r4,r5,r6} \n\t"  \
-    "push   {r1} \n\t"                  \
-    /* r2..r6 = input words a0..a4. r2 and the advanced r1 were saved because both are overwritten below. Column 0: a0*a0. */ \
-    "umull r11, r12, r2, r2 \n\t"       \
-    "stmia r0!, {r11} \n\t"             \
-    /* Column 1: a0*a1 counted twice (the product is added in two passes rather than doubled). */ \
-    "mov r9, #0 \n\t"                   \
-    "umull r10, r11, r2, r3 \n\t"       \
-    "adds r12, r12, r10 \n\t"           \
-    "adcs r8, r11, #0 \n\t"             \
-    "adc r9, r9, #0 \n\t"               \
-    "adds r12, r12, r10 \n\t"           \
-    "adcs r8, r8, r11 \n\t"             \
-    "adc r9, r9, #0 \n\t"               \
-    "stmia r0!, {r12} \n\t"             \
-    /* Column 2: 2*a0*a2 + a1*a1, added to the carry in r8/r9. */ \
-    "mov r10, #0 \n\t"                  \
-    "umull r11, r12, r2, r4 \n\t"       \
-    "adds r11, r11, r11 \n\t"           \
-    "adcs r12, r12, r12 \n\t"           \
-    "adc r10, r10, #0 \n\t"             \
-    "adds r8, r8, r11 \n\t"             \
-    "adcs r9, r9, r12 \n\t"             \
-    "adc r10, r10, #0 \n\t"             \
-    "umull r11, r12, r3, r3 \n\t"       \
-    "adds r8, r8, r11 \n\t"             \
-    "adcs r9, r9, r12 \n\t"             \
-    "adc r10, r10, #0 \n\t"             \
-    "stmia r0!, {r8} \n\t"              \
-    /* Column 3: 2*(a0*a3 + a1*a2), plus the carry in r9/r10; r1 and r14 are scratch. */ \
-    "mov r12, #0 \n\t"                  \
-    "umull r8, r11, r2, r5 \n\t"        \
-    "umull r1, r14, r3, r4 \n\t"        \
-    "adds r8, r8, r1 \n\t"              \
-    "adcs r11, r11, r14 \n\t"           \
-    "adc r12, r12, #0 \n\t"             \
-    "adds r8, r8, r8 \n\t"              \
-    "adcs r11, r11, r11 \n\t"           \
-    "adc r12, r12, r12 \n\t"            \
-    "adds r8, r8, r9 \n\t"              \
-    "adcs r11, r11, r10 \n\t"           \
-    "adc r12, r12, #0 \n\t"             \
-    "stmia r0!, {r8} \n\t"              \
-    /* Column 4: 2*(a0*a4 + a1*a3) + a2*a2, plus the carry in r11/r12. */ \
-    "mov r10, #0 \n\t"                  \
-    "umull r8, r9, r2, r6 \n\t"         \
-    "umull r1, r14, r3, r5 \n\t"        \
-    "adds r8, r8, r1 \n\t"              \
-    "adcs r9, r9, r14 \n\t"             \
-    "adc r10, r10, #0 \n\t"             \
-    "adds r8, r8, r8 \n\t"              \
-    "adcs r9, r9, r9 \n\t"              \
-    "adc r10, r10, r10 \n\t"            \
-    "umull r1, r14, r4, r4 \n\t"        \
-    "adds r8, r8, r1 \n\t"              \
-    "adcs r9, r9, r14 \n\t"             \
-    "adc r10, r10, #0 \n\t"             \
-    "adds r8, r8, r11 \n\t"             \
-    "adcs r9, r9, r12 \n\t"             \
-    "adc r10, r10, #0 \n\t"             \
-    "stmia r0!, {r8} \n\t"              \
-    /* Column 5: 2*(a1*a4 + a2*a3), plus the carry in r9/r10. */ \
-    "mov r12, #0 \n\t"                  \
-    "umull r8, r11, r3, r6 \n\t"        \
-    "umull r1, r14, r4, r5 \n\t"        \
-    "adds r8, r8, r1 \n\t"              \
-    "adcs r11, r11, r14 \n\t"           \
-    "adc r12, r12, #0 \n\t"             \
-    "adds r8, r8, r8 \n\t"              \
-    "adcs r11, r11, r11 \n\t"           \
-    "adc r12, r12, r12 \n\t"            \
-    "adds r8, r8, r9 \n\t"              \
-    "adcs r11, r11, r10 \n\t"           \
-    "adc r12, r12, #0 \n\t"             \
-    "stmia r0!, {r8} \n\t"              \
-    /* Column 6: 2*a2*a4 + a3*a3, accumulated directly onto the carry in r11/r12. */ \
-    "mov r8, #0 \n\t"                   \
-    "umull r1, r10, r4, r6 \n\t"        \
-    "adds r1, r1, r1 \n\t"              \
-    "adcs r10, r10, r10 \n\t"           \
-    "adc r8, r8, #0 \n\t"               \
-    "adds r11, r11, r1 \n\t"            \
-    "adcs r12, r12, r10 \n\t"           \
-    "adc r8, r8, #0 \n\t"               \
-    "umull r1, r10, r5, r5 \n\t"        \
-    "adds r11, r11, r1 \n\t"            \
-    "adcs r12, r12, r10 \n\t"           \
-    "adc r8, r8, #0 \n\t"               \
-    "stmia r0!, {r11} \n\t"             \
-    /* Column 7: 2*a3*a4, accumulated onto the carry in r12/r8. */ \
-    "mov r11, #0 \n\t"                  \
-    "umull r1, r10, r5, r6 \n\t"        \
-    "adds r1, r1, r1 \n\t"              \
-    "adcs r10, r10, r10 \n\t"           \
-    "adc r11, r11, #0 \n\t"             \
-    "adds r12, r12, r1 \n\t"            \
-    "adcs r8, r8, r10 \n\t"             \
-    "adc r11, r11, #0 \n\t"             \
-    "stmia r0!, {r12} \n\t"             \
-    /* Columns 8-9: a4*a4 plus the remaining carry; then restore the saved r1 and r2. */ \
-    "umull r1, r10, r6, r6 \n\t"        \
-    "adds r8, r8, r1 \n\t"              \
-    "adcs r11, r11, r10 \n\t"           \
-    "stmia r0!, {r8, r11} \n\t"         \
-    "pop {r1, r2} \n\t"
-
-#define FAST_SQUARE_ASM_5_TO_6 /* Fold the terms involving a 6th input word into the result at [r0]; skipped when r2 == 5. */ \
-    "cmp r2, #5 \n\t"                    \
-    "beq 1f \n\t"                        \
-    /* "1f" is a forward local label defined outside this macro. NOTE(review): presumably expanded right after FAST_SQUARE_ASM_5 with r2 = word count - confirm at the use site. Step both pointers back by 5 words (20 bytes). */ \
-    "sub r0, #20 \n\t"                   \
-    "sub r1, #20 \n\t"                   \
-                                         \
-    /* Off-center products: load a0..a5 into r6..r11, then r3..r8 = (a0..a4) * a5 */ \
-    "ldmia r1!, {r6,r7,r8,r9,r10,r11} \n\t" \
-    "umull r3, r4, r6, r11 \n\t"         \
-    "umull r6, r5, r7, r11 \n\t"         \
-    "adds r4, r4, r6 \n\t"               \
-    "umull r7, r6, r8, r11 \n\t"         \
-    "adcs r5, r5, r7 \n\t"               \
-    "umull r8, r7, r9, r11 \n\t"         \
-    "adcs r6, r6, r8 \n\t"               \
-    "umull r9, r8, r10, r11 \n\t"        \
-    "adcs r7, r7, r9 \n\t"               \
-    "adcs r8, r8, #0 \n\t"               \
-                                         \
-    /* Multiply by 2: double r3..r8, catching the carry-out in r9 */ \
-    "mov r9, #0 \n\t"                    \
-    "adds r3, r3, r3 \n\t"               \
-    "adcs r4, r4, r4 \n\t"               \
-    "adcs r5, r5, r5 \n\t"               \
-    "adcs r6, r6, r6 \n\t"               \
-    "adcs r7, r7, r7 \n\t"               \
-    "adcs r8, r8, r8 \n\t"               \
-    "adcs r9, r9, #0 \n\t"               \
-                                         \
-    /* Add into previous: add the 5 words already at [r0] into r3..r7, rippling the carry through r8/r9, then rewind r0 */ \
-    "ldr r14, [r0], #4 \n\t"             \
-    "adds r3, r3, r14 \n\t"              \
-    "ldr r14, [r0], #4 \n\t"             \
-    "adcs r4, r4, r14 \n\t"              \
-    "ldr r14, [r0], #4 \n\t"             \
-    "adcs r5, r5, r14 \n\t"              \
-    "ldr r14, [r0], #4 \n\t"             \
-    "adcs r6, r6, r14 \n\t"              \
-    "ldr r14, [r0], #4 \n\t"             \
-    "adcs r7, r7, r14 \n\t"              \
-    "adcs r8, r8, #0 \n\t"               \
-    "adcs r9, r9, #0 \n\t"               \
-    "sub r0, #20 \n\t"                   \
-                                         \
-    /* Center product: add a5*a5 into r9:r8, then store the 7 words r3..r9 */ \
-    "umlal r8, r9, r11, r11 \n\t"        \
-    "stmia r0!, {r3,r4,r5,r6,r7,r8,r9} \n\t"
-
-#define FAST_SQUARE_ASM_6 /* Square the 6 words read from [r1]; write the 12-word result through r0. */ \
-    "push   {r2} \n\t"                     \
-    "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t"  \
-    "push   {r1} \n\t"                     \
-    /* r2..r7 = input words a0..a5. r2 and the advanced r1 were saved because both are overwritten below. Column 0: a0*a0. */ \
-    "umull r11, r12, r2, r2 \n\t"          \
-    "stmia r0!, {r11} \n\t"                \
-    /* Column 1: a0*a1 counted twice (the product is added in two passes rather than doubled). */ \
-    "mov r9, #0 \n\t"                      \
-    "umull r10, r11, r2, r3 \n\t"          \
-    "adds r12, r12, r10 \n\t"              \
-    "adcs r8, r11, #0 \n\t"                \
-    "adc r9, r9, #0 \n\t"                  \
-    "adds r12, r12, r10 \n\t"              \
-    "adcs r8, r8, r11 \n\t"                \
-    "adc r9, r9, #0 \n\t"                  \
-    "stmia r0!, {r12} \n\t"                \
-    /* Column 2: 2*a0*a2 + a1*a1, added to the carry in r8/r9. */ \
-    "mov r10, #0 \n\t"                     \
-    "umull r11, r12, r2, r4 \n\t"          \
-    "adds r11, r11, r11 \n\t"              \
-    "adcs r12, r12, r12 \n\t"              \
-    "adc r10, r10, #0 \n\t"                \
-    "adds r8, r8, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "umull r11, r12, r3, r3 \n\t"          \
-    "adds r8, r8, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "stmia r0!, {r8} \n\t"                 \
-    /* Column 3: 2*(a0*a3 + a1*a2), plus the carry in r9/r10; r1 and r14 are scratch. */ \
-    "mov r12, #0 \n\t"                     \
-    "umull r8, r11, r2, r5 \n\t"           \
-    "umull r1, r14, r3, r4 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r11, r11, r14 \n\t"              \
-    "adc r12, r12, #0 \n\t"                \
-    "adds r8, r8, r8 \n\t"                 \
-    "adcs r11, r11, r11 \n\t"              \
-    "adc r12, r12, r12 \n\t"               \
-    "adds r8, r8, r9 \n\t"                 \
-    "adcs r11, r11, r10 \n\t"              \
-    "adc r12, r12, #0 \n\t"                \
-    "stmia r0!, {r8} \n\t"                 \
-    /* Column 4: 2*(a0*a4 + a1*a3) + a2*a2, plus the carry in r11/r12. */ \
-    "mov r10, #0 \n\t"                     \
-    "umull r8, r9, r2, r6 \n\t"            \
-    "umull r1, r14, r3, r5 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r9, r9, r14 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "adds r8, r8, r8 \n\t"                 \
-    "adcs r9, r9, r9 \n\t"                 \
-    "adc r10, r10, r10 \n\t"               \
-    "umull r1, r14, r4, r4 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r9, r9, r14 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "adds r8, r8, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "stmia r0!, {r8} \n\t"                 \
-    /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3), plus the carry in r9/r10. */ \
-    "mov r12, #0 \n\t"                     \
-    "umull r8, r11, r2, r7 \n\t"           \
-    "umull r1, r14, r3, r6 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r11, r11, r14 \n\t"              \
-    "adc r12, r12, #0 \n\t"                \
-    "umull r1, r14, r4, r5 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r11, r11, r14 \n\t"              \
-    "adc r12, r12, #0 \n\t"                \
-    "adds r8, r8, r8 \n\t"                 \
-    "adcs r11, r11, r11 \n\t"              \
-    "adc r12, r12, r12 \n\t"               \
-    "adds r8, r8, r9 \n\t"                 \
-    "adcs r11, r11, r10 \n\t"              \
-    "adc r12, r12, #0 \n\t"                \
-    "stmia r0!, {r8} \n\t"                 \
-    /* Column 6: 2*(a1*a5 + a2*a4) + a3*a3, plus the carry in r11/r12. */ \
-    "mov r10, #0 \n\t"                     \
-    "umull r8, r9, r3, r7 \n\t"            \
-    "umull r1, r14, r4, r6 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r9, r9, r14 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "adds r8, r8, r8 \n\t"                 \
-    "adcs r9, r9, r9 \n\t"                 \
-    "adc r10, r10, r10 \n\t"               \
-    "umull r1, r14, r5, r5 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r9, r9, r14 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "adds r8, r8, r11 \n\t"                \
-    "adcs r9, r9, r12 \n\t"                \
-    "adc r10, r10, #0 \n\t"                \
-    "stmia r0!, {r8} \n\t"                 \
-    /* Column 7: 2*(a2*a5 + a3*a4), plus the carry in r9/r10. */ \
-    "mov r12, #0 \n\t"                     \
-    "umull r8, r11, r4, r7 \n\t"           \
-    "umull r1, r14, r5, r6 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r11, r11, r14 \n\t"              \
-    "adc r12, r12, #0 \n\t"                \
-    "adds r8, r8, r8 \n\t"                 \
-    "adcs r11, r11, r11 \n\t"              \
-    "adc r12, r12, r12 \n\t"               \
-    "adds r8, r8, r9 \n\t"                 \
-    "adcs r11, r11, r10 \n\t"              \
-    "adc r12, r12, #0 \n\t"                \
-    "stmia r0!, {r8} \n\t"                 \
-    /* Column 8: 2*a3*a5 + a4*a4, accumulated directly onto the carry in r11/r12. */ \
-    "mov r8, #0 \n\t"                      \
-    "umull r1, r10, r5, r7 \n\t"           \
-    "adds r1, r1, r1 \n\t"                 \
-    "adcs r10, r10, r10 \n\t"              \
-    "adc r8, r8, #0 \n\t"                  \
-    "adds r11, r11, r1 \n\t"               \
-    "adcs r12, r12, r10 \n\t"              \
-    "adc r8, r8, #0 \n\t"                  \
-    "umull r1, r10, r6, r6 \n\t"           \
-    "adds r11, r11, r1 \n\t"               \
-    "adcs r12, r12, r10 \n\t"              \
-    "adc r8, r8, #0 \n\t"                  \
-    "stmia r0!, {r11} \n\t"                \
-    /* Column 9: 2*a4*a5, accumulated onto the carry in r12/r8. */ \
-    "mov r11, #0 \n\t"                     \
-    "umull r1, r10, r6, r7 \n\t"           \
-    "adds r1, r1, r1 \n\t"                 \
-    "adcs r10, r10, r10 \n\t"              \
-    "adc r11, r11, #0 \n\t"                \
-    "adds r12, r12, r1 \n\t"               \
-    "adcs r8, r8, r10 \n\t"                \
-    "adc r11, r11, #0 \n\t"                \
-    "stmia r0!, {r12} \n\t"                \
-    /* Columns 10-11: a5*a5 plus the remaining carry; then restore the saved r1 and r2. */ \
-    "umull r1, r10, r7, r7 \n\t"           \
-    "adds r8, r8, r1 \n\t"                 \
-    "adcs r11, r11, r10 \n\t"              \
-    "stmia r0!, {r8, r11} \n\t"            \
-    "pop {r1, r2} \n\t"
-
-#define FAST_SQUARE_ASM_6_TO_7 /* Fold the terms involving a 7th input word into the result at [r0]; skipped when r2 == 6. */ \
-    "cmp r2, #6 \n\t"                        \
-    "beq 1f \n\t"                            \
-    /* "1f" is a forward local label defined outside this macro. NOTE(review): presumably expanded right after FAST_SQUARE_ASM_6 with r2 = word count - confirm at the use site. Step both pointers back by 6 words (24 bytes). */ \
-    "sub r0, #24 \n\t"                       \
-    "sub r1, #24 \n\t"                       \
-                                             \
-    /* Off-center products: load a0..a6 into r6..r12, then r3..r9 = (a0..a5) * a6 */ \
-    "ldmia r1!, {r6,r7,r8,r9,r10,r11,r12} \n\t" \
-    "umull r3, r4, r6, r12 \n\t"             \
-    "umull r6, r5, r7, r12 \n\t"             \
-    "adds r4, r4, r6 \n\t"                   \
-    "umull r7, r6, r8, r12 \n\t"             \
-    "adcs r5, r5, r7 \n\t"                   \
-    "umull r8, r7, r9, r12 \n\t"             \
-    "adcs r6, r6, r8 \n\t"                   \
-    "umull r9, r8, r10, r12 \n\t"            \
-    "adcs r7, r7, r9 \n\t"                   \
-    "umull r10, r9, r11, r12 \n\t"           \
-    "adcs r8, r8, r10 \n\t"                  \
-    "adcs r9, r9, #0 \n\t"                   \
-                                             \
-    /* Multiply by 2: double r3..r9, catching the carry-out in r10 */ \
-    "mov r10, #0 \n\t"                       \
-    "adds r3, r3, r3 \n\t"                   \
-    "adcs r4, r4, r4 \n\t"                   \
-    "adcs r5, r5, r5 \n\t"                   \
-    "adcs r6, r6, r6 \n\t"                   \
-    "adcs r7, r7, r7 \n\t"                   \
-    "adcs r8, r8, r8 \n\t"                   \
-    "adcs r9, r9, r9 \n\t"                   \
-    "adcs r10, r10, #0 \n\t"                 \
-                                             \
-    /* Add into previous: add the 6 words already at [r0] into r3..r8, rippling the carry through r9/r10, then rewind r0 */ \
-    "ldr r14, [r0], #4 \n\t"                 \
-    "adds r3, r3, r14 \n\t"                  \
-    "ldr r14, [r0], #4 \n\t"                 \
-    "adcs r4, r4, r14 \n\t"                  \
-    "ldr r14, [r0], #4 \n\t"                 \
-    "adcs r5, r5, r14 \n\t"                  \
-    "ldr r14, [r0], #4 \n\t"                 \
-    "adcs r6, r6, r14 \n\t"                  \
-    "ldr r14, [r0], #4 \n\t"                 \
-    "adcs r7, r7, r14 \n\t"                  \
-    "ldr r14, [r0], #4 \n\t"                 \
-    "adcs r8, r8, r14 \n\t"                  \
-    "adcs r9, r9, #0 \n\t"                   \
-    "adcs r10, r10, #0 \n\t"                 \
-    "sub r0, #24 \n\t"                       \
-                                             \
-    /* Center product: add a6*a6 into r10:r9, then store the 8 words r3..r10 */ \
-    "umlal r9, r10, r12, r12 \n\t"           \
-    "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10} \n\t"
-
-#define FAST_SQUARE_ASM_7 /* Square the 7 words read from [r1]; write the 14-word result through r0. */ \
-    "push   {r2} \n\t"                             \
-    "ldmia r1!, {r2, r3, r4, r5, r6, r7, r8} \n\t" \
-    "push   {r1} \n\t"                             \
-    "sub r1, 4 \n\t"                               \
-    /* r2..r8 = input words a0..a6; r1 now points back at a6. r8 is reused as an accumulator below, so a0*a6 is parked in result words 6-7 first and r0 is returned to word 0. */ \
-    "add r0, 24 \n\t"                              \
-    "umull r9, r10, r2, r8 \n\t"                   \
-    "stmia r0!, {r9, r10} \n\t"                    \
-    "sub r0, 32 \n\t"                              \
-    /* Column 0: a0*a0. */ \
-    "umull r11, r12, r2, r2 \n\t"                  \
-    "stmia r0!, {r11} \n\t"                        \
-    /* Column 1: a0*a1 counted twice (the product is added in two passes rather than doubled). */ \
-    "mov r9, #0 \n\t"                              \
-    "umull r10, r11, r2, r3 \n\t"                  \
-    "adds r12, r12, r10 \n\t"                      \
-    "adcs r8, r11, #0 \n\t"                        \
-    "adc r9, r9, #0 \n\t"                          \
-    "adds r12, r12, r10 \n\t"                      \
-    "adcs r8, r8, r11 \n\t"                        \
-    "adc r9, r9, #0 \n\t"                          \
-    "stmia r0!, {r12} \n\t"                        \
-    /* Column 2: 2*a0*a2 + a1*a1, added to the carry in r8/r9. */ \
-    "mov r10, #0 \n\t"                             \
-    "umull r11, r12, r2, r4 \n\t"                  \
-    "adds r11, r11, r11 \n\t"                      \
-    "adcs r12, r12, r12 \n\t"                      \
-    "adc r10, r10, #0 \n\t"                        \
-    "adds r8, r8, r11 \n\t"                        \
-    "adcs r9, r9, r12 \n\t"                        \
-    "adc r10, r10, #0 \n\t"                        \
-    "umull r11, r12, r3, r3 \n\t"                  \
-    "adds r8, r8, r11 \n\t"                        \
-    "adcs r9, r9, r12 \n\t"                        \
-    "adc r10, r10, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 3: 2*(a0*a3 + a1*a2). From here on umlal accumulates in place: r14 keeps the old high word, and cmp / it hi / adchi adds 1 to the top carry word when the high word decreased (wrapped). */ \
-    "mov r12, #0 \n\t"                             \
-    "umull r8, r11, r2, r5 \n\t"                   \
-    "mov r14, r11 \n\t"                            \
-    "umlal r8, r11, r3, r4 \n\t"                   \
-    "cmp r14, r11 \n\t"                            \
-    "it hi \n\t"                                   \
-    "adchi r12, r12, #0 \n\t"                      \
-    "adds r8, r8, r8 \n\t"                         \
-    "adcs r11, r11, r11 \n\t"                      \
-    "adc r12, r12, r12 \n\t"                       \
-    "adds r8, r8, r9 \n\t"                         \
-    "adcs r11, r11, r10 \n\t"                      \
-    "adc r12, r12, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 4: 2*(a0*a4 + a1*a3) + a2*a2, plus the carry in r11/r12. */ \
-    "mov r10, #0 \n\t"                             \
-    "umull r8, r9, r2, r6 \n\t"                    \
-    "mov r14, r9 \n\t"                             \
-    "umlal r8, r9, r3, r5 \n\t"                    \
-    "cmp r14, r9 \n\t"                             \
-    "it hi \n\t"                                   \
-    "adchi r10, r10, #0 \n\t"                      \
-    "adds r8, r8, r8 \n\t"                         \
-    "adcs r9, r9, r9 \n\t"                         \
-    "adc r10, r10, r10 \n\t"                       \
-    "mov r14, r9 \n\t"                             \
-    "umlal r8, r9, r4, r4 \n\t"                    \
-    "cmp r14, r9 \n\t"                             \
-    "it hi \n\t"                                   \
-    "adchi r10, r10, #0 \n\t"                      \
-    "adds r8, r8, r11 \n\t"                        \
-    "adcs r9, r9, r12 \n\t"                        \
-    "adc r10, r10, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3), plus the carry in r9/r10. */ \
-    "mov r12, #0 \n\t"                             \
-    "umull r8, r11, r2, r7 \n\t"                   \
-    "mov r14, r11 \n\t"                            \
-    "umlal r8, r11, r3, r6 \n\t"                   \
-    "cmp r14, r11 \n\t"                            \
-    "it hi \n\t"                                   \
-    "adchi r12, r12, #0 \n\t"                      \
-    "mov r14, r11 \n\t"                            \
-    "umlal r8, r11, r4, r5 \n\t"                   \
-    "cmp r14, r11 \n\t"                            \
-    "it hi \n\t"                                   \
-    "adchi r12, r12, #0 \n\t"                      \
-    "adds r8, r8, r8 \n\t"                         \
-    "adcs r11, r11, r11 \n\t"                      \
-    "adc r12, r12, r12 \n\t"                       \
-    "adds r8, r8, r9 \n\t"                         \
-    "adcs r11, r11, r10 \n\t"                      \
-    "adc r12, r12, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 6: a0 is no longer needed, so r2 is reloaded with a6. Terms: 2*(a1*a5 + a2*a4 + low word of the parked a0*a6 read from [r0]) + a3*a3. */ \
-    "ldmia r1!, {r2} \n\t"                         \
-    "mov r10, #0 \n\t"                             \
-    "umull r8, r9, r3, r7 \n\t"                    \
-    "mov r14, r9 \n\t"                             \
-    "umlal r8, r9, r4, r6 \n\t"                    \
-    "cmp r14, r9 \n\t"                             \
-    "it hi \n\t"                                   \
-    "adchi r10, r10, #0 \n\t"                      \
-    "ldr r14, [r0] \n\t"                           \
-    "adds r8, r8, r14 \n\t"                        \
-    "adcs r9, r9, #0 \n\t"                         \
-    "adc r10, r10, #0 \n\t"                        \
-    "adds r8, r8, r8 \n\t"                         \
-    "adcs r9, r9, r9 \n\t"                         \
-    "adc r10, r10, r10 \n\t"                       \
-    "mov r14, r9 \n\t"                             \
-    "umlal r8, r9, r5, r5 \n\t"                    \
-    "cmp r14, r9 \n\t"                             \
-    "it hi \n\t"                                   \
-    "adchi r10, r10, #0 \n\t"                      \
-    "adds r8, r8, r11 \n\t"                        \
-    "adcs r9, r9, r12 \n\t"                        \
-    "adc r10, r10, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 7 (r2 = a6): 2*(a1*a6 + a2*a5 + a3*a4 + high word of the parked a0*a6 read from [r0]). */ \
-    "mov r12, #0 \n\t"                             \
-    "umull r8, r11, r3, r2 \n\t"                   \
-    "mov r14, r11 \n\t"                            \
-    "umlal r8, r11, r4, r7 \n\t"                   \
-    "cmp r14, r11 \n\t"                            \
-    "it hi \n\t"                                   \
-    "adchi r12, r12, #0 \n\t"                      \
-    "mov r14, r11 \n\t"                            \
-    "umlal r8, r11, r5, r6 \n\t"                   \
-    "cmp r14, r11 \n\t"                            \
-    "it hi \n\t"                                   \
-    "adchi r12, r12, #0 \n\t"                      \
-    "ldr r14, [r0] \n\t"                           \
-    "adds r8, r8, r14 \n\t"                        \
-    "adcs r11, r11, #0 \n\t"                       \
-    "adc r12, r12, #0 \n\t"                        \
-    "adds r8, r8, r8 \n\t"                         \
-    "adcs r11, r11, r11 \n\t"                      \
-    "adc r12, r12, r12 \n\t"                       \
-    "adds r8, r8, r9 \n\t"                         \
-    "adcs r11, r11, r10 \n\t"                      \
-    "adc r12, r12, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 8: 2*(a2*a6 + a3*a5) + a4*a4, plus the carry in r11/r12. */ \
-    "mov r10, #0 \n\t"                             \
-    "umull r8, r9, r4, r2 \n\t"                    \
-    "mov r14, r9 \n\t"                             \
-    "umlal r8, r9, r5, r7 \n\t"                    \
-    "cmp r14, r9 \n\t"                             \
-    "it hi \n\t"                                   \
-    "adchi r10, r10, #0 \n\t"                      \
-    "adds r8, r8, r8 \n\t"                         \
-    "adcs r9, r9, r9 \n\t"                         \
-    "adc r10, r10, r10 \n\t"                       \
-    "mov r14, r9 \n\t"                             \
-    "umlal r8, r9, r6, r6 \n\t"                    \
-    "cmp r14, r9 \n\t"                             \
-    "it hi \n\t"                                   \
-    "adchi r10, r10, #0 \n\t"                      \
-    "adds r8, r8, r11 \n\t"                        \
-    "adcs r9, r9, r12 \n\t"                        \
-    "adc r10, r10, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 9: 2*(a3*a6 + a4*a5), plus the carry in r9/r10. */ \
-    "mov r12, #0 \n\t"                             \
-    "umull r8, r11, r5, r2 \n\t"                   \
-    "mov r14, r11 \n\t"                            \
-    "umlal r8, r11, r6, r7 \n\t"                   \
-    "cmp r14, r11 \n\t"                            \
-    "it hi \n\t"                                   \
-    "adchi r12, r12, #0 \n\t"                      \
-    "adds r8, r8, r8 \n\t"                         \
-    "adcs r11, r11, r11 \n\t"                      \
-    "adc r12, r12, r12 \n\t"                       \
-    "adds r8, r8, r9 \n\t"                         \
-    "adcs r11, r11, r10 \n\t"                      \
-    "adc r12, r12, #0 \n\t"                        \
-    "stmia r0!, {r8} \n\t"                         \
-    /* Column 10: 2*a4*a6 + a5*a5, accumulated directly onto the carry in r11/r12; r1 is scratch from here on. */ \
-    "mov r8, #0 \n\t"                              \
-    "umull r1, r10, r6, r2 \n\t"                   \
-    "adds r1, r1, r1 \n\t"                         \
-    "adcs r10, r10, r10 \n\t"                      \
-    "adc r8, r8, #0 \n\t"                          \
-    "adds r11, r11, r1 \n\t"                       \
-    "adcs r12, r12, r10 \n\t"                      \
-    "adc r8, r8, #0 \n\t"                          \
-    "umull r1, r10, r7, r7 \n\t"                   \
-    "adds r11, r11, r1 \n\t"                       \
-    "adcs r12, r12, r10 \n\t"                      \
-    "adc r8, r8, #0 \n\t"                          \
-    "stmia r0!, {r11} \n\t"                        \
-    /* Column 11: 2*a5*a6, accumulated onto the carry in r12/r8. */ \
-    "mov r11, #0 \n\t"                             \
-    "umull r1, r10, r7, r2 \n\t"                   \
-    "adds r1, r1, r1 \n\t"                         \
-    "adcs r10, r10, r10 \n\t"                      \
-    "adc r11, r11, #0 \n\t"                        \
-    "adds r12, r12, r1 \n\t"                       \
-    "adcs r8, r8, r10 \n\t"                        \
-    "adc r11, r11, #0 \n\t"                        \
-    "stmia r0!, {r12} \n\t"                        \
-    /* Columns 12-13: a6*a6 plus the remaining carry; then restore the saved r1 and r2. */ \
-    "umull r1, r10, r2, r2 \n\t"                   \
-    "adds r8, r8, r1 \n\t"                         \
-    "adcs r11, r11, r10 \n\t"                      \
-    "stmia r0!, {r8, r11} \n\t"                    \
-    "pop {r1, r2} \n\t"
-
-#define FAST_SQUARE_ASM_7_TO_8 /* Fold the terms involving an 8th input word into the result at [r0]; skipped when r2 == 7. */ \
-    "cmp r2, #7 \n\t"                    \
-    "beq 1f \n\t"                        \
-    /* "1f" is a forward local label defined outside this macro. NOTE(review): presumably expanded right after FAST_SQUARE_ASM_7 with r2 = word count - confirm at the use site. Step both pointers back by 7 words (28 bytes). */ \
-    "sub r0, #28 \n\t"                   \
-    "sub r1, #28 \n\t"                   \
-                                         \
-    /* Off-center products: load a0..a7 into r6..r12 and r14, then r3..r10 = (a0..a6) * a7 */ \
-    "ldmia r1!, {r6,r7,r8,r9,r10,r11,r12,r14} \n\t" \
-    "umull r3, r4, r6, r14 \n\t"         \
-    "umull r6, r5, r7, r14 \n\t"         \
-    "adds r4, r4, r6 \n\t"               \
-    "umull r7, r6, r8, r14 \n\t"         \
-    "adcs r5, r5, r7 \n\t"               \
-    "umull r8, r7, r9, r14 \n\t"         \
-    "adcs r6, r6, r8 \n\t"               \
-    "umull r9, r8, r10, r14 \n\t"        \
-    "adcs r7, r7, r9 \n\t"               \
-    "umull r10, r9, r11, r14 \n\t"       \
-    "adcs r8, r8, r10 \n\t"              \
-    "umull r11, r10, r12, r14 \n\t"      \
-    "adcs r9, r9, r11 \n\t"              \
-    "adcs r10, r10, #0 \n\t"             \
-                                         \
-    /* Multiply by 2: double r3..r10, catching the carry-out in r11 */ \
-    "mov r11, #0 \n\t"                   \
-    "adds r3, r3, r3 \n\t"               \
-    "adcs r4, r4, r4 \n\t"               \
-    "adcs r5, r5, r5 \n\t"               \
-    "adcs r6, r6, r6 \n\t"               \
-    "adcs r7, r7, r7 \n\t"               \
-    "adcs r8, r8, r8 \n\t"               \
-    "adcs r9, r9, r9 \n\t"               \
-    "adcs r10, r10, r10 \n\t"            \
-    "adcs r11, r11, #0 \n\t"             \
-                                         \
-    /* Add into previous: add the 7 words already at [r0] into r3..r9 (r12 is the load temporary here), rippling the carry through r10/r11, then rewind r0 */ \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adds r3, r3, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r4, r4, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r5, r5, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r6, r6, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r7, r7, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r8, r8, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r9, r9, r12 \n\t"              \
-    "adcs r10, r10, #0 \n\t"             \
-    "adcs r11, r11, #0 \n\t"             \
-    "sub r0, #28 \n\t"                   \
-                                         \
-    /* Center product: add a7*a7 into r11:r10, then store the 9 words r3..r11 */ \
-    "umlal r10, r11, r14, r14 \n\t"      \
-    "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10,r11} \n\t"
-
-#define FAST_SQUARE_ASM_8                   \
-    "push   {r2} \n\t"                      \
-    "ldmia r1!, {r2,r3,r4,r5,r6,r7,r8,r9} \n\t" \
-    "push   {r1} \n\t"                      \
-    "sub r1, 8 \n\t"                        \
-                                            \
-    "add r0, 24 \n\t"                       \
-    "umull r10, r11, r2, r8 \n\t"           \
-    "umull r12, r14, r2, r9 \n\t"           \
-    "umull r8, r9, r3, r9 \n\t"             \
-    "adds r11, r11, r12 \n\t"               \
-    "adcs r12, r14, r8 \n\t"                \
-    "adcs r14, r9, #0 \n\t"                 \
-    "stmia r0!, {r10, r11, r12, r14} \n\t"  \
-    "sub r0, 40 \n\t"                       \
-                                            \
-    "umull r11, r12, r2, r2 \n\t"           \
-    "stmia r0!, {r11} \n\t"                 \
-                                            \
-    "mov r9, #0 \n\t"                       \
-    "umull r10, r11, r2, r3 \n\t"           \
-    "adds r12, r12, r10 \n\t"               \
-    "adcs r8, r11, #0 \n\t"                 \
-    "adc r9, r9, #0 \n\t"                   \
-    "adds r12, r12, r10 \n\t"               \
-    "adcs r8, r8, r11 \n\t"                 \
-    "adc r9, r9, #0 \n\t"                   \
-    "stmia r0!, {r12} \n\t"                 \
-                                            \
-    "mov r10, #0 \n\t"                      \
-    "umull r11, r12, r2, r4 \n\t"           \
-    "adds r11, r11, r11 \n\t"               \
-    "adcs r12, r12, r12 \n\t"               \
-    "adc r10, r10, #0 \n\t"                 \
-    "adds r8, r8, r11 \n\t"                 \
-    "adcs r9, r9, r12 \n\t"                 \
-    "adc r10, r10, #0 \n\t"                 \
-    "umull r11, r12, r3, r3 \n\t"           \
-    "adds r8, r8, r11 \n\t"                 \
-    "adcs r9, r9, r12 \n\t"                 \
-    "adc r10, r10, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r12, #0 \n\t"                      \
-    "umull r8, r11, r2, r5 \n\t"            \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r3, r4 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r11, r11, r11 \n\t"               \
-    "adc r12, r12, r12 \n\t"                \
-    "adds r8, r8, r9 \n\t"                  \
-    "adcs r11, r11, r10 \n\t"               \
-    "adc r12, r12, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r10, #0 \n\t"                      \
-    "umull r8, r9, r2, r6 \n\t"             \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r3, r5 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r9, r9, r9 \n\t"                  \
-    "adc r10, r10, r10 \n\t"                \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r4, r4 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "adds r8, r8, r11 \n\t"                 \
-    "adcs r9, r9, r12 \n\t"                 \
-    "adc r10, r10, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r12, #0 \n\t"                      \
-    "umull r8, r11, r2, r7 \n\t"            \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r3, r6 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r4, r5 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r11, r11, r11 \n\t"               \
-    "adc r12, r12, r12 \n\t"                \
-    "adds r8, r8, r9 \n\t"                  \
-    "adcs r11, r11, r10 \n\t"               \
-    "adc r12, r12, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "ldmia r1!, {r2} \n\t"                  \
-    "mov r10, #0 \n\t"                      \
-    "umull r8, r9, r3, r7 \n\t"             \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r4, r6 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "ldr r14, [r0] \n\t"                    \
-    "adds r8, r8, r14 \n\t"                 \
-    "adcs r9, r9, #0 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                 \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r9, r9, r9 \n\t"                  \
-    "adc r10, r10, r10 \n\t"                \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r5, r5 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "adds r8, r8, r11 \n\t"                 \
-    "adcs r9, r9, r12 \n\t"                 \
-    "adc r10, r10, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r12, #0 \n\t"                      \
-    "umull r8, r11, r3, r2 \n\t"            \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r4, r7 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r5, r6 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "ldr r14, [r0] \n\t"                    \
-    "adds r8, r8, r14 \n\t"                 \
-    "adcs r11, r11, #0 \n\t"                \
-    "adc r12, r12, #0 \n\t"                 \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r11, r11, r11 \n\t"               \
-    "adc r12, r12, r12 \n\t"                \
-    "adds r8, r8, r9 \n\t"                  \
-    "adcs r11, r11, r10 \n\t"               \
-    "adc r12, r12, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "ldmia r1!, {r3} \n\t"                  \
-    "mov r10, #0 \n\t"                      \
-    "umull r8, r9, r4, r2 \n\t"             \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r5, r7 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "ldr r14, [r0] \n\t"                    \
-    "adds r8, r8, r14 \n\t"                 \
-    "adcs r9, r9, #0 \n\t"                  \
-    "adc r10, r10, #0 \n\t"                 \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r9, r9, r9 \n\t"                  \
-    "adc r10, r10, r10 \n\t"                \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r6, r6 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "adds r8, r8, r11 \n\t"                 \
-    "adcs r9, r9, r12 \n\t"                 \
-    "adc r10, r10, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r12, #0 \n\t"                      \
-    "umull r8, r11, r4, r3 \n\t"            \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r5, r2 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r6, r7 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "ldr r14, [r0] \n\t"                    \
-    "adds r8, r8, r14 \n\t"                 \
-    "adcs r11, r11, #0 \n\t"                \
-    "adc r12, r12, #0 \n\t"                 \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r11, r11, r11 \n\t"               \
-    "adc r12, r12, r12 \n\t"                \
-    "adds r8, r8, r9 \n\t"                  \
-    "adcs r11, r11, r10 \n\t"               \
-    "adc r12, r12, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r10, #0 \n\t"                      \
-    "umull r8, r9, r5, r3 \n\t"             \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r6, r2 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r9, r9, r9 \n\t"                  \
-    "adc r10, r10, r10 \n\t"                \
-    "mov r14, r9 \n\t"                      \
-    "umlal r8, r9, r7, r7 \n\t"             \
-    "cmp r14, r9 \n\t"                      \
-    "it hi \n\t"                            \
-    "adchi r10, r10, #0 \n\t"               \
-    "adds r8, r8, r11 \n\t"                 \
-    "adcs r9, r9, r12 \n\t"                 \
-    "adc r10, r10, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r12, #0 \n\t"                      \
-    "umull r8, r11, r6, r3 \n\t"            \
-    "mov r14, r11 \n\t"                     \
-    "umlal r8, r11, r7, r2 \n\t"            \
-    "cmp r14, r11 \n\t"                     \
-    "it hi \n\t"                            \
-    "adchi r12, r12, #0 \n\t"               \
-    "adds r8, r8, r8 \n\t"                  \
-    "adcs r11, r11, r11 \n\t"               \
-    "adc r12, r12, r12 \n\t"                \
-    "adds r8, r8, r9 \n\t"                  \
-    "adcs r11, r11, r10 \n\t"               \
-    "adc r12, r12, #0 \n\t"                 \
-    "stmia r0!, {r8} \n\t"                  \
-                                            \
-    "mov r8, #0 \n\t"                       \
-    "umull r1, r10, r7, r3 \n\t"            \
-    "adds r1, r1, r1 \n\t"                  \
-    "adcs r10, r10, r10 \n\t"               \
-    "adc r8, r8, #0 \n\t"                   \
-    "adds r11, r11, r1 \n\t"                \
-    "adcs r12, r12, r10 \n\t"               \
-    "adc r8, r8, #0 \n\t"                   \
-    "umull r1, r10, r2, r2 \n\t"            \
-    "adds r11, r11, r1 \n\t"                \
-    "adcs r12, r12, r10 \n\t"               \
-    "adc r8, r8, #0 \n\t"                   \
-    "stmia r0!, {r11} \n\t"                 \
-                                            \
-    "mov r11, #0 \n\t"                      \
-    "umull r1, r10, r2, r3 \n\t"            \
-    "adds r1, r1, r1 \n\t"                  \
-    "adcs r10, r10, r10 \n\t"               \
-    "adc r11, r11, #0 \n\t"                 \
-    "adds r12, r12, r1 \n\t"                \
-    "adcs r8, r8, r10 \n\t"                 \
-    "adc r11, r11, #0 \n\t"                 \
-    "stmia r0!, {r12} \n\t"                 \
-                                            \
-    "umull r1, r10, r3, r3 \n\t"            \
-    "adds r8, r8, r1 \n\t"                  \
-    "adcs r11, r11, r10 \n\t"               \
-    "stmia r0!, {r8, r11} \n\t"             \
-    "pop {r1, r2} \n\t"
-
-#endif /* _UECC_ASM_ARM_MULT_SQUARE_H_ */

+ 0 - 1202
components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_arm_mult_square_umaal.inc

@@ -1,1202 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_ASM_ARM_MULT_SQUARE_H_
-#define _UECC_ASM_ARM_MULT_SQUARE_H_
-/* FAST_MULT_ASM_5: inline-assembly text for a 5-word by 5-word multiply.
- *
- * Register use, as visible in the fragment:
- *   r0      output pointer; ten words are stored through it (post-incremented).
- *   r1      first operand pointer; one word is loaded per round into r2.
- *   r2      second operand pointer on entry; its five words are loaded into
- *           r3-r7 and kept there for all five rounds.
- *   r8-r12, r14  scratch / running carry chain.
- *
- * r3 is pushed on entry and popped at the end. r2 is pushed after the ldmia,
- * so it comes back advanced by five words. r1 ends advanced by five words and
- * r0 by ten.
- *
- * UMAAL RdLo, RdHi, Rn, Rm computes RdHi:RdLo = Rn * Rm + RdLo + RdHi, which
- * is what lets each round fold the previous round's carries in directly.
- */
-#define FAST_MULT_ASM_5                     \
-    "push   {r3} \n\t"                      \
-    "ldmia  r2!, {r3, r4, r5, r6, r7} \n\t" \
-    "push   {r2} \n\t"                      \
-    /* Round 0: first r1 word times r3-r7; the upper accumulators start at 0. */ \
-    "ldr    r2, [r1], #4 \n\t"              \
-    "umull  r8, r9, r3, r2 \n\t"            \
-    "str    r8, [r0], #4 \n\t"              \
-    "mov    r10, #0 \n\t"                   \
-    "umaal  r9, r10, r4, r2 \n\t"           \
-    "mov    r11, #0 \n\t"                   \
-    "umaal  r10, r11, r5, r2 \n\t"          \
-    "mov    r12, #0 \n\t"                   \
-    "umaal  r11, r12, r6, r2 \n\t"          \
-    "mov    r14, #0 \n\t"                   \
-    "umaal  r12, r14, r7, r2 \n\t"          \
-    /* Rounds 1-4: r8 is cleared, receives the next output word, is stored. */ \
-    "ldr    r2, [r1], #4 \n\t"              \
-    "mov    r8, #0 \n\t"                    \
-    "umaal  r8, r9, r3, r2 \n\t"            \
-    "str    r8, [r0], #4 \n\t"              \
-    "umaal  r9, r10, r4, r2 \n\t"           \
-    "umaal  r10, r11, r5, r2 \n\t"          \
-    "umaal  r11, r12, r6, r2 \n\t"          \
-    "umaal  r12, r14, r7, r2 \n\t"          \
-                                            \
-    "ldr    r2, [r1], #4 \n\t"              \
-    "mov    r8, #0 \n\t"                    \
-    "umaal  r8, r9, r3, r2 \n\t"            \
-    "str    r8, [r0], #4 \n\t"              \
-    "umaal  r9, r10, r4, r2 \n\t"           \
-    "umaal  r10, r11, r5, r2 \n\t"          \
-    "umaal  r11, r12, r6, r2 \n\t"          \
-    "umaal  r12, r14, r7, r2 \n\t"          \
-                                            \
-    "ldr    r2, [r1], #4 \n\t"              \
-    "mov    r8, #0 \n\t"                    \
-    "umaal  r8, r9, r3, r2 \n\t"            \
-    "str    r8, [r0], #4 \n\t"              \
-    "umaal  r9, r10, r4, r2 \n\t"           \
-    "umaal  r10, r11, r5, r2 \n\t"          \
-    "umaal  r11, r12, r6, r2 \n\t"          \
-    "umaal  r12, r14, r7, r2 \n\t"          \
-                                            \
-    "ldr    r2, [r1], #4 \n\t"              \
-    "mov    r8, #0 \n\t"                    \
-    "umaal  r8, r9, r3, r2 \n\t"            \
-    "str    r8, [r0], #4 \n\t"              \
-    "umaal  r9, r10, r4, r2 \n\t"           \
-    "umaal  r10, r11, r5, r2 \n\t"          \
-    "umaal  r11, r12, r6, r2 \n\t"          \
-    "umaal  r12, r14, r7, r2 \n\t"          \
-    /* Flush the five accumulators as the high half of the output. */ \
-    "str    r9, [r0], #4 \n\t"              \
-    "str    r10, [r0], #4 \n\t"             \
-    "str    r11, [r0], #4 \n\t"             \
-    "str    r12, [r0], #4 \n\t"             \
-    "str    r14, [r0], #4 \n\t"             \
-    /* Undo both pushes: r2 = pointer saved after the ldmia, r3 = entry value. */ \
-    "pop   {r2, r3} \n\t"
-/* FAST_MULT_ASM_5_TO_6: inline-assembly text that extends a 5-word product
- * already in the output buffer by one extra word of each operand.
- *
- * If r3 equals 5 the fragment is skipped via a forward branch to local label
- * "1", which is not defined inside this macro.
- * NOTE(review): r3 presumably holds the operand word count - confirm in the
- * code that expands this macro.
- *
- * Otherwise r0, r1 and r2 are each rewound by 20 bytes (five words). The five
- * output words then at r0 are combined with (left high * right[i]) and
- * (right high * left[i]) and written back, followed by two further words from
- * the final carry pair. On exit r0 has moved two words past its entry value,
- * and r1 and r2 one word each. r4-r10, r12 and r14 are overwritten.
- * NOTE(review): the rewinds imply the entry state left by FAST_MULT_ASM_5
- * (r0 ten words in, r1/r2 five words in) - confirm at the call site.
- */
-#define FAST_MULT_ASM_5_TO_6                 \
-    "cmp r3, #5 \n\t"                        \
-    "beq 1f \n\t"                            \
-                                             \
-    /* r4 = left high (the word r1 points at; r1 is not advanced here) */ \
-    "ldr r4, [r1] \n\t"                      \
-    /* Rewind all three pointers by five words. */ \
-    "sub r0, #20 \n\t"                       \
-    "sub r1, #20 \n\t"                       \
-    "sub r2, #20 \n\t"                       \
-                                             \
-    /* Do right side: add left high * right[i] to the existing output words, */ \
-    /* keeping the partial sums in r5-r10 rather than storing them yet.      */ \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "mov r5, #0 \n\t"                        \
-    "ldr r6, [r0], #4 \n\t"                  \
-    "umaal  r5, r6, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r7, [r0], #4 \n\t"                  \
-    "umaal  r6, r7, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r8, [r0], #4 \n\t"                  \
-    "umaal  r7, r8, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r9, [r0], #4 \n\t"                  \
-    "umaal  r8, r9, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r10, [r0], #4 \n\t"                 \
-    "umaal  r9, r10, r4, r14 \n\t"           \
-    "sub r0, #20 \n\t"                       \
-                                             \
-    /* r4 = right high (r2 is post-incremented past it) */ \
-    "ldr r4, [r2], #4 \n\t"                  \
-                                             \
-    /* Do left side: add right high * left[i] and store each finished word. */ \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "mov r12, #0 \n\t"                       \
-    "umaal  r12, r5, r4, r14 \n\t"           \
-    "str r12, [r0], #4 \n\t"                 \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r5, r6, r4, r14 \n\t"            \
-    "str r5, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r6, r7, r4, r14 \n\t"            \
-    "str r6, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r7, r8, r4, r14 \n\t"            \
-    "str r7, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"            \
-    "str r8, [r0], #4 \n\t"                  \
-    /* Last load re-reads left high: right high * left high gives the top two words. */ \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r9, r10, r4, r14 \n\t"           \
-    "stmia r0!, {r9, r10} \n\t"
-/* FAST_MULT_ASM_6: inline-assembly text for a 6-word by 6-word multiply,
- * done as two passes of six rounds with three r2-operand words per pass.
- *
- * Register use, as visible in the fragment:
- *   r0      output pointer; ends twelve words past its entry value.
- *   r1      operand read one word per round into r14; rewound between the
- *           passes, ends six words past its entry value.
- *   r2      operand read three words at a time into r4-r6; ends six words
- *           past its entry value.
- *   r8-r11  scratch / running carry chain.
- *
- * Pass 1 writes nine output words. Pass 2 rewinds r0 by six words, so it
- * starts three words into the output, and adds its partial products to the
- * words already stored there.
- */
-#define FAST_MULT_ASM_6                  \
-    "ldmia  r2!, {r4, r5, r6} \n\t"      \
-    /* Pass 1, round 0: umull starts the chain; r10/r11 start at 0. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "umull  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "mov    r10, #0 \n\t"                \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "mov    r11, #0 \n\t"                \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    /* Pass 1, rounds 1-5: r8 is cleared and receives the next output word. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    /* Flush the three accumulators left over from pass 1. */ \
-    "str    r9, [r0], #4 \n\t"           \
-    "str    r10, [r0], #4 \n\t"          \
-    "str    r11, [r0], #4 \n\t"          \
-    /* Rewind r0 and r1 by six words each; fetch the next three r2 words. */ \
-    "sub r0, #24 \n\t"                   \
-    "sub r1, #24 \n\t"                   \
-    "ldmia  r2!, {r4, r5, r6} \n\t"      \
-    /* Pass 2, round 0: r8 is seeded from the stored output word. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "mov    r9, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "mov    r10, #0 \n\t"                \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "mov    r11, #0 \n\t"                \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    /* Pass 2, rounds 1-5: load the stored word, accumulate, write it back. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    /* Final three accumulators become the top three output words. */ \
-    "str    r9, [r0], #4 \n\t"           \
-    "str    r10, [r0], #4 \n\t"          \
-    "str    r11, [r0], #4 \n\t"
-/* FAST_MULT_ASM_6_TO_7: inline-assembly text that extends a 6-word product
- * already in the output buffer by one extra word of each operand.
- *
- * If r3 equals 6 the fragment is skipped via a forward branch to local label
- * "1", which is not defined inside this macro.
- * NOTE(review): r3 presumably holds the operand word count - confirm in the
- * code that expands this macro.
- *
- * Otherwise r0, r1 and r2 are each rewound by 24 bytes (six words). The six
- * output words then at r0 are combined with (left high * right[i]) and
- * (right high * left[i]) and written back, followed by two further words from
- * the final carry pair. On exit r0 has moved two words past its entry value,
- * and r1 and r2 one word each. r4-r12 and r14 are overwritten.
- * NOTE(review): the rewinds imply the entry state left by FAST_MULT_ASM_6
- * (r0 twelve words in, r1/r2 six words in) - confirm at the call site.
- */
-#define FAST_MULT_ASM_6_TO_7                 \
-    "cmp r3, #6 \n\t"                        \
-    "beq 1f \n\t"                            \
-                                             \
-    /* r4 = left high (the word r1 points at; r1 is not advanced here) */ \
-    "ldr r4, [r1] \n\t"                      \
-    /* Rewind all three pointers by six words. */ \
-    "sub r0, #24 \n\t"                       \
-    "sub r1, #24 \n\t"                       \
-    "sub r2, #24 \n\t"                       \
-                                             \
-    /* Do right side: add left high * right[i] to the existing output words, */ \
-    /* keeping the partial sums in r5-r11 rather than storing them yet.      */ \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "mov r5, #0 \n\t"                        \
-    "ldr r6, [r0], #4 \n\t"                  \
-    "umaal  r5, r6, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r7, [r0], #4 \n\t"                  \
-    "umaal  r6, r7, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r8, [r0], #4 \n\t"                  \
-    "umaal  r7, r8, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r9, [r0], #4 \n\t"                  \
-    "umaal  r8, r9, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r10, [r0], #4 \n\t"                 \
-    "umaal  r9, r10, r4, r14 \n\t"           \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r11, [r0], #4 \n\t"                 \
-    "umaal  r10, r11, r4, r14 \n\t"          \
-    "sub r0, #24 \n\t"                       \
-                                             \
-    /* r4 = right high (r2 is post-incremented past it) */ \
-    "ldr r4, [r2], #4 \n\t"                  \
-                                             \
-    /* Do left side: add right high * left[i] and store each finished word. */ \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "mov r12, #0 \n\t"                       \
-    "umaal  r12, r5, r4, r14 \n\t"           \
-    "str r12, [r0], #4 \n\t"                 \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r5, r6, r4, r14 \n\t"            \
-    "str r5, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r6, r7, r4, r14 \n\t"            \
-    "str r6, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r7, r8, r4, r14 \n\t"            \
-    "str r7, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"            \
-    "str r8, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r9, r10, r4, r14 \n\t"           \
-    "str r9, [r0], #4 \n\t"                  \
-    /* Last load re-reads left high: right high * left high gives the top two words. */ \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r10, r11, r4, r14 \n\t"          \
-    "stmia r0!, {r10, r11} \n\t"
-/* FAST_MULT_ASM_7: inline-assembly text for a 7-word by 7-word multiply,
- * done as two passes of seven rounds: four r2-operand words in the first
- * pass and the remaining three in the second.
- *
- * Register use, as visible in the fragment:
- *   r0      output pointer; ends fourteen words past its entry value.
- *   r1      operand read one word per round into r14; rewound between the
- *           passes, ends seven words past its entry value.
- *   r2      operand read into r4-r7 (pass 1) and r4-r6 (pass 2); ends seven
- *           words past its entry value.
- *   r8-r12  scratch / running carry chain (r12 is only used in pass 1).
- *
- * Pass 1 writes eleven output words. Pass 2 rewinds r0 by seven words, so it
- * starts four words into the output, and adds its partial products to the
- * words already stored there.
- */
-#define FAST_MULT_ASM_7                  \
-    "ldmia  r2!, {r4, r5, r6, r7} \n\t"  \
-    /* Pass 1, round 0: umull starts the chain; r10-r12 start at 0. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "umull  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "mov    r10, #0 \n\t"                \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "mov    r11, #0 \n\t"                \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "mov    r12, #0 \n\t"                \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-    /* Pass 1, rounds 1-6: r8 is cleared and receives the next output word. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-    /* Flush the four accumulators left over from pass 1. */ \
-    "str    r9, [r0], #4 \n\t"           \
-    "str    r10, [r0], #4 \n\t"          \
-    "str    r11, [r0], #4 \n\t"          \
-    "str    r12, [r0], #4 \n\t"          \
-    /* Rewind r0 and r1 by seven words each; fetch the last three r2 words. */ \
-    "sub r0, #28 \n\t"                   \
-    "sub r1, #28 \n\t"                   \
-    "ldmia  r2!, {r4, r5, r6} \n\t"      \
-    /* Pass 2, round 0: r8 is seeded from the stored output word. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "mov    r9, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "mov    r10, #0 \n\t"                \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "mov    r11, #0 \n\t"                \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    /* Pass 2, rounds 1-6: load the stored word, accumulate, write it back. */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    /* Final three accumulators become the top three output words. */ \
-    "str    r9, [r0], #4 \n\t"           \
-    "str    r10, [r0], #4 \n\t"          \
-    "str    r11, [r0], #4 \n\t"
-/* FAST_MULT_ASM_7_TO_8: inline-assembly text that extends a 7-word product
- * already in the output buffer by one extra word of each operand.
- *
- * If r3 equals 7 the fragment is skipped via a forward branch to local label
- * "1", which is not defined inside this macro. The branch comes before the
- * push, so the skipped path leaves the stack untouched.
- * NOTE(review): r3 presumably holds the operand word count - confirm in the
- * code that expands this macro.
- *
- * Otherwise r3 is saved (it is reused as an accumulator below) and r0, r1
- * and r2 are each rewound by 28 bytes (seven words). The seven output words
- * then at r0 are combined with (left high * right[i]) and
- * (right high * left[i]) and written back, followed by two further words from
- * the final carry pair. On exit r3 is restored, r0 has moved two words past
- * its entry value, and r1 and r2 one word each. r4-r12 and r14 are
- * overwritten.
- * NOTE(review): the rewinds imply the entry state left by FAST_MULT_ASM_7
- * (r0 fourteen words in, r1/r2 seven words in) - confirm at the call site.
- */
-#define FAST_MULT_ASM_7_TO_8                 \
-    "cmp r3, #7 \n\t"                        \
-    "beq 1f \n\t"                            \
-    "push {r3} \n\t"                         \
-                                             \
-    /* r4 = left high (the word r1 points at; r1 is not advanced here) */ \
-    "ldr r4, [r1] \n\t"                      \
-    /* Rewind all three pointers by seven words. */ \
-    "sub r0, #28 \n\t"                       \
-    "sub r1, #28 \n\t"                       \
-    "sub r2, #28 \n\t"                       \
-                                             \
-    /* Do right side: add left high * right[i] to the existing output words, */ \
-    /* keeping the partial sums in r5-r12 rather than storing them yet.      */ \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "mov r5, #0 \n\t"                        \
-    "ldr r6, [r0], #4 \n\t"                  \
-    "umaal  r5, r6, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r7, [r0], #4 \n\t"                  \
-    "umaal  r6, r7, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r8, [r0], #4 \n\t"                  \
-    "umaal  r7, r8, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r9, [r0], #4 \n\t"                  \
-    "umaal  r8, r9, r4, r14 \n\t"            \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r10, [r0], #4 \n\t"                 \
-    "umaal  r9, r10, r4, r14 \n\t"           \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r11, [r0], #4 \n\t"                 \
-    "umaal  r10, r11, r4, r14 \n\t"          \
-    "ldr r14, [r2], #4 \n\t"                 \
-    "ldr r12, [r0], #4 \n\t"                 \
-    "umaal  r11, r12, r4, r14 \n\t"          \
-    "sub r0, #28 \n\t"                       \
-                                             \
-    /* r4 = right high (r2 is post-incremented past it) */ \
-    "ldr r4, [r2], #4 \n\t"                  \
-                                             \
-    /* Do left side: add right high * left[i] and store each finished word. */ \
-    /* r3 serves as the zeroed low accumulator, since r12 is already in use. */ \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "mov r3, #0 \n\t"                        \
-    "umaal  r3, r5, r4, r14 \n\t"            \
-    "str r3, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r5, r6, r4, r14 \n\t"            \
-    "str r5, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r6, r7, r4, r14 \n\t"            \
-    "str r6, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r7, r8, r4, r14 \n\t"            \
-    "str r7, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"            \
-    "str r8, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r9, r10, r4, r14 \n\t"           \
-    "str r9, [r0], #4 \n\t"                  \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r10, r11, r4, r14 \n\t"          \
-    "str r10, [r0], #4 \n\t"                 \
-    /* Last load re-reads left high: right high * left high gives the top two words. */ \
-    "ldr r14, [r1], #4 \n\t"                 \
-    "umaal  r11, r12, r4, r14 \n\t"          \
-    "stmia r0!, {r11, r12} \n\t"             \
-    "pop {r3} \n\t"
-
-#define FAST_MULT_ASM_8 /* 16 words at [r0] = 8 words at [r1] * 8 words at [r2] */ \
-    "ldmia  r2!, {r4, r5, r6, r7} \n\t"  \
-    /* Pass 1: each [r1] word (in r14) times r4-r7; r9-r12 hold the carries */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "umull  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "mov    r10, #0 \n\t"                \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "mov    r11, #0 \n\t"                \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "mov    r12, #0 \n\t"                \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-    /* Later steps restart r8 at 0 and chain the carries left in r9-r12 */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "mov    r8, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-    /* Store the four words still held in r9-r12 (result words 8-11) */ \
-    "str    r9, [r0], #4 \n\t"           \
-    "str    r10, [r0], #4 \n\t"          \
-    "str    r11, [r0], #4 \n\t"          \
-    "str    r12, [r0], #4 \n\t"          \
-    /* Rewind r0 to result word 4 and r1 to its first word for the second pass */ \
-    "sub r0, #32 \n\t"                   \
-    "sub r1, #32 \n\t"                   \
-    "ldmia  r2!, {r4, r5, r6, r7} \n\t"  \
-    /* Pass 2: like pass 1, but r8 starts from the word already stored at [r0] */ \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "mov    r9, #0 \n\t"                 \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "mov    r10, #0 \n\t"                \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "mov    r11, #0 \n\t"                \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "mov    r12, #0 \n\t"                \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-                                         \
-    "ldr    r14, [r1], #4 \n\t"          \
-    "ldr    r8, [r0] \n\t"               \
-    "umaal  r8, r9, r4, r14 \n\t"        \
-    "str    r8, [r0], #4 \n\t"           \
-    "umaal  r9, r10, r5, r14 \n\t"       \
-    "umaal  r10, r11, r6, r14 \n\t"      \
-    "umaal  r11, r12, r7, r14 \n\t"      \
-    /* Store the four words still held in r9-r12 (result words 12-15) */ \
-    "str    r9, [r0], #4 \n\t"           \
-    "str    r10, [r0], #4 \n\t"          \
-    "str    r11, [r0], #4 \n\t"          \
-    "str    r12, [r0], #4 \n\t"
-
-#define FAST_SQUARE_ASM_5 /* 10 words at [r0] = square of the 5 words at [r1] */ \
-    "ldmia r1!, {r9,r10,r11,r12,r14} \n\t" \
-    "push {r1, r2} \n\t"                \
-    /* r1/r2 are saved and reused as scratch; cross products with word 0 (r9) */ \
-    "umull r1, r2, r10, r9 \n\t"        \
-    "mov r3, #0 \n\t"                   \
-    "umaal r2, r3, r11, r9 \n\t"        \
-    "mov r4, #0 \n\t"                   \
-    "umaal r3, r4, r12, r9 \n\t"        \
-    "mov r5, #0 \n\t"                   \
-    "umaal r4, r5, r14, r9 \n\t"        \
-    /* Cross products with word 1 (r10), then double the low cross-product words */ \
-    "mov r6, #0 \n\t"                   \
-    "umaal r6, r3, r11, r10 \n\t"       \
-    "umaal r3, r4, r12, r10 \n\t"       \
-    "adds r1, r1, r1 \n\t"              \
-    "adcs r2, r2, r2 \n\t"              \
-    "adcs r6, r6, r6 \n\t"              \
-    "adcs r3, r3, r3 \n\t"              \
-    /* Each word's own square is added to the doubled terms, two result words per store */ \
-    "umull r7, r8, r9, r9 \n\t"         \
-    /* Store carry in r9 */             \
-    "mov r9, #0 \n\t"                   \
-    "adc r9, r9, #0 \n\t"               \
-    "adds r8, r8, r1 \n\t"              \
-    "stmia r0!, {r7,r8} \n\t"           \
-                                        \
-    "umull r7, r8, r10, r10 \n\t"       \
-    "adcs r7, r7, r2 \n\t"              \
-    "adcs r8, r8, r6 \n\t"              \
-    "stmia r0!, {r7,r8} \n\t"           \
-                                        \
-    "umaal r4, r5, r14, r10 \n\t"       \
-    /* Store carry in r10 */            \
-    "mov r10, #0 \n\t"                  \
-    "adc r10, r10, #0 \n\t"             \
-                                        \
-    "mov r1, #0 \n\t"                   \
-    "umaal r1, r4, r12, r11 \n\t"       \
-    "umaal r4, r5, r14, r11 \n\t"       \
-                                        \
-    "mov r2, #0 \n\t"                   \
-    "umaal r2, r5, r14, r12 \n\t"       \
-    /* Load carry from r9 */            \
-    "lsrs r9, #1 \n\t"                  \
-    "adcs r1, r1, r1 \n\t"              \
-    "adcs r4, r4, r4 \n\t"              \
-    "adcs r2, r2, r2 \n\t"              \
-    "adcs r5, r5, r5 \n\t"              \
-    /* r9 is 0 now */                   \
-    "adc r9, r9, #0 \n\t"               \
-                                        \
-    /* Use carry from r10 */            \
-    "umaal r3, r10, r11, r11 \n\t"      \
-    "adds r10, r10, r1 \n\t"            \
-    "stmia r0!, {r3,r10} \n\t"          \
-                                        \
-    "umull r6, r10, r12, r12 \n\t"      \
-    "adcs r6, r6, r4 \n\t"              \
-    "adcs r10, r10, r2 \n\t"            \
-    "stmia r0!, {r6,r10} \n\t"          \
-                                        \
-    "umull r6, r10, r14, r14 \n\t"      \
-    "adcs r6, r6, r5 \n\t"              \
-    "adcs r10, r10, r9 \n\t"            \
-    "stmia r0!, {r6,r10} \n\t"          \
-    "pop {r1, r2} \n\t"
-
-#define FAST_SQUARE_ASM_5_TO_6 /* Skipped when r2 == 5; otherwise folds a 6th word into the square */ \
-    "cmp r2, #5 \n\t"                    \
-    "beq 1f \n\t"                        \
-    /* Rewind both pointers by 5 words (20 bytes); label 1 is defined outside this macro */ \
-    "sub r0, #20 \n\t"                   \
-    "sub r1, #20 \n\t"                   \
-                                         \
-    /* Do off-center multiplication */   \
-    "ldmia r1!, {r5,r6,r7,r8,r9,r14} \n\t" \
-    "umull r3, r4, r5, r14 \n\t"         \
-    "mov r5, #0 \n\t"                    \
-    "umaal r4, r5, r6, r14 \n\t"         \
-    "mov r6, #0 \n\t"                    \
-    "umaal r5, r6, r7, r14 \n\t"         \
-    "mov r7, #0 \n\t"                    \
-    "umaal r6, r7, r8, r14 \n\t"         \
-    "mov r8, #0 \n\t"                    \
-    "umaal r7, r8, r9, r14 \n\t"         \
-                                         \
-    /* Multiply by 2 */                  \
-    "mov r9, #0 \n\t"                    \
-    "adds r3, r3, r3 \n\t"               \
-    "adcs r4, r4, r4 \n\t"               \
-    "adcs r5, r5, r5 \n\t"               \
-    "adcs r6, r6, r6 \n\t"               \
-    "adcs r7, r7, r7 \n\t"               \
-    "adcs r8, r8, r8 \n\t"               \
-    "adcs r9, r9, #0 \n\t"               \
-                                         \
-    /* Add into previous */              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adds r3, r3, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r4, r4, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r5, r5, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r6, r6, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r7, r7, r12 \n\t"              \
-    "adcs r8, r8, #0 \n\t"               \
-    "adcs r9, r9, #0 \n\t"               \
-    "sub r0, #20 \n\t"                   \
-                                         \
-    /* Perform center multiplication */  \
-    "umlal r8, r9, r14, r14 \n\t"        \
-    "stmia r0!, {r3,r4,r5,r6,r7,r8,r9} \n\t"
-
-#define FAST_SQUARE_ASM_6 /* 12 words at [r0] = square of the 6 words at [r1] */ \
-    "ldmia r1!, {r8,r9,r10,r11,r12,r14} \n\t" \
-    "push {r1, r2} \n\t"                \
-    /* r1/r2 are saved and reused as scratch; cross products with word 0 (r8) */ \
-    "umull r1, r2, r9, r8 \n\t"         \
-    "mov r3, #0 \n\t"                   \
-    "umaal r2, r3, r10, r8 \n\t"        \
-    "mov r4, #0 \n\t"                   \
-    "umaal r3, r4, r11, r8 \n\t"        \
-    "mov r5, #0 \n\t"                   \
-    "umaal r4, r5, r12, r8 \n\t"        \
-    "mov r6, #0 \n\t"                   \
-    "umaal r5, r6, r14, r8 \n\t"        \
-    /* Cross products with word 1 (r9); r4/r5 are parked on the stack while reused below */ \
-    "mov r7, #0 \n\t"                   \
-    "umaal r7, r3, r10, r9 \n\t"        \
-    "umaal r3, r4, r11, r9 \n\t"        \
-    "umaal r4, r5, r12, r9 \n\t"        \
-    "push {r4, r5} \n\t"                \
-    "adds r1, r1, r1 \n\t"              \
-    "adcs r2, r2, r2 \n\t"              \
-    "adcs r7, r7, r7 \n\t"              \
-    "adcs r3, r3, r3 \n\t"              \
-    /* Each word's own square is added to the doubled terms, two result words per store */ \
-    "umull r4, r5, r8, r8 \n\t"         \
-    /* Store carry in r8 */             \
-    "mov r8, #0 \n\t"                   \
-    "adc r8, r8, #0 \n\t"               \
-    "adds r5, r5, r1 \n\t"              \
-    "stmia r0!, {r4,r5} \n\t"           \
-                                        \
-    "umull r4, r5, r9, r9 \n\t"         \
-    "adcs r4, r4, r2 \n\t"              \
-    "adcs r5, r5, r7 \n\t"              \
-    "stmia r0!, {r4,r5} \n\t"           \
-                                        \
-    "pop {r4, r5} \n\t"                 \
-    "umaal r5, r6, r14, r9 \n\t"        \
-    /* Store carry in r9 */             \
-    "mov r9, #0 \n\t"                   \
-    "adc r9, r9, #0 \n\t"               \
-                                        \
-    "mov r1, #0 \n\t"                   \
-    "umaal r1, r4, r11, r10 \n\t"       \
-    "umaal r4, r5, r12, r10 \n\t"       \
-    "umaal r5, r6, r14, r10 \n\t"       \
-                                        \
-    "mov r2, #0 \n\t"                   \
-    "umaal r2, r5, r12, r11 \n\t"       \
-    "umaal r5, r6, r14, r11 \n\t"       \
-                                        \
-    "mov r7, #0 \n\t"                   \
-    "umaal r7, r6, r14, r12 \n\t"       \
-                                        \
-    /* Load carry from r8 */            \
-    "lsrs r8, #1 \n\t"                  \
-    "adcs r1, r1, r1 \n\t"              \
-    "adcs r4, r4, r4 \n\t"              \
-    "adcs r2, r2, r2 \n\t"              \
-    "adcs r5, r5, r5 \n\t"              \
-    "adcs r7, r7, r7 \n\t"              \
-    "adcs r6, r6, r6 \n\t"              \
-    "adc r8, r8, #0 \n\t"               \
-                                        \
-    /* Use carry from r9 */             \
-    "umaal r3, r9, r10, r10 \n\t"       \
-    "adds r9, r9, r1 \n\t"              \
-    "stmia r0!, {r3,r9} \n\t"           \
-                                        \
-    "umull r9, r10, r11, r11 \n\t"      \
-    "adcs r9, r9, r4 \n\t"              \
-    "adcs r10, r10, r2 \n\t"            \
-    "stmia r0!, {r9,r10} \n\t"          \
-                                        \
-    "umull r9, r10, r12, r12 \n\t"      \
-    "adcs r9, r9, r5 \n\t"              \
-    "adcs r10, r10, r7 \n\t"            \
-    "stmia r0!, {r9,r10} \n\t"          \
-                                        \
-    "umull r9, r10, r14, r14 \n\t"      \
-    "adcs r9, r9, r6 \n\t"              \
-    "adcs r10, r10, r8 \n\t"            \
-    "stmia r0!, {r9,r10} \n\t"          \
-    "pop {r1, r2} \n\t"
-
-#define FAST_SQUARE_ASM_6_TO_7 /* Skipped when r2 == 6; otherwise folds a 7th word into the square */ \
-    "cmp r2, #6 \n\t"                        \
-    "beq 1f \n\t"                            \
-    /* Rewind both pointers by 6 words (24 bytes); label 1 is defined outside this macro */ \
-    "sub r0, #24 \n\t"                       \
-    "sub r1, #24 \n\t"                       \
-                                             \
-    /* Do off-center multiplication */       \
-    "ldmia r1!, {r5,r6,r7,r8,r9,r10,r14} \n\t" \
-    "umull r3, r4, r5, r14 \n\t"             \
-    "mov r5, #0 \n\t"                        \
-    "umaal r4, r5, r6, r14 \n\t"             \
-    "mov r6, #0 \n\t"                        \
-    "umaal r5, r6, r7, r14 \n\t"             \
-    "mov r7, #0 \n\t"                        \
-    "umaal r6, r7, r8, r14 \n\t"             \
-    "mov r8, #0 \n\t"                        \
-    "umaal r7, r8, r9, r14 \n\t"             \
-    "mov r9, #0 \n\t"                        \
-    "umaal r8, r9, r10, r14 \n\t"            \
-                                             \
-    /* Multiply by 2 */                      \
-    "mov r10, #0 \n\t"                       \
-    "adds r3, r3, r3 \n\t"                   \
-    "adcs r4, r4, r4 \n\t"                   \
-    "adcs r5, r5, r5 \n\t"                   \
-    "adcs r6, r6, r6 \n\t"                   \
-    "adcs r7, r7, r7 \n\t"                   \
-    "adcs r8, r8, r8 \n\t"                   \
-    "adcs r9, r9, r9 \n\t"                   \
-    "adcs r10, r10, #0 \n\t"                 \
-                                             \
-    /* Add into previous */                  \
-    "ldr r12, [r0], #4 \n\t"                 \
-    "adds r3, r3, r12 \n\t"                  \
-    "ldr r12, [r0], #4 \n\t"                 \
-    "adcs r4, r4, r12 \n\t"                  \
-    "ldr r12, [r0], #4 \n\t"                 \
-    "adcs r5, r5, r12 \n\t"                  \
-    "ldr r12, [r0], #4 \n\t"                 \
-    "adcs r6, r6, r12 \n\t"                  \
-    "ldr r12, [r0], #4 \n\t"                 \
-    "adcs r7, r7, r12 \n\t"                  \
-    "ldr r12, [r0], #4 \n\t"                 \
-    "adcs r8, r8, r12 \n\t"                  \
-    "adcs r9, r9, #0 \n\t"                   \
-    "adcs r10, r10, #0 \n\t"                 \
-    "sub r0, #24 \n\t"                       \
-                                             \
-    /* Perform center multiplication */      \
-    "umlal r9, r10, r14, r14 \n\t"           \
-    "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10} \n\t"
-
-#define FAST_SQUARE_ASM_7 /* 14 words at [r0] = square of the 7 words at [r1] */ \
-    "ldmia r1!, {r9,r10,r11,r12} \n\t"  \
-    "push {r2} \n\t"                    \
-    /* Words 0-3 are in r9-r12; r2 is saved and reused as scratch */ \
-    "umull r14, r2, r10, r9 \n\t"       \
-    "mov r3, #0 \n\t"                   \
-    "umaal r2, r3, r11, r9 \n\t"        \
-    "mov r4, #0 \n\t"                   \
-    "umaal r3, r4, r12, r9 \n\t"        \
-                                        \
-    "mov r5, #0 \n\t"                   \
-    "umaal r5, r3, r11, r10 \n\t"       \
-    "adds r14, r14, r14 \n\t"           \
-    "adcs r2, r2, r2 \n\t"              \
-    "adcs r5, r5, r5 \n\t"              \
-    /* Store carry in r7 */             \
-    "mov r7, #0 \n\t"                   \
-    "adc r7, r7, #0 \n\t"               \
-                                        \
-    "umull r6, r8, r9, r9 \n\t"         \
-    "adds r8, r8, r14 \n\t"             \
-    "stmia r0!, {r6,r8} \n\t"           \
-                                        \
-    "umull r6, r8, r10, r10 \n\t"       \
-    "adcs r6, r6, r2 \n\t"              \
-    "adcs r8, r8, r5 \n\t"              \
-    "stmia r0!, {r6,r8} \n\t"           \
-    /* Store carry in r8 */             \
-    "mov r8, #0 \n\t"                   \
-    "adc r8, r8, #0 \n\t"               \
-    /* Load words 4-6 into r2, r6, r14; r1 is saved and reused as scratch */ \
-    "ldmia r1!, {r2, r6, r14} \n\t"     \
-    "push {r1} \n\t"                    \
-    "umaal r3, r4, r2, r9 \n\t"         \
-    "mov r5, #0 \n\t"                   \
-    "umaal r4, r5, r6, r9 \n\t"         \
-    "mov r1, #0 \n\t"                   \
-    "umaal r5, r1, r14, r9 \n\t"        \
-                                        \
-    "mov r9, #0 \n\t"                   \
-    "umaal r3, r9, r12, r10 \n\t"       \
-    "umaal r9, r4, r2, r10 \n\t"        \
-    "umaal r4, r5, r6, r10 \n\t"        \
-    "umaal r5, r1, r14, r10 \n\t"       \
-                                        \
-    "mov r10, #0 \n\t"                  \
-    "umaal r10, r9, r12, r11 \n\t"      \
-    "umaal r9, r4, r2, r11 \n\t"        \
-    "umaal r4, r5, r6, r11 \n\t"        \
-    "umaal r5, r1, r14, r11 \n\t"       \
-                                        \
-    /* Load carry from r7 */            \
-    "lsrs r7, #1 \n\t"                  \
-    "adcs r3, r3, r3 \n\t"              \
-    "adcs r10, r10, r10 \n\t"           \
-    "adcs r9, r9, r9 \n\t"              \
-    /* Store carry back in r7 */        \
-    "adc r7, r7, #0 \n\t"               \
-                                        \
-    /* Use carry from r8 */             \
-    "umaal r3, r8, r11, r11 \n\t"       \
-    "adds r8, r8, r10 \n\t"             \
-    "stmia r0!, {r3,r8} \n\t"           \
-    /* Store carry back in r8 */        \
-    "mov r8, #0 \n\t"                   \
-    "adc r8, r8, #0 \n\t"               \
-                                        \
-    "mov r3, #0 \n\t"                   \
-    "umaal r3, r4, r2, r12 \n\t"        \
-    "umaal r4, r5, r6, r12 \n\t"        \
-    "umaal r5, r1, r14, r12 \n\t"       \
-                                        \
-    "mov r10, #0 \n\t"                  \
-    "umaal r10, r5, r6, r2 \n\t"        \
-    "umaal r5, r1, r14, r2 \n\t"        \
-                                        \
-    "mov r11, #0 \n\t"                  \
-    "umaal r11, r1, r14, r6 \n\t"       \
-                                        \
-    /* Load carry from r7 */            \
-    "lsrs r7, #1 \n\t"                  \
-    "adcs r3, r3, r3 \n\t"              \
-    "adcs r4, r4, r4 \n\t"              \
-    "adcs r10, r10, r10 \n\t"           \
-    "adcs r5, r5, r5 \n\t"              \
-    "adcs r11, r11, r11 \n\t"           \
-    "adcs r1, r1, r1 \n\t"              \
-    "adc r7, r7, #0 \n\t"               \
-                                        \
-    /* Use carry from r8 */             \
-    "umaal r8, r9, r12, r12 \n\t"       \
-    "adds r9, r9, r3 \n\t"              \
-    "stmia r0!, {r8,r9} \n\t"           \
-                                        \
-    "umull r8, r9, r2, r2 \n\t"         \
-    "adcs r8, r8, r4 \n\t"              \
-    "adcs r9, r9, r10 \n\t"             \
-    "stmia r0!, {r8,r9} \n\t"           \
-                                        \
-    "umull r8, r9, r6, r6 \n\t"         \
-    "adcs r8, r8, r5 \n\t"              \
-    "adcs r9, r9, r11 \n\t"             \
-    "stmia r0!, {r8,r9} \n\t"           \
-                                        \
-    "umull r8, r9, r14, r14 \n\t"       \
-    "adcs r8, r8, r1 \n\t"              \
-    "adcs r9, r9, r7 \n\t"              \
-    "stmia r0!, {r8,r9} \n\t"           \
-    "pop {r1, r2} \n\t"
-
-#define FAST_SQUARE_ASM_7_TO_8 /* Skipped when r2 == 7; otherwise folds an 8th word into the square */ \
-    "cmp r2, #7 \n\t"                    \
-    "beq 1f \n\t"                        \
-    /* Rewind both pointers by 7 words (28 bytes); label 1 is defined outside this macro */ \
-    "sub r0, #28 \n\t"                   \
-    "sub r1, #28 \n\t"                   \
-                                         \
-    /* Do off-center multiplication */   \
-    "ldmia r1!, {r5,r6,r7,r8,r9,r10,r11,r14} \n\t" \
-    "umull r3, r4, r5, r14 \n\t"         \
-    "mov r5, #0 \n\t"                    \
-    "umaal r4, r5, r6, r14 \n\t"         \
-    "mov r6, #0 \n\t"                    \
-    "umaal r5, r6, r7, r14 \n\t"         \
-    "mov r7, #0 \n\t"                    \
-    "umaal r6, r7, r8, r14 \n\t"         \
-    "mov r8, #0 \n\t"                    \
-    "umaal r7, r8, r9, r14 \n\t"         \
-    "mov r9, #0 \n\t"                    \
-    "umaal r8, r9, r10, r14 \n\t"        \
-    "mov r10, #0 \n\t"                   \
-    "umaal r9, r10, r11, r14 \n\t"       \
-                                         \
-    /* Multiply by 2 */                  \
-    "mov r11, #0 \n\t"                   \
-    "adds r3, r3, r3 \n\t"               \
-    "adcs r4, r4, r4 \n\t"               \
-    "adcs r5, r5, r5 \n\t"               \
-    "adcs r6, r6, r6 \n\t"               \
-    "adcs r7, r7, r7 \n\t"               \
-    "adcs r8, r8, r8 \n\t"               \
-    "adcs r9, r9, r9 \n\t"               \
-    "adcs r10, r10, r10 \n\t"            \
-    "adcs r11, r11, #0 \n\t"             \
-                                         \
-    /* Add into previous */              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adds r3, r3, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r4, r4, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r5, r5, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r6, r6, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r7, r7, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r8, r8, r12 \n\t"              \
-    "ldr r12, [r0], #4 \n\t"             \
-    "adcs r9, r9, r12 \n\t"              \
-    "adcs r10, r10, #0 \n\t"             \
-    "adcs r11, r11, #0 \n\t"             \
-    "sub r0, #28 \n\t"                   \
-                                         \
-    /* Perform center multiplication */  \
-    "umlal r10, r11, r14, r14 \n\t"      \
-    "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10,r11} \n\t"
-
-#define FAST_SQUARE_ASM_8 /* 16 words at [r0] = square of the 8 words at [r1] */ \
-    "ldmia r1!, {r10,r11,r12,r14} \n\t" \
-    "push {r2} \n\t"                    \
-    /* Words 0-3 are in r10, r11, r12, r14; r2 is saved and reused as scratch */ \
-    "umull r2, r3, r11, r10 \n\t"       \
-    "mov r4, #0 \n\t"                   \
-    "umaal r3, r4, r12, r10 \n\t"       \
-    "mov r5, #0 \n\t"                   \
-    "umaal r4, r5, r14, r10 \n\t"       \
-                                        \
-    "mov r6, #0 \n\t"                   \
-    "umaal r6, r4, r12, r11 \n\t"       \
-    "adds r2, r2, r2 \n\t"              \
-    "adcs r3, r3, r3 \n\t"              \
-    "adcs r6, r6, r6 \n\t"              \
-    /* Store carry in r7 */             \
-    "mov r7, #0 \n\t"                   \
-    "adc r7, r7, #0 \n\t"               \
-                                        \
-    "umull r8, r9, r10, r10 \n\t"       \
-    "adds r9, r9, r2 \n\t"              \
-    "stmia r0!, {r8,r9} \n\t"           \
-                                        \
-    "umull r8, r9, r11, r11 \n\t"       \
-    "adcs r8, r8, r3 \n\t"              \
-    "adcs r9, r9, r6 \n\t"              \
-    "stmia r0!, {r8,r9} \n\t"           \
-    /* Store carry in r8 */             \
-    "mov r8, #0 \n\t"                   \
-    "adc r8, r8, #0 \n\t"               \
-    /* Load words 4-5 into r2, r3; the advanced r1 is saved and r1 reused as scratch */ \
-    "ldmia r1!, {r2, r3} \n\t"          \
-    "push {r1} \n\t"                    \
-    "umaal r4, r5, r2, r10 \n\t"        \
-    "mov r6, #0 \n\t"                   \
-    "umaal r5, r6, r3, r10 \n\t"        \
-                                        \
-    "mov r9, #0 \n\t"                   \
-    "umaal r9, r4, r14, r11 \n\t"       \
-    "umaal r4, r5, r2, r11 \n\t"        \
-                                        \
-    "mov r1, #0 \n\t"                   \
-    "umaal r1, r4, r14, r12 \n\t"       \
-                                        \
-    /* Load carry from r7 */            \
-    "lsrs r7, #1 \n\t"                  \
-    "adcs r9, r9, r9 \n\t"              \
-    "adcs r1, r1, r1 \n\t"              \
-    /* Store carry back in r7 */        \
-    "adc r7, r7, #0 \n\t"               \
-                                        \
-    /* Use carry from r8 */             \
-    "umaal r8, r9, r12, r12 \n\t"       \
-    "adds r9, r9, r1  \n\t"             \
-    "stmia r0!, {r8,r9} \n\t"           \
-    /* Store carry back in r8 */        \
-    "mov r8, #0 \n\t"                   \
-    "adc r8, r8, #0 \n\t"               \
-    /* Recover the input pointer to load words 6-7 into r9 and r1 (r1 stops being a pointer) */ \
-    "pop {r1} \n\t"                     \
-    /* TODO could fix up r1 value on stack here */      \
-    /* and leave the value on the stack (rather */      \
-    /* than popping) if supporting curves > 256 bits */ \
-    "ldr r9, [r1], #4 \n\t"             \
-    "ldr r1, [r1] \n\t"                 \
-    /* r7's saved carry moves to r10 through the stack once word 0 (r10) is no longer needed */ \
-    "push {r7} \n\t"                    \
-    "umaal r5, r6, r9, r10 \n\t"        \
-    "mov r7, #0 \n\t"                   \
-    "umaal r6, r7, r1, r10 \n\t"        \
-    /* Carry now stored in r10 */       \
-    "pop {r10} \n\t"                    \
-                                        \
-    "umaal r4, r5, r3, r11 \n\t"        \
-    "umaal r5, r6, r9, r11 \n\t"        \
-    "umaal r6, r7, r1, r11 \n\t"        \
-                                        \
-    "mov r11, #0 \n\t"                  \
-    "umaal r11, r4, r2, r12 \n\t"       \
-    "umaal r4, r5, r3, r12 \n\t"        \
-    "umaal r5, r6, r9, r12 \n\t"        \
-    "umaal r6, r7, r1, r12 \n\t"        \
-                                        \
-    "mov r12, #0 \n\t"                  \
-    "umaal r12, r4, r2, r14 \n\t"       \
-    "umaal r4, r5, r3, r14 \n\t"        \
-    "umaal r5, r6, r9, r14 \n\t"        \
-    "umaal r6, r7, r1, r14 \n\t"        \
-                                        \
-    /* Load carry from r10 */           \
-    "lsrs r10, #1 \n\t"                 \
-    "adcs r11, r11, r11 \n\t"           \
-    "adcs r12, r12, r12 \n\t"           \
-    "adc r10, r10, #0 \n\t"             \
-                                        \
-    /* Use carry from r8 */             \
-    "umaal r8, r11, r14, r14 \n\t"      \
-    "adds r11, r11, r12 \n\t"           \
-    "stmia r0!, {r8,r11} \n\t"          \
-    /* Store carry back in r8 */        \
-    "mov r8, #0 \n\t"                   \
-    "adc r8, r8, #0 \n\t"               \
-                                        \
-    "mov r11, #0 \n\t"                  \
-    "umaal r11, r5, r3, r2 \n\t"        \
-    "umaal r5, r6, r9, r2 \n\t"         \
-    "umaal r6, r7, r1, r2 \n\t"         \
-                                        \
-    "mov r12, #0 \n\t"                  \
-    "umaal r12, r6, r9, r3 \n\t"        \
-    "umaal r6, r7, r1, r3 \n\t"         \
-                                        \
-    "mov r14, #0 \n\t"                  \
-    "umaal r14, r7, r1, r9 \n\t"        \
-                                        \
-    /* Load carry from r10 */           \
-    "lsrs r10, #1 \n\t"                 \
-    "adcs r4, r4, r4 \n\t"              \
-    "adcs r11, r11, r11 \n\t"           \
-    "adcs r5, r5, r5 \n\t"              \
-    "adcs r12, r12, r12 \n\t"           \
-    "adcs r6, r6, r6 \n\t"              \
-    "adcs r14, r14, r14 \n\t"           \
-    "adcs r7, r7, r7 \n\t"              \
-    "adc r10, r10, #0 \n\t"             \
-                                        \
-    /* Use carry from r8 */             \
-    "umaal r4, r8, r2, r2 \n\t"         \
-    "adds r8, r8, r11 \n\t"             \
-    "stmia r0!, {r4,r8} \n\t"           \
-                                        \
-    "umull r4, r8, r3, r3 \n\t"         \
-    "adcs r4, r4, r5 \n\t"              \
-    "adcs r8, r8, r12 \n\t"             \
-    "stmia r0!, {r4,r8} \n\t"           \
-                                        \
-    "umull r4, r8, r9, r9 \n\t"         \
-    "adcs r4, r4, r6 \n\t"              \
-    "adcs r8, r8, r14 \n\t"             \
-    "stmia r0!, {r4,r8} \n\t"           \
-                                        \
-    "umull r4, r8, r1, r1 \n\t"         \
-    "adcs r4, r4, r7 \n\t"              \
-    "adcs r8, r8, r10 \n\t"             \
-    "stmia r0!, {r4,r8} \n\t"           \
-    /* TODO pop {r1, r2} if supporting curves > 256 bits */ \
-    "pop {r2} \n\t"
-
-#endif /* _UECC_ASM_ARM_MULT_SQUARE_H_ */

+ 0 - 1089
components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr.inc

@@ -1,1089 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_ASM_AVR_H_
-#define _UECC_ASM_AVR_H_
-
-#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) /* 256-bit curves: 32 words */
-    #define uECC_MIN_WORDS 32
-#endif
-#if uECC_SUPPORTS_secp224r1 /* each later match redefines the macro, so the smallest enabled size wins */
-    #undef uECC_MIN_WORDS
-    #define uECC_MIN_WORDS 28
-#endif
-#if uECC_SUPPORTS_secp192r1
-    #undef uECC_MIN_WORDS
-    #define uECC_MIN_WORDS 24
-#endif
-#if uECC_SUPPORTS_secp160r1
-    #undef uECC_MIN_WORDS
-    #define uECC_MIN_WORDS 20
-#endif
-
-#if __AVR_HAVE_EIJMP_EICALL__ /* pick the indirect-jump mnemonic pasted into the asm strings below */
-    #define IJMP "eijmp \n\t"
-#else
-    #define IJMP "ijmp \n\t"
-#endif
-
-#if (uECC_OPTIMIZATION_LEVEL >= 2)
-
-uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) { /*
-     * Stores __zero_reg__ into consecutive words starting at vli.
-     * The stores are emitted as a straight run of `st x+` instructions
-     * (presumably uECC_MAX_WORDS of them; REPEAT is defined elsewhere).
-     * When more than one curve size is compiled in, Z is set to the address
-     * of label 1 minus num_words and IJMP enters the run part-way, so that
-     * only the tail of the run executes.
-     * NOTE(review): this relies on each `st` occupying one program word.
-     */
-    volatile uECC_word_t *v = vli;
-    __asm__ volatile (
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "ldi r30, pm_lo8(1f) \n\t"      /* Z = program address of label 1 */
-        "ldi r31, pm_hi8(1f) \n\t"
-        "sub r30, %[num] \n\t"          /* Z -= num_words */
-        "sbc r31, __zero_reg__ \n\t"
-        IJMP
-    #endif
-        
-        REPEAT(uECC_MAX_WORDS, "st x+, __zero_reg__ \n\t")
-        "1: \n\t"
-        : "+x" (v)
-        : [num] "r" (num_words)
-        : 
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-          "r30", "r31", "cc"
-    #endif
-    );
-}
-#define asm_clear 1
-
-/* Copies words from src to dest through an unrolled run of `ld`/`st` pairs.
- * When more than one curve size is compiled in, Z is set to label 1 minus
- * 2 * num_words (two instructions per copied word) and IJMP enters the run
- * part-way so only the tail of it executes.
- *
- * The clobber list is written as "r0" followed by an optional, comma-led
- * tail: the previous form ("r0", + conditional tail) left a dangling comma,
- * and therefore a syntax error, whenever uECC_MAX_WORDS == uECC_MIN_WORDS.
- */
-uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words) {
-    volatile uECC_word_t *d = dest;
-    __asm__ volatile (
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "ldi r30, pm_lo8(1f) \n\t"      /* Z = program address of label 1 */
-        "ldi r31, pm_hi8(1f) \n\t"
-        "sub r30, %[num] \n\t"          /* Z -= 2 * num_words */
-        "sbc r31, __zero_reg__ \n\t"
-        IJMP
-    #endif
-        
-        REPEAT(uECC_MAX_WORDS,
-            "ld r0, y+ \n\t"
-            "st x+, r0 \n\t")
-        "1: \n\t"
-        : "+x" (d), "+y" (src)
-        : [num] "r" ((uint8_t)(num_words * 2))
-        : "r0"
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-          , "r30", "r31", "cc"
-    #endif
-    );
-}
-#define asm_set 1
-
-uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) { /*
-     * Shifts the value at vli right by one bit, in place.
-     * X is advanced to vli + num_words and the words are then processed from
-     * the last one downwards: `lsr` on the first word handled, `ror` on every
-     * following word so the bit shifted out travels through the carry flag.
-     * With several curve sizes compiled in, Z is prepared as label 1 minus
-     * 3 * (num_words - 1) (three instructions per remaining word) and IJMP
-     * skips the part of the unrolled run that is not needed.
-     */
-    volatile uECC_word_t *v = vli;
-    __asm__ volatile (
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "ldi r30, pm_lo8(1f) \n\t"      /* Z = program address of label 1 */
-        "ldi r31, pm_hi8(1f) \n\t"
-        "sub r30, %[jump] \n\t"         /* Z -= 3 * (num_words - 1) */
-        "sbc r31, __zero_reg__ \n\t"
-    #endif
-        
-        "add r26, %[num] \n\t"          /* X += num_words (one past the last word) */
-        "adc r27, __zero_reg__ \n\t"
-        "ld r0, -x \n\t"
-        "lsr r0 \n\t"                   /* top word: zero shifted in, low bit -> carry */
-        "st x, r0 \n\t"
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        IJMP
-    #endif
-        
-        REPEAT(DEC(uECC_MAX_WORDS),
-            "ld r0, -x \n\t"
-            "ror r0 \n\t"
-            "st x, r0 \n\t")
-        "1: \n\t"
-        : "+x" (v)
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        : [num] "r" (num_words), [jump] "r" ((uint8_t)(3 * (num_words - 1)))
-        : "r0", "r30", "r31", "cc"
-    #else
-        : [num] "r" (num_words)
-        : "r0", "cc"
-    #endif
-    );
-}
-#define asm_rshift1 1
-
-/* Jump-table entry N: point Z at the destination, then branch to add step N. */
-#define ADD_RJPM_TABLE(N)       \
-    "movw r30, %A[result] \n\t" \
-    "rjmp add_%=_" #N " \n\t"
-
-/* Add step N: load one byte from each operand, add with carry, store via Z. */
-#define ADD_RJPM_DEST(N)     \
-    "add_%=_" #N ":"         \
-    "ld %[clb], x+ \n\t"     \
-    "ld %[rb], y+ \n\t"      \
-    "adc %[clb], %[rb] \n\t" \
-    "st z+, %[clb] \n\t"
-
-/* Computes result = left + right and returns the carry out of the last word.
- *
- * The additions are an unrolled run of ADD_RJPM_DEST steps. With several
- * curve sizes compiled in, IJMP lands in a table of ADD_RJPM_TABLE entries
- * (two instructions each, hence num = 2 * num_words) which loads Z with
- * result and branches to the matching step.
- *
- * With a single curve size there is no table, so Z is loaded from result
- * explicitly; previously nothing initialised Z on that path before the
- * `st z+` stores. movw leaves the carry flag untouched.
- */
-uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    volatile uECC_word_t *r = result;
-    uint8_t carry;
-    uint8_t right_byte;
-
-    __asm__ volatile (
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "ldi r30, pm_lo8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t"
-        "ldi r31, pm_hi8(add_%=_" STR(uECC_MAX_WORDS) ") \n\t"
-        "sub r30, %[num] \n\t"
-        "sbc r31, __zero_reg__ \n\t"
-    #endif
-    
-        "clc \n\t"
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        IJMP
-        REPEATM(uECC_MAX_WORDS, ADD_RJPM_TABLE)
-    #else
-        "movw r30, %A[result] \n\t" /* no jump table: load Z with result here */
-    #endif
-    
-        REPEATM(uECC_MAX_WORDS, ADD_RJPM_DEST)
-        
-        "mov %[clb], __zero_reg__ \n\t"
-        "adc %[clb], %[clb] \n\t" /* Store carry bit. */
-
-        : "+x" (left), "+y" (right),
-          [clb] "=&r" (carry), [rb] "=&r" (right_byte)
-        : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2))
-        : "r30", "r31", "cc"
-    );
-    return carry;
-}
-#define asm_add 1
-
-/* Jump-table entry N: point Z at the destination, then branch to sub step N. */
-#define SUB_RJPM_TABLE(N)       \
-    "movw r30, %A[result] \n\t" \
-    "rjmp sub_%=_" #N " \n\t"
-
-/* Sub step N: load one byte from each operand, subtract with borrow, store via Z. */
-#define SUB_RJPM_DEST(N)     \
-    "sub_%=_" #N ":"         \
-    "ld %[clb], x+ \n\t"     \
-    "ld %[rb], y+ \n\t"      \
-    "sbc %[clb], %[rb] \n\t" \
-    "st z+, %[clb] \n\t"
-
-/* Computes result = left - right and returns the borrow out of the last word.
- *
- * The subtractions are an unrolled run of SUB_RJPM_DEST steps. With several
- * curve sizes compiled in, IJMP lands in a table of SUB_RJPM_TABLE entries
- * (two instructions each, hence num = 2 * num_words) which loads Z with
- * result and branches to the matching step.
- *
- * With a single curve size there is no table, so Z is loaded from result
- * explicitly; previously nothing initialised Z on that path before the
- * `st z+` stores. movw leaves the carry flag untouched.
- */
-uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    volatile uECC_word_t *r = result;
-    uint8_t carry;
-    uint8_t right_byte;
-
-    __asm__ volatile (
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        "ldi r30, pm_lo8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t"
-        "ldi r31, pm_hi8(sub_%=_" STR(uECC_MAX_WORDS) ") \n\t"
-        "sub r30, %[num] \n\t"
-        "sbc r31, __zero_reg__ \n\t"
-    #endif
-    
-        "clc \n\t"
-    #if (uECC_MAX_WORDS != uECC_MIN_WORDS)
-        IJMP
-        REPEATM(uECC_MAX_WORDS, SUB_RJPM_TABLE)
-    #else
-        "movw r30, %A[result] \n\t" /* no jump table: load Z with result here */
-    #endif
-    
-        REPEATM(uECC_MAX_WORDS, SUB_RJPM_DEST)
-        
-        "mov %[clb], __zero_reg__ \n\t"
-        "adc %[clb], %[clb] \n\t" /* Store carry bit. */
-
-        : "+x" (left), "+y" (right),
-          [clb] "=&r" (carry), [rb] "=&r" (right_byte)
-        : [result] "r" (r), [num] "r" ((uint8_t)(num_words * 2))
-        : "r30", "r31", "cc"
-    );
-    return carry;
-}
-#define asm_sub 1
-
-#if (uECC_OPTIMIZATION_LEVEL >= 3)
-
-#include "asm_avr_mult_square.inc"
-
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
-                                const uECC_word_t *left,
-                                const uECC_word_t *right,
-                                wordcount_t num_words) { /*
-     * Multiplication entry point for uECC_OPTIMIZATION_LEVEL >= 3.
-     * The body is assembled from the FAST_MULT_ASM_* string macros of
-     * asm_avr_mult_square.inc: the macro for the smallest enabled size
-     * (uECC_MIN_WORDS) comes first, followed by the *_TO_* macros up to
-     * uECC_MAX_WORDS. left, right and result are handed over in X, Y and Z.
-     * r18 carries num_words; it is pushed before and popped after the first
-     * macro. NOTE(review): the *_TO_* macros presumably test r18 and branch
-     * to label 2 when done - confirm in asm_avr_mult_square.inc.
-     */
-    /* num_words should already be in r18. */
-    register wordcount_t r18 __asm__("r18") = num_words;
-    
-    __asm__ volatile (
-        "push r18 \n\t"
-#if (uECC_MIN_WORDS == 20)
-        FAST_MULT_ASM_20
-        "pop r18 \n\t"
-    #if (uECC_MAX_WORDS > 20)
-        FAST_MULT_ASM_20_TO_24
-    #endif
-    #if (uECC_MAX_WORDS > 24)
-        FAST_MULT_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_MULT_ASM_28_TO_32
-    #endif
-#elif (uECC_MIN_WORDS == 24)
-        FAST_MULT_ASM_24
-        "pop r18 \n\t"
-    #if (uECC_MAX_WORDS > 24)
-        FAST_MULT_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_MULT_ASM_28_TO_32
-    #endif
-#elif (uECC_MIN_WORDS == 28)
-        FAST_MULT_ASM_28
-        "pop r18 \n\t"
-    #if (uECC_MAX_WORDS > 28)
-        FAST_MULT_ASM_28_TO_32
-    #endif
-#elif (uECC_MIN_WORDS == 32)
-        FAST_MULT_ASM_32
-        "pop r18 \n\t"
-#endif
-        "2: \n\t"
-        "eor r1, r1 \n\t" /* r1 was used by the multiplies; zero it again */
-        : "+x" (left), "+y" (right), "+z" (result)
-        : "r" (r18)
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r19", "r20",
-          "r21", "r22", "r23", "r24", "r25", "cc"
-    );
-}
-#define asm_mult 1
-
-#if uECC_SQUARE_FUNC
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
-                                  const uECC_word_t *left,
-                                  wordcount_t num_words) { /*
-     * Squaring entry point for uECC_OPTIMIZATION_LEVEL >= 3.
-     * Same layout as the multiply above it: the FAST_SQUARE_ASM_* macro for
-     * uECC_MIN_WORDS comes first, then the *_TO_* macros up to
-     * uECC_MAX_WORDS. left and result are handed over in X and Z; Y
-     * (r28/r29) is listed as clobbered instead of carrying an operand.
-     * r20 carries num_words; it is pushed before and popped after the first
-     * macro.
-     */
-    /* num_words should already be in r20. */
-    register wordcount_t r20 __asm__("r20") = num_words;
-    
-    __asm__ volatile (
-        "push r20 \n\t"
-#if (uECC_MIN_WORDS == 20)
-        FAST_SQUARE_ASM_20
-        "pop r20 \n\t"
-    #if (uECC_MAX_WORDS > 20)
-        FAST_SQUARE_ASM_20_TO_24
-    #endif
-    #if (uECC_MAX_WORDS > 24)
-        FAST_SQUARE_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_SQUARE_ASM_28_TO_32
-    #endif
-#elif (uECC_MIN_WORDS == 24)
-        FAST_SQUARE_ASM_24
-        "pop r20 \n\t"
-    #if (uECC_MAX_WORDS > 24)
-        FAST_SQUARE_ASM_24_TO_28
-    #endif
-    #if (uECC_MAX_WORDS > 28)
-        FAST_SQUARE_ASM_28_TO_32
-    #endif
-#elif (uECC_MIN_WORDS == 28)
-        FAST_SQUARE_ASM_28
-        "pop r20 \n\t"
-    #if (uECC_MAX_WORDS > 28)
-        FAST_SQUARE_ASM_28_TO_32
-    #endif
-#elif (uECC_MIN_WORDS == 32)
-        FAST_SQUARE_ASM_32
-        "pop r20 \n\t"
-#endif
-        "2: \n\t"
-        "eor r1, r1 \n\t" /* r1 was used by the multiplies; zero it again */
-        : "+x" (left), "+z" (result)
-        : "r" (r20)
-        : "r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "r10",
-          "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19",
-          "r21", "r22", "r23", "r24", "r25", "r28", "r29", "cc"
-    );
-}
-#define asm_square 1
-#endif /* uECC_SQUARE_FUNC */
-
-#endif /* (uECC_OPTIMIZATION_LEVEL >= 3) */
-
-#if uECC_SUPPORTS_secp160r1
-static const struct uECC_Curve_t curve_secp160r1;
-static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { /*
-     * Folds the 40-byte product into the 20-byte result.
-     *
-     * The asm reserves 24 bytes of stack as tmp. tmp is first filled with
-     * the upper 20 product bytes shifted left by 31 bits (4 bytes up, one
-     * bit right), then an unshifted copy of those 20 bytes is added to it.
-     * The low 20 bytes of tmp are added to the low 20 product bytes and
-     * stored in result. The same shift-and-add is then applied to the
-     * remaining 4 bytes of tmp, using the product buffer as scratch space,
-     * and that value is added into result as well.
-     *
-     * `carry` counts the carries out of the two additions into result.
-     * The C code after the asm subtracts curve_secp160r1.p once per counted
-     * carry and once more if result still compares greater than p.
-     */
-    uint8_t carry = 0;
-    __asm__ volatile (
-        "in r30, __SP_L__ \n\t"
-    	"in r31, __SP_H__ \n\t"
-    	"sbiw r30, 24 \n\t" /* reserve 24 bytes of stack for tmp */
-    	"in r0, __SREG__ \n\t"
-    	"cli \n\t" /* interrupts off while the stack pointer is rewritten */
-    	"out __SP_H__, r31 \n\t"
-    	"out __SREG__, r0 \n\t"
-    	"out __SP_L__, r30 \n\t"
-    	
-    	"adiw r30, 25 \n\t" /* we are shifting by 31 bits, so shift over 4 bytes
-    	                       (+ 1 since z initially points below the stack) */
-        "adiw r26, 40 \n\t" /* end of product */
-        "ld r18, -x \n\t"   /* Load word. */
-        "lsr r18 \n\t"      /* Shift. */
-        "st -z, r18 \n\t"   /* Store the first result word. */
-
-        /* Now we just do the remaining words with the carry bit (using ROR) */
-        REPEAT(19,
-            "ld r18, -x \n\t"
-            "ror r18 \n\t"
-            "st -z, r18 \n\t")
-
-        "eor r18, r18 \n\t" /* r18 = 0 */
-        "ror r18 \n\t"      /* get last bit */
-        "st -z, r18 \n\t"   /* store it */
-
-        "sbiw r30, 3 \n\t" /* move z back to point at tmp */
-        /* now we add right */
-        "ld r18, x+ \n\t"
-        "st z+, r18 \n\t" /* the first 3 bytes do not need to be added */
-        "ld r18, x+ \n\t"
-        "st z+, r18 \n\t"
-        "ld r18, x+ \n\t"
-        "st z+, r18 \n\t"
-
-        "ld r18, x+ \n\t"
-        "ld r19, z \n\t"
-        "add r18, r19 \n\t"
-        "st z+, r18 \n\t"
-
-        /* Now we just do the remaining words with the carry bit (using ADC) */
-        REPEAT(16,
-            "ld r18, x+ \n\t"
-            "ld r19, z \n\t"
-            "adc r18, r19 \n\t"
-            "st z+, r18 \n\t")
-
-        /* Propagate over the remaining bytes of result */
-        "ld r18, z \n\t"
-        "adc r18, r1 \n\t"
-        "st z+, r18 \n\t"
-
-        "ld r18, z \n\t"
-        "adc r18, r1 \n\t"
-        "st z+, r18 \n\t"
-
-        "ld r18, z \n\t"
-        "adc r18, r1 \n\t"
-        "st z+, r18 \n\t"
-
-        "ld r18, z \n\t"
-        "adc r18, r1 \n\t"
-        "st z+, r18 \n\t"
-        
-        "sbiw r30, 24 \n\t" /* move z back to point at tmp */
-        "sbiw r26, 40 \n\t" /* move x back to point at product */
-        
-        /* add low bytes of tmp to product, storing in result */
-        "ld r18, z+ \n\t"
-        "ld r19, x+ \n\t"
-        "add r18, r19 \n\t"
-        "st y+, r18 \n\t"
-        REPEAT(19,
-            "ld r18, z+ \n\t"
-            "ld r19, x+ \n\t"
-            "adc r18, r19 \n\t"
-            "st y+, r18 \n\t")
-        "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
-        /* at this point x is at the end of product, y is at the end of result,
-           z is 20 bytes into tmp */
-        "sbiw r28, 20 \n\t" /* move y back to point at result */
-        "adiw r30, 4 \n\t"  /* move z to point to the end of tmp */
-        
-        /* do omega_mult again with the 4 relevant bytes */
-        /* z points to the end of tmp, x points to the end of product */
-        "ld r18, -z \n\t" /* Load word. */
-        "lsr r18 \n\t"    /* Shift. */
-        "st -x, r18 \n\t" /* Store the first result word. */
-        
-        "ld r18, -z \n\t"
-        "ror r18 \n\t"
-        "st -x, r18 \n\t"
-        "ld r18, -z \n\t"
-        "ror r18 \n\t"
-        "st -x, r18 \n\t"
-        "ld r18, -z \n\t"
-        "ror r18 \n\t"
-        "st -x, r18 \n\t"
-        
-        "eor r18, r18 \n\t" /* r18 = 0 */
-        "ror r18 \n\t"      /* get last bit */
-        "st -x, r18 \n\t"   /* store it */
-        
-        "sbiw r26, 3 \n\t" /* move x back to point at beginning */
-        /* now we add a copy of the 4 bytes */
-        "ld r18, z+ \n\t"
-        "st x+, r18 \n\t" /* the first 3 bytes do not need to be added */
-        "ld r18, z+ \n\t"
-        "st x+, r18 \n\t"
-        "ld r18, z+ \n\t"
-        "st x+, r18 \n\t"
-        
-        "ld r18, z+ \n\t"
-        "ld r19, x \n\t"
-        "add r18, r19 \n\t"
-        "st x+, r18 \n\t"
-        
-        /* Propagate over the remaining bytes */
-        "ld r18, x \n\t"
-        "adc r18, r1 \n\t"
-        "st x+, r18 \n\t"
-        
-        "ld r18, x \n\t"
-        "adc r18, r1 \n\t"
-        "st x+, r18 \n\t"
-        
-        "ld r18, x \n\t"
-        "adc r18, r1 \n\t"
-        "st x+, r18 \n\t"
-        
-        "ld r18, x \n\t"
-        "adc r18, r1 \n\t"
-        "st x+, r18 \n\t"
-        
-        /* now z points to the end of tmp, x points to the end of product
-           (y still points at result) */
-        "sbiw r26, 8 \n\t" /* move x back to point at beginning of actual data */
-        /* add into result */
-        "ld r18, x+ \n\t"
-        "ld r19, y \n\t"
-        "add r18, r19 \n\t"
-        "st y+, r18 \n\t"
-        REPEAT(7,
-            "ld r18, x+ \n\t"
-            "ld r19, y \n\t"
-            "adc r18, r19 \n\t"
-            "st y+, r18 \n\t")
-        
-        /* Done adding, now propagate carry bit */
-        REPEAT(12,
-            "ld r18, y \n\t"
-            "adc r18, __zero_reg__ \n\t"
-            "st y+, r18 \n\t")
-        
-        "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
-        "sbiw r28, 20 \n\t" /* move y back to point at result */
-        
-        "sbiw r30, 1 \n\t" /* fix stack pointer */
-    	"in r0, __SREG__ \n\t"
-    	"cli \n\t"
-    	"out __SP_H__, r31 \n\t"
-    	"out __SREG__, r0 \n\t"
-    	"out __SP_L__, r30 \n\t"
-        
-        : "+x" (product), [carry] "+r" (carry)
-        : "y" (result)
-        : "r0", "r18", "r19", "r30", "r31", "cc"
-    );
-
-    if (carry > 0) { /* first of up to two counted carries */
-        --carry;
-        uECC_vli_sub(result, result, curve_secp160r1.p, 20);
-    }
-    if (carry > 0) { /* second counted carry, if any */
-        uECC_vli_sub(result, result, curve_secp160r1.p, 20);
-    }
-    if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, 20) > 0) {
-        uECC_vli_sub(result, result, curve_secp160r1.p, 20);
-    }
-}
-#define asm_mmod_fast_secp160r1 1
-#endif /* uECC_SUPPORTS_secp160r1 */
-
-#if uECC_SUPPORTS_secp256k1
-static const struct uECC_Curve_t curve_secp256k1;
-static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) { /*
-     * Folds the 64-byte product into the 32-byte result.
-     *
-     * The asm reserves 37 bytes of stack as tmp. tmp is filled with the
-     * upper 32 product bytes multiplied by the 16-bit constant 0x03D1 (kept
-     * in r25:r24) plus the same bytes moved up by 4 bytes (the "2^32 part").
-     * The low 32 bytes of tmp are added to the low 32 product bytes and
-     * stored in result. The multiply is then repeated on the remaining 5
-     * bytes of tmp, writing into the product buffer, and that value is
-     * added into result as well.
-     *
-     * `carry` counts the carries out of the two additions into result.
-     * The C code after the asm subtracts curve_secp256k1.p once per counted
-     * carry and once more if result still compares greater than p.
-     */
-    uint8_t carry = 0;
-    __asm__ volatile (
-        "in r30, __SP_L__ \n\t"
-    	"in r31, __SP_H__ \n\t"
-    	"sbiw r30, 37 \n\t" /* reserve 37 bytes of stack for tmp */
-    	"in r0, __SREG__ \n\t"
-    	"cli \n\t" /* interrupts off while the stack pointer is rewritten */
-    	"out __SP_H__, r31 \n\t"
-    	"out __SREG__, r0 \n\t"
-    	"out __SP_L__, r30 \n\t"
-    	
-    	"adiw r30, 1 \n\t"  /* add 1 since z initially points below the stack */
-        "adiw r26, 32 \n\t" /* product + uECC_WORDS */
-        "ldi r25, 0x03 \n\t" /* r25:r24 = 0x03D1 */
-        "ldi r24, 0xD1 \n\t"
-        "ld r18, x+ \n\t"
-        "ld r19, x+ \n\t"
-        "ld r20, x+ \n\t"
-        "ld r21, x+ \n\t"
-        
-        "mul r24, r18 \n\t"
-        "st z+, r0 \n\t"
-        "mov r22, r1 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "mul r24, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t" /* can't overflow */
-        "mul r25, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t" /* can't overflow */
-        "st z+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        "mul r24, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "mul r25, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "mul r24, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r25, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st z+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        /* now we start adding the 2^32 part as well */
-        "add r23, r18 \n\t" // 28
-        "adc r22, r22 \n\t"
-        "ld r18, x+ \n\t"
-        "mul r24, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "mul r25, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "add r22, r19 \n\t" // 27
-        "adc r23, r23 \n\t"
-        "ld r19, x+ \n\t"
-        "mul r24, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r25, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st z+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        REPEAT(6, // 26 - 3
-            "add r23, r20 \n\t"
-            "adc r22, r22 \n\t"
-            "ld r20, x+ \n\t"
-            "mul r24, r20 \n\t"
-            "add r23, r0 \n\t"
-            "adc r22, r1 \n\t"
-            "mul r25, r19 \n\t"
-            "add r23, r0 \n\t"
-            "adc r22, r1 \n\t"
-            "st z+, r23 \n\t"
-            "ldi r23, 0 \n\t"
-            
-            "add r22, r21 \n\t"
-            "adc r23, r23 \n\t"
-            "ld r21, x+ \n\t"
-            "mul r24, r21 \n\t"
-            "add r22, r0 \n\t"
-            "adc r23, r1 \n\t"
-            "mul r25, r20 \n\t"
-            "add r22, r0 \n\t"
-            "adc r23, r1 \n\t"
-            "st z+, r22 \n\t"
-            "ldi r22, 0 \n\t"
-            
-            "add r23, r18 \n\t"
-            "adc r22, r22 \n\t"
-            "ld r18, x+ \n\t"
-            "mul r24, r18 \n\t"
-            "add r23, r0 \n\t"
-            "adc r22, r1 \n\t"
-            "mul r25, r21 \n\t"
-            "add r23, r0 \n\t"
-            "adc r22, r1 \n\t"
-            "st z+, r23 \n\t"
-            "ldi r23, 0 \n\t"
-            
-            "add r22, r19 \n\t"
-            "adc r23, r23 \n\t"
-            "ld r19, x+ \n\t"
-            "mul r24, r19 \n\t"
-            "add r22, r0 \n\t"
-            "adc r23, r1 \n\t"
-            "mul r25, r18 \n\t"
-            "add r22, r0 \n\t"
-            "adc r23, r1 \n\t"
-            "st z+, r22 \n\t"
-            "ldi r22, 0 \n\t")
-
-        "add r23, r20 \n\t" // 2
-        "adc r22, r22 \n\t"
-        "ld r20, x+ \n\t"
-        "mul r24, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "mul r25, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "add r22, r21 \n\t" // 1
-        "adc r23, r23 \n\t"
-        "ld r21, x+ \n\t"
-        "mul r24, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r25, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st z+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        /* Now finish the carries etc */
-        "add r23, r18 \n\t"
-        "adc r22, r22 \n\t"
-        "mul r25, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st z+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "add r22, r19 \n\t"
-        "adc r23, r23 \n\t"
-        "st z+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        "add r23, r20 \n\t"
-        "adc r22, r22 \n\t"
-        "st z+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "add r22, r21 \n\t"
-        "adc r23, r23 \n\t"
-        "st z+, r22 \n\t"
-        "st z+, r23 \n\t"
-        "eor r1, r1 \n\t" /* make r1 be 0 again */
-        
-        "sbiw r30, 37 \n\t" /* move z back to point at tmp */
-        "subi r26, 64 \n\t" /* move x back to point at product */
-        "sbc r27, __zero_reg__ \n\t"
-        
-        /* add low bytes of tmp to product, storing in result */
-        "ld r18, z+ \n\t"
-        "ld r19, x+ \n\t"
-        "add r18, r19 \n\t"
-        "st y+, r18 \n\t"
-        REPEAT(31,
-            "ld r18, z+ \n\t"
-            "ld r19, x+ \n\t"
-            "adc r18, r19 \n\t"
-            "st y+, r18 \n\t")
-        
-        "adc %[carry], __zero_reg__ \n\t" /* Store carry bit (carry flag is cleared). */
-        /* at this point x is at the end of product, y is at the end of result,
-           z is 32 bytes into tmp */
-        "sbiw r28, 32 \n\t" /* move y back to point at result */
-
-        /* do omega_mult again with the 5 relevant bytes */
-        /* z points to tmp + uECC_WORDS, x points to the end of product */
-        "sbiw r26, 32 \n\t" /* shift x back to point into the product buffer
-                               (we can overwrite it now) */
-        "ld r18, z+ \n\t"
-        "ld r19, z+ \n\t"
-        "ld r20, z+ \n\t"
-        "ld r21, z+ \n\t"
-        
-        "mul r24, r18 \n\t"
-        "st x+, r0 \n\t"
-        "mov r22, r1 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "mul r24, r19 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t" /* can't overflow */
-        "mul r25, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t" /* can't overflow */
-        "st x+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        "mul r24, r20 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "mul r25, r19 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st x+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "mul r24, r21 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "mul r25, r20 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st x+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        "add r23, r18 \n\t"
-        "adc r22, r22 \n\t"
-        "ld r18, z+ \n\t"
-        "mul r24, r18 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "mul r25, r21 \n\t"
-        "add r23, r0 \n\t"
-        "adc r22, r1 \n\t"
-        "st x+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        /* Now finish the carries etc */
-        "add r22, r19 \n\t"
-        "adc r23, r23 \n\t"
-        "mul r25, r18 \n\t"
-        "add r22, r0 \n\t"
-        "adc r23, r1 \n\t"
-        "st x+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        "add r23, r20 \n\t"
-        "adc r22, r22 \n\t"
-        "st x+, r23 \n\t"
-        "ldi r23, 0 \n\t"
-        
-        "add r22, r21 \n\t"
-        "adc r23, r23 \n\t"
-        "st x+, r22 \n\t"
-        "ldi r22, 0 \n\t"
-        
-        "add r23, r18 \n\t"
-        "adc r22, r22 \n\t"
-        "st x+, r23 \n\t"
-        "st x+, r22 \n\t"
-        "eor r1, r1 \n\t" /* make r1 be 0 again */
-        
-        /* now z points to the end of tmp, x points to the end of product
-           (y still points at result) */
-        "sbiw r26, 10 \n\t" /* move x back to point at beginning of actual data */
-        /* add into result */
-        "ld r18, x+ \n\t"
-        "ld r19, y \n\t"
-        "add r18, r19 \n\t"
-        "st y+, r18 \n\t"
-        REPEAT(9,
-            "ld r18, x+ \n\t"
-            "ld r19, y \n\t"
-            "adc r18, r19 \n\t"
-            "st y+, r18 \n\t")
-        
-        /* Done adding, now propagate carry bit */
-        REPEAT(22,
-            "ld r18, y \n\t"
-            "adc r18, __zero_reg__ \n\t"
-            "st y+, r18 \n\t")
-        
-        "adc %[carry], __zero_reg__ \n\t"    /* Store carry bit (carry flag is cleared). */
-        "sbiw r28, 32 \n\t" /* move y back to point at result */
-        
-        "sbiw r30, 1 \n\t" /* fix stack pointer */
-    	"in r0, __SREG__ \n\t"
-    	"cli \n\t"
-    	"out __SP_H__, r31 \n\t"
-    	"out __SREG__, r0 \n\t"
-    	"out __SP_L__, r30 \n\t"
-        
-        : "+x" (product), [carry] "+r" (carry)
-        : "y" (result)
-        : "r0", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r30", "r31", "cc"
-    );
-    
-    if (carry > 0) { /* first of up to two counted carries */
-        --carry;
-        uECC_vli_sub(result, result, curve_secp256k1.p, 32);
-    }
-    if (carry > 0) { /* second counted carry, if any */
-        uECC_vli_sub(result, result, curve_secp256k1.p, 32);
-    }
-    if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, 32) > 0) {
-        uECC_vli_sub(result, result, curve_secp256k1.p, 32);
-    }
-}
-#define asm_mmod_fast_secp256k1 1
-#endif /* uECC_SUPPORTS_secp256k1 */
-
-#endif /* (uECC_OPTIMIZATION_LEVEL >= 2) */
-
-/* ---- "Small" implementations ---- */
-
-#if !asm_add
-uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) { /*
-     * Looping fallback used when no unrolled uECC_vli_add was defined above
-     * (asm_add not set). Adds left and right one word per iteration with
-     * carry, stores each sum through Z, and returns the final carry.
-     * num_words doubles as the loop counter and is counted down to zero, so
-     * a value of 0 would not terminate after zero iterations.
-     */
-    volatile uECC_word_t *r = result;
-    uint8_t carry = 0;
-    uint8_t left_byte;
-    uint8_t right_byte;
-
-    __asm__ volatile (
-        "clc \n\t"
-        
-        "1: \n\t"
-        "ld %[left], x+ \n\t"  /* Load left byte. */
-        "ld %[right], y+ \n\t" /* Load right byte. */
-        "adc %[left], %[right] \n\t" /* Add. */
-        "st z+, %[left] \n\t"  /* Store the result. */
-        "dec %[i] \n\t"
-        "brne 1b \n\t"
-        
-        "adc %[carry], %[carry] \n\t" /* Store carry bit. */
-
-        : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words),
-            [carry] "+r" (carry), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
-        : 
-        : "cc"
-    );
-    return carry;
-}
-#define asm_add 1
-#endif
-
-#if !asm_sub
-/* Looping fallback used when no unrolled uECC_vli_sub was defined above
- * (asm_sub not set). Subtracts right from left one word per iteration with
- * borrow, stores each difference through Z, and returns the final borrow.
- *
- * The loop counter operand [i] is bound to num_words, as in the looping
- * uECC_vli_add; it was previously bound to an undeclared identifier `i`,
- * which does not compile.
- */
-uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    volatile uECC_word_t *r = result;
-    uint8_t borrow = 0;
-    uint8_t left_byte;
-    uint8_t right_byte;
-
-    __asm__ volatile (
-        "clc \n\t"
-        
-        "1: \n\t"
-        "ld %[left], x+ \n\t"  /* Load left byte. */
-        "ld %[right], y+ \n\t" /* Load right byte. */
-        "sbc %[left], %[right] \n\t" /* Subtract. */
-        "st z+, %[left] \n\t"  /* Store the result. */
-        "dec %[i] \n\t"
-        "brne 1b \n\t"
-        
-        "adc %[borrow], %[borrow] \n\t" /* Store carry bit in borrow. */
-
-        : "+z" (r), "+x" (left), "+y" (right), [i] "+r" (num_words),
-            [borrow] "+r" (borrow), [left] "=&r" (left_byte), [right] "=&r" (right_byte)
-        :
-        : "cc"
-    );
-    return borrow;
-}
-#define asm_sub 1
-#endif
-
-#if !asm_mult
-/* Looping fallback used when no unrolled uECC_vli_mult was defined above
- * (asm_mult not set). Computes the product column by column: for each output
- * word the partial products left[i] * right[k - 1 - i] are summed into the
- * three-byte accumulator r2:r1:r0, the low byte is stored through Z, and the
- * accumulator is shifted down by one byte.
- *
- * [k] and [i] are written with `ldi`, which only accepts r16-r31, so they
- * use the "d" constraint; the previous "r" constraint allowed the compiler
- * to pick a register that `ldi` cannot encode. The unused local alias of
- * result has been dropped.
- */
-__attribute((noinline))
-uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
-                                const uECC_word_t *left,
-                                const uECC_word_t *right,
-                                wordcount_t num_words) {
-    uint8_t r0 = 0;
-    uint8_t r1 = 0;
-    uint8_t r2 = 0;
-    uint8_t zero = 0; /* the hardware r1 is overwritten by mul, so keep a private zero */
-    uint8_t k, i;
-    
-    __asm__ volatile (
-        "ldi %[k], 1 \n\t" /* k = 1; k < num_words; ++k */
-        
-        "1: \n\t"
-        "ldi %[i], 0 \n\t"  /* i = 0; i < k; ++i */
-        
-        "add r28, %[k] \n\t" /* pre-add right ptr */
-        "adc r29, %[zero] \n\t"
-        
-        "2: \n\t"
-        "ld r0, x+ \n\t"
-        "ld r1, -y \n\t"
-        "mul r0, r1 \n\t"
-        
-        "add %[r0], r0 \n\t"
-        "adc %[r1], r1 \n\t"
-        "adc %[r2], %[zero] \n\t"
-        
-        "inc %[i] \n\t"
-        "cp %[i], %[k] \n\t"
-        "brlo 2b \n\t" /* loop if i < k */
-        
-        "sub r26, %[k] \n\t" /* fix up left ptr */
-        "sbc r27, %[zero] \n\t"
-        
-        "st z+, %[r0] \n\t"  /* Store the result. */
-        "mov %[r0], %[r1] \n\t"
-        "mov %[r1], %[r2] \n\t"
-        "mov %[r2], %[zero] \n\t"
-        
-        "inc %[k] \n\t"
-        "cp %[k], %[num] \n\t"
-        "brlo 1b \n\t" /* loop if k < num_words */
-        
-        /* second half */
-        "mov %[k], %[num] \n\t" /* k = num_words; k > 0; --k */
-        "add r28, %[num] \n\t" /* move right ptr to point at the end of right */
-        "adc r29, %[zero] \n\t"
-        
-        "1: \n\t"
-        "ldi %[i], 0 \n\t" /* i = 0; i < k; ++i */
-        
-        "2: \n\t"
-        "ld r0, x+ \n\t"
-        "ld r1, -y \n\t"
-        "mul r0, r1 \n\t"
-        
-        "add %[r0], r0 \n\t"
-        "adc %[r1], r1 \n\t"
-        "adc %[r2], %[zero] \n\t"
-        
-        "inc %[i] \n\t"
-        "cp %[i], %[k] \n\t"
-        "brlo 2b \n\t" /* loop if i < k */
-        
-        "add r28, %[k] \n\t" /* fix up right ptr */
-        "adc r29, %[zero] \n\t"
-        
-        "st z+, %[r0] \n\t"  /* Store the result. */
-        "mov %[r0], %[r1] \n\t"
-        "mov %[r1], %[r2] \n\t"
-        "mov %[r2], %[zero] \n\t"
-        
-        "dec %[k] \n\t"
-        "sub r26, %[k] \n\t" /* fix up left ptr (after k is decremented, so next time
-                                we start 1 higher) */
-        "sbc r27, %[zero] \n\t"
-        
-        "cp %[k], %[zero] \n\t"
-        "brne 1b \n\t" /* loop if k > 0 */
-        
-        "st z+, %[r0] \n\t"  /* Store last result byte. */
-        "eor r1, r1 \n\t" /* fix r1 to be 0 again */
-    
-        : "+z" (result), "+x" (left), "+y" (right),
-          [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
-          [zero] "+r" (zero), [num] "+r" (num_words),
-          [k] "=&d" (k), [i] "=&d" (i)
-        : 
-        : "r0", "cc"
-    );
-}
-#define asm_mult 1
-#endif
-
-#if (uECC_SQUARE_FUNC && !asm_square)
-uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
-                                  const uECC_word_t *left,
-                                  wordcount_t num_words) { /*
-     * Looping fallback used when uECC_SQUARE_FUNC is set and no unrolled
-     * uECC_vli_square was defined above (asm_square not set).
-     * For each output word, X walks upwards and Z walks downwards over the
-     * same input; every product of two different words is accumulated twice
-     * and the product where the two pointers meet is accumulated once. The
-     * sums go into the three-byte accumulator r2:r1:r0, whose low byte is
-     * stored to result before the accumulator is shifted down by one byte.
-     * Because Z serves as the downward pointer, the result pointer lives in
-     * the %[result] operand and is copied into Z only around each store.
-     */
-    volatile uECC_word_t *r = result;
-    uint8_t r0 = 0;
-    uint8_t r1 = 0;
-    uint8_t r2 = 0;
-    uint8_t zero = 0; /* the hardware r1 is overwritten by mul, so keep a private zero */
-    uint8_t k;
-    
-    __asm__ volatile (
-        "ldi %[k], 1 \n\t" /* k = 1; k < num_words * 2; ++k */
-        
-        "1: \n\t"
-        
-        "movw r26, %[orig] \n\t"  /* copy orig ptr to 'left' ptr */
-        "movw r30, %[orig] \n\t"  /* copy orig ptr to 'right' ptr */
-        "cp %[k], %[num] \n\t"
-        "brlo 2f \n\t"
-        "breq 2f \n\t"
-        
-        /* when k > num_words, we start from (k - num_words) on the 'left' ptr */
-        "add r26, %[k] \n\t"
-        "adc r27, %[zero] \n\t"
-        "sub r26, %[num] \n\t"
-        "sbc r27, %[zero] \n\t"
-        "add r30, %[num] \n\t" /* move right ptr to point at the end */
-        "adc r31, %[zero] \n\t"
-        "rjmp 3f \n\t"
-        
-        "2: \n\t" /* when k <= num_words, we add k to the 'right' ptr */
-        "add r30, %[k] \n\t" /* pre-add 'right' ptr */
-        "adc r31, %[zero] \n\t"
-        
-        "3: \n\t"
-        "ld r0, x+ \n\t"
-        "cp r26, r30 \n\t" /* if left == right here, then we are done after this mult
-                              (and we don't need to double) */
-        "breq 4f \n\t"
-        "ld r1, -z \n\t"
-        "mul r0, r1 \n\t"
-        
-        /* add twice since it costs the same as doubling */
-        "add %[r0], r0 \n\t"
-        "adc %[r1], r1 \n\t"
-        "adc %[r2], %[zero] \n\t"
-        "add %[r0], r0 \n\t"
-        "adc %[r1], r1 \n\t"
-        "adc %[r2], %[zero] \n\t"
-        
-        "cpse r26, r30 \n\t" /* if left == right here, then we are done */
-        "rjmp 3b \n\t"
-        "rjmp 5f \n\t" /* skip code for non-doubled mult */
-        
-        "4: \n\t"
-        "ld r1, -z \n\t"
-        "mul r0, r1 \n\t"
-        "add %[r0], r0 \n\t"
-        "adc %[r1], r1 \n\t"
-        "adc %[r2], %[zero] \n\t"
-        
-        "5: \n\t"
-        "movw r30, %[result] \n\t" /* make z point to result */
-        "st z+, %[r0] \n\t"        /* Store the result. */
-        "movw %[result], r30 \n\t" /* update result ptr*/
-        "mov %[r0], %[r1] \n\t"
-        "mov %[r1], %[r2] \n\t"
-        "mov %[r2], %[zero] \n\t"
-        
-        "inc %[k] \n\t"
-        "cp %[k], %[max] \n\t"
-        "brlo 1b \n\t" /* loop if k < num_words * 2 */
-        
-        "movw r30, %[result] \n\t"  /* make z point to result */
-        "st z+, %[r0] \n\t"  /* Store last result byte. */
-        "eor r1, r1 \n\t" /* fix r1 to be 0 again */
-    
-        : [result] "+r" (r),
-          [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2), [zero] "+r" (zero),
-          [k] "=&a" (k)
-        : [orig] "r" (left), [max] "r" ((uint8_t)(2 * num_words)),
-          [num] "r" (num_words)
-        : "r0", "r26", "r27", "r30", "r31", "cc"
-    );
-}
-#define asm_square 1
-#endif /* uECC_SQUARE_FUNC && !asm_square */
-
-#endif /* _UECC_ASM_AVR_H_ */

+ 0 - 26311
components/bootloader/subproject/components/micro-ecc/micro-ecc/asm_avr_mult_square.inc

@@ -1,26311 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_ASM_AVR_MULT_SQUARE_H_
-#define _UECC_ASM_AVR_MULT_SQUARE_H_
-
-#define FAST_MULT_ASM_20   \
-    "adiw r30, 10 \n\t"    \
-    "adiw r28, 10 \n\t"    \
-    "ld r2, x+ \n\t"       \
-    "ld r3, x+ \n\t"       \
-    "ld r4, x+ \n\t"       \
-    "ld r5, x+ \n\t"       \
-    "ld r6, x+ \n\t"       \
-    "ld r7, x+ \n\t"       \
-    "ld r8, x+ \n\t"       \
-    "ld r9, x+ \n\t"       \
-    "ld r10, x+ \n\t"      \
-    "ld r11, x+ \n\t"      \
-    "ld r12, y+ \n\t"      \
-    "ld r13, y+ \n\t"      \
-    "ld r14, y+ \n\t"      \
-    "ld r15, y+ \n\t"      \
-    "ld r16, y+ \n\t"      \
-    "ld r17, y+ \n\t"      \
-    "ld r18, y+ \n\t"      \
-    "ld r19, y+ \n\t"      \
-    "ld r20, y+ \n\t"      \
-    "ld r21, y+ \n\t"      \
-    "ldi r25, 0 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r12 \n\t"     \
-    "st z+, r0 \n\t"       \
-    "mov r22, r1 \n\t"     \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "mul r3, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r12 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r13 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r3, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r14 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r4, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r15 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r5, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r16 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r6, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r17 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r7, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r18 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r8, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r19 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r9, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r20 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r21 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "mul r11, r21 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "st z+, r24 \n\t"      \
-    "st z+, r22 \n\t"      \
-                           \
-    "sbiw r30, 30 \n\t"    \
-    "sbiw r28, 20 \n\t"    \
-    "ld r12, y+ \n\t"      \
-    "ld r13, y+ \n\t"      \
-    "ld r14, y+ \n\t"      \
-    "ld r15, y+ \n\t"      \
-    "ld r16, y+ \n\t"      \
-    "ld r17, y+ \n\t"      \
-    "ld r18, y+ \n\t"      \
-    "ld r19, y+ \n\t"      \
-    "ld r20, y+ \n\t"      \
-    "ld r21, y+ \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r12 \n\t"     \
-    "st z+, r0 \n\t"       \
-    "mov r22, r1 \n\t"     \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "mul r3, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r12 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r13 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r2, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r3, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r14 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r3, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r4, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r15 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r4, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r5, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r16 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r5, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r6, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r17 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r6, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r7, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r18 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r7, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r8, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r19 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r8, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r9, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r20 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r9, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r21 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r10, x+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r11, r21 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r11, x+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r12, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r13, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r14, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r15, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r16, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r17, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r18, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r20 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r19, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r21 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r20, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r12 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r21 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r21, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r13 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r3, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r14 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r4, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r15 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r5, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r16 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r6, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r17 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r7, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r18 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r8, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r19 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r9, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r20 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r21 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "mul r11, r21 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "st z+, r23 \n\t"      \
-    "st z+, r24 \n\t"
-
-#define FAST_MULT_ASM_20_TO_24     \
-    "cpi r18, 20 \n\t"             \
-    "brne 1f \n\t"                 \
-    "jmp 2f \n\t"                  \
-    "1: \n\t"                      \
-    "ld r2, x+ \n\t"               \
-    "ld r6, y+ \n\t"               \
-    "ld r3, x+ \n\t"               \
-    "ld r7, y+ \n\t"               \
-    "ld r4, x+ \n\t"               \
-    "ld r8, y+ \n\t"               \
-    "ld r5, x+ \n\t"               \
-    "ld r9, y+ \n\t"               \
-    "sbiw r26, 24 \n\t"            \
-    "sbiw r28, 24 \n\t"            \
-    "sbiw r30, 20 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-                                   \
-    "mul r2, r14 \n\t"             \
-    "mov r19, r0 \n\t"             \
-    "mov r20, r1 \n\t"             \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r2, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r2, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r2, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "mul r11, r9 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r12, r8 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r13, r7 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r2, r6 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "mul r12, r9 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r13, r8 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r2, r7 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r6 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "mul r13, r9 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r2, r8 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r7 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r6 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "mul r2, r9 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r8 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r7 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r6 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "mul r3, r9 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r8 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r7 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "mul r4, r9 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r8 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "mul r5, r9 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "st z+, r21 \n\t"              \
-    "st z+, r19 \n\t"              \
-    "adiw r26, 4 \n\t"             \
-    "adiw r28, 4 \n\t"
-
-#define FAST_MULT_ASM_24       \
-    "adiw r30, 20 \n\t"        \
-    "adiw r28, 20 \n\t"        \
-    "ld r2, x+ \n\t"           \
-    "ld r3, x+ \n\t"           \
-    "ld r4, x+ \n\t"           \
-    "ld r5, x+ \n\t"           \
-    "ld r12, y+ \n\t"          \
-    "ld r13, y+ \n\t"          \
-    "ld r14, y+ \n\t"          \
-    "ld r15, y+ \n\t"          \
-    "ldi r25, 0 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r12 \n\t"         \
-    "st z+, r0 \n\t"           \
-    "mov r22, r1 \n\t"         \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "st z+, r24 \n\t"          \
-    "st z+, r22 \n\t"          \
-                               \
-    "sbiw r30, 18 \n\t"        \
-    "sbiw r28, 14 \n\t"        \
-    "ld r12, y+ \n\t"          \
-    "ld r13, y+ \n\t"          \
-    "ld r14, y+ \n\t"          \
-    "ld r15, y+ \n\t"          \
-    "ld r16, y+ \n\t"          \
-    "ld r17, y+ \n\t"          \
-    "ld r18, y+ \n\t"          \
-    "ld r19, y+ \n\t"          \
-    "ld r20, y+ \n\t"          \
-    "ld r21, y+ \n\t"          \
-    "ld r6, x+ \n\t"           \
-    "ld r7, x+ \n\t"           \
-    "ld r8, x+ \n\t"           \
-    "ld r9, x+ \n\t"           \
-    "ld r10, x+ \n\t"          \
-    "ld r11, x+ \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r12 \n\t"         \
-    "st z+, r0 \n\t"           \
-    "mov r22, r1 \n\t"         \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r4, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r5, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r5, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r6, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r14, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r15, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r7, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r8, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r9, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r10, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r11, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r3, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "mul r5, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "st z+, r23 \n\t"          \
-    "st z+, r24 \n\t"          \
-                               \
-    "sbiw r30, 38 \n\t"        \
-    "sbiw r28, 24 \n\t"        \
-    "sbiw r26, 14 \n\t"        \
-    "ld r2, x+ \n\t"           \
-    "ld r12, y+ \n\t"          \
-    "ld r3, x+ \n\t"           \
-    "ld r13, y+ \n\t"          \
-    "ld r4, x+ \n\t"           \
-    "ld r14, y+ \n\t"          \
-    "ld r5, x+ \n\t"           \
-    "ld r15, y+ \n\t"          \
-    "ld r6, x+ \n\t"           \
-    "ld r16, y+ \n\t"          \
-    "ld r7, x+ \n\t"           \
-    "ld r17, y+ \n\t"          \
-    "ld r8, x+ \n\t"           \
-    "ld r18, y+ \n\t"          \
-    "ld r9, x+ \n\t"           \
-    "ld r19, y+ \n\t"          \
-    "ld r10, x+ \n\t"          \
-    "ld r20, y+ \n\t"          \
-    "ld r11, x+ \n\t"          \
-    "ld r21, y+ \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r12 \n\t"         \
-    "st z+, r0 \n\t"           \
-    "mov r22, r1 \n\t"         \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r4, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r5, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r5, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r6, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r7, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r7, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r8, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r8, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r9, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r9, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r10, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r10, x+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r11, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r11, x+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r4, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r5, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r5, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r6, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r14, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r15, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r6, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r16, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r17, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r18, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r6, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r19, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r20, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r21, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r6, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r14, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r6, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r15, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r7, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r8, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r9, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r10, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r11, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r3, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "mul r5, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "st z+, r22 \n\t"          \
-    "st z+, r23 \n\t"
-
-#define FAST_MULT_ASM_24_TO_28     \
-    "cpi r18, 24 \n\t"             \
-    "brne 1f \n\t"                 \
-    "jmp 2f \n\t"                  \
-    "1: \n\t"                      \
-    "ld r2, x+ \n\t"               \
-    "ld r6, y+ \n\t"               \
-    "ld r3, x+ \n\t"               \
-    "ld r7, y+ \n\t"               \
-    "ld r4, x+ \n\t"               \
-    "ld r8, y+ \n\t"               \
-    "ld r5, x+ \n\t"               \
-    "ld r9, y+ \n\t"               \
-    "sbiw r26, 28 \n\t"            \
-    "sbiw r28, 28 \n\t"            \
-    "sbiw r30, 24 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-                                   \
-    "mul r2, r14 \n\t"             \
-    "mov r19, r0 \n\t"             \
-    "mov r20, r1 \n\t"             \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r2, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r2, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r2, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "mul r11, r9 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r12, r8 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r13, r7 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r2, r6 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "mul r12, r9 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r13, r8 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r2, r7 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r6 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "mul r13, r9 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r2, r8 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r7 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r6 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "mul r2, r9 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r8 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r7 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r6 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "mul r3, r9 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r8 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r7 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "mul r4, r9 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r8 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "mul r5, r9 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "st z+, r19 \n\t"              \
-    "st z+, r20 \n\t"              \
-    "adiw r26, 4 \n\t"             \
-    "adiw r28, 4 \n\t"
-
-#define FAST_MULT_ASM_28   \
-    "adiw r30, 20 \n\t"    \
-    "adiw r28, 20 \n\t"    \
-    "ld r2, x+ \n\t"       \
-    "ld r3, x+ \n\t"       \
-    "ld r4, x+ \n\t"       \
-    "ld r5, x+ \n\t"       \
-    "ld r6, x+ \n\t"       \
-    "ld r7, x+ \n\t"       \
-    "ld r8, x+ \n\t"       \
-    "ld r9, x+ \n\t"       \
-    "ld r12, y+ \n\t"      \
-    "ld r13, y+ \n\t"      \
-    "ld r14, y+ \n\t"      \
-    "ld r15, y+ \n\t"      \
-    "ld r16, y+ \n\t"      \
-    "ld r17, y+ \n\t"      \
-    "ld r18, y+ \n\t"      \
-    "ld r19, y+ \n\t"      \
-    "ldi r25, 0 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r12 \n\t"     \
-    "st z+, r0 \n\t"       \
-    "mov r22, r1 \n\t"     \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "mul r3, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r3, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r4, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r5, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r6, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r7, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r8, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "mul r9, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "st z+, r23 \n\t"      \
-    "st z+, r24 \n\t"      \
-                           \
-    "sbiw r30, 26 \n\t"    \
-    "sbiw r28, 18 \n\t"    \
-    "ld r12, y+ \n\t"      \
-    "ld r13, y+ \n\t"      \
-    "ld r14, y+ \n\t"      \
-    "ld r15, y+ \n\t"      \
-    "ld r16, y+ \n\t"      \
-    "ld r17, y+ \n\t"      \
-    "ld r18, y+ \n\t"      \
-    "ld r19, y+ \n\t"      \
-    "ld r20, y+ \n\t"      \
-    "ld r21, y+ \n\t"      \
-    "ld r10, x+ \n\t"      \
-    "ld r11, x+ \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r12 \n\t"     \
-    "st z+, r0 \n\t"       \
-    "mov r22, r1 \n\t"     \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "mul r3, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r12 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r13 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r2, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r3, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r14 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r3, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r4, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r15 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r4, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r5, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r16 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r5, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r6, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r17 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r6, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r7, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r18 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r7, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r8, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r19 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r8, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r9, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r20 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r9, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r21 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r12, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r21 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r13, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r14, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r15, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r16, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r17, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r18, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r19, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r11, r19 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r3, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r4, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r5, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r6, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r7, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r8, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "mul r9, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "st z+, r22 \n\t"      \
-    "st z+, r23 \n\t"      \
-                           \
-    "sbiw r30, 46 \n\t"    \
-    "sbiw r28, 28 \n\t"    \
-    "sbiw r26, 18 \n\t"    \
-    "ld r2, x+ \n\t"       \
-    "ld r12, y+ \n\t"      \
-    "ld r3, x+ \n\t"       \
-    "ld r13, y+ \n\t"      \
-    "ld r4, x+ \n\t"       \
-    "ld r14, y+ \n\t"      \
-    "ld r5, x+ \n\t"       \
-    "ld r15, y+ \n\t"      \
-    "ld r6, x+ \n\t"       \
-    "ld r16, y+ \n\t"      \
-    "ld r7, x+ \n\t"       \
-    "ld r17, y+ \n\t"      \
-    "ld r8, x+ \n\t"       \
-    "ld r18, y+ \n\t"      \
-    "ld r9, x+ \n\t"       \
-    "ld r19, y+ \n\t"      \
-    "ld r10, x+ \n\t"      \
-    "ld r20, y+ \n\t"      \
-    "ld r11, x+ \n\t"      \
-    "ld r21, y+ \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r12 \n\t"     \
-    "st z+, r0 \n\t"       \
-    "mov r22, r1 \n\t"     \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "mul r3, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r12 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r2, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r13 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r2, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r3, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r14 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r3, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r4, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r15 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r4, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r5, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r16 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r5, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r6, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r17 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r6, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r7, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r18 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r7, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r8, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r19 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r8, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r9, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r20 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r9, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r21 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r10, x+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r11, r21 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r11, x+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r2, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r2, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r3, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r3, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r4, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r4, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r5, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r5, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r6, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r6, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r7, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r10, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r7, x+ \n\t"       \
-    "ldi r24, 0 \n\t"      \
-    "mul r8, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r10, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r8, x+ \n\t"       \
-    "ldi r22, 0 \n\t"      \
-    "mul r9, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r10, r20 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r9, x+ \n\t"       \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r21 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r12, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r12 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r21 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r13, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r13 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r14, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r14 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r15, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r15 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r16, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r16 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r17, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r17 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r18, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r18 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r19, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r19 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r20, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r20 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r19 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r21, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r21 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r20 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r12, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r12 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r21 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r13, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r13 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r12 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r14, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r14 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r13 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r15, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r15 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r14 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r16, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r16 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r15 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ld r17, y+ \n\t"      \
-    "ldi r24, 0 \n\t"      \
-    "mul r10, r17 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r11, r16 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r20 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r22, r0 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ld r18, y+ \n\t"      \
-    "ldi r22, 0 \n\t"      \
-    "mul r10, r18 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r11, r17 \n\t"    \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r2, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r21 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r20 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r23, r0 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ld r19, y+ \n\t"      \
-    "ldi r23, 0 \n\t"      \
-    "mul r10, r19 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r11, r18 \n\t"    \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r2, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r3, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r12 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r21 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r20 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "ld r0, z \n\t"        \
-    "add r24, r0 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r11, r19 \n\t"    \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r2, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r3, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r4, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r13 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r12 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r21 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r2, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r3, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r4, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r5, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r14 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r13 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r12 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r3, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r4, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r5, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r6, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r15 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r14 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r13 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r4, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r5, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r6, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r7, r16 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r15 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r14 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r5, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r6, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r7, r17 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r8, r16 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r15 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "ldi r23, 0 \n\t"      \
-    "mul r6, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r7, r18 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r8, r17 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "mul r9, r16 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "adc r23, r25 \n\t"    \
-    "st z+, r24 \n\t"      \
-                           \
-    "ldi r24, 0 \n\t"      \
-    "mul r7, r19 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r8, r18 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "mul r9, r17 \n\t"     \
-    "add r22, r0 \n\t"     \
-    "adc r23, r1 \n\t"     \
-    "adc r24, r25 \n\t"    \
-    "st z+, r22 \n\t"      \
-                           \
-    "ldi r22, 0 \n\t"      \
-    "mul r8, r19 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "mul r9, r18 \n\t"     \
-    "add r23, r0 \n\t"     \
-    "adc r24, r1 \n\t"     \
-    "adc r22, r25 \n\t"    \
-    "st z+, r23 \n\t"      \
-                           \
-    "mul r9, r19 \n\t"     \
-    "add r24, r0 \n\t"     \
-    "adc r22, r1 \n\t"     \
-    "st z+, r24 \n\t"      \
-    "st z+, r22 \n\t"
-
-#define FAST_MULT_ASM_28_TO_32     \
-    "cpi r18, 28 \n\t"             \
-    "brne 1f \n\t"                 \
-    "jmp 2f \n\t"                  \
-    "1: \n\t"                      \
-    "ld r2, x+ \n\t"               \
-    "ld r6, y+ \n\t"               \
-    "ld r3, x+ \n\t"               \
-    "ld r7, y+ \n\t"               \
-    "ld r4, x+ \n\t"               \
-    "ld r8, y+ \n\t"               \
-    "ld r5, x+ \n\t"               \
-    "ld r9, y+ \n\t"               \
-    "sbiw r26, 32 \n\t"            \
-    "sbiw r28, 32 \n\t"            \
-    "sbiw r30, 28 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-                                   \
-    "mul r2, r14 \n\t"             \
-    "mov r19, r0 \n\t"             \
-    "mov r20, r1 \n\t"             \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r2, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r2, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r2, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r10, x+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r20, r0 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "ld r11, x+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r21, r0 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "ld r12, x+ \n\t"              \
-    "ld r16, y+ \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "ld r0, z \n\t"                \
-    "add r19, r0 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "ld r13, x+ \n\t"              \
-    "ld r17, y+ \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r16 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "mul r11, r9 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r12, r8 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r13, r7 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r2, r6 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r17 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "mul r12, r9 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r13, r8 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r2, r7 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r3, r6 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "mul r13, r9 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r2, r8 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r3, r7 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r4, r6 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "ldi r19, 0 \n\t"              \
-    "mul r2, r9 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r3, r8 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r4, r7 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "mul r5, r6 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "adc r19, r25 \n\t"            \
-    "st z+, r20 \n\t"              \
-                                   \
-    "ldi r20, 0 \n\t"              \
-    "mul r3, r9 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r4, r8 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "mul r5, r7 \n\t"              \
-    "add r21, r0 \n\t"             \
-    "adc r19, r1 \n\t"             \
-    "adc r20, r25 \n\t"            \
-    "st z+, r21 \n\t"              \
-                                   \
-    "ldi r21, 0 \n\t"              \
-    "mul r4, r9 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "mul r5, r8 \n\t"              \
-    "add r19, r0 \n\t"             \
-    "adc r20, r1 \n\t"             \
-    "adc r21, r25 \n\t"            \
-    "st z+, r19 \n\t"              \
-                                   \
-    "mul r5, r9 \n\t"              \
-    "add r20, r0 \n\t"             \
-    "adc r21, r1 \n\t"             \
-    "st z+, r20 \n\t"              \
-    "st z+, r21 \n\t"
-    /* It is not necessary to move the pointers since we don't support sizes > 32 */
-
-#define FAST_MULT_ASM_32       \
-    "adiw r30, 30 \n\t"        \
-    "adiw r28, 30 \n\t"        \
-    "ld r2, x+ \n\t"           \
-    "ld r3, x+ \n\t"           \
-    "ld r12, y+ \n\t"          \
-    "ld r13, y+ \n\t"          \
-    "ldi r25, 0 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r12 \n\t"         \
-    "st z+, r0 \n\t"           \
-    "mov r22, r1 \n\t"         \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "st z+, r23 \n\t"          \
-    "st z+, r24 \n\t"          \
-                               \
-    "sbiw r30, 14 \n\t"        \
-    "sbiw r28, 12 \n\t"        \
-    "ld r12, y+ \n\t"          \
-    "ld r13, y+ \n\t"          \
-    "ld r14, y+ \n\t"          \
-    "ld r15, y+ \n\t"          \
-    "ld r16, y+ \n\t"          \
-    "ld r17, y+ \n\t"          \
-    "ld r18, y+ \n\t"          \
-    "ld r19, y+ \n\t"          \
-    "ld r20, y+ \n\t"          \
-    "ld r21, y+ \n\t"          \
-    "ld r4, x+ \n\t"           \
-    "ld r5, x+ \n\t"           \
-    "ld r6, x+ \n\t"           \
-    "ld r7, x+ \n\t"           \
-    "ld r8, x+ \n\t"           \
-    "ld r9, x+ \n\t"           \
-    "ld r10, x+ \n\t"          \
-    "ld r11, x+ \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r12 \n\t"         \
-    "st z+, r0 \n\t"           \
-    "mov r22, r1 \n\t"         \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r5, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r6, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r7, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r8, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r9, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r10, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r11, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "mul r3, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "st z+, r22 \n\t"          \
-    "st z+, r23 \n\t"          \
-                               \
-    "sbiw r30, 34 \n\t"        \
-    "sbiw r28, 22 \n\t"        \
-    "sbiw r26, 12 \n\t"        \
-    "ld r2, x+ \n\t"           \
-    "ld r12, y+ \n\t"          \
-    "ld r3, x+ \n\t"           \
-    "ld r13, y+ \n\t"          \
-    "ld r4, x+ \n\t"           \
-    "ld r14, y+ \n\t"          \
-    "ld r5, x+ \n\t"           \
-    "ld r15, y+ \n\t"          \
-    "ld r6, x+ \n\t"           \
-    "ld r16, y+ \n\t"          \
-    "ld r7, x+ \n\t"           \
-    "ld r17, y+ \n\t"          \
-    "ld r8, x+ \n\t"           \
-    "ld r18, y+ \n\t"          \
-    "ld r9, x+ \n\t"           \
-    "ld r19, y+ \n\t"          \
-    "ld r10, x+ \n\t"          \
-    "ld r20, y+ \n\t"          \
-    "ld r11, x+ \n\t"          \
-    "ld r21, y+ \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r12 \n\t"         \
-    "st z+, r0 \n\t"           \
-    "mov r22, r1 \n\t"         \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r4, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r5, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r5, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r6, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r7, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r7, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r8, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r8, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r9, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r9, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r10, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r10, x+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r11, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r11, x+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r14, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r15, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r16, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r17, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r18, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r19, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r20, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r21, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "st z+, r24 \n\t"          \
-    "st z+, r22 \n\t"          \
-                               \
-    "sbiw r30, 54 \n\t"        \
-    "sbiw r28, 32 \n\t"        \
-    "sbiw r26, 22 \n\t"        \
-    "ld r2, x+ \n\t"           \
-    "ld r12, y+ \n\t"          \
-    "ld r3, x+ \n\t"           \
-    "ld r13, y+ \n\t"          \
-    "ld r4, x+ \n\t"           \
-    "ld r14, y+ \n\t"          \
-    "ld r5, x+ \n\t"           \
-    "ld r15, y+ \n\t"          \
-    "ld r6, x+ \n\t"           \
-    "ld r16, y+ \n\t"          \
-    "ld r7, x+ \n\t"           \
-    "ld r17, y+ \n\t"          \
-    "ld r8, x+ \n\t"           \
-    "ld r18, y+ \n\t"          \
-    "ld r9, x+ \n\t"           \
-    "ld r19, y+ \n\t"          \
-    "ld r10, x+ \n\t"          \
-    "ld r20, y+ \n\t"          \
-    "ld r11, x+ \n\t"          \
-    "ld r21, y+ \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r12 \n\t"         \
-    "st z+, r0 \n\t"           \
-    "mov r22, r1 \n\t"         \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r4, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r5, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r5, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r6, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r7, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r7, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r8, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r8, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r9, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r9, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r10, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r10, x+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r11, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r11, x+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r4, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r5, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r5, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r6, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r6, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r7, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r7, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r8, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r8, x+ \n\t"           \
-    "ldi r22, 0 \n\t"          \
-    "mul r9, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r9, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r10, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r10, x+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r11, r21 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r4, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r11, x+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r2, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r4, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r2, x+ \n\t"           \
-    "ldi r23, 0 \n\t"          \
-    "mul r3, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r4, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r3, x+ \n\t"           \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r14, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r15, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r16, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r17, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r18, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r19, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r20, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r21, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r14, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r15, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r16, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r17, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r18, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r14 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r19, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r15 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r13 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r20, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r16 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r14 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ld r21, y+ \n\t"          \
-    "ldi r23, 0 \n\t"          \
-    "mul r4, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r5, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r17 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r15 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r14 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r24, r0 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ld r12, y+ \n\t"          \
-    "ldi r24, 0 \n\t"          \
-    "mul r4, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r5, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r6, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r18 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r16 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r15 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r14 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r22, r0 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ld r13, y+ \n\t"          \
-    "ldi r22, 0 \n\t"          \
-    "mul r4, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r5, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r6, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r7, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r19 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r17 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r16 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r15 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r14 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "ld r0, z \n\t"            \
-    "add r23, r0 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r5, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r6, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r7, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r8, r20 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r18 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r17 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r16 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r15 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r6, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r7, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r8, r21 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r9, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r19 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r18 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r17 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r16 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r7, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r8, r12 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r9, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r10, r20 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r19 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r18 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r17 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r8, r13 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r9, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r10, r21 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r11, r20 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r19 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r18 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r9, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r10, r12 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r11, r21 \n\t"        \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r2, r20 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r19 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "ldi r22, 0 \n\t"          \
-    "mul r10, r13 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r11, r12 \n\t"        \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r2, r21 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "mul r3, r20 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "adc r22, r25 \n\t"        \
-    "st z+, r23 \n\t"          \
-                               \
-    "ldi r23, 0 \n\t"          \
-    "mul r11, r13 \n\t"        \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r2, r12 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "mul r3, r21 \n\t"         \
-    "add r24, r0 \n\t"         \
-    "adc r22, r1 \n\t"         \
-    "adc r23, r25 \n\t"        \
-    "st z+, r24 \n\t"          \
-                               \
-    "ldi r24, 0 \n\t"          \
-    "mul r2, r13 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "mul r3, r12 \n\t"         \
-    "add r22, r0 \n\t"         \
-    "adc r23, r1 \n\t"         \
-    "adc r24, r25 \n\t"        \
-    "st z+, r22 \n\t"          \
-                               \
-    "mul r3, r13 \n\t"         \
-    "add r23, r0 \n\t"         \
-    "adc r24, r1 \n\t"         \
-    "st z+, r23 \n\t"          \
-    "st z+, r24 \n\t"
-
-#define FAST_SQUARE_ASM_20         \
-    "ld r2, x+ \n\t"               \
-    "ld r3, x+ \n\t"               \
-    "ld r4, x+ \n\t"               \
-    "ld r5, x+ \n\t"               \
-    "ld r6, x+ \n\t"               \
-    "ld r7, x+ \n\t"               \
-    "ld r8, x+ \n\t"               \
-    "ld r9, x+ \n\t"               \
-    "ld r10, x+ \n\t"              \
-    "ld r11, x+ \n\t"              \
-    "ld r12, x+ \n\t"              \
-    "ld r13, x+ \n\t"              \
-    "ld r14, x+ \n\t"              \
-    "ld r15, x+ \n\t"              \
-    "ld r16, x+ \n\t"              \
-    "ld r17, x+ \n\t"              \
-    "ld r18, x+ \n\t"              \
-    "ld r19, x+ \n\t"              \
-    "ld r20, x+ \n\t"              \
-    "ld r21, x+ \n\t"              \
-    "push r26 \n\t"                \
-    "push r27 \n\t"                \
-    "ldi r25, 0 \n\t"              \
-                                   \
-    "ldi r23, 0 \n\t"              \
-    "mul r2, r2 \n\t"              \
-    "st z+, r0 \n\t"               \
-    "mov r22, r1 \n\t"             \
-                                   \
-    "ldi r24, 0 \n\t"              \
-    "mul r2, r3 \n\t"              \
-    "lsl r0 \n\t"                  \
-    "rol r1 \n\t"                  \
-    "adc r24, r25 \n\t"            \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "st z+, r22 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r4 \n\t"              \
-    "lsl r0 \n\t"                  \
-    "rol r1 \n\t"                  \
-    "adc r22, r25 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r3, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r5 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r6 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r4, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r7 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r8 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r5, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r9 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r5, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r10 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r6, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r11 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r5, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r6, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r12 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r7, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r13 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r5, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r6, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r7, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r8, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r5, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r8, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r9, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r18 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r10, r10 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r19 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r6, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r7, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r10, r11 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r20 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r12 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r11, r11 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r2, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r3, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r4, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r5, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r6, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r7, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r8, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r9, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r10, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r11, r12 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r3, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r4, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r12, r12 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r4, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r5, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r6, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r7, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r8, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r9, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r10, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r11, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r12, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r5, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r6, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r13, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r6, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r7, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r8, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r9, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r10, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r11, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r12, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r13, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r7, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r8, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r14, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r8, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r9, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r10, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r11, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r12, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r13, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r14, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r9, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r10, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r15, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r10, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r11, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r12, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r13, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r14, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r15, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r11, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r12, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r16, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r12, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r13, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r14, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r15, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r16, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r13, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r14, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r16, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r17, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r14, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r15, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r16, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r17, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r15, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r16, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r17, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r18, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r16, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r17, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "mul r18, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r17, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r18, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r19, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r27 \n\t"            \
-    "adc r24, r26 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r26, 0 \n\t"              \
-    "mul r18, r21 \n\t"            \
-    "mov r23, r0 \n\t"             \
-    "mov r27, r1 \n\t"             \
-    "mul r19, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "adc r26, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r27 \n\t"                 \
-    "rol r26 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r27, r22 \n\t"            \
-    "adc r26, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r23, 0 \n\t"              \
-    "mul r19, r21 \n\t"            \
-    "lsl r0 \n\t"                  \
-    "rol r1 \n\t"                  \
-    "adc r23, r25 \n\t"            \
-    "add r27, r0 \n\t"             \
-    "adc r26, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r20, r20 \n\t"            \
-    "add r27, r0 \n\t"             \
-    "adc r26, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "st z+, r27 \n\t"              \
-                                   \
-    "ldi r27, 0 \n\t"              \
-    "mul r20, r21 \n\t"            \
-    "lsl r0 \n\t"                  \
-    "rol r1 \n\t"                  \
-    "adc r27, r25 \n\t"            \
-    "add r26, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r27, r25 \n\t"            \
-    "st z+, r26 \n\t"              \
-                                   \
-    "mul r21, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r27, r1 \n\t"             \
-    "st z+, r23 \n\t"              \
-    "st z+, r27 \n\t"              \
-    "pop r27 \n\t"                 \
-    "pop r26 \n\t"
-
-#define FAST_SQUARE_ASM_20_TO_24           \
-    "cpi r20, 20 \n\t"                     \
-    "brne 1f \n\t"                         \
-    "jmp 2f \n\t"                          \
-    "1: \n\t"                              \
-    "ld r2, x+ \n\t"                       \
-    "ld r3, x+ \n\t"                       \
-    "ld r4, x+ \n\t"                       \
-    "ld r5, x+ \n\t"                       \
-    "sbiw r26, 24 \n\t"                    \
-    "sbiw r30, 20 \n\t"                    \
-    "ld r6, x+ \n\t"                       \
-    "ld r7, x+ \n\t"                       \
-    "ld r8, x+ \n\t"                       \
-    "ld r9, x+ \n\t"                       \
-                                           \
-    "mul r2, r6 \n\t"                      \
-    "mov r10, r0 \n\t"                     \
-    "mov r11, r1 \n\t"                     \
-    "mov r12, r25 \n\t"                    \
-    "mov r13, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-                                           \
-    "mov r14, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-                                           \
-    "mov r15, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r16, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r17, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r18, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r19, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r21, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r22, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r23, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r24, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r28, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r29, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-                                           \
-    "lsl r10 \n\t"                         \
-    "rol r11 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "rol r13 \n\t"                         \
-    "rol r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "rol r16 \n\t"                         \
-    "rol r17 \n\t"                         \
-    "rol r18 \n\t"                         \
-    "rol r19 \n\t"                         \
-    "rol r21 \n\t"                         \
-    "rol r22 \n\t"                         \
-    "rol r23 \n\t"                         \
-    "rol r24 \n\t"                         \
-    "rol r28 \n\t"                         \
-    "rol r29 \n\t"                         \
-    "ld r0, z \n\t"                        \
-    "add r10, r0 \n\t"                     \
-    "st z+, r10 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r11, r0 \n\t"                     \
-    "st z+, r11 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r12, r0 \n\t"                     \
-    "st z+, r12 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r13, r0 \n\t"                     \
-    "st z+, r13 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r14, r0 \n\t"                     \
-    "st z+, r14 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r15, r0 \n\t"                     \
-    "st z+, r15 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r16, r0 \n\t"                     \
-    "st z+, r16 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r17, r0 \n\t"                     \
-    "st z+, r17 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r18, r0 \n\t"                     \
-    "st z+, r18 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r19, r0 \n\t"                     \
-    "st z+, r19 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r21, r0 \n\t"                     \
-    "st z+, r21 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r22, r0 \n\t"                     \
-    "st z+, r22 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r23, r0 \n\t"                     \
-    "st z+, r23 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r24, r0 \n\t"                     \
-    "st z+, r24 \n\t"                      \
-    "adc r28, r25 \n\t"                    \
-    "adc r29, r25 \n\t"                    \
-    "bst r28, 0 \n\t"                      \
-    "lsr r29 \n\t"                         \
-    "ror r28 \n\t"                         \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r10, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r11, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r12, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r13, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r14, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r15, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "lsl r28 \n\t"                         \
-    "bld r28, 0 \n\t"                      \
-    "rol r29 \n\t"                         \
-    "rol r10 \n\t"                         \
-    "rol r11 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "rol r13 \n\t"                         \
-    "rol r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "ld r0, z \n\t"                        \
-    "add r28, r0 \n\t"                     \
-    "st z+, r28 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r29, r0 \n\t"                     \
-    "st z+, r29 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r10, r0 \n\t"                     \
-    "st z+, r10 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r11, r0 \n\t"                     \
-    "st z+, r11 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r12, r0 \n\t"                     \
-    "st z+, r12 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r13, r0 \n\t"                     \
-    "st z+, r13 \n\t"                      \
-    "adc r14, r25 \n\t"                    \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "mul r2, r2 \n\t"                      \
-    "mov r16, r0 \n\t"                     \
-    "mov r17, r1 \n\t"                     \
-    "mul r3, r3 \n\t"                      \
-    "mov r18, r0 \n\t"                     \
-    "mov r19, r1 \n\t"                     \
-    "mul r4, r4 \n\t"                      \
-    "mov r21, r0 \n\t"                     \
-    "mov r22, r1 \n\t"                     \
-    "mul r5, r5 \n\t"                      \
-    "mov r23, r0 \n\t"                     \
-    "mov r24, r1 \n\t"                     \
-    "add r16, r14 \n\t"                    \
-    "adc r17, r15 \n\t"                    \
-    "adc r18, r25 \n\t"                    \
-    "adc r19, r25 \n\t"                    \
-                                           \
-    "mul r7, r5 \n\t"                      \
-    "mov r14, r0 \n\t"                     \
-    "mov r15, r1 \n\t"                     \
-    "mov r28, r25 \n\t"                    \
-    "mul r8, r4 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r9, r3 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mov r29, r25 \n\t"                    \
-    "mul r8, r5 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r9, r4 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r2, r3 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mov r10, r25 \n\t"                    \
-    "mul r9, r5 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r2, r4 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mov r11, r25 \n\t"                    \
-    "mul r2, r5 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r3, r4 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mov r12, r25 \n\t"                    \
-    "mul r3, r5 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r4, r5 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-                                           \
-    "lsl r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "rol r28 \n\t"                         \
-    "rol r29 \n\t"                         \
-    "rol r10 \n\t"                         \
-    "rol r11 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "adc r24, r25 \n\t"                    \
-    "add r16, r14 \n\t"                    \
-    "adc r17, r15 \n\t"                    \
-    "adc r18, r28 \n\t"                    \
-    "adc r19, r29 \n\t"                    \
-    "adc r21, r10 \n\t"                    \
-    "adc r22, r11 \n\t"                    \
-    "adc r23, r12 \n\t"                    \
-    "adc r24, r25 \n\t"                    \
-                                           \
-    "st z+, r16 \n\t"                      \
-    "st z+, r17 \n\t"                      \
-    "st z+, r18 \n\t"                      \
-    "st z+, r19 \n\t"                      \
-    "st z+, r21 \n\t"                      \
-    "st z+, r22 \n\t"                      \
-    "st z+, r23 \n\t"                      \
-    "st z+, r24 \n\t"                      \
-    "adiw r26, 4 \n\t"
-
-#define FAST_SQUARE_ASM_24             \
-    "ldi r25, 0 \n\t"                  \
-    "movw r28, r26 \n\t"               \
-    "ld r2, x+ \n\t"                   \
-    "ld r3, x+ \n\t"                   \
-    "adiw r28, 20 \n\t"                \
-    "ld r12, y+ \n\t"                  \
-    "ld r13, y+ \n\t"                  \
-    "adiw r30, 20 \n\t"                \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul 2, 12 \n\t"                   \
-    "st z+, r0 \n\t"                   \
-    "mov r22, r1 \n\t"                 \
-                                       \
-    "ldi r24, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ld r12, y+ \n\t"                  \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r13, y+ \n\t"                  \
-    "ldi r23, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r3, r12 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "ld r2, x+ \n\t"                   \
-    "ldi r24, 0 \n\t"                  \
-    "mul r3, r13 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r2, r12 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ld r3, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "mul r3, r13 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "st z+, r24 \n\t"                  \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "sbiw r26, 4 \n\t"                 \
-    "sbiw r30, 28 \n\t"                \
-    "ld r2, x+ \n\t"                   \
-    "ld r3, x+ \n\t"                   \
-    "ld r4, x+ \n\t"                   \
-    "ld r5, x+ \n\t"                   \
-    "ld r6, x+ \n\t"                   \
-    "ld r7, x+ \n\t"                   \
-    "ld r8, x+ \n\t"                   \
-    "ld r9, x+ \n\t"                   \
-    "ld r10, x+ \n\t"                  \
-    "ld r11, x+ \n\t"                  \
-    "ld r12, x+ \n\t"                  \
-    "ld r13, x+ \n\t"                  \
-    "ld r14, x+ \n\t"                  \
-    "ld r15, x+ \n\t"                  \
-    "ld r16, x+ \n\t"                  \
-    "ld r17, x+ \n\t"                  \
-    "ld r18, x+ \n\t"                  \
-    "ld r19, x+ \n\t"                  \
-    "ld r20, x+ \n\t"                  \
-    "ld r21, x+ \n\t"                  \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul r2, r2 \n\t"                  \
-    "st z+, r0 \n\t"                   \
-    "mov r22, r1 \n\t"                 \
-                                       \
-    "ldi r24, 0 \n\t"                  \
-    "mul r2, r3 \n\t"                  \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r6 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r4, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r7 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r8 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r5, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r9 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r10 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r6, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r11 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r12 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r7, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r14 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r8, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r15 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r16 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r9, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r17 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r18 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r10, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r19 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r20 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r11, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r21 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r2, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r3, r21 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r4, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r12, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r3, r2 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r4, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r3, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r4, r2 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r5, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r13, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r4, r3 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r5, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r4, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r5, r3 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r6, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r14, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r5, r4 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r6, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r5, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r6, r4 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r7, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r15, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r6, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r7, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r7, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r8, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r16, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r8, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r9, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r9, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r10, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r17, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r10, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r11, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r11, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r12, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r18, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r12, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r13, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r13, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r14, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r19, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r14, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r15, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r15, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r16, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r20, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r16, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r17, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r17, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r18, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r20, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r21, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r18, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r19, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r21, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r19, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r20, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r21, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r2, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r20, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r21, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r2, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r21, r5 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r2, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r3, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul r3, r5 \n\t"                  \
-    "add r28, r0 \n\t"                 \
-    "adc r29, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "add r28, r0 \n\t"                 \
-    "adc r29, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r4, r4 \n\t"                  \
-    "add r28, r0 \n\t"                 \
-    "adc r29, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r28 \n\t"                  \
-                                       \
-    "ldi r28, 0 \n\t"                  \
-    "mul r4, r5 \n\t"                  \
-    "add r29, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "add r29, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "st z+, r29 \n\t"                  \
-                                       \
-    "mul r5, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "st z+, r23 \n\t"                  \
-    "st z+, r28 \n\t"
-
-#define FAST_SQUARE_ASM_24_TO_28           \
-    "cpi r20, 24 \n\t"                     \
-    "brne 1f \n\t"                         \
-    "jmp 2f \n\t"                          \
-    "1: \n\t"                              \
-    "ld r2, x+ \n\t"                       \
-    "ld r3, x+ \n\t"                       \
-    "ld r4, x+ \n\t"                       \
-    "ld r5, x+ \n\t"                       \
-    "sbiw r26, 28 \n\t"                    \
-    "sbiw r30, 24 \n\t"                    \
-    "ld r6, x+ \n\t"                       \
-    "ld r7, x+ \n\t"                       \
-    "ld r8, x+ \n\t"                       \
-    "ld r9, x+ \n\t"                       \
-                                           \
-    "mul r2, r6 \n\t"                      \
-    "mov r10, r0 \n\t"                     \
-    "mov r11, r1 \n\t"                     \
-    "mov r12, r25 \n\t"                    \
-    "mov r13, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-                                           \
-    "mov r14, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-                                           \
-    "mov r15, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r16, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r17, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r18, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r19, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r21, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r22, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r23, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r24, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r28, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r29, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-                                           \
-    "lsl r10 \n\t"                         \
-    "rol r11 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "rol r13 \n\t"                         \
-    "rol r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "rol r16 \n\t"                         \
-    "rol r17 \n\t"                         \
-    "rol r18 \n\t"                         \
-    "rol r19 \n\t"                         \
-    "rol r21 \n\t"                         \
-    "rol r22 \n\t"                         \
-    "rol r23 \n\t"                         \
-    "rol r24 \n\t"                         \
-    "rol r28 \n\t"                         \
-    "rol r29 \n\t"                         \
-    "ld r0, z \n\t"                        \
-    "add r10, r0 \n\t"                     \
-    "st z+, r10 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r11, r0 \n\t"                     \
-    "st z+, r11 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r12, r0 \n\t"                     \
-    "st z+, r12 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r13, r0 \n\t"                     \
-    "st z+, r13 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r14, r0 \n\t"                     \
-    "st z+, r14 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r15, r0 \n\t"                     \
-    "st z+, r15 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r16, r0 \n\t"                     \
-    "st z+, r16 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r17, r0 \n\t"                     \
-    "st z+, r17 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r18, r0 \n\t"                     \
-    "st z+, r18 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r19, r0 \n\t"                     \
-    "st z+, r19 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r21, r0 \n\t"                     \
-    "st z+, r21 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r22, r0 \n\t"                     \
-    "st z+, r22 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r23, r0 \n\t"                     \
-    "st z+, r23 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r24, r0 \n\t"                     \
-    "st z+, r24 \n\t"                      \
-    "adc r28, r25 \n\t"                    \
-    "adc r29, r25 \n\t"                    \
-    "bst r28, 0 \n\t"                      \
-    "lsr r29 \n\t"                         \
-    "ror r28 \n\t"                         \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r10, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r11, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r12, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r13, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r14, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r15, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r16, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r17, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r18, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r19, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-                                           \
-    "lsl r28 \n\t"                         \
-    "bld r28, 0 \n\t"                      \
-    "rol r29 \n\t"                         \
-    "rol r10 \n\t"                         \
-    "rol r11 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "rol r13 \n\t"                         \
-    "rol r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "rol r16 \n\t"                         \
-    "rol r17 \n\t"                         \
-    "rol r18 \n\t"                         \
-    "rol r19 \n\t"                         \
-    "ld r0, z \n\t"                        \
-    "add r28, r0 \n\t"                     \
-    "st z+, r28 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r29, r0 \n\t"                     \
-    "st z+, r29 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r10, r0 \n\t"                     \
-    "st z+, r10 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r11, r0 \n\t"                     \
-    "st z+, r11 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r12, r0 \n\t"                     \
-    "st z+, r12 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r13, r0 \n\t"                     \
-    "st z+, r13 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r14, r0 \n\t"                     \
-    "st z+, r14 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r15, r0 \n\t"                     \
-    "st z+, r15 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r16, r0 \n\t"                     \
-    "st z+, r16 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r17, r0 \n\t"                     \
-    "st z+, r17 \n\t"                      \
-    "adc r18, r25 \n\t"                    \
-    "adc r19, r25 \n\t"                    \
-                                           \
-    "mul r2, r2 \n\t"                      \
-    "mov r21, r0 \n\t"                     \
-    "mov r22, r1 \n\t"                     \
-    "mul r3, r3 \n\t"                      \
-    "mov r23, r0 \n\t"                     \
-    "mov r24, r1 \n\t"                     \
-    "mul r4, r4 \n\t"                      \
-    "mov r28, r0 \n\t"                     \
-    "mov r29, r1 \n\t"                     \
-    "mul r5, r5 \n\t"                      \
-    "mov r10, r0 \n\t"                     \
-    "mov r11, r1 \n\t"                     \
-    "add r21, r18 \n\t"                    \
-    "adc r22, r19 \n\t"                    \
-    "adc r23, r25 \n\t"                    \
-    "adc r24, r25 \n\t"                    \
-                                           \
-    "mul r7, r5 \n\t"                      \
-    "mov r18, r0 \n\t"                     \
-    "mov r19, r1 \n\t"                     \
-    "mov r12, r25 \n\t"                    \
-    "mul r8, r4 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r9, r3 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mov r13, r25 \n\t"                    \
-    "mul r8, r5 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r9, r4 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r2, r3 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mov r14, r25 \n\t"                    \
-    "mul r9, r5 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r2, r4 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mov r15, r25 \n\t"                    \
-    "mul r2, r5 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r3, r4 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mov r16, r25 \n\t"                    \
-    "mul r3, r5 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r4, r5 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-                                           \
-    "lsl r18 \n\t"                         \
-    "rol r19 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "rol r13 \n\t"                         \
-    "rol r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "rol r16 \n\t"                         \
-    "adc r11, r25 \n\t"                    \
-    "add r21, r18 \n\t"                    \
-    "adc r22, r19 \n\t"                    \
-    "adc r23, r12 \n\t"                    \
-    "adc r24, r13 \n\t"                    \
-    "adc r28, r14 \n\t"                    \
-    "adc r29, r15 \n\t"                    \
-    "adc r10, r16 \n\t"                    \
-    "adc r11, r25 \n\t"                    \
-                                           \
-    "st z+, r21 \n\t"                      \
-    "st z+, r22 \n\t"                      \
-    "st z+, r23 \n\t"                      \
-    "st z+, r24 \n\t"                      \
-    "st z+, r28 \n\t"                      \
-    "st z+, r29 \n\t"                      \
-    "st z+, r10 \n\t"                      \
-    "st z+, r11 \n\t"                      \
-    "adiw r26, 4 \n\t"
-
-#define FAST_SQUARE_ASM_28         \
-    "ldi r25, 0 \n\t"              \
-    "movw r28, r26 \n\t"           \
-    "ld r2, x+ \n\t"               \
-    "ld r3, x+ \n\t"               \
-    "ld r4, x+ \n\t"               \
-    "ld r5, x+ \n\t"               \
-    "adiw r28, 20 \n\t"            \
-    "ld r12, y+ \n\t"              \
-    "ld r13, y+ \n\t"              \
-    "ld r14, y+ \n\t"              \
-    "ld r15, y+ \n\t"              \
-    "adiw r30, 20 \n\t"            \
-                                   \
-    "ldi r23, 0 \n\t"              \
-    "mul 2, 12 \n\t"               \
-    "st z+, r0 \n\t"               \
-    "mov r22, r1 \n\t"             \
-                                   \
-    "ldi r24, 0 \n\t"              \
-    "mul r2, r13 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "st z+, r22 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r3, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r23, 0 \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "st z+, r24 \n\t"              \
-                                   \
-    "ld r12, y+ \n\t"              \
-    "ldi r24, 0 \n\t"              \
-    "mul r2, r12 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "mul r3, r15 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "st z+, r22 \n\t"              \
-                                   \
-    "ld r13, y+ \n\t"              \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r3, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r14, y+ \n\t"              \
-    "ldi r23, 0 \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r3, r13 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r4, r12 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "st z+, r24 \n\t"              \
-                                   \
-    "ld r15, y+ \n\t"              \
-    "ldi r24, 0 \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "mul r4, r13 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "mul r5, r12 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "st z+, r22 \n\t"              \
-                                   \
-    "ld r2, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r3, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r2, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r3, x+ \n\t"               \
-    "ldi r23, 0 \n\t"              \
-    "mul r4, r15 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r2, r13 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "st z+, r24 \n\t"              \
-                                   \
-    "ld r4, x+ \n\t"               \
-    "ldi r24, 0 \n\t"              \
-    "mul r5, r15 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "mul r2, r14 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "mul r3, r13 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "st z+, r22 \n\t"              \
-                                   \
-    "ld r5, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r3, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r23, 0 \n\t"              \
-    "mul r3, r15 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r24, r0 \n\t"             \
-    "adc r22, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "st z+, r24 \n\t"              \
-                                   \
-    "ldi r24, 0 \n\t"              \
-    "mul r4, r15 \n\t"             \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "st z+, r22 \n\t"              \
-                                   \
-    "mul r5, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "st z+, r23 \n\t"              \
-    "st z+, r24 \n\t"              \
-                                   \
-    "sbiw r26, 8 \n\t"             \
-    "sbiw r30, 36 \n\t"            \
-    "ld r2, x+ \n\t"               \
-    "ld r3, x+ \n\t"               \
-    "ld r4, x+ \n\t"               \
-    "ld r5, x+ \n\t"               \
-    "ld r6, x+ \n\t"               \
-    "ld r7, x+ \n\t"               \
-    "ld r8, x+ \n\t"               \
-    "ld r9, x+ \n\t"               \
-    "ld r10, x+ \n\t"              \
-    "ld r11, x+ \n\t"              \
-    "ld r12, x+ \n\t"              \
-    "ld r13, x+ \n\t"              \
-    "ld r14, x+ \n\t"              \
-    "ld r15, x+ \n\t"              \
-    "ld r16, x+ \n\t"              \
-    "ld r17, x+ \n\t"              \
-    "ld r18, x+ \n\t"              \
-    "ld r19, x+ \n\t"              \
-    "ld r20, x+ \n\t"              \
-    "ld r21, x+ \n\t"              \
-                                   \
-    "ldi r23, 0 \n\t"              \
-    "mul r2, r2 \n\t"              \
-    "st z+, r0 \n\t"               \
-    "mov r22, r1 \n\t"             \
-                                   \
-    "ldi r24, 0 \n\t"              \
-    "mul r2, r3 \n\t"              \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "add r22, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "st z+, r22 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r3, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r5 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r6 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r4, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r7 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r8 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r5, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r9 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r10 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r6, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r11 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r12 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r7, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r13 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r14 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r8, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r15 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r16 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r9, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r17 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r10 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r18 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r11 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r10, r10 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r19 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r12 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r11 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r2, r20 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r3, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r13 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r12 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r11, r11 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r14 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r12 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r2, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r3, r21 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r4, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r15 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r12, r12 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r3, r2 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r4, r21 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r16 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r3, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r4, r2 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r5, r21 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r6, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r17 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r13, r13 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r4, r3 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r5, r2 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r21 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r18 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r13, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r4, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r5, r3 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r6, r2 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r7, r21 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r19 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r14, r14 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r5, r4 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r6, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r7, r2 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r21 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r20 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r13, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r14, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r5, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r6, r4 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r7, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r8, r2 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r21 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r15, r15 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r6, r5 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r7, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r8, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r2 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r13, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r14, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r15, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r6, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r7, r5 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r8, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r9, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r16, r16 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r7, r6 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r8, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r9, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r13, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r14, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r15, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r16, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r7, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r8, r6 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r9, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r10, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r16, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r17, r17 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r8, r7 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r9, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r10, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r13, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r14, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r15, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r16, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r17, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r8, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r9, r7 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r10, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r11, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r16, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r17, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r18, r18 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r9, r8 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r10, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r11, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r13, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r14, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r15, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r16, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r17, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r18, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ld r9, x+ \n\t"               \
-    "ldi r22, 0 \n\t"              \
-    "mul r10, r8 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r11, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r12, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r16, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r17, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r18, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r24, r25 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r19, r19 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r10, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r11, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r12, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r13, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r14, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r15, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r16, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r17, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r18, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r19, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "ld r0, z \n\t"                \
-    "add r23, r0 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r11, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r12, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r13, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r14, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r16, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r17, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r18, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r19, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r20, r20 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r12, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r13, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r14, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r15, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r16, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r17, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r18, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r19, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r20, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r13, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r14, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r15, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r16, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r17, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r18, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r19, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r20, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r21, r21 \n\t"            \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r14, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r15, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r16, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r17, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r18, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r19, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r20, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r21, r2 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r15, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r16, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r17, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r18, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r19, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r20, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r21, r3 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r2, r2 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r16, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r17, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r18, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r19, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r20, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r21, r4 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r2, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r17, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r18, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r19, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r20, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r21, r5 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r2, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r3, r3 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r18, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r19, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r20, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r21, r6 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r2, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r3, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r19, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r20, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r21, r7 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r2, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r3, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r4, r4 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r20, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r21, r8 \n\t"             \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r2, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r3, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r21, r9 \n\t"             \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r2, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r3, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r4, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r5, r5 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r2, r9 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r3, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r4, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r5, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r3, r9 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r4, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "mul r5, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r6, r6 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r4, r9 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r5, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "mul r6, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r22, 0 \n\t"              \
-    "mul r5, r9 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r24, r1 \n\t"             \
-    "mul r6, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r24 \n\t"                 \
-    "rol r22 \n\t"                 \
-    "mul r7, r7 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r24, r1 \n\t"             \
-    "adc r22, r25 \n\t"            \
-    "add r23, r28 \n\t"            \
-    "adc r24, r29 \n\t"            \
-    "adc r22, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r29, 0 \n\t"              \
-    "mul r6, r9 \n\t"              \
-    "mov r23, r0 \n\t"             \
-    "mov r28, r1 \n\t"             \
-    "mul r7, r8 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "adc r29, r25 \n\t"            \
-    "lsl r23 \n\t"                 \
-    "rol r28 \n\t"                 \
-    "rol r29 \n\t"                 \
-    "add r23, r24 \n\t"            \
-    "adc r28, r22 \n\t"            \
-    "adc r29, r25 \n\t"            \
-    "st z+, r23 \n\t"              \
-                                   \
-    "ldi r23, 0 \n\t"              \
-    "mul r7, r9 \n\t"              \
-    "add r28, r0 \n\t"             \
-    "adc r29, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "add r28, r0 \n\t"             \
-    "adc r29, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "mul r8, r8 \n\t"              \
-    "add r28, r0 \n\t"             \
-    "adc r29, r1 \n\t"             \
-    "adc r23, r25 \n\t"            \
-    "st z+, r28 \n\t"              \
-                                   \
-    "ldi r28, 0 \n\t"              \
-    "mul r8, r9 \n\t"              \
-    "add r29, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "add r29, r0 \n\t"             \
-    "adc r23, r1 \n\t"             \
-    "adc r28, r25 \n\t"            \
-    "st z+, r29 \n\t"              \
-                                   \
-    "mul r9, r9 \n\t"              \
-    "add r23, r0 \n\t"             \
-    "adc r28, r1 \n\t"             \
-    "st z+, r23 \n\t"              \
-    "st z+, r28 \n\t"
-
-#define FAST_SQUARE_ASM_28_TO_32           \
-    "cpi r20, 28 \n\t"                     \
-    "brne 1f \n\t"                         \
-    "jmp 2f \n\t"                          \
-    "1: \n\t"                              \
-    "ld r2, x+ \n\t"                       \
-    "ld r3, x+ \n\t"                       \
-    "ld r4, x+ \n\t"                       \
-    "ld r5, x+ \n\t"                       \
-    "sbiw r26, 32 \n\t"                    \
-    "sbiw r30, 28 \n\t"                    \
-    "ld r6, x+ \n\t"                       \
-    "ld r7, x+ \n\t"                       \
-    "ld r8, x+ \n\t"                       \
-    "ld r9, x+ \n\t"                       \
-                                           \
-    "mul r2, r6 \n\t"                      \
-    "mov r10, r0 \n\t"                     \
-    "mov r11, r1 \n\t"                     \
-    "mov r12, r25 \n\t"                    \
-    "mov r13, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-                                           \
-    "mov r14, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-                                           \
-    "mov r15, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r16, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r17, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r18, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r19, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r21, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r22, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r23, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r24, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r28, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r28, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r29, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r28, r1 \n\t"                     \
-    "adc r29, r25 \n\t"                    \
-                                           \
-    "lsl r10 \n\t"                         \
-    "rol r11 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "rol r13 \n\t"                         \
-    "rol r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "rol r16 \n\t"                         \
-    "rol r17 \n\t"                         \
-    "rol r18 \n\t"                         \
-    "rol r19 \n\t"                         \
-    "rol r21 \n\t"                         \
-    "rol r22 \n\t"                         \
-    "rol r23 \n\t"                         \
-    "rol r24 \n\t"                         \
-    "rol r28 \n\t"                         \
-    "rol r29 \n\t"                         \
-    "ld r0, z \n\t"                        \
-    "add r10, r0 \n\t"                     \
-    "st z+, r10 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r11, r0 \n\t"                     \
-    "st z+, r11 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r12, r0 \n\t"                     \
-    "st z+, r12 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r13, r0 \n\t"                     \
-    "st z+, r13 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r14, r0 \n\t"                     \
-    "st z+, r14 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r15, r0 \n\t"                     \
-    "st z+, r15 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r16, r0 \n\t"                     \
-    "st z+, r16 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r17, r0 \n\t"                     \
-    "st z+, r17 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r18, r0 \n\t"                     \
-    "st z+, r18 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r19, r0 \n\t"                     \
-    "st z+, r19 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r21, r0 \n\t"                     \
-    "st z+, r21 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r22, r0 \n\t"                     \
-    "st z+, r22 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r23, r0 \n\t"                     \
-    "st z+, r23 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r24, r0 \n\t"                     \
-    "st z+, r24 \n\t"                      \
-    "adc r28, r25 \n\t"                    \
-    "adc r29, r25 \n\t"                    \
-    "bst r28, 0 \n\t"                      \
-    "lsr r29 \n\t"                         \
-    "ror r28 \n\t"                         \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r10, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r28, r0 \n\t"                     \
-    "adc r29, r1 \n\t"                     \
-    "adc r10, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r11, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r29, r0 \n\t"                     \
-    "adc r10, r1 \n\t"                     \
-    "adc r11, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r12, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r10, r0 \n\t"                     \
-    "adc r11, r1 \n\t"                     \
-    "adc r12, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r13, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r11, r0 \n\t"                     \
-    "adc r12, r1 \n\t"                     \
-    "adc r13, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r14, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r12, r0 \n\t"                     \
-    "adc r13, r1 \n\t"                     \
-    "adc r14, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r15, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r13, r0 \n\t"                     \
-    "adc r14, r1 \n\t"                     \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r16, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r14, r0 \n\t"                     \
-    "adc r15, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r17, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r15, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r18, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r19, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-                                           \
-    "ld r6, x+ \n\t"                       \
-    "mov r21, r25 \n\t"                    \
-    "mul r2, r6 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r3, r9 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r4, r8 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r5, r7 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-                                           \
-    "ld r7, x+ \n\t"                       \
-    "mov r22, r25 \n\t"                    \
-    "mul r2, r7 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r3, r6 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r4, r9 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-    "mul r5, r8 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-    "adc r22, r25 \n\t"                    \
-                                           \
-    "ld r8, x+ \n\t"                       \
-    "mov r23, r25 \n\t"                    \
-    "mul r2, r8 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r3, r7 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r4, r6 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-    "mul r5, r9 \n\t"                      \
-    "add r21, r0 \n\t"                     \
-    "adc r22, r1 \n\t"                     \
-    "adc r23, r25 \n\t"                    \
-                                           \
-    "ld r9, x+ \n\t"                       \
-    "mov r24, r25 \n\t"                    \
-    "mul r2, r9 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r3, r8 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r4, r7 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-    "mul r5, r6 \n\t"                      \
-    "add r22, r0 \n\t"                     \
-    "adc r23, r1 \n\t"                     \
-    "adc r24, r25 \n\t"                    \
-                                           \
-    "lsl r28 \n\t"                         \
-    "bld r28, 0 \n\t"                      \
-    "rol r29 \n\t"                         \
-    "rol r10 \n\t"                         \
-    "rol r11 \n\t"                         \
-    "rol r12 \n\t"                         \
-    "rol r13 \n\t"                         \
-    "rol r14 \n\t"                         \
-    "rol r15 \n\t"                         \
-    "rol r16 \n\t"                         \
-    "rol r17 \n\t"                         \
-    "rol r18 \n\t"                         \
-    "rol r19 \n\t"                         \
-    "rol r21 \n\t"                         \
-    "rol r22 \n\t"                         \
-    "rol r23 \n\t"                         \
-    "rol r24 \n\t"                         \
-    "ld r0, z \n\t"                        \
-    "add r28, r0 \n\t"                     \
-    "st z+, r28 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r29, r0 \n\t"                     \
-    "st z+, r29 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r10, r0 \n\t"                     \
-    "st z+, r10 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r11, r0 \n\t"                     \
-    "st z+, r11 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r12, r0 \n\t"                     \
-    "st z+, r12 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r13, r0 \n\t"                     \
-    "st z+, r13 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r14, r0 \n\t"                     \
-    "st z+, r14 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r15, r0 \n\t"                     \
-    "st z+, r15 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r16, r0 \n\t"                     \
-    "st z+, r16 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r17, r0 \n\t"                     \
-    "st z+, r17 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r18, r0 \n\t"                     \
-    "st z+, r18 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r19, r0 \n\t"                     \
-    "st z+, r19 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r21, r0 \n\t"                     \
-    "st z+, r21 \n\t"                      \
-    "ld r0, z \n\t"                        \
-    "adc r22, r0 \n\t"                     \
-    "st z+, r22 \n\t"                      \
-    "adc r23, r25 \n\t"                    \
-    "adc r24, r25 \n\t"                    \
-                                           \
-    "mul r2, r2 \n\t"                      \
-    "mov r28, r0 \n\t"                     \
-    "mov r29, r1 \n\t"                     \
-    "mul r3, r3 \n\t"                      \
-    "mov r10, r0 \n\t"                     \
-    "mov r11, r1 \n\t"                     \
-    "mul r4, r4 \n\t"                      \
-    "mov r12, r0 \n\t"                     \
-    "mov r13, r1 \n\t"                     \
-    "mul r5, r5 \n\t"                      \
-    "mov r14, r0 \n\t"                     \
-    "mov r15, r1 \n\t"                     \
-    "add r28, r23 \n\t"                    \
-    "adc r29, r24 \n\t"                    \
-    "adc r10, r25 \n\t"                    \
-    "adc r11, r25 \n\t"                    \
-                                           \
-    "mul r7, r5 \n\t"                      \
-    "mov r23, r0 \n\t"                     \
-    "mov r24, r1 \n\t"                     \
-    "mov r16, r25 \n\t"                    \
-    "mul r8, r4 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mul r9, r3 \n\t"                      \
-    "add r23, r0 \n\t"                     \
-    "adc r24, r1 \n\t"                     \
-    "adc r16, r25 \n\t"                    \
-    "mov r17, r25 \n\t"                    \
-    "mul r8, r5 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r9, r4 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mul r2, r3 \n\t"                      \
-    "add r24, r0 \n\t"                     \
-    "adc r16, r1 \n\t"                     \
-    "adc r17, r25 \n\t"                    \
-    "mov r18, r25 \n\t"                    \
-    "mul r9, r5 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mul r2, r4 \n\t"                      \
-    "add r16, r0 \n\t"                     \
-    "adc r17, r1 \n\t"                     \
-    "adc r18, r25 \n\t"                    \
-    "mov r19, r25 \n\t"                    \
-    "mul r2, r5 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mul r3, r4 \n\t"                      \
-    "add r17, r0 \n\t"                     \
-    "adc r18, r1 \n\t"                     \
-    "adc r19, r25 \n\t"                    \
-    "mov r21, r25 \n\t"                    \
-    "mul r3, r5 \n\t"                      \
-    "add r18, r0 \n\t"                     \
-    "adc r19, r1 \n\t"                     \
-    "adc r21, r25 \n\t"                    \
-    "mul r4, r5 \n\t"                      \
-    "add r19, r0 \n\t"                     \
-    "adc r21, r1 \n\t"                     \
-                                           \
-    "lsl r23 \n\t"                         \
-    "rol r24 \n\t"                         \
-    "rol r16 \n\t"                         \
-    "rol r17 \n\t"                         \
-    "rol r18 \n\t"                         \
-    "rol r19 \n\t"                         \
-    "rol r21 \n\t"                         \
-    "adc r15, r25 \n\t"                    \
-    "add r28, r23 \n\t"                    \
-    "adc r29, r24 \n\t"                    \
-    "adc r10, r16 \n\t"                    \
-    "adc r11, r17 \n\t"                    \
-    "adc r12, r18 \n\t"                    \
-    "adc r13, r19 \n\t"                    \
-    "adc r14, r21 \n\t"                    \
-    "adc r15, r25 \n\t"                    \
-                                           \
-    "st z+, r28 \n\t"                      \
-    "st z+, r29 \n\t"                      \
-    "st z+, r10 \n\t"                      \
-    "st z+, r11 \n\t"                      \
-    "st z+, r12 \n\t"                      \
-    "st z+, r13 \n\t"                      \
-    "st z+, r14 \n\t"                      \
-    "st z+, r15 \n\t"                      \
-    "adiw r26, 4 \n\t"
-
-#define FAST_SQUARE_ASM_32             \
-    "ldi r25, 0 \n\t"                  \
-    "movw r28, r26 \n\t"               \
-    "ld r2, x+ \n\t"                   \
-    "ld r3, x+ \n\t"                   \
-    "ld r4, x+ \n\t"                   \
-    "ld r5, x+ \n\t"                   \
-    "ld r6, x+ \n\t"                   \
-    "ld r7, x+ \n\t"                   \
-    "adiw r28, 20 \n\t"                \
-    "ld r12, y+ \n\t"                  \
-    "ld r13, y+ \n\t"                  \
-    "ld r14, y+ \n\t"                  \
-    "ld r15, y+ \n\t"                  \
-    "ld r16, y+ \n\t"                  \
-    "ld r17, y+ \n\t"                  \
-    "adiw r30, 20 \n\t"                \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul 2, 12 \n\t"                   \
-    "st z+, r0 \n\t"                   \
-    "mov r22, r1 \n\t"                 \
-                                       \
-    "ldi r24, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul r2, r15 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r3, r14 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "ldi r24, 0 \n\t"                  \
-    "mul r2, r16 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r3, r15 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r4, r14 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r12, y+ \n\t"                  \
-    "ldi r23, 0 \n\t"                  \
-    "mul r2, r12 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r3, r17 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r4, r16 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r5, r15 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "ld r13, y+ \n\t"                  \
-    "ldi r24, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r3, r12 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r4, r17 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r5, r16 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ld r14, y+ \n\t"                  \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r15, y+ \n\t"                  \
-    "ldi r23, 0 \n\t"                  \
-    "mul r2, r15 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r3, r14 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r4, r13 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r5, r12 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r6, r17 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "ld r16, y+ \n\t"                  \
-    "ldi r24, 0 \n\t"                  \
-    "mul r2, r16 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r3, r15 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r4, r14 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r5, r13 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r6, r12 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r7, r17 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ld r17, y+ \n\t"                  \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r2, x+ \n\t"                   \
-    "ldi r23, 0 \n\t"                  \
-    "mul r3, r17 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r4, r16 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r5, r15 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r6, r14 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r7, r13 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r2, r12 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "ld r3, x+ \n\t"                   \
-    "ldi r24, 0 \n\t"                  \
-    "mul r4, r17 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r5, r16 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r6, r15 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r7, r14 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r2, r13 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ld r4, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r5, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r2, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r5, x+ \n\t"                   \
-    "ldi r23, 0 \n\t"                  \
-    "mul r6, r17 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r7, r16 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r2, r15 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r3, r14 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "ld r6, x+ \n\t"                   \
-    "ldi r24, 0 \n\t"                  \
-    "mul r7, r17 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r2, r16 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r3, r15 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r4, r14 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ld r7, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul r3, r17 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r4, r16 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r5, r15 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "ldi r24, 0 \n\t"                  \
-    "mul r4, r17 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "mul r5, r16 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r5, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul r6, r17 \n\t"                 \
-    "add r24, r0 \n\t"                 \
-    "adc r22, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r24 \n\t"                  \
-                                       \
-    "mul r7, r17 \n\t"                 \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "st z+, r22 \n\t"                  \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "sbiw r26, 12 \n\t"                \
-    "sbiw r30, 44 \n\t"                \
-    "ld r2, x+ \n\t"                   \
-    "ld r3, x+ \n\t"                   \
-    "ld r4, x+ \n\t"                   \
-    "ld r5, x+ \n\t"                   \
-    "ld r6, x+ \n\t"                   \
-    "ld r7, x+ \n\t"                   \
-    "ld r8, x+ \n\t"                   \
-    "ld r9, x+ \n\t"                   \
-    "ld r10, x+ \n\t"                  \
-    "ld r11, x+ \n\t"                  \
-    "ld r12, x+ \n\t"                  \
-    "ld r13, x+ \n\t"                  \
-    "ld r14, x+ \n\t"                  \
-    "ld r15, x+ \n\t"                  \
-    "ld r16, x+ \n\t"                  \
-    "ld r17, x+ \n\t"                  \
-    "ld r18, x+ \n\t"                  \
-    "ld r19, x+ \n\t"                  \
-    "ld r20, x+ \n\t"                  \
-    "ld r21, x+ \n\t"                  \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul r2, r2 \n\t"                  \
-    "st z+, r0 \n\t"                   \
-    "mov r22, r1 \n\t"                 \
-                                       \
-    "ldi r24, 0 \n\t"                  \
-    "mul r2, r3 \n\t"                  \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "add r22, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "st z+, r22 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r6 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r4, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r7 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r8 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r5, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r9 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r10 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r6, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r11 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r12 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r7, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r14 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r8, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r15 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r16 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r9, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r17 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r18 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r10, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r19 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r2, r20 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r3, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r13 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r11, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r21 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r14 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r2, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r3, r21 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r4, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r15 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r12, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r3, r2 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r4, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r16 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r3, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r4, r2 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r5, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r17 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r13, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r4, r3 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r5, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r18 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r4, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r5, r3 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r6, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r19 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r14, r14 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r5, r4 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r6, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r20 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r5, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r6, r4 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r7, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r21 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r15, r15 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r6, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r7, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r6, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r7, r5 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r8, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r16, r16 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r7, r6 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r8, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r7, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r8, r6 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r9, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r10, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r17, r17 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r8, r7 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r9, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r8, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r9, r7 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r10, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r11, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r18, r18 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r9, r8 \n\t"                  \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r10, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r11, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r9, x+ \n\t"                   \
-    "ldi r22, 0 \n\t"                  \
-    "mul r10, r8 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r11, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r12, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r19, r19 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r10, r9 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r11, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r12, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r10, x+ \n\t"                  \
-    "ldi r22, 0 \n\t"                  \
-    "mul r11, r9 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r12, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r13, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r20, r20 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r11, r10 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r12, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r13, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r11, x+ \n\t"                  \
-    "ldi r22, 0 \n\t"                  \
-    "mul r12, r10 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r13, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r14, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r20, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r21, r21 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r12, r11 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r13, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r14, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r21, r2 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r12, x+ \n\t"                  \
-    "ldi r22, 0 \n\t"                  \
-    "mul r13, r11 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r14, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r15, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r20, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r21, r3 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r2, r2 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r13, r12 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r14, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r15, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r21, r4 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r2, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ld r13, x+ \n\t"                  \
-    "ldi r22, 0 \n\t"                  \
-    "mul r14, r12 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r15, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r16, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r20, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r21, r5 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r2, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r25 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r3, r3 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r14, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r15, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r16, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r17, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r21, r6 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r2, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r3, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "ld r0, z \n\t"                    \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r15, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r16, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r17, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r18, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r20, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r21, r7 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r2, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r4, r4 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r16, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r17, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r18, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r19, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r21, r8 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r2, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r3, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r17, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r18, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r19, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r20, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r21, r9 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r2, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r5, r5 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r18, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r19, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r20, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r21, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r2, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r3, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r19, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r20, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r21, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r2, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r6, r6 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r20, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r21, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r2, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r3, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r21, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r2, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r3, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r4, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r7, r7 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r2, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r3, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r4, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r5, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r3, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r4, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r5, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r6, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r8, r8 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r4, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r5, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r6, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r7, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r5, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r6, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r7, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r8, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r9, r9 \n\t"                  \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r6, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r7, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r8, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r9, r10 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r7, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r8, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "mul r9, r11 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r10, r10 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r8, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r9, r12 \n\t"                 \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "mul r10, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r22, 0 \n\t"                  \
-    "mul r9, r13 \n\t"                 \
-    "mov r23, r0 \n\t"                 \
-    "mov r24, r1 \n\t"                 \
-    "mul r10, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r24 \n\t"                     \
-    "rol r22 \n\t"                     \
-    "mul r11, r11 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r24, r1 \n\t"                 \
-    "adc r22, r25 \n\t"                \
-    "add r23, r28 \n\t"                \
-    "adc r24, r29 \n\t"                \
-    "adc r22, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r29, 0 \n\t"                  \
-    "mul r10, r13 \n\t"                \
-    "mov r23, r0 \n\t"                 \
-    "mov r28, r1 \n\t"                 \
-    "mul r11, r12 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "adc r29, r25 \n\t"                \
-    "lsl r23 \n\t"                     \
-    "rol r28 \n\t"                     \
-    "rol r29 \n\t"                     \
-    "add r23, r24 \n\t"                \
-    "adc r28, r22 \n\t"                \
-    "adc r29, r25 \n\t"                \
-    "st z+, r23 \n\t"                  \
-                                       \
-    "ldi r23, 0 \n\t"                  \
-    "mul r11, r13 \n\t"                \
-    "add r28, r0 \n\t"                 \
-    "adc r29, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "add r28, r0 \n\t"                 \
-    "adc r29, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "mul r12, r12 \n\t"                \
-    "add r28, r0 \n\t"                 \
-    "adc r29, r1 \n\t"                 \
-    "adc r23, r25 \n\t"                \
-    "st z+, r28 \n\t"                  \
-                                       \
-    "ldi r28, 0 \n\t"                  \
-    "mul r12, r13 \n\t"                \
-    "add r29, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "add r29, r0 \n\t"                 \
-    "adc r23, r1 \n\t"                 \
-    "adc r28, r25 \n\t"                \
-    "st z+, r29 \n\t"                  \
-                                       \
-    "mul r13, r13 \n\t"                \
-    "add r23, r0 \n\t"                 \
-    "adc r28, r1 \n\t"                 \
-    "st z+, r23 \n\t"                  \
-    "st z+, r28 \n\t"
-
-#endif /* _UECC_ASM_AVR_MULT_SQUARE_H_ */

+ 0 - 1248
components/bootloader/subproject/components/micro-ecc/micro-ecc/curve-specific.inc

@@ -1,1248 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_CURVE_SPECIFIC_H_
-#define _UECC_CURVE_SPECIFIC_H_
-
-#define num_bytes_secp160r1 20
-#define num_bytes_secp192r1 24
-#define num_bytes_secp224r1 28
-#define num_bytes_secp256r1 32
-#define num_bytes_secp256k1 32
-
-#if (uECC_WORD_SIZE == 1)
-
-#define num_words_secp160r1 20
-#define num_words_secp192r1 24
-#define num_words_secp224r1 28
-#define num_words_secp256r1 32
-#define num_words_secp256k1 32
-
-#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) \
-    0x##a, 0x##b, 0x##c, 0x##d, 0x##e, 0x##f, 0x##g, 0x##h
-#define BYTES_TO_WORDS_4(a, b, c, d) 0x##a, 0x##b, 0x##c, 0x##d
-
-#elif (uECC_WORD_SIZE == 4)
-
-#define num_words_secp160r1 5
-#define num_words_secp192r1 6
-#define num_words_secp224r1 7
-#define num_words_secp256r1 8
-#define num_words_secp256k1 8
-
-#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##d##c##b##a, 0x##h##g##f##e
-#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a
-
-#elif (uECC_WORD_SIZE == 8)
-
-#define num_words_secp160r1 3
-#define num_words_secp192r1 3
-#define num_words_secp224r1 4
-#define num_words_secp256r1 4
-#define num_words_secp256k1 4
-
-#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##h##g##f##e##d##c##b##a##ull
-#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a##ull
-
-#endif /* uECC_WORD_SIZE */
-
-#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \
-    uECC_SUPPORTS_secp224r1 || uECC_SUPPORTS_secp256r1
-static void double_jacobian_default(uECC_word_t * X1,
-                                    uECC_word_t * Y1,
-                                    uECC_word_t * Z1,
-                                    uECC_Curve curve) {
-    /* t1 = X, t2 = Y, t3 = Z */
-    uECC_word_t t4[uECC_MAX_WORDS];
-    uECC_word_t t5[uECC_MAX_WORDS];
-    wordcount_t num_words = curve->num_words;
-
-    if (uECC_vli_isZero(Z1, num_words)) {
-        return;
-    }
-
-    uECC_vli_modSquare_fast(t4, Y1, curve);   /* t4 = y1^2 */
-    uECC_vli_modMult_fast(t5, X1, t4, curve); /* t5 = x1*y1^2 = A */
-    uECC_vli_modSquare_fast(t4, t4, curve);   /* t4 = y1^4 */
-    uECC_vli_modMult_fast(Y1, Y1, Z1, curve); /* t2 = y1*z1 = z3 */
-    uECC_vli_modSquare_fast(Z1, Z1, curve);   /* t3 = z1^2 */
-
-    uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = x1 + z1^2 */
-    uECC_vli_modAdd(Z1, Z1, Z1, curve->p, num_words); /* t3 = 2*z1^2 */
-    uECC_vli_modSub(Z1, X1, Z1, curve->p, num_words); /* t3 = x1 - z1^2 */
-    uECC_vli_modMult_fast(X1, X1, Z1, curve);                /* t1 = x1^2 - z1^4 */
-
-    uECC_vli_modAdd(Z1, X1, X1, curve->p, num_words); /* t3 = 2*(x1^2 - z1^4) */
-    uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = 3*(x1^2 - z1^4) */
-    if (uECC_vli_testBit(X1, 0)) {
-        uECC_word_t l_carry = uECC_vli_add(X1, X1, curve->p, num_words);
-        uECC_vli_rshift1(X1, num_words);
-        X1[num_words - 1] |= l_carry << (uECC_WORD_BITS - 1);
-    } else {
-        uECC_vli_rshift1(X1, num_words);
-    }
-    /* t1 = 3/2*(x1^2 - z1^4) = B */
-
-    uECC_vli_modSquare_fast(Z1, X1, curve);                  /* t3 = B^2 */
-    uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - A */
-    uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - 2A = x3 */
-    uECC_vli_modSub(t5, t5, Z1, curve->p, num_words); /* t5 = A - x3 */
-    uECC_vli_modMult_fast(X1, X1, t5, curve);                /* t1 = B * (A - x3) */
-    uECC_vli_modSub(t4, X1, t4, curve->p, num_words); /* t4 = B * (A - x3) - y1^4 = y3 */
-
-    uECC_vli_set(X1, Z1, num_words);
-    uECC_vli_set(Z1, Y1, num_words);
-    uECC_vli_set(Y1, t4, num_words);
-}
-
-/* Computes result = x^3 + ax + b. result must not overlap x. */
-static void x_side_default(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve) {
-    uECC_word_t _3[uECC_MAX_WORDS] = {3}; /* -a = 3 */
-    wordcount_t num_words = curve->num_words;
-
-    uECC_vli_modSquare_fast(result, x, curve);                             /* r = x^2 */
-    uECC_vli_modSub(result, result, _3, curve->p, num_words);       /* r = x^2 - 3 */
-    uECC_vli_modMult_fast(result, result, x, curve);                       /* r = x^3 - 3x */
-    uECC_vli_modAdd(result, result, curve->b, curve->p, num_words); /* r = x^3 - 3x + b */
-}
-#endif /* uECC_SUPPORTS_secp... */
-
-#if uECC_SUPPORT_COMPRESSED_POINT
-#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \
-    uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1
-/* Compute a = sqrt(a) (mod curve_p). */
-static void mod_sqrt_default(uECC_word_t *a, uECC_Curve curve) {
-    bitcount_t i;
-    uECC_word_t p1[uECC_MAX_WORDS] = {1};
-    uECC_word_t l_result[uECC_MAX_WORDS] = {1};
-    wordcount_t num_words = curve->num_words;
-    
-    /* When curve->p == 3 (mod 4), we can compute
-       sqrt(a) = a^((curve->p + 1) / 4) (mod curve->p). */
-    uECC_vli_add(p1, curve->p, p1, num_words); /* p1 = curve_p + 1 */
-    for (i = uECC_vli_numBits(p1, num_words) - 1; i > 1; --i) {
-        uECC_vli_modSquare_fast(l_result, l_result, curve);
-        if (uECC_vli_testBit(p1, i)) {
-            uECC_vli_modMult_fast(l_result, l_result, a, curve);
-        }
-    }
-    uECC_vli_set(a, l_result, num_words);
-}
-#endif /* uECC_SUPPORTS_secp... */
-#endif /* uECC_SUPPORT_COMPRESSED_POINT */
-
-#if uECC_SUPPORTS_secp160r1
-
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product);
-#endif
-
-static const struct uECC_Curve_t curve_secp160r1 = {
-    num_words_secp160r1,
-    num_bytes_secp160r1,
-    161, /* num_n_bits */
-    { BYTES_TO_WORDS_8(FF, FF, FF, 7F, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(57, 22, 75, CA, D3, AE, 27, F9),
-        BYTES_TO_WORDS_8(C8, F4, 01, 00, 00, 00, 00, 00),
-        BYTES_TO_WORDS_8(00, 00, 00, 00, 01, 00, 00, 00) },
-    { BYTES_TO_WORDS_8(82, FC, CB, 13, B9, 8B, C3, 68),
-        BYTES_TO_WORDS_8(89, 69, 64, 46, 28, 73, F5, 8E),
-        BYTES_TO_WORDS_4(68, B5, 96, 4A),
-
-        BYTES_TO_WORDS_8(32, FB, C5, 7A, 37, 51, 23, 04),
-        BYTES_TO_WORDS_8(12, C9, DC, 59, 7D, 94, 68, 31),
-        BYTES_TO_WORDS_4(55, 28, A6, 23) },
-    { BYTES_TO_WORDS_8(45, FA, 65, C5, AD, D4, D4, 81),
-        BYTES_TO_WORDS_8(9F, F8, AC, 65, 8B, 7A, BD, 54),
-        BYTES_TO_WORDS_4(FC, BE, 97, 1C) },
-    &double_jacobian_default,
-#if uECC_SUPPORT_COMPRESSED_POINT
-    &mod_sqrt_default,
-#endif
-    &x_side_default,
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    &vli_mmod_fast_secp160r1
-#endif
-};
-
-uECC_Curve uECC_secp160r1(void) { return &curve_secp160r1; }
-
-#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1)
-/* Computes result = product % curve_p
-    see http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf page 354
-    
-    Note that this only works if log2(omega) < log2(p) / 2 */
-static void omega_mult_secp160r1(uECC_word_t *result, const uECC_word_t *right);
-#if uECC_WORD_SIZE == 8
-static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
-    uECC_word_t tmp[2 * num_words_secp160r1];
-    uECC_word_t copy;
-    
-    uECC_vli_clear(tmp, num_words_secp160r1);
-    uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);
-
-    omega_mult_secp160r1(tmp, product + num_words_secp160r1 - 1); /* (Rq, q) = q * c */
-    
-    product[num_words_secp160r1 - 1] &= 0xffffffff;
-    copy = tmp[num_words_secp160r1 - 1];
-    tmp[num_words_secp160r1 - 1] &= 0xffffffff;
-    uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */
-    uECC_vli_clear(product, num_words_secp160r1);
-    tmp[num_words_secp160r1 - 1] = copy;
-    omega_mult_secp160r1(product, tmp + num_words_secp160r1 - 1); /* Rq*c */
-    uECC_vli_add(result, result, product, num_words_secp160r1); /* (C1, r) = r + Rq*c */
-
-    while (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) {
-        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
-    }
-}
-
-static void omega_mult_secp160r1(uint64_t *result, const uint64_t *right) {
-    uint32_t carry;
-    unsigned i;
-    
-    /* Multiply by (2^31 + 1). */
-    carry = 0;
-    for (i = 0; i < num_words_secp160r1; ++i) {
-        uint64_t tmp = (right[i] >> 32) | (right[i + 1] << 32);
-        result[i] = (tmp << 31) + tmp + carry;
-        carry = (tmp >> 33) + (result[i] < tmp || (carry && result[i] == tmp));
-    }
-    result[i] = carry;
-}
-#else
-static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
-    uECC_word_t tmp[2 * num_words_secp160r1];
-    uECC_word_t carry;
-    
-    uECC_vli_clear(tmp, num_words_secp160r1);
-    uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);
-
-    omega_mult_secp160r1(tmp, product + num_words_secp160r1); /* (Rq, q) = q * c */
-    
-    carry = uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */
-    uECC_vli_clear(product, num_words_secp160r1);
-    omega_mult_secp160r1(product, tmp + num_words_secp160r1); /* Rq*c */
-    carry += uECC_vli_add(result, result, product, num_words_secp160r1); /* (C1, r) = r + Rq*c */
-
-    while (carry > 0) {
-        --carry;
-        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
-    }
-    if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) {
-        uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
-    }
-}
-#endif
-
-#if uECC_WORD_SIZE == 1
-static void omega_mult_secp160r1(uint8_t *result, const uint8_t *right) {
-    uint8_t carry;
-    uint8_t i;
-    
-    /* Multiply by (2^31 + 1). */
-    uECC_vli_set(result + 4, right, num_words_secp160r1); /* 2^32 */
-    uECC_vli_rshift1(result + 4, num_words_secp160r1); /* 2^31 */
-    result[3] = right[0] << 7; /* get last bit from shift */
-    
-    carry = uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */
-    for (i = num_words_secp160r1; carry; ++i) {
-        uint16_t sum = (uint16_t)result[i] + carry;
-        result[i] = (uint8_t)sum;
-        carry = sum >> 8;
-    }
-}
-#elif uECC_WORD_SIZE == 4
-static void omega_mult_secp160r1(uint32_t *result, const uint32_t *right) {
-    uint32_t carry;
-    unsigned i;
-    
-    /* Multiply by (2^31 + 1). */
-    uECC_vli_set(result + 1, right, num_words_secp160r1); /* 2^32 */
-    uECC_vli_rshift1(result + 1, num_words_secp160r1); /* 2^31 */
-    result[0] = right[0] << 31; /* get last bit from shift */
-    
-    carry = uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */
-    for (i = num_words_secp160r1; carry; ++i) {
-        uint64_t sum = (uint64_t)result[i] + carry;
-        result[i] = (uint32_t)sum;
-        carry = sum >> 32;
-    }
-}
-#endif /* uECC_WORD_SIZE */
-#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1) */
-
-#endif /* uECC_SUPPORTS_secp160r1 */
-
-#if uECC_SUPPORTS_secp192r1
-
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-static void vli_mmod_fast_secp192r1(uECC_word_t *result, uECC_word_t *product);
-#endif
-
-static const struct uECC_Curve_t curve_secp192r1 = {
-    num_words_secp192r1,
-    num_bytes_secp192r1,
-    192, /* num_n_bits */
-    { BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(31, 28, D2, B4, B1, C9, 6B, 14),
-        BYTES_TO_WORDS_8(36, F8, DE, 99, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(12, 10, FF, 82, FD, 0A, FF, F4),
-        BYTES_TO_WORDS_8(00, 88, A1, 43, EB, 20, BF, 7C),
-        BYTES_TO_WORDS_8(F6, 90, 30, B0, 0E, A8, 8D, 18),
-
-        BYTES_TO_WORDS_8(11, 48, 79, 1E, A1, 77, F9, 73),
-        BYTES_TO_WORDS_8(D5, CD, 24, 6B, ED, 11, 10, 63),
-        BYTES_TO_WORDS_8(78, DA, C8, FF, 95, 2B, 19, 07) },
-    { BYTES_TO_WORDS_8(B1, B9, 46, C1, EC, DE, B8, FE),
-        BYTES_TO_WORDS_8(49, 30, 24, 72, AB, E9, A7, 0F),
-        BYTES_TO_WORDS_8(E7, 80, 9C, E5, 19, 05, 21, 64) },
-    &double_jacobian_default,
-#if uECC_SUPPORT_COMPRESSED_POINT
-    &mod_sqrt_default,
-#endif
-    &x_side_default,
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    &vli_mmod_fast_secp192r1
-#endif
-};
-
-uECC_Curve uECC_secp192r1(void) { return &curve_secp192r1; }
-
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-/* Computes result = product % curve_p.
-   See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf */
-#if uECC_WORD_SIZE == 1
-static void vli_mmod_fast_secp192r1(uint8_t *result, uint8_t *product) {
-    uint8_t tmp[num_words_secp192r1];
-    uint8_t carry;
-    
-    uECC_vli_set(result, product, num_words_secp192r1);
-    
-    uECC_vli_set(tmp, &product[24], num_words_secp192r1);
-    carry = uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    tmp[0] = tmp[1] = tmp[2] = tmp[3] = tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
-    tmp[8] = product[24]; tmp[9] = product[25]; tmp[10] = product[26]; tmp[11] = product[27];
-    tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
-    tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
-    tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    tmp[0] = tmp[8] = product[40];
-    tmp[1] = tmp[9] = product[41];
-    tmp[2] = tmp[10] = product[42];
-    tmp[3] = tmp[11] = product[43];
-    tmp[4] = tmp[12] = product[44];
-    tmp[5] = tmp[13] = product[45];
-    tmp[6] = tmp[14] = product[46];
-    tmp[7] = tmp[15] = product[47];
-    tmp[16] = tmp[17] = tmp[18] = tmp[19] = tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
-    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
-        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
-    }
-}
-#elif uECC_WORD_SIZE == 4
-static void vli_mmod_fast_secp192r1(uint32_t *result, uint32_t *product) {
-    uint32_t tmp[num_words_secp192r1];
-    int carry;
-    
-    uECC_vli_set(result, product, num_words_secp192r1);
-    
-    uECC_vli_set(tmp, &product[6], num_words_secp192r1);
-    carry = uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    tmp[0] = tmp[1] = 0;
-    tmp[2] = product[6];
-    tmp[3] = product[7];
-    tmp[4] = product[8];
-    tmp[5] = product[9];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    tmp[0] = tmp[2] = product[10];
-    tmp[1] = tmp[3] = product[11];
-    tmp[4] = tmp[5] = 0;
-    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
-        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
-    }
-}
-#else
-static void vli_mmod_fast_secp192r1(uint64_t *result, uint64_t *product) {
-    uint64_t tmp[num_words_secp192r1];
-    int carry;
-    
-    uECC_vli_set(result, product, num_words_secp192r1);
-    
-    uECC_vli_set(tmp, &product[3], num_words_secp192r1);
-    carry = (int)uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    tmp[0] = 0;
-    tmp[1] = product[3];
-    tmp[2] = product[4];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    tmp[0] = tmp[1] = product[5];
-    tmp[2] = 0;
-    carry += uECC_vli_add(result, result, tmp, num_words_secp192r1);
-    
-    while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, num_words_secp192r1) != 1) {
-        carry -= uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1);
-    }
-}
-#endif /* uECC_WORD_SIZE */
-#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */
-
-#endif /* uECC_SUPPORTS_secp192r1 */
-
-#if uECC_SUPPORTS_secp224r1
-
-#if uECC_SUPPORT_COMPRESSED_POINT
-static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve);
-#endif
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-static void vli_mmod_fast_secp224r1(uECC_word_t *result, uECC_word_t *product);
-#endif
-
-static const struct uECC_Curve_t curve_secp224r1 = {
-    num_words_secp224r1,
-    num_bytes_secp224r1,
-    224, /* num_n_bits */
-    { BYTES_TO_WORDS_8(01, 00, 00, 00, 00, 00, 00, 00),
-        BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(3D, 2A, 5C, 5C, 45, 29, DD, 13),
-        BYTES_TO_WORDS_8(3E, F0, B8, E0, A2, 16, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_4(FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(21, 1D, 5C, 11, D6, 80, 32, 34),
-        BYTES_TO_WORDS_8(22, 11, C2, 56, D3, C1, 03, 4A),
-        BYTES_TO_WORDS_8(B9, 90, 13, 32, 7F, BF, B4, 6B),
-        BYTES_TO_WORDS_4(BD, 0C, 0E, B7),
-
-        BYTES_TO_WORDS_8(34, 7E, 00, 85, 99, 81, D5, 44),
-        BYTES_TO_WORDS_8(64, 47, 07, 5A, A0, 75, 43, CD),
-        BYTES_TO_WORDS_8(E6, DF, 22, 4C, FB, 23, F7, B5),
-        BYTES_TO_WORDS_4(88, 63, 37, BD) },
-    { BYTES_TO_WORDS_8(B4, FF, 55, 23, 43, 39, 0B, 27),
-        BYTES_TO_WORDS_8(BA, D8, BF, D7, B7, B0, 44, 50),
-        BYTES_TO_WORDS_8(56, 32, 41, F5, AB, B3, 04, 0C),
-        BYTES_TO_WORDS_4(85, 0A, 05, B4) },
-    &double_jacobian_default,
-#if uECC_SUPPORT_COMPRESSED_POINT
-    &mod_sqrt_secp224r1,
-#endif
-    &x_side_default,
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    &vli_mmod_fast_secp224r1
-#endif
-};
-
-uECC_Curve uECC_secp224r1(void) { return &curve_secp224r1; }
-
-
-#if uECC_SUPPORT_COMPRESSED_POINT
-/* Routine 3.2.4 RS;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-static void mod_sqrt_secp224r1_rs(uECC_word_t *d1,
-                                  uECC_word_t *e1,
-                                  uECC_word_t *f1,
-                                  const uECC_word_t *d0,
-                                  const uECC_word_t *e0,
-                                  const uECC_word_t *f0) {
-    uECC_word_t t[num_words_secp224r1];
-
-    uECC_vli_modSquare_fast(t, d0, &curve_secp224r1);                    /* t <-- d0 ^ 2 */
-    uECC_vli_modMult_fast(e1, d0, e0, &curve_secp224r1);                 /* e1 <-- d0 * e0 */
-    uECC_vli_modAdd(d1, t, f0, curve_secp224r1.p, num_words_secp224r1);  /* d1 <-- t  + f0 */
-    uECC_vli_modAdd(e1, e1, e1, curve_secp224r1.p, num_words_secp224r1); /* e1 <-- e1 + e1 */
-    uECC_vli_modMult_fast(f1, t, f0, &curve_secp224r1);                  /* f1 <-- t  * f0 */
-    uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- f1 + f1 */
-    uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- f1 + f1 */
-}
-
-/* Routine 3.2.5 RSS;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-static void mod_sqrt_secp224r1_rss(uECC_word_t *d1,
-                                   uECC_word_t *e1,
-                                   uECC_word_t *f1,
-                                   const uECC_word_t *d0,
-                                   const uECC_word_t *e0,
-                                   const uECC_word_t *f0,
-                                   const bitcount_t j) {
-    bitcount_t i;
-
-    uECC_vli_set(d1, d0, num_words_secp224r1); /* d1 <-- d0 */
-    uECC_vli_set(e1, e0, num_words_secp224r1); /* e1 <-- e0 */
-    uECC_vli_set(f1, f0, num_words_secp224r1); /* f1 <-- f0 */
-    for (i = 1; i <= j; i++) {
-        mod_sqrt_secp224r1_rs(d1, e1, f1, d1, e1, f1); /* RS (d1,e1,f1,d1,e1,f1) */
-    }
-}
-
-/* Routine 3.2.6 RM;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-static void mod_sqrt_secp224r1_rm(uECC_word_t *d2,
-                                  uECC_word_t *e2,
-                                  uECC_word_t *f2,
-                                  const uECC_word_t *c,
-                                  const uECC_word_t *d0,
-                                  const uECC_word_t *e0,
-                                  const uECC_word_t *d1,
-                                  const uECC_word_t *e1) {
-    uECC_word_t t1[num_words_secp224r1];
-    uECC_word_t t2[num_words_secp224r1];
-
-    uECC_vli_modMult_fast(t1, e0, e1, &curve_secp224r1); /* t1 <-- e0 * e1 */
-    uECC_vli_modMult_fast(t1, t1, c, &curve_secp224r1);  /* t1 <-- t1 * c */
-    /* t1 <-- p  - t1 */
-    uECC_vli_modSub(t1, curve_secp224r1.p, t1, curve_secp224r1.p, num_words_secp224r1);
-    uECC_vli_modMult_fast(t2, d0, d1, &curve_secp224r1);                 /* t2 <-- d0 * d1 */
-    uECC_vli_modAdd(t2, t2, t1, curve_secp224r1.p, num_words_secp224r1); /* t2 <-- t2 + t1 */
-    uECC_vli_modMult_fast(t1, d0, e1, &curve_secp224r1);                 /* t1 <-- d0 * e1 */
-    uECC_vli_modMult_fast(e2, d1, e0, &curve_secp224r1);                 /* e2 <-- d1 * e0 */
-    uECC_vli_modAdd(e2, e2, t1, curve_secp224r1.p, num_words_secp224r1); /* e2 <-- e2 + t1 */
-    uECC_vli_modSquare_fast(f2, e2, &curve_secp224r1);                   /* f2 <-- e2^2 */
-    uECC_vli_modMult_fast(f2, f2, c, &curve_secp224r1);                  /* f2 <-- f2 * c */
-    /* f2 <-- p  - f2 */
-    uECC_vli_modSub(f2, curve_secp224r1.p, f2, curve_secp224r1.p, num_words_secp224r1);
-    uECC_vli_set(d2, t2, num_words_secp224r1); /* d2 <-- t2 */
-}
-
-/* Routine 3.2.7 RP;  from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-static void mod_sqrt_secp224r1_rp(uECC_word_t *d1,
-                                  uECC_word_t *e1,
-                                  uECC_word_t *f1,
-                                  const uECC_word_t *c,
-                                  const uECC_word_t *r) {
-    wordcount_t i;
-    wordcount_t pow2i = 1;
-    uECC_word_t d0[num_words_secp224r1];
-    uECC_word_t e0[num_words_secp224r1] = {1}; /* e0 <-- 1 */
-    uECC_word_t f0[num_words_secp224r1];
-
-    uECC_vli_set(d0, r, num_words_secp224r1); /* d0 <-- r */
-    /* f0 <-- p  - c */
-    uECC_vli_modSub(f0, curve_secp224r1.p, c, curve_secp224r1.p, num_words_secp224r1);
-    for (i = 0; i <= 6; i++) {
-        mod_sqrt_secp224r1_rss(d1, e1, f1, d0, e0, f0, pow2i); /* RSS (d1,e1,f1,d0,e0,f0,2^i) */
-        mod_sqrt_secp224r1_rm(d1, e1, f1, c, d1, e1, d0, e0);  /* RM (d1,e1,f1,c,d1,e1,d0,e0) */
-        uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */
-        uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */
-        uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */
-        pow2i *= 2;
-    }
-}
-
-/* Compute a = sqrt(a) (mod curve_p). */
-/* Routine 3.2.8 mp_mod_sqrt_224; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve) {
-    bitcount_t i;
-    uECC_word_t e1[num_words_secp224r1];
-    uECC_word_t f1[num_words_secp224r1];
-    uECC_word_t d0[num_words_secp224r1];
-    uECC_word_t e0[num_words_secp224r1];
-    uECC_word_t f0[num_words_secp224r1];
-    uECC_word_t d1[num_words_secp224r1];
-
-    /* s = a; using constant instead of random value */
-    mod_sqrt_secp224r1_rp(d0, e0, f0, a, a);           /* RP (d0, e0, f0, c, s) */
-    mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0);     /* RS (d1, e1, f1, d0, e0, f0) */
-    for (i = 1; i <= 95; i++) {
-        uECC_vli_set(d0, d1, num_words_secp224r1);          /* d0 <-- d1 */
-        uECC_vli_set(e0, e1, num_words_secp224r1);          /* e0 <-- e1 */
-        uECC_vli_set(f0, f1, num_words_secp224r1);          /* f0 <-- f1 */
-        mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, f0); /* RS (d1, e1, f1, d0, e0, f0) */
-        if (uECC_vli_isZero(d1, num_words_secp224r1)) {     /* if d1 == 0 */
-                break;
-        }
-    }
-    uECC_vli_modInv(f1, e0, curve_secp224r1.p, num_words_secp224r1); /* f1 <-- 1 / e0 */
-    uECC_vli_modMult_fast(a, d0, f1, &curve_secp224r1);              /* a  <-- d0 / e0 */
-}
-#endif /* uECC_SUPPORT_COMPRESSED_POINT */
-
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-/* Computes result = product % curve_p
-   from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-#if uECC_WORD_SIZE == 1
-static void vli_mmod_fast_secp224r1(uint8_t *result, uint8_t *product) {
-    uint8_t tmp[num_words_secp224r1];
-    int8_t carry;
-
-    /* t */
-    uECC_vli_set(result, product, num_words_secp224r1);
-
-    /* s1 */
-    tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
-    tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
-    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
-    tmp[12] = product[28]; tmp[13] = product[29]; tmp[14] = product[30]; tmp[15] = product[31];
-    tmp[16] = product[32]; tmp[17] = product[33]; tmp[18] = product[34]; tmp[19] = product[35];
-    tmp[20] = product[36]; tmp[21] = product[37]; tmp[22] = product[38]; tmp[23] = product[39];
-    tmp[24] = product[40]; tmp[25] = product[41]; tmp[26] = product[42]; tmp[27] = product[43];
-    carry = uECC_vli_add(result, result, tmp, num_words_secp224r1);
-
-    /* s2 */
-    tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
-    tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
-    tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
-    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
-    carry += uECC_vli_add(result, result, tmp, num_words_secp224r1);
-
-    /* d1 */
-    tmp[0]  = product[28]; tmp[1]  = product[29]; tmp[2]  = product[30]; tmp[3]  = product[31];
-    tmp[4]  = product[32]; tmp[5]  = product[33]; tmp[6]  = product[34]; tmp[7]  = product[35];
-    tmp[8]  = product[36]; tmp[9]  = product[37]; tmp[10] = product[38]; tmp[11] = product[39];
-    tmp[12] = product[40]; tmp[13] = product[41]; tmp[14] = product[42]; tmp[15] = product[43];
-    tmp[16] = product[44]; tmp[17] = product[45]; tmp[18] = product[46]; tmp[19] = product[47];
-    tmp[20] = product[48]; tmp[21] = product[49]; tmp[22] = product[50]; tmp[23] = product[51];
-    tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
-
-    /* d2 */
-    tmp[0]  = product[44]; tmp[1]  = product[45]; tmp[2]  = product[46]; tmp[3]  = product[47];
-    tmp[4]  = product[48]; tmp[5]  = product[49]; tmp[6]  = product[50]; tmp[7]  = product[51];
-    tmp[8]  = product[52]; tmp[9]  = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
-    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
-    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
-    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
-    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
-
-    if (carry < 0) {
-        do {
-            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
-        } while (carry < 0);
-    } else {
-        while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
-            carry -= uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
-        }
-    }
-}
-#elif uECC_WORD_SIZE == 4
-static void vli_mmod_fast_secp224r1(uint32_t *result, uint32_t *product)
-{
-    uint32_t tmp[num_words_secp224r1];
-    int carry;
-
-    /* t */
-    uECC_vli_set(result, product, num_words_secp224r1);
-
-    /* s1 */
-    tmp[0] = tmp[1] = tmp[2] = 0;
-    tmp[3] = product[7];
-    tmp[4] = product[8];
-    tmp[5] = product[9];
-    tmp[6] = product[10];
-    carry = uECC_vli_add(result, result, tmp, num_words_secp224r1);
-
-    /* s2 */
-    tmp[3] = product[11];
-    tmp[4] = product[12];
-    tmp[5] = product[13];
-    tmp[6] = 0;
-    carry += uECC_vli_add(result, result, tmp, num_words_secp224r1);
-
-    /* d1 */
-    tmp[0] = product[7];
-    tmp[1] = product[8];
-    tmp[2] = product[9];
-    tmp[3] = product[10];
-    tmp[4] = product[11];
-    tmp[5] = product[12];
-    tmp[6] = product[13];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
-
-    /* d2 */
-    tmp[0] = product[11];
-    tmp[1] = product[12];
-    tmp[2] = product[13];
-    tmp[3] = tmp[4] = tmp[5] = tmp[6] = 0;
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
-
-    if (carry < 0) {
-        do {
-            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
-        } while (carry < 0);
-    } else {
-        while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
-            carry -= uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
-        }
-    }
-}
-#else
-static void vli_mmod_fast_secp224r1(uint64_t *result, uint64_t *product)
-{
-    uint64_t tmp[num_words_secp224r1];
-    int carry = 0;
-
-    /* t */
-    uECC_vli_set(result, product, num_words_secp224r1);
-    result[num_words_secp224r1 - 1] &= 0xffffffff;
-
-    /* s1 */
-    tmp[0] = 0;
-    tmp[1] = product[3] & 0xffffffff00000000ull;
-    tmp[2] = product[4];
-    tmp[3] = product[5] & 0xffffffff;
-    uECC_vli_add(result, result, tmp, num_words_secp224r1);
-
-    /* s2 */
-    tmp[1] = product[5] & 0xffffffff00000000ull;
-    tmp[2] = product[6];
-    tmp[3] = 0;
-    uECC_vli_add(result, result, tmp, num_words_secp224r1);
-
-    /* d1 */
-    tmp[0] = (product[3] >> 32) | (product[4] << 32);
-    tmp[1] = (product[4] >> 32) | (product[5] << 32);
-    tmp[2] = (product[5] >> 32) | (product[6] << 32);
-    tmp[3] = product[6] >> 32;
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
-
-    /* d2 */
-    tmp[0] = (product[5] >> 32) | (product[6] << 32);
-    tmp[1] = product[6] >> 32;
-    tmp[2] = tmp[3] = 0;
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1);
-
-    if (carry < 0) {
-        do {
-            carry += uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1);
-        } while (carry < 0);
-    } else {
-        while (uECC_vli_cmp_unsafe(curve_secp224r1.p, result, num_words_secp224r1) != 1) {
-            uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1);
-        }
-    }
-}
-#endif /* uECC_WORD_SIZE */
-#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */
-
-#endif /* uECC_SUPPORTS_secp224r1 */
-
-#if uECC_SUPPORTS_secp256r1
-
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-static void vli_mmod_fast_secp256r1(uECC_word_t *result, uECC_word_t *product);
-#endif
-
-static const struct uECC_Curve_t curve_secp256r1 = {
-    num_words_secp256r1,
-    num_bytes_secp256r1,
-    256, /* num_n_bits */
-    { BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, 00, 00, 00, 00),
-        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
-        BYTES_TO_WORDS_8(01, 00, 00, 00, FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(51, 25, 63, FC, C2, CA, B9, F3),
-        BYTES_TO_WORDS_8(84, 9E, 17, A7, AD, FA, E6, BC),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(96, C2, 98, D8, 45, 39, A1, F4),
-        BYTES_TO_WORDS_8(A0, 33, EB, 2D, 81, 7D, 03, 77),
-        BYTES_TO_WORDS_8(F2, 40, A4, 63, E5, E6, BC, F8),
-        BYTES_TO_WORDS_8(47, 42, 2C, E1, F2, D1, 17, 6B),
-
-        BYTES_TO_WORDS_8(F5, 51, BF, 37, 68, 40, B6, CB),
-        BYTES_TO_WORDS_8(CE, 5E, 31, 6B, 57, 33, CE, 2B),
-        BYTES_TO_WORDS_8(16, 9E, 0F, 7C, 4A, EB, E7, 8E),
-        BYTES_TO_WORDS_8(9B, 7F, 1A, FE, E2, 42, E3, 4F) },
-    { BYTES_TO_WORDS_8(4B, 60, D2, 27, 3E, 3C, CE, 3B),
-        BYTES_TO_WORDS_8(F6, B0, 53, CC, B0, 06, 1D, 65),
-        BYTES_TO_WORDS_8(BC, 86, 98, 76, 55, BD, EB, B3),
-        BYTES_TO_WORDS_8(E7, 93, 3A, AA, D8, 35, C6, 5A) },
-    &double_jacobian_default,
-#if uECC_SUPPORT_COMPRESSED_POINT
-    &mod_sqrt_default,
-#endif
-    &x_side_default,
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    &vli_mmod_fast_secp256r1
-#endif
-};
-
-uECC_Curve uECC_secp256r1(void) { return &curve_secp256r1; }
-
-
-#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1)
-/* Computes result = product % curve_p
-   from http://www.nsa.gov/ia/_files/nist-routines.pdf */
-#if uECC_WORD_SIZE == 1
-static void vli_mmod_fast_secp256r1(uint8_t *result, uint8_t *product) {
-    uint8_t tmp[num_words_secp256r1];
-    int8_t carry;
-    
-    /* t */
-    uECC_vli_set(result, product, num_words_secp256r1);
-    
-    /* s1 */
-    tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0;
-    tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0;
-    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
-    tmp[12] = product[44]; tmp[13] = product[45]; tmp[14] = product[46]; tmp[15] = product[47];
-    tmp[16] = product[48]; tmp[17] = product[49]; tmp[18] = product[50]; tmp[19] = product[51];
-    tmp[20] = product[52]; tmp[21] = product[53]; tmp[22] = product[54]; tmp[23] = product[55];
-    tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
-    tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
-    carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s2 */
-    tmp[12] = product[48]; tmp[13] = product[49]; tmp[14] = product[50]; tmp[15] = product[51];
-    tmp[16] = product[52]; tmp[17] = product[53]; tmp[18] = product[54]; tmp[19] = product[55];
-    tmp[20] = product[56]; tmp[21] = product[57]; tmp[22] = product[58]; tmp[23] = product[59];
-    tmp[24] = product[60]; tmp[25] = product[61]; tmp[26] = product[62]; tmp[27] = product[63];
-    tmp[28] = tmp[29] = tmp[30] = tmp[31] = 0;
-    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s3 */
-    tmp[0] = product[32]; tmp[1] = product[33]; tmp[2] = product[34]; tmp[3] = product[35];
-    tmp[4] = product[36]; tmp[5] = product[37]; tmp[6] = product[38]; tmp[7] = product[39];
-    tmp[8] = product[40]; tmp[9] = product[41]; tmp[10] = product[42]; tmp[11] = product[43];
-    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
-    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
-    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
-    tmp[24] = product[56]; tmp[25] = product[57]; tmp[26] = product[58]; tmp[27] = product[59];
-    tmp[28] = product[60]; tmp[29] = product[61]; tmp[30] = product[62]; tmp[31] = product[63];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s4 */
-    tmp[0] = product[36]; tmp[1] = product[37]; tmp[2] = product[38]; tmp[3] = product[39];
-    tmp[4] = product[40]; tmp[5] = product[41]; tmp[6] = product[42]; tmp[7] = product[43];
-    tmp[8] = product[44]; tmp[9] = product[45]; tmp[10] = product[46]; tmp[11] = product[47];
-    tmp[12] = product[52]; tmp[13] = product[53]; tmp[14] = product[54]; tmp[15] = product[55];
-    tmp[16] = product[56]; tmp[17] = product[57]; tmp[18] = product[58]; tmp[19] = product[59];
-    tmp[20] = product[60]; tmp[21] = product[61]; tmp[22] = product[62]; tmp[23] = product[63];
-    tmp[24] = product[52]; tmp[25] = product[53]; tmp[26] = product[54]; tmp[27] = product[55];
-    tmp[28] = product[32]; tmp[29] = product[33]; tmp[30] = product[34]; tmp[31] = product[35];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* d1 */
-    tmp[0] = product[44]; tmp[1] = product[45]; tmp[2] = product[46]; tmp[3] = product[47];
-    tmp[4] = product[48]; tmp[5] = product[49]; tmp[6] = product[50]; tmp[7] = product[51];
-    tmp[8] = product[52]; tmp[9] = product[53]; tmp[10] = product[54]; tmp[11] = product[55];
-    tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0;
-    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
-    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
-    tmp[24] = product[32]; tmp[25] = product[33]; tmp[26] = product[34]; tmp[27] = product[35];
-    tmp[28] = product[40]; tmp[29] = product[41]; tmp[30] = product[42]; tmp[31] = product[43];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d2 */
-    tmp[0] = product[48]; tmp[1] = product[49]; tmp[2] = product[50]; tmp[3] = product[51];
-    tmp[4] = product[52]; tmp[5] = product[53]; tmp[6] = product[54]; tmp[7] = product[55];
-    tmp[8] = product[56]; tmp[9] = product[57]; tmp[10] = product[58]; tmp[11] = product[59];
-    tmp[12] = product[60]; tmp[13] = product[61]; tmp[14] = product[62]; tmp[15] = product[63];
-    tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0;
-    tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0;
-    tmp[24] = product[36]; tmp[25] = product[37]; tmp[26] = product[38]; tmp[27] = product[39];
-    tmp[28] = product[44]; tmp[29] = product[45]; tmp[30] = product[46]; tmp[31] = product[47];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d3 */
-    tmp[0] = product[52]; tmp[1] = product[53]; tmp[2] = product[54]; tmp[3] = product[55];
-    tmp[4] = product[56]; tmp[5] = product[57]; tmp[6] = product[58]; tmp[7] = product[59];
-    tmp[8] = product[60]; tmp[9] = product[61]; tmp[10] = product[62]; tmp[11] = product[63];
-    tmp[12] = product[32]; tmp[13] = product[33]; tmp[14] = product[34]; tmp[15] = product[35];
-    tmp[16] = product[36]; tmp[17] = product[37]; tmp[18] = product[38]; tmp[19] = product[39];
-    tmp[20] = product[40]; tmp[21] = product[41]; tmp[22] = product[42]; tmp[23] = product[43];
-    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
-    tmp[28] = product[48]; tmp[29] = product[49]; tmp[30] = product[50]; tmp[31] = product[51];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d4 */
-    tmp[0] = product[56]; tmp[1] = product[57]; tmp[2] = product[58]; tmp[3] = product[59];
-    tmp[4] = product[60]; tmp[5] = product[61]; tmp[6] = product[62]; tmp[7] = product[63];
-    tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0;
-    tmp[12] = product[36]; tmp[13] = product[37]; tmp[14] = product[38]; tmp[15] = product[39];
-    tmp[16] = product[40]; tmp[17] = product[41]; tmp[18] = product[42]; tmp[19] = product[43];
-    tmp[20] = product[44]; tmp[21] = product[45]; tmp[22] = product[46]; tmp[23] = product[47];
-    tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0;
-    tmp[28] = product[52]; tmp[29] = product[53]; tmp[30] = product[54]; tmp[31] = product[55];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    if (carry < 0) {
-        do {
-            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
-        } while (carry < 0);
-    } else {
-        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
-            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
-        }
-    }
-}
-#elif uECC_WORD_SIZE == 4
-static void vli_mmod_fast_secp256r1(uint32_t *result, uint32_t *product) {
-    uint32_t tmp[num_words_secp256r1];
-    int carry;
-    
-    /* t */
-    uECC_vli_set(result, product, num_words_secp256r1);
-    
-    /* s1 */
-    tmp[0] = tmp[1] = tmp[2] = 0;
-    tmp[3] = product[11];
-    tmp[4] = product[12];
-    tmp[5] = product[13];
-    tmp[6] = product[14];
-    tmp[7] = product[15];
-    carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s2 */
-    tmp[3] = product[12];
-    tmp[4] = product[13];
-    tmp[5] = product[14];
-    tmp[6] = product[15];
-    tmp[7] = 0;
-    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s3 */
-    tmp[0] = product[8];
-    tmp[1] = product[9];
-    tmp[2] = product[10];
-    tmp[3] = tmp[4] = tmp[5] = 0;
-    tmp[6] = product[14];
-    tmp[7] = product[15];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s4 */
-    tmp[0] = product[9];
-    tmp[1] = product[10];
-    tmp[2] = product[11];
-    tmp[3] = product[13];
-    tmp[4] = product[14];
-    tmp[5] = product[15];
-    tmp[6] = product[13];
-    tmp[7] = product[8];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* d1 */
-    tmp[0] = product[11];
-    tmp[1] = product[12];
-    tmp[2] = product[13];
-    tmp[3] = tmp[4] = tmp[5] = 0;
-    tmp[6] = product[8];
-    tmp[7] = product[10];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d2 */
-    tmp[0] = product[12];
-    tmp[1] = product[13];
-    tmp[2] = product[14];
-    tmp[3] = product[15];
-    tmp[4] = tmp[5] = 0;
-    tmp[6] = product[9];
-    tmp[7] = product[11];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d3 */
-    tmp[0] = product[13];
-    tmp[1] = product[14];
-    tmp[2] = product[15];
-    tmp[3] = product[8];
-    tmp[4] = product[9];
-    tmp[5] = product[10];
-    tmp[6] = 0;
-    tmp[7] = product[12];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d4 */
-    tmp[0] = product[14];
-    tmp[1] = product[15];
-    tmp[2] = 0;
-    tmp[3] = product[9];
-    tmp[4] = product[10];
-    tmp[5] = product[11];
-    tmp[6] = 0;
-    tmp[7] = product[13];
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    if (carry < 0) {
-        do {
-            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
-        } while (carry < 0);
-    } else {
-        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
-            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
-        }
-    }
-}
-#else
-static void vli_mmod_fast_secp256r1(uint64_t *result, uint64_t *product) {
-    uint64_t tmp[num_words_secp256r1];
-    int carry;
-    
-    /* t */
-    uECC_vli_set(result, product, num_words_secp256r1);
-    
-    /* s1 */
-    tmp[0] = 0;
-    tmp[1] = product[5] & 0xffffffff00000000ull;
-    tmp[2] = product[6];
-    tmp[3] = product[7];
-    carry = (int)uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s2 */
-    tmp[1] = product[6] << 32;
-    tmp[2] = (product[6] >> 32) | (product[7] << 32);
-    tmp[3] = product[7] >> 32;
-    carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s3 */
-    tmp[0] = product[4];
-    tmp[1] = product[5] & 0xffffffff;
-    tmp[2] = 0;
-    tmp[3] = product[7];
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* s4 */
-    tmp[0] = (product[4] >> 32) | (product[5] << 32);
-    tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
-    tmp[2] = product[7];
-    tmp[3] = (product[6] >> 32) | (product[4] << 32);
-    carry += uECC_vli_add(result, result, tmp, num_words_secp256r1);
-    
-    /* d1 */
-    tmp[0] = (product[5] >> 32) | (product[6] << 32);
-    tmp[1] = (product[6] >> 32);
-    tmp[2] = 0;
-    tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d2 */
-    tmp[0] = product[6];
-    tmp[1] = product[7];
-    tmp[2] = 0;
-    tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d3 */
-    tmp[0] = (product[6] >> 32) | (product[7] << 32);
-    tmp[1] = (product[7] >> 32) | (product[4] << 32);
-    tmp[2] = (product[4] >> 32) | (product[5] << 32);
-    tmp[3] = (product[6] << 32);
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    /* d4 */
-    tmp[0] = product[7];
-    tmp[1] = product[4] & 0xffffffff00000000ull;
-    tmp[2] = product[5];
-    tmp[3] = product[6] & 0xffffffff00000000ull;
-    carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1);
-    
-    if (carry < 0) {
-        do {
-            carry += uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
-        } while (carry < 0);
-    } else {
-        while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, num_words_secp256r1) != 1) {
-            carry -= uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
-        }
-    }
-}
-#endif /* uECC_WORD_SIZE */
-#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) */
-
-#endif /* uECC_SUPPORTS_secp256r1 */
-
-#if uECC_SUPPORTS_secp256k1
-
-static void double_jacobian_secp256k1(uECC_word_t * X1,
-                                      uECC_word_t * Y1,
-                                      uECC_word_t * Z1,
-                                      uECC_Curve curve);
-static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve);
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product);
-#endif
-
-static const struct uECC_Curve_t curve_secp256k1 = {
-    num_words_secp256k1,
-    num_bytes_secp256k1,
-    256, /* num_n_bits */
-    { BYTES_TO_WORDS_8(2F, FC, FF, FF, FE, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(41, 41, 36, D0, 8C, 5E, D2, BF),
-        BYTES_TO_WORDS_8(3B, A0, 48, AF, E6, DC, AE, BA),
-        BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF),
-        BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF) },
-    { BYTES_TO_WORDS_8(98, 17, F8, 16, 5B, 81, F2, 59),
-        BYTES_TO_WORDS_8(D9, 28, CE, 2D, DB, FC, 9B, 02),
-        BYTES_TO_WORDS_8(07, 0B, 87, CE, 95, 62, A0, 55),
-        BYTES_TO_WORDS_8(AC, BB, DC, F9, 7E, 66, BE, 79),
-
-        BYTES_TO_WORDS_8(B8, D4, 10, FB, 8F, D0, 47, 9C),
-        BYTES_TO_WORDS_8(19, 54, 85, A6, 48, B4, 17, FD),
-        BYTES_TO_WORDS_8(A8, 08, 11, 0E, FC, FB, A4, 5D),
-        BYTES_TO_WORDS_8(65, C4, A3, 26, 77, DA, 3A, 48) },
-    { BYTES_TO_WORDS_8(07, 00, 00, 00, 00, 00, 00, 00),
-        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
-        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00),
-        BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00) },
-    &double_jacobian_secp256k1,
-#if uECC_SUPPORT_COMPRESSED_POINT
-    &mod_sqrt_default,
-#endif
-    &x_side_secp256k1,
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    &vli_mmod_fast_secp256k1
-#endif
-};
-
-uECC_Curve uECC_secp256k1(void) { return &curve_secp256k1; }
-
-
-/* Double in place */
-static void double_jacobian_secp256k1(uECC_word_t * X1,
-                                      uECC_word_t * Y1,
-                                      uECC_word_t * Z1,
-                                      uECC_Curve curve) {
-    /* t1 = X, t2 = Y, t3 = Z */
-    uECC_word_t t4[num_words_secp256k1];
-    uECC_word_t t5[num_words_secp256k1];
-    
-    if (uECC_vli_isZero(Z1, num_words_secp256k1)) {
-        return;
-    }
-    
-    uECC_vli_modSquare_fast(t5, Y1, curve);   /* t5 = y1^2 */
-    uECC_vli_modMult_fast(t4, X1, t5, curve); /* t4 = x1*y1^2 = A */
-    uECC_vli_modSquare_fast(X1, X1, curve);   /* t1 = x1^2 */
-    uECC_vli_modSquare_fast(t5, t5, curve);   /* t5 = y1^4 */
-    uECC_vli_modMult_fast(Z1, Y1, Z1, curve); /* t3 = y1*z1 = z3 */
-    
-    uECC_vli_modAdd(Y1, X1, X1, curve->p, num_words_secp256k1); /* t2 = 2*x1^2 */
-    uECC_vli_modAdd(Y1, Y1, X1, curve->p, num_words_secp256k1); /* t2 = 3*x1^2 */
-    if (uECC_vli_testBit(Y1, 0)) {
-        uECC_word_t carry = uECC_vli_add(Y1, Y1, curve->p, num_words_secp256k1);
-        uECC_vli_rshift1(Y1, num_words_secp256k1);
-        Y1[num_words_secp256k1 - 1] |= carry << (uECC_WORD_BITS - 1);
-    } else {
-        uECC_vli_rshift1(Y1, num_words_secp256k1);
-    }
-    /* t2 = 3/2*(x1^2) = B */
-    
-    uECC_vli_modSquare_fast(X1, Y1, curve);                     /* t1 = B^2 */
-    uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - A */
-    uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - 2A = x3 */
-    
-    uECC_vli_modSub(t4, t4, X1, curve->p, num_words_secp256k1); /* t4 = A - x3 */
-    uECC_vli_modMult_fast(Y1, Y1, t4, curve);                   /* t2 = B * (A - x3) */
-    uECC_vli_modSub(Y1, Y1, t5, curve->p, num_words_secp256k1); /* t2 = B * (A - x3) - y1^4 = y3 */
-}
-
-/* Computes result = x^3 + b. result must not overlap x. */
-static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve) {
-    uECC_vli_modSquare_fast(result, x, curve);                                /* r = x^2 */
-    uECC_vli_modMult_fast(result, result, x, curve);                          /* r = x^3 */
-    uECC_vli_modAdd(result, result, curve->b, curve->p, num_words_secp256k1); /* r = x^3 + b */
-}
-
-#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256k1)
-static void omega_mult_secp256k1(uECC_word_t *result, const uECC_word_t *right);
-static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) {
-    uECC_word_t tmp[2 * num_words_secp256k1];
-    uECC_word_t carry;
-    
-    uECC_vli_clear(tmp, num_words_secp256k1);
-    uECC_vli_clear(tmp + num_words_secp256k1, num_words_secp256k1);
-    
-    omega_mult_secp256k1(tmp, product + num_words_secp256k1); /* (Rq, q) = q * c */
-    
-    carry = uECC_vli_add(result, product, tmp, num_words_secp256k1); /* (C, r) = r + q       */
-    uECC_vli_clear(product, num_words_secp256k1);
-    omega_mult_secp256k1(product, tmp + num_words_secp256k1); /* Rq*c */
-    carry += uECC_vli_add(result, result, product, num_words_secp256k1); /* (C1, r) = r + Rq*c */
-    
-    while (carry > 0) {
-        --carry;
-        uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1);
-    }
-    if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, num_words_secp256k1) > 0) {
-        uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1);
-    }
-}
-
-#if uECC_WORD_SIZE == 1
-static void omega_mult_secp256k1(uint8_t * result, const uint8_t * right) {
-    /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
-    uECC_word_t r0 = 0;
-    uECC_word_t r1 = 0;
-    uECC_word_t r2 = 0;
-    wordcount_t k;
-    
-    /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
-    muladd(0xD1, right[0], &r0, &r1, &r2);
-    result[0] = r0;
-    r0 = r1;
-    r1 = r2;
-    /* r2 is still 0 */
-    
-    for (k = 1; k < num_words_secp256k1; ++k) {
-        muladd(0x03, right[k - 1], &r0, &r1, &r2);
-        muladd(0xD1, right[k], &r0, &r1, &r2);
-        result[k] = r0;
-        r0 = r1;
-        r1 = r2;
-        r2 = 0;
-    }
-    muladd(0x03, right[num_words_secp256k1 - 1], &r0, &r1, &r2);
-    result[num_words_secp256k1] = r0;
-    result[num_words_secp256k1 + 1] = r1;
-    /* add the 2^32 multiple */
-    result[4 + num_words_secp256k1] =
-        uECC_vli_add(result + 4, result + 4, right, num_words_secp256k1); 
-}
-#elif uECC_WORD_SIZE == 4
-static void omega_mult_secp256k1(uint32_t * result, const uint32_t * right) {
-    /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
-    uint32_t carry = 0;
-    wordcount_t k;
-    
-    for (k = 0; k < num_words_secp256k1; ++k) {
-        uint64_t p = (uint64_t)0x3D1 * right[k] + carry;
-        result[k] = (uint32_t) p;
-        carry = p >> 32;
-    }
-    result[num_words_secp256k1] = carry;
-    /* add the 2^32 multiple */
-    result[1 + num_words_secp256k1] =
-        uECC_vli_add(result + 1, result + 1, right, num_words_secp256k1); 
-}
-#else
-static void omega_mult_secp256k1(uint64_t * result, const uint64_t * right) {
-    uECC_word_t r0 = 0;
-    uECC_word_t r1 = 0;
-    uECC_word_t r2 = 0;
-    wordcount_t k;
-    
-    /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */
-    for (k = 0; k < num_words_secp256k1; ++k) {
-        muladd(0x1000003D1ull, right[k], &r0, &r1, &r2);
-        result[k] = r0;
-        r0 = r1;
-        r1 = r2;
-        r2 = 0;
-    }
-    result[num_words_secp256k1] = r0;
-}
-#endif /* uECC_WORD_SIZE */
-#endif /* (uECC_OPTIMIZATION_LEVEL > 0 &&  && !asm_mmod_fast_secp256k1) */
-
-#endif /* uECC_SUPPORTS_secp256k1 */
-
-#endif /* _UECC_CURVE_SPECIFIC_H_ */

+ 0 - 127
components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_project.py

@@ -1,127 +0,0 @@
-import os
-
-c, link, asm, utils = emk.module("c", "link", "asm", "utils")
-
-default_compile_flags = ["-fvisibility=hidden", "-Wall", "-Wextra", "-Wshadow", "-Werror", "-Wno-missing-field-initializers", "-Wno-unused-parameter", \
-    "-Wno-comment", "-Wno-unused", "-Wno-unknown-pragmas"]
-default_link_flags = []
-opt_flags = {"dbg":["-g"], "std":["-O2"], "max":["-O3"], "small":["-Os"]}
-opt_link_flags = {"dbg":[], "std":[], "max":[], "small":[]}
-c_flags = ["-std=c99"]
-cxx_flags = ["-std=c++11", "-Wno-reorder", "-fno-rtti", "-fno-exceptions"]
-c_link_flags = []
-cxx_link_flags = ["-fno-rtti", "-fno-exceptions"]
-
-def setup_build_dir():
-    build_arch = None
-    if "arch" in emk.options:
-        build_arch = emk.options["arch"]
-    elif not emk.cleaning:
-        build_arch = "osx"
-    emk.options["arch"] = build_arch
-
-    opt_level = None
-    if "opt" in emk.options:
-        level = emk.options["opt"]
-        if level in opt_flags:
-            opt_level = level
-        else:
-            emk.log.warning("Unknown optimization level '%s'" % (level))
-    elif not emk.cleaning:
-        opt_level = "dbg"
-    emk.options["opt"] = opt_level
-
-    dirs = ["__build__"]
-    if build_arch:
-        dirs.append(build_arch)
-    if opt_level:
-        dirs.append(opt_level)
-    emk.build_dir = os.path.join(*dirs)
-
-def setup_osx():
-    global c
-    global link
-
-    flags = [("-arch", "x86_64"), "-fno-common", "-Wnewline-eof"]
-    c.flags.extend(flags)
-    c.cxx.flags += ["-stdlib=libc++"]
-    link.cxx.flags += ["-stdlib=libc++"]
-
-    link_flags = [("-arch", "x86_64")]
-    link.local_flags.extend(link_flags)
-
-def setup_avr():
-    global c
-    global link
-
-    c.compiler = c.GccCompiler("/Projects/avr-tools/bin/avr-")
-    c.flags += ["-mmcu=atmega256rfr2", "-ffunction-sections", "-fdata-sections"]
-    link.linker = link.GccLinker("/Projects/avr-tools/bin/avr-")
-    link.flags += ["-mmcu=atmega256rfr2", "-mrelax", "-Wl,--gc-sections"]
-    link.strip = True
-
-def setup_arm_thumb():
-    global c
-    global link
-    global asm
-    global utils
-
-    asm.assembler = asm.GccAssembler("/cross/arm_cortex/bin/arm-none-eabi-")
-    c.compiler = c.GccCompiler("/cross/arm_cortex/bin/arm-none-eabi-")
-    link.linker = link.GccLinker("/cross/arm_cortex/bin/arm-none-eabi-")
-
-    c.flags.extend(["-mcpu=cortex-m0", "-mthumb", "-ffunction-sections", "-fdata-sections", "-fno-builtin-fprintf", "-fno-builtin-printf"])
-    c.defines["LPC11XX"] = 1
-    
-    link.local_flags.extend(["-mcpu=cortex-m0", "-mthumb", "-nostartfiles", "-nostdlib", "-Wl,--gc-sections"])
-    link.local_flags.extend(["-Tflash.lds", "-L/Projects/lpc11xx/core", "/Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o"])
-    link.local_syslibs += ["gcc"]
-    link.depdirs += ["/Projects/lpc11xx/stdlib"]
-
-    def do_objcopy(produces, requires):
-        utils.call("/cross/arm_cortex/bin/arm-none-eabi-objcopy", "-O", "binary", requires[0], produces[0])
-
-    def handle_exe(path):
-        emk.depend(path, "/Projects/lpc11xx/core/" + emk.build_dir + "/board_cstartup.o")
-        emk.rule(do_objcopy, path + ".bin", path, cwd_safe=True, ex_safe=True)
-        emk.autobuild(path + ".bin")
-
-    link.exe_funcs.append(handle_exe)
-    link.strip = True
-    
-    emk.recurse("/Projects/lpc11xx/core")
-
-def setup_linux_rpi():
-    global c
-    global link
-
-    c.compiler = c.GccCompiler("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-")
-    link.linker = link.GccLinker("/Volumes/xtools/arm-none-linux-gnueabi/bin/arm-none-linux-gnueabi-")
-    
-    c.flags.extend(["-fomit-frame-pointer"])
-
-setup_build_dir()
-
-setup_funcs = {"osx":setup_osx, "avr":setup_avr, "arm_thumb":setup_arm_thumb, "rpi": setup_linux_rpi}
-
-if not emk.cleaning:
-    build_arch = emk.options["arch"]
-    opt_level = emk.options["opt"]
-
-    c.flags.extend(default_compile_flags)
-    c.flags.extend(opt_flags[opt_level])
-    c.c.flags.extend(c_flags)
-    c.cxx.flags.extend(cxx_flags)
-    link.local_flags.extend(default_link_flags)
-    link.local_flags.extend(opt_link_flags[opt_level])
-    link.c.local_flags.extend(c_link_flags)
-    link.cxx.local_flags.extend(cxx_link_flags)
-
-    c.include_dirs.append("$:proj:$")
-
-    if build_arch in setup_funcs:
-        setup_funcs[build_arch]()
-    else:
-        raise emk.BuildError("Unknown target arch '%s'" % (build_arch))
-
-    c.defines["TARGET_ARCH_" + build_arch.upper()] = 1

+ 0 - 3
components/bootloader/subproject/components/micro-ecc/micro-ecc/emk_rules.py

@@ -1,3 +0,0 @@
-c, link = emk.module("c", "link")
-
-emk.subdir("test")

+ 0 - 85
components/bootloader/subproject/components/micro-ecc/micro-ecc/examples/ecc_test/ecc_test.ino

@@ -1,85 +0,0 @@
-#include <uECC.h>
-
-extern "C" {
-
-static int RNG(uint8_t *dest, unsigned size) {
-  // Use the least-significant bits from the ADC for an unconnected pin (or connected to a source of 
-  // random noise). This can take a long time to generate random data if the result of analogRead(0) 
-  // doesn't change very frequently.
-  while (size) {
-    uint8_t val = 0;
-    for (unsigned i = 0; i < 8; ++i) {
-      int init = analogRead(0);
-      int count = 0;
-      while (analogRead(0) == init) {
-        ++count;
-      }
-      
-      if (count == 0) {
-         val = (val << 1) | (init & 0x01);
-      } else {
-         val = (val << 1) | (count & 0x01);
-      }
-    }
-    *dest = val;
-    ++dest;
-    --size;
-  }
-  // NOTE: it would be a good idea to hash the resulting random data using SHA-256 or similar.
-  return 1;
-}
-
-}  // extern "C"
-
-void setup() {
-  Serial.begin(115200);
-  Serial.print("Testing ecc\n");
-  uECC_set_rng(&RNG);
-}
-
-void loop() {
-  const struct uECC_Curve_t * curve = uECC_secp160r1();
-  uint8_t private1[21];
-  uint8_t private2[21];
-  
-  uint8_t public1[40];
-  uint8_t public2[40];
-  
-  uint8_t secret1[20];
-  uint8_t secret2[20];
-  
-  unsigned long a = millis();
-  uECC_make_key(public1, private1, curve);
-  unsigned long b = millis();
-  
-  Serial.print("Made key 1 in "); Serial.println(b-a);
-  a = millis();
-  uECC_make_key(public2, private2, curve);
-  b = millis();
-  Serial.print("Made key 2 in "); Serial.println(b-a);
-
-  a = millis();
-  int r = uECC_shared_secret(public2, private1, secret1, curve);
-  b = millis();
-  Serial.print("Shared secret 1 in "); Serial.println(b-a);
-  if (!r) {
-    Serial.print("shared_secret() failed (1)\n");
-    return;
-  }
-
-  a = millis();
-  r = uECC_shared_secret(public1, private2, secret2, curve);
-  b = millis();
-  Serial.print("Shared secret 2 in "); Serial.println(b-a);
-  if (!r) {
-    Serial.print("shared_secret() failed (2)\n");
-    return;
-  }
-    
-  if (memcmp(secret1, secret2, 20) != 0) {
-    Serial.print("Shared secrets are not identical!\n");
-  } else {
-    Serial.print("Shared secrets are identical\n");
-  }
-}
-

+ 0 - 9
components/bootloader/subproject/components/micro-ecc/micro-ecc/library.properties

@@ -1,9 +0,0 @@
-name=micro-ecc
-version=1.0.0
-author=Kenneth MacKay
-maintainer=Kenneth MacKay
-sentence=uECC
-paragraph=A small and fast ECDH and ECDSA implementation for 8-bit, 32-bit, and 64-bit processors.
-category=Other
-url=https://github.com/kmackay/micro-ecc
-architectures=*

+ 0 - 71
components/bootloader/subproject/components/micro-ecc/micro-ecc/platform-specific.inc

@@ -1,71 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_PLATFORM_SPECIFIC_H_
-#define _UECC_PLATFORM_SPECIFIC_H_
-
-#include "types.h"
-
-#if (defined(_WIN32) || defined(_WIN64))
-/* Windows */
-
-// use pragma syntax to prevent tweaking the linker script for getting CryptXYZ function
-#pragma comment(lib, "crypt32.lib")
-#pragma comment(lib, "advapi32.lib")
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <wincrypt.h>
-
-static int default_RNG(uint8_t *dest, unsigned size) {
-    HCRYPTPROV prov;
-    if (!CryptAcquireContext(&prov, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
-        return 0;
-    }
-
-    CryptGenRandom(prov, size, (BYTE *)dest);
-    CryptReleaseContext(prov, 0);
-    return 1;
-}
-#define default_RNG_defined 1
-
-#elif defined(unix) || defined(__linux__) || defined(__unix__) || defined(__unix) || \
-    (defined(__APPLE__) && defined(__MACH__)) || defined(uECC_POSIX)
-
-/* Some POSIX-like system with /dev/urandom or /dev/random. */
-#include <sys/types.h>
-#include <fcntl.h>
-#include <unistd.h>
-
-#ifndef O_CLOEXEC
-    #define O_CLOEXEC 0
-#endif
-
-static int default_RNG(uint8_t *dest, unsigned size) {
-    int fd = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
-    if (fd == -1) {
-        fd = open("/dev/random", O_RDONLY | O_CLOEXEC);
-        if (fd == -1) {
-            return 0;
-        }
-    }
-    
-    char *ptr = (char *)dest;
-    size_t left = size;
-    while (left > 0) {
-        ssize_t bytes_read = read(fd, ptr, left);
-        if (bytes_read <= 0) { // read failed
-            close(fd);
-            return 0;
-        }
-        left -= bytes_read;
-        ptr += bytes_read;
-    }
-    
-    close(fd);
-    return 1;
-}
-#define default_RNG_defined 1
-
-#endif /* platform */
-
-#endif /* _UECC_PLATFORM_SPECIFIC_H_ */

+ 0 - 188
components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_arm.py

@@ -1,188 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-if len(sys.argv) < 2:
-    print "Provide the integer size in 32-bit words"
-    sys.exit(1)
-
-size = int(sys.argv[1])
-
-full_rows = size // 3
-init_size = size % 3
-
-if init_size == 0:
-    full_rows = full_rows - 1
-    init_size = 3
-
-def emit(line, *args):
-    s = '"' + line + r' \n\t"'
-    print s % args
-
-rx = [3, 4, 5]
-ry = [6, 7, 8]
-
-#### set up registers
-emit("add r0, %s", (size - init_size) * 4) # move z
-emit("add r2, %s", (size - init_size) * 4) # move y
-
-emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size)]))
-emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(init_size)]))
-
-print ""
-if init_size == 1:
-    emit("umull r9, r10, r3, r6")
-    emit("stmia r0!, {r9, r10}")
-else:
-    #### first two multiplications of initial block
-    emit("umull r11, r12, r3, r6")
-    emit("stmia r0!, {r11}")
-    print ""
-    emit("mov r10, #0")
-    emit("umull r11, r9, r3, r7")
-    emit("adds r12, r12, r11")
-    emit("adc r9, r9, #0")
-    emit("umull r11, r14, r4, r6")
-    emit("adds r12, r12, r11")
-    emit("adcs r9, r9, r14")
-    emit("adc r10, r10, #0")
-    emit("stmia r0!, {r12}")
-    print ""
-
-    #### rest of initial block, with moving accumulator registers
-    acc = [9, 10, 11, 12, 14]
-    if init_size == 3:
-        emit("mov r%s, #0", acc[2])
-        for i in xrange(0, 3):
-            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
-            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
-            emit("adc r%s, r%s, #0", acc[2], acc[2])
-        emit("stmia r0!, {r%s}", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-
-        emit("mov r%s, #0", acc[2])
-        for i in xrange(0, 2):
-            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i + 1], ry[2 - i])
-            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
-            emit("adc r%s, r%s, #0", acc[2], acc[2])
-        emit("stmia r0!, {r%s}", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-    
-    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[init_size-1], ry[init_size-1])
-    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-    emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
-    emit("stmia r0!, {r%s}", acc[0])
-    emit("stmia r0!, {r%s}", acc[1])
-print ""
-
-#### reset y and z pointers
-emit("sub r0, %s", (2 * init_size + 3) * 4)
-emit("sub r2, %s", (init_size + 3) * 4)
-
-#### load y registers
-emit("ldmia r2!, {%s}", ", ".join(["r%s" % (ry[i]) for i in xrange(3)]))
-
-#### load additional x registers
-if init_size != 3:
-    emit("ldmia r1!, {%s}", ", ".join(["r%s" % (rx[i]) for i in xrange(init_size, 3)]))
-print ""
-
-prev_size = init_size
-for row in xrange(full_rows):
-    emit("umull r11, r12, r3, r6")
-    emit("stmia r0!, {r11}")
-    print ""
-    emit("mov r10, #0")
-    emit("umull r11, r9, r3, r7")
-    emit("adds r12, r12, r11")
-    emit("adc r9, r9, #0")
-    emit("umull r11, r14, r4, r6")
-    emit("adds r12, r12, r11")
-    emit("adcs r9, r9, r14")
-    emit("adc r10, r10, #0")
-    emit("stmia r0!, {r12}")
-    print ""
-
-    acc = [9, 10, 11, 12, 14]
-    emit("mov r%s, #0", acc[2])
-    for i in xrange(0, 3):
-        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], rx[i], ry[2 - i])
-        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
-        emit("adc r%s, r%s, #0", acc[2], acc[2])
-    emit("stmia r0!, {r%s}", acc[0])
-    print ""
-    acc = acc[1:] + acc[:1]
-
-    #### now we need to start shifting x and loading from z
-    x_regs = [3, 4, 5]
-    for r in xrange(0, prev_size):
-        x_regs = x_regs[1:] + x_regs[:1]
-        emit("ldmia r1!, {r%s}", x_regs[2])
-        emit("mov r%s, #0", acc[2])
-        for i in xrange(0, 3):
-            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], ry[2 - i])
-            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
-            emit("adc r%s, r%s, #0", acc[2], acc[2])
-        emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
-        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-        emit("adcs r%s, r%s, #0", acc[1], acc[1])
-        emit("adc r%s, r%s, #0", acc[2], acc[2])
-        emit("stmia r0!, {r%s}", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-
-    # done shifting x, start shifting y
-    y_regs = [6, 7, 8]
-    for r in xrange(0, prev_size):
-        y_regs = y_regs[1:] + y_regs[:1]
-        emit("ldmia r2!, {r%s}", y_regs[2])
-        emit("mov r%s, #0", acc[2])
-        for i in xrange(0, 3):
-            emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i], y_regs[2 - i])
-            emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-            emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
-            emit("adc r%s, r%s, #0", acc[2], acc[2])
-        emit("ldr r%s, [r0]", acc[3]) # load stored value from initial block, and add to accumulator
-        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-        emit("adcs r%s, r%s, #0", acc[1], acc[1])
-        emit("adc r%s, r%s, #0", acc[2], acc[2])
-        emit("stmia r0!, {r%s}", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-
-    # done both shifts, do remaining corner
-    emit("mov r%s, #0", acc[2])
-    for i in xrange(0, 2):
-        emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[i + 1], y_regs[2 - i])
-        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[4])
-        emit("adc r%s, r%s, #0", acc[2], acc[2])
-    emit("stmia r0!, {r%s}", acc[0])
-    print ""
-    acc = acc[1:] + acc[:1]
-    
-    emit("umull r%s, r%s, r%s, r%s", acc[3], acc[4], x_regs[2], y_regs[2])
-    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[3])
-    emit("adc r%s, r%s, r%s", acc[1], acc[1], acc[4])
-    emit("stmia r0!, {r%s}", acc[0])
-    emit("stmia r0!, {r%s}", acc[1])
-    print ""
-    
-    prev_size = prev_size + 3
-    if row < full_rows - 1:
-        #### reset x, y and z pointers
-        emit("sub r0, %s", (2 * prev_size + 3) * 4)
-        emit("sub r1, %s", prev_size * 4)
-        emit("sub r2, %s", (prev_size + 3) * 4)
-
-        #### load x and y registers
-        emit("ldmia r1!, {%s}", ",".join(["r%s" % (rx[i]) for i in xrange(3)]))
-        emit("ldmia r2!, {%s}", ",".join(["r%s" % (ry[i]) for i in xrange(3)]))
-        
-        print ""

+ 0 - 203
components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr.py

@@ -1,203 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-if len(sys.argv) < 2:
-    print "Provide the integer size in bytes"
-    sys.exit(1)
-
-size = int(sys.argv[1])
-
-full_rows = size // 10
-init_size = size % 10
-
-if init_size == 0:
-    full_rows = full_rows - 1
-    init_size = 10
-
-def rx(i):
-    return i + 2
-
-def ry(i):
-    return i + 12
-
-def emit(line, *args):
-    s = '"' + line + r' \n\t"'
-    print s % args
-
-#### set up registers
-emit("adiw r30, %s", size - init_size) # move z
-emit("adiw r28, %s", size - init_size) # move y
-
-for i in xrange(init_size):
-    emit("ld r%s, x+", rx(i))
-for i in xrange(init_size):
-    emit("ld r%s, y+", ry(i))
-
-emit("ldi r25, 0")
-print ""
-if init_size == 1:
-    emit("mul r2, r12")
-    emit("st z+, r0")
-    emit("st z+, r1")
-else:
-    #### first two multiplications of initial block
-    emit("ldi r23, 0")
-    emit("mul r2, r12")
-    emit("st z+, r0")
-    emit("mov r22, r1")
-    print ""
-    emit("ldi r24, 0")
-    emit("mul r2, r13")
-    emit("add r22, r0")
-    emit("adc r23, r1")
-    emit("mul r3, r12")
-    emit("add r22, r0")
-    emit("adc r23, r1")
-    emit("adc r24, r25")
-    emit("st z+, r22")
-    print ""
-
-    #### rest of initial block, with moving accumulator registers
-    acc = [23, 24, 22]
-    for r in xrange(2, init_size):
-        emit("ldi r%s, 0", acc[2])
-        for i in xrange(0, r+1):
-            emit("mul r%s, r%s", rx(i), ry(r - i))
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, r25", acc[2])
-        emit("st z+, r%s", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-    for r in xrange(1, init_size-1):
-        emit("ldi r%s, 0", acc[2])
-        for i in xrange(0, init_size-r):
-            emit("mul r%s, r%s", rx(r+i), ry((init_size-1) - i))
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, r25", acc[2])
-        emit("st z+, r%s", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-    emit("mul r%s, r%s", rx(init_size-1), ry(init_size-1))
-    emit("add r%s, r0", acc[0])
-    emit("adc r%s, r1", acc[1])
-    emit("st z+, r%s", acc[0])
-    emit("st z+, r%s", acc[1])
-print ""
-
-#### reset y and z pointers
-emit("sbiw r30, %s", 2 * init_size + 10)
-emit("sbiw r28, %s", init_size + 10)
-
-#### load y registers
-for i in xrange(10):
-    emit("ld r%s, y+", ry(i))
-
-#### load additional x registers
-for i in xrange(init_size, 10):
-    emit("ld r%s, x+", rx(i))
-print ""
-
-prev_size = init_size
-for row in xrange(full_rows):
-    #### do x = 0-9, y = 0-9 multiplications
-    emit("ldi r23, 0")
-    emit("mul r2, r12")
-    emit("st z+, r0")
-    emit("mov r22, r1")
-    print ""
-    emit("ldi r24, 0")
-    emit("mul r2, r13")
-    emit("add r22, r0")
-    emit("adc r23, r1")
-    emit("mul r3, r12")
-    emit("add r22, r0")
-    emit("adc r23, r1")
-    emit("adc r24, r25")
-    emit("st z+, r22")
-    print ""
-
-    acc = [23, 24, 22]
-    for r in xrange(2, 10):
-        emit("ldi r%s, 0", acc[2])
-        for i in xrange(0, r+1):
-            emit("mul r%s, r%s", rx(i), ry(r - i))
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, r25", acc[2])
-        emit("st z+, r%s", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-
-    #### now we need to start shifting x and loading from z
-    x_regs = [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
-    for r in xrange(0, prev_size):
-        x_regs = x_regs[1:] + x_regs[:1]
-        emit("ld r%s, x+", x_regs[9]) # load next byte of left
-        emit("ldi r%s, 0", acc[2])
-        for i in xrange(0, 10):
-            emit("mul r%s, r%s", x_regs[i], ry(9 - i))
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, r25", acc[2])
-        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r25", acc[1])
-        emit("adc r%s, r25", acc[2])
-        emit("st z+, r%s", acc[0]) # store next byte (z increments)
-        print ""
-        acc = acc[1:] + acc[:1]
-
-    # done shifting x, start shifting y
-    y_regs = [12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-    for r in xrange(0, prev_size):
-        y_regs = y_regs[1:] + y_regs[:1]
-        emit("ld r%s, y+", y_regs[9]) # load next byte of right
-        emit("ldi r%s, 0", acc[2])
-        for i in xrange(0, 10):
-            emit("mul r%s, r%s", x_regs[i], y_regs[9 -i])
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, r25", acc[2])
-        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r25", acc[1])
-        emit("adc r%s, r25", acc[2])
-        emit("st z+, r%s", acc[0]) # store next byte (z increments)
-        print ""
-        acc = acc[1:] + acc[:1]
-
-    # done both shifts, do remaining corner
-    for r in xrange(1, 9):
-        emit("ldi r%s, 0", acc[2])
-        for i in xrange(0, 10-r):
-            emit("mul r%s, r%s", x_regs[r+i], y_regs[9 - i])
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, r25", acc[2])
-        emit("st z+, r%s", acc[0])
-        print ""
-        acc = acc[1:] + acc[:1]
-    emit("mul r%s, r%s", x_regs[9], y_regs[9])
-    emit("add r%s, r0", acc[0])
-    emit("adc r%s, r1", acc[1])
-    emit("st z+, r%s", acc[0])
-    emit("st z+, r%s", acc[1])
-    print ""
-    
-    prev_size = prev_size + 10
-    if row < full_rows - 1:
-        #### reset x, y and z pointers
-        emit("sbiw r30, %s", 2 * prev_size + 10)
-        emit("sbiw r28, %s", prev_size + 10)
-        emit("sbiw r26, %s", prev_size)
-
-        #### load x and y registers
-        for i in xrange(10):
-            emit("ld r%s, x+", rx(i))
-            emit("ld r%s, y+", ry(i))
-        print ""
-
-emit("eor r1, r1")

+ 0 - 143
components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/mult_avr_extra.py

@@ -1,143 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-if len(sys.argv) < 2:
-    print "Provide the integer size in bytes"
-    sys.exit(1)
-
-size = int(sys.argv[1])
-
-def lhi(i):
-    return i + 2
-
-def rhi(i):
-    return i + 6
-
-left_lo = [10, 11, 12, 13]
-right_lo = [14, 15, 16, 17]
-
-def llo(i):
-    return left_lo[i]
-
-def rlo(i):
-    return right_lo[i]
-
-def emit(line, *args):
-    s = '"' + line + r' \n\t"'
-    print s % args
-
-def update_low():
-    global left_lo
-    global right_lo
-    left_lo = left_lo[1:] + left_lo[:1]
-    right_lo = right_lo[1:] + right_lo[:1]
-    emit("ld r%s, x+", left_lo[3])
-    emit("ld r%s, y+", right_lo[3])
-
-accum = [19, 20, 21]
-
-def acc(i):
-    return accum[i]
-
-def rotate_acc():
-    global accum
-    accum = accum[1:] + accum[:1]
-
-# Load high values
-for i in xrange(4):
-    emit("ld r%s, x+", lhi(i))
-    emit("ld r%s, y+", rhi(i))
-
-emit("sbiw r26, %s", size + 4)
-emit("sbiw r28, %s", size + 4)
-emit("sbiw r30, %s", size)
-
-# Load low values
-for i in xrange(4):
-    emit("ld r%s, x+", llo(i))
-    emit("ld r%s, y+", rlo(i))
-print ""
-
-# Compute initial triangles
-emit("mul r%s, r%s", lhi(0), rlo(0))
-emit("mov r%s, r0", acc(0))
-emit("mov r%s, r1", acc(1))
-emit("ldi r%s, 0", acc(2))
-emit("ld r0, z")
-emit("add r%s, r0", acc(0))
-emit("adc r%s, r25", acc(1))
-emit("mul r%s, r%s", rhi(0), llo(0))
-emit("add r%s, r0", acc(0))
-emit("adc r%s, r1", acc(1))
-emit("adc r%s, r25", acc(2))
-emit("st z+, r%s", acc(0))
-print ""
-rotate_acc()
-
-for i in xrange(1, 4):
-    emit("ldi r%s, 0", acc(2))
-    emit("ld r0, z")
-    emit("add r%s, r0", acc(0))
-    emit("adc r%s, r25", acc(1))
-    for j in xrange(i + 1):
-        emit("mul r%s, r%s", lhi(j), rlo(i-j))
-        emit("add r%s, r0", acc(0))
-        emit("adc r%s, r1", acc(1))
-        emit("adc r%s, r25", acc(2))
-        emit("mul r%s, r%s", rhi(j), llo(i-j))
-        emit("add r%s, r0", acc(0))
-        emit("adc r%s, r1", acc(1))
-        emit("adc r%s, r25", acc(2))
-    emit("st z+, r%s", acc(0))
-    print ""
-    rotate_acc()
-
-# Compute rows overlapping old block
-for i in xrange(4, size):
-    emit("ldi r%s, 0", acc(2))
-    emit("ld r0, z")
-    emit("add r%s, r0", acc(0))
-    emit("adc r%s, r25", acc(1))
-    update_low()
-    for j in xrange(4):
-        emit("mul r%s, r%s", lhi(j), rlo(3-j))
-        emit("add r%s, r0", acc(0))
-        emit("adc r%s, r1", acc(1))
-        emit("adc r%s, r25", acc(2))
-        emit("mul r%s, r%s", rhi(j), llo(3-j))
-        emit("add r%s, r0", acc(0))
-        emit("adc r%s, r1", acc(1))
-        emit("adc r%s, r25", acc(2))
-    emit("st z+, r%s", acc(0))
-    print ""
-    rotate_acc()
-
-# Compute new triangle
-left_combined = [llo(1), llo(2), llo(3), lhi(0), lhi(1), lhi(2), lhi(3)]
-right_combined = [rlo(1), rlo(2), rlo(3), rhi(0), rhi(1), rhi(2), rhi(3)]
-
-def left(i):
-    return left_combined[i]
-
-def right(i):
-    return right_combined[i]
-
-for i in xrange(6):
-    emit("ldi r%s, 0", acc(2))
-    for j in xrange(7 - i):
-        emit("mul r%s, r%s", left(i+j), right(6-j))
-        emit("add r%s, r0", acc(0))
-        emit("adc r%s, r1", acc(1))
-        emit("adc r%s, r25", acc(2))
-    emit("st z+, r%s", acc(0))
-    print ""
-    rotate_acc()
-
-emit("mul r%s, r%s", left(6), right(6))
-emit("add r%s, r0", acc(0))
-emit("adc r%s, r1", acc(1))
-emit("st z+, r%s", acc(0))
-emit("st z+, r%s", acc(1))
-emit("adiw r26, 4")
-emit("adiw r28, 4")

+ 0 - 242
components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_arm.py

@@ -1,242 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-if len(sys.argv) < 2:
-    print "Provide the integer size in 32-bit words"
-    sys.exit(1)
-
-size = int(sys.argv[1])
-
-if size > 8:
-    print "This script doesn't work with integer size %s due to laziness" % (size)
-    sys.exit(1)
-
-init_size = 0
-if size > 6:
-    init_size = size - 6
-
-def emit(line, *args):
-    s = '"' + line + r' \n\t"'
-    print s % args
-
-def mulacc(acc, r1, r2):
-    if size <= 6:
-        emit("umull r1, r14, r%s, r%s", r1, r2)
-        emit("adds r%s, r%s, r1", acc[0], acc[0])
-        emit("adcs r%s, r%s, r14", acc[1], acc[1])
-        emit("adc r%s, r%s, #0", acc[2], acc[2])
-    else:
-        emit("mov r14, r%s", acc[1])
-        emit("umlal r%s, r%s, r%s, r%s", acc[0], acc[1], r1, r2)
-        emit("cmp r14, r%s", acc[1])
-        emit("it hi")
-        emit("adchi r%s, r%s, #0", acc[2], acc[2])
-
-r = [2, 3, 4, 5, 6, 7]
-
-s = size - init_size
-
-if init_size == 1:
-    emit("ldmia r1!, {r2}")
-    emit("add r1, %s", (size - init_size * 2) * 4)
-    emit("ldmia r1!, {r5}")
-    
-    emit("add r0, %s", (size - init_size) * 4)
-    emit("umull r8, r9, r2, r5")
-    emit("stmia r0!, {r8, r9}")
-    
-    emit("sub r0, %s", (size + init_size) * 4)
-    emit("sub r1, %s", (size) * 4)
-    print ""
-elif init_size == 2:
-    emit("ldmia r1!, {r2, r3}")
-    emit("add r1, %s", (size - init_size * 2) * 4)
-    emit("ldmia r1!, {r5, r6}")
-    
-    emit("add r0, %s", (size - init_size) * 4)
-    print ""
-
-    emit("umull r8, r9, r2, r5")
-    emit("stmia r0!, {r8}")
-    print ""
-    
-    emit("umull r12, r10, r2, r6")
-    emit("adds r9, r9, r12")
-    emit("adc r10, r10, #0")
-    emit("stmia r0!, {r9}")
-    print ""
-    
-    emit("umull r8, r9, r3, r6")
-    emit("adds r10, r10, r8")
-    emit("adc r11, r9, #0")
-    emit("stmia r0!, {r10, r11}")
-    print ""
-    
-    emit("sub r0, %s", (size + init_size) * 4)
-    emit("sub r1, %s", (size) * 4)
-
-# load input words
-emit("ldmia r1!, {%s}", ", ".join(["r%s" % (r[i]) for i in xrange(s)]))
-print ""
-
-emit("umull r11, r12, r2, r2")
-emit("stmia r0!, {r11}")
-print ""
-emit("mov r9, #0")
-emit("umull r10, r11, r2, r3")
-emit("adds r12, r12, r10")
-emit("adcs r8, r11, #0")
-emit("adc r9, r9, #0")
-emit("adds r12, r12, r10")
-emit("adcs r8, r8, r11")
-emit("adc r9, r9, #0")
-emit("stmia r0!, {r12}")
-print ""
-emit("mov r10, #0")
-emit("umull r11, r12, r2, r4")
-emit("adds r11, r11, r11")
-emit("adcs r12, r12, r12")
-emit("adc r10, r10, #0")
-emit("adds r8, r8, r11")
-emit("adcs r9, r9, r12")
-emit("adc r10, r10, #0")
-emit("umull r11, r12, r3, r3")
-emit("adds r8, r8, r11")
-emit("adcs r9, r9, r12")
-emit("adc r10, r10, #0")
-emit("stmia r0!, {r8}")
-print ""
-
-acc = [8, 9, 10]
-old_acc = [11, 12]
-for i in xrange(3, s):
-    emit("mov r%s, #0", old_acc[1])
-    tmp = [acc[1], acc[2]]
-    acc = [acc[0], old_acc[0], old_acc[1]]
-    old_acc = tmp
-    
-    # gather non-equal words
-    emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], r[0], r[i])
-    for j in xrange(1, (i+1)//2):
-        mulacc(acc, r[j], r[i-j])
-    # multiply by 2
-    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
-    emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
-    emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
-    
-    # add equal word (if any)
-    if ((i+1) % 2) != 0:
-        mulacc(acc, r[i//2], r[i//2])
-    
-    # add old accumulator
-    emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
-    emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
-    emit("adc r%s, r%s, #0", acc[2], acc[2])
-    
-    # store
-    emit("stmia r0!, {r%s}", acc[0])
-    print ""
-
-regs = list(r)
-for i in xrange(init_size):
-    regs = regs[1:] + regs[:1]
-    emit("ldmia r1!, {r%s}", regs[5])
-    
-    for limit in [4, 5]:
-        emit("mov r%s, #0", old_acc[1])
-        tmp = [acc[1], acc[2]]
-        acc = [acc[0], old_acc[0], old_acc[1]]
-        old_acc = tmp
-    
-        # gather non-equal words
-        emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[0], regs[limit])
-        for j in xrange(1, (limit+1)//2):
-            mulacc(acc, regs[j], regs[limit-j])
-    
-        emit("ldr r14, [r0]") # load stored value from initial block, and add to accumulator
-        emit("adds r%s, r%s, r14", acc[0], acc[0])
-        emit("adcs r%s, r%s, #0", acc[1], acc[1])
-        emit("adc r%s, r%s, #0", acc[2], acc[2])
-    
-        # multiply by 2
-        emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
-        emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
-        emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
-    
-        # add equal word
-        if limit == 4:
-            mulacc(acc, regs[2], regs[2])
-    
-        # add old accumulator
-        emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
-        emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
-        emit("adc r%s, r%s, #0", acc[2], acc[2])
-    
-        # store
-        emit("stmia r0!, {r%s}", acc[0])
-        print ""
-
-for i in xrange(1, s-3):
-    emit("mov r%s, #0", old_acc[1])
-    tmp = [acc[1], acc[2]]
-    acc = [acc[0], old_acc[0], old_acc[1]]
-    old_acc = tmp
-
-    # gather non-equal words
-    emit("umull r%s, r%s, r%s, r%s", acc[0], acc[1], regs[i], regs[s - 1])
-    for j in xrange(1, (s-i)//2):
-        mulacc(acc, regs[i+j], regs[s - 1 - j])
-
-    # multiply by 2
-    emit("adds r%s, r%s, r%s", acc[0], acc[0], acc[0])
-    emit("adcs r%s, r%s, r%s", acc[1], acc[1], acc[1])
-    emit("adc r%s, r%s, r%s", acc[2], acc[2], acc[2])
-
-    # add equal word (if any)
-    if ((s-i) % 2) != 0:
-        mulacc(acc, regs[i + (s-i)//2], regs[i + (s-i)//2])
-
-    # add old accumulator
-    emit("adds r%s, r%s, r%s", acc[0], acc[0], old_acc[0])
-    emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
-    emit("adc r%s, r%s, #0", acc[2], acc[2])
-
-    # store
-    emit("stmia r0!, {r%s}", acc[0])
-    print ""
-
-acc = acc[1:] + acc[:1]
-emit("mov r%s, #0", acc[2])
-emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 3], regs[s - 1])
-emit("adds r1, r1, r1")
-emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
-emit("adc r%s, r%s, #0", acc[2], acc[2])
-emit("adds r%s, r%s, r1", acc[0], acc[0])
-emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
-emit("adc r%s, r%s, #0", acc[2], acc[2])
-emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 2])
-emit("adds r%s, r%s, r1", acc[0], acc[0])
-emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
-emit("adc r%s, r%s, #0", acc[2], acc[2])
-emit("stmia r0!, {r%s}", acc[0])
-print ""
-
-acc = acc[1:] + acc[:1]
-emit("mov r%s, #0", acc[2])
-emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 2], regs[s - 1])
-emit("adds r1, r1, r1")
-emit("adcs r%s, r%s, r%s", old_acc[1], old_acc[1], old_acc[1])
-emit("adc r%s, r%s, #0", acc[2], acc[2])
-emit("adds r%s, r%s, r1", acc[0], acc[0])
-emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
-emit("adc r%s, r%s, #0", acc[2], acc[2])
-emit("stmia r0!, {r%s}", acc[0])
-print ""
-
-acc = acc[1:] + acc[:1]
-emit("umull r1, r%s, r%s, r%s", old_acc[1], regs[s - 1], regs[s - 1])
-emit("adds r%s, r%s, r1", acc[0], acc[0])
-emit("adcs r%s, r%s, r%s", acc[1], acc[1], old_acc[1])
-emit("stmia r0!, {r%s}", acc[0])
-emit("stmia r0!, {r%s}", acc[1])

+ 0 - 327
components/bootloader/subproject/components/micro-ecc/micro-ecc/scripts/square_avr.py

@@ -1,327 +0,0 @@
-#!/usr/bin/env python
-
-import sys
-
-if len(sys.argv) < 2:
-    print "Provide the integer size in bytes"
-    sys.exit(1)
-
-size = int(sys.argv[1])
-
-if size > 40:
-    print "This script doesn't work with integer size %s due to laziness" % (size)
-    sys.exit(1)
-
-init_size = size - 20
-if size < 20:
-    init_size = 0
-
-def rg(i):
-    return i + 2
-
-def lo(i):
-    return i + 2
-
-def hi(i):
-    return i + 12
-
-def emit(line, *args):
-    s = '"' + line + r' \n\t"'
-    print s % args
-
-#### set up registers
-zero = "r25"
-emit("ldi %s, 0", zero) # zero register
-
-if init_size > 0:
-    emit("movw r28, r26") # y = x
-    h = (init_size + 1)//2
-    
-    for i in xrange(h):
-        emit("ld r%s, x+", lo(i))
-    emit("adiw r28, %s", size - init_size) # move y to other end
-    for i in xrange(h):
-        emit("ld r%s, y+", hi(i))
-
-    emit("adiw r30, %s", size - init_size) # move z
-
-    if init_size == 1:
-        emit("mul %s, %s", lo(0), hi(0))
-        emit("st z+, r0")
-        emit("st z+, r1")
-    else:
-        #### first one
-        print ""
-        emit("ldi r23, 0")
-        emit("mul %s, %s", lo(0), hi(0))
-        emit("st z+, r0")
-        emit("mov r22, r1")
-        print ""
-
-        #### rest of initial block, with moving accumulator registers
-        acc = [22, 23, 24]
-        for r in xrange(1, h):
-            emit("ldi r%s, 0", acc[2])
-            for i in xrange(0, (r+2)//2):
-                emit("mul r%s, r%s", lo(i), hi(r - i))
-                emit("add r%s, r0", acc[0])
-                emit("adc r%s, r1", acc[1])
-                emit("adc r%s, %s", acc[2], zero)
-            emit("st z+, r%s", acc[0])
-            print ""
-            acc = acc[1:] + acc[:1]
-        
-        lo_r = range(2, 2 + h)
-        hi_r = range(12, 12 + h)
-        
-        # now we need to start loading more from the high end
-        for r in xrange(h, init_size):
-            hi_r = hi_r[1:] + hi_r[:1]
-            emit("ld r%s, y+", hi_r[h-1])
-            
-            emit("ldi r%s, 0", acc[2])
-            for i in xrange(0, (r+2)//2):
-                emit("mul r%s, r%s", lo(i), hi_r[h - 1 - i])
-                emit("add r%s, r0", acc[0])
-                emit("adc r%s, r1", acc[1])
-                emit("adc r%s, %s", acc[2], zero)
-            emit("st z+, r%s", acc[0])
-            print ""
-            acc = acc[1:] + acc[:1]
-            
-        # loaded all of the high end bytes; now need to start loading the rest of the low end
-        for r in xrange(1, init_size-h):
-            lo_r = lo_r[1:] + lo_r[:1]
-            emit("ld r%s, x+", lo_r[h-1])
-            
-            emit("ldi r%s, 0", acc[2])
-            for i in xrange(0, (init_size+1 - r)//2):
-                emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
-                emit("add r%s, r0", acc[0])
-                emit("adc r%s, r1", acc[1])
-                emit("adc r%s, %s", acc[2], zero)
-            emit("st z+, r%s", acc[0])
-            print ""
-            acc = acc[1:] + acc[:1]
-        
-        lo_r = lo_r[1:] + lo_r[:1]
-        emit("ld r%s, x+", lo_r[h-1])
-        
-        # now we have loaded everything, and we just need to finish the last corner
-        for r in xrange(init_size-h, init_size-1):
-            emit("ldi r%s, 0", acc[2])
-            for i in xrange(0, (init_size+1 - r)//2):
-                emit("mul r%s, r%s", lo_r[i], hi_r[h - 1 - i])
-                emit("add r%s, r0", acc[0])
-                emit("adc r%s, r1", acc[1])
-                emit("adc r%s, %s", acc[2], zero)
-            emit("st z+, r%s", acc[0])
-            print ""
-            acc = acc[1:] + acc[:1]
-            lo_r = lo_r[1:] + lo_r[:1] # make the indexing easy
-        
-        emit("mul r%s, r%s", lo_r[0], hi_r[h - 1])
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r1", acc[1])
-        emit("st z+, r%s", acc[0])
-        emit("st z+, r%s", acc[1])
-    print ""
-    emit("sbiw r26, %s", init_size) # reset x
-    emit("sbiw r30, %s", size + init_size) # reset z
-
-# TODO you could do more rows of size 20 here if your integers are larger than 40 bytes
-
-s = size - init_size
-
-for i in xrange(s):
-    emit("ld r%s, x+", rg(i))
-
-#### first few columns
-# NOTE: this is only valid if size >= 3
-print ""
-emit("ldi r23, 0")
-emit("mul r%s, r%s", rg(0), rg(0))
-emit("st z+, r0")
-emit("mov r22, r1")
-print ""
-emit("ldi r24, 0")
-emit("mul r%s, r%s", rg(0), rg(1))
-emit("add r22, r0")
-emit("adc r23, r1")
-emit("adc r24, %s", zero)
-emit("add r22, r0")
-emit("adc r23, r1")
-emit("adc r24, %s", zero)
-emit("st z+, r22")
-print ""
-emit("ldi r22, 0")
-emit("mul r%s, r%s", rg(0), rg(2))
-emit("add r23, r0")
-emit("adc r24, r1")
-emit("adc r22, %s", zero)
-emit("add r23, r0")
-emit("adc r24, r1")
-emit("adc r22, %s", zero)
-emit("mul r%s, r%s", rg(1), rg(1))
-emit("add r23, r0")
-emit("adc r24, r1")
-emit("adc r22, %s", zero)
-emit("st z+, r23")
-print ""
-
-acc = [23, 24, 22]
-old_acc = [28, 29]
-for i in xrange(3, s):
-    emit("ldi r%s, 0", old_acc[1])
-    tmp = [acc[1], acc[2]]
-    acc = [acc[0], old_acc[0], old_acc[1]]
-    old_acc = tmp
-    
-    # gather non-equal words
-    emit("mul r%s, r%s", rg(0), rg(i))
-    emit("mov r%s, r0", acc[0])
-    emit("mov r%s, r1", acc[1])
-    for j in xrange(1, (i+1)//2):
-        emit("mul r%s, r%s", rg(j), rg(i-j))
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r1", acc[1])
-        emit("adc r%s, %s", acc[2], zero)
-    # multiply by 2
-    emit("lsl r%s", acc[0])
-    emit("rol r%s", acc[1])
-    emit("rol r%s", acc[2])
-    
-    # add equal word (if any)
-    if ((i+1) % 2) != 0:
-        emit("mul r%s, r%s", rg(i//2), rg(i//2))
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r1", acc[1])
-        emit("adc r%s, %s", acc[2], zero)
-    
-    # add old accumulator
-    emit("add r%s, r%s", acc[0], old_acc[0])
-    emit("adc r%s, r%s", acc[1], old_acc[1])
-    emit("adc r%s, %s", acc[2], zero)
-    
-    # store
-    emit("st z+, r%s", acc[0])
-    print ""
-
-regs = range(2, 22)
-for i in xrange(init_size):
-    regs = regs[1:] + regs[:1]
-    emit("ld r%s, x+", regs[19])
-    
-    for limit in [18, 19]:
-        emit("ldi r%s, 0", old_acc[1])
-        tmp = [acc[1], acc[2]]
-        acc = [acc[0], old_acc[0], old_acc[1]]
-        old_acc = tmp
-    
-        # gather non-equal words
-        emit("mul r%s, r%s", regs[0], regs[limit])
-        emit("mov r%s, r0", acc[0])
-        emit("mov r%s, r1", acc[1])
-        for j in xrange(1, (limit+1)//2):
-            emit("mul r%s, r%s", regs[j], regs[limit-j])
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, %s", acc[2], zero)
-    
-        emit("ld r0, z") # load stored value from initial block, and add to accumulator (note z does not increment)
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r25", acc[1])
-        emit("adc r%s, r25", acc[2])
-    
-        # multiply by 2
-        emit("lsl r%s", acc[0])
-        emit("rol r%s", acc[1])
-        emit("rol r%s", acc[2])
-    
-        # add equal word
-        if limit == 18:
-            emit("mul r%s, r%s", regs[9], regs[9])
-            emit("add r%s, r0", acc[0])
-            emit("adc r%s, r1", acc[1])
-            emit("adc r%s, %s", acc[2], zero)
-    
-        # add old accumulator
-        emit("add r%s, r%s", acc[0], old_acc[0])
-        emit("adc r%s, r%s", acc[1], old_acc[1])
-        emit("adc r%s, %s", acc[2], zero)
-    
-        # store
-        emit("st z+, r%s", acc[0])
-        print ""
-
-for i in xrange(1, s-3):
-    emit("ldi r%s, 0", old_acc[1])
-    tmp = [acc[1], acc[2]]
-    acc = [acc[0], old_acc[0], old_acc[1]]
-    old_acc = tmp
-
-    # gather non-equal words
-    emit("mul r%s, r%s", regs[i], regs[s - 1])
-    emit("mov r%s, r0", acc[0])
-    emit("mov r%s, r1", acc[1])
-    for j in xrange(1, (s-i)//2):
-        emit("mul r%s, r%s", regs[i+j], regs[s - 1 - j])
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r1", acc[1])
-        emit("adc r%s, %s", acc[2], zero)
-    # multiply by 2
-    emit("lsl r%s", acc[0])
-    emit("rol r%s", acc[1])
-    emit("rol r%s", acc[2])
-
-    # add equal word (if any)
-    if ((s-i) % 2) != 0:
-        emit("mul r%s, r%s", regs[i + (s-i)//2], regs[i + (s-i)//2])
-        emit("add r%s, r0", acc[0])
-        emit("adc r%s, r1", acc[1])
-        emit("adc r%s, %s", acc[2], zero)
-
-    # add old accumulator
-    emit("add r%s, r%s", acc[0], old_acc[0])
-    emit("adc r%s, r%s", acc[1], old_acc[1])
-    emit("adc r%s, %s", acc[2], zero)
-
-    # store
-    emit("st z+, r%s", acc[0])
-    print ""
-
-acc = acc[1:] + acc[:1]
-emit("ldi r%s, 0", acc[2])
-emit("mul r%s, r%s", regs[17], regs[19])
-emit("add r%s, r0", acc[0])
-emit("adc r%s, r1", acc[1])
-emit("adc r%s, %s", acc[2], zero)
-emit("add r%s, r0", acc[0])
-emit("adc r%s, r1", acc[1])
-emit("adc r%s, %s", acc[2], zero)
-emit("mul r%s, r%s", regs[18], regs[18])
-emit("add r%s, r0", acc[0])
-emit("adc r%s, r1", acc[1])
-emit("adc r%s, %s", acc[2], zero)
-emit("st z+, r%s", acc[0])
-print ""
-
-acc = acc[1:] + acc[:1]
-emit("ldi r%s, 0", acc[2])
-emit("mul r%s, r%s", regs[18], regs[19])
-emit("add r%s, r0", acc[0])
-emit("adc r%s, r1", acc[1])
-emit("adc r%s, %s", acc[2], zero)
-emit("add r%s, r0", acc[0])
-emit("adc r%s, r1", acc[1])
-emit("adc r%s, %s", acc[2], zero)
-emit("st z+, r%s", acc[0])
-print ""
-
-emit("mul r%s, r%s", regs[19], regs[19])
-emit("add r%s, r0", acc[1])
-emit("adc r%s, r1", acc[2])
-emit("st z+, r%s", acc[1])
-
-emit("st z+, r%s", acc[2])
-emit("eor r1, r1")

+ 0 - 4
components/bootloader/subproject/components/micro-ecc/micro-ecc/test/emk_rules.py

@@ -1,4 +0,0 @@
-c, link = emk.module("c", "link")
-link.depdirs += [
-    "$:proj:$"
-]

+ 0 - 79
components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compress.c

@@ -1,79 +0,0 @@
-/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#include "uECC.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#ifndef uECC_TEST_NUMBER_OF_ITERATIONS
-#define uECC_TEST_NUMBER_OF_ITERATIONS   256
-#endif
-
-void vli_print(char *str, uint8_t *vli, unsigned int size) {
-    printf("%s ", str);
-    for(unsigned i=0; i<size; ++i) {
-        printf("%02X ", (unsigned)vli[i]);
-    }
-    printf("\n");
-}
-
-int main() {
-    uint8_t public[64];
-    uint8_t private[32];
-    uint8_t compressed_point[33];
-    uint8_t decompressed_point[64];
-
-    int i;
-    int c;
-    
-    const struct uECC_Curve_t * curves[5];
-    int num_curves = 0;
-#if uECC_SUPPORTS_secp160r1
-    curves[num_curves++] = uECC_secp160r1();
-#endif
-#if uECC_SUPPORTS_secp192r1
-    curves[num_curves++] = uECC_secp192r1();
-#endif
-#if uECC_SUPPORTS_secp224r1
-    curves[num_curves++] = uECC_secp224r1();
-#endif
-#if uECC_SUPPORTS_secp256r1
-    curves[num_curves++] = uECC_secp256r1();
-#endif
-#if uECC_SUPPORTS_secp256k1
-    curves[num_curves++] = uECC_secp256k1();
-#endif
-    
-    printf("Testing compression and decompression of %d random EC points\n",
-           uECC_TEST_NUMBER_OF_ITERATIONS);
-
-    for (c = 0; c < num_curves; ++c) {
-        for (i = 0; i < uECC_TEST_NUMBER_OF_ITERATIONS; ++i) {
-            printf(".");
-            fflush(stdout);
-            
-            memset(public, 0, sizeof(public));
-            memset(decompressed_point, 0, sizeof(decompressed_point));
-
-            /* Generate arbitrary EC point (public) on Curve */
-            if (!uECC_make_key(public, private, curves[c])) {
-                printf("uECC_make_key() failed\n");
-                continue;
-            }
-
-            /* compress and decompress point */
-            uECC_compress(public, compressed_point, curves[c]);
-            uECC_decompress(compressed_point, decompressed_point, curves[c]);
-
-            if (memcmp(public, decompressed_point, sizeof(public)) != 0) {
-                printf("Original and decompressed points are not identical!\n");
-                vli_print("Original point =     ", public, sizeof(public));
-                vli_print("Compressed point =   ", compressed_point, sizeof(compressed_point));
-                vli_print("Decompressed point = ", decompressed_point, sizeof(decompressed_point));
-            }
-        }
-        printf("\n");
-    }
-
-    return 0;
-}

+ 0 - 81
components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_compute.c

@@ -1,81 +0,0 @@
-/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#include "uECC.h"
-
-#include <stdio.h>
-#include <string.h>
-
-void vli_print(char *str, uint8_t *vli, unsigned int size) {
-    printf("%s ", str);
-    for(unsigned i=0; i<size; ++i) {
-        printf("%02X ", (unsigned)vli[i]);
-    }
-    printf("\n");
-}
-
-int main() {
-    int i;
-    int success;
-    uint8_t private[32];
-    uint8_t public[64];
-    uint8_t public_computed[64];
-    
-    int c;
-    
-    const struct uECC_Curve_t * curves[5];
-    int num_curves = 0;
-#if uECC_SUPPORTS_secp160r1
-    curves[num_curves++] = uECC_secp160r1();
-#endif
-#if uECC_SUPPORTS_secp192r1
-    curves[num_curves++] = uECC_secp192r1();
-#endif
-#if uECC_SUPPORTS_secp224r1
-    curves[num_curves++] = uECC_secp224r1();
-#endif
-#if uECC_SUPPORTS_secp256r1
-    curves[num_curves++] = uECC_secp256r1();
-#endif
-#if uECC_SUPPORTS_secp256k1
-    curves[num_curves++] = uECC_secp256k1();
-#endif
-
-    printf("Testing 256 random private key pairs\n");
-    for (c = 0; c < num_curves; ++c) {
-        for (i = 0; i < 256; ++i) {
-            printf(".");
-            fflush(stdout);
-            
-            memset(public, 0, sizeof(public));
-            memset(public_computed, 0, sizeof(public_computed));
-            
-            if (!uECC_make_key(public, private, curves[c])) {
-                printf("uECC_make_key() failed\n");
-                continue;
-            }
-
-            if (!uECC_compute_public_key(private, public_computed, curves[c])) {
-                printf("uECC_compute_public_key() failed\n");
-            }
-
-            if (memcmp(public, public_computed, sizeof(public)) != 0) {
-                printf("Computed and provided public keys are not identical!\n");
-                vli_print("Computed public key = ", public_computed, sizeof(public_computed));
-                vli_print("Provided public key = ", public, sizeof(public));
-                vli_print("Private key = ", private, sizeof(private));
-            }
-        }
-        
-        printf("\n");
-        printf("Testing private key = 0\n");
-
-        memset(private, 0, sizeof(private));
-        success = uECC_compute_public_key(private, public_computed, curves[c]);
-        if (success) {
-            printf("uECC_compute_public_key() should have failed\n");
-        }
-        printf("\n");
-    }
-    
-    return 0;
-}

+ 0 - 90
components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdh.c

@@ -1,90 +0,0 @@
-/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#include "uECC.h"
-
-#include <stdio.h>
-#include <string.h>
-
-void vli_print(uint8_t *vli, unsigned int size) {
-    for(unsigned i=0; i<size; ++i) {
-        printf("%02X ", (unsigned)vli[i]);
-    }
-}
-
-int main() {
-    int i, c;
-    uint8_t private1[32] = {0};
-    uint8_t private2[32] = {0};
-    uint8_t public1[64] = {0};
-    uint8_t public2[64] = {0};
-    uint8_t secret1[32] = {0};
-    uint8_t secret2[32] = {0};
-    
-    const struct uECC_Curve_t * curves[5];
-    int num_curves = 0;
-#if uECC_SUPPORTS_secp160r1
-    curves[num_curves++] = uECC_secp160r1();
-#endif
-#if uECC_SUPPORTS_secp192r1
-    curves[num_curves++] = uECC_secp192r1();
-#endif
-#if uECC_SUPPORTS_secp224r1
-    curves[num_curves++] = uECC_secp224r1();
-#endif
-#if uECC_SUPPORTS_secp256r1
-    curves[num_curves++] = uECC_secp256r1();
-#endif
-#if uECC_SUPPORTS_secp256k1
-    curves[num_curves++] = uECC_secp256k1();
-#endif
-    
-    printf("Testing 256 random private key pairs\n");
-
-    for (c = 0; c < num_curves; ++c) {
-        for (i = 0; i < 256; ++i) {
-            printf(".");
-            fflush(stdout);
-
-            if (!uECC_make_key(public1, private1, curves[c]) ||
-                !uECC_make_key(public2, private2, curves[c])) {
-                printf("uECC_make_key() failed\n");
-                return 1;
-            }
-
-            if (!uECC_shared_secret(public2, private1, secret1, curves[c])) {
-                printf("shared_secret() failed (1)\n");
-                return 1;
-            }
-
-            if (!uECC_shared_secret(public1, private2, secret2, curves[c])) {
-                printf("shared_secret() failed (2)\n");
-                return 1;
-            }
-        
-            if (memcmp(secret1, secret2, sizeof(secret1)) != 0) {
-                printf("Shared secrets are not identical!\n");
-                printf("Private key 1 = ");
-                vli_print(private1, 32);
-                printf("\n");
-                printf("Private key 2 = ");
-                vli_print(private2, 32);
-                printf("\n");
-                printf("Public key 1 = ");
-                vli_print(public1, 64);
-                printf("\n");
-                printf("Public key 2 = ");
-                vli_print(public2, 64);
-                printf("\n");
-                printf("Shared secret 1 = ");
-                vli_print(secret1, 32);
-                printf("\n");
-                printf("Shared secret 2 = ");
-                vli_print(secret2, 32);
-                printf("\n");
-            }
-        }
-        printf("\n");
-    }
-    
-    return 0;
-}

+ 0 - 59
components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa.c

@@ -1,59 +0,0 @@
-/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#include "uECC.h"
-
-#include <stdio.h>
-#include <string.h>
-
-int main() {
-    int i, c;
-    uint8_t private[32] = {0};
-    uint8_t public[64] = {0};
-    uint8_t hash[32] = {0};
-    uint8_t sig[64] = {0};
-
-    const struct uECC_Curve_t * curves[5];
-    int num_curves = 0;
-#if uECC_SUPPORTS_secp160r1
-    curves[num_curves++] = uECC_secp160r1();
-#endif
-#if uECC_SUPPORTS_secp192r1
-    curves[num_curves++] = uECC_secp192r1();
-#endif
-#if uECC_SUPPORTS_secp224r1
-    curves[num_curves++] = uECC_secp224r1();
-#endif
-#if uECC_SUPPORTS_secp256r1
-    curves[num_curves++] = uECC_secp256r1();
-#endif
-#if uECC_SUPPORTS_secp256k1
-    curves[num_curves++] = uECC_secp256k1();
-#endif
-    
-    printf("Testing 256 signatures\n");
-    for (c = 0; c < num_curves; ++c) {
-        for (i = 0; i < 256; ++i) {
-            printf(".");
-            fflush(stdout);
-
-            if (!uECC_make_key(public, private, curves[c])) {
-                printf("uECC_make_key() failed\n");
-                return 1;
-            }
-            memcpy(hash, public, sizeof(hash));
-            
-            if (!uECC_sign(private, hash, sizeof(hash), sig, curves[c])) {
-                printf("uECC_sign() failed\n");
-                return 1;
-            }
-
-            if (!uECC_verify(public, hash, sizeof(hash), sig, curves[c])) {
-                printf("uECC_verify() failed\n");
-                return 1;
-            }
-        }
-        printf("\n");
-    }
-    
-    return 0;
-}

+ 0 - 93
components/bootloader/subproject/components/micro-ecc/micro-ecc/test/test_ecdsa_deterministic.c.example

@@ -1,93 +0,0 @@
-/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#include "uECC.h"
-
-#include <stdio.h>
-#include <string.h>
-
-#define SHA256_BLOCK_LENGTH  64
-#define SHA256_DIGEST_LENGTH 32
-
-typedef struct SHA256_CTX {
-	uint32_t	state[8];
-	uint64_t	bitcount;
-	uint8_t	buffer[SHA256_BLOCK_LENGTH];
-} SHA256_CTX;
-
-extern void SHA256_Init(SHA256_CTX *ctx);
-extern void SHA256_Update(SHA256_CTX *ctx, const uint8_t *message, size_t message_size);
-extern void SHA256_Final(uint8_t digest[SHA256_DIGEST_LENGTH], SHA256_CTX *ctx);
-
-typedef struct SHA256_HashContext {
-    uECC_HashContext uECC;
-    SHA256_CTX ctx;
-} SHA256_HashContext;
-
-static void init_SHA256(const uECC_HashContext *base) {
-    SHA256_HashContext *context = (SHA256_HashContext *)base;
-    SHA256_Init(&context->ctx);
-}
-
-static void update_SHA256(const uECC_HashContext *base,
-                          const uint8_t *message,
-                          unsigned message_size) {
-    SHA256_HashContext *context = (SHA256_HashContext *)base;
-    SHA256_Update(&context->ctx, message, message_size);
-}
-
-static void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) {
-    SHA256_HashContext *context = (SHA256_HashContext *)base;
-    SHA256_Final(hash_result, &context->ctx);
-}
-
-int main() {
-    int i, c;
-    uint8_t private[32] = {0};
-    uint8_t public[64] = {0};
-    uint8_t hash[32] = {0};
-    uint8_t sig[64] = {0};
-    
-    uint8_t tmp[2 * SHA256_DIGEST_LENGTH + SHA256_BLOCK_LENGTH];
-    SHA256_HashContext ctx = {{
-        &init_SHA256,
-        &update_SHA256,
-        &finish_SHA256,
-        SHA256_BLOCK_LENGTH,
-        SHA256_DIGEST_LENGTH,
-        tmp
-    }};
-
-    const struct uECC_Curve_t * curves[5];
-    curves[0] = uECC_secp160r1();
-    curves[1] = uECC_secp192r1();
-    curves[2] = uECC_secp224r1();
-    curves[3] = uECC_secp256r1();
-    curves[4] = uECC_secp256k1();
-    
-    printf("Testing 256 signatures\n");
-    for (c = 0; c < 5; ++c) {
-        for (i = 0; i < 256; ++i) {
-            printf(".");
-            fflush(stdout);
-
-            if (!uECC_make_key(public, private, curves[c])) {
-                printf("uECC_make_key() failed\n");
-                return 1;
-            }
-            memcpy(hash, public, sizeof(hash));
-            
-            if (!uECC_sign_deterministic(private, hash, sizeof(hash), &ctx.uECC, sig, curves[c])) {
-                printf("uECC_sign() failed\n");
-                return 1;
-            }
-
-            if (!uECC_verify(public, hash, sizeof(hash), sig, curves[c])) {
-                printf("uECC_verify() failed\n");
-                return 1;
-            }
-        }
-        printf("\n");
-    }
-    
-    return 0;
-}

+ 0 - 108
components/bootloader/subproject/components/micro-ecc/micro-ecc/types.h

@@ -1,108 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_TYPES_H_
-#define _UECC_TYPES_H_
-
-#ifndef uECC_PLATFORM
-    #if __AVR__
-        #define uECC_PLATFORM uECC_avr
-    #elif defined(__thumb2__) || defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */
-        #define uECC_PLATFORM uECC_arm_thumb2
-    #elif defined(__thumb__)
-        #define uECC_PLATFORM uECC_arm_thumb
-    #elif defined(__arm__) || defined(_M_ARM)
-        #define uECC_PLATFORM uECC_arm
-    #elif defined(__aarch64__)
-        #define uECC_PLATFORM uECC_arm64
-    #elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || defined(__I86__)
-        #define uECC_PLATFORM uECC_x86
-    #elif defined(__amd64__) || defined(_M_X64)
-        #define uECC_PLATFORM uECC_x86_64
-    #else
-        #define uECC_PLATFORM uECC_arch_other
-    #endif
-#endif
-
-#ifndef uECC_ARM_USE_UMAAL
-    #if (uECC_PLATFORM == uECC_arm) && (__ARM_ARCH >= 6)
-        #define uECC_ARM_USE_UMAAL 1
-    #elif (uECC_PLATFORM == uECC_arm_thumb2) && (__ARM_ARCH >= 6) && !__ARM_ARCH_7M__
-        #define uECC_ARM_USE_UMAAL 1
-    #else
-        #define uECC_ARM_USE_UMAAL 0
-    #endif
-#endif
-
-#ifndef uECC_WORD_SIZE
-    #if uECC_PLATFORM == uECC_avr
-        #define uECC_WORD_SIZE 1
-    #elif (uECC_PLATFORM == uECC_x86_64 || uECC_PLATFORM == uECC_arm64)
-        #define uECC_WORD_SIZE 8
-    #else
-        #define uECC_WORD_SIZE 4
-    #endif
-#endif
-
-#if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8)
-    #error "Unsupported value for uECC_WORD_SIZE"
-#endif
-
-#if ((uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1))
-    #pragma message ("uECC_WORD_SIZE must be 1 for AVR")
-    #undef uECC_WORD_SIZE
-    #define uECC_WORD_SIZE 1
-#endif
-
-#if ((uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \
-        uECC_PLATFORM ==  uECC_arm_thumb2) && \
-     (uECC_WORD_SIZE != 4))
-    #pragma message ("uECC_WORD_SIZE must be 4 for ARM")
-    #undef uECC_WORD_SIZE
-    #define uECC_WORD_SIZE 4
-#endif
-
-#if defined(__SIZEOF_INT128__) || ((__clang_major__ * 100 + __clang_minor__) >= 302)
-    #define SUPPORTS_INT128 1
-#else
-    #define SUPPORTS_INT128 0
-#endif
-
-typedef int8_t wordcount_t;
-typedef int16_t bitcount_t;
-typedef int8_t cmpresult_t;
-
-#if (uECC_WORD_SIZE == 1)
-
-typedef uint8_t uECC_word_t;
-typedef uint16_t uECC_dword_t;
-
-#define HIGH_BIT_SET 0x80
-#define uECC_WORD_BITS 8
-#define uECC_WORD_BITS_SHIFT 3
-#define uECC_WORD_BITS_MASK 0x07
-
-#elif (uECC_WORD_SIZE == 4)
-
-typedef uint32_t uECC_word_t;
-typedef uint64_t uECC_dword_t;
-
-#define HIGH_BIT_SET 0x80000000
-#define uECC_WORD_BITS 32
-#define uECC_WORD_BITS_SHIFT 5
-#define uECC_WORD_BITS_MASK 0x01F
-
-#elif (uECC_WORD_SIZE == 8)
-
-typedef uint64_t uECC_word_t;
-#if SUPPORTS_INT128
-typedef unsigned __int128 uECC_dword_t;
-#endif
-
-#define HIGH_BIT_SET 0x8000000000000000ull
-#define uECC_WORD_BITS 64
-#define uECC_WORD_BITS_SHIFT 6
-#define uECC_WORD_BITS_MASK 0x03F
-
-#endif /* uECC_WORD_SIZE */
-
-#endif /* _UECC_TYPES_H_ */

+ 0 - 1634
components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.c

@@ -1,1634 +0,0 @@
-/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#include "uECC.h"
-#include "uECC_vli.h"
-
-#ifndef uECC_RNG_MAX_TRIES
-    #define uECC_RNG_MAX_TRIES 64
-#endif
-
-#if uECC_ENABLE_VLI_API
-    #define uECC_VLI_API
-#else
-    #define uECC_VLI_API static
-#endif
-
-#define CONCATX(a, ...) a ## __VA_ARGS__
-#define CONCAT(a, ...) CONCATX(a, __VA_ARGS__)
-
-#define STRX(a) #a
-#define STR(a) STRX(a)
-
-#define EVAL(...)  EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__))))
-#define EVAL1(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__))))
-#define EVAL2(...) EVAL3(EVAL3(EVAL3(EVAL3(__VA_ARGS__))))
-#define EVAL3(...) EVAL4(EVAL4(EVAL4(EVAL4(__VA_ARGS__))))
-#define EVAL4(...) __VA_ARGS__
-
-#define DEC_1  0
-#define DEC_2  1
-#define DEC_3  2
-#define DEC_4  3
-#define DEC_5  4
-#define DEC_6  5
-#define DEC_7  6
-#define DEC_8  7
-#define DEC_9  8
-#define DEC_10 9
-#define DEC_11 10
-#define DEC_12 11
-#define DEC_13 12
-#define DEC_14 13
-#define DEC_15 14
-#define DEC_16 15
-#define DEC_17 16
-#define DEC_18 17
-#define DEC_19 18
-#define DEC_20 19
-#define DEC_21 20
-#define DEC_22 21
-#define DEC_23 22
-#define DEC_24 23
-#define DEC_25 24
-#define DEC_26 25
-#define DEC_27 26
-#define DEC_28 27
-#define DEC_29 28
-#define DEC_30 29
-#define DEC_31 30
-#define DEC_32 31
-
-#define DEC(N) CONCAT(DEC_, N)
-
-#define SECOND_ARG(_, val, ...) val
-#define SOME_CHECK_0 ~, 0
-#define GET_SECOND_ARG(...) SECOND_ARG(__VA_ARGS__, SOME,)
-#define SOME_OR_0(N) GET_SECOND_ARG(CONCAT(SOME_CHECK_, N))
-
-#define EMPTY(...)
-#define DEFER(...) __VA_ARGS__ EMPTY()
-
-#define REPEAT_NAME_0() REPEAT_0
-#define REPEAT_NAME_SOME() REPEAT_SOME
-#define REPEAT_0(...)
-#define REPEAT_SOME(N, stuff) DEFER(CONCAT(REPEAT_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), stuff) stuff
-#define REPEAT(N, stuff) EVAL(REPEAT_SOME(N, stuff))
-
-#define REPEATM_NAME_0() REPEATM_0
-#define REPEATM_NAME_SOME() REPEATM_SOME
-#define REPEATM_0(...)
-#define REPEATM_SOME(N, macro) macro(N) \
-    DEFER(CONCAT(REPEATM_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), macro)
-#define REPEATM(N, macro) EVAL(REPEATM_SOME(N, macro))
-
-#include "platform-specific.inc"
-
-#if (uECC_WORD_SIZE == 1)
-    #if uECC_SUPPORTS_secp160r1
-        #define uECC_MAX_WORDS 21 /* Due to the size of curve_n. */
-    #endif
-    #if uECC_SUPPORTS_secp192r1
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 24
-    #endif
-    #if uECC_SUPPORTS_secp224r1
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 28
-    #endif
-    #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 32
-    #endif
-#elif (uECC_WORD_SIZE == 4)
-    #if uECC_SUPPORTS_secp160r1
-        #define uECC_MAX_WORDS 6 /* Due to the size of curve_n. */
-    #endif
-    #if uECC_SUPPORTS_secp192r1
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 6
-    #endif
-    #if uECC_SUPPORTS_secp224r1
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 7
-    #endif
-    #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 8
-    #endif
-#elif (uECC_WORD_SIZE == 8)
-    #if uECC_SUPPORTS_secp160r1
-        #define uECC_MAX_WORDS 3
-    #endif
-    #if uECC_SUPPORTS_secp192r1
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 3
-    #endif
-    #if uECC_SUPPORTS_secp224r1
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 4
-    #endif
-    #if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1)
-        #undef uECC_MAX_WORDS
-        #define uECC_MAX_WORDS 4
-    #endif
-#endif /* uECC_WORD_SIZE */
-
-#define BITS_TO_WORDS(num_bits) ((num_bits + ((uECC_WORD_SIZE * 8) - 1)) / (uECC_WORD_SIZE * 8))
-#define BITS_TO_BYTES(num_bits) ((num_bits + 7) / 8)
-
-struct uECC_Curve_t {
-    wordcount_t num_words;
-    wordcount_t num_bytes;
-    bitcount_t num_n_bits;
-    uECC_word_t p[uECC_MAX_WORDS];
-    uECC_word_t n[uECC_MAX_WORDS];
-    uECC_word_t G[uECC_MAX_WORDS * 2];
-    uECC_word_t b[uECC_MAX_WORDS];
-    void (*double_jacobian)(uECC_word_t * X1,
-                            uECC_word_t * Y1,
-                            uECC_word_t * Z1,
-                            uECC_Curve curve);
-#if uECC_SUPPORT_COMPRESSED_POINT
-    void (*mod_sqrt)(uECC_word_t *a, uECC_Curve curve);
-#endif
-    void (*x_side)(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve);
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    void (*mmod_fast)(uECC_word_t *result, uECC_word_t *product);
-#endif
-};
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-static void bcopy(uint8_t *dst,
-                  const uint8_t *src,
-                  unsigned num_bytes) {
-    while (0 != num_bytes) {
-        num_bytes--;
-        dst[num_bytes] = src[num_bytes];
-    }
-}
-#endif
-
-static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left,
-                                       const uECC_word_t *right,
-                                       wordcount_t num_words);
-
-#if (uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \
-        uECC_PLATFORM == uECC_arm_thumb2)
-    #include "asm_arm.inc"
-#endif
-
-#if (uECC_PLATFORM == uECC_avr)
-    #include "asm_avr.inc"
-#endif
-
-#if default_RNG_defined
-static uECC_RNG_Function g_rng_function = &default_RNG;
-#else
-static uECC_RNG_Function g_rng_function = 0;
-#endif
-
-void uECC_set_rng(uECC_RNG_Function rng_function) {
-    g_rng_function = rng_function;
-}
-
-uECC_RNG_Function uECC_get_rng(void) {
-    return g_rng_function;
-}
-
-int uECC_curve_private_key_size(uECC_Curve curve) {
-    return BITS_TO_BYTES(curve->num_n_bits);
-}
-
-int uECC_curve_public_key_size(uECC_Curve curve) {
-    return 2 * curve->num_bytes;
-}
-
-#if !asm_clear
-uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) {
-    wordcount_t i;
-    for (i = 0; i < num_words; ++i) {
-        vli[i] = 0;
-    }
-}
-#endif /* !asm_clear */
-
-/* Constant-time comparison to zero - secure way to compare long integers */
-/* Returns 1 if vli == 0, 0 otherwise. */
-uECC_VLI_API uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words) {
-    uECC_word_t bits = 0;
-    wordcount_t i;
-    for (i = 0; i < num_words; ++i) {
-        bits |= vli[i];
-    }
-    return (bits == 0);
-}
-
-/* Returns nonzero if bit 'bit' of vli is set. */
-uECC_VLI_API uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit) {
-    return (vli[bit >> uECC_WORD_BITS_SHIFT] & ((uECC_word_t)1 << (bit & uECC_WORD_BITS_MASK)));
-}
-
-/* Counts the number of words in vli. */
-static wordcount_t vli_numDigits(const uECC_word_t *vli, const wordcount_t max_words) {
-    wordcount_t i;
-    /* Search from the end until we find a non-zero digit.
-       We do it in reverse because we expect that most digits will be nonzero. */
-    for (i = max_words - 1; i >= 0 && vli[i] == 0; --i) {
-    }
-
-    return (i + 1);
-}
-
-/* Counts the number of bits required to represent vli. */
-uECC_VLI_API bitcount_t uECC_vli_numBits(const uECC_word_t *vli, const wordcount_t max_words) {
-    uECC_word_t i;
-    uECC_word_t digit;
-
-    wordcount_t num_digits = vli_numDigits(vli, max_words);
-    if (num_digits == 0) {
-        return 0;
-    }
-
-    digit = vli[num_digits - 1];
-    for (i = 0; digit; ++i) {
-        digit >>= 1;
-    }
-
-    return (((bitcount_t)(num_digits - 1) << uECC_WORD_BITS_SHIFT) + i);
-}
-
-/* Sets dest = src. */
-#if !asm_set
-uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words) {
-    wordcount_t i;
-    for (i = 0; i < num_words; ++i) {
-        dest[i] = src[i];
-    }
-}
-#endif /* !asm_set */
-
-/* Returns sign of left - right. */
-static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left,
-                                       const uECC_word_t *right,
-                                       wordcount_t num_words) {
-    wordcount_t i;
-    for (i = num_words - 1; i >= 0; --i) {
-        if (left[i] > right[i]) {
-            return 1;
-        } else if (left[i] < right[i]) {
-            return -1;
-        }
-    }
-    return 0;
-}
-
-/* Constant-time comparison function - secure way to compare long integers */
-/* Returns one if left == right, zero otherwise. */
-uECC_VLI_API uECC_word_t uECC_vli_equal(const uECC_word_t *left,
-                                        const uECC_word_t *right,
-                                        wordcount_t num_words) {
-    uECC_word_t diff = 0;
-    wordcount_t i;
-    for (i = num_words - 1; i >= 0; --i) {
-        diff |= (left[i] ^ right[i]);
-    }
-    return (diff == 0);
-}
-
-uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words);
-
-/* Returns sign of left - right, in constant time. */
-uECC_VLI_API cmpresult_t uECC_vli_cmp(const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    uECC_word_t tmp[uECC_MAX_WORDS];
-    uECC_word_t neg = !!uECC_vli_sub(tmp, left, right, num_words);
-    uECC_word_t equal = uECC_vli_isZero(tmp, num_words);
-    return (!equal - 2 * neg);
-}
-
-/* Computes vli = vli >> 1. */
-#if !asm_rshift1
-uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) {
-    uECC_word_t *end = vli;
-    uECC_word_t carry = 0;
-
-    vli += num_words;
-    while (vli-- > end) {
-        uECC_word_t temp = *vli;
-        *vli = (temp >> 1) | carry;
-        carry = temp << (uECC_WORD_BITS - 1);
-    }
-}
-#endif /* !asm_rshift1 */
-
-/* Computes result = left + right, returning carry. Can modify in place. */
-#if !asm_add
-uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    uECC_word_t carry = 0;
-    wordcount_t i;
-    for (i = 0; i < num_words; ++i) {
-        uECC_word_t sum = left[i] + right[i] + carry;
-        if (sum != left[i]) {
-            carry = (sum < left[i]);
-        }
-        result[i] = sum;
-    }
-    return carry;
-}
-#endif /* !asm_add */
-
-/* Computes result = left - right, returning borrow. Can modify in place. */
-#if !asm_sub
-uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
-                                      const uECC_word_t *left,
-                                      const uECC_word_t *right,
-                                      wordcount_t num_words) {
-    uECC_word_t borrow = 0;
-    wordcount_t i;
-    for (i = 0; i < num_words; ++i) {
-        uECC_word_t diff = left[i] - right[i] - borrow;
-        if (diff != left[i]) {
-            borrow = (diff > left[i]);
-        }
-        result[i] = diff;
-    }
-    return borrow;
-}
-#endif /* !asm_sub */
-
-#if !asm_mult || (uECC_SQUARE_FUNC && !asm_square) || \
-    (uECC_SUPPORTS_secp256k1 && (uECC_OPTIMIZATION_LEVEL > 0) && \
-        ((uECC_WORD_SIZE == 1) || (uECC_WORD_SIZE == 8)))
-static void muladd(uECC_word_t a,
-                   uECC_word_t b,
-                   uECC_word_t *r0,
-                   uECC_word_t *r1,
-                   uECC_word_t *r2) {
-#if uECC_WORD_SIZE == 8 && !SUPPORTS_INT128
-    uint64_t a0 = a & 0xffffffffull;
-    uint64_t a1 = a >> 32;
-    uint64_t b0 = b & 0xffffffffull;
-    uint64_t b1 = b >> 32;
-
-    uint64_t i0 = a0 * b0;
-    uint64_t i1 = a0 * b1;
-    uint64_t i2 = a1 * b0;
-    uint64_t i3 = a1 * b1;
-
-    uint64_t p0, p1;
-
-    i2 += (i0 >> 32);
-    i2 += i1;
-    if (i2 < i1) { /* overflow */
-        i3 += 0x100000000ull;
-    }
-
-    p0 = (i0 & 0xffffffffull) | (i2 << 32);
-    p1 = i3 + (i2 >> 32);
-
-    *r0 += p0;
-    *r1 += (p1 + (*r0 < p0));
-    *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0));
-#else
-    uECC_dword_t p = (uECC_dword_t)a * b;
-    uECC_dword_t r01 = ((uECC_dword_t)(*r1) << uECC_WORD_BITS) | *r0;
-    r01 += p;
-    *r2 += (r01 < p);
-    *r1 = r01 >> uECC_WORD_BITS;
-    *r0 = (uECC_word_t)r01;
-#endif
-}
-#endif /* muladd needed */
-
-#if !asm_mult
-uECC_VLI_API void uECC_vli_mult(uECC_word_t *result,
-                                const uECC_word_t *left,
-                                const uECC_word_t *right,
-                                wordcount_t num_words) {
-    uECC_word_t r0 = 0;
-    uECC_word_t r1 = 0;
-    uECC_word_t r2 = 0;
-    wordcount_t i, k;
-
-    /* Compute each digit of result in sequence, maintaining the carries. */
-    for (k = 0; k < num_words; ++k) {
-        for (i = 0; i <= k; ++i) {
-            muladd(left[i], right[k - i], &r0, &r1, &r2);
-        }
-        result[k] = r0;
-        r0 = r1;
-        r1 = r2;
-        r2 = 0;
-    }
-    for (k = num_words; k < num_words * 2 - 1; ++k) {
-        for (i = (k + 1) - num_words; i < num_words; ++i) {
-            muladd(left[i], right[k - i], &r0, &r1, &r2);
-        }
-        result[k] = r0;
-        r0 = r1;
-        r1 = r2;
-        r2 = 0;
-    }
-    result[num_words * 2 - 1] = r0;
-}
-#endif /* !asm_mult */
-
-#if uECC_SQUARE_FUNC
-
-#if !asm_square
-static void mul2add(uECC_word_t a,
-                    uECC_word_t b,
-                    uECC_word_t *r0,
-                    uECC_word_t *r1,
-                    uECC_word_t *r2) {
-#if uECC_WORD_SIZE == 8 && !SUPPORTS_INT128
-    uint64_t a0 = a & 0xffffffffull;
-    uint64_t a1 = a >> 32;
-    uint64_t b0 = b & 0xffffffffull;
-    uint64_t b1 = b >> 32;
-
-    uint64_t i0 = a0 * b0;
-    uint64_t i1 = a0 * b1;
-    uint64_t i2 = a1 * b0;
-    uint64_t i3 = a1 * b1;
-
-    uint64_t p0, p1;
-
-    i2 += (i0 >> 32);
-    i2 += i1;
-    if (i2 < i1)
-    { /* overflow */
-        i3 += 0x100000000ull;
-    }
-
-    p0 = (i0 & 0xffffffffull) | (i2 << 32);
-    p1 = i3 + (i2 >> 32);
-
-    *r2 += (p1 >> 63);
-    p1 = (p1 << 1) | (p0 >> 63);
-    p0 <<= 1;
-
-    *r0 += p0;
-    *r1 += (p1 + (*r0 < p0));
-    *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0));
-#else
-    uECC_dword_t p = (uECC_dword_t)a * b;
-    uECC_dword_t r01 = ((uECC_dword_t)(*r1) << uECC_WORD_BITS) | *r0;
-    *r2 += (p >> (uECC_WORD_BITS * 2 - 1));
-    p *= 2;
-    r01 += p;
-    *r2 += (r01 < p);
-    *r1 = r01 >> uECC_WORD_BITS;
-    *r0 = (uECC_word_t)r01;
-#endif
-}
-
-uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
-                                  const uECC_word_t *left,
-                                  wordcount_t num_words) {
-    uECC_word_t r0 = 0;
-    uECC_word_t r1 = 0;
-    uECC_word_t r2 = 0;
-
-    wordcount_t i, k;
-
-    for (k = 0; k < num_words * 2 - 1; ++k) {
-        uECC_word_t min = (k < num_words ? 0 : (k + 1) - num_words);
-        for (i = min; i <= k && i <= k - i; ++i) {
-            if (i < k-i) {
-                mul2add(left[i], left[k - i], &r0, &r1, &r2);
-            } else {
-                muladd(left[i], left[k - i], &r0, &r1, &r2);
-            }
-        }
-        result[k] = r0;
-        r0 = r1;
-        r1 = r2;
-        r2 = 0;
-    }
-
-    result[num_words * 2 - 1] = r0;
-}
-#endif /* !asm_square */
-
-#else /* uECC_SQUARE_FUNC */
-
-#if uECC_ENABLE_VLI_API
-uECC_VLI_API void uECC_vli_square(uECC_word_t *result,
-                                  const uECC_word_t *left,
-                                  wordcount_t num_words) {
-    uECC_vli_mult(result, left, left, num_words);
-}
-#endif /* uECC_ENABLE_VLI_API */
-
-#endif /* uECC_SQUARE_FUNC */
-
-/* Computes result = (left + right) % mod.
-   Preconditions: left < mod, right < mod, and result must not overlap mod. */
-uECC_VLI_API void uECC_vli_modAdd(uECC_word_t *result,
-                                  const uECC_word_t *left,
-                                  const uECC_word_t *right,
-                                  const uECC_word_t *mod,
-                                  wordcount_t num_words) {
-    uECC_word_t overflow = uECC_vli_add(result, left, right, num_words);
-
-    /* Nothing to fix up when the sum did not carry out and is still strictly below mod. */
-    if (!overflow && uECC_vli_cmp_unsafe(mod, result, num_words) == 1) {
-        return;
-    }
-    /* Otherwise the sum is mod + remainder; remove one mod to get the remainder. */
-    uECC_vli_sub(result, result, mod, num_words);
-}
-
-/* Computes result = (left - right) % mod.
-   Preconditions: left < mod, right < mod, and result must not overlap mod. */
-uECC_VLI_API void uECC_vli_modSub(uECC_word_t *result,
-                                  const uECC_word_t *left,
-                                  const uECC_word_t *right,
-                                  const uECC_word_t *mod,
-                                  wordcount_t num_words) {
-    if (!uECC_vli_sub(result, left, right, num_words)) {
-        /* No borrow: the plain difference is already the answer. */
-        return;
-    }
-    /* A borrow means result wrapped around to (max int) - diff. Because -x % d == d - x,
-       adding mod (and letting the addition overflow) yields the correct value. */
-    uECC_vli_add(result, result, mod, num_words);
-}
-
-/* Computes result = product % mod, where product is 2N words long. */
-/* Currently only designed to work for curve_p or curve_n.
-   Method: mod is shifted left so its highest set bit sits at the top of a 2N-word buffer,
-   then it is trial-subtracted from the running remainder once per bit position while being
-   shifted back down one bit at a time. The product buffer serves as one of the two scratch
-   buffers and may be overwritten. */
-uECC_VLI_API void uECC_vli_mmod(uECC_word_t *result,
-                                uECC_word_t *product,
-                                const uECC_word_t *mod,
-                                wordcount_t num_words) {
-    uECC_word_t mod_multiple[2 * uECC_MAX_WORDS];
-    uECC_word_t tmp[2 * uECC_MAX_WORDS];
-    uECC_word_t *v[2] = {tmp, product}; /* v[index] is the current remainder; v[1 - index] receives the trial difference */
-    uECC_word_t index;
-
-    /* Shift mod so its highest set bit is at the maximum position. */
-    bitcount_t shift = (num_words * 2 * uECC_WORD_BITS) - uECC_vli_numBits(mod, num_words);
-    wordcount_t word_shift = shift / uECC_WORD_BITS;
-    wordcount_t bit_shift = shift % uECC_WORD_BITS;
-    uECC_word_t carry = 0;
-    uECC_vli_clear(mod_multiple, word_shift);
-    if (bit_shift > 0) {
-        for(index = 0; index < (uECC_word_t)num_words; ++index) {
-            mod_multiple[word_shift + index] = (mod[index] << bit_shift) | carry;
-            carry = mod[index] >> (uECC_WORD_BITS - bit_shift);
-        }
-    } else {
-        uECC_vli_set(mod_multiple + word_shift, mod, num_words);
-    }
-
-    for (index = 1; shift >= 0; --shift) {
-        uECC_word_t borrow = 0;
-        wordcount_t i;
-        for (i = 0; i < num_words * 2; ++i) {
-            uECC_word_t diff = v[index][i] - mod_multiple[i] - borrow;
-            if (diff != v[index][i]) { /* when diff equals the minuend the incoming borrow carries over unchanged */
-                borrow = (diff > v[index][i]);
-            }
-            v[1 - index][i] = diff;
-        }
-        index = !(index ^ borrow); /* Swap the index if there was no borrow */
-        uECC_vli_rshift1(mod_multiple, num_words);
-        mod_multiple[num_words - 1] |= mod_multiple[num_words] << (uECC_WORD_BITS - 1); /* move the upper half's low bit into the lower half's top bit before the upper half is shifted */
-        uECC_vli_rshift1(mod_multiple + num_words, num_words);
-    }
-    uECC_vli_set(result, v[index], num_words);
-}
-
-/* Computes result = (left * right) % mod: forms the full double-width product, then
-   reduces it with the generic uECC_vli_mmod(). */
-uECC_VLI_API void uECC_vli_modMult(uECC_word_t *result,
-                                   const uECC_word_t *left,
-                                   const uECC_word_t *right,
-                                   const uECC_word_t *mod,
-                                   wordcount_t num_words) {
-    uECC_word_t wide[2 * uECC_MAX_WORDS];
-
-    uECC_vli_mult(wide, left, right, num_words);
-    uECC_vli_mmod(result, wide, mod, num_words);
-}
-
-/* Multiplies left by right over curve->num_words words and reduces the double-width
-   product: through the curve's mmod_fast hook when uECC_OPTIMIZATION_LEVEL > 0, otherwise
-   through the generic uECC_vli_mmod() with modulus curve->p. */
-uECC_VLI_API void uECC_vli_modMult_fast(uECC_word_t *result,
-                                        const uECC_word_t *left,
-                                        const uECC_word_t *right,
-                                        uECC_Curve curve) {
-    uECC_word_t wide[2 * uECC_MAX_WORDS];
-
-    uECC_vli_mult(wide, left, right, curve->num_words);
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    curve->mmod_fast(result, wide);
-#else
-    uECC_vli_mmod(result, wide, curve->p, curve->num_words);
-#endif
-}
-
-#if uECC_SQUARE_FUNC
-
-#if uECC_ENABLE_VLI_API
-/* Computes result = left^2 % mod using the dedicated squaring routine followed by the
-   generic reduction. */
-uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result,
-                                     const uECC_word_t *left,
-                                     const uECC_word_t *mod,
-                                     wordcount_t num_words) {
-    uECC_word_t wide[2 * uECC_MAX_WORDS];
-
-    uECC_vli_square(wide, left, num_words);
-    uECC_vli_mmod(result, wide, mod, num_words);
-}
-#endif /* uECC_ENABLE_VLI_API */
-
-/* Squares left over curve->num_words words and reduces the double-width result: through
-   the curve's mmod_fast hook when uECC_OPTIMIZATION_LEVEL > 0, otherwise through the
-   generic uECC_vli_mmod() with modulus curve->p. */
-uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result,
-                                          const uECC_word_t *left,
-                                          uECC_Curve curve) {
-    uECC_word_t wide[2 * uECC_MAX_WORDS];
-
-    uECC_vli_square(wide, left, curve->num_words);
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    curve->mmod_fast(result, wide);
-#else
-    uECC_vli_mmod(result, wide, curve->p, curve->num_words);
-#endif
-}
-
-#else /* uECC_SQUARE_FUNC */
-
-#if uECC_ENABLE_VLI_API
-uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result,
-                                     const uECC_word_t *left,
-                                     const uECC_word_t *mod,
-                                     wordcount_t num_words) {
-    uECC_vli_modMult(result, left, left, mod, num_words); /* result = left^2 % mod, via the general modular multiply */
-}
-#endif /* uECC_ENABLE_VLI_API */
-
-uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result,
-                                          const uECC_word_t *left,
-                                          uECC_Curve curve) {
-    uECC_vli_modMult_fast(result, left, left, curve); /* square by multiplying left with itself using the curve's fast path */
-}
-
-#endif /* uECC_SQUARE_FUNC */
-
-#define EVEN(vli) (!(vli[0] & 1))
-/* One halving step used by uECC_vli_modInv(): when uv is odd, mod is added to it first;
-   uv is then shifted right by one bit, and a carry out of that addition is re-inserted
-   as the new top bit. */
-static void vli_modInv_update(uECC_word_t *uv,
-                              const uECC_word_t *mod,
-                              wordcount_t num_words) {
-    uECC_word_t overflow = 0;
-
-    if (!EVEN(uv)) {
-        overflow = uECC_vli_add(uv, uv, mod, num_words);
-    }
-    uECC_vli_rshift1(uv, num_words);
-    if (!overflow) {
-        return;
-    }
-    uv[num_words - 1] |= HIGH_BIT_SET;
-}
-
-/* Computes result = (1 / input) % mod. All VLIs are the same size.
-   See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
-   If input is zero, result is cleared to zero and the function returns immediately.
-   The loop runs until the working values a and b compare equal; u and v are updated in
-   step with a and b respectively, and u is copied to result at the end. */
-uECC_VLI_API void uECC_vli_modInv(uECC_word_t *result,
-                                  const uECC_word_t *input,
-                                  const uECC_word_t *mod,
-                                  wordcount_t num_words) {
-    uECC_word_t a[uECC_MAX_WORDS], b[uECC_MAX_WORDS], u[uECC_MAX_WORDS], v[uECC_MAX_WORDS];
-    cmpresult_t cmpResult;
-
-    if (uECC_vli_isZero(input, num_words)) {
-        uECC_vli_clear(result, num_words);
-        return;
-    }
-
-    uECC_vli_set(a, input, num_words);
-    uECC_vli_set(b, mod, num_words);
-    uECC_vli_clear(u, num_words);
-    u[0] = 1; /* start with u = 1 ... */
-    uECC_vli_clear(v, num_words); /* ... and v = 0 */
-    while ((cmpResult = uECC_vli_cmp_unsafe(a, b, num_words)) != 0) {
-        if (EVEN(a)) {
-            uECC_vli_rshift1(a, num_words);
-            vli_modInv_update(u, mod, num_words);
-        } else if (EVEN(b)) {
-            uECC_vli_rshift1(b, num_words);
-            vli_modInv_update(v, mod, num_words);
-        } else if (cmpResult > 0) { /* both odd, a > b: a = (a - b) / 2 */
-            uECC_vli_sub(a, a, b, num_words);
-            uECC_vli_rshift1(a, num_words);
-            if (uECC_vli_cmp_unsafe(u, v, num_words) < 0) { /* add mod first when u < v, ahead of the u - v below */
-                uECC_vli_add(u, u, mod, num_words);
-            }
-            uECC_vli_sub(u, u, v, num_words);
-            vli_modInv_update(u, mod, num_words);
-        } else { /* both odd, b > a: mirror image of the branch above */
-            uECC_vli_sub(b, b, a, num_words);
-            uECC_vli_rshift1(b, num_words);
-            if (uECC_vli_cmp_unsafe(v, u, num_words) < 0) {
-                uECC_vli_add(v, v, mod, num_words);
-            }
-            uECC_vli_sub(v, v, u, num_words);
-            vli_modInv_update(v, mod, num_words);
-        }
-    }
-    uECC_vli_set(result, u, num_words);
-}
-
-/* ------ Point operations ------ */
-
-#include "curve-specific.inc"
-
-/* Returns 1 if 'point' is the point at infinity, 0 otherwise.
-   The test covers all 2 * num_words words of the point, so the point counts as zero
-   only when every word of it is zero. */
-#define EccPoint_isZero(point, curve) uECC_vli_isZero((point), (curve)->num_words * 2)
-
-/* Point multiplication algorithm using Montgomery's ladder with co-Z coordinates.
-From http://eprint.iacr.org/2011/338.pdf
-*/
-
-/* Rescales a coordinate pair in place: (x1, y1) => (x1 * z^2, y1 * z^3), using the
-   curve's fast modular square/multiply routines. */
-static void apply_z(uECC_word_t * X1,
-                    uECC_word_t * Y1,
-                    const uECC_word_t * const Z,
-                    uECC_Curve curve) {
-    uECC_word_t z_pow[uECC_MAX_WORDS];
-
-    /* z_pow = z^2, applied to x1. */
-    uECC_vli_modSquare_fast(z_pow, Z, curve);
-    uECC_vli_modMult_fast(X1, X1, z_pow, curve);
-
-    /* z_pow = z^3, applied to y1. */
-    uECC_vli_modMult_fast(z_pow, z_pow, Z, curve);
-    uECC_vli_modMult_fast(Y1, Y1, z_pow, curve);
-}
-
-/* P = (x1, y1) => 2P, (x2, y2) => P'
-   (X2, Y2) first receives a copy of the input point. initial_Z may be null, in which
-   case Z = 1 is used. */
-static void XYcZ_initial_double(uECC_word_t * X1,
-                                uECC_word_t * Y1,
-                                uECC_word_t * X2,
-                                uECC_word_t * Y2,
-                                const uECC_word_t * const initial_Z,
-                                uECC_Curve curve) {
-    wordcount_t num_words = curve->num_words;
-    uECC_word_t z_val[uECC_MAX_WORDS];
-
-    if (!initial_Z) {
-        /* No starting Z supplied: use z = 1. */
-        uECC_vli_clear(z_val, num_words);
-        z_val[0] = 1;
-    } else {
-        uECC_vli_set(z_val, initial_Z, num_words);
-    }
-
-    /* Keep the original point in (X2, Y2) before (X1, Y1) is modified. */
-    uECC_vli_set(X2, X1, num_words);
-    uECC_vli_set(Y2, Y1, num_words);
-
-    apply_z(X1, Y1, z_val, curve);
-    curve->double_jacobian(X1, Y1, z_val, curve);
-    /* z_val was passed to double_jacobian above; (X2, Y2) is scaled by its current value. */
-    apply_z(X2, Y2, z_val, curve);
-}
-
-/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
-   Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
-   or P => P', Q => P + Q
-   All four coordinate buffers are updated in place, and every step is a modular
-   operation with respect to curve->p. Z itself is neither passed in nor computed here.
-*/
-static void XYcZ_add(uECC_word_t * X1,
-                     uECC_word_t * Y1,
-                     uECC_word_t * X2,
-                     uECC_word_t * Y2,
-                     uECC_Curve curve) {
-    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
-    uECC_word_t t5[uECC_MAX_WORDS];
-    wordcount_t num_words = curve->num_words;
-
-    uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */
-    uECC_vli_modSquare_fast(t5, t5, curve);                  /* t5 = (x2 - x1)^2 = A */
-    uECC_vli_modMult_fast(X1, X1, t5, curve);                /* t1 = x1*A = B */
-    uECC_vli_modMult_fast(X2, X2, t5, curve);                /* t3 = x2*A = C */
-    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */
-    uECC_vli_modSquare_fast(t5, Y2, curve);                  /* t5 = (y2 - y1)^2 = D */
-
-    uECC_vli_modSub(t5, t5, X1, curve->p, num_words); /* t5 = D - B */
-    uECC_vli_modSub(t5, t5, X2, curve->p, num_words); /* t5 = D - B - C = x3 */
-    uECC_vli_modSub(X2, X2, X1, curve->p, num_words); /* t3 = C - B */
-    uECC_vli_modMult_fast(Y1, Y1, X2, curve);                /* t2 = y1*(C - B) */
-    uECC_vli_modSub(X2, X1, t5, curve->p, num_words); /* t3 = B - x3 */
-    uECC_vli_modMult_fast(Y2, Y2, X2, curve);                /* t4 = (y2 - y1)*(B - x3) */
-    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y3 */
-
-    uECC_vli_set(X2, t5, num_words); /* X2 was used as scratch above; store x3 into it last */
-}
-
-/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
-   Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
-   or P => P - Q, Q => P + Q
-   All four coordinate buffers are updated in place, and every step is a modular
-   operation with respect to curve->p. Z itself is neither passed in nor computed here.
-*/
-static void XYcZ_addC(uECC_word_t * X1,
-                      uECC_word_t * Y1,
-                      uECC_word_t * X2,
-                      uECC_word_t * Y2,
-                      uECC_Curve curve) {
-    /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
-    uECC_word_t t5[uECC_MAX_WORDS];
-    uECC_word_t t6[uECC_MAX_WORDS];
-    uECC_word_t t7[uECC_MAX_WORDS];
-    wordcount_t num_words = curve->num_words;
-
-    uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */
-    uECC_vli_modSquare_fast(t5, t5, curve);                  /* t5 = (x2 - x1)^2 = A */
-    uECC_vli_modMult_fast(X1, X1, t5, curve);                /* t1 = x1*A = B */
-    uECC_vli_modMult_fast(X2, X2, t5, curve);                /* t3 = x2*A = C */
-    uECC_vli_modAdd(t5, Y2, Y1, curve->p, num_words); /* t5 = y2 + y1 */
-    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */
-
-    uECC_vli_modSub(t6, X2, X1, curve->p, num_words); /* t6 = C - B */
-    uECC_vli_modMult_fast(Y1, Y1, t6, curve);                /* t2 = y1 * (C - B) = E */
-    uECC_vli_modAdd(t6, X1, X2, curve->p, num_words); /* t6 = B + C */
-    uECC_vli_modSquare_fast(X2, Y2, curve);                  /* t3 = (y2 - y1)^2 = D */
-    uECC_vli_modSub(X2, X2, t6, curve->p, num_words); /* t3 = D - (B + C) = x3 */
-
-    uECC_vli_modSub(t7, X1, X2, curve->p, num_words); /* t7 = B - x3 */
-    uECC_vli_modMult_fast(Y2, Y2, t7, curve);                /* t4 = (y2 - y1)*(B - x3) */
-    uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = (y2 - y1)*(B - x3) - E = y3 */
-
-    uECC_vli_modSquare_fast(t7, t5, curve);                  /* t7 = (y2 + y1)^2 = F */
-    uECC_vli_modSub(t7, t7, t6, curve->p, num_words); /* t7 = F - (B + C) = x3' */
-    uECC_vli_modSub(t6, t7, X1, curve->p, num_words); /* t6 = x3' - B */
-    uECC_vli_modMult_fast(t6, t6, t5, curve);                /* t6 = (y2+y1)*(x3' - B) */
-    uECC_vli_modSub(Y1, t6, Y1, curve->p, num_words); /* t2 = (y2+y1)*(x3' - B) - E = y3' */
-
-    uECC_vli_set(X1, t7, num_words); /* X1 (= B) was still needed above; store x3' into it last */
-}
-
-/* result may overlap point.
-   Multiplies 'point' (num_words words of x followed by num_words words of y) by 'scalar'
-   with a Montgomery ladder and writes x then y to 'result'. The ladder reads scalar bits
-   num_bits - 2 down to 0, doing the same pair of co-Z operations for every bit.
-   initial_Z may be null, in which case Z = 1 is used for the initial doubling.
-   NOTE(review): bit num_bits - 1 is never tested; presumably callers guarantee it is
-   set (they pass a regularize_k() output with num_n_bits + 1) -- confirm. */
-static void EccPoint_mult(uECC_word_t * result,
-                          const uECC_word_t * point,
-                          const uECC_word_t * scalar,
-                          const uECC_word_t * initial_Z,
-                          bitcount_t num_bits,
-                          uECC_Curve curve) {
-    /* R0 and R1 */
-    uECC_word_t Rx[2][uECC_MAX_WORDS];
-    uECC_word_t Ry[2][uECC_MAX_WORDS];
-    uECC_word_t z[uECC_MAX_WORDS];
-    bitcount_t i;
-    uECC_word_t nb;
-    wordcount_t num_words = curve->num_words;
-
-    uECC_vli_set(Rx[1], point, num_words);
-    uECC_vli_set(Ry[1], point + num_words, num_words);
-
-    XYcZ_initial_double(Rx[1], Ry[1], Rx[0], Ry[0], initial_Z, curve);
-
-    for (i = num_bits - 2; i > 0; --i) {
-        nb = !uECC_vli_testBit(scalar, i); /* nb = 1 when bit i of scalar is clear */
-        XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve);
-        XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve);
-    }
-
-    nb = !uECC_vli_testBit(scalar, 0); /* bit 0 is handled outside the loop so 1/Z can be computed between the two steps */
-    XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve);
-
-    /* Find final 1/Z value. */
-    uECC_vli_modSub(z, Rx[1], Rx[0], curve->p, num_words); /* X1 - X0 */
-    uECC_vli_modMult_fast(z, z, Ry[1 - nb], curve);               /* Yb * (X1 - X0) */
-    uECC_vli_modMult_fast(z, z, point, curve);                    /* xP * Yb * (X1 - X0) */
-    uECC_vli_modInv(z, z, curve->p, num_words);            /* 1 / (xP * Yb * (X1 - X0)) */
-    /* yP / (xP * Yb * (X1 - X0)) */
-    uECC_vli_modMult_fast(z, z, point + num_words, curve);
-    uECC_vli_modMult_fast(z, z, Rx[1 - nb], curve); /* Xb * yP / (xP * Yb * (X1 - X0)) */
-    /* End 1/Z calculation */
-
-    XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve);
-    apply_z(Rx[0], Ry[0], z, curve); /* scale R0 by the computed 1/Z value */
-
-    uECC_vli_set(result, Rx[0], num_words);
-    uECC_vli_set(result + num_words, Ry[0], num_words);
-}
-
-/* Writes k0 = k + n and k1 = k0 + n, where n = curve->n, over BITS_TO_WORDS(num_n_bits)
-   words. Returns 1 if computing k0 carried out of the word array or set bit num_n_bits
-   of k0, and 0 otherwise. k0 may be the same buffer as k. */
-static uECC_word_t regularize_k(const uECC_word_t * const k,
-                                uECC_word_t *k0,
-                                uECC_word_t *k1,
-                                uECC_Curve curve) {
-    bitcount_t num_n_bits = curve->num_n_bits;
-    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
-    uECC_word_t carry = (uECC_vli_add(k0, k, curve->n, num_n_words) != 0);
-
-    /* When n does not fill its top word, an overflow past num_n_bits shows up as a set
-       bit inside k0 rather than as a carry out of the addition. */
-    if (!carry &&
-            num_n_bits < ((bitcount_t)num_n_words * uECC_WORD_SIZE * 8) &&
-            uECC_vli_testBit(k0, num_n_bits)) {
-        carry = 1;
-    }
-
-    uECC_vli_add(k1, k0, curve->n, num_n_words);
-    return carry;
-}
-
-/* Computes result = private_key * G for the given curve.
-   Returns 1 on success, or 0 if the resulting point is all zero. */
-static uECC_word_t EccPoint_compute_public_key(uECC_word_t *result,
-                                               uECC_word_t *private_key,
-                                               uECC_Curve curve) {
-    uECC_word_t k_plus_n[uECC_MAX_WORDS];
-    uECC_word_t k_plus_2n[uECC_MAX_WORDS];
-    uECC_word_t *candidates[2] = {k_plus_n, k_plus_2n};
-
-    /* Regularize the bitcount for the private key so that attackers cannot use a side channel
-       attack to learn the number of leading zeros. */
-    uECC_word_t carry = regularize_k(private_key, k_plus_n, k_plus_2n, curve);
-
-    EccPoint_mult(result, curve->G, candidates[!carry], 0, curve->num_n_bits + 1, curve);
-
-    return EccPoint_isZero(result, curve) ? 0 : 1;
-}
-
-#if uECC_WORD_SIZE == 1
-
-/* With one-byte words the native form is just the byte string in reverse order:
-   copies num_bytes bytes from native into bytes, last byte first. */
-uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes,
-                                         int num_bytes,
-                                         const uint8_t *native) {
-    int last = num_bytes - 1;
-    wordcount_t pos;
-
-    for (pos = 0; pos < num_bytes; ++pos) {
-        bytes[pos] = native[last - pos];
-    }
-}
-
-uECC_VLI_API void uECC_vli_bytesToNative(uint8_t *native,
-                                         const uint8_t *bytes,
-                                         int num_bytes) {
-    uECC_vli_nativeToBytes(native, num_bytes, bytes); /* byte reversal is its own inverse, so the same routine converts both ways */
-}
-
-#else
-
-/* Serializes a native word array into num_bytes bytes, most significant byte first:
-   output byte i is byte number (num_bytes - 1 - i) of the native value, counting from
-   the low end of native[0]. */
-uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes,
-                                         int num_bytes,
-                                         const uECC_word_t *native) {
-    wordcount_t out;
-
-    for (out = 0; out < num_bytes; ++out) {
-        unsigned src = num_bytes - 1 - out;
-        unsigned word_index = src / uECC_WORD_SIZE;
-        unsigned bit_offset = 8 * (src % uECC_WORD_SIZE);
-
-        bytes[out] = native[word_index] >> bit_offset;
-    }
-}
-
-/* Parses num_bytes bytes (most significant first) into a native word array. The
-   destination words covering num_bytes are cleared first, then each input byte is OR-ed
-   into its position. */
-uECC_VLI_API void uECC_vli_bytesToNative(uECC_word_t *native,
-                                         const uint8_t *bytes,
-                                         int num_bytes) {
-    wordcount_t in;
-
-    uECC_vli_clear(native, (num_bytes + (uECC_WORD_SIZE - 1)) / uECC_WORD_SIZE);
-    for (in = 0; in < num_bytes; ++in) {
-        unsigned dst = num_bytes - 1 - in;
-        unsigned word_index = dst / uECC_WORD_SIZE;
-        unsigned bit_offset = 8 * (dst % uECC_WORD_SIZE);
-
-        native[word_index] |= (uECC_word_t)bytes[in] << bit_offset;
-    }
-}
-
-#endif /* uECC_WORD_SIZE */
-
-/* Generates a random integer in the range 0 < random < top.
-   Both random and top have num_words words.
-   Returns 1 on success. Returns 0 if no RNG is installed, if the RNG reports failure, or
-   if no acceptable value turned up within uECC_RNG_MAX_TRIES attempts. */
-uECC_VLI_API int uECC_generate_random_int(uECC_word_t *random,
-                                          const uECC_word_t *top,
-                                          wordcount_t num_words) {
-    uECC_word_t mask = (uECC_word_t)-1;
-    uECC_word_t attempt;
-    bitcount_t num_bits = uECC_vli_numBits(top, num_words);
-
-    if (!g_rng_function) {
-        return 0;
-    }
-
-    for (attempt = 0; attempt < uECC_RNG_MAX_TRIES; ++attempt) {
-        if (!g_rng_function((uint8_t *)random, num_words * uECC_WORD_SIZE)) {
-            return 0;
-        }
-        /* Trim the top word so the candidate has no more bits than top. */
-        random[num_words - 1] &= mask >> ((bitcount_t)(num_words * uECC_WORD_SIZE * 8 - num_bits));
-
-        /* Rejection sampling: discard zero and anything not strictly below top. */
-        if (uECC_vli_isZero(random, num_words)) {
-            continue;
-        }
-        if (uECC_vli_cmp(top, random, num_words) == 1) {
-            return 1;
-        }
-    }
-    return 0;
-}
-
-/* Generates a key pair: draws a random private scalar below curve->n and derives the
-   matching public point. Returns 1 on success; returns 0 if random generation fails or
-   no usable key was found within uECC_RNG_MAX_TRIES attempts. */
-int uECC_make_key(uint8_t *public_key,
-                  uint8_t *private_key,
-                  uECC_Curve curve) {
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    /* Native little-endian layout: operate directly on the caller's buffers. */
-    uECC_word_t *priv = (uECC_word_t *)private_key;
-    uECC_word_t *pub = (uECC_word_t *)public_key;
-#else
-    uECC_word_t priv[uECC_MAX_WORDS];
-    uECC_word_t pub[uECC_MAX_WORDS * 2];
-#endif
-    uECC_word_t attempt;
-
-    for (attempt = 0; attempt < uECC_RNG_MAX_TRIES; ++attempt) {
-        if (!uECC_generate_random_int(priv, curve->n, BITS_TO_WORDS(curve->num_n_bits))) {
-            return 0;
-        }
-
-        if (!EccPoint_compute_public_key(pub, priv, curve)) {
-            continue;
-        }
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
-        /* Serialize the private scalar, then x and y of the public point. */
-        uECC_vli_nativeToBytes(private_key, BITS_TO_BYTES(curve->num_n_bits), priv);
-        uECC_vli_nativeToBytes(public_key, curve->num_bytes, pub);
-        uECC_vli_nativeToBytes(
-            public_key + curve->num_bytes, curve->num_bytes, pub + curve->num_words);
-#endif
-        return 1;
-    }
-    return 0;
-}
-
-int uECC_shared_secret(const uint8_t *public_key,
-                       const uint8_t *private_key,
-                       uint8_t *secret,
-                       uECC_Curve curve) {
-    uECC_word_t _public[uECC_MAX_WORDS * 2];
-    uECC_word_t _private[uECC_MAX_WORDS];
-
-    uECC_word_t tmp[uECC_MAX_WORDS];
-    uECC_word_t *p2[2] = {_private, tmp}; /* after regularize_k() below: p2[0] = k + n, p2[1] = k + 2n */
-    uECC_word_t *initial_Z = 0;
-    uECC_word_t carry;
-    wordcount_t num_words = curve->num_words;
-    wordcount_t num_bytes = curve->num_bytes;
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    bcopy((uint8_t *) _private, private_key, num_bytes);
-    bcopy((uint8_t *) _public, public_key, num_bytes*2);
-#else
-    uECC_vli_bytesToNative(_private, private_key, BITS_TO_BYTES(curve->num_n_bits));
-    uECC_vli_bytesToNative(_public, public_key, num_bytes);
-    uECC_vli_bytesToNative(_public + num_words, public_key + num_bytes, num_bytes);
-#endif
-
-    /* Regularize the bitcount for the private key so that attackers cannot use a side channel
-       attack to learn the number of leading zeros. */
-    carry = regularize_k(_private, _private, tmp, curve);
-
-    /* If an RNG function was specified, try to get a random initial Z value to improve
-       protection against side-channel attacks. */
-    if (g_rng_function) {
-        if (!uECC_generate_random_int(p2[carry], curve->p, num_words)) {
-            return 0;
-        }
-        initial_Z = p2[carry]; /* the buffer NOT chosen as the scalar (p2[!carry]) doubles as storage for Z */
-    }
-
-    EccPoint_mult(_public, _public, p2[!carry], initial_Z, curve->num_n_bits + 1, curve);
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    bcopy((uint8_t *) secret, (uint8_t *) _public, num_bytes);
-#else
-    uECC_vli_nativeToBytes(secret, num_bytes, _public); /* only the x coordinate (first num_bytes) is exported */
-#endif
-    return !EccPoint_isZero(_public, curve); /* 1 on success, 0 if the product is the all-zero point */
-}
-
-#if uECC_SUPPORT_COMPRESSED_POINT
-/* Writes the compressed form of public_key into compressed: a prefix byte of 2 plus the
-   low bit of y, followed by the first curve->num_bytes bytes of public_key (x). */
-void uECC_compress(const uint8_t *public_key, uint8_t *compressed, uECC_Curve curve) {
-    uint8_t *x_out = compressed + 1;
-    uint8_t y_low_byte;
-    wordcount_t pos;
-
-    for (pos = 0; pos < curve->num_bytes; ++pos) {
-        x_out[pos] = public_key[pos];
-    }
-
-    /* The least significant byte of y is its first byte in native little-endian layout
-       and its last byte otherwise. */
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    y_low_byte = public_key[curve->num_bytes];
-#else
-    y_low_byte = public_key[curve->num_bytes * 2 - 1];
-#endif
-    compressed[0] = 2 + (y_low_byte & 0x01);
-}
-
-void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve) {
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    uECC_word_t *point = (uECC_word_t *)public_key;
-#else
-    uECC_word_t point[uECC_MAX_WORDS * 2];
-#endif
-    uECC_word_t *y = point + curve->num_words; /* y is stored directly after the num_words words of x */
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    bcopy(public_key, compressed+1, curve->num_bytes);
-#else
-    uECC_vli_bytesToNative(point, compressed + 1, curve->num_bytes); /* x follows the one-byte prefix */
-#endif
-    curve->x_side(y, point, curve); /* y = x^3 + ax + b, per the x_side note in uECC_valid_point() */
-    curve->mod_sqrt(y, curve); /* presumably replaces y with a square root modulo p -- confirm against the curve code */
-
-    if ((y[0] & 0x01) != (compressed[0] & 0x01)) { /* parity of y must match the low bit of the prefix byte */
-        uECC_vli_sub(y, curve->p, y, curve->num_words); /* otherwise use y = p - y */
-    }
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
-    uECC_vli_nativeToBytes(public_key, curve->num_bytes, point);
-    uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, y);
-#endif
-}
-#endif /* uECC_SUPPORT_COMPRESSED_POINT */
-
-/* Checks a point in native word form (x followed by y). Returns 0 if the point is all
-   zero or if either coordinate is not strictly below curve->p; otherwise returns the
-   result of comparing y^2 against x^3 + ax + b. */
-int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve) {
-    wordcount_t num_words = curve->num_words;
-    const uECC_word_t *x = point;
-    const uECC_word_t *y = point + num_words;
-    uECC_word_t y_squared[uECC_MAX_WORDS];
-    uECC_word_t curve_rhs[uECC_MAX_WORDS];
-
-    /* The point at infinity is invalid. */
-    if (EccPoint_isZero(point, curve)) {
-        return 0;
-    }
-
-    /* x and y must be smaller than p. */
-    if (uECC_vli_cmp_unsafe(curve->p, x, num_words) != 1) {
-        return 0;
-    }
-    if (uECC_vli_cmp_unsafe(curve->p, y, num_words) != 1) {
-        return 0;
-    }
-
-    uECC_vli_modSquare_fast(y_squared, y, curve);
-    curve->x_side(curve_rhs, x, curve); /* curve_rhs = x^3 + ax + b */
-
-    /* Make sure that y^2 == x^3 + ax + b */
-    return (int)(uECC_vli_equal(y_squared, curve_rhs, num_words));
-}
-
-/* Validates a public key given in byte form: converts it to native words when the
-   build does not use native little-endian layout, then defers to uECC_valid_point(). */
-int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve) {
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    /* Bytes already are the native representation: check them in place. */
-    uECC_word_t *pt = (uECC_word_t *)public_key;
-#else
-    uECC_word_t pt[uECC_MAX_WORDS * 2];
-
-    uECC_vli_bytesToNative(pt, public_key, curve->num_bytes);
-    uECC_vli_bytesToNative(
-        pt + curve->num_words, public_key + curve->num_bytes, curve->num_bytes);
-#endif
-    return uECC_valid_point(pt, curve);
-}
-
-/* Derives the public key for private_key and writes it to public_key (x then y).
-   Returns 1 on success; returns 0 if the private key is zero, is not strictly below
-   curve->n, or if the point computation fails. */
-int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve) {
-    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    uECC_word_t *priv = (uECC_word_t *)private_key;
-    uECC_word_t *pub = (uECC_word_t *)public_key;
-#else
-    uECC_word_t priv[uECC_MAX_WORDS];
-    uECC_word_t pub[uECC_MAX_WORDS * 2];
-
-    uECC_vli_bytesToNative(priv, private_key, BITS_TO_BYTES(curve->num_n_bits));
-#endif
-
-    /* Make sure the private key is in the range [1, n-1]. */
-    if (uECC_vli_isZero(priv, num_n_words) ||
-            uECC_vli_cmp(curve->n, priv, num_n_words) != 1) {
-        return 0;
-    }
-
-    /* Compute public key. */
-    if (!EccPoint_compute_public_key(pub, priv, curve)) {
-        return 0;
-    }
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
-    uECC_vli_nativeToBytes(public_key, curve->num_bytes, pub);
-    uECC_vli_nativeToBytes(
-        public_key + curve->num_bytes, curve->num_bytes, pub + curve->num_words);
-#endif
-    return 1;
-}
-
-
-/* -------- ECDSA code -------- */
-
-static void bits2int(uECC_word_t *native,
-                     const uint8_t *bits,
-                     unsigned bits_size,
-                     uECC_Curve curve) {
-    unsigned num_n_bytes = BITS_TO_BYTES(curve->num_n_bits);
-    unsigned num_n_words = BITS_TO_WORDS(curve->num_n_bits);
-    int shift;
-    uECC_word_t carry;
-    uECC_word_t *ptr;
-
-    if (bits_size > num_n_bytes) { /* use at most the first num_n_bytes bytes of the input */
-        bits_size = num_n_bytes;
-    }
-
-    uECC_vli_clear(native, num_n_words);
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    bcopy((uint8_t *) native, bits, bits_size);
-#else
-    uECC_vli_bytesToNative(native, bits, bits_size);
-#endif    
-    if (bits_size * 8 <= (unsigned)curve->num_n_bits) { /* input is no wider than n: nothing to trim */
-        return;
-    }
-    shift = bits_size * 8 - curve->num_n_bits; /* number of excess low-order bits to drop */
-    carry = 0;
-    ptr = native + num_n_words;
-    while (ptr-- > native) { /* right-shift the whole value by 'shift', most significant word first */
-        uECC_word_t temp = *ptr;
-        *ptr = (temp >> shift) | carry;
-        carry = temp << (uECC_WORD_BITS - shift);
-    }
-
-    /* Reduce mod curve_n */
-    if (uECC_vli_cmp_unsafe(curve->n, native, num_n_words) != 1) {
-        uECC_vli_sub(native, native, curve->n, num_n_words);
-    }
-}
-
-/* Produces an ECDSA signature (r followed by s, curve->num_bytes bytes each) over
-   message_hash using the caller-supplied nonce k.
-   k is overwritten during the computation. Returns 1 on success; returns 0 if k is zero
-   or not strictly below curve->n, if r is zero, if a required random value could not be
-   generated, or if s does not fit in curve->num_bytes bytes. */
-static int uECC_sign_with_k(const uint8_t *private_key,
-                            const uint8_t *message_hash,
-                            unsigned hash_size,
-                            uECC_word_t *k,
-                            uint8_t *signature,
-                            uECC_Curve curve) {
-
-    uECC_word_t tmp[uECC_MAX_WORDS];
-    uECC_word_t s[uECC_MAX_WORDS];
-    uECC_word_t *k2[2] = {tmp, s};
-    uECC_word_t *initial_Z = 0;
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    uECC_word_t *p = (uECC_word_t *)signature;
-#else
-    uECC_word_t p[uECC_MAX_WORDS * 2];
-#endif
-    uECC_word_t carry;
-    wordcount_t num_words = curve->num_words;
-    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
-    bitcount_t num_n_bits = curve->num_n_bits;
-
-    /* Make sure 0 < k < curve_n */
-    if (uECC_vli_isZero(k, num_words) || uECC_vli_cmp(curve->n, k, num_n_words) != 1) {
-        return 0;
-    }
-
-    carry = regularize_k(k, tmp, s, curve);
-
-    /* If an RNG function was specified, try to get a random initial Z value to improve
-       protection against side-channel attacks, exactly as uECC_shared_secret() does.
-       The regularized buffer that is NOT used as the scalar (k2[carry]) holds Z; both
-       tmp and s are rewritten after the multiplication, so nothing is lost. */
-    if (g_rng_function) {
-        if (!uECC_generate_random_int(k2[carry], curve->p, num_words)) {
-            return 0;
-        }
-        initial_Z = k2[carry];
-    }
-
-    EccPoint_mult(p, curve->G, k2[!carry], initial_Z, num_n_bits + 1, curve);
-    if (uECC_vli_isZero(p, num_words)) {
-        return 0;
-    }
-
-    /* If an RNG function was specified, get a random number
-       to prevent side channel analysis of k. */
-    if (!g_rng_function) {
-        uECC_vli_clear(tmp, num_n_words);
-        tmp[0] = 1;
-    } else if (!uECC_generate_random_int(tmp, curve->n, num_n_words)) {
-        return 0;
-    }
-
-    /* Prevent side channel analysis of uECC_vli_modInv() to determine
-       bits of k / the private key by premultiplying by a random number */
-    uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k' = rand * k */
-    uECC_vli_modInv(k, k, curve->n, num_n_words);       /* k = 1 / k' */
-    uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k = 1 / k */
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0
-    uECC_vli_nativeToBytes(signature, curve->num_bytes, p); /* store r */
-#endif
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    bcopy((uint8_t *) tmp, private_key, BITS_TO_BYTES(curve->num_n_bits));
-#else
-    uECC_vli_bytesToNative(tmp, private_key, BITS_TO_BYTES(curve->num_n_bits)); /* tmp = d */
-#endif
-
-    /* r occupies num_words words; clear the top n-sized word first in case
-       num_n_words is larger than num_words. */
-    s[num_n_words - 1] = 0;
-    uECC_vli_set(s, p, num_words);
-    uECC_vli_modMult(s, tmp, s, curve->n, num_n_words); /* s = r*d */
-
-    bits2int(tmp, message_hash, hash_size, curve);
-    uECC_vli_modAdd(s, tmp, s, curve->n, num_n_words); /* s = e + r*d */
-    uECC_vli_modMult(s, s, k, curve->n, num_n_words);  /* s = (e + r*d) / k */
-    if (uECC_vli_numBits(s, num_n_words) > (bitcount_t)curve->num_bytes * 8) {
-        return 0;
-    }
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    bcopy((uint8_t *) signature + curve->num_bytes, (uint8_t *) s, curve->num_bytes);
-#else
-    uECC_vli_nativeToBytes(signature + curve->num_bytes, curve->num_bytes, s);
-#endif    
-    return 1;
-}
-
-/* Signs message_hash with private_key using a freshly generated random nonce per attempt.
-   Returns 1 once an attempt succeeds; returns 0 if nonce generation fails or every one
-   of the uECC_RNG_MAX_TRIES attempts is rejected. */
-int uECC_sign(const uint8_t *private_key,
-              const uint8_t *message_hash,
-              unsigned hash_size,
-              uint8_t *signature,
-              uECC_Curve curve) {
-    uECC_word_t k[uECC_MAX_WORDS];
-    uECC_word_t attempt = 0;
-
-    while (attempt < uECC_RNG_MAX_TRIES) {
-        if (!uECC_generate_random_int(k, curve->n, BITS_TO_WORDS(curve->num_n_bits))) {
-            return 0;
-        }
-        if (uECC_sign_with_k(private_key, message_hash, hash_size, k, signature, curve)) {
-            return 1;
-        }
-        ++attempt;
-    }
-    return 0;
-}
-
-/* Compute an HMAC using K as a key (as in RFC 6979). Note that K is always
-   the same size as the hash result size. */
-/* Starts an HMAC computation keyed with K: builds the inner pad (K XOR 0x36, filled out
-   with 0x36 up to the block size) in the scratch area that begins 2 * result_size bytes
-   into hash_context->tmp, then restarts the hash and feeds it that pad. */
-static void HMAC_init(const uECC_HashContext *hash_context, const uint8_t *K) {
-    uint8_t *ipad = hash_context->tmp + 2 * hash_context->result_size;
-    unsigned pos = 0;
-
-    while (pos < hash_context->result_size) {
-        ipad[pos] = K[pos] ^ 0x36;
-        ++pos;
-    }
-    while (pos < hash_context->block_size) {
-        ipad[pos] = 0x36;
-        ++pos;
-    }
-
-    hash_context->init_hash(hash_context);
-    hash_context->update_hash(hash_context, ipad, hash_context->block_size);
-}
-
-static void HMAC_update(const uECC_HashContext *hash_context,
-                        const uint8_t *message,
-                        unsigned message_size) {
-    hash_context->update_hash(hash_context, message, message_size); /* message bytes go straight into the hash started by HMAC_init() */
-}
-
-/* Completes an HMAC computation keyed with K and writes the digest to result: finishes
-   the inner hash into result, then hashes the outer pad (K XOR 0x5c, filled out with
-   0x5c up to the block size) followed by that inner digest. */
-static void HMAC_finish(const uECC_HashContext *hash_context,
-                        const uint8_t *K,
-                        uint8_t *result) {
-    uint8_t *opad = hash_context->tmp + 2 * hash_context->result_size;
-    unsigned pos = 0;
-
-    while (pos < hash_context->result_size) {
-        opad[pos] = K[pos] ^ 0x5c;
-        ++pos;
-    }
-    while (pos < hash_context->block_size) {
-        opad[pos] = 0x5c;
-        ++pos;
-    }
-
-    /* Inner digest first; it is only consumed after the outer pad has been hashed. */
-    hash_context->finish_hash(hash_context, result);
-
-    hash_context->init_hash(hash_context);
-    hash_context->update_hash(hash_context, opad, hash_context->block_size);
-    hash_context->update_hash(hash_context, result, hash_context->result_size);
-    hash_context->finish_hash(hash_context, result);
-}
-
-/* V = HMAC_K(V)
-   V is result_size bytes long and is replaced in place by its own HMAC under key K. */
-static void update_V(const uECC_HashContext *hash_context, uint8_t *K, uint8_t *V) {
-    HMAC_init(hash_context, K);
-    HMAC_update(hash_context, V, hash_context->result_size);
-    HMAC_finish(hash_context, K, V);
-}
-
-/* Deterministic signing, similar to RFC 6979. Differences are:
-    * We just use H(m) directly rather than bits2octets(H(m))
-      (it is not reduced modulo curve_n).
-    * We generate a value for k (aka T) directly rather than converting endianness.
-
-   Layout of hash_context->tmp: <K> | <V> | (1 byte overlapped 0x00 or 0x01) / <HMAC pad> */
-int uECC_sign_deterministic(const uint8_t *private_key,
-                            const uint8_t *message_hash,
-                            unsigned hash_size,
-                            const uECC_HashContext *hash_context,
-                            uint8_t *signature,
-                            uECC_Curve curve) {
-    uint8_t *K = hash_context->tmp;
-    uint8_t *V = K + hash_context->result_size;
-    wordcount_t num_bytes = curve->num_bytes;
-    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
-    bitcount_t num_n_bits = curve->num_n_bits;
-    uECC_word_t tries;
-    unsigned i;
-    for (i = 0; i < hash_context->result_size; ++i) {
-        V[i] = 0x01;
-        K[i] = 0;
-    }
-
-    /* K = HMAC_K(V || 0x00 || int2octets(x) || h(m)) */
-    HMAC_init(hash_context, K);
-    V[hash_context->result_size] = 0x00;
-    HMAC_update(hash_context, V, hash_context->result_size + 1);
-    HMAC_update(hash_context, private_key, num_bytes);
-    HMAC_update(hash_context, message_hash, hash_size);
-    HMAC_finish(hash_context, K, K);
-
-    update_V(hash_context, K, V);
-
-    /* K = HMAC_K(V || 0x01 || int2octets(x) || h(m)) */
-    HMAC_init(hash_context, K);
-    V[hash_context->result_size] = 0x01;
-    HMAC_update(hash_context, V, hash_context->result_size + 1);
-    HMAC_update(hash_context, private_key, num_bytes);
-    HMAC_update(hash_context, message_hash, hash_size);
-    HMAC_finish(hash_context, K, K);
-
-    update_V(hash_context, K, V);
-
-    for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) {
-        uECC_word_t T[uECC_MAX_WORDS];
-        uint8_t *T_ptr = (uint8_t *)T;
-        wordcount_t T_bytes = 0;
-        for (;;) {
-            update_V(hash_context, K, V);
-            for (i = 0; i < hash_context->result_size; ++i) {
-                T_ptr[T_bytes++] = V[i];
-                if (T_bytes >= num_n_words * uECC_WORD_SIZE) {
-                    goto filled;
-                }
-            }
-        }
-    filled:
-        if ((bitcount_t)num_n_words * uECC_WORD_SIZE * 8 > num_n_bits) {
-            uECC_word_t mask = (uECC_word_t)-1;
-            T[num_n_words - 1] &=
-                mask >> ((bitcount_t)(num_n_words * uECC_WORD_SIZE * 8 - num_n_bits));
-        }
-
-        if (uECC_sign_with_k(private_key, message_hash, hash_size, T, signature, curve)) {
-            return 1;
-        }
-
-        /* K = HMAC_K(V || 0x00) */
-        HMAC_init(hash_context, K);
-        V[hash_context->result_size] = 0x00;
-        HMAC_update(hash_context, V, hash_context->result_size + 1);
-        HMAC_finish(hash_context, K, K);
-
-        update_V(hash_context, K, V);
-    }
-    return 0;
-}
-
-static bitcount_t smax(bitcount_t a, bitcount_t b) {
-    return (a > b ? a : b);
-}
-
-int uECC_verify(const uint8_t *public_key,
-                const uint8_t *message_hash,
-                unsigned hash_size,
-                const uint8_t *signature,
-                uECC_Curve curve) {
-    uECC_word_t u1[uECC_MAX_WORDS], u2[uECC_MAX_WORDS];
-    uECC_word_t z[uECC_MAX_WORDS];
-    uECC_word_t sum[uECC_MAX_WORDS * 2];
-    uECC_word_t rx[uECC_MAX_WORDS];
-    uECC_word_t ry[uECC_MAX_WORDS];
-    uECC_word_t tx[uECC_MAX_WORDS];
-    uECC_word_t ty[uECC_MAX_WORDS];
-    uECC_word_t tz[uECC_MAX_WORDS];
-    const uECC_word_t *points[4];
-    const uECC_word_t *point;
-    bitcount_t num_bits;
-    bitcount_t i;
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    uECC_word_t *_public = (uECC_word_t *)public_key;
-#else
-    uECC_word_t _public[uECC_MAX_WORDS * 2];
-#endif    
-    uECC_word_t r[uECC_MAX_WORDS], s[uECC_MAX_WORDS];
-    wordcount_t num_words = curve->num_words;
-    wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits);
-
-    rx[num_n_words - 1] = 0;
-    r[num_n_words - 1] = 0;
-    s[num_n_words - 1] = 0;
-
-#if uECC_VLI_NATIVE_LITTLE_ENDIAN
-    bcopy((uint8_t *) r, signature, curve->num_bytes);
-    bcopy((uint8_t *) s, signature + curve->num_bytes, curve->num_bytes);
-#else
-    uECC_vli_bytesToNative(_public, public_key, curve->num_bytes);
-    uECC_vli_bytesToNative(
-        _public + num_words, public_key + curve->num_bytes, curve->num_bytes);
-    uECC_vli_bytesToNative(r, signature, curve->num_bytes);
-    uECC_vli_bytesToNative(s, signature + curve->num_bytes, curve->num_bytes);
-#endif
-
-    /* r, s must not be 0. */
-    if (uECC_vli_isZero(r, num_words) || uECC_vli_isZero(s, num_words)) {
-        return 0;
-    }
-
-    /* r, s must be < n. */
-    if (uECC_vli_cmp_unsafe(curve->n, r, num_n_words) != 1 ||
-            uECC_vli_cmp_unsafe(curve->n, s, num_n_words) != 1) {
-        return 0;
-    }
-
-    /* Calculate u1 and u2. */
-    uECC_vli_modInv(z, s, curve->n, num_n_words); /* z = 1/s */
-    u1[num_n_words - 1] = 0;
-    bits2int(u1, message_hash, hash_size, curve);
-    uECC_vli_modMult(u1, u1, z, curve->n, num_n_words); /* u1 = e/s */
-    uECC_vli_modMult(u2, r, z, curve->n, num_n_words); /* u2 = r/s */
-
-    /* Calculate sum = G + Q. */
-    uECC_vli_set(sum, _public, num_words);
-    uECC_vli_set(sum + num_words, _public + num_words, num_words);
-    uECC_vli_set(tx, curve->G, num_words);
-    uECC_vli_set(ty, curve->G + num_words, num_words);
-    uECC_vli_modSub(z, sum, tx, curve->p, num_words); /* z = x2 - x1 */
-    XYcZ_add(tx, ty, sum, sum + num_words, curve);
-    uECC_vli_modInv(z, z, curve->p, num_words); /* z = 1/z */
-    apply_z(sum, sum + num_words, z, curve);
-
-    /* Use Shamir's trick to calculate u1*G + u2*Q */
-    points[0] = 0;
-    points[1] = curve->G;
-    points[2] = _public;
-    points[3] = sum;
-    num_bits = smax(uECC_vli_numBits(u1, num_n_words),
-                    uECC_vli_numBits(u2, num_n_words));
-
-    point = points[(!!uECC_vli_testBit(u1, num_bits - 1)) |
-                   ((!!uECC_vli_testBit(u2, num_bits - 1)) << 1)];
-    uECC_vli_set(rx, point, num_words);
-    uECC_vli_set(ry, point + num_words, num_words);
-    uECC_vli_clear(z, num_words);
-    z[0] = 1;
-
-    for (i = num_bits - 2; i >= 0; --i) {
-        uECC_word_t index;
-        curve->double_jacobian(rx, ry, z, curve);
-
-        index = (!!uECC_vli_testBit(u1, i)) | ((!!uECC_vli_testBit(u2, i)) << 1);
-        point = points[index];
-        if (point) {
-            uECC_vli_set(tx, point, num_words);
-            uECC_vli_set(ty, point + num_words, num_words);
-            apply_z(tx, ty, z, curve);
-            uECC_vli_modSub(tz, rx, tx, curve->p, num_words); /* Z = x2 - x1 */
-            XYcZ_add(tx, ty, rx, ry, curve);
-            uECC_vli_modMult_fast(z, z, tz, curve);
-        }
-    }
-
-    uECC_vli_modInv(z, z, curve->p, num_words); /* Z = 1/Z */
-    apply_z(rx, ry, z, curve);
-
-    /* v = x1 (mod n) */
-    if (uECC_vli_cmp_unsafe(curve->n, rx, num_n_words) != 1) {
-        uECC_vli_sub(rx, rx, curve->n, num_n_words);
-    }
-
-    /* Accept only if v == r. */
-    return (int)(uECC_vli_equal(rx, r, num_words));
-}
-
-#if uECC_ENABLE_VLI_API
-
-unsigned uECC_curve_num_words(uECC_Curve curve) {
-    return curve->num_words;
-}
-
-unsigned uECC_curve_num_bytes(uECC_Curve curve) {
-    return curve->num_bytes;
-}
-
-unsigned uECC_curve_num_bits(uECC_Curve curve) {
-    return curve->num_bytes * 8;
-}
-
-unsigned uECC_curve_num_n_words(uECC_Curve curve) {
-    return BITS_TO_WORDS(curve->num_n_bits);
-}
-
-unsigned uECC_curve_num_n_bytes(uECC_Curve curve) {
-    return BITS_TO_BYTES(curve->num_n_bits);
-}
-
-unsigned uECC_curve_num_n_bits(uECC_Curve curve) {
-    return curve->num_n_bits;
-}
-
-const uECC_word_t *uECC_curve_p(uECC_Curve curve) {
-    return curve->p;
-}
-
-const uECC_word_t *uECC_curve_n(uECC_Curve curve) {
-    return curve->n;
-}
-
-const uECC_word_t *uECC_curve_G(uECC_Curve curve) {
-    return curve->G;
-}
-
-const uECC_word_t *uECC_curve_b(uECC_Curve curve) {
-    return curve->b;
-}
-
-#if uECC_SUPPORT_COMPRESSED_POINT
-void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve) {
-    curve->mod_sqrt(a, curve);
-}
-#endif
-
-void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, uECC_Curve curve) {
-#if (uECC_OPTIMIZATION_LEVEL > 0)
-    curve->mmod_fast(result, product);
-#else
-    uECC_vli_mmod(result, product, curve->p, curve->num_words);
-#endif
-}
-
-void uECC_point_mult(uECC_word_t *result,
-                     const uECC_word_t *point,
-                     const uECC_word_t *scalar,
-                     uECC_Curve curve) {
-    uECC_word_t tmp1[uECC_MAX_WORDS];
-    uECC_word_t tmp2[uECC_MAX_WORDS];
-    uECC_word_t *p2[2] = {tmp1, tmp2};
-    uECC_word_t carry = regularize_k(scalar, tmp1, tmp2, curve);
-
-    EccPoint_mult(result, point, p2[!carry], 0, curve->num_n_bits + 1, curve);
-}
-
-#endif /* uECC_ENABLE_VLI_API */

+ 0 - 365
components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC.h

@@ -1,365 +0,0 @@
-/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_H_
-#define _UECC_H_
-
-#include <stdint.h>
-
-/* Platform selection options.
-If uECC_PLATFORM is not defined, the code will try to guess it based on compiler macros.
-Possible values for uECC_PLATFORM are defined below: */
-#define uECC_arch_other 0
-#define uECC_x86        1
-#define uECC_x86_64     2
-#define uECC_arm        3
-#define uECC_arm_thumb  4
-#define uECC_arm_thumb2 5
-#define uECC_arm64      6
-#define uECC_avr        7
-
-/* If desired, you can define uECC_WORD_SIZE as appropriate for your platform (1, 4, or 8 bytes).
-If uECC_WORD_SIZE is not explicitly defined then it will be automatically set based on your
-platform. */
-
-/* Optimization level; trade speed for code size.
-   Larger values produce code that is faster but larger.
-   Currently supported values are 0 - 4; 0 is unusably slow for most applications.
-   Optimization level 4 currently only has an effect ARM platforms where more than one
-   curve is enabled. */
-#ifndef uECC_OPTIMIZATION_LEVEL
-    #define uECC_OPTIMIZATION_LEVEL 2
-#endif
-
-/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a specific function to be
-used for (scalar) squaring instead of the generic multiplication function. This can make things
-faster somewhat faster, but increases the code size. */
-#ifndef uECC_SQUARE_FUNC
-    #define uECC_SQUARE_FUNC 0
-#endif
-
-/* uECC_VLI_NATIVE_LITTLE_ENDIAN - If enabled (defined as nonzero), this will switch to native
-little-endian format for *all* arrays passed in and out of the public API. This includes public 
-and private keys, shared secrets, signatures and message hashes. 
-Using this switch reduces the amount of call stack memory used by uECC, since less intermediate
-translations are required. 
-Note that this will *only* work on native little-endian processors and it will treat the uint8_t
-arrays passed into the public API as word arrays, therefore requiring the provided byte arrays 
-to be word aligned on architectures that do not support unaligned accesses.
-IMPORTANT: Keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=1 are incompatible
-with keys and signatures generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=0; all parties must use
-the same endianness. */
-#ifndef uECC_VLI_NATIVE_LITTLE_ENDIAN
-    #define uECC_VLI_NATIVE_LITTLE_ENDIAN 0
-#endif
-
-/* Curve support selection. Set to 0 to remove that curve. */
-#ifndef uECC_SUPPORTS_secp160r1
-    #define uECC_SUPPORTS_secp160r1 1
-#endif
-#ifndef uECC_SUPPORTS_secp192r1
-    #define uECC_SUPPORTS_secp192r1 1
-#endif
-#ifndef uECC_SUPPORTS_secp224r1
-    #define uECC_SUPPORTS_secp224r1 1
-#endif
-#ifndef uECC_SUPPORTS_secp256r1
-    #define uECC_SUPPORTS_secp256r1 1
-#endif
-#ifndef uECC_SUPPORTS_secp256k1
-    #define uECC_SUPPORTS_secp256k1 1
-#endif
-
-/* Specifies whether compressed point format is supported.
-   Set to 0 to disable point compression/decompression functions. */
-#ifndef uECC_SUPPORT_COMPRESSED_POINT
-    #define uECC_SUPPORT_COMPRESSED_POINT 1
-#endif
-
-struct uECC_Curve_t;
-typedef const struct uECC_Curve_t * uECC_Curve;
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#if uECC_SUPPORTS_secp160r1
-uECC_Curve uECC_secp160r1(void);
-#endif
-#if uECC_SUPPORTS_secp192r1
-uECC_Curve uECC_secp192r1(void);
-#endif
-#if uECC_SUPPORTS_secp224r1
-uECC_Curve uECC_secp224r1(void);
-#endif
-#if uECC_SUPPORTS_secp256r1
-uECC_Curve uECC_secp256r1(void);
-#endif
-#if uECC_SUPPORTS_secp256k1
-uECC_Curve uECC_secp256k1(void);
-#endif
-
-/* uECC_RNG_Function type
-The RNG function should fill 'size' random bytes into 'dest'. It should return 1 if
-'dest' was filled with random data, or 0 if the random data could not be generated.
-The filled-in values should be either truly random, or from a cryptographically-secure PRNG.
-
-A correctly functioning RNG function must be set (using uECC_set_rng()) before calling
-uECC_make_key() or uECC_sign().
-
-Setting a correctly functioning RNG function improves the resistance to side-channel attacks
-for uECC_shared_secret() and uECC_sign_deterministic().
-
-A correct RNG function is set by default when building for Windows, Linux, or OS X.
-If you are building on another POSIX-compliant system that supports /dev/random or /dev/urandom,
-you can define uECC_POSIX to use the predefined RNG. For embedded platforms there is no predefined
-RNG function; you must provide your own.
-*/
-typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size);
-
-/* uECC_set_rng() function.
-Set the function that will be used to generate random bytes. The RNG function should
-return 1 if the random data was generated, or 0 if the random data could not be generated.
-
-On platforms where there is no predefined RNG function (eg embedded platforms), this must
-be called before uECC_make_key() or uECC_sign() are used.
-
-Inputs:
-    rng_function - The function that will be used to generate random bytes.
-*/
-void uECC_set_rng(uECC_RNG_Function rng_function);
-
-/* uECC_get_rng() function.
-
-Returns the function that will be used to generate random bytes.
-*/
-uECC_RNG_Function uECC_get_rng(void);
-
-/* uECC_curve_private_key_size() function.
-
-Returns the size of a private key for the curve in bytes.
-*/
-int uECC_curve_private_key_size(uECC_Curve curve);
-
-/* uECC_curve_public_key_size() function.
-
-Returns the size of a public key for the curve in bytes.
-*/
-int uECC_curve_public_key_size(uECC_Curve curve);
-
-/* uECC_make_key() function.
-Create a public/private key pair.
-
-Outputs:
-    public_key  - Will be filled in with the public key. Must be at least 2 * the curve size
-                  (in bytes) long. For example, if the curve is secp256r1, public_key must be 64
-                  bytes long.
-    private_key - Will be filled in with the private key. Must be as long as the curve order; this
-                  is typically the same as the curve size, except for secp160r1. For example, if the
-                  curve is secp256r1, private_key must be 32 bytes long.
-
-                  For secp160r1, private_key must be 21 bytes long! Note that the first byte will
-                  almost always be 0 (there is about a 1 in 2^80 chance of it being non-zero).
-
-Returns 1 if the key pair was generated successfully, 0 if an error occurred.
-*/
-int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve);
-
-/* uECC_shared_secret() function.
-Compute a shared secret given your secret key and someone else's public key.
-Note: It is recommended that you hash the result of uECC_shared_secret() before using it for
-symmetric encryption or HMAC.
-
-Inputs:
-    public_key  - The public key of the remote party.
-    private_key - Your private key.
-
-Outputs:
-    secret - Will be filled in with the shared secret value. Must be the same size as the
-             curve size; for example, if the curve is secp256r1, secret must be 32 bytes long.
-
-Returns 1 if the shared secret was generated successfully, 0 if an error occurred.
-*/
-int uECC_shared_secret(const uint8_t *public_key,
-                       const uint8_t *private_key,
-                       uint8_t *secret,
-                       uECC_Curve curve);
-
-#if uECC_SUPPORT_COMPRESSED_POINT
-/* uECC_compress() function.
-Compress a public key.
-
-Inputs:
-    public_key - The public key to compress.
-
-Outputs:
-    compressed - Will be filled in with the compressed public key. Must be at least
-                 (curve size + 1) bytes long; for example, if the curve is secp256r1,
-                 compressed must be 33 bytes long.
-*/
-void uECC_compress(const uint8_t *public_key, uint8_t *compressed, uECC_Curve curve);
-
-/* uECC_decompress() function.
-Decompress a compressed public key.
-
-Inputs:
-    compressed - The compressed public key.
-
-Outputs:
-    public_key - Will be filled in with the decompressed public key.
-*/
-void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, uECC_Curve curve);
-#endif /* uECC_SUPPORT_COMPRESSED_POINT */
-
-/* uECC_valid_public_key() function.
-Check to see if a public key is valid.
-
-Note that you are not required to check for a valid public key before using any other uECC
-functions. However, you may wish to avoid spending CPU time computing a shared secret or
-verifying a signature using an invalid public key.
-
-Inputs:
-    public_key - The public key to check.
-
-Returns 1 if the public key is valid, 0 if it is invalid.
-*/
-int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve);
-
-/* uECC_compute_public_key() function.
-Compute the corresponding public key for a private key.
-
-Inputs:
-    private_key - The private key to compute the public key for
-
-Outputs:
-    public_key - Will be filled in with the corresponding public key
-
-Returns 1 if the key was computed successfully, 0 if an error occurred.
-*/
-int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, uECC_Curve curve);
-
-/* uECC_sign() function.
-Generate an ECDSA signature for a given hash value.
-
-Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it in to
-this function along with your private key.
-
-Inputs:
-    private_key  - Your private key.
-    message_hash - The hash of the message to sign.
-    hash_size    - The size of message_hash in bytes.
-
-Outputs:
-    signature - Will be filled in with the signature value. Must be at least 2 * curve size long.
-                For example, if the curve is secp256r1, signature must be 64 bytes long.
-
-Returns 1 if the signature generated successfully, 0 if an error occurred.
-*/
-int uECC_sign(const uint8_t *private_key,
-              const uint8_t *message_hash,
-              unsigned hash_size,
-              uint8_t *signature,
-              uECC_Curve curve);
-
-/* uECC_HashContext structure.
-This is used to pass in an arbitrary hash function to uECC_sign_deterministic().
-The structure will be used for multiple hash computations; each time a new hash
-is computed, init_hash() will be called, followed by one or more calls to
-update_hash(), and finally a call to finish_hash() to produce the resulting hash.
-
-The intention is that you will create a structure that includes uECC_HashContext
-followed by any hash-specific data. For example:
-
-typedef struct SHA256_HashContext {
-    uECC_HashContext uECC;
-    SHA256_CTX ctx;
-} SHA256_HashContext;
-
-void init_SHA256(uECC_HashContext *base) {
-    SHA256_HashContext *context = (SHA256_HashContext *)base;
-    SHA256_Init(&context->ctx);
-}
-
-void update_SHA256(uECC_HashContext *base,
-                   const uint8_t *message,
-                   unsigned message_size) {
-    SHA256_HashContext *context = (SHA256_HashContext *)base;
-    SHA256_Update(&context->ctx, message, message_size);
-}
-
-void finish_SHA256(uECC_HashContext *base, uint8_t *hash_result) {
-    SHA256_HashContext *context = (SHA256_HashContext *)base;
-    SHA256_Final(hash_result, &context->ctx);
-}
-
-... when signing ...
-{
-    uint8_t tmp[32 + 32 + 64];
-    SHA256_HashContext ctx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}};
-    uECC_sign_deterministic(key, message_hash, &ctx.uECC, signature);
-}
-*/
-typedef struct uECC_HashContext {
-    void (*init_hash)(const struct uECC_HashContext *context);
-    void (*update_hash)(const struct uECC_HashContext *context,
-                        const uint8_t *message,
-                        unsigned message_size);
-    void (*finish_hash)(const struct uECC_HashContext *context, uint8_t *hash_result);
-    unsigned block_size; /* Hash function block size in bytes, eg 64 for SHA-256. */
-    unsigned result_size; /* Hash function result size in bytes, eg 32 for SHA-256. */
-    uint8_t *tmp; /* Must point to a buffer of at least (2 * result_size + block_size) bytes. */
-} uECC_HashContext;
-
-/* uECC_sign_deterministic() function.
-Generate an ECDSA signature for a given hash value, using a deterministic algorithm
-(see RFC 6979). You do not need to set the RNG using uECC_set_rng() before calling
-this function; however, if the RNG is defined it will improve resistance to side-channel
-attacks.
-
-Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and pass it to
-this function along with your private key and a hash context. Note that the message_hash
-does not need to be computed with the same hash function used by hash_context.
-
-Inputs:
-    private_key  - Your private key.
-    message_hash - The hash of the message to sign.
-    hash_size    - The size of message_hash in bytes.
-    hash_context - A hash context to use.
-
-Outputs:
-    signature - Will be filled in with the signature value.
-
-Returns 1 if the signature generated successfully, 0 if an error occurred.
-*/
-int uECC_sign_deterministic(const uint8_t *private_key,
-                            const uint8_t *message_hash,
-                            unsigned hash_size,
-                            const uECC_HashContext *hash_context,
-                            uint8_t *signature,
-                            uECC_Curve curve);
-
-/* uECC_verify() function.
-Verify an ECDSA signature.
-
-Usage: Compute the hash of the signed data using the same hash as the signer and
-pass it to this function along with the signer's public key and the signature values (r and s).
-
-Inputs:
-    public_key   - The signer's public key.
-    message_hash - The hash of the signed data.
-    hash_size    - The size of message_hash in bytes.
-    signature    - The signature value.
-
-Returns 1 if the signature is valid, 0 if it is invalid.
-*/
-int uECC_verify(const uint8_t *public_key,
-                const uint8_t *message_hash,
-                unsigned hash_size,
-                const uint8_t *signature,
-                uECC_Curve curve);
-
-#ifdef __cplusplus
-} /* end of extern "C" */
-#endif
-
-#endif /* _UECC_H_ */

+ 0 - 172
components/bootloader/subproject/components/micro-ecc/micro-ecc/uECC_vli.h

@@ -1,172 +0,0 @@
-/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
-
-#ifndef _UECC_VLI_H_
-#define _UECC_VLI_H_
-
-#include "uECC.h"
-#include "types.h"
-
-/* Functions for raw large-integer manipulation. These are only available
-   if uECC.c is compiled with uECC_ENABLE_VLI_API defined to 1. */
-#ifndef uECC_ENABLE_VLI_API
-    #define uECC_ENABLE_VLI_API 0
-#endif
-
-#ifdef __cplusplus
-extern "C"
-{
-#endif
-
-#if uECC_ENABLE_VLI_API
-
-void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words);
-
-/* Constant-time comparison to zero - secure way to compare long integers */
-/* Returns 1 if vli == 0, 0 otherwise. */
-uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words);
-
-/* Returns nonzero if bit 'bit' of vli is set. */
-uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit);
-
-/* Counts the number of bits required to represent vli. */
-bitcount_t uECC_vli_numBits(const uECC_word_t *vli, const wordcount_t max_words);
-
-/* Sets dest = src. */
-void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, wordcount_t num_words);
-
-/* Constant-time comparison function - secure way to compare long integers */
-/* Returns one if left == right, zero otherwise */
-uECC_word_t uECC_vli_equal(const uECC_word_t *left,
-                           const uECC_word_t *right,
-                           wordcount_t num_words);
-
-/* Constant-time comparison function - secure way to compare long integers */
-/* Returns sign of left - right, in constant time. */
-cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, wordcount_t num_words);
-
-/* Computes vli = vli >> 1. */
-void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words);
-
-/* Computes result = left + right, returning carry. Can modify in place. */
-uECC_word_t uECC_vli_add(uECC_word_t *result,
-                         const uECC_word_t *left,
-                         const uECC_word_t *right,
-                         wordcount_t num_words);
-
-/* Computes result = left - right, returning borrow. Can modify in place. */
-uECC_word_t uECC_vli_sub(uECC_word_t *result,
-                         const uECC_word_t *left,
-                         const uECC_word_t *right,
-                         wordcount_t num_words);
-
-/* Computes result = left * right. Result must be 2 * num_words long. */
-void uECC_vli_mult(uECC_word_t *result,
-                   const uECC_word_t *left,
-                   const uECC_word_t *right,
-                   wordcount_t num_words);
-
-/* Computes result = left^2. Result must be 2 * num_words long. */
-void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, wordcount_t num_words);
-
-/* Computes result = (left + right) % mod.
-   Assumes that left < mod and right < mod, and that result does not overlap mod. */
-void uECC_vli_modAdd(uECC_word_t *result,
-                     const uECC_word_t *left,
-                     const uECC_word_t *right,
-                     const uECC_word_t *mod,
-                     wordcount_t num_words);
-
-/* Computes result = (left - right) % mod.
-   Assumes that left < mod and right < mod, and that result does not overlap mod. */
-void uECC_vli_modSub(uECC_word_t *result,
-                     const uECC_word_t *left,
-                     const uECC_word_t *right,
-                     const uECC_word_t *mod,
-                     wordcount_t num_words);
-
-/* Computes result = product % mod, where product is 2N words long.
-   Currently only designed to work for mod == curve->p or curve_n. */
-void uECC_vli_mmod(uECC_word_t *result,
-                   uECC_word_t *product,
-                   const uECC_word_t *mod,
-                   wordcount_t num_words);
-
-/* Calculates result = product (mod curve->p), where product is up to
-   2 * curve->num_words long. */
-void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, uECC_Curve curve);
-
-/* Computes result = (left * right) % mod.
-   Currently only designed to work for mod == curve->p or curve_n. */
-void uECC_vli_modMult(uECC_word_t *result,
-                      const uECC_word_t *left,
-                      const uECC_word_t *right,
-                      const uECC_word_t *mod,
-                      wordcount_t num_words);
-
-/* Computes result = (left * right) % curve->p. */
-void uECC_vli_modMult_fast(uECC_word_t *result,
-                           const uECC_word_t *left,
-                           const uECC_word_t *right,
-                           uECC_Curve curve);
-
-/* Computes result = left^2 % mod.
-   Currently only designed to work for mod == curve->p or curve_n. */
-void uECC_vli_modSquare(uECC_word_t *result,
-                        const uECC_word_t *left,
-                        const uECC_word_t *mod,
-                        wordcount_t num_words);
-
-/* Computes result = left^2 % curve->p. */
-void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, uECC_Curve curve);
-
-/* Computes result = (1 / input) % mod.*/
-void uECC_vli_modInv(uECC_word_t *result,
-                     const uECC_word_t *input,
-                     const uECC_word_t *mod,
-                     wordcount_t num_words);
-
-#if uECC_SUPPORT_COMPRESSED_POINT
-/* Calculates a = sqrt(a) (mod curve->p) */
-void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve);
-#endif
-
-/* Converts an integer in uECC native format to big-endian bytes. */
-void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, const uECC_word_t *native);
-/* Converts big-endian bytes to an integer in uECC native format. */
-void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, int num_bytes);
-
-unsigned uECC_curve_num_words(uECC_Curve curve);
-unsigned uECC_curve_num_bytes(uECC_Curve curve);
-unsigned uECC_curve_num_bits(uECC_Curve curve);
-unsigned uECC_curve_num_n_words(uECC_Curve curve);
-unsigned uECC_curve_num_n_bytes(uECC_Curve curve);
-unsigned uECC_curve_num_n_bits(uECC_Curve curve);
-
-const uECC_word_t *uECC_curve_p(uECC_Curve curve);
-const uECC_word_t *uECC_curve_n(uECC_Curve curve);
-const uECC_word_t *uECC_curve_G(uECC_Curve curve);
-const uECC_word_t *uECC_curve_b(uECC_Curve curve);
-
-int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve);
-
-/* Multiplies a point by a scalar. Points are represented by the X coordinate followed by
-   the Y coordinate in the same array, both coordinates are curve->num_words long. Note
-   that scalar must be curve->num_n_words long (NOT curve->num_words). */
-void uECC_point_mult(uECC_word_t *result,
-                     const uECC_word_t *point,
-                     const uECC_word_t *scalar,
-                     uECC_Curve curve);
-
-/* Generates a random integer in the range 0 < random < top.
-   Both random and top have num_words words. */
-int uECC_generate_random_int(uECC_word_t *random,
-                             const uECC_word_t *top,
-                             wordcount_t num_words);
-
-#endif /* uECC_ENABLE_VLI_API */
-
-#ifdef __cplusplus
-} /* end of extern "C" */
-#endif
-
-#endif /* _UECC_VLI_H_ */

+ 0 - 8
components/bootloader/subproject/main/CMakeLists.txt

@@ -1,8 +0,0 @@
-idf_component_register(SRCS "bootloader_start.c"
-                    REQUIRES bootloader bootloader_support)
-
-idf_build_get_property(target IDF_TARGET)
-set(scripts "${target}.bootloader.ld"
-            "${target}.bootloader.rom.ld")
-
-target_linker_script(${COMPONENT_LIB} INTERFACE "${scripts}")

+ 0 - 4
components/bootloader/subproject/main/Makefile.projbuild

@@ -1,4 +0,0 @@
-# Submodules normally added in component.mk, but fully qualified
-# paths can be added at this level (we need binary librtc to be
-# available to link bootloader).
-COMPONENT_SUBMODULES += $(IDF_PATH)/components/esp_wifi/lib_esp32

+ 0 - 126
components/bootloader/subproject/main/bootloader_start.c

@@ -1,126 +0,0 @@
-// Copyright 2015-2016 Espressif Systems (Shanghai) PTE LTD
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <string.h>
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "esp_log.h"
-#include "esp32/rom/gpio.h"
-#include "esp32/rom/spi_flash.h"
-#include "bootloader_config.h"
-#include "bootloader_init.h"
-#include "bootloader_utility.h"
-#include "bootloader_common.h"
-#include "sdkconfig.h"
-#include "esp_image_format.h"
-
-static const char* TAG = "boot";
-
-static int select_partition_number (bootloader_state_t *bs);
-static int selected_boot_partition(const bootloader_state_t *bs);
-#define LWS_MAGIC_REBOOT_TYPE_ADS 0x50001ffc
-#define LWS_MAGIC_REBOOT_TYPE_REQ_FACTORY 0xb00bcafe
-#define LWS_MAGIC_REBOOT_TYPE_FORCED_FACTORY 0xfaceb00b
-#define LWS_MAGIC_REBOOT_TYPE_FORCED_FACTORY_BUTTON 0xf0cedfac
-#define LWS_MAGIC_REBOOT_TYPE_REQ_FACTORY_ERASE_OTA 0xfac0eeee
-
-/*
- * We arrive here after the ROM bootloader finished loading this second stage bootloader from flash.
- * The hardware is mostly uninitialized, flash cache is down and the app CPU is in reset.
- * We do have a stack, so we can do the initialization in C.
- */
-void __attribute__((noreturn)) call_start_cpu0()
-{
-    // 1. Hardware initialization
-    if (bootloader_init() != ESP_OK) {
-        bootloader_reset();
-    }
-
-    // 2. Select the number of boot partition
-    bootloader_state_t bs = { 0 };
-    int boot_index = select_partition_number(&bs);
-    if (boot_index == INVALID_INDEX) {
-        bootloader_reset();
-    }
-
-    // 3. Load the app image for booting
-    bootloader_utility_load_boot_image(&bs, boot_index);
-}
-
-// Select the number of boot partition
-static int select_partition_number (bootloader_state_t *bs)
-{
-    // 1. Load partition table
-    if (!bootloader_utility_load_partition_table(bs)) {
-        ESP_LOGE(TAG, "load partition table error!");
-        return INVALID_INDEX;
-    }
-
-    // 2. Select the number of boot partition
-    return selected_boot_partition(bs);
-}
-
-/*
- * Selects a boot partition.
- * The conditions for switching to another firmware are checked.
- */
-static int selected_boot_partition(const bootloader_state_t *bs)
-{
-    int boot_index = bootloader_utility_get_selected_boot_partition(bs);
-    if (boot_index == INVALID_INDEX) {
-        return boot_index; // Unrecoverable failure (not due to corrupt ota data or bad partition contents)
-    } else {
-        // Factory firmware.
-#ifdef CONFIG_BOOTLOADER_FACTORY_RESET
-        if (bootloader_common_check_long_hold_gpio(CONFIG_BOOTLOADER_NUM_PIN_FACTORY_RESET, CONFIG_BOOTLOADER_HOLD_TIME_GPIO) == 1) {
-            ESP_LOGI(TAG, "Detect a condition of the factory reset");
-            bool ota_data_erase = false;
-#ifdef CONFIG_BOOTLOADER_OTA_DATA_ERASE
-            ota_data_erase = true;
-#endif
-            const char *list_erase = CONFIG_BOOTLOADER_DATA_FACTORY_RESET;
-            ESP_LOGI(TAG, "Data partitions to erase: %s", list_erase);
-            if (bootloader_common_erase_part_type_data(list_erase, ota_data_erase) == false) {
-                ESP_LOGE(TAG, "Not all partitions were erased");
-            }
-            return bootloader_utility_get_selected_boot_partition(bs);
-        }
-#endif
-       // TEST firmware.
-#ifdef CONFIG_BOOTLOADER_APP_TEST
-        if (bootloader_common_check_long_hold_gpio(CONFIG_BOOTLOADER_NUM_PIN_APP_TEST, CONFIG_BOOTLOADER_HOLD_TIME_GPIO) == 1) {
-            ESP_LOGI(TAG, "Detect a boot condition of the test firmware");
-            if (bs->test.offset != 0) {
-                boot_index = TEST_APP_INDEX;
-                return boot_index;
-            } else {
-                ESP_LOGE(TAG, "Test firmware is not found in partition table");
-                return INVALID_INDEX;
-            }
-        }
-#endif
-        uint32_t *p_force_factory_magic = (uint32_t *)LWS_MAGIC_REBOOT_TYPE_ADS;
-        if(*p_force_factory_magic == LWS_MAGIC_REBOOT_TYPE_REQ_FACTORY){
-        	boot_index=FACTORY_INDEX;
-        }
-
-    }
-    return boot_index;
-}
-
-// Return global reent struct if any newlib functions are linked to bootloader
-struct _reent* __getreent() {
-    return _GLOBAL_REENT;
-}
-

+ 0 - 21
components/bootloader/subproject/main/component.mk

@@ -1,21 +0,0 @@
-#
-# Main bootloader Makefile.
-#
-# This is basically the same as a component makefile, but in the case of the bootloader
-# we pull in bootloader-specific linker arguments.
-#
-
-LINKER_SCRIPTS := \
-    $(IDF_TARGET).bootloader.ld \
-    $(IDF_TARGET).bootloader.rom.ld \
-    $(IDF_PATH)/components/esp_rom/$(IDF_TARGET)/ld/$(IDF_TARGET).rom.ld \
-    $(IDF_PATH)/components/esp_rom/$(IDF_TARGET)/ld/$(IDF_TARGET).rom.newlib-funcs.ld \
-    $(IDF_PATH)/components/$(IDF_TARGET)/ld/$(IDF_TARGET).peripherals.ld
-
-ifndef CONFIG_SPI_FLASH_ROM_DRIVER_PATCH
-LINKER_SCRIPTS += $(IDF_PATH)/components/esp_rom/$(IDF_TARGET)/ld/$(IDF_TARGET).rom.spiflash.ld
-endif
-
-COMPONENT_ADD_LDFLAGS += -L $(COMPONENT_PATH) $(addprefix -T ,$(LINKER_SCRIPTS))
-
-COMPONENT_ADD_LINKER_DEPS := $(LINKER_SCRIPTS)

+ 0 - 167
components/bootloader/subproject/main/esp32.bootloader.ld

@@ -1,167 +0,0 @@
-/*
-Linker file used to link the bootloader.
-*/
-
-
-/* Simplified memory map for the bootloader
-
-   The main purpose is to make sure the bootloader can load into main memory
-   without overwriting itself.
-*/
-
-MEMORY
-{
-  /* I/O */
-  dport0_seg (RW) :                 	org = 0x3FF00000, len = 0x10
-  /* IRAM POOL1, used for APP CPU cache. Bootloader runs from here during the final stage of loading the app because APP CPU is still held in reset, the main app enables APP CPU cache */
-  iram_loader_seg (RWX) :           org = 0x40078000, len = 0x8000  /* 32KB, APP CPU cache */
-  /* 63kB, IRAM. We skip the first 1k to prevent the entry point being
-     placed into the same range as exception vectors in the app.
-     This leads to idf_monitor decoding ROM bootloader "entry 0x40080xxx"
-     message as one of the exception vectors, which looks scary to users.
-  */
-  iram_seg (RWX) :                  org = 0x40080400, len = 0xfc00
-  /* 64k at the end of DRAM, after ROM bootloader stack */
-  dram_seg (RW) :                  	org = 0x3FFF0000, len = 0x10000
-}
-
-/*  Default entry point:  */
-ENTRY(call_start_cpu0);
-
-
-SECTIONS
-{
-
-  .iram_loader.text :
-  {
-    . = ALIGN (16);
-    _loader_text_start = ABSOLUTE(.);
-    *(.stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
-     *(.iram1 .iram1.*) /* catch stray IRAM_ATTR */
-    *liblog.a:(.literal .text .literal.* .text.*)
-    *libgcc.a:(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:bootloader_common.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:bootloader_flash.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:bootloader_random.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:bootloader_utility.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:bootloader_sha.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:esp_image_format.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:flash_encrypt.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:flash_partitions.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:secure_boot.*(.literal .text .literal.* .text.*)
-    *libbootloader_support.a:secure_boot_signatures.*(.literal .text .literal.* .text.*)
-    *libmicro-ecc.a:*.*(.literal .text .literal.* .text.*)
-    *libspi_flash.a:*.*(.literal .text .literal.* .text.*)
-    *libsoc.a:rtc_wdt.*(.literal .text .literal.* .text.*)
-    *libefuse.a:*.*(.literal .text .literal.* .text.*)
-    *(.fini.literal)
-    *(.fini)
-    *(.gnu.version)
-    _loader_text_end = ABSOLUTE(.);
-  } > iram_loader_seg
-
-  .iram.text :
-  {
-    . = ALIGN (16);
-    *(.entry.text)
-    *(.init.literal)
-    *(.init)
-  } > iram_seg
-
-
-  /* Shared RAM */
-  .dram0.bss (NOLOAD) :
-  {
-    . = ALIGN (8);
-    _bss_start = ABSOLUTE(.);
-    *(.dynsbss)
-    *(.sbss)
-    *(.sbss.*)
-    *(.gnu.linkonce.sb.*)
-    *(.scommon)
-    *(.sbss2)
-    *(.sbss2.*)
-    *(.gnu.linkonce.sb2.*)
-    *(.dynbss)
-    *(.bss)
-    *(.bss.*)
-    *(.gnu.linkonce.b.*)
-    *(COMMON)
-    . = ALIGN (8);
-    _bss_end = ABSOLUTE(.);
-  } >dram_seg
-
-  .dram0.data :
-  {
-    _data_start = ABSOLUTE(.);
-    *(.data)
-    *(.data.*)
-    *(.gnu.linkonce.d.*)
-    *(.data1)
-    *(.sdata)
-    *(.sdata.*)
-    *(.gnu.linkonce.s.*)
-    *(.sdata2)
-    *(.sdata2.*)
-    *(.gnu.linkonce.s2.*)
-    *(.jcr)
-    _data_end = ABSOLUTE(.);
-  } >dram_seg
-
-  .dram0.rodata :
-  {
-    _rodata_start = ABSOLUTE(.);
-    *(.rodata)
-    *(.rodata.*)
-    *(.gnu.linkonce.r.*)
-    *(.rodata1)
-    __XT_EXCEPTION_TABLE_ = ABSOLUTE(.);
-    *(.xt_except_table)
-    *(.gcc_except_table)
-    *(.gnu.linkonce.e.*)
-    *(.gnu.version_r)
-    *(.eh_frame)
-    . = (. + 3) & ~ 3;
-    /*  C++ constructor and destructor tables, properly ordered:  */
-    __init_array_start = ABSOLUTE(.);
-    KEEP (*crtbegin.*(.ctors))
-    KEEP (*(EXCLUDE_FILE (*crtend.*) .ctors))
-    KEEP (*(SORT(.ctors.*)))
-    KEEP (*(.ctors))
-    __init_array_end = ABSOLUTE(.);
-    KEEP (*crtbegin.*(.dtors))
-    KEEP (*(EXCLUDE_FILE (*crtend.*) .dtors))
-    KEEP (*(SORT(.dtors.*)))
-    KEEP (*(.dtors))
-    /*  C++ exception handlers table:  */
-    __XT_EXCEPTION_DESCS_ = ABSOLUTE(.);
-    *(.xt_except_desc)
-    *(.gnu.linkonce.h.*)
-    __XT_EXCEPTION_DESCS_END__ = ABSOLUTE(.);
-    *(.xt_except_desc_end)
-    *(.dynamic)
-    *(.gnu.version_d)
-    _rodata_end = ABSOLUTE(.);
-	/* Literals are also RO data. */
-    _lit4_start = ABSOLUTE(.);
-    *(*.lit4)
-    *(.lit4.*)
-    *(.gnu.linkonce.lit4.*)
-    _lit4_end = ABSOLUTE(.);
-    . = ALIGN(4);
-  } >dram_seg
-
-  .iram.text :
-  {
-    _stext = .;
-    _text_start = ABSOLUTE(.);
-    *(.literal .text .literal.* .text.* .stub .gnu.warning .gnu.linkonce.literal.* .gnu.linkonce.t.*.literal .gnu.linkonce.t.*)
-    *(.iram .iram.*) /* catch stray IRAM_ATTR */
-    *(.fini.literal)
-    *(.fini)
-    *(.gnu.version)
-    _text_end = ABSOLUTE(.);
-    _etext = .;
-  } > iram_seg
-
-}

+ 0 - 9
components/bootloader/subproject/main/esp32.bootloader.rom.ld

@@ -1,9 +0,0 @@
-PROVIDE ( ets_update_cpu_frequency = 0x40008550 ); /* Updates g_ticks_per_us on the current CPU only; not on the other core */
-PROVIDE ( MD5Final = 0x4005db1c );
-PROVIDE ( MD5Init = 0x4005da7c );
-PROVIDE ( MD5Update = 0x4005da9c );
-
-/* bootloader will use following functions from xtensa hal library */
-xthal_get_ccount = 0x4000c050;
-xthal_get_ccompare = 0x4000c078;
-xthal_set_ccompare = 0x4000c058;

+ 4 - 52
components/cmd_nvs/cmd_nvs.c

@@ -6,7 +6,7 @@
    software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    CONDITIONS OF ANY KIND, either express or implied.
 */
-
+//#define LOG_LOCAL_LEVEL ESP_LOG_VERBOSE
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -26,26 +26,8 @@ extern "C" {
 #include "nvs.h"
 #include "nvs_utilities.h"
 
-typedef struct {
-    nvs_type_t type;
-    const char *str;
-} type_str_pair_t;
-
-static const type_str_pair_t type_str_pair[] = {
-    { NVS_TYPE_I8, "i8" },
-    { NVS_TYPE_U8, "u8" },
-    { NVS_TYPE_U16, "u16" },
-    { NVS_TYPE_I16, "i16" },
-    { NVS_TYPE_U32, "u32" },
-    { NVS_TYPE_I32, "i32" },
-    { NVS_TYPE_U64, "u64" },
-    { NVS_TYPE_I64, "i64" },
-    { NVS_TYPE_STR, "str" },
-    { NVS_TYPE_BLOB, "blob" },
-    { NVS_TYPE_ANY, "any" },
-};
-
-static const size_t TYPE_STR_PAIR_SIZE = sizeof(type_str_pair) / sizeof(type_str_pair[0]);
+
+
 static const char *ARG_TYPE_STR = "type can be: i8, u8, i16, u16 i32, u32 i64, u64, str, blob";
 static const char * TAG = "platform_esp32";
 
@@ -80,28 +62,7 @@ static struct {
 } list_args;
 
 
-static nvs_type_t str_to_type(const char *type)
-{
-    for (int i = 0; i < TYPE_STR_PAIR_SIZE; i++) {
-        const type_str_pair_t *p = &type_str_pair[i];
-        if (strcmp(type, p->str) == 0) {
-            return  p->type;
-        }
-    }
-
-    return NVS_TYPE_ANY;
-}
-static const char *type_to_str(nvs_type_t type)
-{
-    for (int i = 0; i < TYPE_STR_PAIR_SIZE; i++) {
-        const type_str_pair_t *p = &type_str_pair[i];
-        if (p->type == type) {
-            return  p->str;
-        }
-    }
 
-    return "Unknown";
-}
 static esp_err_t store_blob(nvs_handle nvs, const char *key, const char *str_values)
 {
     uint8_t value;
@@ -149,14 +110,6 @@ static esp_err_t store_blob(nvs_handle nvs, const char *key, const char *str_val
     return err;
 }
 
-static void print_blob(const char *blob, size_t len)
-{
-    for (int i = 0; i < len; i++) {
-        printf("%02x", blob[i]);
-    }
-    printf("\n");
-}
-
 static esp_err_t set_value_in_nvs(const char *key, const char *str_type, const char *str_value)
 {
     esp_err_t err;
@@ -494,8 +447,7 @@ static int list_entries(int argc, char **argv)
 }
 void register_nvs()
 {
-	esp_log_level_set(TAG, ESP_LOG_VERBOSE);
-    set_args.key = arg_str1(NULL, NULL, "<key>", "key of the value to be set");
+	set_args.key = arg_str1(NULL, NULL, "<key>", "key of the value to be set");
     set_args.type = arg_str1(NULL, NULL, "<type>", ARG_TYPE_STR);
     set_args.value = arg_str1("v", "value", "<value>", "value to be stored");
     set_args.end = arg_end(2);

+ 1 - 0
components/cmd_nvs/cmd_nvs.h

@@ -7,6 +7,7 @@
    CONDITIONS OF ANY KIND, either express or implied.
 */
 #pragma once
+#include "nvs_flash.h"
 
 #ifdef __cplusplus
 extern "C" {

+ 82 - 16
components/cmd_system/cmd_system.c

@@ -6,14 +6,13 @@
    software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    CONDITIONS OF ANY KIND, either express or implied.
 */
-
+#define LOG_LOCAL_LEVEL ESP_LOG_INFO
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
 #include "esp_log.h"
 #include "esp_console.h"
 #include "esp_system.h"
-#include "esp_sleep.h"
 #include "esp_spi_flash.h"
 #include "driver/rtc_io.h"
 #include "driver/uart.h"
@@ -27,13 +26,16 @@
 #include "esp_partition.h"
 #include "esp_ota_ops.h"
 #include "platform_esp32.h"
+#include "nvs_utilities.h"
+#include "esp_sleep.h"
+#include "driver/uart.h"            // for the uart driver access
 
 #ifdef CONFIG_FREERTOS_USE_STATS_FORMATTING_FUNCTIONS
 #define WITH_TASKS_INFO 1
 #endif
 
 
-static const char * TAG = "platform_esp32";
+static const char * TAG = "cmd_system";
 
 static void register_free();
 static void register_heap();
@@ -42,6 +44,7 @@ static void register_restart();
 static void register_deep_sleep();
 static void register_light_sleep();
 static void register_factory_boot();
+static void register_restart_ota();
 #if WITH_TASKS_INFO
 static void register_tasks();
 #endif
@@ -55,6 +58,7 @@ void register_system()
     register_deep_sleep();
     register_light_sleep();
     register_factory_boot();
+    register_restart_ota();
 #if WITH_TASKS_INFO
     register_tasks();
 #endif
@@ -98,9 +102,21 @@ esp_err_t guided_boot(esp_partition_subtype_t partition_subtype)
 #if RECOVERY_APPLICATION
 	if(partition_subtype ==ESP_PARTITION_SUBTYPE_APP_FACTORY){
 		ESP_LOGW(TAG,"RECOVERY application is already active");
+		ESP_LOGW(TAG, "Restarting after tx complete");
+		uart_wait_tx_done(UART_NUM_1, 500 / portTICK_RATE_MS);
+		esp_restart();
+		return ESP_OK;
+	}
+#else
+	if(partition_subtype !=ESP_PARTITION_SUBTYPE_APP_FACTORY){
+		ESP_LOGW(TAG,"SQUEEZELITE application is already active");
+		ESP_LOGW(TAG, "Restarting after tx complete");
+		uart_wait_tx_done(UART_NUM_1, 500 / portTICK_RATE_MS);
+		esp_restart();
 		return ESP_OK;
 	}
 #endif
+	esp_err_t err = ESP_OK;
 	bool bFound=false;
     ESP_LOGI(TAG, "Looking for partition type %u",partition_subtype);
     const esp_partition_t *partition;
@@ -112,21 +128,38 @@ esp_err_t guided_boot(esp_partition_subtype_t partition_subtype)
 	}
 	else
 	{
+		ESP_LOGD(TAG, "Found partition. Getting info.");
 		partition = (esp_partition_t *) esp_partition_get(it);
+		ESP_LOGD(TAG, "Releasing partition iterator");
+		esp_partition_iterator_release(it);
 		if(partition != NULL){
-			ESP_LOGI(TAG, "Found partition type %u",partition_subtype);
-			esp_ota_set_boot_partition(partition);
-			bFound=true;
-			set_status_message(WARNING, "Rebooting!");
+			ESP_LOGI(TAG, "Found application partition %s sub type %u", partition->label,partition_subtype);
+			err=esp_ota_set_boot_partition(partition);
+			if(err!=ESP_OK){
+				ESP_LOGE(TAG,"Unable to set partition as active for next boot. %s",esp_err_to_name(err));
+				bFound=false;
+				set_status_message(ERROR, "Unable to select partition for reboot.");
+			}
+			else{
+				ESP_LOGW(TAG, "Application partition %s sub type %u is selected for boot", partition->label,partition_subtype);
+				bFound=true;
+				set_status_message(WARNING, "Rebooting!");
+			}
 		}
 		else
 		{
 			ESP_LOGE(TAG,"partition type %u not found!  Unable to reboot to recovery.",partition_subtype);
 			set_status_message(ERROR, "Partition not found.");
 		}
-		esp_partition_iterator_release(it);
+		ESP_LOGD(TAG, "Yielding to other processes");
+		taskYIELD();
 		if(bFound) {
-			ESP_LOGI(TAG, "Restarting!.");
+			ESP_LOGW(TAG,"Configuration %s changes. ",config_has_changes()?"has":"does not have");
+			if(!wait_for_commit()){
+				ESP_LOGW(TAG,"Unable to commit configuration. ");
+			}
+			ESP_LOGW(TAG, "Restarting after tx complete");
+			uart_wait_tx_done(UART_NUM_1, 500 / portTICK_RATE_MS);
 			esp_restart();
 		}
 	}
@@ -136,27 +169,50 @@ esp_err_t guided_boot(esp_partition_subtype_t partition_subtype)
 
 static int restart(int argc, char **argv)
 {
-    ESP_LOGI(TAG, "Restarting");
-    guided_boot(ESP_PARTITION_SUBTYPE_APP_OTA_0);
-    // If we're still alive, then there may not be an ota partition to boot from
-    guided_boot(ESP_PARTITION_SUBTYPE_APP_FACTORY);
-	return 0; // return fail.  This should never return... we're rebooting!
+	ESP_LOGW(TAG, "\n\nPerforming a simple restart to the currently active partition.");
+	if(!wait_for_commit()){
+		ESP_LOGW(TAG,"Unable to commit configuration. ");
+	}
+	ESP_LOGW(TAG, "Restarting after tx complete");
+    uart_wait_tx_done(UART_NUM_1, 500 / portTICK_RATE_MS);
+    esp_restart();
+    return 0;
+}
+
+void simple_restart()
+{
+	ESP_LOGW(TAG,"\n\n Called to perform a simple system reboot.");
+	if(!wait_for_commit()){
+		ESP_LOGW(TAG,"Unable to commit configuration. ");
+	}
+
+	ESP_LOGW(TAG, "Restarting after tx complete");
+	uart_wait_tx_done(UART_NUM_1, 500 / portTICK_RATE_MS);
+    esp_restart();
 }
+
 esp_err_t guided_restart_ota(){
+	ESP_LOGW(TAG,"\n\nCalled for a reboot to OTA Application");
     guided_boot(ESP_PARTITION_SUBTYPE_APP_OTA_0);
-    // If we're still alive, then there may not be an ota partition to boot from
-    guided_boot(ESP_PARTITION_SUBTYPE_APP_FACTORY);
 	return ESP_FAIL; // return fail.  This should never return... we're rebooting!
 }
 esp_err_t guided_factory(){
+	ESP_LOGW(TAG,"\n\nCalled for a reboot to recovery application");
 	guided_boot(ESP_PARTITION_SUBTYPE_APP_FACTORY);
 	return ESP_FAIL; // return fail.  This should never return... we're rebooting!
 }
 static int restart_factory(int argc, char **argv)
 {
+	ESP_LOGW(TAG, "Executing guided boot into recovery");
 	guided_boot(ESP_PARTITION_SUBTYPE_APP_FACTORY);
 	return 0; // return fail.  This should never return... we're rebooting!
 }
+static int restart_ota(int argc, char **argv)
+{
+	ESP_LOGW(TAG, "Executing guided boot into ota app 0");
+	guided_boot(ESP_PARTITION_SUBTYPE_APP_OTA_0);
+	return 0; // return fail.  This should never return... we're rebooting!
+}
 static void register_restart()
 {
     const esp_console_cmd_t cmd = {
@@ -167,6 +223,16 @@ static void register_restart()
     };
     ESP_ERROR_CHECK( esp_console_cmd_register(&cmd) );
 }
+static void register_restart_ota()
+{
+    const esp_console_cmd_t cmd = {
+        .command = "restart_ota",
+        .help = "Selects the ota app partition to boot from and performa a software reset of the chip",
+        .hint = NULL,
+        .func = &restart_ota,
+    };
+    ESP_ERROR_CHECK( esp_console_cmd_register(&cmd) );
+}
 
 static void register_factory_boot()
 {

+ 1 - 0
components/cmd_system/cmd_system.h

@@ -16,6 +16,7 @@ extern "C" {
 void register_system();
 esp_err_t guided_factory();
 esp_err_t guided_restart_ota();
+void simple_restart();
 
 #ifdef __cplusplus
 }

+ 1 - 0
components/cmd_system/component.mk

@@ -9,3 +9,4 @@
 
 COMPONENT_ADD_INCLUDEDIRS := .
 COMPONENT_EXTRA_INCLUDES += $(PROJECT_PATH)/main/
+COMPONENT_EXTRA_INCLUDES += $(PROJECT_PATH)/components/tools/

+ 29 - 8
components/driver_bt/bt_app_sink.c

@@ -399,16 +399,37 @@ void bt_sink_init(bt_cmd_cb_t cmd_cb, bt_data_cb_t data_cb)
 
     /*
      * Set default parameters for Legacy Pairing
-     * Use fixed pin code
      */
     esp_bt_pin_type_t pin_type = ESP_BT_PIN_TYPE_FIXED;
-    esp_bt_pin_code_t pin_code;
-	pin_code[0] = '1';
-    pin_code[1] = '2';
-    pin_code[2] = '3';
-    pin_code[3] = '4';
-    esp_bt_gap_set_pin(pin_type, 4, pin_code);
 
+    char * pin_code = config_alloc_get_default(NVS_TYPE_STR, "bt_sink_pin", STR(CONFIG_BT_SINK_PIN), 0);
+    if(strlen(pin_code)>ESP_BT_PIN_CODE_LEN){
+
+    	ESP_LOGW(BT_AV_TAG, "BT Sink pin code [%s] too long. ", pin_code);
+    	pin_code[ESP_BT_PIN_CODE_LEN] = '\0';
+    	ESP_LOGW(BT_AV_TAG, "BT Sink pin truncated code [%s]. ", pin_code);
+    }
+
+    esp_bt_pin_code_t esp_pin_code;
+    bool bError=false;
+    memset(esp_pin_code, 0x00, sizeof(esp_pin_code) );
+    ESP_LOGW(BT_AV_TAG, "BT Sink pin code is: [%s] ", pin_code);
+
+    for(int i=0;i<strlen(pin_code);i++){
+    	if(pin_code[i] < '0' || pin_code[i] > '9' ) {
+    		ESP_LOGE(BT_AV_TAG,"Invalid number found in sequence");
+    		bError=true;
+    	}
+    	esp_pin_code[i]= pin_code[i];
+
+    }
+    if(bError){
+    	esp_pin_code[0]='1';
+    	esp_pin_code[1]='2';
+    	esp_pin_code[2]='3';
+    	esp_pin_code[3]='4';
+    }
+    esp_bt_gap_set_pin(pin_type, strlen(pin_code), esp_pin_code);
 }
 
 void bt_sink_deinit(void)
@@ -466,7 +487,7 @@ static void bt_av_hdl_stack_evt(uint16_t event, void *p_param)
     switch (event) {
     case BT_APP_EVT_STACK_UP: {
         /* set up device name */
-		bt_name = (char * )get_nvs_value_alloc_default(NVS_TYPE_STR, "bt_name", CONFIG_BT_NAME, 0);
+		bt_name = (char * )config_alloc_get_default(NVS_TYPE_STR, "bt_name", CONFIG_BT_NAME, 0);
 		esp_bt_dev_set_device_name(bt_name);
 		free(bt_name);
         esp_bt_gap_register_callback(bt_app_gap_cb);

+ 2 - 2
components/driver_bt/bt_app_source.c

@@ -166,7 +166,7 @@ void hal_bluetooth_init(const char * options)
 	}
 	if(squeezelite_args.sink_name->count == 0)
 	{
-		squeezelite_conf.sink_name = get_nvs_value_alloc_default(NVS_TYPE_STR, "a2dp_sink_name", CONFIG_A2DP_SINK_NAME, 0);
+		squeezelite_conf.sink_name = config_alloc_get_default(NVS_TYPE_STR, "a2dp_sink_name", CONFIG_A2DP_SINK_NAME, 0);
     	if(squeezelite_conf.sink_name  == NULL){
     		ESP_LOGW(TAG,"Unable to retrieve the a2dp sink name from nvs");
     		squeezelite_conf.sink_name = strdup(CONFIG_A2DP_SINK_NAME);
@@ -511,7 +511,7 @@ static void bt_av_hdl_stack_evt(uint16_t event, void *p_param)
         /* set up device name */
 
 
-        char * a2dp_dev_name = 	get_nvs_value_alloc_default(NVS_TYPE_STR, "a2dp_dev_name", CONFIG_A2DP_DEV_NAME, 0);
+        char * a2dp_dev_name = 	config_alloc_get_default(NVS_TYPE_STR, "a2dp_dev_name", CONFIG_A2DP_DEV_NAME, 0);
     	if(a2dp_dev_name  == NULL){
     		ESP_LOGW(TAG,"Unable to retrieve the a2dp device name from nvs");
     		esp_bt_dev_set_device_name(CONFIG_A2DP_DEV_NAME);

+ 2 - 1
components/driver_i2s/component.mk

@@ -9,4 +9,5 @@
 
 COMPONENT_ADD_INCLUDEDIRS := .
 CFLAGS += -Os -DPOSIX -DLINKALL -DLOOPBACK -DNO_FAAD -DEMBEDDED -DTREMOR_ONLY -DBYTES_PER_FRAME=4 	
-CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_DEBUG
+#CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_DEBUG
+CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_INFO

+ 24 - 6
components/io/led.c

@@ -21,6 +21,8 @@
 #define MAX_LED	8
 #define BLOCKTIME	10	// up to portMAX_DELAY
 
+static const char TAG[] = "led";
+
 static struct led_s {
 	gpio_num_t gpio;
 	bool on;
@@ -37,6 +39,7 @@ static void vCallbackFunction( TimerHandle_t xTimer ) {
 	if (!led->timer) return;
 	
 	led->on = !led->on;
+	ESP_LOGD(TAG,"led vCallbackFunction setting gpio %d level", led->gpio);
 	gpio_set_level(led->gpio, led->on ? led->onstate : !led->onstate);
 	
 	// was just on for a while
@@ -47,8 +50,9 @@ static void vCallbackFunction( TimerHandle_t xTimer ) {
 }
 
 bool led_blink_core(int idx, int ontime, int offtime, bool pushed) {
-	if (!leds[idx].gpio) return false;
+	if (!leds[idx].gpio || leds[idx].gpio<0 ) return false;
 	
+	ESP_LOGD(TAG,"led_blink_core");
 	if (leds[idx].timer) {
 		// normal requests waits if a pop is pending
 		if (!pushed && leds[idx].pushed) {
@@ -71,21 +75,28 @@ bool led_blink_core(int idx, int ontime, int offtime, bool pushed) {
 	leds[idx].offtime = offtime;	
 			
 	if (ontime == 0) {
+		ESP_LOGD(TAG,"led %d, setting reverse level", idx);
 		gpio_set_level(leds[idx].gpio, !leds[idx].onstate);
 	} else if (offtime == 0) {
+		ESP_LOGD(TAG,"led %d, setting level", idx);
 		gpio_set_level(leds[idx].gpio, leds[idx].onstate);
 	} else {
-		if (!leds[idx].timer) leds[idx].timer = xTimerCreate("ledTimer", ontime / portTICK_RATE_MS, pdFALSE, (void *)&leds[idx], vCallbackFunction);
+		if (!leds[idx].timer) {
+			ESP_LOGD(TAG,"led %d, Creating timer", idx);
+			leds[idx].timer = xTimerCreate("ledTimer", ontime / portTICK_RATE_MS, pdFALSE, (void *)&leds[idx], vCallbackFunction);
+		}
         leds[idx].on = true;
+        ESP_LOGD(TAG,"led %d, Setting gpio %d", idx, leds[idx].gpio);
 		gpio_set_level(leds[idx].gpio, leds[idx].onstate);
+		ESP_LOGD(TAG,"led %d, Starting timer.", idx);
 		if (xTimerStart(leds[idx].timer, BLOCKTIME) == pdFAIL) return false;
 	}
-	
+	ESP_LOGD(TAG,"led %d, led_blink_core_done", idx);
 	return true;
 } 
 
 bool led_unpush(int idx) {
-	if (!leds[idx].gpio) return false;
+	if (!leds[idx].gpio || leds[idx].gpio<0) return false;
 	
 	led_blink_core(idx, leds[idx].pushedon, leds[idx].pushedoff, true);
 	leds[idx].pushed = false;
@@ -94,14 +105,21 @@ bool led_unpush(int idx) {
 }	
 
 bool led_config(int idx, gpio_num_t gpio, int onstate) {
+	if(gpio<0){
+		ESP_LOGW(TAG,"LED GPIO not configured");
+		return false;
+	}
+	ESP_LOGD(TAG,"Index %d, GPIO %d, on state %s", idx, gpio, onstate>0?"On":"Off");
 	if (idx >= MAX_LED) return false;
 	leds[idx].gpio = gpio;
 	leds[idx].onstate = onstate;
-	
+	ESP_LOGD(TAG,"Index %d, GPIO %d, on state %s. Selecting GPIO pad", idx, gpio, onstate>0?"On":"Off");
 	gpio_pad_select_gpio(gpio);
+	ESP_LOGD(TAG,"Index %d, GPIO %d, on state %s. Setting direction to OUTPUT", idx, gpio, onstate>0?"On":"Off");
 	gpio_set_direction(gpio, GPIO_MODE_OUTPUT);
+	ESP_LOGD(TAG,"Index %d, GPIO %d, on state %s. Setting State to %d", idx, gpio, onstate>0?"On":"Off", onstate);
 	gpio_set_level(gpio, !onstate);
-	
+	ESP_LOGD(TAG,"Done configuring the led");
 	return true;
 }
 

+ 2 - 4
components/raop/raop.c

@@ -185,9 +185,6 @@ struct raop_ctx_s *raop_create(struct in_addr host, char *name,
 	LOG_INFO("starting mDNS with %s", id);
 	ESP_ERROR_CHECK( mdns_service_add(id, "_raop", "_tcp", ctx->port, txt, sizeof(txt) / sizeof(mdns_txt_item_t)) );
 	
-	/*
-	xTaskCreate((TaskFunction_t) rtsp_thread, "RTSP_thread", 8*1024, ctx, ESP_TASK_PRIO_MIN + 1, &ctx->thread);
-	*/
     ctx->xTaskBuffer = (StaticTask_t*) heap_caps_malloc(sizeof(StaticTask_t), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT);
     ctx->xStack = (StackType_t*) malloc(RTSP_STACK_SIZE);
 	ctx->thread = xTaskCreateStatic( (TaskFunction_t) rtsp_thread, "RTSP_thread", RTSP_STACK_SIZE, ctx, ESP_TASK_PRIO_MIN + 1, ctx->xStack, ctx->xTaskBuffer);
@@ -221,6 +218,7 @@ void raop_delete(struct raop_ctx_s *ctx) {
 	pthread_join(ctx->thread, NULL);
 #else
 	xTaskNotifyWait(0, 0, NULL, portMAX_DELAY);
+	vTaskDelete(ctx->thread);
 	free(ctx->xStack);
 	heap_caps_free(ctx->xTaskBuffer);
 #endif
@@ -369,7 +367,7 @@ static void *rtsp_thread(void *arg) {
 
 #ifndef WIN32
 	xTaskNotify(ctx->joiner, 0, eNoAction);
-	vTaskDelete(NULL);
+	vTaskSuspend(NULL);
 #endif
 
 	return NULL;

+ 1 - 1
components/raop/raop_sink.c

@@ -56,7 +56,7 @@ void raop_sink_init(raop_cmd_cb_t cmd_cb, raop_data_cb_t data_cb) {
     ESP_ERROR_CHECK( mdns_init() );
     ESP_ERROR_CHECK( mdns_hostname_set(hostname) );
         
-    char * sink_name_buffer= (char *)get_nvs_value_alloc(NVS_TYPE_STR, "airplay_name");
+    char * sink_name_buffer= (char *)config_alloc_get(NVS_TYPE_STR, "airplay_name");
     if(sink_name_buffer != NULL){
     	memset(sink_name, 0x00, sizeof(sink_name));
     	strncpy(sink_name,sink_name_buffer,sizeof(sink_name)-1 );

+ 2 - 1
components/raop/rtp.c

@@ -305,6 +305,7 @@ void rtp_end(rtp_t *ctx)
 		pthread_join(ctx->thread, NULL);
 #else
 		xTaskNotifyWait(0, 0, NULL, portMAX_DELAY);
+		vTaskDelete(ctx->thread);
 		free(ctx->xStack);
 		heap_caps_free(ctx->xTaskBuffer);
 #endif
@@ -709,7 +710,7 @@ static void *rtp_thread_func(void *arg) {
 
 #ifndef WIN32
 	xTaskNotify(ctx->joiner, 0, eNoAction);
-	vTaskDelete(NULL);
+	vTaskSuspend(NULL);
 #endif
 
 	return NULL;

+ 2 - 2
components/squeezelite-ota/cmd_ota.c

@@ -27,7 +27,7 @@
 #include "sdkconfig.h"
 
 static const char * TAG = "platform_esp32";
-extern esp_err_t start_ota(const char * bin_url, bool bFromAppMain);
+extern esp_err_t start_ota(const char * bin_url);
 static struct {
     struct arg_str *url;
     struct arg_end *end;
@@ -45,7 +45,7 @@ static int perform_ota_update(int argc, char **argv)
 
     esp_err_t err=ESP_OK;
     ESP_LOGI(TAG, "Starting ota: %s", url);
-    start_ota(url,false);
+    start_ota(url);
 
     if (err != ESP_OK) {
         ESP_LOGE(TAG, "%s", esp_err_to_name(err));

+ 2 - 2
components/squeezelite-ota/component.mk

@@ -8,6 +8,6 @@ COMPONENT_ADD_INCLUDEDIRS := .
 COMPONENT_ADD_INCLUDEDIRS += include
 COMPONENT_EXTRA_INCLUDES += $(PROJECT_PATH)/main/
 COMPONENT_EXTRA_INCLUDES += $(PROJECT_PATH)/components/tools
+CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_INFO -DCONFIG_OTA_ALLOW_HTTP=1
+#CFLAGS += -DLOG_LOCAL_LEVEL=ESP_LOG_DEBUG -DCONFIG_OTA_ALLOW_HTTP=1
 
-CFLAGS += -DLOG_LOCAL_LEVEL=ESP_LOG_DEBUG -DCONFIG_OTA_ALLOW_HTTP=1
-COMPONENT_EMBED_TXTFILES :=  ${PROJECT_PATH}/server_certs/github.pem

+ 364 - 106
components/squeezelite-ota/squeezelite-ota.c

@@ -6,12 +6,14 @@
    software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    CONDITIONS OF ANY KIND, either express or implied.
 */
+#ifndef LOG_LOCAL_LEVEL
+#define LOG_LOCAL_LEVEL ESP_LOG_INFO
+#endif
 #include "freertos/FreeRTOS.h"
 #include "freertos/task.h"
 #include "esp_system.h"
 #include "esp_event.h"
 #include "esp_log.h"
-#include "esp_ota_ops.h"
 #include "esp_https_ota.h"
 #include "string.h"
 #include <stdbool.h>
@@ -25,23 +27,21 @@
 #include <time.h>
 #include <sys/time.h>
 #include <stdarg.h>
-
-
-
-
-#include "esp_image_format.h"
 #include "esp_secure_boot.h"
 #include "esp_flash_encrypt.h"
 #include "esp_spi_flash.h"
 #include "sdkconfig.h"
 
 #include "esp_ota_ops.h"
+extern const char * get_certificate();
 
-#define OTA_FLASH_ERASE_BLOCK (1024*100)
 static const char *TAG = "squeezelite-ota";
-extern const uint8_t server_cert_pem_start[] asm("_binary_github_pem_start");
-extern const uint8_t server_cert_pem_end[] asm("_binary_github_pem_end");
-char * cert=NULL;
+char * ota_write_data = NULL;
+esp_http_client_handle_t ota_http_client = NULL;
+#define IMAGE_HEADER_SIZE sizeof(esp_image_header_t) + sizeof(esp_image_segment_header_t) + sizeof(esp_app_desc_t) + 1
+#define BUFFSIZE 4096
+#define HASH_LEN 32 /* SHA-256 digest length */
+
 
 static struct {
 	char status_text[81];
@@ -49,7 +49,6 @@ static struct {
 	uint32_t ota_total_len;
 	char * redirected_url;
 	char * current_url;
-	bool bRedirectFound;
 	bool bOTAStarted;
 	bool bInitialized;
 	uint8_t lastpct;
@@ -82,13 +81,12 @@ void triggerStatusJsonRefresh(bool bDelay,const char * status, ...){
 	    vTaskDelay(200 / portTICK_PERIOD_MS);  // wait here for a short amount of time.  This will help with refreshing the UI status
 		ESP_LOGD(TAG,"Done holding task...");
 	}
-	else
-	{
+	else {
 		ESP_LOGI(TAG,"%s",ota_status.status_text);
 		taskYIELD();
 	}
 }
-const char * ota_get_status(){
+const char *  ota_get_status(){
 	if(!ota_status.bInitialized)
 		{
 			memset(ota_status.status_text, 0x00,sizeof(ota_status.status_text));
@@ -129,6 +127,7 @@ esp_err_t _http_event_handler(esp_http_client_event_t *evt)
 //	char *header_value For HTTP_EVENT_ON_HEADER event_id, it’s store current http header value
 // --------------
     switch (evt->event_id) {
+
     case HTTP_EVENT_ERROR:
         ESP_LOGD(TAG, "HTTP_EVENT_ERROR");
         _printMemStats();
@@ -149,12 +148,11 @@ esp_err_t _http_event_handler(esp_http_client_event_t *evt)
         ESP_LOGD(TAG, "HTTP_EVENT_HEADER_SENT");
         break;
     case HTTP_EVENT_ON_HEADER:
-        ESP_LOGD(TAG, "HTTP_EVENT_ON_HEADER, status_code=%d, key=%s, value=%s",esp_http_client_get_status_code(evt->client),evt->header_key, evt->header_value);
+        ESP_LOGD(TAG, "HTTP_EVENT_ON_HEADER, key=%s, value=%s",evt->header_key, evt->header_value);
 		if (strcasecmp(evt->header_key, "location") == 0) {
 			FREE_RESET(ota_status.redirected_url);
         	ota_status.redirected_url=strdup(evt->header_value);
         	ESP_LOGW(TAG,"OTA will redirect to url: %s",ota_status.redirected_url);
-        	ota_status.bRedirectFound= true;
         }
         if (strcasecmp(evt->header_key, "content-length") == 0) {
         	ota_status.ota_total_len = atol(evt->header_value);
@@ -165,18 +163,6 @@ esp_err_t _http_event_handler(esp_http_client_event_t *evt)
     	if(!ota_status.bOTAStarted)  {
     		ESP_LOGD(TAG, "HTTP_EVENT_ON_DATA, status_code=%d, len=%d",esp_http_client_get_status_code(evt->client), evt->data_len);
     	}
-    	else if(ota_status.bOTAStarted && esp_http_client_get_status_code(evt->client) == 200 ){
-			ota_status.ota_actual_len+=evt->data_len;
-			if(ota_get_pct_complete()%5 == 0) ota_status.newpct = ota_get_pct_complete();
-			if(ota_status.lastpct!=ota_status.newpct )
-			{
-				gettimeofday(&tv, NULL);
-				uint32_t elapsed_ms= (tv.tv_sec-ota_status.OTA_start.tv_sec )*1000+(tv.tv_usec-ota_status.OTA_start.tv_usec)/1000;
-				ESP_LOGI(TAG,"OTA progress : %d/%d (%d pct), %d KB/s", ota_status.ota_actual_len, ota_status.ota_total_len, ota_status.newpct, elapsed_ms>0?ota_status.ota_actual_len*1000/elapsed_ms/1024:0);
-				wifi_manager_refresh_ota_json();
-				ota_status.lastpct=ota_status.newpct;
-			}
-        }
         break;
     case HTTP_EVENT_ON_FINISH:
         ESP_LOGD(TAG, "HTTP_EVENT_ON_FINISH");
@@ -188,133 +174,375 @@ esp_err_t _http_event_handler(esp_http_client_event_t *evt)
     return ESP_OK;
 }
 
-esp_err_t init_config(esp_http_client_config_t * conf, const char * url){
-	memset(conf, 0x00, sizeof(esp_http_client_config_t));
-
-	conf->cert_pem =cert==NULL?(char *)server_cert_pem_start:cert;
-	conf->event_handler = _http_event_handler;
-	conf->buffer_size = 2048*4;
-	conf->disable_auto_redirect=true;
-	conf->skip_cert_common_name_check = false;
-	conf->url = strdup(url);
-	conf->max_redirection_count = 0;
-
+/**
+ * Prepare the OTA HTTP client configuration and the download buffer.
+ *
+ * Clears ota_config, flags the OTA status as initialized, publishes an
+ * "Initializing..." status, then fills ota_config for the given url and
+ * allocates ota_write_data (BUFFSIZE + 1 bytes).
+ *
+ * @param url  firmware url; the pointer itself is stored in
+ *             ota_status.current_url (no copy), while ota_config.url
+ *             receives a strdup() copy whose result is not checked here.
+ * @return ESP_OK on success, ESP_FAIL when url is NULL or empty,
+ *         ESP_ERR_NO_MEM when the download buffer cannot be allocated.
+ */
+esp_err_t init_config(char * url){
+	memset(&ota_config, 0x00, sizeof(ota_config));
+	ota_status.bInitialized = true;
+	triggerStatusJsonRefresh(true,"Initializing...");
+	if(url==NULL || strlen(url)==0){
+		ESP_LOGE(TAG,"HTTP OTA called without a url");
+		return ESP_FAIL;
+	}
+	ota_status.current_url= url;
+	ota_config.cert_pem =get_certificate();
+	ota_config.event_handler = _http_event_handler;
+	ota_config.buffer_size = BUFFSIZE;
+	//ota_config.disable_auto_redirect=true;
+	ota_config.disable_auto_redirect=false;
+	ota_config.skip_cert_common_name_check = false;
+	ota_config.url = strdup(url);
+	ota_config.max_redirection_count = 3;
+	//ota_write_data = heap_caps_malloc(ota_config.buffer_size+1 , MALLOC_CAP_INTERNAL);
+	// one byte more than the http buffer size is reserved for the write buffer
+	ota_write_data = malloc(ota_config.buffer_size+1);
+	if(ota_write_data== NULL){
+		ESP_LOGE(TAG,"Error allocating the ota buffer");
+		return ESP_ERR_NO_MEM;
+	}
 	return ESP_OK;
 }
-esp_err_t _erase_last_boot_app_partition(void)
-{
-	uint16_t num_passes=0;
-	uint16_t remain_size=0;
-    const esp_partition_t *ota_partition=NULL;
-    const esp_partition_t *ota_data_partition=NULL;
-	esp_err_t err=ESP_OK;
+esp_partition_t * _get_ota_partition(esp_partition_subtype_t subtype){
+	esp_partition_t *ota_partition=NULL;
+	ESP_LOGI(TAG, "Looking for OTA partition.");
 
-    ESP_LOGI(TAG, "Looking for OTA partition.");
-	esp_partition_iterator_t it = esp_partition_find(ESP_PARTITION_TYPE_APP, ESP_PARTITION_SUBTYPE_APP_OTA_0 , NULL);
+	esp_partition_iterator_t it = esp_partition_find(ESP_PARTITION_TYPE_APP, subtype , NULL);
 	if(it == NULL){
 		ESP_LOGE(TAG,"Unable initialize partition iterator!");
 	}
 	else {
 		ota_partition = (esp_partition_t *) esp_partition_get(it);
 		if(ota_partition != NULL){
-			ESP_LOGI(TAG, "Found OTA partition.");
+			ESP_LOGI(TAG, "Found OTA partition: %s.",ota_partition->label);
 		}
 		else {
 			ESP_LOGE(TAG,"OTA partition not found!  Unable update application.");
 		}
 		esp_partition_iterator_release(it);
 	}
+	return ota_partition;
 
-	it = esp_partition_find(ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_DATA_OTA , NULL);
-	if(it == NULL){
-		ESP_LOGE(TAG,"Unable initialize partition iterator!");
+}
+
+
+
+/**
+ * Erase an OTA application partition in several passes.
+ *
+ * The size of one pass is read from the "ota_erase_blk" configuration entry
+ * (OTA_FLASH_ERASE_BLOCK when the entry is missing), rounded down to a
+ * multiple of SPI_FLASH_SEC_SIZE. A size that ends up being 0 falls back to
+ * OTA_FLASH_ERASE_BLOCK. Whatever is left after the last full pass is erased
+ * in one final call.
+ *
+ * @param ota_partition  partition to erase; ota_partition->size gives the
+ *                       number of bytes covered.
+ * @return ESP_OK when everything was erased, ESP_FAIL when ota_partition is
+ *         NULL, otherwise the first error reported by
+ *         esp_partition_erase_range().
+ */
+esp_err_t _erase_last_boot_app_partition(esp_partition_t *ota_partition)
+{
+	uint16_t num_passes=0;
+	// 32 bits wide: the remainder can be almost a full erase block, which does not fit in 16 bits
+	uint32_t remain_size=0;
+	uint32_t single_pass_size=0;
+	esp_err_t err=ESP_OK;
+
+	if(ota_partition==NULL){
+		ESP_LOGE(TAG,"No OTA partition to erase");
+		return ESP_FAIL;
+	}
+
+    char * ota_erase_size=config_alloc_get(NVS_TYPE_STR, "ota_erase_blk");
+	if(ota_erase_size!=NULL) {
+		single_pass_size = atol(ota_erase_size);
+		ESP_LOGD(TAG,"OTA Erase block size is %d (from string: %s)",single_pass_size, ota_erase_size );
+		free(ota_erase_size);
 	}
 	else {
-		ota_data_partition = (esp_partition_t *) esp_partition_get(it);
-
-		if(ota_data_partition != NULL){
-			ESP_LOGI(TAG, "Found OTA data partition.");
-		}
-		else {
-			ESP_LOGE(TAG,"OTA data partition not found!  Unable update application.");
-		}
-		esp_partition_iterator_release(it);
+		ESP_LOGW(TAG,"OTA Erase block config not found");
+		single_pass_size = OTA_FLASH_ERASE_BLOCK;
 	}
 
-	if(ota_data_partition==NULL || ota_partition==NULL){
-		return ESP_FAIL;
+	if(single_pass_size % SPI_FLASH_SEC_SIZE !=0){
+		uint32_t temp_single_pass_size = single_pass_size-(single_pass_size % SPI_FLASH_SEC_SIZE);
+		ESP_LOGW(TAG,"Invalid erase block size of %u. Value should be a multiple of %d and will be adjusted to %u.", single_pass_size, SPI_FLASH_SEC_SIZE,temp_single_pass_size);
+		single_pass_size=temp_single_pass_size;
 	}
-	ESP_LOGI(TAG,"Erasing flash ");
-	num_passes=ota_partition->size/OTA_FLASH_ERASE_BLOCK;
-	remain_size=ota_partition->size-(num_passes*OTA_FLASH_ERASE_BLOCK);
-
+	if(single_pass_size==0){
+		// a configured value below one sector rounds down to 0, which would divide by zero below
+		ESP_LOGW(TAG,"Erase block size is 0. Falling back to the default of %u.",(unsigned)OTA_FLASH_ERASE_BLOCK);
+		single_pass_size = OTA_FLASH_ERASE_BLOCK;
+	}
+	ESP_LOGI(TAG,"Erasing flash partition of size %u in blocks of %d bytes", ota_partition->size, single_pass_size);
+	num_passes=ota_partition->size/single_pass_size;
+	remain_size=ota_partition->size-(num_passes*single_pass_size);
+	ESP_LOGI(TAG,"Erasing in %d passes with blocks of %d bytes ", num_passes,single_pass_size);
 	for(uint16_t i=0;i<num_passes;i++){
-		err=esp_partition_erase_range(ota_partition, 0, ota_partition->size);
-		ESP_LOGD(TAG,"Erasing flash (%u%%)",i/num_passes);
-		triggerStatusJsonRefresh(i%5==0?true:false,"Erasing flash (%u/%u)",i,num_passes);
-		taskYIELD();
+		// scale before dividing, otherwise the integer division always yields 0
+		ESP_LOGD(TAG,"Erasing flash (%u%%)",(unsigned)(i*100/num_passes));
+		ESP_LOGD(TAG,"Pass %d of %d, with chunks of %d bytes, from %d to %d", i+1, num_passes,single_pass_size,i*single_pass_size,i*single_pass_size+single_pass_size);
+		err=esp_partition_erase_range(ota_partition, i*single_pass_size, single_pass_size);
 		if(err!=ESP_OK) return err;
+//		triggerStatusJsonRefresh(i%10==0?true:false,"Erasing flash (%u/%u)",i,num_passes);
+		if(i%2) {
+			triggerStatusJsonRefresh(false,"Erasing flash (%u/%u)",i,num_passes);
+		}
+		vTaskDelay(200/ portTICK_PERIOD_MS);  // wait here for a short amount of time.  This will help with reducing WDT errors
 	}
 	if(remain_size>0){
 		err=esp_partition_erase_range(ota_partition, ota_partition->size-remain_size, remain_size);
 		if(err!=ESP_OK) return err;
 	}
-	triggerStatusJsonRefresh(false,"Erasing flash (100%%)");
+	triggerStatusJsonRefresh(true,"Erasing flash complete.");
 	taskYIELD();
 	return ESP_OK;
 }
 
-void ota_task(void *pvParameter)
+/*
+ * Tell whether a response with this HTTP status has to be handled and the
+ * request issued again: true for "moved permanently", "found" and
+ * "unauthorized", false for every other status.
+ */
+static bool process_again(int status_code)
+{
+    return status_code == HttpStatus_MovedPermanently
+        || status_code == HttpStatus_Found
+        || status_code == HttpStatus_Unauthorized;
+}
+/**
+ * React to the status code of an HTTP response.
+ *
+ * For 301/302 the client is pointed at the redirection target; for 401 the
+ * authentication data is added to the client. In both cases the remaining
+ * response body is then read and discarded. Any other status code needs no
+ * handling.
+ *
+ * @param http_client  client whose response is being processed
+ * @param status_code  HTTP status code of that response
+ * @return ESP_OK when nothing had to be done or the handling succeeded,
+ *         the error from esp_http_client_set_redirection() when the
+ *         redirection failed, ESP_ERR_NO_MEM when the scratch buffer could
+ *         not be allocated, ESP_FAIL when reading the body failed.
+ */
+static esp_err_t _http_handle_response_code(esp_http_client_handle_t http_client, int status_code)
+{
+    // Must start as ESP_OK: for statuses such as 200 none of the branches below assigns it.
+    esp_err_t err = ESP_OK;
+    if (status_code == HttpStatus_MovedPermanently || status_code == HttpStatus_Found) {
+    	ESP_LOGW(TAG, "Handling HTTP redirection. ");
+        err = esp_http_client_set_redirection(http_client);
+        if (err != ESP_OK) {
+            ESP_LOGE(TAG, "URL redirection Failed. %s", esp_err_to_name(err));
+            return err;
+        }
+    } else if (status_code == HttpStatus_Unauthorized) {
+    	ESP_LOGW(TAG, "Handling Unauthorized. ");
+        esp_http_client_add_auth(http_client);
+    }
+    ESP_LOGD(TAG, "Redirection done, checking if we need to read the data. ");
+    if (process_again(status_code)) {
+    	//char * local_buff = heap_caps_malloc(ota_config.buffer_size, MALLOC_CAP_INTERNAL);
+    	char * local_buff = malloc(ota_config.buffer_size+1);
+    	if(local_buff==NULL){
+    		ESP_LOGE(TAG,"Failed to allocate internal memory buffer for http processing");
+    		return ESP_ERR_NO_MEM;
+    	}
+        // Read and discard the body of the intermediate response.
+        while (1) {
+        	ESP_LOGD(TAG, "Reading data chunk. ");
+            int data_read = esp_http_client_read(http_client, local_buff, ota_config.buffer_size);
+            if (data_read < 0) {
+                ESP_LOGE(TAG, "Error: SSL data read error");
+                err= ESP_FAIL;
+                break;
+            } else if (data_read == 0) {
+            	ESP_LOGD(TAG, "No more data. ");
+            	err= ESP_OK;
+            	break;
+            }
+        }
+        FREE_RESET(local_buff);
+    }
+
+    return err;
+}
+static esp_err_t _http_connect(esp_http_client_handle_t http_client)
 {
-	char * passedURL=(char *)pvParameter;
+    esp_err_t err = ESP_FAIL;
+    int status_code, header_ret;
+    do {
+    	ESP_LOGD(TAG, "connecting the http client. ");
+        err = esp_http_client_open(http_client, 0);
+        if (err != ESP_OK) {
+            ESP_LOGE(TAG, "Failed to open HTTP connection: %s", esp_err_to_name(err));
+            return err;
+        }
+        ESP_LOGD(TAG, "Fetching headers");
+        header_ret = esp_http_client_fetch_headers(http_client);
+        if (header_ret < 0) {
+        	// Error found
+            return header_ret;
+        }
+        ESP_LOGD(TAG, "HTTP Header fetch completed, found content length of %d",header_ret);
+        status_code = esp_http_client_get_status_code(http_client);
+        ESP_LOGD(TAG, "HTTP status code was %d",status_code);
 
-	ota_status.bInitialized = true;
+
+
+        err = _http_handle_response_code(http_client, status_code);
+        if (err != ESP_OK) {
+            return err;
+        }
+    } while (process_again(status_code));
+    return err;
+}
+/**
+ * Release everything the OTA task holds and end the task through
+ * task_fatal_error().
+ *
+ * @param message  optional printf-style format for a final status; when not
+ *                 NULL it is expanded with the following arguments,
+ *                 published through triggerStatusJsonRefresh() and logged as
+ *                 an error. Pass NULL to clean up silently.
+ */
+void ota_task_cleanup(const char * message, ...){
+	ota_status.bOTAThreadStarted=false;
+	if(message!=NULL){
+		// Expand the arguments here: a va_list cannot be forwarded through
+		// the "..." of triggerStatusJsonRefresh.
+		char formatted[sizeof(ota_status.status_text)];
+		va_list args;
+		va_start(args, message);
+		vsnprintf(formatted, sizeof(formatted), message, args);
+		va_end(args);
+		triggerStatusJsonRefresh(true,"%s",formatted);
+		ESP_LOGE(TAG, "%s",ota_status.status_text);
+	}
+	FREE_RESET(ota_status.redirected_url);
+	FREE_RESET(ota_status.current_url);
+	FREE_RESET(ota_write_data);
+	if(ota_http_client!=NULL) {
+		esp_http_client_cleanup(ota_http_client);
+		ota_http_client=NULL;
+	}
+	ota_status.bOTAStarted = false;
+	task_fatal_error();
+}
+void ota_task(void *pvParameter)
+{
+	esp_err_t err = ESP_OK;
+	size_t buffer_size = BUFFSIZE;
 	ESP_LOGD(TAG, "HTTP ota Thread started");
-	triggerStatusJsonRefresh(true,"Initializing...");
-	ota_status.bRedirectFound=false;
-	if(passedURL==NULL || strlen(passedURL)==0){
-		ESP_LOGE(TAG,"HTTP OTA called without a url");
-		triggerStatusJsonRefresh(true,"Updating needs a URL!");
-		ota_status.bOTAThreadStarted=false;
-		vTaskDelete(NULL);
-		return ;
+    const esp_partition_t *configured = esp_ota_get_boot_partition();
+    const esp_partition_t *running = esp_ota_get_running_partition();
+    const esp_partition_t * update_partition = esp_ota_get_next_update_partition(NULL);
+    ESP_LOGI(TAG, "esp_ota_get_next_update_partition returned : partition [%s] subtype %d at offset 0x%x",
+    			update_partition->label, update_partition->subtype, update_partition->address);
+
+    if (configured != running) {
+        ESP_LOGW(TAG, "Configured OTA boot partition at offset 0x%08x, but running from offset 0x%08x", configured->address, running->address);
+        ESP_LOGW(TAG, "(This can happen if either the OTA boot data or preferred boot image become corrupted somehow.)");
+    }
+    ESP_LOGI(TAG, "Running partition [%s] type %d subtype %d (offset 0x%08x)", running->label, running->type, running->subtype, running->address);
+    _printMemStats();
+
+
+	ESP_LOGI(TAG,"Initializing OTA configuration");
+	err = init_config(pvParameter);
+	if(err!=ESP_OK){
+		ota_task_cleanup("Error: Failed to initialize OTA.");
+		return;
 	}
-	ota_status.current_url= strdup(passedURL);
-	FREE_RESET(pvParameter);
 
+	/* Locate and erase ota application partition */
 	ESP_LOGW(TAG,"****************  Expecting WATCHDOG errors below during flash erase. This is OK and not to worry about **************** ");
 	triggerStatusJsonRefresh(true,"Erasing OTA partition");
-	esp_err_t err=_erase_last_boot_app_partition();
+	esp_partition_t *ota_partition = _get_ota_partition(ESP_PARTITION_SUBTYPE_APP_OTA_0);
+	if(ota_partition == NULL){
+		ESP_LOGE(TAG,"Unable to locate OTA application partition. ");
+        ota_task_cleanup("Error: OTA application partition not found. (%s)",esp_err_to_name(err));
+        return;
+	}
+	_printMemStats();
+	err=_erase_last_boot_app_partition(ota_partition);
 	if(err!=ESP_OK){
-		ESP_LOGE(TAG,"Unable to erase last APP partition. Error: %s",esp_err_to_name(err));
-		FREE_RESET(ota_status.current_url);
-		FREE_RESET(ota_status.redirected_url);
-
-	    vTaskDelete(NULL);
+		ota_task_cleanup("Error: Unable to erase last APP partition. (%s)",esp_err_to_name(err));
+		return;
 	}
 
-	ESP_LOGI(TAG,"Calling esp_https_ota");
-	init_config(&ota_config,ota_status.bRedirectFound?ota_status.redirected_url:ota_status.current_url);
+	_printMemStats();
 	ota_status.bOTAStarted = true;
 	triggerStatusJsonRefresh(true,"Starting OTA...");
-	err = esp_https_ota(&ota_config);
+    ota_http_client = esp_http_client_init(&ota_config);
+    if (ota_http_client == NULL) {
+        ota_task_cleanup("Error: Failed to initialize HTTP connection.");
+        return;
+    }
+    _printMemStats();
+    // Open the http connection and follow any redirection
+    err = _http_connect(ota_http_client);
+    if (err != ESP_OK) {
+       ota_task_cleanup("Error: HTTP Start read failed. (%s)",esp_err_to_name(err));
+       return;
+    }
+
+    _printMemStats();
+
+    esp_ota_handle_t update_handle = 0 ;
+    int binary_file_length = 0;
+
+    /*deal with all receive packet*/
+    bool image_header_was_checked = false;
+    while (1) {
+        int data_read = esp_http_client_read(ota_http_client, ota_write_data, buffer_size);
+        if (data_read < 0) {
+            ota_task_cleanup("Error: Data read error");
+            return;
+        } else if (data_read > 0) {
+        	if (image_header_was_checked == false) {
+                esp_app_desc_t new_app_info;
+                if (data_read > sizeof(esp_image_header_t) + sizeof(esp_image_segment_header_t) + sizeof(esp_app_desc_t)) {
+                    // check current version with downloading
+                    memcpy(&new_app_info, &ota_write_data[sizeof(esp_image_header_t) + sizeof(esp_image_segment_header_t)], sizeof(esp_app_desc_t));
+                    ESP_LOGI(TAG, "New firmware version: %s", new_app_info.version);
+
+                    esp_app_desc_t running_app_info;
+                    if (esp_ota_get_partition_description(running, &running_app_info) == ESP_OK) {
+                        ESP_LOGI(TAG, "Running recovery version: %s", running_app_info.version);
+                    }
+
+                    const esp_partition_t* last_invalid_app = esp_ota_get_last_invalid_partition();
+                    esp_app_desc_t invalid_app_info;
+                    if (esp_ota_get_partition_description(last_invalid_app, &invalid_app_info) == ESP_OK) {
+                        ESP_LOGI(TAG, "Last invalid firmware version: %s", invalid_app_info.version);
+                    }
+
+                    // check current version with last invalid partition
+//                    if (last_invalid_app != NULL) {
+//                        if (memcmp(invalid_app_info.version, new_app_info.version, sizeof(new_app_info.version)) == 0) {
+//                            ESP_LOGW(TAG, "New version is the same as invalid version.");
+//                            ESP_LOGW(TAG, "Previously, there was an attempt to launch the firmware with %s version, but it failed.", invalid_app_info.version);
+//                            ESP_LOGW(TAG, "The firmware has been rolled back to the previous version.");
+//                    		  ota_task_cleanup("esp_ota_begin failed (%s)", esp_err_to_name(err));
+//                        }
+//                    }
+
+                    if (memcmp(new_app_info.version, running_app_info.version, sizeof(new_app_info.version)) == 0) {
+                        ESP_LOGW(TAG, "Current running version is the same as a new.");
+                    }
+
+                    image_header_was_checked = true;
+
+                    // Call OTA Begin with a small partition size - this drives the erase operation which was already done;
+                    err = esp_ota_begin(ota_partition, 512, &update_handle);
+                    if (err != ESP_OK) {
+                        ota_task_cleanup("esp_ota_begin failed (%s)", esp_err_to_name(err));
+                        return;
+                    }
+					ESP_LOGD(TAG, "esp_ota_begin succeeded");
+                } else {
+                    ota_task_cleanup("Error: Binary file too large for the current partition");
+                    return;
+                }
+            }
+            err = esp_ota_write( update_handle, (const void *)ota_write_data, data_read);
+            if (err != ESP_OK) {
+                ota_task_cleanup("Error: OTA Partition write failure. (%s)",esp_err_to_name(err));
+                return;
+            }
+            binary_file_length += data_read;
+            ESP_LOGD(TAG, "Written image length %d", binary_file_length);
+			ota_status.ota_actual_len=binary_file_length;
+			if(ota_get_pct_complete()%5 == 0) ota_status.newpct = ota_get_pct_complete();
+			if(ota_status.lastpct!=ota_status.newpct ) {
+				gettimeofday(&tv, NULL);
+				uint32_t elapsed_ms= (tv.tv_sec-ota_status.OTA_start.tv_sec )*1000+(tv.tv_usec-ota_status.OTA_start.tv_usec)/1000;
+				ESP_LOGI(TAG,"OTA progress : %d/%d (%d pct), %d KB/s", ota_status.ota_actual_len, ota_status.ota_total_len, ota_status.newpct, elapsed_ms>0?ota_status.ota_actual_len*1000/elapsed_ms/1024:0);
+				triggerStatusJsonRefresh(true,"Downloading & writing update.");
+				ota_status.lastpct=ota_status.newpct;
+			}
+			taskYIELD();
+
+        } else if (data_read == 0) {
+            ESP_LOGI(TAG, "Connection closed");
+            break;
+        }
+    }
+
+    ESP_LOGI(TAG, "Total Write binary data length: %d", binary_file_length);
+    if (ota_status.ota_total_len != binary_file_length) {
+        ota_task_cleanup("Error: Error in receiving complete file");
+        return;
+    }
+    _printMemStats();
+
+    err = esp_ota_end(update_handle);
+    if (err != ESP_OK) {
+        ota_task_cleanup("Error: %s",esp_err_to_name(err));
+        return;
+     }
+    _printMemStats();
+    err = esp_ota_set_boot_partition(ota_partition);
     if (err == ESP_OK) {
+    	ESP_LOGI(TAG,"OTA Process completed successfully!");
     	triggerStatusJsonRefresh(true,"Success!");
+    	vTaskDelay(1500/ portTICK_PERIOD_MS);  // wait here to give the UI a chance to refresh
         esp_restart();
     } else {
-    	triggerStatusJsonRefresh(true,"Error: %s",esp_err_to_name(err));
-    	wifi_manager_refresh_ota_json();
-        ESP_LOGE(TAG, "Firmware upgrade failed with error : %s", esp_err_to_name(err));
-        ota_status.bOTAThreadStarted=false;
+        ota_task_cleanup("Error: Unable to update boot partition [%s]",esp_err_to_name(err));
+        return;
     }
-	FREE_RESET(ota_status.current_url);
-	FREE_RESET(ota_status.redirected_url);
-
-    vTaskDelete(NULL);
+    ota_task_cleanup(NULL);
+    return;
 }
 
 esp_err_t process_recovery_ota(const char * bin_url){
 	int ret = 0;
+	uint16_t stack_size, task_priority;
     if(ota_status.bOTAThreadStarted){
 		ESP_LOGE(TAG,"OTA Already started. ");
 		return ESP_FAIL;
@@ -329,11 +557,33 @@ esp_err_t process_recovery_ota(const char * bin_url){
 #define OTA_CORE 0
 #warning "OTA will run on core 0"
 #else
-#warning "OTA will run on core 1"
+#pragma message "OTA will run on core 1"
 #define OTA_CORE 1
 #endif
     ESP_LOGI(TAG, "Starting ota on core %u for : %s", OTA_CORE,urlPtr);
-    ret=xTaskCreatePinnedToCore(&ota_task, "ota_task", 1024*20, (void *)urlPtr, ESP_TASK_MAIN_PRIO+1, NULL, OTA_CORE);
+    char * num_buffer=config_alloc_get(NVS_TYPE_STR, "ota_stack");
+  	if(num_buffer!=NULL) {
+  		stack_size= atol(num_buffer);
+  		free(num_buffer);
+  		num_buffer=NULL;
+  	}
+  	else {
+		ESP_LOGW(TAG,"OTA stack size config not found");
+  		stack_size = OTA_STACK_SIZE;
+  	}
+  	num_buffer=config_alloc_get(NVS_TYPE_STR, "ota_prio");
+	if(num_buffer!=NULL) {
+		task_priority= atol(num_buffer);
+		free(num_buffer);
+		num_buffer=NULL;
+	}
+	else {
+		ESP_LOGW(TAG,"OTA task priority not found");
+		task_priority= OTA_TASK_PRIOTITY;
+  	}
+  	ESP_LOGD(TAG,"OTA task stack size %d, priority %d (%d %s ESP_TASK_MAIN_PRIO)",stack_size , task_priority, abs(task_priority-ESP_TASK_MAIN_PRIO), task_priority-ESP_TASK_MAIN_PRIO>0?"above":"below");
+    ret=xTaskCreatePinnedToCore(&ota_task, "ota_task", stack_size , (void *)urlPtr, task_priority, NULL, OTA_CORE);
+    //ret=xTaskCreate(&ota_task, "ota_task", 1024*20, (void *)urlPtr, ESP_TASK_MAIN_PRIO+2, NULL);
     if (ret != pdPASS)  {
             ESP_LOGI(TAG, "create thread %s failed", "ota_task");
             return ESP_FAIL;
@@ -341,14 +591,22 @@ esp_err_t process_recovery_ota(const char * bin_url){
     return ESP_OK;
 }
 
-esp_err_t start_ota(const char * bin_url, bool bFromAppMain)
+esp_err_t start_ota(const char * bin_url)
 {
-//	uint8_t * get_nvs_value_alloc_default(NVS_TYPE_BLOB, "certs", server_cert_pem_start , server_cert_pem_end-server_cert_pem_start);
+//	uint8_t * config_alloc_get_default(NVS_TYPE_BLOB, "certs", server_cert_pem_start , server_cert_pem_end-server_cert_pem_start);
 #if RECOVERY_APPLICATION
 	return process_recovery_ota(bin_url);
 #else
 		ESP_LOGW(TAG, "Called to update the firmware from url: %s",bin_url);
-		store_nvs_value(NVS_TYPE_STR, "fwurl", bin_url);
+		if(config_set_value(NVS_TYPE_STR, "fwurl", bin_url) != ESP_OK){
+			ESP_LOGE(TAG,"Failed to save the OTA url into nvs cache");
+			return ESP_FAIL;
+		}
+
+		if(!wait_for_commit()){
+			ESP_LOGW(TAG,"Unable to commit configuration. ");
+		}
+
 		ESP_LOGW(TAG, "Rebooting to recovery to complete the installation");
 	return guided_factory();
 	return ESP_OK;

+ 23 - 1
components/squeezelite-ota/squeezelite-ota.h

@@ -7,13 +7,35 @@
 
 #pragma once
 #include "esp_attr.h"
+#include "esp_image_format.h"
+#include "esp_ota_ops.h"
+
 #if RECOVERY_APPLICATION
 #define CODE_RAM_LOCATION
+#define RECOVERY_IRAM_FUNCTION IRAM_ATTR
 #else
+#define RECOVERY_IRAM_FUNCTION
 #define CODE_RAM_LOCATION
 #endif
 
-esp_err_t start_ota(const char * bin_url, bool bFromAppMain);
+
+
+// The erase block size must be a multiple of the flash sector size. If a value that is
+// not a multiple is configured, the OTA process rounds it down to the nearest multiple.
+// We need to strike a balance between speed and stability: the larger the blocks, the
+// faster the erase, but the more likely the system is to throw WDT errors while the
+// flash chip is locked, and the more likely the OTA process is to derail.
+#define OTA_FLASH_ERASE_BLOCK (uint32_t)249856
+
+// We're running the OTA without squeezelite in the background, so we can set a comfortable
+// amount of stack to avoid overflows.
+#define OTA_STACK_SIZE 10240
+
+// To speed up processing, we set this priority to a number that is higher than normal
+// tasks
+#define OTA_TASK_PRIOTITY 6
+
+esp_err_t start_ota(const char * bin_url);
 const char * ota_get_status();
 uint8_t ota_get_pct_complete();
 

+ 5 - 0
components/squeezelite/decode_external.c

@@ -35,6 +35,10 @@ extern struct buffer *outputbuf;
 // this is the only system-wide loglevel variable
 extern log_level loglevel;
 
+// not great to have these here, but they should not be in embedded.h
+bool enable_bt_sink;
+bool enable_airplay;
+
 #define RAOP_OUTPUT_SIZE (RAOP_SAMPLE_RATE * 2 * 2 * 2 * 1.2)
 
 static raop_event_t	raop_state;
@@ -213,6 +217,7 @@ void raop_sink_cmd_handler(raop_event_t event, void *param)
 			break;
 		}
 		case RAOP_SETUP:
+			// we need a fair bit of space for RTP process
 			_buf_resize(outputbuf, RAOP_OUTPUT_SIZE);
 			LOG_INFO("resizing buffer %u", outputbuf->size);
 			break;

+ 3 - 5
components/squeezelite/embedded.h

@@ -1,6 +1,6 @@
 #ifndef EMBEDDED_H
 #define EMBEDDED_H
-#include "esp_system.h"
+
 #include <inttypes.h>
 
 /* 	must provide 
@@ -12,6 +12,7 @@
 		- exit
 		- gettime_ms
 		- BASE_CAP
+		- EXT_BSS 		
 	recommended to add platform specific include(s) here
 */	
 	
@@ -25,6 +26,7 @@
 #define IR_THREAD_STACK_SIZE      6 * 1024
 
 //#define BASE_CAP "Model=squeezelite,AccuratePlayPoints=0,HasDigitalOut=1,HasPolarityInversion=1,Firmware=" VERSION	
+#define EXT_BSS __attribute__((section(".ext_ram.bss"))) 
 
 typedef int16_t   s16_t;
 typedef int32_t   s32_t;
@@ -41,10 +43,6 @@ uint32_t 	_gettime_ms_(void);
 int			pthread_create_name(pthread_t *thread, _CONST pthread_attr_t  *attr, 
 				   void *(*start_routine)( void * ), void *arg, char *name);
 			
-// these are here as they can be #define to nothing
-extern bool enable_bt_sink;
-extern bool enable_airplay;
-
 void 		register_external(void);
 void 		deregister_external(void);
 				   

+ 6 - 5
components/squeezelite/helix-aac.c

@@ -45,6 +45,7 @@ struct helixaac {
 	HAACDecoder hAac;
 	u8_t type;
 	u8_t *write_buf;
+	u8_t *wrap_buf;
 	// following used for mp4 only
 	u32_t consume;
 	u32_t pos;
@@ -418,13 +419,11 @@ static decode_state helixaac_decode(void) {
 	}
 
 	if (bytes_wrap < WRAPBUF_LEN && bytes_total > WRAPBUF_LEN) {
-
 		// make a local copy of frames which may have wrapped round the end of streambuf
-		static u8_t buf[WRAPBUF_LEN];
-		memcpy(buf, streambuf->readp, bytes_wrap);
-		memcpy(buf + bytes_wrap, streambuf->buf, WRAPBUF_LEN - bytes_wrap);
+		memcpy(a->wrap_buf, streambuf->readp, bytes_wrap);
+		memcpy(a->wrap_buf + bytes_wrap, streambuf->buf, WRAPBUF_LEN - bytes_wrap);
 		
-		sptr = buf;
+		sptr = a->wrap_buf;
 		bytes = bytes_wrap = WRAPBUF_LEN;
 	} else {
 
@@ -590,6 +589,7 @@ static void helixaac_open(u8_t size, u8_t rate, u8_t chan, u8_t endianness) {
 	} else {
 		a->hAac = HAAC(a, InitDecoder);	
 		a->write_buf = malloc(FRAME_BUF * BYTES_PER_FRAME);
+		a->wrap_buf = malloc(WRAPBUF_LEN);
 	}
 }
 
@@ -605,6 +605,7 @@ static void helixaac_close(void) {
 		a->stsc = NULL;
 	}
 	free(a->write_buf);
+	free(a->wrap_buf);
 }
 
 static bool load_helixaac() {

+ 23 - 8
components/squeezelite/output_i2s.c

@@ -109,7 +109,8 @@ extern struct outputstate output;
 extern struct buffer *streambuf;
 extern struct buffer *outputbuf;
 extern u8_t *silencebuf;
-extern bool jack_mutes_amp;
+
+bool jack_mutes_amp = false;
 
 static log_level loglevel;
 static bool running, isI2SStarted;
@@ -120,6 +121,7 @@ static u8_t *obuf;
 static frames_t oframes;
 static bool spdif;
 static size_t dma_buf_frames;
+static int jack_status = -1;		// 0 = inserted
 
 DECLARE_ALL_MIN_MAX;
 
@@ -154,7 +156,7 @@ static void spdif_convert(ISAMPLE_T *src, size_t frames, u32_t *dst, size_t *cou
 
 #define I2C_PORT	0
 #define I2C_ADDR	0x4c
-#define VOLUME_GPIO	33
+#define VOLUME_GPIO	14
 #define JACK_GPIO	34
 
 struct tas575x_cmd_s {
@@ -196,6 +198,7 @@ void output_init_i2s(log_level level, char *device, unsigned output_buf_size, ch
 	loglevel = level;
 	
 #ifdef TAS575x
+	LOG_INFO("Initializing TAS575x ");
 	gpio_pad_select_gpio(JACK_GPIO);
 	gpio_set_direction(JACK_GPIO, GPIO_MODE_INPUT);
 			
@@ -358,7 +361,11 @@ void output_close_i2s(void) {
  */
 bool output_volume_i2s(unsigned left, unsigned right) {
 #ifdef TAS575x	
-	if (!spdif) gpio_set_level(VOLUME_GPIO, left || right);
+
+	if (!spdif) {
+		LOG_INFO("Setting TAS575x volume GPIO");
+		gpio_set_level(VOLUME_GPIO, left || right);
+	}
 #endif	
  return false;	
 } 
@@ -433,10 +440,18 @@ static void *output_thread_i2s() {
 			
 		TIME_MEASUREMENT_START(timer_start);
 		
-		LOCK;
-		if(jack_mutes_amp){
-			// todo: implement some muting logic
+#ifdef TAS575x
+		// handle jack insertion as a polling function (to avoid to have to do de-bouncing)
+		if (gpio_get_level(JACK_GPIO) != jack_status) {
+			jack_status = gpio_get_level(JACK_GPIO);
+			if (jack_mutes_amp) {
+				//gpio_set_level(VOLUME_GPIO, jack_status);
+				LOG_INFO("Changing jack status %d", jack_status);
+			}	
 		}
+#endif
+		LOCK;
+		
 		// manage led display
 		if (state != output.state) {
 			LOG_INFO("Output state is %d", output.state);
@@ -459,7 +474,7 @@ static void *output_thread_i2s() {
 		} else if (output.state == OUTPUT_STOPPED) {
 			synced = false;
 		}
-		
+					
 		oframes = 0;
 		output.updated = gettime_ms();
 		output.frames_played_dmp = output.frames_played;
@@ -488,7 +503,7 @@ static void *output_thread_i2s() {
 		}
 		
 		UNLOCK;
-		
+				
 		// now send all the data
 		TIME_MEASUREMENT_START(timer_start);
 		

+ 4 - 3
components/squeezelite/slimproto.c

@@ -533,14 +533,14 @@ static void process(u8_t *pack, int len) {
 static bool running;
 
 static void slimproto_run() {
-	static u8_t buffer[MAXBUF];
+	static u8_t EXT_BSS buffer[MAXBUF];
 	int  expect = 0;
 	int  got    = 0;
 	u32_t now;
 	static u32_t last = 0;
 	event_handle ehandles[2];
 	int timeouts = 0;
-
+	
 	set_readwake_handles(ehandles, sock, wake_e);
 
 	while (running && !new_server) {
@@ -623,12 +623,13 @@ static void slimproto_run() {
 			bool _start_output = false;
 			decode_state _decode_state;
 			disconnect_code disconnect_code;
-			static char header[MAX_HEADER];
+			static char EXT_BSS header[MAX_HEADER];
 			size_t header_len = 0;
 #if IR
 			bool _sendIR   = false;
 			u32_t ir_code, ir_ts;
 #endif
+			
 			last = now;
 
 			LOCK_S;

+ 4 - 0
components/squeezelite/squeezelite.h

@@ -442,6 +442,10 @@ void _wake_create(event_event*);
 #define wake_close(e) CloseHandle(e)
 #endif
 
+#ifndef EXT_BSS
+#define EXT_BSS
+#endif
+
 // printf/scanf formats for u64_t
 #if (LINUX && __WORDSIZE == 64) || (FREEBSD && __LP64__)
 #define FMT_u64 "%lu"

+ 3 - 2
components/wifi-manager/CMakeLists.txt

@@ -1,7 +1,8 @@
-idf_component_register(SRCS "dns_server.c" "http_server.c" "json.c" "wifi_manager.c"
+idf_component_register(SRCS "dns_server.c" "http_server.c" "wifi_manager.c"
                        INCLUDE_DIRS .
                        REQUIRES esp_common 
-                       PRIV_REQUIRES newlib freertos  spi_flash nvs_flash mdns pthread wpa_supplicant cmd_system json
+                       PRIV_REQUIRES newlib freertos  spi_flash nvs_flash mdns pthread wpa_supplicant cmd_system 
                        EMBED_FILES style.css code.js index.html bootstrap.min.css.gz jquery.min.js.gz popper.min.js.gz bootstrap.min.js.gz
 
 )
+ 

+ 40 - 16
components/wifi-manager/code.js

@@ -28,7 +28,7 @@ var checkStatusInterval = null;
 
 var StatusIntervalActive = false;
 var RefreshAPIIntervalActive = false;
-
+var LastRecoveryState=null;
 var output = '';
 
 function stopCheckStatusInterval(){
@@ -54,7 +54,7 @@ function startCheckStatusInterval(){
 
 function startRefreshAPInterval(){
     RefreshAPIIntervalActive = true;
-    refreshAPInterval = setTimeout(refreshAP(false), 2800);
+    refreshAPInterval = setTimeout(refreshAP(false), 4500); // leave enough time for the initial scan
 }
 
 function RepeatCheckStatusInterval(){
@@ -176,6 +176,16 @@ $(document).ready(function(){
         $( "#wifi" ).slideDown( "fast", function() {})
     });
 
+    $("input#show-nvs").on("click", function() {
+        this.checked=this.checked?1:0;
+        if(this.checked){
+            $('a[href^="#tab-nvs"]').show();
+        } else {
+            $('a[href^="#tab-nvs"]').hide();
+        }
+
+    });
+    
     $("input#autoexec-cb").on("click", function() {
         var data = { 'timestamp': Date.now() };
         autoexec = (this.checked)?1:0;
@@ -200,7 +210,7 @@ $(document).ready(function(){
                 console.log('sent config JSON with headers:', autoexec);
                 console.log('now triggering reboot');
                 $.ajax({
-                    url: '/reboot.json',
+                    url: '/reboot_ota.json',
                     dataType: 'text',
                     method: 'POST',
                     cache: false,
@@ -279,14 +289,17 @@ $(document).ready(function(){
             var val = $(this).val();
             if (key != '') {
                 headers["X-Custom-"+key] = val;
-                data[key] = val;
+                data[key] = {};
+                data[key].value = val;
+                data[key].type = 33;
             }
         });
         var key = $("#nvs-new-key").val();
         var val = $("#nvs-new-value").val();
         if (key != '') {
             headers["X-Custom-"+key] = val;
-            data[key] = val;
+            data[key] = {};
+            data[key].value = val;
         }
         $.ajax({
             url: '/config.json',
@@ -429,7 +442,10 @@ $(document).ready(function(){
     $('#boot-button').on("click", function(){
         enableStatusTimer = true;
     });
-
+    $('#reboot-button').on("click", function(){
+        enableStatusTimer = true;
+    });
+    
     $('#updateAP').on("click", function(){
         refreshAP(true);
         console.log("refresh AP");
@@ -441,7 +457,7 @@ $(document).ready(function(){
 
     //start timers
     startCheckStatusInterval();
-    startRefreshAPInterval();
+    //startRefreshAPInterval();
 
     $('[data-toggle="tooltip"]').tooltip({
         html: true,
@@ -572,7 +588,7 @@ function checkStatus(){
 
                     //update wait screen
                     $( "#loading" ).hide();
-                    $( "#connect-success" ).append("<p>Your IP address now is: " + text(data["ip"]) + "</p>");
+                    $( "#connect-success" ).text("Your IP address now is: " + data["ip"] );
                     $( "#connect-success" ).show();
                     $( "#connect-fail" ).hide();
 
@@ -628,23 +644,31 @@ function checkStatus(){
             enableStatusTimer = true;
         }
         if (data.hasOwnProperty('recovery')) {
+            if(LastRecoveryState != data["recovery"]){
+                LastRecoveryState = data["recovery"];
+                $("input#show-nvs")[0].checked=LastRecoveryState==1?true:false;
+            }
+            if($("input#show-nvs")[0].checked){
+                    $('a[href^="#tab-nvs"]').show();
+            } else{
+                $('a[href^="#tab-nvs"]').hide();
+            }
+
             if (data["recovery"] === 1) {
                 recovery = true;
                 $("#otadiv").show();
                 $('a[href^="#tab-audio"]').hide();
                 $('a[href^="#tab-gpio"]').show();
-                $('a[href^="#tab-nvs"]').show();
                 $("footer.footer").removeClass('sl');
                 $("footer.footer").addClass('recovery');
                 $("#boot-button").html('Reboot');
-                $("#boot-form").attr('action', '/reboot.json');
+                $("#boot-form").attr('action', '/reboot_ota.json');
                 enableStatusTimer = true;
             } else {
                 recovery = false;
                 $("#otadiv").hide();
                 $('a[href^="#tab-audio"]').show();
                 $('a[href^="#tab-gpio"]').hide();
-                $('a[href^="#tab-nvs"]').hide();
                 $("footer.footer").removeClass('recovery');
                 $("footer.footer").addClass('sl');
                 $("#boot-button").html('Recovery');
@@ -697,16 +721,16 @@ function getConfig() {
         Object.keys(data).sort().forEach(function(key, i) {
             if (data.hasOwnProperty(key)) {
                 if (key == 'autoexec') {
-                    if (data["autoexec"] === "1") {
+                    if (data["autoexec"].value === "1") {
                         $("#autoexec-cb")[0].checked=true;
                     } else {
                         $("#autoexec-cb")[0].checked=false;
                     }
                 } else if (key == 'autoexec1') {
-                    $("textarea#autoexec1").val(data[key]);
+                    $("textarea#autoexec1").val(data[key].value);
                 } else if (key == 'host_name') {
-                    $("dhcp-name1").val(data[key]);
-                    $("dhcp-name2").val(data[key]);
+                    $("dhcp-name1").val(data[key].value);
+                    $("dhcp-name2").val(data[key].value);
                 }
 
                 $("tbody#nvsTable").append(
@@ -717,7 +741,7 @@ function getConfig() {
                         "</td>"+
                     "</tr>"
                 );
-                $("input#"+key).val(data[key]);
+                $("input#"+key).val(data[key].value);
             }
         });
         $("tbody#nvsTable").append(

+ 2 - 1
components/wifi-manager/component.mk

@@ -7,7 +7,8 @@
 # please read the SDK documents if you need to do this.
 #
 COMPONENT_EMBED_FILES := style.css code.js index.html bootstrap.min.css.gz jquery.min.js.gz popper.min.js.gz bootstrap.min.js.gz
-CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_DEBUG \
+#CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_DEBUG 
+CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_INFO \
 	-I$(COMPONENT_PATH)/../tools				
 COMPONENT_ADD_INCLUDEDIRS := .
 COMPONENT_ADD_INCLUDEDIRS += $(COMPONENT_PATH)/../tools

+ 162 - 159
components/wifi-manager/http_server.c

@@ -42,14 +42,20 @@ function to process requests, decode URLs, serve files, etc. etc.
 #include "esp_system.h"
 #include "freertos/FreeRTOS.h"
 #include "freertos/task.h"
+#include "config.h"
 
-#define NVS_PARTITION_NAME "nvs"
-#define NUM_BUFFER_LEN 101
+#define HTTP_STACK_SIZE	(5*1024)
 
 /* @brief tag used for ESP serial console messages */
 static const char TAG[] = "http_server";
 /* @brief task handle for the http server */
 static TaskHandle_t task_http_server = NULL;
+static StaticTask_t task_http_buffer;
+#if RECOVERY_APPLICATION
+static StackType_t task_http_stack[HTTP_STACK_SIZE];
+#else
+static StackType_t EXT_RAM_ATTR task_http_stack[HTTP_STACK_SIZE];
+#endif
 SemaphoreHandle_t http_server_config_mutex = NULL;
 
 /**
@@ -86,13 +92,11 @@ const static char http_redirect_hdr_start[] = "HTTP/1.1 302 Found\nLocation: htt
 const static char http_redirect_hdr_end[] = "/\n\n";
 
 
-
-
-
 void http_server_start() {
-
+	ESP_LOGD(TAG,  "http_server_start ");
 	if(task_http_server == NULL) {
-		xTaskCreate(&http_server, "http_server", 1024*5, NULL, WIFI_MANAGER_TASK_PRIORITY, &task_http_server);
+		task_http_server = xTaskCreateStatic( (TaskFunction_t) &http_server, "http_server", HTTP_STACK_SIZE, NULL, 
+										 WIFI_MANAGER_TASK_PRIORITY, task_http_stack, &task_http_buffer);
 	}
 }
 void http_server(void *pvParameters) {
@@ -102,7 +106,7 @@ void http_server(void *pvParameters) {
 	conn = netconn_new(NETCONN_TCP);
 	netconn_bind(conn, IP_ADDR_ANY, 80);
 	netconn_listen(conn);
-	ESP_LOGI(TAG, "HTTP Server listening on 80/tcp");
+	ESP_LOGI(TAG,   "HTTP Server listening on 80/tcp");
 	do {
 		err = netconn_accept(conn, &newconn);
 		if(err == ERR_OK) {
@@ -111,7 +115,7 @@ void http_server(void *pvParameters) {
 		}
 		else
 		{
-			ESP_LOGE(TAG,"Error accepting new connection. Terminating HTTP server");
+			ESP_LOGE(TAG,  "Error accepting new connection. Terminating HTTP server");
 		}
 		taskYIELD();  /* allows the freeRTOS scheduler to take over if needed. */
 	} while(err == ERR_OK);
@@ -147,7 +151,7 @@ char* http_server_search_header(char *request, char *header_name, int *len, char
 	char *ptr = NULL;
 	int currentLength=0;
 
-	ESP_LOGV(TAG, "searching for header name: [%s]", header_name);
+	ESP_LOGV(TAG,   "searching for header name: [%s]", header_name);
 	ptr = strstr(request, header_name);
 
 
@@ -155,23 +159,23 @@ char* http_server_search_header(char *request, char *header_name, int *len, char
 		ret = ptr + strlen(header_name);
 		ptr = ret;
 		currentLength=(int)(ptr-request);
-		ESP_LOGV(TAG, "found string at %d", currentLength);
+		ESP_LOGV(TAG,   "found string at %d", currentLength);
 
 		while (*ptr != '\0' && *ptr != '\n' && *ptr != '\r' && *ptr != ':' && ptr<bufEnd) {
 			ptr++;
 		}
 		if(*ptr==':') {
 			currentLength=(int)(ptr-ret);
-			ESP_LOGV(TAG, "Found parameter name end, length : %d", currentLength);
+			ESP_LOGV(TAG,   "Found parameter name end, length : %d", currentLength);
 			// save the parameter name: the string between header name and ":"
 			*parm_name=malloc(currentLength+1);
 			if(*parm_name==NULL) {
-				ESP_LOGE(TAG, "Unable to allocate memory for new header name");
+				ESP_LOGE(TAG,   "Unable to allocate memory for new header name");
 				return NULL;
 			}
 			memset(*parm_name, 0x00,currentLength+1);
 			strncpy(*parm_name,ret,currentLength);
-			ESP_LOGV(TAG, "Found parameter name : %s ", *parm_name);
+			ESP_LOGV(TAG,   "Found parameter name : %s ", *parm_name);
 			ptr++;
 			while (*ptr == ' ' && ptr<bufEnd) {
 				ptr++;
@@ -186,12 +190,12 @@ char* http_server_search_header(char *request, char *header_name, int *len, char
 		// Terminate value inside its actual buffer so we can treat it as individual string
 		*ptr='\0';
 		currentLength=(int)(ptr-ret);
-		ESP_LOGV(TAG, "Found parameter value end, length : %d, 	value: %s", currentLength,ret );
+		ESP_LOGV(TAG,   "Found parameter value end, length : %d, 	value: %s", currentLength,ret );
 
 		*next_position=++ptr;
 		return ret;
 	}
-	ESP_LOGD(TAG, "No more match for : %s", header_name);
+	ESP_LOGD(TAG,   "No more match for : %s", header_name);
 	return NULL;
 }
 void http_server_send_resource_file(struct netconn *conn,const uint8_t * start, const uint8_t * end, char * content_type,char * encoding) {
@@ -199,7 +203,7 @@ void http_server_send_resource_file(struct netconn *conn,const uint8_t * start,
 	size_t  buff_length= sizeof(http_hdr_template)+strlen(content_type)+strlen(encoding);
 	char * http_hdr=malloc(buff_length);
 	if( http_hdr == NULL) {
-		ESP_LOGE(TAG,"Cound not allocate %d bytes for headers.",buff_length);
+		ESP_LOGE(TAG,  "Cound not allocate %d bytes for headers.",buff_length);
 		netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY);
 	}
 	else
@@ -207,73 +211,25 @@ void http_server_send_resource_file(struct netconn *conn,const uint8_t * start,
 		memset(http_hdr,0x00,buff_length);
 		snprintf(http_hdr, buff_length-1,http_hdr_template,content_type,len,encoding);
 		netconn_write(conn, http_hdr, strlen(http_hdr), NETCONN_NOCOPY);
-		ESP_LOGD(TAG,"sending response : %s",http_hdr);
+		ESP_LOGD(TAG,  "sending response : %s",http_hdr);
 		netconn_write(conn, start, end - start, NETCONN_NOCOPY);
 		free(http_hdr);
 	}
 }
 
-err_t http_server_nvs_dump(struct netconn *conn, nvs_type_t nvs_type) {
-	nvs_entry_info_t info;
-	char * num_buffer = NULL;
-	cJSON * nvs_json = cJSON_CreateObject();
-	num_buffer = malloc(NUM_BUFFER_LEN);
-	nvs_iterator_t it = nvs_entry_find(settings_partition, NULL, nvs_type);
-	if(it == NULL) {
-		ESP_LOGW(TAG, "No nvs entry found in %s",NVS_PARTITION_NAME );
+err_t http_server_send_config_json(struct netconn *conn) {
+	char * json = config_alloc_get_json(false);
+	if(json!=NULL){
+		ESP_LOGD(TAG,  "config json : %s",json );
+		netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY);
+		netconn_write(conn, json, strlen(json), NETCONN_NOCOPY);
+		free(json);
 	}
-	while (it != NULL) {
-		nvs_entry_info(it, &info);
-		memset(num_buffer,0x00,NUM_BUFFER_LEN);
-		if(strstr(info.namespace_name, current_namespace)) {
-			void * value = get_nvs_value_alloc(nvs_type,info.key);
-			if(value==NULL)
-			{
-				ESP_LOGE(TAG,"nvs read failed.");
-				netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY); //200ok
-				free(num_buffer);
-				cJSON_Delete(nvs_json);
-				return ESP_FAIL;
-			}
-			switch (nvs_type) {
-				case NVS_TYPE_I8:
-					snprintf(num_buffer, NUM_BUFFER_LEN-1, "%i", *(int8_t*)value);
-					break;
-				case NVS_TYPE_I16:
-					snprintf(num_buffer, NUM_BUFFER_LEN-1, "%i", *(int16_t*)value);
-					break;
-				case NVS_TYPE_I32:
-					snprintf(num_buffer, NUM_BUFFER_LEN-1, "%i", *(int32_t*)value);
-					break;
-				case NVS_TYPE_U8:
-					snprintf(num_buffer, NUM_BUFFER_LEN-1, "%u", *(uint8_t*)value);
-					break;
-				case NVS_TYPE_U16:
-					snprintf(num_buffer, NUM_BUFFER_LEN-1, "%u", *(uint16_t*)value);
-					break;
-				case NVS_TYPE_U32:
-					snprintf(num_buffer, NUM_BUFFER_LEN-1, "%u", *(uint32_t*)value);
-					break;
-				case NVS_TYPE_STR:
-					// string will be processed directly below
-					break;
-				case NVS_TYPE_I64:
-				case NVS_TYPE_U64:
-				default:
-					ESP_LOGE(TAG, "nvs type %u not supported", nvs_type);
-					break;
-			}
-			cJSON_AddItemToObject(nvs_json, info.key, cJSON_CreateString((nvs_type==NVS_TYPE_STR)?(char *)value:num_buffer));
-			free(value );
-		}
-		it = nvs_entry_next(it);
+	else{
+		ESP_LOGD(TAG,  "Error retrieving config json string. ");
+		netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY);
 	}
-	ESP_LOGD(TAG,"config json : %s\n", cJSON_Print(nvs_json));
 
-	netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY);
-	netconn_write(conn, cJSON_Print(nvs_json), strlen(cJSON_Print(nvs_json)), NETCONN_NOCOPY);
-	cJSON_Delete(nvs_json);
-	free(num_buffer);
 	return ESP_OK;
 }
 
@@ -288,8 +244,8 @@ void http_server_process_config(struct netconn *conn, 	char *inbuf) {
 //		/* extract the first line of the request */
 //		char *save_ptr = buf;
 //		char *line = strtok_r(save_ptr, new_line, &save_ptr);
-//		ESP_LOGD(TAG,"Processing line %s",line);
-	ESP_LOGD(TAG,"Processing request buffer: \n%s",inbuf);
+//		ESP_LOGD(TAG,  "Processing line %s",line);
+	ESP_LOGD(TAG,  "Processing request buffer: \n%s",inbuf);
 	char *last = NULL;
 	char *ptr = NULL;
 	last = ptr = inbuf;
@@ -302,7 +258,7 @@ void http_server_process_config(struct netconn *conn, 	char *inbuf) {
 			}
 			// terminate the header string
 			if( *(ptr) == '\0' ) {
-				ESP_LOGD(TAG, "End of buffer found");
+				ESP_LOGD(TAG,   "End of buffer found");
 				return;
 			}
 			*ptr = '\0';
@@ -311,21 +267,21 @@ void http_server_process_config(struct netconn *conn, 	char *inbuf) {
 				ptr+=2;
 			}
 			if(ptr==last) {
-				ESP_LOGD(TAG,"Processing body. ");
+				ESP_LOGD(TAG,  "Processing body. ");
 				break;
 			}
 			if(strlen(last)>0) {
-				ESP_LOGD(TAG,"Found Header Line %s ", last);
+				ESP_LOGD(TAG,  "Found Header Line %s ", last);
 				//Content-Type: application/json
 			}
 			else {
-				ESP_LOGD(TAG,"Found end of headers");
+				ESP_LOGD(TAG,  "Found end of headers");
 				bHeaders = false;
 			}
 			last=ptr;
 		}
 		else {
-			//ESP_LOGD(TAG,"Body content: %s", last);
+			//ESP_LOGD(TAG,  "Body content: %s", last);
 			//cJSON * json = cJSON_Parse(last);
 			//cJSON_Delete(json);
 			//todo:  implement body json parsing
@@ -339,13 +295,13 @@ void http_server_process_config(struct netconn *conn, 	char *inbuf) {
 
 void dump_net_buffer(void * buf, u16_t buflen) {
 	char * curbuf = malloc(buflen+1);
-	ESP_LOGD(TAG,"netconn buffer, length=%u",buflen);
+	ESP_LOGV(TAG,  "netconn buffer, length=%u",buflen);
 	if(curbuf==NULL) {
-		ESP_LOGE(TAG,"Unable to show netconn buffer.  Malloc failed");
+		ESP_LOGE(TAG,  "Unable to show netconn buffer.  Malloc failed");
 	}
 	memset(curbuf,0x0, buflen+1);
 	memcpy(curbuf,buf,buflen);
-	ESP_LOGV(TAG,"netconn buffer content:\n%s",curbuf);
+	ESP_LOGV(TAG,  "netconn buffer content:\n%s",curbuf);
 	free(curbuf);
 }
 
@@ -355,39 +311,59 @@ void http_server_netconn_serve(struct netconn *conn) {
 	char *buf = NULL;
 	u16_t buflen;
 	err_t err;
+	ip_addr_t remote_add;
+	u16_t port;
+	ESP_LOGV(TAG,  "Serving page.  Getting device AP address.");
 	const char new_line[2] = "\n";
-
+	char * ap_ip_address= config_alloc_get_default(NVS_TYPE_STR, "ap_ip_address", DEFAULT_AP_IP, 0);
+	if(ap_ip_address==NULL){
+		ESP_LOGE(TAG,  "Unable to retrieve default AP IP Address");
+		netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY);
+		netconn_close(conn);
+		return;
+	}
+	ESP_LOGV(TAG,  "Getting remote device IP address.");
+	netconn_getaddr(conn,	&remote_add,	&port,	0);
+	char * remote_address = strdup(ip4addr_ntoa(ip_2_ip4(&remote_add)));
+	ESP_LOGD(TAG,  "Local Access Point IP address is: %s. Remote device IP address is %s. Receiving request buffer", ap_ip_address, remote_address);
 	err = netconn_recv(conn, &inbuf);
 	if(err == ERR_OK) {
-
+		ESP_LOGV(TAG,  "Getting data buffer.");
 		netbuf_data(inbuf, (void**)&buf, &buflen);
 		dump_net_buffer(buf, buflen);
-
+		int lenH = 0;
 		/* extract the first line of the request */
 		char *save_ptr = buf;
 		char *line = strtok_r(save_ptr, new_line, &save_ptr);
-		ESP_LOGD(TAG,"http_server_netconn_serve Processing line %s",line);
+		char *temphost = http_server_get_header(save_ptr, "Host: ", &lenH);
+		char * host = malloc(lenH+1);
+		memset(host,0x00,lenH+1);
+		if(lenH>0){
+			strlcpy(host,temphost,lenH+1);
+		}
+		ESP_LOGD(TAG,  "http_server_netconn_serve Host: [%s], host: [%s], Processing line [%s]",remote_address,host,line);
 
 		if(line) {
 
 			/* captive portal functionality: redirect to access point IP for HOST that are not the access point IP OR the STA IP */
-			int lenH = 0;
-			char *host = http_server_get_header(save_ptr, "Host: ", &lenH);
 			const char * host_name=NULL;
 			if((err=tcpip_adapter_get_hostname(TCPIP_ADAPTER_IF_STA, &host_name )) !=ESP_OK) {
-				ESP_LOGE(TAG,"Unable to get host name. Error: %s",esp_err_to_name(err));
+				ESP_LOGE(TAG,  "Unable to get host name. Error: %s",esp_err_to_name(err));
+			}
+			else {
+				ESP_LOGI(TAG,"System host name %s, http requested host: %s.",host_name, host);
 			}
 
 			/* determine if Host is from the STA IP address */
 			wifi_manager_lock_sta_ip_string(portMAX_DELAY);
-			bool access_from_sta_ip = lenH > 0?strstr(host, wifi_manager_get_sta_ip_string()):false;
+			bool access_from_sta_ip = lenH > 0?strcasestr(host, wifi_manager_get_sta_ip_string()):false;
 			wifi_manager_unlock_sta_ip_string();
-			bool access_from_host_name = (host_name!=NULL) && strstr(host, host_name);
+			bool access_from_host_name = (host_name!=NULL) && strcasestr(host,host_name);
 
-			if(lenH > 0 && !strstr(host, DEFAULT_AP_IP) && !(access_from_sta_ip || access_from_host_name)) {
-				ESP_LOGI(TAG,"Redirecting to default AP IP Address : %s", DEFAULT_AP_IP);
+			if(lenH > 0 && !strcasestr(host, ap_ip_address) && !(access_from_sta_ip || access_from_host_name)) {
+				ESP_LOGI(TAG,  "Redirecting host [%s] to AP IP Address : %s",remote_address, ap_ip_address);
 				netconn_write(conn, http_redirect_hdr_start, sizeof(http_redirect_hdr_start) - 1, NETCONN_NOCOPY);
-				netconn_write(conn, DEFAULT_AP_IP, sizeof(DEFAULT_AP_IP) - 1, NETCONN_NOCOPY);
+				netconn_write(conn, ap_ip_address, strlen(ap_ip_address), NETCONN_NOCOPY);
 				netconn_write(conn, http_redirect_hdr_end, sizeof(http_redirect_hdr_end) - 1, NETCONN_NOCOPY);
 			}
 			else {
@@ -419,32 +395,43 @@ void http_server_netconn_serve(struct netconn *conn) {
 				}
 
                 //dynamic stuff
+				else if(strstr(line, "GET /scan.json ")) {
+					ESP_LOGI(TAG,  "Starting wifi scan");
+					wifi_manager_scan_async();
+				}
 				else if(strstr(line, "GET /ap.json ")) {
 					/* if we can get the mutex, write the last version of the AP list */
-					ESP_LOGI(TAG,"Processing ap.json request");
+					ESP_LOGI(TAG,  "Processing ap.json request");
 					if(wifi_manager_lock_json_buffer(( TickType_t ) 10)) {
 						netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY);
-						char *buff = wifi_manager_get_ap_list_json();
-						netconn_write(conn, buff, strlen(buff), NETCONN_NOCOPY);
+						char *buff = wifi_manager_alloc_get_ap_list_json();
 						wifi_manager_unlock_json_buffer();
+						if(buff!=NULL){
+							netconn_write(conn, buff, strlen(buff), NETCONN_NOCOPY);
+							free(buff);
+						}
+						else {
+							ESP_LOGD(TAG,  "Error retrieving ap list json string. ");
+							netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY);
+						}
 					}
 					else {
 						netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY);
-						ESP_LOGE(TAG, "http_server_netconn_serve: GET /ap.json failed to obtain mutex");
+						ESP_LOGE(TAG,   "http_server_netconn_serve: GET /ap.json failed to obtain mutex");
 					}
 					/* request a wifi scan */
-					ESP_LOGI(TAG,"Starting wifi scan");
+					ESP_LOGI(TAG,  "Starting wifi scan");
 					wifi_manager_scan_async();
-					ESP_LOGI(TAG,"Done serving ap.json");
+					ESP_LOGI(TAG,  "Done serving ap.json");
 				}
 				else if(strstr(line, "GET /config.json ")) {
-					ESP_LOGI(TAG,"Serving config.json");
-					ESP_LOGI(TAG, "About to get config from flash");
-					http_server_nvs_dump(conn,NVS_TYPE_STR);
-					ESP_LOGD(TAG,"Done serving config.json");
+					ESP_LOGI(TAG,  "Serving config.json");
+					ESP_LOGI(TAG,   "About to get config from flash");
+					http_server_send_config_json(conn);
+					ESP_LOGD(TAG,  "Done serving config.json");
 				}
 				else if(strstr(line, "POST /config.json ")) {
-					ESP_LOGI(TAG,"Serving POST config.json");
+					ESP_LOGI(TAG,  "Serving POST config.json");
 					int lenA=0;
 					char * last_parm=save_ptr;
 					char * next_parm=save_ptr;
@@ -457,20 +444,21 @@ void http_server_netconn_serve(struct netconn *conn) {
 
 					while(last_parm!=NULL) {
 						// Search will return
-						ESP_LOGD(TAG, "Getting parameters from X-Custom headers");
+						ESP_LOGD(TAG,   "Getting parameters from X-Custom headers");
 						last_parm = http_server_search_header(next_parm, "X-Custom-", &lenA, &last_parm_name,&next_parm,buf+buflen);
 						if(last_parm!=NULL && last_parm_name!=NULL) {
-							ESP_LOGI(TAG, "http_server_netconn_serve: POST config.json, config %s=%s", last_parm_name, last_parm);
+							ESP_LOGI(TAG,   "http_server_netconn_serve: POST config.json, config %s=%s", last_parm_name, last_parm);
 							if(strcmp(last_parm_name, "fwurl")==0) {
 								// we're getting a request to do an OTA from that URL
-								ESP_LOGW(TAG, "Found OTA request!");
+								ESP_LOGW(TAG,   "Found OTA request!");
 								otaURL=strdup(last_parm);
 								bOTA=true;
 							}
 							else {
-									ESP_LOGV(TAG, "http_server_netconn_serve: POST config.json Storing parameter");
-									err= store_nvs_value(NVS_TYPE_STR, last_parm_name , last_parm);
-									if(err!=ESP_OK) ESP_LOGE(TAG,"Unable to save nvs value. Error: %s",esp_err_to_name(err));
+								ESP_LOGV(TAG,   "http_server_netconn_serve: POST config.json Storing parameter");
+								if(config_set_value(NVS_TYPE_STR, last_parm_name , last_parm) != ESP_OK){
+									ESP_LOGE(TAG,  "Unable to save nvs value.");
+								}
 							}
 						}
 						if(last_parm_name!=NULL) {
@@ -484,106 +472,121 @@ void http_server_netconn_serve(struct netconn *conn) {
 					else {
 						netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY); //200ok
 						if(bOTA) {
+
 #if RECOVERY_APPLICATION
-							ESP_LOGW(TAG, "Starting process OTA for url %s",otaURL);
+							ESP_LOGW(TAG,   "Starting process OTA for url %s",otaURL);
 #else
-							ESP_LOGW(TAG, "Restarting system to process OTA for url %s",otaURL);
-							// close the connection cleanly
-							netconn_close(conn);
-							netconn_delete(conn);
+							ESP_LOGW(TAG,   "Restarting system to process OTA for url %s",otaURL);
 #endif
-							start_ota(otaURL,false);
+							wifi_manager_reboot_ota(otaURL);
 							free(otaURL);
 						}
 					}
-					ESP_LOGI(TAG,"Done Serving POST config.json");
+					ESP_LOGI(TAG,  "Done Serving POST config.json");
 				} 
 				else if(strstr(line, "POST /connect.json ")) {
-					ESP_LOGI(TAG, "http_server_netconn_serve: POST /connect.json");
+					ESP_LOGI(TAG,   "http_server_netconn_serve: POST /connect.json");
 					bool found = false;
-					int lenS = 0, lenP = 0;
+					int lenS = 0, lenP = 0, lenN = 0;
 					char *ssid = NULL, *password = NULL;
 					ssid = http_server_get_header(save_ptr, "X-Custom-ssid: ", &lenS);
 					password = http_server_get_header(save_ptr, "X-Custom-pwd: ", &lenP);
+					char * new_host_name_b = http_server_get_header(save_ptr, "X-Custom-host_name: ", &lenN);
+					if(lenN > 0){
+						lenN++;
+						char * new_host_name = malloc(lenN);
+						strlcpy(new_host_name, new_host_name_b, lenN);
+						if(config_set_value(NVS_TYPE_STR, "host_name", new_host_name) != ESP_OK){
+							ESP_LOGE(TAG,  "Unable to save host name configuration");
+						}
+						free(new_host_name);
+					}
 
 					if(ssid && lenS <= MAX_SSID_SIZE && password && lenP <= MAX_PASSWORD_SIZE) {
 						wifi_config_t* config = wifi_manager_get_wifi_sta_config();
 						memset(config, 0x00, sizeof(wifi_config_t));
 						memcpy(config->sta.ssid, ssid, lenS);
 						memcpy(config->sta.password, password, lenP);
-						ESP_LOGD(TAG, "http_server_netconn_serve: wifi_manager_connect_async() call, with ssid: %s, password: %s", ssid, password);
+						ESP_LOGD(TAG,   "http_server_netconn_serve: wifi_manager_connect_async() call, with ssid: %s, password: %s", config->sta.ssid, config->sta.password);
 						wifi_manager_connect_async();
 						netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY); //200ok
 						found = true;
 					}
+					else{
+						ESP_LOGE(TAG,  "SSID or Password invalid");
+					}
+
 
 					if(!found) {
 						/* bad request the authentification header is not complete/not the correct format */
 						netconn_write(conn, http_400_hdr, sizeof(http_400_hdr) - 1, NETCONN_NOCOPY);
-						ESP_LOGE(TAG, "bad request the authentification header is not complete/not the correct format");
+						ESP_LOGE(TAG,   "bad request the authentification header is not complete/not the correct format");
 					}
 
-					ESP_LOGI(TAG, "http_server_netconn_serve: done serving connect.json");
+					ESP_LOGI(TAG,   "http_server_netconn_serve: done serving connect.json");
 				}
 				else if(strstr(line, "DELETE /connect.json ")) {
-					ESP_LOGI(TAG, "http_server_netconn_serve: DELETE /connect.json");
+					ESP_LOGI(TAG,   "http_server_netconn_serve: DELETE /connect.json");
 					/* request a disconnection from wifi and forget about it */
 					wifi_manager_disconnect_async();
 					netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY); /* 200 ok */
-					ESP_LOGI(TAG, "http_server_netconn_serve: done serving DELETE /connect.json");
+					ESP_LOGI(TAG,   "http_server_netconn_serve: done serving DELETE /connect.json");
+				}
+				else if(strstr(line, "POST /reboot_ota.json ")) {
+					ESP_LOGI(TAG,   "http_server_netconn_serve: POST reboot_ota.json");
+					netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY); /* 200 ok */
+					wifi_manager_reboot(OTA);
+					ESP_LOGI(TAG,   "http_server_netconn_serve: done serving POST reboot_ota.json");
 				}
 				else if(strstr(line, "POST /reboot.json ")) {
-					ESP_LOGI(TAG, "http_server_netconn_serve: POST reboot.json");
-					netconn_close(conn);
-					netconn_delete(conn);
-					guided_restart_ota();
-					ESP_LOGI(TAG, "http_server_netconn_serve: done serving POST reboot.json");
+					ESP_LOGI(TAG,   "http_server_netconn_serve: POST reboot.json");
+					netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY); /* 200 ok */
+					wifi_manager_reboot(RESTART);
+					ESP_LOGI(TAG,   "http_server_netconn_serve: done serving POST reboot.json");
 				}
 				else if(strstr(line, "POST /recovery.json ")) {
-					ESP_LOGI(TAG, "http_server_netconn_serve: POST recovery.json");
-					netconn_close(conn);
-					netconn_delete(conn);
-					guided_factory();
-					ESP_LOGI(TAG, "http_server_netconn_serve: done serving POST recovery.json");
+					ESP_LOGI(TAG,   "http_server_netconn_serve: POST recovery.json");
+					netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY); /* 200 ok */
+					wifi_manager_reboot(RECOVERY);
+					ESP_LOGI(TAG,   "http_server_netconn_serve: done serving POST recovery.json");
 				}
 				else if(strstr(line, "GET /status.json ")) {
-					ESP_LOGI(TAG,"Serving status.json");
+					ESP_LOGI(TAG,  "Serving status.json");
 					if(wifi_manager_lock_json_buffer(( TickType_t ) 10)) {
-						char *buff = wifi_manager_get_ip_info_json();
+						char *buff = wifi_manager_alloc_get_ip_info_json();
+						wifi_manager_unlock_json_buffer();
 						if(buff) {
 							netconn_write(conn, http_ok_json_no_cache_hdr, sizeof(http_ok_json_no_cache_hdr) - 1, NETCONN_NOCOPY);
 							netconn_write(conn, buff, strlen(buff), NETCONN_NOCOPY);
+							free(buff);
 						}
 						else {
 							netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY);
 						}
-						wifi_manager_unlock_json_buffer();
+
 					}
 					else {
 						netconn_write(conn, http_503_hdr, sizeof(http_503_hdr) - 1, NETCONN_NOCOPY);
-						ESP_LOGE(TAG, "http_server_netconn_serve: GET /status failed to obtain mutex");
+						ESP_LOGE(TAG,   "http_server_netconn_serve: GET /status failed to obtain mutex");
 					}
-					ESP_LOGI(TAG,"Done Serving status.json");
+					ESP_LOGI(TAG,  "Done Serving status.json");
 				}
 				else {
 					netconn_write(conn, http_400_hdr, sizeof(http_400_hdr) - 1, NETCONN_NOCOPY);
-					ESP_LOGE(TAG, "bad request");
+					ESP_LOGE(TAG,   "bad request from host: %s, request %s",remote_address, line);
 				}
 			}
 		}
 		else {
-			ESP_LOGE(TAG, "URL Not found. Sending 404.");
+			ESP_LOGE(TAG,   "URL not found processing for remote host : %s",remote_address);
 			netconn_write(conn, http_404_hdr, sizeof(http_404_hdr) - 1, NETCONN_NOCOPY);
 		}
+		free(host);
+
 	}
-	//-1 if there is no next part
-	// 1 if moved to the next part but now there is no next part
-	// 0 if moved to the next part and there are still more parts
-	while(netbuf_next(inbuf) != -1) {
-		ESP_LOGD(TAG,"More data found from the connection!");
-		netbuf_data(inbuf, (void**)&buf, &buflen);
-		dump_net_buffer(buf, buflen);
-	}
+
+	free(ap_ip_address);
+	free(remote_address);
 	netconn_close(conn);
 	netbuf_delete(inbuf);
 	/* free the buffer */
@@ -591,26 +594,26 @@ void http_server_netconn_serve(struct netconn *conn) {
 }
 
 bool http_server_lock_json_object(TickType_t xTicksToWait) {
-	ESP_LOGD(TAG,"Locking config json object");
+	ESP_LOGD(TAG,  "Locking config json object");
 	if(http_server_config_mutex) {
 		if( xSemaphoreTake( http_server_config_mutex, xTicksToWait ) == pdTRUE ) {
-			ESP_LOGV(TAG,"config Json object locked!");
+			ESP_LOGV(TAG,  "config Json object locked!");
 			return true;
 		}
 		else {
-			ESP_LOGW(TAG,"Semaphore take failed. Unable to lock config Json object mutex");
+			ESP_LOGW(TAG,  "Semaphore take failed. Unable to lock config Json object mutex");
 			return false;
 		}
 	}
 	else {
-		ESP_LOGW(TAG,"Unable to lock config Json object mutex");
+		ESP_LOGW(TAG,  "Unable to lock config Json object mutex");
 		return false;
 	}
 
 }
 
 void http_server_unlock_json_object() {
-	ESP_LOGD(TAG,"Unlocking json buffer!");
+	ESP_LOGD(TAG,  "Unlocking json buffer!");
 	xSemaphoreGive( http_server_config_mutex );
 }
 

+ 12 - 3
components/wifi-manager/index.html

@@ -266,6 +266,11 @@
                         </tbody>
                     </table>
                     <div class="buttons">
+						<div id="boot-div">
+                        	<form id="reboot-form" action="/reboot.json" method="post" target="dummyframe">
+                            	<button id="reboot-button" type="submit" class="btn btn-primary">Reboot</button>
+                        	</form>
+                    	</div>                    
                         <input id="save-nvs" type="button" class="btn btn-success" value="Save" />
                     </div>
                 </div>
@@ -298,7 +303,7 @@
                         </tbody>
                     </table>
                     <h2>Firmware URL:</h2>
-                    <textarea id="fwurl" maxlength="120"></textarea>
+                    <textarea id="fwurl" maxlength="350"></textarea>
                    <!-- 
                     <br />OR<br />
                     <div class="input-group mb-3" id="upload">
@@ -312,7 +317,7 @@
                     </div>
                    -->
                     <div class="buttons">
-                        <input type="button" id="flash" class="btn btn-danger" value="Flash!" /><span id="flash-status"></span>
+                    	<input type="button" id="flash" class="btn btn-danger" value="Flash!" /><span id="flash-status"></span>
                     </div>
                     <div id="otadiv">
                         <div class="progress" id="progress">
@@ -337,8 +342,12 @@
                             <li>cJSON, &copy; 2009-2017, Dave Gamble and cJSON contributors. Licensed under the MIT License.</li>
                         </ul>
                     </div>
+	                <h2>Show NVS Editor</h2>
+	                <div class="custom-control custom-switch">
+	                      <input type="checkbox" class="custom-control-input" id="show-nvs" checked="checked">
+	                      <label class="custom-control-label" for="show-nvs"></label>
+	                </div>
                 </div>
-
             </div>
             <footer class="footer"><span id="foot-fw"></span><span id="foot-wifi"></span></footer>
             <iframe width="0" height="0" border="0" name="dummyframe" id="dummyframe"></iframe>

+ 0 - 144
components/wifi-manager/json.c

@@ -1,144 +0,0 @@
-/*
-@file json.c
-@brief handles very basic JSON with a minimal footprint on the system
-
-This code is a lightly modified version of cJSON 1.4.7. cJSON is licensed under the MIT license:
-Copyright (c) 2009 Dave Gamble
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
-PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
-@see https://github.com/DaveGamble/cJSON
-*/
-
-#include "json.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdbool.h>
-
-
-bool json_print_string(const unsigned char *input, unsigned char *output_buffer)
-{
-	const unsigned char *input_pointer = NULL;
-	unsigned char *output = NULL;
-	unsigned char *output_pointer = NULL;
-	size_t output_length = 0;
-	/* numbers of additional characters needed for escaping */
-	size_t escape_characters = 0;
-
-	if (output_buffer == NULL)
-	{
-		return false;
-	}
-
-	/* empty string */
-	if (input == NULL)
-	{
-		//output = ensure(output_buffer, sizeof("\"\""), hooks);
-		if (output == NULL)
-		{
-			return false;
-		}
-		strcpy((char*)output, "\"\"");
-
-		return true;
-	}
-
-	/* set "flag" to 1 if something needs to be escaped */
-	for (input_pointer = input; *input_pointer; input_pointer++)
-	{
-		if (strchr("\"\\\b\f\n\r\t", *input_pointer))
-		{
-			/* one character escape sequence */
-			escape_characters++;
-		}
-		else if (*input_pointer < 32)
-		{
-			/* UTF-16 escape sequence uXXXX */
-			escape_characters += 5;
-		}
-	}
-	output_length = (size_t)(input_pointer - input) + escape_characters;
-
-	/* in the original cJSON it is possible to realloc here in case output buffer is too small.
-	 * This is overkill for an embedded system. */
-	output = output_buffer;
-
-	/* no characters have to be escaped */
-	if (escape_characters == 0)
-	{
-		output[0] = '\"';
-		memcpy(output + 1, input, output_length);
-		output[output_length + 1] = '\"';
-		output[output_length + 2] = '\0';
-
-		return true;
-	}
-
-	output[0] = '\"';
-	output_pointer = output + 1;
-	/* copy the string */
-	for (input_pointer = input; *input_pointer != '\0'; (void)input_pointer++, output_pointer++)
-	{
-		if ((*input_pointer > 31) && (*input_pointer != '\"') && (*input_pointer != '\\'))
-		{
-			/* normal character, copy */
-			*output_pointer = *input_pointer;
-		}
-		else
-		{
-			/* character needs to be escaped */
-			*output_pointer++ = '\\';
-			switch (*input_pointer)
-			{
-			case '\\':
-				*output_pointer = '\\';
-				break;
-			case '\"':
-				*output_pointer = '\"';
-				break;
-			case '\b':
-				*output_pointer = 'b';
-				break;
-			case '\f':
-				*output_pointer = 'f';
-				break;
-			case '\n':
-				*output_pointer = 'n';
-				break;
-			case '\r':
-				*output_pointer = 'r';
-				break;
-			case '\t':
-				*output_pointer = 't';
-				break;
-			default:
-				/* escape and print as unicode codepoint */
-				sprintf((char*)output_pointer, "u%04x", *input_pointer);
-				output_pointer += 4;
-				break;
-			}
-		}
-	}
-	output[output_length + 1] = '\"';
-	output[output_length + 2] = '\0';
-
-	return true;
-}
-

+ 0 - 47
components/wifi-manager/json.h

@@ -1,47 +0,0 @@
-/*
-@file json.h
-@brief handles very basic JSON with a minimal footprint on the system
-
-This code is a lightly modified version of cJSON 1.4.7. cJSON is licensed under the MIT license:
-Copyright (c) 2009 Dave Gamble
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
-INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
-PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
-OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-OTHER DEALINGS IN THE SOFTWARE.
-
-@see https://github.com/DaveGamble/cJSON
-*/
-
-#ifndef JSON_H_INCLUDED
-#define JSON_H_INCLUDED
-#include <stdbool.h>
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**
- * @brief Render the cstring provided to a JSON escaped version that can be printed.
- * @param input the input buffer to be escaped.
- * @param output_buffer the output buffer to write to. You must ensure it is big enough to contain the final string.
- * @see cJSON equivlaent static cJSON_bool print_string_ptr(const unsigned char * const input, printbuffer * const output_buffer)
- */
-bool json_print_string(const unsigned char *input, unsigned char *output_buffer);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* JSON_H_INCLUDED */

文件差异内容过多而无法显示
+ 617 - 214
components/wifi-manager/wifi_manager.c


+ 37 - 25
components/wifi-manager/wifi_manager.h

@@ -48,13 +48,12 @@ extern "C" {
 
 #if RECOVERY_APPLICATION==1
 #elif RECOVERY_APPLICATION==0
-#warning "compiling for squeezelite."
+#pragma message "compiling for squeezelite."
 #else
 #error "unknown configuration"
 #endif
 
 
-#define DEFAULT_COMMAND_LINE  CONFIG_DEFAULT_COMMAND_LINE
 
 /**
  * @brief Defines the maximum size of a SSID name. 32 is IEEE standard.
@@ -113,8 +112,6 @@ extern "C" {
  */
 #define DEFAULT_AP_PASSWORD 				CONFIG_DEFAULT_AP_PASSWORD
 
-/** @brief Defines the hostname broadcasted by mDNS */
-#define DEFAULT_HOSTNAME					"esp32"
 
 /** @brief Defines access point's bandwidth.
  *  Value: WIFI_BW_HT20 for 20 MHz  or  WIFI_BW_HT40 for 40 MHz
@@ -203,10 +200,24 @@ typedef enum message_code_t {
 	EVENT_SCAN_DONE = 13,
 	EVENT_STA_GOT_IP = 14,
 	EVENT_REFRESH_OTA = 15,
-	MESSAGE_CODE_COUNT = 16 /* important for the callback array */
+	ORDER_RESTART_OTA = 16,
+	ORDER_RESTART_RECOVERY = 17,
+	ORDER_RESTART_OTA_URL = 18,
+	ORDER_RESTART = 19,
+	MESSAGE_CODE_COUNT = 20 /* important for the callback array */
 
 }message_code_t;
 
+typedef enum reboot_type_t{
+	OTA,
+	RECOVERY,
+	RESTART,
+} reboot_type_t;
+void wifi_manager_reboot(reboot_type_t rtype);
+void wifi_manager_reboot_ota(char * url);
+
+
+
 /**
  * @brief simplified reason codes for a lost connection.
  *
@@ -229,21 +240,15 @@ typedef enum connection_request_made_by_code_t{
 }connection_request_made_by_code_t;
 
 /**
- * The actual WiFi settings in use
- */
-struct wifi_settings_t{
-	uint8_t ap_ssid[MAX_SSID_SIZE];
-	uint8_t ap_pwd[MAX_PASSWORD_SIZE];
-	uint8_t ap_channel;
-	uint8_t ap_ssid_hidden;
-	wifi_bandwidth_t ap_bandwidth;
-	bool sta_only;
-	wifi_ps_type_t sta_power_save;
-	bool sta_static_ip;
-	tcpip_adapter_ip_info_t sta_static_ip_config;
-};
-extern struct wifi_settings_t wifi_settings;
-
+ * The wifi manager settings in use
+ */
+//struct wifi_settings_t{
+//	bool sta_only;
+//	bool sta_static_ip;
+//	wifi_ps_type_t sta_power_save;
+//	tcpip_adapter_ip_info_t sta_static_ip_config;
+//};
+//extern struct wifi_settings_t wifi_settings;
 
 /**
  * @brief Structure used to store one message in the queue.
@@ -276,9 +281,9 @@ void  filter_unique( wifi_ap_record_t * aplist, uint16_t * ap_num);
 void wifi_manager( void * pvParameters );
 
 
-char* wifi_manager_get_ap_list_json();
-char* wifi_manager_get_ip_info_json();
-
+char* wifi_manager_alloc_get_ap_list_json();
+char* wifi_manager_alloc_get_ip_info_json();
+cJSON * wifi_manager_clear_ap_list_json(cJSON **old);
 
 /**
  * @brief saves the current STA wifi config to flash ram storage.
@@ -300,6 +305,13 @@ wifi_config_t* wifi_manager_get_wifi_sta_config();
 esp_err_t wifi_manager_event_handler(void *ctx, system_event_t *event);
 
 
+
+/**
+ * @brief Registers handler for wifi and ip events
+ */
+void wifi_manager_register_handlers();
+
+
 /**
  * @brief requests a connection to an access point that will be process in the main task thread.
  */
@@ -352,7 +364,7 @@ cJSON * wifi_manager_get_new_json(cJSON **old);
  * @brief Generates the list of access points after a wifi scan.
  * @note This is not thread-safe and should be called only if wifi_manager_lock_json_buffer call is successful.
  */
-void wifi_manager_generate_acess_points_json();
+void wifi_manager_generate_access_points_json(cJSON ** ap_list);
 
 /**
  * @brief Clear the list of access points.
@@ -378,7 +390,7 @@ char* wifi_manager_get_sta_ip_string();
 /**
  * @brief thread safe char representation of the STA IP update
  */
-void wifi_manager_safe_update_sta_ip_string(uint32_t ip);
+void wifi_manager_safe_update_sta_ip_string(struct ip4_addr * ip4);
 
 
 /**

+ 1 - 1
main/CMakeLists.txt

@@ -1,6 +1,6 @@
 set(COMPONENT_ADD_INCLUDEDIRS . )
 
-set(COMPONENT_SRCS "esp_app_main.c" "platform_esp32.c" "cmd_wifi.c" "console.c" "nvs_utilities.c" "cmd_squeezelite.c")
+set(COMPONENT_SRCS "esp_app_main.c" "platform_esp32.c" "cmd_wifi.c" "console.c" "nvs_utilities.c" "cmd_squeezelite.c" "config.c")
 set(REQUIRES esp_common)
 set(REQUIRES_COMPONENTS freertos squeezelite nvs_flash esp32 spi_flash newlib log console ota tools )
 

+ 7 - 7
main/cmd_squeezelite.c

@@ -48,13 +48,13 @@ static void * squeezelite_thread(){
 //  Let's not wait on WiFi to allow squeezelite to run in bluetooth mode
 //	ESP_LOGI(TAG,"Waiting for WiFi.");
 //	while(!wait_for_wifi()){usleep(100000);};
-	ESP_LOGD(TAG ,"Number of args received: %u",thread_parms.argc );
-	ESP_LOGD(TAG ,"Values:");
+	ESP_LOGV(TAG ,"Number of args received: %u",thread_parms.argc );
+	ESP_LOGV(TAG ,"Values:");
     for(int i = 0;i<thread_parms.argc; i++){
-    	ESP_LOGD(TAG ,"     %s",thread_parms.argv[i]);
+    	ESP_LOGV(TAG ,"     %s",thread_parms.argv[i]);
     }
 
-    ESP_LOGD(TAG,"Starting Squeezelite runner Thread");
+    ESP_LOGV(TAG,"Starting Squeezelite runner Thread");
     esp_pthread_cfg_t cfg = esp_pthread_get_default_config();
     cfg.thread_name= "squeezelite-run";
     cfg.inherit_cfg = true;
@@ -80,9 +80,9 @@ static int launchsqueezelite(int argc, char **argv)
 {
 	ESP_LOGV(TAG ,"Begin");
 
-    ESP_LOGD(TAG, "Parameters:");
+	ESP_LOGV(TAG, "Parameters:");
     for(int i = 0;i<argc; i++){
-    	ESP_LOGD(TAG, "     %s",argv[i]);
+    	ESP_LOGV(TAG, "     %s",argv[i]);
     }
     ESP_LOGV(TAG,"Saving args in thread structure");
 
@@ -102,7 +102,7 @@ static int launchsqueezelite(int argc, char **argv)
 		thread_parms.argv[thread_parms.argc++]=strdup("-?");
 	}
 
-    ESP_LOGD(TAG,"Starting Squeezelite Thread");
+	ESP_LOGD(TAG,"Starting Squeezelite Thread");
     esp_pthread_cfg_t cfg = esp_pthread_get_default_config();
     cfg.thread_name= "squeezelite";
     cfg.inherit_cfg = true;

+ 190 - 0
main/cmd_wifi.c

@@ -8,3 +8,193 @@
 */
 
 // cmd_wifi has been replaced by wifi-manager
+/* Console example — WiFi commands
+
+   This example code is in the Public Domain (or CC0 licensed, at your option.)
+
+   Unless required by applicable law or agreed to in writing, this
+   software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+   CONDITIONS OF ANY KIND, either express or implied.
+*/
+
+#include "cmd_wifi.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "cmd_decl.h"
+#include "esp_log.h"
+#include "esp_console.h"
+#include "argtable3/argtable3.h"
+#include "freertos/FreeRTOS.h"
+#include "freertos/event_groups.h"
+#include "esp_wifi.h"
+#include "tcpip_adapter.h"
+#include "esp_event.h"
+#include "led.h"
+extern bool bypass_wifi_manager;
+#define JOIN_TIMEOUT_MS (10000)
+
+extern EventGroupHandle_t wifi_event_group;
+extern const int CONNECTED_BIT;
+static const char * TAG = "cmd_wifi";
+/** Arguments used by 'join' function */
+static struct {
+    struct arg_int *timeout;
+    struct arg_str *ssid;
+    struct arg_str *password;
+    struct arg_end *end;
+} join_args;
+
+///** Arguments used by 'join' function */
+//static struct {
+//    struct arg_int *autoconnect;
+//    struct arg_end *end;
+//} auto_connect_args;
+
+/*
+ * Callback registered for WIFI_EVENT_STA_DISCONNECTED and IP_EVENT_STA_GOT_IP.
+ * On a station disconnect it starts the green LED blink, asks the driver to
+ * reconnect and clears CONNECTED_BIT; once an IP is obtained it restores the
+ * LED and sets CONNECTED_BIT. Any other event is ignored.
+ * arg and event_data are not used.
+ */
+static void event_handler(void* arg, esp_event_base_t event_base,
+                                int32_t event_id, void* event_data)
+{
+    const bool sta_lost = (event_base == WIFI_EVENT) && (event_id == WIFI_EVENT_STA_DISCONNECTED);
+    if (sta_lost) {
+        led_blink_pushed(LED_GREEN, 250, 250);
+        esp_wifi_connect();
+        xEventGroupClearBits(wifi_event_group, CONNECTED_BIT);
+        return;
+    }
+
+    const bool got_ip = (event_base == IP_EVENT) && (event_id == IP_EVENT_STA_GOT_IP);
+    if (!got_ip) {
+        return;
+    }
+    led_unpush(LED_GREEN);
+    xEventGroupSetBits(wifi_event_group, CONNECTED_BIT);
+}
+//bool wait_for_wifi(){
+//
+//	bool connected=(xEventGroupGetBits(wifi_event_group) & CONNECTED_BIT)!=0;
+//
+//	if(!connected){
+//		ESP_LOGD(TAG,"Waiting for WiFi...");
+//	    connected = (xEventGroupWaitBits(wifi_event_group, CONNECTED_BIT,
+//	                                   pdFALSE, pdTRUE, JOIN_TIMEOUT_MS / portTICK_PERIOD_MS)& CONNECTED_BIT)!=0;
+//	    if(!connected){
+//	    	ESP_LOGD(TAG,"wifi timeout.");
+//	    }
+//	    else
+//	    {
+//	    	ESP_LOGI(TAG,"WiFi Connected!");
+//	    }
+//	}
+//
+//
+//    return connected;
+//
+//}
+/*
+ * One-time bring-up of the WiFi driver for the console 'join' command.
+ * A function-local static flag makes every call after the first a no-op.
+ * Each ESP-IDF call is wrapped in ESP_ERROR_CHECK. The driver is started in
+ * WIFI_MODE_NULL with RAM-only storage; wifi_join() later switches it to
+ * station mode.
+ */
+static void initialise_wifi(void)
+{
+    static bool initialized = false;
+    if (initialized) {
+        return;
+    }
+    tcpip_adapter_init();
+    // Now moved to esp_app_main: wifi_event_group = xEventGroupCreate();
+    ESP_ERROR_CHECK(esp_event_loop_create_default());
+    wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT();
+    ESP_ERROR_CHECK( esp_wifi_init(&cfg) );
+    // event_handler keeps CONNECTED_BIT and the green LED in sync with the link state
+    ESP_ERROR_CHECK( esp_event_handler_register(WIFI_EVENT, WIFI_EVENT_STA_DISCONNECTED, &event_handler, NULL) );
+    ESP_ERROR_CHECK( esp_event_handler_register(IP_EVENT, IP_EVENT_STA_GOT_IP, &event_handler, NULL) );
+    ESP_ERROR_CHECK( esp_wifi_set_storage(WIFI_STORAGE_RAM) );
+    ESP_ERROR_CHECK( esp_wifi_set_mode(WIFI_MODE_NULL) );
+    ESP_ERROR_CHECK( esp_wifi_start() );
+    initialized = true;
+    // blink the green LED once the driver has been started
+	led_blink(LED_GREEN, 250, 250);
+}
+
+/*
+ * Connects to an access point as a station and waits for CONNECTED_BIT.
+ *
+ * ssid        name of the access point (copied into the station config)
+ * pass        pre-shared key; skipped when NULL
+ * timeout_ms  maximum wait, in milliseconds, for CONNECTED_BIT
+ *
+ * Returns true when CONNECTED_BIT is set in wifi_event_group before the
+ * timeout expires, false otherwise.
+ */
+static bool wifi_join(const char *ssid, const char *pass, int timeout_ms)
+{
+    initialise_wifi();
+    wifi_config_t wifi_config = { 0 };
+    // NOTE(review): strncpy does not NUL-terminate when the source fills the
+    // whole field - confirm the driver accepts full-length, unterminated values.
+    strncpy((char *) wifi_config.sta.ssid, ssid, sizeof(wifi_config.sta.ssid));
+    if (pass) {
+        strncpy((char *) wifi_config.sta.password, pass, sizeof(wifi_config.sta.password));
+    }
+
+    ESP_ERROR_CHECK( esp_wifi_set_mode(WIFI_MODE_STA) );
+    ESP_ERROR_CHECK( esp_wifi_set_config(ESP_IF_WIFI_STA, &wifi_config) );
+    ESP_ERROR_CHECK( esp_wifi_connect() );
+
+    // event_handler sets CONNECTED_BIT on IP_EVENT_STA_GOT_IP; the bit is not cleared on exit (pdFALSE)
+    int bits = xEventGroupWaitBits(wifi_event_group, CONNECTED_BIT,
+                                   pdFALSE, pdTRUE, timeout_ms / portTICK_PERIOD_MS);
+    return (bits & CONNECTED_BIT) != 0;
+}
+
+
+/*
+ * Placeholder console handler: the whole body is commented out, so it ignores
+ * argc/argv and always returns 0. It is not registered with the console
+ * anywhere in this file.
+ */
+static int set_auto_connect(int argc, char **argv)
+{
+//    int nerrors = arg_parse(argc, argv, (void **) &join_args);
+//    if (nerrors != 0) {
+//        arg_print_errors(stderr, join_args.end, argv[0]);
+//        return 1;
+//    }
+//    ESP_LOGI(__func__, "Connecting to '%s'",
+//             join_args.ssid->sval[0]);
+//
+//    /* set default value*/
+//    if (join_args.timeout->count == 0) {
+//        join_args.timeout->ival[0] = JOIN_TIMEOUT_MS;
+//    }
+//
+//    bool connected = wifi_join(join_args.ssid->sval[0],
+//                               join_args.password->sval[0],
+//                               join_args.timeout->ival[0]);
+//    if (!connected) {
+//        ESP_LOGW(__func__, "Connection timed out");
+//        return 1;
+//    }
+//    ESP_LOGI(__func__, "Connected");
+    return 0;
+}
+/*
+ * Console handler for the 'join' command.
+ * Parses the command line into join_args, applies JOIN_TIMEOUT_MS when no
+ * timeout was supplied, then attempts the connection through wifi_join().
+ * Returns 0 once connected, 1 on a parse error or a connection timeout.
+ */
+static int connect(int argc, char **argv)
+{
+    if (arg_parse(argc, argv, (void **) &join_args) != 0) {
+        arg_print_errors(stderr, join_args.end, argv[0]);
+        return 1;
+    }
+
+    const char *target_ssid = join_args.ssid->sval[0];
+    ESP_LOGI(__func__, "Connecting to '%s'",
+             target_ssid);
+
+    /* fall back to the default timeout when the option was not given */
+    if (join_args.timeout->count == 0) {
+        join_args.timeout->ival[0] = JOIN_TIMEOUT_MS;
+    }
+
+    if (wifi_join(target_ssid,
+                  join_args.password->sval[0],
+                  join_args.timeout->ival[0])) {
+        ESP_LOGI(__func__, "Connected");
+        return 0;
+    }
+
+    ESP_LOGW(__func__, "Connection timed out");
+    return 1;
+}
+/*
+ * Builds the argument table for the 'join' console command (optional
+ * --timeout, mandatory <ssid>, optional <pass>) and registers the command
+ * with connect() as its handler. Registration failure is fatal through
+ * ESP_ERROR_CHECK.
+ */
+void register_wifi_join()
+{
+    join_args.timeout = arg_int0(NULL, "timeout", "<t>", "Connection timeout, ms");
+    join_args.ssid = arg_str1(NULL, NULL, "<ssid>", "SSID of AP");
+    join_args.password = arg_str0(NULL, NULL, "<pass>", "PSK of AP");
+    join_args.end = arg_end(2);
+
+    const esp_console_cmd_t join_cmd = {
+        .command = "join",
+        .help = "Join WiFi AP as a station",
+        .hint = NULL,
+        .func = &connect,
+        .argtable = &join_args
+    };
+    ESP_ERROR_CHECK( esp_console_cmd_register(&join_cmd) );
+}
+
+/*
+ * Registers the WiFi console commands. The WiFi driver itself is only
+ * brought up here when bypass_wifi_manager is set.
+ */
+void register_wifi()
+{
+    register_wifi_join();
+    if (!bypass_wifi_manager) {
+        return;
+    }
+    initialise_wifi();
+}

+ 3 - 5
main/cmd_wifi.h

@@ -1,7 +1,5 @@
-/* Console example — declarations of command registration functions.
-
+/* Console example — declarations of command registration functions.
    This example code is in the Public Domain (or CC0 licensed, at your option.)
-
    Unless required by applicable law or agreed to in writing, this
    software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
    CONDITIONS OF ANY KIND, either express or implied.
@@ -12,9 +10,9 @@
 extern "C" {
 #endif
 
-
+// Register WiFi functions
+void register_wifi();
 
 #ifdef __cplusplus
 }
 #endif
-

+ 4 - 2
main/component.mk

@@ -6,7 +6,9 @@
 # lib(subdirectory_name).a in the build directory. This behaviour is entirely configurable,
 # please read the SDK documents if you need to do this.
 #
-CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_DEBUG
+#CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_DEBUG
+CFLAGS += -D LOG_LOCAL_LEVEL=ESP_LOG_INFO
 COMPONENT_ADD_INCLUDEDIRS += $(COMPONENT_PATH)/../tools
 COMPONENT_EXTRA_INCLUDES += $(PROJECT_PATH)/components/tools/
-LDFLAGS += -s
+LDFLAGS += -s
+COMPONENT_EMBED_TXTFILES :=  ${PROJECT_PATH}/server_certs/github.pem

+ 719 - 0
main/config.c

@@ -0,0 +1,719 @@
+/*
+ *  Squeezelite for esp32
+ *
+ *  (c) Sebastien 2019
+ *      Philippe G. 2019, philippe_44@outlook.com
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+//#define LOG_LOCAL_LEVEL ESP_LOG_VERBOSE
+#include "config.h"
+#include "nvs_utilities.h"
+
+#include <stdio.h>
+#include <string.h>
+#include "esp_system.h"
+#include "esp_log.h"
+#include "esp_console.h"
+#include "esp_vfs_dev.h"
+#include "driver/uart.h"
+#include "linenoise/linenoise.h"
+#include "argtable3/argtable3.h"
+#include "cmd_decl.h"
+#include "esp_vfs_fat.h"
+#include "nvs.h"
+#include "nvs_flash.h"
+#include "nvs_utilities.h"
+#include "cJSON.h"
+#include "freertos/timers.h"
+#include "freertos/event_groups.h"
+
+
+/* Period of the config timer, in milliseconds (divided by portTICK_RATE_MS when the timer is created). */
+#define CONFIG_COMMIT_DELAY 1000
+/* Parenthesized so the product stays a single operand wherever the macro is expanded. */
+#define LOCK_MAX_WAIT (20*CONFIG_COMMIT_DELAY)
+static const char * TAG = "config";
+/* Root cJSON object holding the cached configuration entries, keyed by name. */
+static cJSON * nvs_json=NULL;
+/* Timer created by config_start_timer(). */
+static TimerHandle_t timer;
+/* Mutex created by config_init(). */
+static SemaphoreHandle_t config_mutex = NULL;
+/* Event group carrying the CONFIG_* bits below. */
+static EventGroupHandle_t config_group;
+/* NOTE(review): presumably set while no change is waiting to be committed - confirm against the commit code. */
+static const int CONFIG_NO_COMMIT_PENDING = BIT0;
+/* Passed to config_set_group_bit() by config_init() around nvs_load_config(); while this bit
+ * is set, config_set_entry_changed_flag() forces the "chg" flag of entries to false. */
+static const int CONFIG_LOAD_BIT = BIT1;
+
+bool config_lock(TickType_t xTicksToWait);
+void config_unlock();
+extern esp_err_t nvs_load_config();
+void config_raise_change(bool flag);
+cJSON_bool config_is_entry_changed(cJSON * entry);
+bool config_set_group_bit(int bit_num,bool flag);
+cJSON * config_set_value_safe(nvs_type_t nvs_type, const char *key, void * value);
+static void vCallbackFunction( TimerHandle_t xTimer );
+void config_set_entry_changed_flag(cJSON * entry, cJSON_bool flag);
+/*
+ * IMPLEMENT_SET_DEFAULT(t,nt): defines config_set_default_<t>(key, value), which forwards
+ * a numeric default of C type t to config_set_default() with nvs type nt.
+ * The address of the by-value parameter is passed directly: the previous version copied
+ * it into a malloc'ed scratch buffer (dereferenced without a NULL check) and freed the
+ * buffer right after the call, so config_set_default() never kept the pointer.
+ */
+#define IMPLEMENT_SET_DEFAULT(t,nt) void config_set_default_## t (const char *key, t  value){\
+	config_set_default(nt, key,&value,0); }
+/*
+ * IMPLEMENT_GET_NUM(t,nt): defines config_get_<t>(key, value), which fetches the entry
+ * through config_alloc_get(), copies it into *value and frees the temporary.
+ * Returns ESP_OK on success, ESP_FAIL when value is NULL or config_alloc_get() returns NULL.
+ */
+#define IMPLEMENT_GET_NUM(t,nt) esp_err_t config_get_## t (const char *key, t *  value){\
+		if(value==NULL){ return ESP_FAIL; }\
+		void * pval = config_alloc_get(nt, key);\
+		if(pval!=NULL){ *value = *(t * )pval; free(pval); return ESP_OK; }\
+		return ESP_FAIL;}
+/*
+ * The custom allocator is compiled under exactly the same condition as the code that
+ * installs it in init_cJSON(). The previous guards were opposites (#ifdef here, #ifndef
+ * at the point of use), so the hook was either never installed or referenced an
+ * undefined function. RECOVERY_APPLICATION is a 0/1 valued setting (0 = squeezelite,
+ * 1 = recovery), hence the value test rather than a definedness test.
+ */
+#if RECOVERY_APPLICATION==0
+/* cJSON malloc hook: allocates from SPIRAM; logs and returns NULL on failure. */
+static void * malloc_fn(size_t sz){
+
+	void * ptr = heap_caps_malloc(sz, MALLOC_CAP_SPIRAM);
+	if(ptr==NULL){
+		ESP_LOGE(TAG,"malloc_fn:  unable to allocate memory!");
+	}
+	return ptr;
+}
+/* Currently unused (its installation below is commented out).
+ * NOTE(review): cJSON's free hook returns void, so the signature must be adapted before enabling it. */
+static void * free_fn(void * ptr){
+	if(ptr!=NULL){
+		free(ptr);
+	}
+	else {
+		ESP_LOGW(TAG,"free_fn: Cannot free null pointer!");
+	}
+	return NULL;
+}
+#endif
+/*
+ * Installs the cJSON allocation hooks.
+ * In squeezelite mode cJSON allocates through malloc_fn (SPIRAM); in recovery mode the
+ * default allocator is kept.
+ */
+void init_cJSON(){
+	static cJSON_Hooks hooks;
+	// initialize cJSON hooks it uses SPIRAM memory
+	// as opposed to IRAM
+#if RECOVERY_APPLICATION==0
+	// In squeezelite mode, allocate memory from PSRAM.  Otherwise allocate from internal RAM
+	// as recovery will lock flash access when erasing FLASH or writing to OTA partition.
+	hooks.malloc_fn=&malloc_fn;
+    //hooks.free_fn=&free_fn;
+	cJSON_InitHooks(&hooks);
+#endif
+}
+/*
+ * Initialises the configuration cache: creates the mutex and event group, installs the
+ * cJSON hooks, (re)creates the nvs_json root object, loads the stored configuration
+ * and starts the config timer.
+ */
+void config_init(){
+	ESP_LOGD(TAG, "Creating mutex for Config");
+	config_mutex = xSemaphoreCreateMutex();
+	ESP_LOGD(TAG, "Creating event group");
+	config_group = xEventGroupCreate();
+	ESP_LOGD(TAG, "Loading config from nvs");
+
+	init_cJSON();
+	// drop any previous cache before building a fresh one
+	if(nvs_json !=NULL){
+		cJSON_Delete(nvs_json);
+	}
+	nvs_json = cJSON_CreateObject();
+
+	// CONFIG_LOAD_BIT is raised around the load; config_set_entry_changed_flag()
+	// checks it so entries read from nvs are not flagged as changed
+	config_set_group_bit(CONFIG_LOAD_BIT,true);
+	nvs_load_config();
+	config_set_group_bit(CONFIG_LOAD_BIT,false);
+	config_start_timer();
+}
+
+/*
+ * Creates and starts the one-shot config timer (period CONFIG_COMMIT_DELAY ms) whose
+ * expiry runs vCallbackFunction. Failures are logged; the function returns without
+ * starting anything when the timer cannot be created, since a NULL handle must not
+ * be passed to xTimerStart().
+ */
+void config_start_timer(){
+	ESP_LOGD(TAG, "Starting config timer");
+	timer = xTimerCreate("configTimer", CONFIG_COMMIT_DELAY / portTICK_RATE_MS, pdFALSE, NULL, vCallbackFunction);
+	if( timer == NULL ) {
+		ESP_LOGE(TAG, "config commitment timer could not be created.");
+		return;
+	}
+    if( xTimerStart( timer , CONFIG_COMMIT_DELAY/ portTICK_RATE_MS ) != pdPASS )    {
+        ESP_LOGE(TAG, "config commitment timer failed to start.");
+    }
+
+}
+
+/*
+ * Returns the nvs type stored in the "type" member of a cache entry.
+ * Returns 0 when entry is NULL or has no "type" member. The NULL case used to return
+ * `true`, i.e. the value 1, which is not an error indication for an nvs_type_t and
+ * disagreed with both the missing-member path and config_get_entry_type().
+ */
+nvs_type_t  config_get_item_type(cJSON * entry){
+	if(entry==NULL){
+		ESP_LOGE(TAG,"null pointer received!");
+		return 0;
+	}
+	cJSON * item_type = cJSON_GetObjectItemCaseSensitive(entry, "type");
+	if(item_type ==NULL ) {
+		ESP_LOGE(TAG, "Item type not found! ");
+		return 0;
+	}
+	ESP_LOGD(TAG,"Found item type %f",item_type->valuedouble);
+	return item_type->valuedouble;
+}
+
+
+/*
+ * Builds a cache entry ({"type", "value", "chg"}) for key and stores it in nvs_json.
+ *
+ * nvs_type  type of the data pointed to by value
+ * key       name of the configuration entry
+ * value     pointer to the data; a NUL-terminated string for NVS_TYPE_STR, otherwise a
+ *           pointer to an integer of the matching width
+ *
+ * When the key already exists with a non-string type and a string is supplied, the
+ * string is converted with atof() and stored under the existing type.
+ * An existing entry is replaced only when the new entry differs from it; a new or
+ * replaced entry is flagged as changed through config_set_entry_changed_flag().
+ *
+ * Returns the entry that is held by nvs_json for key after the call (owned by nvs_json,
+ * the caller must not free it), or NULL when the entry object could not be allocated.
+ * NOTE(review): nothing is locked here - presumably the caller holds the config lock; confirm.
+ */
+cJSON * config_set_value_safe(nvs_type_t nvs_type, const char *key, void * value){
+	cJSON * entry = cJSON_CreateObject();
+
+	double numvalue = 0;
+	if(entry == NULL) {
+		ESP_LOGE(TAG, "Unable to allocate memory for entry %s",key);
+		return NULL;
+	}
+
+	cJSON * existing = cJSON_GetObjectItemCaseSensitive(nvs_json, key);
+	if(existing !=NULL && nvs_type == NVS_TYPE_STR && config_get_item_type(existing) != NVS_TYPE_STR  ) {
+		ESP_LOGW(TAG, "Storing numeric value from string");
+		numvalue = atof((char *)value);
+		cJSON_AddNumberToObject(entry,"value", numvalue	);
+		nvs_type_t exist_type = config_get_item_type(existing);
+		ESP_LOGW(TAG, "Stored  value %f from string %s as type %d",numvalue, (char *)value,exist_type);
+		cJSON_AddNumberToObject(entry,"type", exist_type);
+	}
+	else {
+		cJSON_AddNumberToObject(entry,"type", nvs_type	);
+		switch (nvs_type) {
+			case NVS_TYPE_I8:
+				cJSON_AddNumberToObject(entry,"value", *(int8_t*)value	);
+				break;
+			case NVS_TYPE_I16:
+				cJSON_AddNumberToObject(entry,"value", *(int16_t*)value	);
+				break;
+			case NVS_TYPE_I32:
+				cJSON_AddNumberToObject(entry,"value", *(int32_t*)value	);
+				break;
+			case NVS_TYPE_U8:
+				cJSON_AddNumberToObject(entry,"value", *(uint8_t*)value	);
+				break;
+			case NVS_TYPE_U16:
+				cJSON_AddNumberToObject(entry,"value", *(uint16_t*)value	);
+				break;
+			case NVS_TYPE_U32:
+				cJSON_AddNumberToObject(entry,"value", *(uint32_t*)value	);
+				break;
+			case NVS_TYPE_STR:
+				cJSON_AddStringToObject(entry, "value", (char *)value);
+				break;
+			case NVS_TYPE_I64:
+			case NVS_TYPE_U64:
+			default:
+				ESP_LOGE(TAG, "nvs type %u not supported", nvs_type);
+				break;
+		}
+	}
+	if(existing!=NULL ) {
+		ESP_LOGV(TAG, "Changing existing entry [%s].", key);
+		char * exist_str = cJSON_PrintUnformatted(existing);
+		if(exist_str!=NULL){
+			ESP_LOGV(TAG,"Existing entry: %s", exist_str);
+			free(exist_str);
+		}
+		else {
+			ESP_LOGV(TAG,"Failed to print existing entry");
+		}
+		// set commit flag as equal so we can compare
+		cJSON_AddBoolToObject(entry,"chg",config_is_entry_changed(existing));
+		if(!cJSON_Compare(entry,existing,false)){
+			char * entry_str = cJSON_PrintUnformatted(entry);
+			if(entry_str!=NULL){
+				ESP_LOGD(TAG,"New config object: \n%s", entry_str );
+				free(entry_str);
+			}
+			else {
+				ESP_LOGD(TAG,"Failed to print entry");
+			}
+			ESP_LOGI(TAG, "Setting changed flag config [%s]", key);
+			config_set_entry_changed_flag(entry,true);
+			ESP_LOGI(TAG, "Updating config [%s]", key);
+			// nvs_json takes ownership of entry; the item previously stored under key is replaced
+			cJSON_ReplaceItemInObject(nvs_json,key, entry);
+			entry_str = cJSON_PrintUnformatted(entry);
+			if(entry_str!=NULL){
+				ESP_LOGD(TAG,"New config: %s", entry_str );
+				free(entry_str);
+			}
+			else {
+				ESP_LOGD(TAG,"Failed to print entry");
+			}
+		}
+		else {
+			ESP_LOGD(TAG, "Config not changed. ");
+			// The candidate entry was never attached to nvs_json: release it instead of
+			// leaking it, and hand back the cached entry, which remains authoritative.
+			cJSON_Delete(entry);
+			entry = existing;
+		}
+	}
+	else {
+		// This is a new entry.
+		config_set_entry_changed_flag(entry,true);
+		cJSON_AddItemToObject(nvs_json, key, entry);
+	}
+
+	return entry;
+}
+
+/*
+ * Reads the nvs type recorded in the "type" member of a cache entry.
+ * Yields 0 (after logging an error) when entry is NULL or carries no "type" member.
+ */
+nvs_type_t config_get_entry_type(cJSON * entry){
+	nvs_type_t found = 0;
+	if(entry==NULL){
+		ESP_LOGE(TAG,"null pointer received!");
+	}
+	else {
+		cJSON * type_node = cJSON_GetObjectItemCaseSensitive(entry, "type");
+		if(type_node != NULL) {
+			ESP_LOGV(TAG,"Found type %s",type_to_str(type_node->valuedouble));
+			found = type_node->valuedouble;
+		}
+		else {
+			ESP_LOGE(TAG, "Entry type not found in nvs cache for existing setting.");
+		}
+	}
+	return found;
+}
+/*
+ * Sets the "chg" member of a cache entry.
+ *
+ * entry  cache entry to update; a NULL entry is logged and ignored
+ * flag   requested state of the change flag
+ *
+ * While CONFIG_LOAD_BIT is set in config_group the flag is forced to false.
+ * The "chg" member is created when missing. When the effective flag is true,
+ * config_raise_change(true) is called.
+ */
+void config_set_entry_changed_flag(cJSON * entry, cJSON_bool flag){
+	ESP_LOGV(TAG, "config_set_entry_changed_flag: begin");
+	if(entry==NULL){
+		ESP_LOGE(TAG,"null pointer received!");
+		return;
+	}
+	// entries handled while the configuration is being loaded are never marked as changed
+	bool bIsConfigLoading=((xEventGroupGetBits(config_group) & CONFIG_LOAD_BIT)!=0);
+	bool changedFlag=bIsConfigLoading?false:flag;
+	ESP_LOGV(TAG, "config_set_entry_changed_flag: retrieving chg flag from entry");
+	cJSON * changed = cJSON_GetObjectItemCaseSensitive(entry, "chg");
+	if(changed ==NULL ) {
+		ESP_LOGV(TAG, "config_set_entry_changed_flag: chg flag not found. Adding. ");
+		cJSON_AddBoolToObject(entry,"chg",changedFlag);
+	}
+	else {
+		ESP_LOGV(TAG, "config_set_entry_changed_flag: Existing change flag found. ");
+		if(cJSON_IsTrue(changed) && changedFlag){
+			// already flagged: only a warning is emitted, the node is left untouched
+			ESP_LOGW(TAG, "Commit flag not changed!");
+		}
+		else{
+			ESP_LOGV(TAG, "config_set_entry_changed_flag: Updating change flag to %s",changedFlag?"TRUE":"FALSE");
+			// the boolean node is flipped in place by rewriting its cJSON type
+			changed->type = changedFlag?cJSON_True:cJSON_False ;
+		}
+	}
+
+	if(changedFlag) {
+		ESP_LOGV(TAG, "config_set_entry_changed_flag: Calling config_raise_change. ");
+		config_raise_change(true);
+	}
+	ESP_LOGV(TAG, "config_set_entry_changed_flag: done. ");
+}
+/*
+ * Reports whether a cache entry is flagged as changed ("chg" member is true).
+ * A NULL entry or an entry without a "chg" member is logged and reported as changed.
+ */
+cJSON_bool config_is_entry_changed(cJSON * entry){
+	cJSON * chg_node = NULL;
+
+	if(entry==NULL){
+		ESP_LOGE(TAG,"null pointer received!");
+		return true;
+	}
+
+	chg_node = cJSON_GetObjectItemCaseSensitive(entry, "chg");
+	if(chg_node != NULL) {
+		return cJSON_IsTrue(chg_node);
+	}
+
+	ESP_LOGE(TAG, "Change flag not found! ");
+	return true;
+}
+
+
+
+
+/**
+ * Copies the "value" member of a cached config entry into a freshly
+ * allocated buffer of the requested NVS type.
+ *
+ * @param nvs_type  type the caller expects; it must equal the type stored in
+ *                  the entry's "type" member.
+ * @param entry     cached config entry; NULL is logged and rejected.
+ * @return a heap buffer holding the value (a duplicated string for
+ *         NVS_TYPE_STR, one integer of the matching width for the integer
+ *         types), or NULL when the entry is NULL, has no "value", has a
+ *         different type, is of an unsupported type, or allocation fails.
+ *         The caller owns the buffer and must free() it.
+ */
+void * config_safe_alloc_get_entry_value(nvs_type_t nvs_type, cJSON * entry){
+	void * value=NULL;
+	if(entry==NULL){
+		ESP_LOGE(TAG,"null pointer received!");
+		// Nothing can be looked up on a NULL entry.
+		return NULL;
+	}
+	ESP_LOGV(TAG, "getting config value type %s", type_to_str(nvs_type));
+	cJSON * entry_value = cJSON_GetObjectItemCaseSensitive(entry, "value");
+	if(entry_value==NULL ) {
+		char * entry_str = cJSON_PrintUnformatted(entry);
+		if(entry_str!=NULL){
+			ESP_LOGE(TAG, "Missing config value!. Object: \n%s", entry_str);
+			free(entry_str);
+		}
+		else{
+			ESP_LOGE(TAG, "Missing config value");
+		}
+		return NULL;
+	}
+
+	nvs_type_t type = config_get_entry_type(entry);
+	if(nvs_type != type){
+		// requested value type different than the stored type
+		char * entry_str = cJSON_PrintUnformatted(entry);
+		if(entry_str!=NULL){
+			ESP_LOGE(TAG, "Requested value type %s, found value type %s instead, Object: \n%s", type_to_str(nvs_type), type_to_str(type),entry_str);
+			free(entry_str);
+		}
+		else{
+			ESP_LOGE(TAG, "Requested value type %s, found value type %s instead", type_to_str(nvs_type), type_to_str(type));
+		}
+
+		return NULL;
+	}
+
+	// Set for the integer types only, so one allocation check covers them all.
+	bool is_number = true;
+	if (nvs_type == NVS_TYPE_I8) {
+		value=malloc(sizeof(int8_t));
+		if(value!=NULL){
+			*(int8_t *)value = (int8_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_U8) {
+		value=malloc(sizeof(uint8_t));
+		if(value!=NULL){
+			*(uint8_t *)value = (uint8_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_I16) {
+		value=malloc(sizeof(int16_t));
+		if(value!=NULL){
+			*(int16_t *)value = (int16_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_U16) {
+		value=malloc(sizeof(uint16_t));
+		if(value!=NULL){
+			*(uint16_t *)value = (uint16_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_I32) {
+		value=malloc(sizeof(int32_t));
+		if(value!=NULL){
+			*(int32_t *)value = (int32_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_U32) {
+		value=malloc(sizeof(uint32_t));
+		if(value!=NULL){
+			*(uint32_t *)value = (uint32_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_I64) {
+		value=malloc(sizeof(int64_t));
+		if(value!=NULL){
+			*(int64_t *)value = (int64_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_U64) {
+		value=malloc(sizeof(uint64_t));
+		if(value!=NULL){
+			*(uint64_t *)value = (uint64_t)entry_value->valuedouble;
+		}
+	} else if (nvs_type == NVS_TYPE_STR) {
+		is_number = false;
+		if(!cJSON_IsString(entry_value)){
+			// A non-string item may carry no valuestring; never hand NULL to %s.
+			const char * item_key = (entry_value->string!=NULL)?entry_value->string:"(null)";
+			const char * item_text = (entry_value->valuestring!=NULL)?entry_value->valuestring:"(null)";
+			char * entry_str = cJSON_PrintUnformatted(entry);
+			if(entry_str!=NULL){
+				ESP_LOGE(TAG, "requested value type string, config type is different. key: %s, value: %s, type %d, Object: \n%s",
+					item_key,
+					item_text,
+					entry_value->type,
+					entry_str);
+				free(entry_str);
+			}
+			else {
+				ESP_LOGE(TAG, "requested value type string, config type is different. key: %s, value: %s, type %d",
+					item_key,
+					item_text,
+					entry_value->type);
+			}
+		}
+		else {
+			value=(void *)strdup(cJSON_GetStringValue(entry_value));
+			if(value==NULL){
+				char * entry_str = cJSON_PrintUnformatted(entry);
+				if(entry_str!=NULL){
+					ESP_LOGE(TAG, "strdup failed on value for object \n%s",entry_str);
+					free(entry_str);
+				}
+				else {
+					ESP_LOGE(TAG, "strdup failed on value");
+				}
+			}
+		}
+	} else if (nvs_type == NVS_TYPE_BLOB) {
+		is_number = false;
+		ESP_LOGE(TAG, "Unsupported type NVS_TYPE_BLOB");
+	} else {
+		// Any other type yields NULL without a message.
+		is_number = false;
+	}
+	if(is_number && value==NULL){
+		ESP_LOGE(TAG, "malloc failed on value of type %s", type_to_str(nvs_type));
+	}
+	return value;
+}
+
+/**
+ * Writes every cached config entry that is marked as changed to NVS.
+ *
+ * The config mutex is held for the whole pass. Each entry stored
+ * successfully has its changed marker cleared; entries that fail are logged
+ * and keep their marker. The global change flag is reset at the end
+ * regardless of individual failures.
+ */
+void config_commit_to_nvs(){
+	ESP_LOGI(TAG,"Committing configuration to nvs. Locking config object.");
+	ESP_LOGV(TAG,"config_commit_to_nvs. Locking config object.");
+	if(!config_lock(LOCK_MAX_WAIT/portTICK_PERIOD_MS)){
+		ESP_LOGE(TAG, "config_commit_to_nvs: Unable to lock config for commit ");
+		return ;
+	}
+	if(nvs_json==NULL){
+		ESP_LOGE(TAG, ": cJSON nvs cache object not set.");
+		// The mutex was taken above and must be released on this exit path too.
+		config_unlock();
+		return;
+	}
+	ESP_LOGV(TAG,"config_commit_to_nvs. Config Locked!");
+	cJSON * entry=nvs_json->child;
+	while(entry!= NULL){
+		char * entry_str = cJSON_PrintUnformatted(entry);
+		if(entry_str!=NULL){
+			ESP_LOGV(TAG,"config_commit_to_nvs processing item %s",entry_str);
+			free(entry_str);
+		}
+
+		if(config_is_entry_changed(entry)){
+			ESP_LOGD(TAG, "Committing entry %s value to nvs.",(entry->string==NULL)?"UNKNOWN":entry->string);
+			nvs_type_t type = config_get_entry_type(entry);
+			// value is a heap copy owned by this function; freed right after storing.
+			void * value = config_safe_alloc_get_entry_value(type, entry);
+			if(value!=NULL){
+				esp_err_t err = store_nvs_value(type,entry->string,value);
+				free(value);
+				if(err!=ESP_OK){
+					char * entry_str = cJSON_PrintUnformatted(entry);
+					if(entry_str!=NULL){
+						ESP_LOGE(TAG, "Error comitting value to nvs for key %s, Object: \n%s",entry->string,entry_str);
+						free(entry_str);
+					}
+					else {
+						ESP_LOGE(TAG, "Error comitting value to nvs for key %s",entry->string);
+					}
+				}
+				else {
+					config_set_entry_changed_flag(entry, false);
+				}
+			}
+			else {
+				char * entry_str = cJSON_PrintUnformatted(entry);
+				if(entry_str!=NULL){
+					ESP_LOGE(TAG, "Unable to retrieve value. Error comitting value to nvs for key %s, Object: \n%s",entry->string,entry_str);
+					free(entry_str);
+				}
+				else {
+					ESP_LOGE(TAG, "Unable to retrieve value. Error comitting value to nvs for key %s",entry->string);
+				}
+			}
+		}
+		else {
+			ESP_LOGV(TAG,"config_commit_to_nvs. Item already committed.  Ignoring.");
+		}
+		taskYIELD();  /* allows the freeRTOS scheduler to take over if needed. */
+		entry = entry->next;
+	}
+	ESP_LOGV(TAG,"config_commit_to_nvs. Resetting the global commit flag.");
+	config_raise_change(false);
+	ESP_LOGV(TAG,"config_commit_to_nvs. Releasing the lock object.");
+	config_unlock();
+}
+/**
+ * Reports whether a commit is pending, i.e. whether CONFIG_NO_COMMIT_PENDING
+ * is currently cleared in config_group.
+ */
+bool config_has_changes(){
+	const bool nothing_pending = (xEventGroupGetBits(config_group) & CONFIG_NO_COMMIT_PENDING)!=0;
+	return !nothing_pending;
+}
+
+
+/**
+ * Blocks until CONFIG_NO_COMMIT_PENDING is set in config_group.
+ *
+ * Each wait lasts at most twice CONFIG_COMMIT_DELAY; a timeout is logged and
+ * the wait is retried, so the function only returns once the bit is set.
+ *
+ * @return true (the loop exits only when no commit is pending).
+ */
+bool wait_for_commit(){
+	bool pending=(xEventGroupGetBits(config_group) & CONFIG_NO_COMMIT_PENDING)==0;
+	while (pending){
+		ESP_LOGW(TAG,"Waiting for config commit ...");
+		pending = (xEventGroupWaitBits(config_group, CONFIG_NO_COMMIT_PENDING,pdFALSE, pdTRUE, (CONFIG_COMMIT_DELAY*2) / portTICK_PERIOD_MS) & CONFIG_NO_COMMIT_PENDING)==0;
+		if(!pending){
+			ESP_LOGI(TAG,"Config committed!");
+		}
+		else {
+			ESP_LOGW(TAG,"Timeout waiting for config commit.");
+		}
+	}
+	return !pending;
+}
+
+/**
+ * Takes config_mutex, waiting at most xTicksToWait ticks.
+ *
+ * @return true when the mutex was taken, false (after logging an error)
+ *         otherwise.
+ */
+bool config_lock(TickType_t xTicksToWait) {
+	ESP_LOGV(TAG, "Locking config json object");
+	if( xSemaphoreTake( config_mutex, xTicksToWait ) != pdTRUE ) {
+		ESP_LOGE(TAG, "Semaphore take failed. Unable to lock config Json object mutex");
+		return false;
+	}
+	ESP_LOGV(TAG, "config Json object locked!");
+	return true;
+}
+
+/** Gives back config_mutex, the mutex that config_lock() takes. */
+void config_unlock() {
+	ESP_LOGV(TAG, "Unlocking json buffer!");
+	xSemaphoreGive(config_mutex);
+}
+
+/**
+ * Timer callback: commits the configuration to NVS when changes are pending,
+ * then resets the timer that invoked it.
+ *
+ * When nothing is pending, a verbose trace is emitted once every 15 calls.
+ */
+static void vCallbackFunction( TimerHandle_t xTimer ) {
+	static int idle_calls=0;
+	if(!config_has_changes()){
+		idle_calls++;
+		if(idle_calls>=15){
+			ESP_LOGV(TAG,"commit timer: commit flag not set");
+			idle_calls=0;
+		}
+	}
+	else{
+		ESP_LOGI(TAG, "configuration has some uncommitted entries");
+		config_commit_to_nvs();
+	}
+	xTimerReset( xTimer, 10 );
+}
+/**
+ * Updates the global commit state: CONFIG_NO_COMMIT_PENDING is cleared when
+ * change_found is true and set when it is false. A debug line is logged only
+ * when config_set_group_bit() reports that the bit was actually updated.
+ */
+void config_raise_change(bool change_found){
+	const bool updated = config_set_group_bit(CONFIG_NO_COMMIT_PENDING,!change_found);
+	if(!updated){
+		return;
+	}
+	ESP_LOGD(TAG,"Config commit set to %s",change_found?"Pending Commit":"Committed");
+}
+/**
+ * Sets or clears bit_num in config_group.
+ *
+ * @param bit_num  bit mask to update.
+ * @param flag     true to set the bit(s), false to clear them.
+ * @return true when the event group was modified; false when the request was
+ *         ignored, either because CONFIG_NO_COMMIT_PENDING was targeted while
+ *         CONFIG_LOAD_BIT is set, or because the bit already had that state.
+ */
+bool config_set_group_bit(int bit_num,bool flag){
+	int curFlags=xEventGroupGetBits(config_group);
+	if((curFlags & CONFIG_LOAD_BIT) && bit_num == CONFIG_NO_COMMIT_PENDING ){
+		ESP_LOGD(TAG,"Loading config, ignoring changes");
+		return false;
+	}
+
+	bool is_set=(xEventGroupGetBits(config_group) & bit_num);
+	if(is_set == flag){
+		ESP_LOGV(TAG,"Flag %d already %s", bit_num, flag?"Set":"Cleared");
+		return false;
+	}
+
+	ESP_LOGV(TAG,"%s Flag %d ", flag?"Setting":"Clearing",bit_num);
+	if(flag){
+		xEventGroupSetBits(config_group, bit_num);
+	}
+	else {
+		xEventGroupClearBits(config_group, bit_num);
+	}
+	return true;
+}
+
+/**
+ * Adds a default value to the config cache when the key is not cached yet;
+ * an existing entry is left untouched.
+ *
+ * @param type           NVS type of the value.
+ * @param key            config key to look up / create.
+ * @param default_value  value handed to config_set_value_safe() for a new key.
+ * @param blob_size      not used by this function.
+ */
+void config_set_default(nvs_type_t type, const char *key, void * default_value, size_t blob_size) {
+	if(!config_lock(LOCK_MAX_WAIT/portTICK_PERIOD_MS)){
+		ESP_LOGE(TAG, "Unable to lock config");
+		return;
+	}
+
+	ESP_LOGV(TAG, "Checking if key %s exists in nvs cache for type %s.", key,type_to_str(type));
+	cJSON * cached = cJSON_GetObjectItemCaseSensitive(nvs_json, key);
+
+	if(cached==NULL){
+		// Key is unknown so far: create it from the supplied default.
+		ESP_LOGW(TAG, "Adding default value for [%s].", key);
+		cached=config_set_value_safe(type, key, default_value);
+		if(!cached){
+			ESP_LOGE(TAG, "Failed to add value to cache!");
+		}
+		char * dump = cJSON_PrintUnformatted(cached);
+		if(dump){
+			ESP_LOGD(TAG, "Value added to default for object: \n%s",dump);
+			free(dump);
+		}
+	}
+	else {
+		ESP_LOGV(TAG, "Entry found.");
+	}
+
+	config_unlock();
+
+}
+
+/**
+ * Erases a key from NVS and removes the matching entry from the config cache.
+ *
+ * The cache entry is removed even when the NVS erase fails; every failure is
+ * logged. The config mutex is held for the duration of the call.
+ *
+ * @param key  name of the config key to delete.
+ */
+void config_delete_key(const char *key){
+	nvs_handle nvs;
+	ESP_LOGD(TAG, "Deleting nvs entry for [%s]", key);
+	if(!config_lock(LOCK_MAX_WAIT/portTICK_PERIOD_MS)){
+		ESP_LOGE(TAG, "Unable to lock config for delete");
+		// void function: a plain return (the original returned a value here).
+		return;
+	}
+	esp_err_t err = nvs_open_from_partition(settings_partition, current_namespace, NVS_READWRITE, &nvs);
+	if (err == ESP_OK) {
+		err = nvs_erase_key(nvs, key);
+		if (err == ESP_OK) {
+			ESP_LOGD(TAG, "key [%s] erased from nvs.",key);
+			err = nvs_commit(nvs);
+			if (err == ESP_OK) {
+				ESP_LOGD(TAG, "nvs erase committed.");
+			}
+			else {
+				ESP_LOGE(TAG, "Unable to commit nvs erase operation for key [%s]. %s.",key,esp_err_to_name(err));
+			}
+		}
+		else {
+			ESP_LOGE(TAG, "Unable to delete nvs key [%s]. %s. ",key, esp_err_to_name(err));
+		}
+		nvs_close(nvs);
+	}
+	else {
+		ESP_LOGE(TAG, "Error opening nvs: %s. Unable to delete nvs key [%s].",esp_err_to_name(err),key);
+	}
+	char * struc_str = cJSON_PrintUnformatted(nvs_json);
+	if(struc_str!=NULL){
+		ESP_LOGV(TAG, "Structure before delete \n%s", struc_str);
+		free(struc_str);
+	}
+	// Detach first so the entry can be logged and then released on its own.
+	cJSON * entry = cJSON_DetachItemFromObjectCaseSensitive(nvs_json, key);
+	if(entry !=NULL){
+		ESP_LOGI(TAG, "Removing config key [%s]", entry->string);
+		cJSON_Delete(entry);
+		struc_str = cJSON_PrintUnformatted(nvs_json);
+		if(struc_str!=NULL){
+			ESP_LOGV(TAG, "Structure after delete \n%s", struc_str);
+			free(struc_str);
+		}
+	}
+	else {
+		ESP_LOGW(TAG, "Unable to remove config key [%s]: not found.", key);
+	}
+	config_unlock();
+}
+/**
+ * Fetches a config value without supplying a default: forwards to
+ * config_alloc_get_default() with a NULL default and a blob size of 0.
+ */
+void * config_alloc_get(nvs_type_t nvs_type, const char *key) {
+	void * found = config_alloc_get_default(nvs_type, key, NULL, 0);
+	return found;
+}
+/**
+ * Returns a heap copy of the cached value for key, creating the entry from
+ * default_value when the key is not cached and a default is supplied.
+ *
+ * @param nvs_type       expected NVS type of the value.
+ * @param key            config key to look up.
+ * @param default_value  value used to create a missing entry; NULL means a
+ *                       missing key is only logged.
+ * @param blob_size      not used by this function.
+ * @return the buffer produced by config_safe_alloc_get_entry_value(), or NULL
+ *         when the cache is not loaded, the lock cannot be taken, the key is
+ *         missing without a default, or the value cannot be produced.
+ */
+void * config_alloc_get_default(nvs_type_t nvs_type, const char *key, void * default_value, size_t blob_size) {
+
+	ESP_LOGV(TAG, "Retrieving key %s from nvs cache for type %s.", key,type_to_str(nvs_type));
+	if(!nvs_json){
+		ESP_LOGE(TAG,"configuration not loaded!");
+		return NULL;
+	}
+	if(!config_lock(LOCK_MAX_WAIT/portTICK_PERIOD_MS)){
+		ESP_LOGE(TAG, "Unable to lock config");
+		return NULL;
+	}
+
+	void * result = NULL;
+	ESP_LOGD(TAG,"Getting config entry for key %s",key);
+	cJSON * cached = cJSON_GetObjectItemCaseSensitive(nvs_json, key);
+	if(cached){
+		ESP_LOGV(TAG, "Entry found, getting value.");
+		result = config_safe_alloc_get_entry_value(nvs_type, cached);
+	}
+	else if(!default_value){
+		ESP_LOGW(TAG,"Value not found for key %s",key);
+	}
+	else {
+		// Key is missing but a default was supplied: create the entry first.
+		ESP_LOGW(TAG, "Adding new config value for key [%s]",key);
+		cached=config_set_value_safe(nvs_type, key, default_value);
+		if(cached){
+			char * dump = cJSON_PrintUnformatted(cached);
+			if(dump){
+				ESP_LOGV(TAG, "Value added configuration object for key [%s]: \n%s", cached->string,dump);
+				free(dump);
+			}
+			else {
+				ESP_LOGV(TAG, "Value added configuration object for key [%s]", cached->string);
+			}
+			result = config_safe_alloc_get_entry_value(nvs_type, cached);
+		}
+		else {
+			ESP_LOGE(TAG, "Failed to add value to cache");
+		}
+	}
+	config_unlock();
+	return result;
+}
+/**
+ * Serializes the whole config cache to a newly allocated JSON string.
+ *
+ * @param bFormatted  true selects cJSON_Print(), false selects
+ *                    cJSON_PrintUnformatted().
+ * @return the serialized cache, or a duplicated JSON error object when the
+ *         config lock cannot be taken. The string is heap allocated and is
+ *         not released by this function.
+ */
+char * config_alloc_get_json(bool bFormatted){
+	if(!config_lock(LOCK_MAX_WAIT/portTICK_PERIOD_MS)){
+		ESP_LOGE(TAG, "Unable to lock config after %d ms",LOCK_MAX_WAIT);
+		return strdup("{\"error\":\"Unable to lock configuration object.\"}");
+	}
+	char * serialized = bFormatted ? cJSON_Print(nvs_json) : cJSON_PrintUnformatted(nvs_json);
+	config_unlock();
+	return serialized;
+}
+/**
+ * Stores a value in the config cache under the config lock.
+ *
+ * @param nvs_type  NVS type of the value.
+ * @param key       config key to create or update.
+ * @param value     value handed to config_set_value_safe().
+ * @return ESP_OK when the cache entry was set; ESP_FAIL when the lock could
+ *         not be taken or config_set_value_safe() returned NULL.
+ */
+esp_err_t config_set_value(nvs_type_t nvs_type, const char *key, void * value){
+	if(!config_lock(LOCK_MAX_WAIT/portTICK_PERIOD_MS)){
+		ESP_LOGE(TAG, "Unable to lock config after %d ms",LOCK_MAX_WAIT);
+		// Without the lock the cache must not be touched, and there is
+		// nothing to unlock.
+		return ESP_FAIL;
+	}
+	esp_err_t result = ESP_OK;
+	cJSON * entry = config_set_value_safe(nvs_type, key, value);
+	if(entry == NULL){
+		result = ESP_FAIL;
+	}
+	else{
+		char * entry_str = cJSON_PrintUnformatted(entry);
+		if(entry_str!=NULL){
+			ESP_LOGV(TAG,"config_set_value result: \n%s",entry_str);
+			free(entry_str);
+		}
+		else {
+			ESP_LOGV(TAG,"config_set_value completed");
+		}
+
+	}
+	config_unlock();
+	return result;
+}
+
+IMPLEMENT_SET_DEFAULT(uint8_t,NVS_TYPE_U8); /* NOTE(review): one instantiation per integer type; presumably defines the config_set_default_<type>() function declared by DECLARE_SET_DEFAULT in config.h - macro body not visible here, confirm. */
+IMPLEMENT_SET_DEFAULT(int8_t,NVS_TYPE_I8);
+IMPLEMENT_SET_DEFAULT(uint16_t,NVS_TYPE_U16);
+IMPLEMENT_SET_DEFAULT(int16_t,NVS_TYPE_I16);
+IMPLEMENT_SET_DEFAULT(uint32_t,NVS_TYPE_U32);
+IMPLEMENT_SET_DEFAULT(int32_t,NVS_TYPE_I32);
+
+IMPLEMENT_GET_NUM(uint8_t,NVS_TYPE_U8); /* NOTE(review): one instantiation per integer type; presumably defines the config_get_<type>() function declared by DECLARE_GET_NUM in config.h - macro body not visible here, confirm. */
+IMPLEMENT_GET_NUM(int8_t,NVS_TYPE_I8);
+IMPLEMENT_GET_NUM(uint16_t,NVS_TYPE_U16);
+IMPLEMENT_GET_NUM(int16_t,NVS_TYPE_I16);
+IMPLEMENT_GET_NUM(uint32_t,NVS_TYPE_U32);
+IMPLEMENT_GET_NUM(int32_t,NVS_TYPE_I32);

+ 41 - 0
main/config.h

@@ -0,0 +1,41 @@
+#pragma once
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include "esp_system.h"
+#include "nvs_utilities.h"
+
+/* The extern "C" block must enclose the declarations below so that C++
+ * translation units link against the C definitions. */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Declares config_set_default_<t>(key, value) for the integer type t. */
+#define DECLARE_SET_DEFAULT(t) void config_set_default_## t (const char *key, t  value);
+/* Declares config_get_<t>(key, &value) for the integer type t. */
+#define DECLARE_GET_NUM(t) esp_err_t config_get_## t (const char *key, t *  value);
+
+
+DECLARE_SET_DEFAULT(uint8_t);
+DECLARE_SET_DEFAULT(uint16_t);
+DECLARE_SET_DEFAULT(uint32_t);
+DECLARE_SET_DEFAULT(int8_t);
+DECLARE_SET_DEFAULT(int16_t);
+DECLARE_SET_DEFAULT(int32_t);
+DECLARE_GET_NUM(uint8_t);
+DECLARE_GET_NUM(uint16_t);
+DECLARE_GET_NUM(uint32_t);
+DECLARE_GET_NUM(int8_t);
+DECLARE_GET_NUM(int16_t);
+DECLARE_GET_NUM(int32_t);
+
+/* True while at least one cached entry still awaits a commit to NVS. */
+bool config_has_changes();
+/* Writes every changed cache entry to NVS. */
+void config_commit_to_nvs();
+void config_start_timer();
+void config_init();
+/* Returns a heap copy of the value (caller frees), adding default_value to the cache when the key is missing. */
+void * config_alloc_get_default(nvs_type_t type, const char *key, void * default_value, size_t blob_size);
+/* Erases the key from NVS and from the cache. */
+void config_delete_key(const char *key);
+/* Adds default_value to the cache only when the key is not cached yet. */
+void config_set_default(nvs_type_t type, const char *key, void * default_value, size_t blob_size);
+/* Same as config_alloc_get_default() without a default value. */
+void * config_alloc_get(nvs_type_t nvs_type, const char *key) ;
+/* Blocks until no commit is pending. */
+bool wait_for_commit();
+/* Returns the cache serialized as a heap-allocated JSON string (caller frees). */
+char * config_alloc_get_json(bool bFormatted);
+/* Sets a cache entry; ESP_FAIL when the entry could not be set. */
+esp_err_t config_set_value(nvs_type_t nvs_type, const char *key, void * value);
+
+#ifdef __cplusplus
+}
+#endif
+

部分文件因为文件数量过多而无法显示