#!/bin/sh
# mk_fallbacks.sh
#
# Reads the mapping tables in mappings/*.TXT and writes two files into the
# current directory: UnicodeChars and Fallbacks.
  # add_fallback PATTERN TARGET LABEL
  #
  # Prints the progress line " - for LABEL..." and, for every line of _tmp3
  # that matches PATTERN, appends "<code> TARGET" to _tmp5, where <code> is
  # the first six characters of the matching line.  Both files are looked up
  # in the current directory.
  #
  #   $1  PATTERN  basic regular expression handed to grep
  #   $2  TARGET   text written after each code (e.g. 0x0022)
  #   $3  LABEL    only used in the progress message
  add_fallback()
  {
    echo " - for $3..."
    # Filter and trim in one grep/cut pass instead of forking echo and cut for
    # every matching line.  -e keeps a PATTERN that starts with "-" from being
    # parsed as an option; read -r and printf keep backslashes literal.
    grep -e "$1" _tmp3 | cut -c1-6 | while IFS= read -r code; do
      printf '%s %s\n' "$code" "$2"
    done >> _tmp5
  }
  # add_letter_fallbacks LABEL KIND
  #
  # Prints the progress line " - for latin LABEL letters..." and appends to
  # _tmp5 one "<code> <fallback>" line for every _tmp3 entry whose name
  # contains "LATIN <KIND> LETTER X" at the end of the line or followed by
  # " WITH".  <code> is the first six characters of the entry; <fallback> is
  # the code of the first entry, in sort order, that has the same text in the
  # columns where "LATIN <KIND> LETTER X" sits.  The plain letter normally
  # sorts ahead of its "WITH ..." variants, so it is the one picked when it is
  # present.
  #
  #   $1  LABEL  word used in the progress message (capital, small)
  #   $2  KIND   word used in the character names (CAPITAL, SMALL)
  add_letter_fallbacks()
  {
    echo " - for latin $1 letters..."
    # "sort -k 3" replaces the obsolete "sort +2" (skip two fields), which
    # current sort implementations read as a file named "+2".
    # In awk, columns 1-6 hold the code, column 7 the separator, and the base
    # name "LATIN <KIND> LETTER X" is 6 + 8 + 1 characters plus the length of
    # KIND.  Remembering the first code seen per base name replaces a grep
    # over the whole list for every single line.
    {
      grep "LATIN $2 LETTER [A-Z]\$" _tmp3
      grep "LATIN $2 LETTER [A-Z] WITH" _tmp3
    } | sort -k 3 | awk -v len="$(( ${#2} + 15 ))" '
      {
        code = substr($0, 1, 6)
        base = substr($0, 8, len)
        if (!(base in first))
          first[base] = code
        print code, first[base]
      }' >> _tmp5
  }

  echo " * getting list of needed unicode characters..."
  # From every mapping line that starts with "0x.." keep tab-separated fields
  # 2 and 4, then keep only results that themselves start with "0x".
  cat mappings/*.TXT |
    sed -n '/^0x../p' |
    cut -f2,4 |
    sed -n '/^0x/p' |
    sort -u > _tmp1

  echo " * making unique list of unicode characters meanings..."
  # _tmp1 is sorted, so keeping the first line seen for each code (field 1)
  # gives one meaning per code in a single pass, instead of rescanning _tmp1
  # with sed once per code.
  awk -F '\t' '!seen[$1]++' _tmp1 > _tmp3
  cp _tmp3 UnicodeChars

  echo " * creating one-byte fallback tables..."
  rm -f Fallbacks _tmp5
  add_letter_fallbacks capital CAPITAL
  add_letter_fallbacks small SMALL
  # add_fallback reads _tmp3 and appends to _tmp5, so those names must stay.
  add_fallback "DOUBLE .*QUOTATION MARK" "0x0022" "double quotations"
  add_fallback "SINGLE .*QUOTATION MARK" "0x0027" "single quotations"
  add_fallback "DASH" "0x002D" "dashes"

  echo " * removing infinite loops from fallback tables..."
  # Drop entries in which a code is followed by the very same code.
  grep -v '\(0x....\) \1' _tmp5 | sort > Fallbacks
  # _tmp2, _tmp4 and _tmp6 are no longer written here; they stay in the list
  # so leftovers from earlier runs are still removed.
  rm -f _tmp1 _tmp2 _tmp3 _tmp4 _tmp5 _tmp6