#! /usr/bin/env bash
# Updates ./unirange.c as per Blocks.txt, preserving original .c file
# By gsr.bugs infernal-iceberg com for Debian

set -Eeuo pipefail  # bash, not all supported with (da)sh

# TODO: change sed to not have to add back lines (like those used as anchors)

# Choose the Blocks.txt to read. Precedence:
#   1. the path given as first argument (an explicit request always wins),
#   2. the copy installed by Debian's unicode-data package,
#   3. ./Blocks.txt in the current directory.
DEBIANBFILE=/usr/share/unicode/Blocks.txt

if [ -n "${1:-}" ] ; then
  BFILE=$1
elif [ -f "$DEBIANBFILE" ] ; then
  BFILE=$DEBIANBFILE
else
  BFILE="./Blocks.txt"
fi

if [ ! -f "$BFILE" ] ; then
  # Diagnostics go to stderr so they never mix with regular output
  echo "Usage: $0 /path/to/new/Blocks.txt" >&2
  echo "Alternatively try 'sudo apt install unicode-data'" >&2
  exit 1
fi

SOURCE="./unirange.c"

# Only a run that finds no backup creates one; afterwards the existing
# "-orig" copy is left untouched and used as the template for regeneration
if [ ! -f "${SOURCE}-orig" ] ; then
  printf '%s\n' "Backing up $SOURCE to ${SOURCE}-orig, then writing to $SOURCE"
  cp -a "$SOURCE" "${SOURCE}-orig" || { printf '%s\n' 'Problem with cp' ; exit 2 ; }
else
  printf '%s\n' "Backup ${SOURCE}-orig exists, writing new version to $SOURCE"
fi

# Start the new C file with everything that precedes the range table in the
# backup, then put back the table's opening line: it is the regex anchor, so
# sed drops it together with the rest of the file (see TODO)
rm -f "${SOURCE}"
{
  sed -e '/^struct cr Range_Table/,$d' "${SOURCE}-orig"
  printf '%s\n' 'struct cr Range_Table []={'
} > "${SOURCE}"

# Recreate structure contents from the new Blocks.txt file
FIRST=true
# Remove comments and empty lines then convert to just "hex hex description"
sed -e "/#.*/d" -e "/^$/d" -e "s/^/0x/" -e "s/\.\./ 0x/" -e "s/;//" < "$BFILE" \
| while read BLOCK ; do
    # Get each one of the three parts thanks to space being the separator
    START=$( echo "$BLOCK" | cut -d " " -f 1 )
    END=$( echo "$BLOCK" | cut -d " " -f 2 )
    TEXT=$( echo "$BLOCK" | cut -d " " -f 3- )

    if [ $FIRST = "true"  ] ; then
      # Special case, no extra processing
       FIRST="false"
    else
      # All other lines, look for missing range against previous line end
      # Compute as decimal for comparison
      TARGET=$(( $PREV + 1 ))
      START_DEC=$(( $START ))
      if [ $TARGET -ne $START_DEC ] ; then
        # Insert filler C line, in hexadecimal, slightly changed desc
        printf "{0x%0.4X,0x%0.4X,\"Undefined / Unused\"},\n" $TARGET $(( START - 1 ))
      fi
    fi
    # Format the read values as C source, no spaces so diff is minimal
    echo "{$START,$END,\"$TEXT\"},"

    # Save hex to compare with next line
    PREV=$END
  done >> "${SOURCE}"

# Close the table and re-create the Ranges_Defined line, which is the regex
# anchor used just below and therefore dropped by sed (see TODO), then append
# whatever followed that line in the backup
{
  printf '%s\n' \
    '};' \
    '' \
    'int Ranges_Defined = sizeof(Range_Table)/sizeof(struct cr);'
  sed -e '1,/^int Ranges_Defined = sizeof/d' "${SOURCE}-orig"
} >> "${SOURCE}"

# Bring back the private definitions to not invalidate the man pages ...

#######################################
# Replace generated table rows with the matching rows kept in the backup.
# Globals:   SOURCE (read); "${SOURCE}" is edited in place and
#            "${SOURCE}-orig" is searched.
# Arguments: $1 - basic regular expression (must not contain "/") selecting
#                 the rows; used for the lookup in the backup and for the
#                 replacement in the new file.
# Returns:   0 when the rows were replaced or the backup has none to restore,
#            otherwise the status of the command that failed.
#######################################
restore_original_rows() {
  local pattern=$1 saved status=0

  # sed's "r" command needs a real file holding the lines to insert
  saved=$(mktemp) || return
  grep -e "$pattern" -- "${SOURCE}-orig" > "$saved" || status=$?
  case "$status" in
    0)
      # ... by adding the found lines and deleting the generic ones used as anchors
      sed -i -e "/${pattern}/{r ${saved}" -e 'd}' "${SOURCE}" || status=$?
      ;;
    1)
      # grep found nothing: replacing would only delete the generated row
      echo "No row matching '${pattern}' in ${SOURCE}-orig, keeping generated one" >&2
      status=0
      ;;
  esac
  rm -f "$saved"
  return "$status"
}

restore_original_rows '"Private Use Area'
restore_original_rows 'Supplementary Private Use Area-A'

echo "You may want to run 'diff -u ${SOURCE}-orig ${SOURCE}' to verify"
