#!/usr/bin/env zsh
#
# convert_sr: read a zip archive of USDA SR text files and print, on
# standard output, a script of Pantry commands that create one food
# per database entry. Run with -h for usage (see showHelp).

# Abort the whole script as soon as any command returns non-zero.
# stripper switches this off temporarily around iconv.
setopt err_exit

# Replacement display names for selected nutrients, keyed by the
# nutrient number found in the first field of the nutrient definition
# file. renameNuts uses the name given here instead of the file's own
# description; nutrients that are not listed keep their description.
# Except for "Calories", renameNuts appends ", <units>" to the name.
typeset -A nutsToRename
nutsToRename=(  204 "Total Fat"
                205 "Total Carbohydrate"
                208 "Calories"
                291 "Dietary Fiber"
                301 "Calcium"
                303 "Iron"
                304 "Magnesium"
                305 "Phosphorus"
                306 "Potassium"
                307 "Sodium"
                309 "Zinc"
                312 "Copper"
                313 "Fluoride"
                315 "Manganese"
                317 "Selenium"
                318 "Vitamin A"
                320 "Vitamin A"
                401 "Vitamin C"
                415 "Vitamin B6"
                418 "Vitamin B12"
                578 "Vitamin B12, added"
                605 "Trans Fat"
                606 "Saturated Fat"
                645 "Monounsaturated Fat"
                646 "Polyunsaturated Fat")

# The *ToKeep tables name the columns that survive doCuts. Each value
# is the 1-based field position in the file as extracted from the zip
# archive; the keys are labels only. Only the values are used, joined
# into the field list handed to cut (see fieldsToKeep below).

# Columns kept from the food description file (srFiles[foodDes]).
typeset -A foodDesToKeep 
foodDesToKeep=(     ndb         1
                    groupCd     2
                    foodDesc    3
                    refDesc     8
                    pctRefuse   9 )

# Columns kept from the food group file (srFiles[fdGrp]).
typeset -A fdGrpToKeep 
fdGrpToKeep=(       groupCd     1
                    groupDesc   2 )

# Columns kept from the nutrient definition file (srFiles[nutDef]).
typeset -A nutDefToKeep 
nutDefToKeep=(      nutNo       1
                    units       2 
                    nutDesc     4 )

# Columns kept from the nutrient data file (srFiles[nutData]).
typeset -A nutDataToKeep 
nutDataToKeep=(     ndb         1
                    nutNo       2
                    nutVal      3 )

# Columns kept from the weight file (srFiles[weight]).
typeset -A weightToKeep
weightToKeep=(      ndb         1
                    amount      3
                    weightDesc  4
                    grams       5 )

# For each file id (the same keys as srFiles), the comma-separated
# list of field numbers that doCuts passes to `cut -f`. The (j:,:)
# flag joins the values of the corresponding *ToKeep table with
# commas.
typeset -A fieldsToKeep
fieldsToKeep=(  foodDes     ${(j:,:)foodDesToKeep}
                fdGrp       ${(j:,:)fdGrpToKeep}
                nutDef      ${(j:,:)nutDefToKeep}
                nutData     ${(j:,:)nutDataToKeep}
                weight      ${(j:,:)weightToKeep} )

# Column layouts of the working files: each table maps a column label
# to its 1-based field position. The first five tables number the
# columns left after doCuts (the kept columns, renumbered from 1).
# main reads foodDes[groupCd], fdGrp[groupCd], nutData[nutNo] and
# nutsRenamed[nutNo] as sort/join keys; the remaining entries are not
# referenced elsewhere in this file.

# Food description file after doCuts.
typeset -A foodDes 
foodDes=(       ndb         1
                groupCd     2
                foodDesc    3
                refDesc     4
                pctRefuse   5 )

# Food group file after doCuts.
typeset -A fdGrp 
fdGrp=(         groupCd     1
                groupDesc   2 )

# Nutrient definition file after doCuts.
typeset -A nutDef 
nutDef=(        nutNo       1
                units       2
                desc        3 )

# Nutrient data file after doCuts.
typeset -A nutData 
nutData=(       ndb         1
                nutNo       2
                nutVal      3 )

# Weight file after doCuts.
typeset -A weight
weight=(        ndb         1
                amount      2
                weightDesc  3
                grams       4 )

# Food descriptions joined with groups on groupCd. The order matches
# the variables makeOutput reads from $foodDesAndGrpFilename.
typeset -A foodDesAndGrp
foodDesAndGrp=( groupCd     1
                ndb         2
                foodDesc    3
                refDesc     4
                pctRefuse   5
                groupDesc   6 )

# Nutrient data joined with the renamed nutrients on nutNo. The order
# matches the variables makeOutput reads from $nutDataAndDefFilename.
typeset -A nutDataAndDef
nutDataAndDef=( nutNo       1
                ndb         2
                nutVal      3
                descAndUnits 4 )

# Layout of the file written by renameNuts.
typeset -A nutsRenamed
nutsRenamed=(   nutNo       1
                descAndUnits 2 )

# File id -> name of the member extracted from the zip archive.
# unzipper extracts exactly these members.
typeset -A srFiles
srFiles=( foodDes     FOOD_DES.txt
          fdGrp       FD_GROUP.txt
          nutData     NUT_DATA.txt
          nutDef      NUTR_DEF.txt
          weight      WEIGHT.txt )

# Character that stripper deletes from every file.
textDelim='~'
# Field separator used by sort, join, cut and read throughout.
fieldDelim="^"
# Source encoding handed to iconv -f in stripper.
encoding="LATIN1"
# Weight descriptions that makeOutput skips. Only the keys matter;
# the values are never read.
typeset -A uselessUnits
uselessUnits=(lb true oz true)
# Set to true by the -v option; status prints only when it is true.
verbose=false

# Declared here but left empty: their values depend on the temporary
# directory, which does not exist until unzipper has run.
# files maps a file id (same keys as srFiles) to the extracted path.
typeset -A files
typeset tempDir foodDesAndGrpFilename nutDataAndDefFilename
typeset nutsRenamedFilename

# Print a progress message to stderr, but only if verbose is true.
# $1: message to print.
# Always returns 0, so callers running under err_exit are not aborted
# when verbose mode is off.
function status
{
    # Keep the if-form: a bare `[[ ... ]] && printf ...` would return
    # non-zero in quiet mode and trip err_exit in the caller.
    if [[ $verbose == true ]]; then
        # printf prints the message verbatim; echo would treat a
        # leading "-n" as an option and may interpret backslashes.
        printf '%s\n' "$1" >&2
    fi
}

# Sort a file in place on a single field.
# $1 : name of the file to sort (rewritten with the sorted contents)
# $2 : number of the field to sort on, fields separated by $fieldDelim
function sorter
{
    local target=$1 keyField=$2
    local scratch=${target}.tmp

    status "sorting file ${target}"
    # Sort into a scratch file first, then move it over the original.
    sort -t "$fieldDelim" -k "${keyField},${keyField}" "$target" > "$scratch"
    mv "$scratch" "$target"
}

# Join two files that are already sorted on their join fields.
# $1: first file
# $2: join field number in the first file
# $3: second file
# $4: join field number in the second file
# $5: file that receives the joined lines
function joiner
{
    local leftFile=$1 leftField=$2
    local rightFile=$3 rightField=$4
    local outFile=$5

    status "joining files ${leftFile} and ${rightFile}"
    join -t "$fieldDelim" -1 "$leftField" -2 "$rightField" \
        "$leftFile" "$rightFile" > "$outFile"
}

# Sort both input files in place on their join fields, then join them.
# $1: first file
# $2: join field number in the first file
# $3: second file
# $4: join field number in the second file
# $5: file that receives the joined lines
function sortAndJoin
{
    local leftFile=$1 leftField=$2
    local rightFile=$3 rightField=$4

    status "sorting and joining files ${leftFile} and ${rightFile}"
    sorter "$leftFile" "$leftField"
    sorter "$rightFile" "$rightField"
    # joiner takes the same five arguments in the same order.
    joiner "$@"
}

# Write the nutrient lookup file used for the later join.
#
# Reads ${files[nutDef]}, whose lines are nutNo, units and description
# separated by $fieldDelim. For every line, appends
# "<nutNo><fieldDelim><name>, <units>" to $nutsRenamedFilename, where
# <name> is nutsToRename[nutNo] if that entry exists and the file's
# own description otherwise. "Calories" is written without units.
function renameNuts
{
    status "renaming nutrients"
    local nutNo units desc newName newNameAndUnits

    # IFS is set for the read builtin only, so the caller's IFS is left
    # untouched; -r keeps backslashes in the data literal.
    while IFS=$fieldDelim read -r nutNo units desc
    do
        newName=${nutsToRename[$nutNo]:-${desc}}
        if [[ $newName == "Calories" ]]
        then
            newNameAndUnits=$newName
        else
            newNameAndUnits="$newName, $units"
        fi
        # The delimiter is passed as data, not spliced into the format,
        # and the arguments are quoted so an empty field cannot shift
        # the ones after it.
        printf '%s%s%s\n' "$nutNo" "$fieldDelim" "$newNameAndUnits" \
               >> "$nutsRenamedFilename"
    done < "${files[nutDef]}"
}

# Clean every extracted file in place: delete carriage returns (\15),
# Ctrl-Z characters (\32) and the text delimiter, then convert from
# $encoding to ASCII, discarding characters that cannot be converted.
function stripper
{
    status "stripping unwanted characters"
    local srFile cleaned
    for srFile in ${files}
    do
        cleaned=${srFile}.tmp
        # iconv exits non-zero whenever -c had to drop a character;
        # `|| true` keeps err_exit from aborting the script on that.
        tr -d '\15\32'"${textDelim}" < "$srFile" |
            iconv -c -f "$encoding" -t ASCII > "$cleaned" || true
        mv "$cleaned" "$srFile"
    done
}

# Reduce every extracted file, in place, to the fields listed for its
# id in fieldsToKeep.
function doCuts
{
    status "eliminating unneeded fields"
    local id srcFile
    for id in ${(k)files}; do
        srcFile=${files[$id]}
        # Cut into a scratch file, then move it over the original.
        cut -d "$fieldDelim" -f "${fieldsToKeep[$id]}" \
            "$srcFile" > "${srcFile}.tmp"
        mv "${srcFile}.tmp" "$srcFile"
    done
}

# Print the generated Pantry script on standard output.
#
# Inputs, all with fields separated by $fieldDelim:
#   $foodDesAndGrpFilename  groupCd ndb foodDesc refDesc pctRefuse groupDesc
#   ${files[weight]}        ndb amount weightDesc grams
#   $nutDataAndDefFilename  nutNo ndb nutVal descAndUnits
#
# One scratch file per food, named after its ndb, is built up in a
# temporary directory created in the current directory. Each line
# written to it ends in a backslash so that the whole file forms a
# single continued command. The scratch files are then concatenated to
# standard output and the directory is removed.
function makeOutput
{
    status "creating output:"
    local groupCd ndb foodDesc refDesc pctRefuse groupDesc
    local weightNdb weightAmount weightDesc weightGrams
    local nutNo nutsNdb nutVal descAndUnits
    local filename dbDir

    # Assigned separately from `local`: `local dbDir=$(...)` returns
    # the status of `local` and would hide a mktemp failure, leaving
    # dbDir empty for the redirections, find and rm below.
    dbDir=$(mktemp -d db.XXXXXX) || return 1

    status "creating foods"
    # IFS is set for read only; -r keeps backslashes in the data.
    while IFS=$fieldDelim read -r groupCd ndb foodDesc refDesc \
                                   pctRefuse groupDesc
    do
        {
        printf 'pantry --create --add master \\\n'
        printf '--change unit "^g$" --change quantity 100 \\\n'
        # Arguments are quoted so that an empty value is emitted as an
        # empty quoted word instead of disappearing.
        printf '--change name %q \\\n' "$foodDesc"
        printf '--change group %q \\\n' "$groupDesc"
        if [[ -n $refDesc ]]; then
            printf '--change refuse %q \\\n' "$refDesc"
        fi

        if [[ -n $pctRefuse ]]; then
            printf '--change percent-refuse %q \\\n' "$pctRefuse"
        fi
        } > "${dbDir}/${ndb}"
    done < "${foodDesAndGrpFilename}"

    status "creating available units"
    while IFS=$fieldDelim read -r weightNdb weightAmount weightDesc weightGrams
    do
        # Keep a unit only if its description is not a key of
        # uselessUnits and its amount field is exactly 1.
        if [[ ${+uselessUnits[$weightDesc]} == 0 && $weightAmount == 1 ]]; then
            printf '--change-avail-unit %q %q \\\n' \
                "$weightDesc" "$weightGrams" >> "${dbDir}/${weightNdb}"
        fi
    done < "${files[weight]}"

    status "creating nutrients"
    while IFS=$fieldDelim read -r nutNo nutsNdb nutVal descAndUnits
    do
        printf '--change-nut %q %q \\\n' \
            "$descAndUnits" "$nutVal" >> "${dbDir}/${nutsNdb}"
    done < "${nutDataAndDefFilename}"

    # add a blank line to each file, so that last line of each
    # food does not have a trailing backslash
    for filename in ${dbDir}/*; do
        printf '\n' >> "$filename"
    done

    find "$dbDir" -mindepth 1 -print0 | xargs -0 cat
    rm -r -- "$dbDir"
}

# Extract the SR text files from a zip archive.
# $1: name of the zip archive
# Creates a temporary directory in the current directory, stores its
# name in the global tempDir, extracts the members named in srFiles
# into it, and records each extracted path in the global files table
# under the same id that srFiles uses.
function unzipper
{
    status "unzipping zip file"
    # Loop variables are local so they do not leak into global scope.
    local name srName
    tempDir=$(mktemp -d sr.XXXXXXXXX)
    # $srFiles expands to the member names (the values of the table).
    unzip "$1" $srFiles -d "$tempDir" > /dev/null
    for name srName in ${(kv)srFiles}; do
        files[${name}]=${tempDir}/${srName}
    done
}

# Print the usage text to standard output, then exit the script with
# status 0.
function showHelp
{
    # One printf call; every argument becomes one output line.
    printf '%s\n' \
        "usage: convert_sr [options] filename" \
        "" \
        "Reads filename, which is a zip archive in USDA SR format (the same format" \
        "used in SR21)." \
        "Prints an sh script to standard output, where each line is a Pantry" \
        "command to create a food from the SR database." \
        "Save this output to a file so you can run the script with sh later." \
        "" \
        "Options:" \
        "" \
        "  -h Show this help and exit" \
        "" \
        "  -v print progress messages to stderr while working" \
        "" \
        "In addition to zsh, you need to have these things installed:" \
        "  sort" \
        "  join" \
        "  tr" \
        "  iconv" \
        "  cut" \
        "  unzip" \
        "  mktemp" \
        "  GNU find" \
        "  GNU xargs"
    exit 0
}

# Entry point: parse the command line and run the conversion.
#
# Options:
#   -h  print the usage text and exit
#   -v  print progress messages to stderr
# The first remaining argument is the zip archive to convert. The
# generated Pantry script is written to standard output by makeOutput.
# Exits with status 1 if an unknown option is given or the archive
# name is missing.
function main
{
    local arg
    while getopts 'hv' arg; do
        case $arg in
            (h)
                showHelp;;
            (v)
                verbose=true;;
            (\?)
                # getopts has already reported the offending option;
                # stop here instead of converting with a command line
                # that was not understood.
                printf '%s\n' 'Use convert_sr -h for help.' >&2
                exit 1;;
        esac
    done
    shift $(( OPTIND - 1  ))

    if [[ -z $1 ]]; then
        printf '%s\n' \
            'Need to supply zip file name. Use convert_sr -h for help.' >&2
        exit 1
    fi

    unzipper "$1"
    # tempDir is set by unzipper; derived file names can be built now.
    nutsRenamedFilename=${tempDir}/nutsRenamed.txt
    stripper
    doCuts
    renameNuts

    # join foods and groups
    foodDesAndGrpFilename=${tempDir}/des_and_group.txt
    sortAndJoin "${files[foodDes]}" "${foodDes[groupCd]}" \
        "${files[fdGrp]}" "${fdGrp[groupCd]}" "${foodDesAndGrpFilename}"

    # join nutrient data with the renamed nutrient names
    nutDataAndDefFilename=${tempDir}/data_and_def.txt
    sortAndJoin "${files[nutData]}" "${nutData[nutNo]}" \
        "${nutsRenamedFilename}" "${nutsRenamed[nutNo]}" \
        "${nutDataAndDefFilename}"

    makeOutput

    rm -r -- "${tempDir}"
}

# Run the script. "$@" passes every command-line argument through
# unchanged, including empty ones.
main "$@"
