#!/bin/bash

# convert xml table to delimited table (tsv,csv)
# vo2dlm [-d DFS] {-c} {-s} {-h} infile.vot 
#
# LAST REVISION: 3 February 2021
#-----------------------------------------------------------------------
# Defaults. DFS (field separator) and DRS (record separator) are stored as
# backslash escapes, not literal characters. CAPTION, SHOW and HELP stay
# empty unless the matching command-line option is given.
DFS="\t";  CAPTION=; SHOW=; HELP=;
DRS="\n"
#-----------------------------------------------------------------------
# Scratch files live in a private directory created by mktemp, so that
# concurrent runs cannot overwrite each other and the current directory
# does not have to be writable. The directory is removed when the script exits.
TFdir=$(mktemp -d "${TMPDIR:-/tmp}/xml2dlm.XXXXXX") || {
    echo "xml2dlm: cannot create temporary directory" >&2
    exit 1
}
TF="$TFdir/OUT_xml2dlm"
TFo="$TF.out"   # spooled copy of piped input
TFt="$TF.tmp"   # field names, one per line
TFh="$TF.hdr"   # delimited header line
TFd="$TF.dat"   # delimited data rows
trap 'rm -rf -- "$TFdir"' EXIT
#-----------------------------------------------------------------------

# parse_options ARG...
# Scan the leading options of ARG... and record them in globals:
#   -d SEP  -> DFS=SEP      -s -> SHOW=1      -c -> CAPTION=1      -h -> HELP=1
# On an unknown option (or a missing -d argument) a hint is written to
# stderr, so it cannot end up in the converted table, and the script exits
# with status 255. OPTIND is left pointing at the first non-option argument.
parse_options() {
    while getopts d:csh OPTVAL; do
        case "$OPTVAL" in
            d) DFS=$OPTARG ;;
            s) SHOW=1 ;;
            c) CAPTION=1 ;;
            h) HELP=1 ;;
            *) echo "xml2dlm -h for help" >&2; exit 255 ;;
        esac
    done
}

parse_options "$@"
# drop the parsed options; only file operands remain in "$@"
shift $((OPTIND-1))

# usage
# Print the command synopsis and the option summary on stdout.
# Every line starts with a tab; the synopsis lists all four options.
usage() {
    printf '\txml2dlm [-d DFS] {-c} {-s} {-h} xml_file .. (or pipe)\n'
    printf '\t -d .. field separator for output file, "[\t]" ("," etc)\n'
    printf '\t -c .. display table caption and exit\n'
    printf '\t -s .. display header, one field per line and exit\n'
    printf '\t -h .. display this help and exit\n'
}

# -h was given: show the help text and stop before touching any input
if [ -n "$HELP" ]; then
    usage
    exit 0
fi

#-----------------------------------------------------------------------
# check to see if data is being supplied by pipe
#-----------------------------------------------------------------------

# Decide where the table comes from and leave its path in IF.
#   no operand   -> read stdin (a pipe or a redirected regular file) and
#                   spool it to $TFo, because the input is scanned several
#                   times further down and a stream can only be read once
#   one operand  -> use that file directly
#   more         -> error
# All diagnostics go to stderr so they never mix with the converted table;
# every failure exits with status 255.
case $# in
    0)  if [ -p /dev/stdin ] || [ -f /dev/stdin ]; then
            set -- "/dev/stdin"                     #set $1=/dev/stdin
        else
            echo "no file given nor is there a trailing pipe" >&2
            exit 255
        fi
        cat -- "$1" > "$TFo" || exit 255
        IF=$TFo ;;
    1)  IF=$1
        # fail early with one clear message instead of a cascade of
        # "No such file" errors from the tools that read $IF later
        if [ ! -r "$IF" ]; then
            echo "cannot read file: $IF" >&2
            exit 255
        fi ;;
    *)  echo "can accept only one file" >&2
        exit 255 ;;
esac

#-----------------------------------------------------------------------
#The heavy lift
#-----------------------------------------------------------------------

#caption to table
# table_caption FILE
# Print the table caption found in FILE: the lines from the first one
# containing '<TABLE ID' through the next one containing '<DESCRIPTION>'
# are joined, and the value of the last name="..." attribute in that
# span is written to stdout.
table_caption() {
    sed -n '/<TABLE ID/,/<DESCRIPTION>/p' "$1" |
        tr -d '\n' |
        sed 's/.*name="//;s/".*$//'
}

ANNOUNCE=$(table_caption "$IF")

# -c: print the caption verbatim (no word splitting or glob expansion) and stop
if [ -n "$CAPTION" ]; then
    printf '%s\n' "$ANNOUNCE"
    exit 0
fi

#header
# field_names FILE
# Print the name="..." value of every line of FILE that contains '<FIELD',
# one name per line (the last name= attribute on the line wins).
field_names() {
    grep "<FIELD" "$1" | sed 's/^.*name="//;s/".*$//'
}

# join_fields SEP
# Join the lines read from stdin into a single newline-terminated line,
# placing SEP between consecutive names only, so the header carries no
# trailing separator. SEP may use backslash escapes such as \t; awk
# expands them in the -v assignment. Empty input produces no output.
join_fields() {
    awk -v sep="$1" '
        NR > 1 { printf "%s", sep }
               { printf "%s", $0 }
        END    { if (NR) print "" }'
}

field_names "$IF" > "$TFt"

# -s: list the numbered field names and stop
if [ -n "$SHOW" ]; then
    nl "$TFt"
    exit 0
fi

join_fields "$DFS" < "$TFt" > "$TFh"

#data
# table_rows FILE SEP EOL
# Convert the rows between the <TABLEDATA> and </TABLEDATA> lines of FILE
# to delimited text on stdout.
#   - the first and last line of that span (the TABLEDATA tags) are dropped
#   - a self-closing <TD/> is treated as an empty cell
#   - all remaining lines are joined, then split into rows at each </TR>
#     and into cells at each </TD>
#   - cells are joined with SEP and rows are terminated with EOL; the empty
#     piece after the last </TD> of a row is dropped, so rows carry no
#     trailing separator
# SEP and EOL may use backslash escapes (\t, \n); awk expands them in the
# -v assignments. They are never used as a pattern or replacement text, so
# characters such as '/', '.' or '&' are safe separators.
table_rows() {
    sed -n '/<TABLEDATA>/,/<.TABLEDATA>/p' "$1" |
        sed '1d;$d;s/<TD[[:space:]]*\/>/<\/TD>/g;s/<TR>//g;s/<TD>//g' |
        tr -d '\n' |
        awk -v sep="$2" -v eol="$3" '
        {
            nrows = split($0, rows, /<.TR>/)
            for (r = 1; r <= nrows; r++) {
                # nothing follows the final row terminator
                if (r == nrows && rows[r] == "") break
                ncells = split(rows[r], cells, /<.TD>/)
                # drop the empty piece after the last cell terminator
                if (ncells > 1 && cells[ncells] == "") ncells--
                line = ""
                for (c = 1; c <= ncells; c++)
                    line = line (c > 1 ? sep : "") cells[c]
                printf "%s%s", line, (r < nrows ? eol : "")
            }
        }'
}

table_rows "$IF" "$DFS" "$DRS" > "$TFd"

#display the table
cat "$TFh" "$TFd"
