#!/bin/bash

# ml2r .. multilines to records. The input stream consists of lines,
# some of which match the regular expression "regtok". The output
# consists of records, each made up of the lines from
# a "regtok" line up to, but not including, the next
# regtok line (or end of file). The "\n" that ends each of these
# lines is replaced by DFS, and the end of the new record is marked by "\n".

# ml2r -r regtok [-d DFS] {-h} file | pipe
#
# -r regtok, regular expression token identifying the start of record
# -d DFS, field separator for the output stream
# -h help

#-----------------------------------------------------------------------
# Defaults. DFS is handed to awk with -v, which expands escape
# sequences, so the two characters "\t" become a real tab on output.
#-----------------------------------------------------------------------
DFS="\t"; HELP=; REGTOK=;

# Status used for usage/input errors detected by the shell code below;
# 255 is what the historical "exit -1" evaluated to in bash.
readonly E_FAIL=255

# Print a diagnostic on stderr (stdout carries only records) and abort.
die() {
  printf '%s\n' "$*" >&2
  exit "$E_FAIL"
}

#-----------------------------------------------------------------------
# option parsing
#-----------------------------------------------------------------------
while getopts d:r:h OPTVAL; do
  case "$OPTVAL" in
    d) DFS=$OPTARG ;;
    r) REGTOK=$OPTARG ;;
    h) HELP=1 ;;
    *) die "ml2r -h for help" ;;
  esac
done
shift $((OPTIND - 1))

if [[ -n "$HELP" ]]; then
  echo "ml2r -r regtok [-d DFS] {-h} file | pipe"
  echo "-r regtok, regular expression for start of record"
  echo "-d DFS, field separator for the output stream"
  exit 0
fi

# Quoted test: a pattern containing blanks must not be word-split.
if [[ -z "$REGTOK" ]]; then
  die "record start token must be specified"
fi

#-----------------------------------------------------------------------
# select the input: first positional argument, otherwise stdin when it
# is a pipe or a redirection (anything that is not a terminal)
#-----------------------------------------------------------------------
if (( $# == 0 )); then
  if [[ ! -t 0 ]]; then
    set -- "/dev/stdin"                     #set $1=/dev/stdin
  else
    die "no file given nor is there a trailing pipe"
  fi
fi
IFILE=$1

#-----------------------------------------------------------------------
# Join the lines of each record in a single pass over the input.
#
# * The first-line check happens inside the same awk run, so piped
#   input is never consumed by a separate pre-check.
# * The pattern travels through the environment rather than -v, so
#   backslashes in it reach the regex engine untouched; DFS uses -v on
#   purpose so that "\t" is expanded to a tab.
# * The input is attached by redirection, so a file name containing
#   "=" cannot be mistaken by awk for a variable assignment.
#
# A record is flushed when the next start line is seen and once more at
# end of input. The exit status is that of awk: non-zero when the first
# line does not match the record start token.
#-----------------------------------------------------------------------
ML2R_REGTOK=$REGTOK awk -v dfs="$DFS" '
  BEGIN { tok = ENVIRON["ML2R_REGTOK"] }

  (NR == 1) && ($0 !~ tok) {
    print "first line lacks record start token" > "/dev/stderr"
    failed = 1
    exit 1
  }

  # start of a record: emit the finished one, then begin collecting anew
  $0 ~ tok {
    if (NR > 1) print rec
    rec = $0
    next
  }

  # continuation line: append to the current record
  { rec = rec dfs $0 }

  END { if (!failed && NR > 0) print rec }
' < "$IFILE"
