#!/bin/bash
# ml2r .. multilines to records
#
# The input stream consists of lines, some of which match the regular
# expression "regtok".  The output consists of records: each record is a
# "regtok" line followed by every line up to, but not including, the next
# "regtok" line (or end of file).  The newlines inside a record are
# replaced by DFS and each record is terminated by a single "\n".
#
# usage: ml2r -r regtok [-d DFS] {-h} file | pipe
#
#   -r regtok  regular expression identifying the start of a record
#              (an awk extended regular expression)
#   -d DFS     field separator for the output stream (default: tab);
#              backslash escapes such as "\t" are interpreted
#   -h         help
#
# Exit status: 0 on success, non-zero on a usage error, unreadable input,
# or when the first input line does not match regtok.
#-----------------------------------------------------------------------

#-----------------------------------------------------------------------
# _ml2r_join REGTOK DFS
# Reads lines on stdin and writes one record per line on stdout.
# REGTOK travels through the environment so its backslashes reach the
# regex engine untouched; DFS is passed with -v so that escapes such as
# "\t" are expanded by awk.
#-----------------------------------------------------------------------
_ml2r_join() {
  ML2R_REGTOK=$1 awk -v dfs="$2" '
    BEGIN { rectok = ENVIRON["ML2R_REGTOK"] }

    # Validate before anything is printed, so a bad stream yields no output.
    NR == 1 && $0 !~ rectok {
      print "first line lacks record start token" > "/dev/stderr"
      bad = 1
      exit 1
    }

    # A start line flushes the record collected so far and opens a new one.
    $0 ~ rectok {
      if (NR > 1) print rec
      rec = $0
      next
    }

    # Any other line is appended to the current record.
    { rec = rec dfs $0 }

    # Flush the final record; empty input produces no output at all.
    END { if (!bad && NR > 0) print rec }
  '
}

#-----------------------------------------------------------------------
# ml2r -r regtok [-d DFS] {-h} [file]
# Parses the options, selects the input (file argument, "-", or stdin)
# and streams the joined records to stdout.  Diagnostics go to stderr so
# they never mix with the record stream.
#-----------------------------------------------------------------------
ml2r() {
  local dfs='\t' help='' regtok=''
  local opt OPTARG OPTIND=1   # local so the function can be called repeatedly

  while getopts d:r:h opt; do
    case "$opt" in
      d) dfs=$OPTARG ;;
      r) regtok=$OPTARG ;;
      h) help=1 ;;
      *)
        printf '%s\n' "ml2r -h for help" >&2
        return 1
        ;;
    esac
  done
  shift $((OPTIND - 1))

  if [[ -n "$help" ]]; then
    printf '%s\n' \
      "ml2r -r regtok [-d DFS] {-h} file | pipe" \
      "-r regtok, regular expression for start of record" \
      "-d DFS, field separator for the output stream"
    return 0
  fi

  if [[ -z "$regtok" ]]; then
    printf '%s\n' "record start token must be specified" >&2
    return 1
  fi

  #---------------------------------------------------------------------
  # No file argument: accept data from a pipe or a redirected file.
  #---------------------------------------------------------------------
  if (( $# == 0 )); then
    if [[ -p /dev/stdin || -f /dev/stdin ]]; then
      _ml2r_join "$regtok" "$dfs"
      return
    fi
    printf '%s\n' "no file given nor is there a trailing pipe" >&2
    return 1
  fi

  # Only the first operand is used as the input file.
  local ifile=$1
  if [[ "$ifile" == "-" ]]; then
    _ml2r_join "$regtok" "$dfs"
    return
  fi
  if [[ ! -r "$ifile" ]]; then
    printf 'ml2r: cannot read %s\n' "$ifile" >&2
    return 1
  fi

  # Redirect instead of passing the name to awk, so a file name that
  # contains "=" is not mistaken for an awk variable assignment.
  _ml2r_join "$regtok" "$dfs" < "$ifile"
}

# Must stay the last command: its return value is the exit status.
ml2r "$@"