#!/usr/bin/ksh93
################################################################
####
#### usagemsg_regexp_k93
####
#### Print the usage/help text for regexp_k93 to STDOUT.
####
#### Arguments:
####   $1 - program name or path; only the part after the last
####        '/' is shown on the "Usage:" line.
####
#### The literal double quotes inside the message are escaped so
#### that they appear in the output instead of being consumed by
#### the shell as string delimiters.
####
################################################################
function usagemsg_regexp_k93 {
  print -r -- "
Program: regexp_k93

Convert and translate regular expressions into Korn Shell patterns.
Resulting patterns can be used with Korn Shell built-in commands
such as \"if\", \"while\", and \"case\", to perform Pattern matching.

Usage: ${1##*/} [-?] [-vV] 'regexp|...'

  Where:
    -v = Verbose Mode
    -V = Very Verbose Mode

Example: regexp_k93 '^start.*end\$'

Author: Dana French (dfrench@mtxia.com)
Copyright 2006 by Dana French

\"AutoContent\" enabled
"
}
################################################################
####
#### Description:
####
#### The purpose of this function is to translate regular
#### expressions into korn shell patterns.  The resultant korn
#### shell pattern can be captured and then used with korn
#### shell pattern matching functions.
####
#### Limited regular expression functionality at this point.
#### It understands the following regular expression
#### metacharacters:
####
####      .       ^       $       [       ]       \
####      \.      ^$      \$      \[X\]   \\
####      *       \*      .*      [a-z]*  \.*
####      +       \+      .+      [a-z]+  \.+
####      \(string\)      \<string\>
####
#### Assumptions:
####
#### The regular expression is assumed to be the first
#### command line argument after any options specified.  It
#### is usually best to enclose the regular expression in
#### single quotes on the command line to ensure the shell
#### does not process any of the metacharacters.
####
#### Dependencies:
####
#### This function is self contained and is not dependent
#### upon any external programs, functions or scripts.
####
#### Products:
####
#### This function produces a korn shell pattern equivalent
#### of the regular expression specified on the command line.
#### The korn shell pattern is written to STDOUT.
####
#### Configured Usage:
####
#### Details:
####
################################################################
####
#### regexp_k93 [-v] [-V] 'regexp|...'
####
#### Translate each regular expression given on the command line
#### into a korn shell pattern and print one pattern per line on
#### STDOUT.
####
#### Options:
####   -v  Verbose mode: progress messages are written to STDERR.
####   -V  Very verbose mode: turns on 'set -x' tracing.
####
#### Arguments:
####   All remaining arguments are joined with '|'.  The joined
####   string is then split on '|' and '&', and every piece is
####   translated individually.
####
#### Returns:
####   0  all expressions were translated
####   1  an invalid option was given (syntax message on STDERR)
####   2  no regular expression was given (error and usage
####      message on STDERR)
####
#### How it works:
####   Stage 1 replaces every regexp metacharacter with a
####   %K93xxx% placeholder token so that later substitutions
####   cannot confuse literal text with pattern syntax.
####   Stage 2 rewrites token sequences into korn shell pattern
####   syntax.  The statements run strictly in the order written:
####   longer / more specific token sequences are handled before
####   the shorter ones they contain, so do not reorder them.
####
#### All working variables are local to the function, so repeated
#### calls in one shell do not see state left by earlier calls,
#### and the caller's IFS is never modified.
####
################################################################
function regexp_k93 {
  typeset VERSION="1.0"
  typeset TRUE="0"
  typeset FALSE="1"
  typeset VERBOSE="${FALSE}"
  typeset VERYVERB="${FALSE}"
  typeset OPTION ARG SEP REGEXP LOOPRE
  typeset CNT="0"
  typeset -a REARRAY

  while getopts ":vV" OPTION
  do
      case "${OPTION}" in
          'v') VERBOSE="${TRUE}";;
          'V') VERYVERB="${TRUE}";;
          [?:#]) print -u 2 -- "Syntax: regexp_k93 'regexp'"
                 return 1 ;;
      esac
  done

  shift $(( ${OPTIND} - 1 ))

#### Retrieve the remainder of the command line arguments
#### assuming they are regular expressions.  The arguments are
#### joined with '|' (same result as "${*}" with IFS="|", but
#### without touching IFS).

  REGEXP=""
  SEP=""
  for ARG in "${@}"
  do
      REGEXP="${REGEXP}${SEP}${ARG}"
      SEP="|"
  done

#### If no regular expressions were specified on the command
#### line, display an error message followed by the usage
#### message and return from this function.  Diagnostics go to
#### STDERR so that captured STDOUT only ever holds patterns.

  if [[ "_${REGEXP}" == "_" ]]
  then
    print -u 2 -- "# ERROR: Regular expression not specified."
    usagemsg_regexp_k93 "${0}" >&2
    return 2
  fi

#### Parse multiple RE's separated with OR '|' symbols or AND
#### '&' symbols and store each RE in an array to be processed
#### individually.  A trailing '|' is appended so that every
#### element, including the last, is terminated by a separator.

  LOOPRE="${REGEXP}|"
  LOOPRE="${LOOPRE//\&/|}"
  LOOPRE="${LOOPRE//\|\|/|}"

  while [[ "_${LOOPRE}" != "_" ]]
  do
    REARRAY[CNT++]="${LOOPRE%%\|*}"
    LOOPRE="${LOOPRE#*\|}"
  done

  (( VERYVERB == TRUE )) && set -x
  (( VERBOSE  == TRUE )) && print -r -u 2 -- "# Entering Function: ${0}"
  (( VERBOSE  == TRUE )) && print -r -u 2 -- "# Version..........: ${VERSION}"

  for REGEXP in "${REARRAY[@]}"
  do

    (( VERBOSE == TRUE )) && print -r -u 2 -- "# Specified regexp.: '${REGEXP}'"

################################################################
#### Stage 1: Convert regular expression metacharacters to
#### placeholder strings

#### convert '\'  -> 'ESC' :
    REGEXP="${REGEXP//\\/%K93ESC%}"
#### convert '\$' -> '\$' :
    REGEXP="${REGEXP//%K93ESC%'$'/%K93ESC%%K93DOL%}"
#### convert 'beginning of line' -> 'BEG' :
    REGEXP="${REGEXP/#\^/%K93BEG%}"
#### convert 'end of line' -> 'END' :
    REGEXP="${REGEXP/%\$/%K93END%}"
#### convert '$'  -> '$' :
    REGEXP="${REGEXP//'$'/%K93DOL%}"
#### convert '.'  -> 'DOT' :
    REGEXP="${REGEXP//./%K93DOT%}"
#### convert '*'  -> 'AST' :
    REGEXP="${REGEXP//\*/%K93AST%}"
#### convert '+'  -> 'PLU' :
    REGEXP="${REGEXP//+/%K93PLU%}"
#### convert '<'  -> 'LES' :
    REGEXP="${REGEXP//\</%K93LES%}"
#### convert '>'  -> 'GRE' :
    REGEXP="${REGEXP//\>/%K93GRE%}"
#### convert '['  -> 'OSB' :
    REGEXP="${REGEXP//\[/%K93OSB%}"
#### convert ']'  -> 'CSB' :
    REGEXP="${REGEXP//\]/%K93CSB%}"
#### convert '('  -> 'OPA' :
    REGEXP="${REGEXP//\(/%K93OPA%}"
#### convert ')'  -> 'CPA' :
    REGEXP="${REGEXP//\)/%K93CPA%}"
#### convert '?'  -> 'QUE' :
    REGEXP="${REGEXP//\?/%K93QUE%}"
#### convert '[^' -> '[!' :
    REGEXP="${REGEXP//%K93OSB%^/%K93OSB%%K93EXC%}"

#### An expression that is not anchored at the beginning or at
#### the end may match anywhere, so mark the open side(s) with
#### the 'ANY' placeholder.

#### convert 'regexp' -> '*regexp' :
    [[ "_${REGEXP}" != _%K93BEG%* ]] && REGEXP="%K93ANY%${REGEXP}"
#### convert 'regexp' -> 'regexp*' :
    [[ "_${REGEXP}" != _*%K93END% ]] && REGEXP="${REGEXP}%K93ANY%"

################################################################
#### Stage 2: Convert placeholder strings to korn shell patterns
####
#### NOTE(review): no statement in stage 1 produces %K93CAR%, so
#### the three 'CAR' rules below appear to be inert; they are kept
#### unchanged.

#### translate '^*'  -> '*' :
    REGEXP="${REGEXP/#%K93ANY%/*}"
#### translate '*$'  -> '*' :
    REGEXP="${REGEXP/%%K93ANY%/*}"
#### translate '\\*' -> '*(\\)' :
    REGEXP="${REGEXP//%K93ESC%%K93ESC%%K93AST%/*(\\)}"
#### translate '\\+' -> '+(\\)' :
    REGEXP="${REGEXP//%K93ESC%%K93ESC%%K93PLU%/+(\\)}"
#### translate '\**' -> '*(\*)' :
    REGEXP="${REGEXP//%K93ESC%%K93AST%%K93AST%/*(\*)}"
#### translate '\*+' -> '+(\*)' :
    REGEXP="${REGEXP//%K93ESC%%K93AST%%K93PLU%/+(\*)}"
#### translate '\^*' -> '*(^)' :
    REGEXP="${REGEXP//%K93ESC%%K93CAR%%K93AST%/*(^)}"
#### translate '\^+' -> '+(^)' :
    REGEXP="${REGEXP//%K93ESC%%K93CAR%%K93PLU%/+(^)}"
#### translate '\\'  -> '\\' :
    REGEXP="${REGEXP//%K93ESC%%K93ESC%/\\\\}"
#### translate '\.*' -> '*(.)' :
    REGEXP="${REGEXP//%K93ESC%%K93DOT%%K93AST%/*(.)}"
#### translate '.*'  -> '*(?)' :
    REGEXP="${REGEXP//%K93DOT%%K93AST%/*(?)}"
#### translate '\.+' -> '+(.)' :
    REGEXP="${REGEXP//%K93ESC%%K93DOT%%K93PLU%/+(.)}"
#### translate '.+'  -> '+(?)' :
    REGEXP="${REGEXP//%K93DOT%%K93PLU%/+(?)}"
#### translate '\$*' -> '*(\$)' :
    REGEXP="${REGEXP//%K93ESC%%K93DOL%%K93AST%/*(\\$)}"
#### translate '\$+' -> '+(\$)' :
    REGEXP="${REGEXP//%K93ESC%%K93DOL%%K93PLU%/+(\\$)}"
#### translate '$*'  -> '*(\$)' :
    REGEXP="${REGEXP//%K93DOL%%K93AST%/*('\$')}"
#### translate '$+'  -> '+(\$)' :
    REGEXP="${REGEXP//%K93DOL%%K93PLU%/+(\\$)}"
#### translate '\[*' -> '*(\[)' :
    REGEXP="${REGEXP//%K93ESC%%K93OSB%%K93AST%/*(\\'[')}"
#### translate '\[+' -> '+(\[)' :
    REGEXP="${REGEXP//%K93ESC%%K93OSB%%K93PLU%/+(\\'[')}"
#### translate '\]*' -> '*(\])' :
    REGEXP="${REGEXP//%K93ESC%%K93CSB%%K93AST%/*(\\']')}"
#### translate '\]+' -> '+(\])' :
    REGEXP="${REGEXP//%K93ESC%%K93CSB%%K93PLU%/+(\\']')}"
#### translate '\<string\>*' -> '*(+([$' \t\n.,:'])string+([$' \t\n.,:']))'
    REGEXP="${REGEXP//%K93ESC%%K93LES%(*(?))%K93ESC%%K93GRE%%K93AST%/*([\$\' \\t\\n.,:\']\1[\$\' \\t\\n.,:\'])}"
#### translate '\<string\>'  -> '+([$' \t\n.,:'])string+([$' \t\n.,:'])'
    REGEXP="${REGEXP//%K93ESC%%K93LES%(*(?))%K93ESC%%K93GRE%/[\$\' \\t\\n.,:\']\1[\$\' \\t\\n.,:\']}"
#### translate '<*'  -> '*(\<)' :
    REGEXP="${REGEXP//%K93LES%%K93AST%/*(\\<)}"
#### translate '<+'  -> '+(\<)' :
    REGEXP="${REGEXP//%K93LES%%K93PLU%/+(\\<)}"
#### translate '>*'  -> '*(\>)' :
    REGEXP="${REGEXP//%K93GRE%%K93AST%/*(\\>)}"
#### translate '>+'  -> '+(\>)' :
    REGEXP="${REGEXP//%K93GRE%%K93PLU%/+(\\>)}"
#### translate '\(string\)*' -> '*(\1)'
    REGEXP="${REGEXP//%K93ESC%%K93OPA%(*(?))%K93ESC%%K93CPA%%K93AST%/'*('\1')'}"
#### translate '\(string\)'  -> '(\1)'
    REGEXP="${REGEXP//%K93ESC%%K93OPA%(*(?))%K93ESC%%K93CPA%/'('\1')'}"
#### translate '\(*' -> '*(\()' :
    REGEXP="${REGEXP//%K93ESC%%K93OPA%%K93AST%/*(\\'(')}"
#### translate '\(+' -> '+(\()' :
    REGEXP="${REGEXP//%K93ESC%%K93OPA%%K93PLU%/+(\\'(')}"
#### translate '\)*' -> '*(\))' :
    REGEXP="${REGEXP//%K93ESC%%K93CPA%%K93AST%/*(\\')')}"
#### translate '\)+' -> '+(\))' :
    REGEXP="${REGEXP//%K93ESC%%K93CPA%%K93PLU%/+(\\')')}"
#### translate '(*'  -> '*(\()' :
    REGEXP="${REGEXP//%K93OPA%%K93AST%/*(\\'(')}"
#### translate '(+'  -> '+(\()' :
    REGEXP="${REGEXP//%K93OPA%%K93PLU%/+(\\'(')}"
#### translate ')*'  -> '*(\))' :
    REGEXP="${REGEXP//%K93CPA%%K93AST%/*(\\')')}"
#### translate ')+'  -> '+(\))' :
    REGEXP="${REGEXP//%K93CPA%%K93PLU%/+(\\')')}"
#### translate '\X*' -> '*(X)' :
    REGEXP="${REGEXP//%K93ESC%(?)%K93AST%/*(\\\1)}"
#### translate '\X+' -> '+(X)' :
    REGEXP="${REGEXP//%K93ESC%(?)%K93PLU%/+(\\\1)}"
#### translate '[a-z]*' -> '*([a-z])' :
    REGEXP="${REGEXP//%K93OSB%(*(?))%K93CSB%%K93AST%/'*(['\1'])'}"
#### translate '[a-z]+' -> '+([a-z])' :
    REGEXP="${REGEXP//%K93OSB%(*(?))%K93CSB%%K93PLU%/'+(['\1'])'}"
#### translate '\*'  -> '\*' :
    REGEXP="${REGEXP//%K93ESC%%K93AST%/\\*}"
#### translate '\+'  -> '\+' :
    REGEXP="${REGEXP//%K93ESC%%K93PLU%/\\+}"
#### translate '\.'  -> '.' :
    REGEXP="${REGEXP//%K93ESC%%K93DOT%/.}"
#### translate '^*'  -> '\*' :
    REGEXP="${REGEXP/#%K93BEG%%K93AST%/\\*}"
#### translate '*$'  -> '\*' :
    REGEXP="${REGEXP/%%K93AST%%K93END%/\\*}"
#### translate 'X*'  -> '*(X)' :
    REGEXP="${REGEXP//(?)%K93AST%/*(\1)}"
#### translate 'X+'  -> '+(X)' :
    REGEXP="${REGEXP//(?)%K93PLU%/+(\1)}"
#### translate '\^'  -> '^' :
    REGEXP="${REGEXP//%K93ESC%%K93CAR%/\^}"
#### translate '\$'  -> '\$' :
    REGEXP="${REGEXP//%K93ESC%%K93DOL%/\\$}"
#### translate '\['  -> '[' :
    REGEXP="${REGEXP//%K93ESC%%K93OSB%/'\['}"
#### translate '\]'  -> ']' :
    REGEXP="${REGEXP//%K93ESC%%K93CSB%/'\]'}"
#### translate '[^'  -> '[!' :
    REGEXP="${REGEXP//%K93OSB%%K93EXC%/'['!}"
#### translate 'LES' -> '<' :
    REGEXP="${REGEXP//%K93LES%/'\<'}"
#### translate 'GRE' -> '>' :
    REGEXP="${REGEXP//%K93GRE%/'\>'}"
#### translate 'OSB' -> '[' :
    REGEXP="${REGEXP//%K93OSB%/'['}"
#### translate 'CSB' -> ']' :
    REGEXP="${REGEXP//%K93CSB%/']'}"
#### translate 'OPA' -> '(' :
    REGEXP="${REGEXP//%K93OPA%/'\('}"
#### translate 'CPA' -> ')' :
    REGEXP="${REGEXP//%K93CPA%/'\)'}"
#### translate 'DOT' -> '?' :
    REGEXP="${REGEXP//%K93DOT%/?}"
#### translate 'CAR' -> '^' :
    REGEXP="${REGEXP//%K93CAR%/^}"
#### translate 'EXC' -> '!' :
    REGEXP="${REGEXP//%K93EXC%/!}"
#### translate 'AST' -> '*' :
    REGEXP="${REGEXP//%K93AST%/*}"
#### translate 'PLU' -> '+' :
    REGEXP="${REGEXP//%K93PLU%/+}"
#### translate 'QUE' -> '?' :
    REGEXP="${REGEXP//%K93QUE%/\?}"
#### translate 'ESC' -> '\' :
    REGEXP="${REGEXP//%K93ESC%/\\}"
#### translate 'DOL' -> '\$' :
    REGEXP="${REGEXP//%K93DOL%/\\$}"
#### translate 'beginning of line' -> '' :
    REGEXP="${REGEXP/#%K93BEG%/}"
#### translate 'end of line' -> '' :
    REGEXP="${REGEXP/%%K93END%/}"

#### The finished pattern is the only thing written to STDOUT.
    print -r -- "${REGEXP}"

    (( VERBOSE == TRUE )) && print -r -u 2 -- "# Resultant Pattern: '${REGEXP}'"

  done

  (( VERBOSE == TRUE )) && print -r -u 2 -- "# Exiting Function.: ${0}"
  return 0
}
################################################################

regexp_k93 "${@}"