#!/usr/bin/ksh93
################################################################
#### usagemsg_getwiki [progname]
####
#### Print the getwiki help text on standard output.
####
#### Arguments:
####   $1 - program name shown on the "Usage:" line; any leading
####        directory part is stripped.  Falls back to "getwiki"
####        when omitted or empty.
################################################################
function usagemsg_getwiki {
  typeset PROG="${1:-getwiki}"

  # -r and -- make print emit the text verbatim: no backslash
  # processing, and no part of the message is taken as an option.
  print -r -- "
Program: getwiki

Shell script to retreive content from a wiki location and
optionally, perform replacements on imbedded links to point to
another wiki location.  This script uses external unix utilities
lynx and sed to retrieve the wiki pages and process the information.

Usage: ${PROG##*/} [-?vV] [-n] [-y] [-s wikiSvr] [-d wikiDir]
       [-t wikiTitle] [-p httpPort] [-l linkSrc] [-r linkRpl]
       [-b beginContent] [-e endContent] [-g] [-h]

  Where:
    -s wikiSvr      = Specify the Wiki Server from which to retreive content
                        ( default: en.wikipedia.org )
    -d wikiDir      = Specify the Wiki Directory from which to retreive content
                        ( default: /wiki/ )
    -t wikiTitle    = Specify the Wiki File or Title of the content to retrieve
                        ( default: How_to )
    -p httpPort     = Specify the Web Server Port number for connection
                        ( default: 80 )
    -l linkSrc      = Identify existing source Wiki Link text for replacement
                        ( default: href=\"/wiki/ )
    -r linkRpl      = Specify replacement text for Wiki link URL's (NULL OK)
                        ( default: href=\"/cgi-bin/mylexica/wiki/lookup.cgi? )
    -b begincontent = A string of characters that designates the beginning
                      of the content to extract from the retrieved wiki page
                        ( default: )
    -e endContent   = A string of characters that designates the ending
                      of the content to extract from the retrieved wiki page
                        ( default: )
    -g = Send a GET command to the Wiki web server to retrieve the page content
                        ( default: GET )
    -h = Send a HEAD command to the Wiki server to retrieve only header info
    -y = Perform text and URL replacements on retrieved Wiki Page (default)
    -n = Do not perform any text or URL replacements on retrieved Wiki Page
    -v = Verbose mode - displays getwiki function info
    -V = Very Verbose Mode - debug output displayed
    -? = Help - display this message

Author: Dana French (dfrench@mtxia.com)

\"AutoContent\" enabled
"
}
################################################################
####
#### Description:
####
#### Shell script to retrieve content from a wiki location
#### and, optionally, perform replacements on embedded links
#### to point to another wiki location.  This script permits
#### the user to specify any wiki location from which to
#### retrieve content, and any content at that location.
####
#### Optionally, the user may replace text in the retrieved
#### pages by specifying a source pattern in the text to look
#### for, and a replacement string to use when the pattern is
#### found.
####
#### Assumptions:
####
#### This script assumes the wiki location is on the node
#### from which this script is being run, or that a network
#### connection is available to connect to remote wikis.  In
#### either case, it is assumed the wiki provides web based
#### content.
####
#### Dependencies:
####
#### This script is dependent upon the existence of the text
#### based browser "lynx" and utilizes this program to
#### retrieve wiki pages from a web server.  This script is
#### also dependent upon the Unix utility "sed" to extract
#### the page content from the retrieved wiki pages.  Both
#### programs are assumed to be accessible via a directory in
#### the PATH environment variable.
####
#### Products:
####
#### The output from this script is a stream of data
#### containing the content from a wiki page.
#### A subset of the Wiki page can be extracted by specifying a
#### beginning and an ending pattern to look for in the stream
#### of text.  The format of the retrieved content is unaltered
#### by this script, however this script provides the capability
#### of altering URL links.
####
#### Configured Usage:
####
#### This script may be executed from the command line as a
#### stand alone shell script, included and executed by
#### another shell script, or called by name from a shell
#### script function library.
####
#### An example command line using this script:
####
####     ./getwiki.sh -v -t Shell_script
####
#### Details:
####
#### getwiki [-vVynghs:d:t:p:l:r:b:e:]
####
####   1. Parses the command line options into local settings.
####   2. Verifies that the critical settings are not empty; on a
####      bad option or a failed check, the usage message is
####      displayed and the function returns 1.
####   3. In verbose mode, reports the settings on standard error.
####   4. Runs lynx against http://server[:port]/dir/title and
####      writes the result to standard output:
####        HEAD (-h) : lynx output is passed through untouched.
####        GET       : lynx output is filtered through sed, first
####                    trimming to the -b / -e patterns (when
####                    given) and then rewriting links (unless -n).
####      Returns 0 once the retrieval pipeline has been run; the
####      exit status of lynx and sed is not inspected.
####
#### The -b, -e and -l values are inserted into sed addresses and
#### a sed "s|...|...|g" command as-is, so they are interpreted
#### as sed regular expressions and must not contain the
#### delimiter in use ("/" for -b and -e, "|" for -l and -r).
####
################################################################
function getwiki {
  typeset VERSION="1.0"
  typeset TRUE="1"
  typeset FALSE="0"
  typeset VERBOSE="${FALSE}"
  typeset VERYVERB="${FALSE}"
  typeset REPLACE="${TRUE}"
  typeset SNDGET="${TRUE}"
  typeset SNDHEAD="${FALSE}"
  typeset SRVR="en.wikipedia.org"
  typeset RDIR="/wiki/"          # Normalized below to begin and end with /
  typeset PORT="80"
  typeset SRCH="href=\"/wiki/"
  typeset RPLC="href=\"/cgi-bin/mylexica/wiki/lookup.cgi?"
  typeset PAGE="How_to"
  # NOTE(review): the begin/end content defaults are blank in this
  # file (the end default was only a line break), presumably because
  # markup was lost at some point - confirm the intended markers.
  # An empty pattern means "do not trim at that end".
  typeset BEGCON=""
  typeset ENDCON=""
  typeset ERRMSG=""
  typeset URL=""
  typeset OPTION
  typeset -a SEDARGS

  while getopts ":vVynghs:d:t:p:l:r:b:e:" OPTION
  do
    case "${OPTION}" in
      'v') VERBOSE="${TRUE}";;
      'V') VERYVERB="${TRUE}";;
      's') SRVR="${OPTARG}";;
      'd') RDIR="${OPTARG}";;
      't') PAGE="${OPTARG}";;
      'p') PORT="${OPTARG}";;
      'l') SRCH="${OPTARG}";;
      'r') RPLC="${OPTARG}";;
      'b') BEGCON="${OPTARG}";;
      'e') ENDCON="${OPTARG}";;
      'n') REPLACE="${FALSE}";;
      'y') REPLACE="${TRUE}";;
      'g') SNDGET="${TRUE}"
           SNDHEAD="${FALSE}";;
      'h') SNDGET="${FALSE}"
           SNDHEAD="${TRUE}";;
      # Help request, unknown option, or missing option argument.
      # Return unconditionally, even if printing the usage fails.
      '?'|':'|'#') usagemsg_getwiki "${0}"
           return 1 ;;
    esac
  done

  shift $(( ${OPTIND} - 1 ))

#### Ensure the Wiki remote directory value begins and ends with
#### exactly one "/"

  RDIR="${RDIR%%+(/)}/"
  RDIR="/${RDIR##+(/)}"

#### Perform some error checking to verify all critical variables
#### contain values.  The first failed check is reported, followed
#### by the usage message.

  if [[ "_${SRVR}" == "_" ]]
  then
    ERRMSG="Wiki Server name is NULL"
  elif [[ "_${RDIR}" == "_" ]]
  then
    ERRMSG="Wiki remote directory specification is NULL"
  elif [[ "_${PORT}" == "_" ]]
  then
    ERRMSG="http Port number is NULL"
  elif (( REPLACE == TRUE )) && [[ "_${SRCH}" == "_" ]]
  then
    ERRMSG="Replacement is turned on, but no search pattern is specified"
  elif (( SNDGET == SNDHEAD ))
  then
    ERRMSG="Specify one option of GET or HEAD"
  fi

  if [[ "_${ERRMSG}" != "_" ]]
  then
    print -u 2 -- "# ERROR: ${ERRMSG}"
    usagemsg_getwiki "${0}"
    return 1
  fi

#### Display the program configuration settings if VERBOSE mode is TRUE.

  (( VERYVERB == TRUE )) && set -x

  if (( VERBOSE == TRUE ))
  then
    print -u 2 "# Program........: ${0}"
    print -u 2 "# Version........: ${VERSION}"
    print -u 2 "# Wiki Server....: ${SRVR}"
    print -u 2 "# Wiki Directory.: ${RDIR}"
    print -u 2 "# Wiki Page Title: ${PAGE}"
    print -u 2 "# HTTP Port no...: ${PORT}"
    if (( SNDGET == TRUE ))
    then
      print -u 2 "# HTTP GET.......: TRUE"
      print -u 2 "# HTTP HEAD......: FALSE"
    fi
    if (( SNDHEAD == TRUE ))
    then
      print -u 2 "# HTTP GET.......: FALSE"
      print -u 2 "# HTTP HEAD......: TRUE"
    fi
    if (( REPLACE == TRUE ))
    then
      print -u 2 "# Wiki Replace...: TRUE"
      print -u 2 "# Wiki Search Pat: ${SRCH}"
      print -u 2 "# Wiki Replace w/: ${RPLC}"
    else
      print -u 2 "# Wiki Replace...: FALSE"
    fi
  fi

################################################################
#### Retrieve the Wiki page from the remote server and display.

  # The port is only spelled out when it differs from 80, so the
  # URL for the default port is the plain http://server/dir/title.
  URL="http://${SRVR}"
  [[ "${PORT}" != "80" ]] && URL="${URL}:${PORT}"
  URL="${URL}${RDIR}${PAGE}"

  # HEAD: header information only, never trimmed or rewritten.
  if (( SNDHEAD == TRUE ))
  then
    lynx -source -dump -head "${URL}"
    return 0
  fi

  # GET: collect the sed commands that apply.  Lines removed by the
  # trimming commands never reach the link substitution.
  [[ "_${BEGCON}" != "_" ]] && SEDARGS+=( -e "1,/${BEGCON}/ d" )
  [[ "_${ENDCON}" != "_" ]] && SEDARGS+=( -e "/${ENDCON}/,\$ d" )
  (( REPLACE == TRUE ))     && SEDARGS+=( -e "s|${SRCH}|${RPLC}|g" )

  if (( ${#SEDARGS[@]} > 0 ))
  then
    lynx -source -dump "${URL}" | sed "${SEDARGS[@]}"
  else
    lynx -source -dump "${URL}"
  fi

  return 0
}
################################################################

getwiki "${@}"