#!/usr/bin/ksh93
################################################################
#### usagemsg_wikiget - print the help/usage text for wikiget.
####
#### Arguments:
####   $1 - program path or name; only the part after the last "/"
####        is shown on the "Usage:" line.
#### Outputs:
####   Writes the usage message to standard output.
####
#### The message text below is emitted at runtime and is kept
#### verbatim, including its original spelling.
################################################################
function usagemsg_wikiget {
    print "
Program: wikiget

Shell script to retreive content from a wiki location and optionally,
perform replacements on imbedded links to point to another wiki
location. This script does not use any external utilities, network
connections and content retrieval is performed using only Korn Shell 93
built-in commands.

Usage: ${1##*/} [-?vV] [-n] [-y] [-s wikiSvr] [-d wikiDir] [-t wikiTitle]
       [-p httpPort] [-l linkSrc] [-r linkRpl]
       [-b beginContent] [-e endContent] [-g] [-h]

  Where:
    -s wikiSvr      = Specify the Wiki Server from which to retreive content
                        ( default: en.wikipedia.org )
    -d wikiDir      = Specify the Wiki Directory from which to retreive content
                        ( default: /wiki/ )
    -t wikiTitle    = Specify the Wiki File or Title of the content to retrieve
                        ( default: How_to )
    -p httpPort     = Specify the Web Server Port number for connection
                        ( default: 80 )
    -l linkSrc      = Identify existing source Wiki Link text for replacement
                        ( default: href=\"/wiki/ )
    -r linkRpl      = Specify replacement text for Wiki link URL's (NULL OK)
                        ( default: href=\"/cgi-bin/mylexica/wiki/lookup.cgi? )
    -b begincontent = A string of characters that designates the beginning
                      of the content to extract from the retrieved wiki page
                        ( default: )
    -e endContent   = A string of characters that designates the ending
                      of the content to extract from the retrieved wiki page
                        ( default:
)
    -g = Send a GET command to the Wiki web server to retrieve the page content
           ( default: GET )
    -h = Send a HEAD command to the Wiki server to retrieve only header info
    -y = Perform text and URL replacements on retrieved Wiki Page (default)
    -n = Do not perform any text or URL replacements on retrieved Wiki Page
    -v = Verbose mode - displays wikiget function info
    -V = Very Verbose Mode - debug output displayed
    -? = Help - display this message

Author: Dana French (dfrench@mtxia.com)

\"AutoContent\" enabled
"
}
################################################################
####
#### Description:
####
#### Shell script to retrieve content from a wiki location
#### and optionally, perform replacements on embedded links
#### to point to another wiki location. This script permits
#### the user to specify any wiki location from which to
#### retrieve content, and any content at that location.
####
#### Optionally, the user may replace text in the retrieved
#### pages by specifying a source pattern in the text to look
#### for, and a replacement string when the pattern is
#### found.
####
#### Assumptions:
####
#### This script assumes the wiki location is on the node
#### from which this script is being run, or a network
#### connection is available to connect to remote wikis. In
#### either case, it is assumed the wiki provides web based
#### content.
####
#### Dependencies:
####
#### This script is fully self contained and only uses
#### built-in Korn Shell commands, no external Unix utilities
#### are required. However, this script utilizes some of the
#### latest capabilities of Korn Shell 93 and requires a
#### recent version of the shell. The latest version of Korn
#### Shell 93 can be obtained at www.kornshell.com
####
#### Products:
####
#### The output from this script is a stream of data
#### containing the content from a wiki page.
#### A subset of the Wiki page can be extracted by specifying beginning
#### and ending pattern to look for in the stream of text.
#### The format of the retrieved content is unaltered by this
#### script, however this script provides the capability of
#### altering URL links.
####
####
#### Configured Usage:
####
#### This script may be executed from the command line as a
#### stand alone shell script, included and executed by
#### another shell script, or called by name from a shell
#### script function library.
####
#### An example command line using this script:
####
#### ./wikiget.sh -v -t Shell_script
####
####
#### Details:
####
#### wikiget [-?vV] [-n|-y] [-g|-h] [-s wikiSvr] [-d wikiDir]
####         [-t wikiTitle] [-p httpPort] [-l linkSrc] [-r linkRpl]
####         [-b beginContent] [-e endContent]
####
#### Opens a TCP connection to wikiSvr:httpPort through /dev/tcp,
#### sends one HTTP GET (or HEAD) request for wikiDir + wikiTitle
#### and copies the response to standard output line by line:
####
####   - GET : printing starts on the line AFTER the first line that
####           contains beginContent and stops at (without printing)
####           the first line that contains endContent.
####   - HEAD: every response line is printed; link replacement is
####           switched off.
####
#### When replacement is enabled, every occurrence of linkSrc in a
#### printed line is replaced by linkRpl.
####
#### Returns 0 after the response has been processed, 1 on a usage
#### or validation error or when the connection cannot be opened.
#### Diagnostics and verbose output go to standard error.
####
################################################################
function wikiget {
    typeset VERSION="1.0"
    typeset TRUE="1"
    typeset FALSE="0"
    typeset VERBOSE="${FALSE}"
    typeset VERYVERB="${FALSE}"
    typeset REPLACE="${TRUE}"
    typeset PRNT="${FALSE}"
    typeset SNDGET="${TRUE}"
    typeset SNDHEAD="${FALSE}"
    typeset SRVR="en.wikipedia.org"
    typeset RDIR="/wiki/"    # Must end with /
    typeset PORT="80"
    typeset SRCH="href=\"/wiki/"
    typeset RPLC="href=\"/cgi-bin/mylexica/wiki/lookup.cgi?"
    typeset PAGE="How_to"
    #### NOTE(review): with these defaults (empty begin marker, a bare
    #### newline as end marker) the begin test matches the very first
    #### line and the end test can never match a line returned by
    #### "read", so everything after the first response line is printed
    #### through end of stream. Confirm the intended default markers.
    typeset BEGCON=""
    typeset ENDCON="
"
    typeset ACTION="GET"
    #### Keep the loop variables local instead of leaking them to the caller.
    typeset OPTION=""
    typeset LINE=""

    #### "b:" and "e:" are required here: the usage text documents -b and
    #### -e, and without them the content markers could never be set.
    while getopts ":vVynghs:d:t:p:l:r:b:e:" OPTION
    do
        case "${OPTION}" in
            'v') VERBOSE="${TRUE}";;
            'V') VERYVERB="${TRUE}";;
            's') SRVR="${OPTARG}";;
            'd') RDIR="${OPTARG}";;
            't') PAGE="${OPTARG}";;
            'p') PORT="${OPTARG}";;
            'l') SRCH="${OPTARG}";;
            'r') RPLC="${OPTARG}";;
            'b') BEGCON="${OPTARG}";;
            'e') ENDCON="${OPTARG}";;
            'n') REPLACE="${FALSE}";;
            'y') REPLACE="${TRUE}";;
            'g') SNDGET="${TRUE}"
                 SNDHEAD="${FALSE}";;
            'h') SNDGET="${FALSE}"
                 SNDHEAD="${TRUE}";;
            '?') usagemsg_wikiget "${0}" && return 1 ;;
            ':') usagemsg_wikiget "${0}" && return 1 ;;
            '#') usagemsg_wikiget "${0}" && return 1 ;;
        esac
    done

    shift $(( OPTIND - 1 ))

    #### Ensure the Wiki remote directory value begins and ends with
    #### exactly one "/" (any run of slashes at either end is collapsed).
    RDIR="${RDIR%%+(/)}/"
    RDIR="/${RDIR##+(/)}"

    #### Perform some error checking to verify all critical variables
    #### contain values. While these checks run, an EXIT trap is set to
    #### print the usage message; it is cleared again once they all pass.

    trap "usagemsg_wikiget ${0}" EXIT

    if [[ "_${SRVR}" == "_" ]]
    then
        print -u 2 -- "# ERROR: Wiki Server name is NULL"
        return 1
    fi

    if [[ "_${RDIR}" == "_" ]]
    then
        print -u 2 -- "# ERROR: Wiki remote directory specification is NULL"
        return 1
    fi

    if [[ "_${PORT}" == "_" ]]
    then
        print -u 2 -- "# ERROR: http Port number is NULL"
        return 1
    fi

    if (( REPLACE == TRUE )) && [[ "_${SRCH}" == "_" ]]
    then
        print -u 2 -- "# ERROR: Replacement is turned on, but no search pattern is specified"
        return 1
    fi

    if (( SNDGET == SNDHEAD ))
    then
        print -u 2 -- "# ERROR: Specify one option of GET or HEAD"
        return 1
    fi

    trap - EXIT

    #### Display the program configuration settings if VERBOSE mode is TRUE.

    (( VERYVERB == TRUE )) && set -x

    if (( VERBOSE == TRUE ))
    then
        print -u 2 "# Program........: ${0}"
        print -u 2 "# Version........: ${VERSION}"
        print -u 2 "# Wiki Server....: ${SRVR}"
        print -u 2 "# Wiki Directory.: ${RDIR}"
        print -u 2 "# Wiki Page Title: ${PAGE}"
        print -u 2 "# HTTP Port no...: ${PORT}"

        if (( SNDGET == TRUE ))
        then
            print -u 2 "# HTTP GET.......: TRUE"
            print -u 2 "# HTTP HEAD......: FALSE"
        fi

        if (( SNDHEAD == TRUE ))
        then
            print -u 2 "# HTTP GET.......: FALSE"
            print -u 2 "# HTTP HEAD......: TRUE"
        fi

        if (( REPLACE == TRUE ))
        then
            print -u 2 "# Wiki Replace...: TRUE"
            print -u 2 "# Wiki Search Pat: ${SRCH}"
            print -u 2 "# Wiki Replace w/: ${RPLC}"
        else
            print -u 2 "# Wiki Replace...: FALSE"
        fi
    fi

################################################################

    #### Retrieve the Wiki page from the remote server and display.
    #### For HEAD requests every line is printed and replacement is off.

    ACTION="GET"
    PRNT="${FALSE}"
    if (( SNDHEAD == TRUE ))
    then
        ACTION="HEAD"
        REPLACE="${FALSE}"
        PRNT="${TRUE}"
    fi

    #### Open a network port to the remote server on file descriptor 3.
    #### "command exec" is used so that a failed open is reported here
    #### instead of terminating the shell.

    if ! command exec 3<>"/dev/tcp/${SRVR}/${PORT}"
    then
        print -u 2 -- "# ERROR: Unable to connect to ${SRVR} port ${PORT}"
        return 1
    fi

    #### Send a GET or HEAD command to the remote Wiki web server to
    #### retrieve the page. "print" expands the \r\n escapes and appends
    #### the final newline, which completes the blank line that ends the
    #### request. "Connection: close" asks the server to close the socket
    #### after the response so the read loop below sees end-of-file
    #### instead of waiting on a kept-alive connection.
    #### NOTE(review): chunked transfer encoding is not decoded here.

    print -u 3 -- "${ACTION} ${RDIR}${PAGE} HTTP/1.1\r\nHost: ${SRVR}\r\nConnection: close\r\n\r"

    #### Parse each line of the Wiki page to extract the desired content.

    while IFS="" read -u 3 -r -- LINE
    do

        #### Remove all non-printing characters from the wiki page content
        #### line. Each character class needs its own [:name:] inside the
        #### bracket expression.

        LINE="${LINE//[![:alnum:][:punct:][:space:][:blank:]]/}"

        #### If the ending pattern is detected, break out of the loop to
        #### stop reading wiki page content. An empty ending pattern means
        #### "no end marker" (it would otherwise match every line).

        [[ -n "${ENDCON}" && "_${LINE}" == _*"${ENDCON}"* ]] && break

        if (( PRNT == TRUE ))
        then
            if (( REPLACE == TRUE )) && [[ "_${SRCH}" != "_" ]]
            then

                #### Search and Replace the URL Links, if specified by the user.

                print -r -- "${LINE//${SRCH}/${RPLC}}"
            else

                #### Otherwise just print the Wiki page content.

                print -r -- "${LINE}"
            fi
        fi

        #### If the beginning pattern is detected, turn on the PRINT signal
        #### to display the wiki page content. The test comes after the
        #### print step, so the matching line itself is not printed.

        [[ "_${LINE}" == _*"${BEGCON}"* ]] && PRNT="${TRUE}"

    done

    #### Close the network connection.

    exec 3>&-

    return 0
}
################################################################

wikiget "${@}"