DOC HOME SITE MAP MAN PAGES GNU INFO SEARCH PRINT BOOK
 
Automating frequent tasks

Putting everything together

We have covered most of the shell-specific elements of a style analysis program, except for two components: the global constants set up at the top of the file, and the function analyze, which reports on the readability indices of a file. Here is a complete listing of the program. (See below for a commentary on the features that have not yet been covered.)

1  : #-----------------------------------------------------
2  : #
3  : # rap -- Readability Analysis Program
4  : #
5  : # Purpose: provide readability analysis of texts to:
6  : # Kincaid formula, ARI, Coleman-Liau Formula, Flesch
7  : # Reading Ease Score. Also word count, sentence length,
8  : # word length.
9  : #
10 : # Note that rap is _not_ as functional as style(CT),
11 : # which is dictionary-driven; this is the outcome of
12 : # a deliberate attempt to keep everything in a single
13 : # shell script.
14 : #
15 : #------------- define program constants here ----------
16 : #
17 : DEBUG=${DEBUG:-true}	# keep DEBUG from the environment, otherwise default to true
18 : CLS=`tput clear`	# terminal sequence that clears the screen
19 : HILITE=`tput smso`	# terminal sequence that starts standout (highlight) mode
20 : NORMAL=`tput rmso`	# terminal sequence that ends standout mode
21 : #
22 : #----- define the lexical structure of a sentence -----
23 : #
24 : # a `word' primitive is any sequence of letters and digits.
25 : #
26 : WORD='[A-Za-z0-9]+'
27 : #
28 : # whitespace is what goes between real words in a sentence;
29 : # it includes carriage returns so sentences can cross line
30 : # boundaries.
31 : #
32 : WHITESPACE="[[:space:]]"
33 : #
34 : # an initial -- one or two letters followed by a period --
35 : # is defined so we can tell that it is not a short sentence.
36 : # (Otherwise Ph.D. would be counted as two sentences.)
37 : # The period is written [.] because a bare . matches any character.
38 : INITIAL="($WHITESPACE|[.])(([A-Za-z0-9]|[A-Za-z0-9][A-Za-z0-9])[.])"
39 : #
40 : # syllabic consonants; consonants including letter pairs:
41 : #
42 : CONS="[bcdfghjklmnpqrstvwxyz]|ll|ght|qu|([wstgpc]h)|sch"
43 : #
44 : # syllabic vowels; include the ly suffix
45 : #
46 : VOWL="[aeiou]+|ly"
47 : #
48 : # definition of a syllable (after Webster's Collegiate Dictionary)
49 : #
50 : SYL="(${CONS})*\
51 : ((${CONS})|((${VOWL})+))\
52 : (${CONS})*"
53 : #
54 : # Finally, a sentence consists of (optionally) repeated
55 : # sequences of one word followed by zero or more
56 : # whitespaces, terminated by a period.
57 : # ([.] is a literal period; it also survives awk's escape processing.)
58 : SENT="($WORD($WHITESPACE)*)+[.]"
59 : #
60 : #---------- initialize some local variables -----------
61 : #
62 : SCRIPT=$0
63 : help='no'	  ;	verbose=' '	;	record=' '
64 : next_log_state='ON'; 	log='OFF'	; 	batch=' '
65 : file=' '	  ; 	fname=' '	;	LOGFILE=$$.log
66 : #
67 : #--------------- define program traps here ------------
68 : #
69 : trap "strike_any_key" 1 2 3 15	# on signals 1, 2, 3 and 15 run strike_any_key instead of exiting
70 : #
71 : #----------------- useful subroutines -----------------
72 : #
73 : getc ()	# getc VAR -- read one keystroke and store it in the variable named by $1
74 : {
75 :   stty raw	# raw mode: a character is delivered without waiting for a newline
76 :   tmp=`dd bs=1 count=1 2>/dev/null`	# read exactly one byte from standard input
77 :   eval $1='$tmp'	# assign the byte to the caller-named variable
78 :   stty cooked	# put the terminal back into normal line-oriented input
79 : }
80 : #
81 : #-----------------------------------------------------
82 : #
83 : toggle_logging ()	# flip report logging; keeps log and next_log_state opposite to each other
84 : {
85 :   log=$next_log_state	# adopt the state that the menu was offering
86 :   case $log in
87 :     ON)  next_log_state=OFF ;;	# logging now on, so the next toggle switches it off
88 :     OFF) next_log_state=ON  ;;	# logging now off, so the next toggle switches it on
89 :   esac
90 : }
91 : #
92 : #-----------------------------------------------------
93 : #
94 : get_fname ()	# prompt for a filename and store the reply in fname
95 : {
96 :   echo "Enter a filename: \c"	# \c keeps the cursor on the prompt line where echo honours it
97 :   read newfname
98 :   fname=${newfname:-${fname}}	# an empty reply leaves the current fname unchanged
99 : }
100 : #
101 : #------------------------------------------------------
102 : #
103 : strike_any_key()	# pause until a key is pressed, then clear the screen
104 : {
105 :     echo '
106 :     strike any key to continue ...\c'
107 :     getc junk	# wait for one keystroke; its value is not used
108 :     echo $CLS	# CLS holds the clear-screen sequence obtained from tput
109 : }
110 : #
111 : #-----------------------------------------------------
112 : #
113 : change_dir ()	# prompt for a directory and make it the current directory
114 : {		# returns the status of cd, so a failed change is not reported as a success
115 :   echo "Enter a directory: \c"
116 :   read newdir
117 :   newdir=${newdir:-`pwd`}	# an empty reply means stay in the current directory
118 :   cd "$newdir" &&	# quoted so that names containing spaces reach cd as one argument
119 :   echo "Directory set to: $newdir"	# printed only when the cd succeeded
120 : }
121 : #
122 : #-----------------------------------------------------
123 : #
124 : _help()	# print the usage summary on standard output
125 : {	# NOTE(review): the option parser also accepts -v, which is not listed below
126 : echo "
127 :
128 : 	Readability Analysis Program
129 :
130 : 	A shell/awk demo to determine the readability grade of texts
131 :
132 : 	Usage:
133 :
134 : 	Either invoke with no options for full menu-driven
135 : 	activity, or use the following flags:
136 :
137 : 	-[h|H]	prints this help
138 : 	-l	cause output to be logged to a file
139 : 	-f file	enter the name of the file to check
140 : 	-b	run in batch mode (no menus)
141 : "
142 : }
143 : #
144 : #---------- define the menu handler functions here ----
145 : get_file()	# file-selection submenu: loops until the user types q
146 : {
147 :   while :	# redraw the menu after every action
148 :   do
149 :     echo $CLS
150 :     echo "
151 :
152 : 	${HILITE}Select a file${NORMAL}
153 :
154 : 	Current file is: [${HILITE} $fname ${NORMAL}]
155 :
156 : 	Type the letter corresponding to your current task:
157 :
158 :   	[space]	Enter a filename or pattern to use
159 :   	l	List the current directory
160 : 	c	Change current directory
161 :   	q	quit back to main menu
162 :
163 :
164 :     =======>\c"
165 :     getc char	# read a single keystroke into char
166 :     case $char in
167 :     ' ')    get_fname 		;;	# prompt for a new filename
168 :     'l')    ls | ${PAGER:-more} ;;	# page the listing through $PAGER, or more if PAGER is unset
169 :     'c')    change_dir 		;;	# prompt for a new working directory
170 :     'q')    break 		;;	# leave the submenu without pausing
172 :     esac	# NOTE(review): listing line 171 is absent from this copy -- confirm nothing was lost
173 :     strike_any_key	# pause so the output can be read before the menu is redrawn
174 :   done
175 : }
176 : #
177 : #------------------------------------------------------
178 : #
179 : analyze()	# print word, syllable and sentence counts and four readability indices for $fname
180 : {	# returns 1 when no file is selected; uses WHITESPACE, SENT, INITIAL, SYL, log and LOGFILE
181 :   if [ "$fname" = " " ]	# quoted: an unquoted blank fname leaves test without a left operand
182 :   then
183 :       echo "
184 :
185 :   	You must specify a filename first
186 : "
187 :       strike_any_key
188 :       return 1
189 :   fi
190 :   wordcount=`wc -w < "$fname"`
191 :   lines=`wc -l < "$fname"`
192 :   nonwhitespace=`sed -e "/${WHITESPACE}/s///g" < "$fname" | wc -c`	# characters left once whitespace is deleted; newlines are still counted
193 :   sentences=`awk -e '  BEGIN { sentences = 0
194 : 			    target = ""
195 : 			    marker = "+X+"
196 : 			  }
197 : 		          { target = target " " $0
198 : 			    initials = gsub(init, "", target)
199 :                             hit = gsub(sent, marker, target)
200 :                             sentences += hit
201 : 			    if (hit != 0) {
202 : 				for (i= 0; i < hit; i++) {
203 :                                    found = index(target, marker)
204 : 				   target = substr(target, found+3)
205 : 				} # end for
206 : 			    } # end if
207 : 			    hit = 0
208 : 			  }
209 : 		    END   { print sentences }
210 :   ' sent="$SENT" init="$INITIAL" < "$fname"`
211 :   letters=`expr $nonwhitespace - $lines`	# remove one newline per line to leave the letter count
212 :   sylcount=`awk -e ' BEGIN { sylcount = 0 }
213 :                    	 { target = $0
214 : 			   sylcount += gsub(syllable, "*", target)
215 : 			 }
216 : 		   END   { print sylcount }
217 :   ' syllable="$SYL" < "$fname"`
218 :     echo "
219 :
220 :	Number of words: $wordcount
221 : 	Number of syllables: $sylcount
222 : 	Number of sentences: $sentences
223 :
224 :   "
225 :   export letters wordcount sentences sylcount	# NOTE: bc reports a divide by zero if the file has no words or no sentences
226 :   ARI=`bc << %%
227 :   scale = 2; l = ($letters / $wordcount)
228 :   w = ($wordcount / $sentences)
229 :   4.71 * l +0.5 * w -21.43
230 : %%
231 :   `
232 :   Kincaid=`bc << %%
233 :   scale = 2; w = ($wordcount / $sentences)
234 :   s = ($sylcount / $wordcount)
235 :   11.8 * s + 0.39 * w - 15.59
236 : %%
237 :   `
238 :   CLF=`bc << %%
239 :   scale = 2; l = ($letters / $wordcount)
240 :   s = ($sentences / ($wordcount / 100))
241 :   5.89 * l - 0.3 * s - 15.8
242 : %%
243 :   `
244 :   Flesch=`bc << %%
245 :   scale = 2; w = ($wordcount / $sentences)
246 :   s = ($sylcount / $wordcount)
247 :   206.835 - 84.6 * s - 1.015 * w
248 : %%
249 : `
250 :   if [ "$log" = "ON" ]	# compare the variable, not the literal word log
251 :   then
252 :   echo "
253 :     ARI = $ARI
254 :     Kincaid= $Kincaid
255 :     Coleman-Liau = $CLF
256 :     Flesch Reading Ease = $Flesch" > $LOGFILE
257 :   fi
258 :   echo "ARI = $ARI
259 :   Kincaid= $Kincaid
260 :   Coleman-Liau = $CLF
261 :   Flesch Reading Ease = $Flesch" > /dev/tty
262 : }
263 : #
264 : #=========== THIS IS WHERE THE PROGRAM BEGINS =========
265 : #
266 : #
267 : #---------- parse the command line---------------------
268 : #
269 : while getopts hHvlbf: result	# -f takes an argument; the other flags are switches
270 : do
271 :   case $result in
272 : 	h|H)	help="yes"		;;
273 : 	v)	verbose="yes"		;;
274 : 	l)	record="yes"	
275 :                 next_log_state=OFF
276 :                 log=ON			;;	# upper case, to match the ON/OFF values toggle_logging tests
277 : 	b)	batch="yes"		;;
278 : 	f)	file="yes"
279 :                 fname=${OPTARG:-" "}	;;
280 :   	*)	help="yes"		;;	# any unrecognised option shows the usage text
281 :   esac
282 : done
283 : if [ "$help" = "yes" ]
284 : then
285 :   _help
286 :   exit 1
287 : fi
288 : if [ "$batch" = "yes" ]	# quoted: batch is a single blank unless -b was given
289 : then
290 :   analyze
291 :   exit 0
292 : fi
293 : #
294 : #---------- enter the mainloop ------------------------
295 : #
296 : while :	# main menu: redraw and dispatch on one keystroke until q is typed
297 : do
298 :         echo $CLS
299 :         echo "
300 :
301 : 	${HILITE}Readability Analysis Program${NORMAL}
302 :
303 : 	Type the letter corresponding to your current task:
304 :
305 :   f    Select files to analyze [now ${HILITE}$fname${NORMAL} ]
306 :   p    Perform analyses
307 :   l    switch ${next_log_state} report logging [now ${HILITE}$log${NORMAL}]
308 :   q    quit program
309 :
310 :
311 : 	=======>\c"
312 :        getc char	# read a single keystroke into char
313 :        case $char in
314 :   'f') getloop=1	# getloop is not read anywhere in this listing
315 :        get_file 	;;	# enter the file-selection submenu
316 :   'p') analyze
317 : 	     strike_any_key	;;	# pause so the results stay on screen
318 :   'l') toggle_logging	;;
319 :   'q') break		;;
320 :   (**) continue	;;	# any other key simply redraws the menu
321 :   esac
322 : done
323 : clear	# leave a clean screen on exit
324 : exit 0

The variable definitions from lines 17 to 65 set up some constants for screen clearing and highlighting, initialize variables for use in the script, and define some extended regular expressions, as explained in ``Regular expressions'', that are used later to scan the target file for initials, sentences, and syllables. The mechanism used to conduct the scan is a pair of scripts written in the awk programming language (explained in ``Using awk'') that identify the number of sentences in a file, and the number of syllables in the file. These scripts lie between lines 193 and 217; they are explained in detail in ``Spanning multiple lines''.


Next topic: Readability analysis
Previous topic: Recommended ways to set up directories

© 2005 The SCO Group, Inc. All rights reserved.
SCO OpenServer Release 6.0.0 -- 03 June 2005