# ========================================================================
# JANUS-SR Janus Speech Recognition Toolkit
# ------------------------------------------------------------
# Author : Martin Westphal
# Module : countPairs.tcl
# Date : 15.Nov.96
#
# Remarks : Read in a text file and count different word pairs.
# Omit the first word in each line. Print a list of
# found pairs and their frequency.
# example: key hello Mister X
# pairs:
# ,hello
# hello,Mister
# Mister,X
# X,
#========================================================================
# Validate the command line: exactly one argument (the text file) is expected.
# The single argument "-help" requests the usage text; it must be looked up
# at index 0 (index 1 is always empty when argc is 1), otherwise "-help"
# would be treated as a file name.
set wantsHelp [expr {$argc == 1 && [lindex $argv 0] == "-help"}]
if {$argc != 1 || $wantsHelp} {
    puts stderr "USAGE: $argv0 'textfile'"
    # An explicit help request is a success; a malformed call is an error.
    exit [expr {$wantsHelp ? 0 : 1}]
}
set filename [lindex $argv 0]
# ---------------------
# define count proc
# ---------------------
# countWord --
#   Increment the occurrence counter kept for 'word' in the global array
#   'count', creating the entry on first use.
#   Returns the updated counter value (1 for a newly seen word).
proc countWord {word} {
    global count
    # Seed unseen entries with 0 so a single incr handles both cases.
    if {![info exists count($word)]} {
        set count($word) 0
    }
    incr count($word)
}
# ---------------------
# read text file
# ---------------------
# Read the text file line by line and count every adjacent word pair.
# 'open' raises an error if the file cannot be opened for reading.
set FP [open $filename r]
while {[gets $FP line] >= 0} {
    # Skip the first word of the line (the key); only the rest is counted.
    # NOTE(review): lrange parses the line as a Tcl list, so a line with
    # unbalanced braces or quotes raises an error, and a braced group counts
    # as a single word -- confirm the input text never contains those.
    set words [lrange $line 1 end]
    # Reset the predecessor to the empty string for each line, so that the
    # first pair is ",firstWord". (A bare 'set prev' only READS the variable
    # and fails with "no such variable" on the first line.)
    set prev ""
    foreach word $words {
        countWord $prev,$word
        set prev $word
    }
    # Close the line with the pair "lastWord," (or "," for a line that has
    # no words after the key).
    countWord $prev,
}
close $FP
# ---------------------
# print sorted list
# ---------------------
# Emit one "pair frequency" line per counted pair, ordered by lsort's
# default sort of the pair names, then end the script.
set sortedPairs [lsort [array names count]]
foreach pair $sortedPairs {
    puts "$pair $count($pair)"
}
exit