Script Clustering
[CodebookSet cbs fs] read ../step8/codebookSet
[DistribSet dss cbs] read ../step10/distribSet
[Tags tags] read ../step2/tags
[PhonesSet ps] read phonesSet
[QuestionSet qs ps:PHONES ps tags] read questionSet
Tree dst ps:PHONES ps tags dss
dst.ptreeSet read ../step10/ptreeSet
dst read ../step10/distribTree
SenoneSet sns [DistribStream str dss dst]
[TmSet tms] read ../step2/transitionModels
[TopoSet tps sns tms] read ../step2/topologies
[Tree tpt ps:PHONES ps tags tps] read ../step2/topologyTree
qs configure -padPhone pad
dst configure -padPhone pad
dss load ../step11/distribWeights.2
dss configure -minCount 250
proc findQuestion { tree node qs parent nodesA countA } {
upvar \$countA count
upvar \$nodesA nodes
if { [set p [\$tree:\$node configure -ptree]] >= 0} {
set c [\$tree.ptreeSet.item(\$p) configure -count]
set question [\$tree question \$node -questionSet \$qs]
set score [lindex \$question 1]
set question [lindex \$question 0]
if { [string length \$question] } {
lappend nodes(\$score) [list \$node \$parent \$question \$c]
if {! [info exist count(\$parent)]} {set count(\$parent) 0}
}
}
}
puts "=============== starting with splitting tree nodes ====================="
foreach node [dst:] { findQuestion dst \$node qs \$node nodes count }
set scores [lsort -real -decreasing [array names nodes]]
while { [llength \$scores] } {
set score [lindex \$scores 0]
set nlist \$nodes(\$score)
unset nodes([lindex \$scores 0])
foreach node \$nlist {
set name [lindex \$node 0]
set par [lindex \$node 1]
set quest [lindex \$node 2]
set cnt [lindex \$node 3]
if { [string length \$quest] } {
set c \$count(\$par)
puts "\$name \$quest (\$score) \${par}(\$c) -> ([expr \$c+1]) ([expr \$c+2])"
dst split \$name \$quest \${par}(\$c) \${par}([expr \$c+1]) \${par}([expr \$c+2])
incr count(\$par) 3
for {} { \$c < \$count(\$par)} { incr c} {
if { [set idx [dst index \${par}(\$c)]] > -1} {findQuestion dst \${par}(\$c) qs \$par nodes count
}
}
}
}
if [array exists nodes] {
set scores [lsort -real -decreasing [array names nodes]]
} else { set scores {}}
}
dst write distribTreeClustered
dst.ptreeSet write ptreeSetClustered
puts "====================== introducing new models =========================="
set itemN [dst configure -itemN]
for { set i 0} { \$i < \$itemN} { incr i} {
if { [set ptree [dst.item(\$i) configure -ptree]] > -1} {
set node [dst.item(\$i) configure -name]
dst.ptreeSet.item(\$ptree) models [ModelArray dtabMA dss]
if { [llength [set models [dtabMA puts]]] } {
cbs add \$node LDA 48 32 DIAGONAL
cbs:\$node := cbs.item([dss:[lindex [lindex \$models 0] 0] configure -cbX])
set cbX [cbs index \$node]
foreach ds \$models { dss:[lindex \$ds 0] configure -cbX \$cbX }
if { [dst.item(\$i) configure -model] < 0 } {
dss add \$node \$node
dst.item(\$i) configure -model [dss index \$node]}
}
dtabMA destroy
}
}
cbs write codebookSetClustered
dss write distribSetClustered
puts "==================== pruning away not needed ptrees ===================="
[DistribSet dss2 cbs]
foreach node [dst:] {
set model [dst:\$node configure -model]
set ptree [dst:\$node configure -ptree]
if { \$ptree > -1 } { dst:\$node configure -ptree -1 }
if { \$model > -1 } {
set dsname [dss name \$model]
dss2 add \$dsname [cbs name [dss:\$dsname configure -cbX]]
}
}
dst write distribTreeClusteredPruned
dss2 write distribSetClusteredPruned
exit