Script Clustering

[FeatureSet fs]               setDesc   @/home/islpra0/IslData/featDesc
[CodebookSet cbs fs]                read ../step8/codebookSet
[DistribSet  dss cbs]               read ../step10/distribSet
[Tags tags]                         read ../step2/tags
[PhonesSet ps]                      read    phonesSet
[QuestionSet qs ps:PHONES ps tags]  read    questionSet

Tree dst ps:PHONES ps tags dss
     dst.ptreeSet                   read ../step10/ptreeSet
     dst                            read ../step10/distribTree

SenoneSet sns [DistribStream str dss dst]

[TmSet tms]                         read ../step2/transitionModels
[TopoSet tps sns tms]               read ../step2/topologies
[Tree tpt ps:PHONES ps tags tps]    read ../step2/topologyTree

qs  configure -padPhone pad
dst configure -padPhone pad

dss load ../step11/distribWeights.2
dss configure -minCount 250

proc findQuestion { tree node qs parent nodesA countA } {

  upvar \$countA count
  upvar \$nodesA nodes

  if { [set p [\$tree:\$node configure -ptree]] >= 0} {
    set c     [\$tree.ptreeSet.item(\$p) configure -count]

    set question [\$tree question \$node -questionSet \$qs]
    set score    [lindex \$question 1]

    set question [lindex \$question 0]

    if { [string length \$question] } {
      lappend nodes(\$score) [list \$node \$parent \$question \$c]
      if {! [info exist count(\$parent)]} {set count(\$parent) 0}
    }
  }
}

puts "=============== starting with splitting tree nodes ====================="

foreach node [dst:] { findQuestion dst \$node qs \$node nodes count }

set scores [lsort -real -decreasing [array names nodes]]

while { [llength \$scores] } {

  set   score [lindex \$scores 0]
  set   nlist \$nodes(\$score)
  unset nodes([lindex \$scores 0])

  foreach node \$nlist {
    set name   [lindex \$node 0]
    set par    [lindex \$node 1]
    set quest  [lindex \$node 2]
    set cnt    [lindex \$node 3]

    if { [string length \$quest] } {
      set c \$count(\$par)
      puts "\$name \$quest (\$score) \${par}(\$c) -> ([expr \$c+1]) ([expr \$c+2])"
      dst split \$name \$quest \${par}(\$c) \${par}([expr \$c+1]) \${par}([expr \$c+2])
      incr count(\$par) 3   
      for {} { \$c < \$count(\$par)} { incr c} {
        if { [set idx [dst index \${par}(\$c)]] > -1} {findQuestion dst \${par}(\$c) qs \$par nodes count
        }
      }
    }
  }
  if [array exists nodes] {
    set scores [lsort -real -decreasing [array names nodes]]
  } else { set scores {}}
}

dst          write distribTreeClustered
dst.ptreeSet write ptreeSetClustered

puts "====================== introducing new models =========================="

set itemN [dst configure -itemN]

for { set i 0} { \$i < \$itemN} { incr i} {

  if { [set ptree [dst.item(\$i) configure -ptree]] > -1} {

    set node [dst.item(\$i) configure -name]
    dst.ptreeSet.item(\$ptree) models [ModelArray dtabMA dss]

    if { [llength [set models [dtabMA puts]]] } {

      cbs add \$node LDA 48 32 DIAGONAL
      cbs:\$node := cbs.item([dss:[lindex [lindex \$models 0] 0] configure -cbX])
      set cbX [cbs index \$node]
      foreach ds \$models { dss:[lindex \$ds 0] configure -cbX \$cbX }
      if { [dst.item(\$i) configure -model] < 0 } {
        dss add \$node \$node
        dst.item(\$i) configure -model [dss index \$node]}
    }
    dtabMA destroy
  }
}

cbs write codebookSetClustered
dss write  distribSetClustered

puts "==================== pruning away not needed ptrees ===================="

[DistribSet dss2 cbs]

foreach node [dst:] {

  set model [dst:\$node configure -model]
  set ptree [dst:\$node configure -ptree]

  if { \$ptree > -1 } { dst:\$node configure -ptree -1 }
  if { \$model > -1 } {
    set dsname [dss name \$model]
    dss2 add \$dsname [cbs name [dss:\$dsname configure -cbX]]
  }
}

dst  write distribTreeClusteredPruned
dss2 write distribSetClusteredPruned

exit