Ticket #393 (new defect)

Opened 11 years ago

Step7 of HTK_Compile_Model.sh script takes too much time to run

Reported by: kmaclean Owned by: kmaclean
Priority: major Milestone: Acoustic Model 0.1.2
Component: Acoustic Model Version: Acoustic Model 0.1.1
Keywords: Cc:

Description

Patch from post by Yessil

Ken,

Here is my update of Step 7 fragment of the script:

 

make_hmm4 () {
    LINE="start"
    NUM=1

 rm -rf ./interim_files/hmm4/hmmdefs.tmp1
 rm -rf ./interim_files/hmm4/hmmdefs.tmp2
 
    while [ `echo ${LINE} | awk '{ print $1 }'` != "" ];
    do
  LINE=`cat -n ./interim_files/hmm4/hmm_sil | grep ^[[:space:]]*${NUM}[[:space:]] | sed s/^[[:space:]]*[[:digit:]]*//g`
  if [ `echo ${LINE} | awk '{ print $2 }'` = "\"sil\"" ]; then
            while [ `echo ${LINE} | awk '{ print $1 }'` != "<ENDHMM>" ];
            do
                echo ${LINE} >> ./interim_files/hmm4/hmmdefs.tmp1
                echo ${LINE} >> ./interim_files/hmm4/hmmdefs.tmp2
#    print_process ${NUM}
    let "NUM += 1"
    LINE=`cat -n ./interim_files/hmm4/hmm_sil | grep ^[[:space:]]*${NUM}[[:space:]] | sed s/^[[:space:]]*[[:digit:]]*//g`
   done
   
   echo ${LINE} >> ./interim_files/hmm4/hmmdefs.tmp1

   NUM2=1
   while [ ${NUM2} != "28" ];
   do
    LINE2=`cat -n ./interim_files/hmm4/hmmdefs.tmp2 | grep ^[[:space:]]*${NUM2}[[:space:]] \
    | sed s/^[[:space:]]*[[:digit:]]*//g`

      case ${NUM2} in
     1 )
      echo ${LINE2} | sed s/~h\ \"sil\"/~h\ \"sp\"/g >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     2 )
      echo ${LINE2} >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     3 )
      echo ${LINE2} | sed s/5/3/g >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     10 )
      echo ${LINE2} | sed s/3/2/g >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     11 | 12 | 13 | 14 | 15)
      echo ${LINE2} >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     22 )
      echo ${LINE2} | sed s/5/3/g >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     24 )
      echo "0.000000e+000 1.000000e+000 0.000000e+000" >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     25 )
      echo "0.000000e+000 0.900000e+000 0.100000e+000" >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
     26 )
      echo "0.000000e+000 0.000000e+000 0.000000e+000" >> ./interim_files/hmm4/hmmdefs.tmp1
      ;;
    esac
    let "NUM2 += 1"
   done
        fi
  echo ${LINE} >> ./interim_files/hmm4/hmmdefs.tmp1
#  print_process ${NUM}
  let "NUM += 1"
 done
 cat ./interim_files/hmm4/hmm_before_sil ./interim_files/hmm4/hmmdefs.tmp1 > ./interim_files/hmm4/hmmdefs
 rm -rf ./interim_files/hmm4/hmmdefs.tmp1
 rm -rf ./interim_files/hmm4/hmmdefs.tmp2
 rm -rf ./interim_files/hmm4/hmm_before_sil
 rm -rf ./interim_files/hmm4/hmm_sil
 return 0
}

extract_sil_model(){
   SWITCH=0
 BEGIN=0
 NUM2=0
 for TOKEN in `cat -n ./interim_files/hmm4/hmmdefs `
 do
  if [ "${TOKEN}" = "\"sil\"" ]; then
   SWITCH=1
  fi
  if [ "${SWITCH}" = "1" ]; then
   YES=$(echo $TOKEN | grep ^[[:digit:]+])
   if [ "1${YES}1" != "11" ]; then #### supposed to be [ $YES !=  ]; but didn't work ?
    SWITCH=0
    BEGIN=$TOKEN
   fi
  fi
 done
 TOKEN=`wc ./interim_files/hmm4/hmmdefs | awk '{ print $1 }'`
 END=$TOKEN
 let NUM=END-BEGIN+2
 tail -$NUM ./interim_files/hmm4/hmmdefs > ./interim_files/hmm4/hmm_sil
 let NUM=BEGIN-2
 head -$NUM ./interim_files/hmm4/hmmdefs > ./interim_files/hmm4/hmm_before_sil
 return 0
}

.......

print_heading "Step 7 - Fixing the Silence Model"
 cp ./interim_files/hmm3/. ./interim_files/hmm4 -R
 echo -e 'making hmm4\n'
 extract_sil_model
 make_hmm4 2> /dev/null
.......


Script assumes that sil model is located at the bottom of hmmdefs file. I think it can be modified to be valid for any location inside hmmdefs file.

It works much faster for me - about 30 sec instead of 10 min.

Thanks,

Yessil

Note: See TracTickets for help on using tickets.