sapi_textless_lipsync::callback

In the textless case, we only handle the SPEI_RECOGNITION event; SPEI_HYPOTHESIS events are ignored. This may be a weakness: handling both events might make the lipsync more robust.
00789 {
00790     CSpEvent event; // the event
00791 
00792     ISpRecoResult *pRecoResult;         // recoResult from the event
00793     SPPHRASE      *pSpPhrase;           // phrase from recoResult
00794     SPRECORESULTTIMES pRecoResultTimes; // result times from RecoResult
00795     WCHAR phone_buffer[256];            // phoneme buffer for conversion
00796     long msStart;                       // time stamp of the result 
00797     
00798 
00799     while (event.GetFrom(this->m_recogCntxt) == S_OK)
00800     {
00801         if (event.eEventId == SPEI_RECOGNITION /*|| event.eEventId == SPEI_HYPOTHESIS */)
00802         {   
00803             // for textless we only accept full recognition. This might be an area
00804             // to watch out for
00805             
00806             // pull out the result object
00807             pRecoResult = event.RecoResult();
00808 
00809             // pull the whole text from the result
00810             CSpDynamicString pSapiText;
00811             pRecoResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, FALSE, &pSapiText, NULL);
00812 
00813             // get the start time for the phrase. we use this as an offset for the phrase
00814             // elements. Not sure if this is correct.
00815             pRecoResult->GetResultTimes(&pRecoResultTimes);
00816             msStart = sapi_time_to_milli(pRecoResultTimes.ullStart);
00817 
00818             // extract the phrase object
00819             pRecoResult->GetPhrase(&pSpPhrase);
00820 
00821             if (pSpPhrase != NULL)
00822             {
00823                 // Process each element of the phrase. These should be our
00824                 // orthorgraphs
00825                 const SPPHRASEELEMENT *p = pSpPhrase->pElements;
00826                 const SPPHRASEELEMENT *pEnd = p + pSpPhrase->Rule.ulCountOfElements;
00827                 while (p != pEnd)
00828                 {
00829                     // for each phrase element we create a marker 
00830                     // that contains the time stamps along with the 
00831                     // phonemes. associated with it.
00832                     alignment_result al;
00833                     al.m_orthography = p->pszDisplayText;
00834                     // Get the phonemes
00835                     ULONG j = 0;
00836                     SPPHONEID phn[2];
00837                     phn[1] = 0x00;
00838                     while (p->pszPronunciation[j] != 0)
00839                     {
00840                         // process each phoneme
00841                         phn[0] = p->pszPronunciation[j];
00842                         m_phnCvt->IdToPhone(phn, phone_buffer);
00843                         al.m_phonemes.push_back(phone_buffer);
00844                         j++;
00845                     }
00846                     // start time of the ortheme
00847                     al.m_msStart= msStart + bytes_to_milli(p->ulAudioStreamOffset);
00848                     // end time of the ortheme
00849                     al.m_msEnd = bytes_to_milli(p->ulAudioSizeBytes);
00850                     al.m_msEnd += al.m_msStart;
00851                     // add it to the results
00852                     m_results.push_back(al);
00853 
00854                     p++;
00855                 }
00856             }
00857         }
00858         else if (event.eEventId == SPEI_END_SR_STREAM)
00859         {
00860             // This event occurs when the stream has finished processing.
00861             // we set a flag to indicate that things are done.
00862             m_bDone = TRUE;        
00863         }
00864     }
00865 }