locked
Bing Speech to text API - always timing out <Response [408]> RRS feed

  • Question

  • I am using the Bing Speech to Text API to convert an mp3 file that is just 5 seconds long, but I always get <Response [408]>.
    Any help, please?

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    import requests
    import http.client as httplib
    import uuid
    import json

    class Microsoft_ASR():
        """Small client for the Bing Speech to Text REST endpoint.

        Fetches an access token from the token service using a subscription
        key, then posts an audio file to the ``recognize`` endpoint.

        Args:
            sub_key: Subscription key sent as ``Ocp-Apim-Subscription-Key``.
                Defaults to ``DEFAULT_SUB_KEY``.
            timeout: Optional timeout in seconds applied to both HTTP calls.
                ``None`` keeps each library's default behaviour.
        """

        # NOTE(review): a subscription key is a credential; prefer passing
        # ``sub_key`` explicitly (e.g. read from the environment) over relying
        # on a value committed to source.
        DEFAULT_SUB_KEY = '088fd08ae5f949ffa272a1acc6a082d5'

        TOKEN_HOST = 'api.cognitive.microsoft.com'
        TOKEN_PATH = "/sts/v1.0/issueToken"
        RECOGNIZE_URL = 'https://speech.platform.bing.com/recognize'

        # Declares the upload as 16 kHz PCM WAV.
        # NOTE(review): the audio that is sent must actually match this
        # declaration (an .mp3 file does not); presumably a mismatch can make
        # the service reject or time out the request -- confirm in the API docs.
        CONTENT_TYPE = 'audio/wav; codec=audio/pcm; samplerate=16000'

        def __init__(self, sub_key=None, timeout=None):
            self.sub_key = self.DEFAULT_SUB_KEY if sub_key is None else sub_key
            self.timeout = timeout
            # Raw token as returned by the token service (bytes), or None
            # until get_speech_token() succeeds.
            self.token = None

        def get_speech_token(self):
            """Request an access token and store it on ``self.token``.

            Returns:
                True when the token service answered with HTTP 200 and the
                token was stored; False otherwise (``self.token`` is left
                untouched so an error body is never used as a token).
            """
            header = {'Ocp-Apim-Subscription-Key': self.sub_key}
            # Only override the socket timeout when one was requested.
            conn_kwargs = {} if self.timeout is None else {'timeout': self.timeout}
            conn = httplib.HTTPSConnection(self.TOKEN_HOST, **conn_kwargs)
            try:
                conn.request("POST", self.TOKEN_PATH, "", header)
                response = conn.getresponse()
                status = response.status
                payload = response.read()
            finally:
                # Close the connection even if the request itself raised.
                conn.close()

            if status != 200:
                print("Token request failed: ", status, payload)
                return False

            self.token = payload
            print("Got Token: ", self.token)
            return True

        def transcribe(self, speech_file):
            """Send ``speech_file`` to the recognize endpoint.

            Args:
                speech_file: Path of the audio file to upload. The file is
                    streamed in chunks and declared as ``CONTENT_TYPE``.

            Returns:
                The decoded JSON response, or an empty dict when no token
                could be obtained or the response body is not valid JSON.
            """
            # Grab the token if we need it
            if self.token is None:
                print("No Token... Getting one")
                if not self.get_speech_token():
                    return {}

            # Params from Microsoft Example
            params = {'scenarios': 'ulm',
                      'appid': 'D4D52672-91D7-4C74-8AD8-42B1D98141A5',
                      'locale': 'en-GB',
                      'version': '3.0',
                      'format': 'json',
                      'instanceid': '565D69FF-E928-4B7E-87DA-9A750B96D9E3',
                      'requestid': str(uuid.uuid4()),
                      'device.os': 'linux'}
            headers = {'Authorization': self._authorization_value(),
                       'Content-Type': self.CONTENT_TYPE}

            # Passing a generator as ``data`` makes requests stream the body.
            resp = requests.post(self.RECOGNIZE_URL,
                                 params=params,
                                 data=self._stream_audio_file(speech_file),
                                 headers=headers,
                                 timeout=self.timeout)
            print("resp", resp)
            return self._parse_response(resp.content)

        def _authorization_value(self):
            """Build the ``Authorization`` header value from ``self.token``."""
            token = self.token
            # The token service hands back bytes; accept str as well so a
            # token assigned by the caller works too.
            if isinstance(token, bytes):
                token = token.decode('utf-8')
            return 'Bearer ' + token

        @staticmethod
        def _stream_audio_file(speech_file, chunk_size=1024):
            """Yield the contents of ``speech_file`` in ``chunk_size`` byte pieces."""
            with open(speech_file, 'rb') as audio:
                while True:
                    data = audio.read(chunk_size)
                    if not data:
                        break
                    yield data

        @staticmethod
        def _parse_response(body):
            """Decode a raw response body into JSON; return ``{}`` if it is not JSON."""
            try:
                # NUL characters are stripped before parsing.
                text = body.decode('utf-8').replace('\0', '')
                return json.loads(text)
            except ValueError:
                # Covers both UnicodeDecodeError and json.JSONDecodeError.
                print('bad json: ', body)
                return {}
           
    if __name__ == "__main__":
        # Script entry point: build the client, fetch a token up front, then
        # transcribe one recording and print whatever transcribe() returns.
        recognizer = Microsoft_ASR()
        recognizer.get_speech_token()

        # NOTE(review): this path has an .mp3 extension, while transcribe()
        # declares the upload as audio/wav PCM -- confirm the file really
        # matches the declared format.
        transcript = recognizer.transcribe('d:/Recording.mp3')
        print("Text: ", transcript)

    Wednesday, October 4, 2017 6:23 AM