Transcribing audio from streaming file

This section demonstrates how to stream a file in real time to simulate live speech, and how to receive its transcription and other enrichment events.

Code

"""
Stream file in real time in order to simulate live speech.
"""

import argparse
import os
import time
from threading import Thread

import uhlive


class AudioSender(Thread):
    """Background thread that streams an audio file to the client in paced chunks.

    The file is opened in binary mode and forwarded chunk by chunk through
    ``client.send_audio_chunk``, sleeping after every chunk so the upload
    simulates live speech instead of being sent in one burst. With the
    default settings, 8000 bytes are sent every 0.5 seconds (16000 bytes
    per second).
    NOTE(review): that rate presumably matches the audio format expected by
    the service -- confirm against the API documentation.

    Args:
        client: Object exposing ``send_audio_chunk(chunk)`` and
            ``leave_conversation()``.
        audio_file: Path of the file to stream.
        chunk_size: Number of bytes read and sent per chunk (must be > 0).
        chunk_interval: Seconds to sleep after each chunk (must be >= 0).

    Raises:
        ValueError: If ``chunk_size`` or ``chunk_interval`` is out of range.
    """

    def __init__(self, client, audio_file, chunk_size=8000, chunk_interval=0.5):
        super().__init__()
        # A zero chunk size would end the loop immediately without sending
        # anything and a negative one would read the whole file at once;
        # reject both up front rather than silently breaking the pacing.
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive number of bytes")
        if chunk_interval < 0:
            raise ValueError("chunk_interval must not be negative")
        self.client = client
        self.audio_file = audio_file
        self.chunk_size = chunk_size
        self.chunk_interval = chunk_interval

    def run(self):
        """Stream the file chunk by chunk, then leave the conversation.

        ``leave_conversation`` is called from a ``finally`` clause, so it
        also runs when opening, reading or sending fails. Presumably this
        lets a consumer that waits for this speaker to leave stop waiting
        instead of looping forever -- confirm against the client API.
        Any exception is re-raised after the leave call.
        """
        print(f"Streaming file in realtime: {self.audio_file} for transcription!")
        try:
            with open(self.audio_file, "rb") as audio_file:
                while True:
                    audio_chunk = audio_file.read(self.chunk_size)
                    if not audio_chunk:
                        # An empty read marks the end of the file.
                        break
                    self.client.send_audio_chunk(audio_chunk)
                    time.sleep(self.chunk_interval)

            # Only reached when the whole file was read and sent.
            print(f"File {self.audio_file} successfully streamed")
        finally:
            self.client.leave_conversation()


# Command-line interface: two required positionals plus optional settings
# that are forwarded to ``client.join_conversation`` further down.
parser = argparse.ArgumentParser(
    description="Get the transcription of an audio file, live!"
)
parser.add_argument("audio_file", help="Audio file to transcribe")
parser.add_argument("conversation_id", help="Conversation ID")
parser.add_argument(
    "--asr_model",
    dest="model",
    default="fr",
    help="ASR model used when joining the conversation (default: %(default)s)",
)
# Both switches are "store_false": the matching option is True unless the
# flag is given on the command line.
parser.add_argument(
    "--without_interim_results",
    dest="interim_results",
    action="store_false",
    help="Join the conversation with interim_results disabled",
)
parser.add_argument(
    "--without_rescoring",
    dest="rescoring",
    action="store_false",
    help="Join the conversation with rescoring disabled",
)
args = parser.parse_args()

# Endpoint and credentials are read from the environment, in this order;
# a missing variable raises KeyError before any connection is attempted.
uhlive_url, uhlive_token, uhlive_id = (
    os.environ[variable]
    for variable in ("UHLIVE_API_URL", "UHLIVE_API_TOKEN", "UHLIVE_API_ID")
)

client = uhlive.Client(
    url=uhlive_url,
    identifier=uhlive_id,
    token=uhlive_token,
    timeout=10,
)
client.connect()

# Join as speaker "Alice"; ``origin`` is the current wall-clock time
# converted to integer milliseconds.
client.join_conversation(
    args.conversation_id,
    model=args.model,
    speaker="Alice",
    interim_results=args.interim_results,
    rescoring=args.rescoring,
    origin=int(time.time() * 1000),
)

# Stream the audio from a separate thread so this thread stays free to
# read events while the file is being sent.
sender = AudioSender(client, args.audio_file)
sender.start()


print("Listening to events")
try:
    while True:
        event = client.get_event()
        if not event:
            # A falsy result means nothing was received this time around
            # (presumably the client timeout elapsed -- confirm).
            print("Silence")
        elif isinstance(event, uhlive.SpeakerLeft) and event.speaker == "Alice":
            # Our own speaker has left the conversation: stop listening.
            print("Transcription completed")
            break
        else:
            print(event)
finally:
    # Runs on normal completion as well as on any exception or interrupt.
    print("Exiting")
    client.close()
    sender.join()

Usage

UHLIVE_API_TOKEN="some-token" UHLIVE_API_ID="your_identifier" UHLIVE_API_URL="wss://api.uh.live" python stream_file.py audio_file.raw your_conversation_id --asr_model en