Transcribing audio streamed from a microphone

This section demonstrates how to stream audio from your microphone in real time and get its transcription and other enrich events.

You will need PyAudio as a dependency, as well as system libraries depending on your OS. Please check PyAudio requirements.

Code

import argparse
import contextlib
import os
import time

import pyaudio # type: ignore

import uhlive

# Microphone capture settings
RATE = 8000  # sampling rate handed to the input stream
CHUNK = RATE // 10  # frames per buffer: a tenth of RATE, i.e. 100 ms of audio


def send_audio(in_data, frame_count, time_info, status):
    """PyAudio stream callback: forward one captured buffer to the client.

    Only ``in_data`` is used. ``frame_count``, ``time_info`` and ``status``
    are part of the callback signature and are ignored here.

    Returns:
        A ``(None, pyaudio.paContinue)`` pair: no output data (this is an
        input-only stream) together with the flag that keeps the stream
        running.
    """
    client.send_audio_chunk(in_data)
    no_output_data = None
    return no_output_data, pyaudio.paContinue


def stream_microphone():
    """Open and return a microphone input stream driven by ``send_audio``.

    The stream is opened on the module-level PyAudio instance ``p`` as
    16-bit, single-channel input at ``RATE``, delivering ``CHUNK`` frames
    per buffer to the ``send_audio`` callback.
    """
    open_stream = p.open
    settings = dict(
        format=pyaudio.paInt16,
        channels=1,
        rate=RATE,
        input=True,
        frames_per_buffer=CHUNK,
        stream_callback=send_audio,
    )
    return open_stream(**settings)


# Command-line interface: the conversation to join, plus the recognition
# options that are later passed to client.join_conversation().
parser = argparse.ArgumentParser(
    # This script records from the microphone, not from an audio file.
    description="Get the transcription of your microphone input, live!"
)
parser.add_argument("conversation_id", help="Conversation ID")
parser.add_argument(
    "--asr_model",
    dest="model",
    default="fr",
    help="ASR model to use (default: %(default)s)",
)
parser.add_argument(
    "--without_interim_results",
    dest="interim_results",
    action="store_false",
    help="Disable interim results (enabled by default)",
)
parser.add_argument(
    "--without_rescoring",
    dest="rescoring",
    action="store_false",
    help="Disable rescoring (enabled by default)",
)
args = parser.parse_args()

# Connection settings come from the environment. The variables are read in
# this order, and the first one that is missing raises KeyError.
uhlive_url, uhlive_token, uhlive_id = (
    os.environ[name]
    for name in ("UHLIVE_API_URL", "UHLIVE_API_TOKEN", "UHLIVE_API_ID")
)


# Run the whole session inside an ExitStack. Each resource registers its
# release step as soon as it has been acquired, so:
#   * a failure while connecting, joining or opening the microphone still
#     releases everything acquired up to that point;
#   * release steps run in reverse order of registration, and a step that
#     raises does not prevent the remaining ones from running.
# `p` and `client` stay module-level names because stream_microphone() and
# send_audio() look them up as globals.
with contextlib.ExitStack() as cleanup:
    p = pyaudio.PyAudio()
    cleanup.callback(p.terminate)

    client = uhlive.Client(
        url=uhlive_url, identifier=uhlive_id, token=uhlive_token, timeout=10
    )
    client.connect()
    cleanup.callback(client.close)

    client.join_conversation(
        args.conversation_id,
        model=args.model,
        speaker="Alice",
        interim_results=args.interim_results,
        rescoring=args.rescoring,
        # Current wall-clock time in milliseconds.
        origin=int(time.time() * 1000),
    )
    cleanup.callback(client.leave_conversation)

    # Open the microphone only once the conversation has been joined:
    # send_audio() forwards every captured buffer to the client.
    stream = stream_microphone()
    # Registered in reverse so the stream is stopped first, then closed.
    cleanup.callback(stream.close)
    cleanup.callback(stream.stop_stream)

    print("Listening to events")
    # Registered last so that it is the first thing printed on the way out.
    cleanup.callback(print, "Exiting")

    while True:
        event = client.get_event()
        if not event:
            print("There are no more events")
            break
        if isinstance(event, uhlive.EntityFound):
            print(
                f"{event.__class__.__name__} in {event.speaker}: {event.canonical or event.original}",
                # Show the value only when it differs from the canonical form.
                f"({event.value})" if event.value != event.canonical else "",
                f"[confidence: {event.confidence:.2f}]",
            )
        else:
            print(event)

Usage

UHLIVE_API_TOKEN="some-token" UHLIVE_API_ID="your_identifier" UHLIVE_API_URL="wss://api.uh.live" python stream_microphone.py your_conversation_id --asr_model en