Getting started

Get a token

Please contact our support to ask for a token.

Install the python SDK

Get the Python SDK

The SDK is available on PyPI. It requires Python 3.7 or higher and can be installed using pip:

$ pip install -U uhlive

Or, to automatically install the dependencies needed to run the examples:

$ pip install -U uhlive[examples]

Connect to the server

First things first, you need to initiate a connection to our servers. Our API URL is: wss://api.uh.live/bots

Make sure you have valid credentials (token and identifier), otherwise you'll receive an error. We recommend you pass credentials in as environment variables, or persist them in a database that is accessed at runtime. You can add a token to the environment by starting your app as:

UHLIVE_API_TOKEN="some-token" UHLIVE_API_URL="wss://api.uh.live/bots" python myapp.py

Our SDK is designed so that you are free to use the websocket library of your choice as transport, and to architect your code the way you like.

Here is a straightforward example of connecting to the API. The first listing uses websocket-client (sync); the second one uses aiohttp (async):

import os

import websocket as ws
from uhlive.stream.recognition import Recognizer

def main(uhlive_url: str, uhlive_token: str):
    """Connect to the API over a synchronous websocket and report success.

    Args:
        uhlive_url: Websocket URL of the API.
        uhlive_token: API token, sent as a bearer ``Authorization`` header.
    """
    # create transport
    socket = ws.create_connection(
        uhlive_url, header={"Authorization": f"bearer {uhlive_token}"}
    )
    try:
        # instantiate service (not used yet at this stage)
        client = Recognizer()
        print("connected!")
    finally:
        # Always release the connection, even if something above fails.
        socket.close()


if __name__ == "__main__":
    # Both variables must be set; a missing one raises KeyError.
    uhlive_url = os.environ["UHLIVE_API_URL"]
    uhlive_token = os.environ["UHLIVE_API_TOKEN"]
    main(uhlive_url, uhlive_token)
import asyncio
import os

from aiohttp import ClientSession
from uhlive.stream.recognition import Recognizer

async def main(uhlive_url: str, uhlive_token: str):
    """Connect to the API over an aiohttp websocket and report success.

    Args:
        uhlive_url: Websocket URL of the API.
        uhlive_token: API token, sent as a bearer ``Authorization`` header.
    """
    # create transport
    async with ClientSession() as session:
        async with session.ws_connect(
            uhlive_url, headers={"Authorization": f"bearer {uhlive_token}"}
        ) as socket:
            # instantiate service (not used yet at this stage)
            client = Recognizer()
            print("connected!")


if __name__ == "__main__":
    # Both variables must be set; a missing one raises KeyError.
    uhlive_url = os.environ["UHLIVE_API_URL"]
    uhlive_token = os.environ["UHLIVE_API_TOKEN"]
    asyncio.run(main(uhlive_url, uhlive_token))

When running this code, you should see "connected!" printed on the console.

Open a session and start streaming

This API is meant to develop rich voice interactive applications like chat bots, Interactive Voice Response (IVR)… So you need to be able to source some realtime audio from the user, be it from a local soundcard interface (microphone) or a network stream. You also need to be able to interact back with the user, either by playing audio (recorded prompts or Text-To-Speech) or by displaying some text.

To keep code snippets short and illustrative, we won't deal with the details of audio acquisition or TTS. Instead, we'll just display the prompt on the console, and get the voice from the microphone using sounddevice.

Replace the previous code with the following (sync version first, then the async version):

import os
import time
from random import randint

import sounddevice as sd
import websocket as ws
from uhlive.stream.recognition import *


def play_prompt(text):
    """Print *text*, then pause so the user has time to read it.

    The pause lasts 0.1 second per whitespace-separated word of *text*.
    """
    print(text)
    # leave some time to read it
    time.sleep(len(text.split()) * 0.1)


def stream_mic(socket, client):
    """Start capturing the microphone and forward each audio block to the API.

    Args:
        socket: Connected websocket; audio is sent with ``send_binary``.
        client: Recognizer used to build the audio chunk messages.

    Returns:
        The already started ``sd.RawInputStream``.
    """

    def callback(indata, frame_count, time_info, status):
        # Audio is sent as binary messages, built by the client like in the
        # async variant of this tutorial.
        socket.send_binary(client.send_audio_chunk(bytes(indata)))

    stream = sd.RawInputStream(
        callback=callback, channels=1, samplerate=8000, dtype="int16", blocksize=960
    )
    stream.start()
    return stream


def main(uhlive_url: str, uhlive_token: str):
    """Connect, open a recognition session and start streaming the microphone.

    Args:
        uhlive_url: Websocket URL of the API.
        uhlive_token: API token, sent as a bearer ``Authorization`` header.

    Raises:
        AssertionError: If the server does not answer with ``Opened``.
    """
    # create transport
    socket = ws.create_connection(
        uhlive_url, header={"Authorization": f"bearer {uhlive_token}"}
    )
    # instantiate service
    client = Recognizer()
    print("connected!")
    # Open a session
    # Commands are sent as text frames
    socket.send(client.open())
    # Check if successful
    event = client.receive(socket.recv())
    assert isinstance(event, Opened), f"Expected Opened, got {event}"
    print("session opened!")
    # start streaming the user's voice
    # (stream_mic() returns a stream that is already started)
    voice = stream_mic(socket, client)
    print("Stream started")


if __name__ == "__main__":
    # Both variables must be set; a missing one raises KeyError.
    uhlive_url = os.environ["UHLIVE_API_URL"]
    uhlive_token = os.environ["UHLIVE_API_TOKEN"]
    main(uhlive_url, uhlive_token)
import asyncio
import os
from random import randint

import sounddevice as sd
from aiohttp import ClientSession
from uhlive.stream.recognition import *


async def inputstream_generator(channels=1, samplerate=8000, dtype="int16", **kwargs):
    """Async generator that yields blocks of raw input audio as ``bytes``.

    Args:
        channels: Number of input channels.
        samplerate: Sampling frequency passed to the input stream.
        dtype: Sample format passed to the input stream.
        **kwargs: Extra options forwarded to ``sd.RawInputStream``
            (e.g. ``blocksize``).
    """
    q_in = asyncio.Queue()
    loop = asyncio.get_running_loop()

    def callback(indata, frame_count, time_info, status):
        # The audio callback is not invoked from the event loop, so the block
        # is handed over to the queue through call_soon_threadsafe.
        loop.call_soon_threadsafe(q_in.put_nowait, bytes(indata))

    stream = sd.RawInputStream(
        callback=callback,
        channels=channels,
        samplerate=samplerate,
        dtype=dtype,
        **kwargs,
    )
    # The stream stays active for as long as the generator is being consumed.
    with stream:
        while True:
            indata = await q_in.get()
            yield indata


async def play_prompt(text):
    """Print *text*, then pause so the user has time to read it.

    The pause lasts 0.1 second per whitespace-separated word of *text* and
    does not block the event loop.
    """
    print(text)
    # leave some time to read it
    await asyncio.sleep(len(text.split()) * 0.1)


async def stream(socket, client):
    """Forward microphone audio blocks to the API until the task is cancelled.

    Args:
        socket: Connected aiohttp websocket; audio is sent with ``send_bytes``.
        client: Recognizer used to build the audio chunk messages.
    """
    try:
        async for block in inputstream_generator(blocksize=960):
            # Audio is sent as binary messages
            await socket.send_bytes(client.send_audio_chunk(block))
    except asyncio.CancelledError:
        # Cancellation is the normal way to stop streaming: swallow it so that
        # awaiting the cancelled task returns instead of raising.
        pass


async def main(uhlive_url: str, uhlive_token: str):
    """Connect, open a recognition session and start streaming the microphone.

    Args:
        uhlive_url: Websocket URL of the API.
        uhlive_token: API token, sent as a bearer ``Authorization`` header.

    Raises:
        AssertionError: If the server does not answer with ``Opened``.
    """
    # create transport
    async with ClientSession() as session:
        async with session.ws_connect(
            uhlive_url, headers={"Authorization": f"bearer {uhlive_token}"}
        ) as socket:
            # instantiate service
            client = Recognizer()
            print("connected!")
            # Open a session
            # Commands are sent as text frames
            await socket.send_str(client.open())
            # Check if successful
            msg = await socket.receive()
            event = client.receive(msg.data)
            assert isinstance(event, Opened), f"Expected Opened, got {event}"
            print("session opened!")
            # start streaming the user's voice in a background task
            voice = asyncio.create_task(stream(socket, client))
            print("stream started")


if __name__ == "__main__":
    # Both variables must be set; a missing one raises KeyError.
    uhlive_url = os.environ["UHLIVE_API_URL"]
    uhlive_token = os.environ["UHLIVE_API_TOKEN"]
    asyncio.run(main(uhlive_url, uhlive_token))

Define some default values and grammar aliases

In order to avoid repeating the same parameters on every recognize request, you can define them once for the entire session. See the protocol reference for a list of the possible options and their meanings.

Append this code to your main(…) function:

    # [...] in function main(uhlive_url: str, uhlive_token: str)
    # Session-wide defaults, so they need not be repeated on every recognize request
    socket.send(client.set_params(
        speech_language="en",  # or "fr"
        no_input_timeout=5000,
        speech_complete_timeout=1000,
        speech_incomplete_timeout=2000,
        speech_nomatch_timeout=3000,
        recognition_timeout=30000,
    ))
    # Check if successful
    event = client.receive(socket.recv())
    assert isinstance(event, ParamsSet), f"Expected ParamsSet, got {event}"
    print("parameters set!")
            # [...] in funtion async main(uhlive_url: str, uhlive_token: str)
await socket.send_str(client.set_params(
speech_language="en", # or "fr"
no_input_timeout=5000,
speech_complete_timeout=1000,
speech_incomplete_timeout=2000,
speech_nomatch_timeout=3000,
recognition_timeout=30000
))
# Check if successfull
msg = await socket.receive()
event = client.receive(msg.data)
assert isinstance(event, ParamsSet), f"Expected ParamsSet, got {event}"
print("parameters set!")

Want to try in another language? You can set parameter speech_language to English with en or French with fr.

We can also define some shortcuts to the parametrized grammars you intend to use. For example (still appending to main):

    # Register "num_in_range100" as an alias for the parametrized digits grammar
    socket.send(client.define_grammar(
        "speech/spelling/digits?regex=[0-9]{1,2}",
        "num_in_range100",
    ))
    # Check if successful
    event = client.receive(socket.recv())
    assert isinstance(event, GrammarDefined), f"Expected GrammarDefined, got {event}"
    print("grammar alias defined!")
            await socket.send_str(client.define_grammar(
"speech/spelling/digits?regex=[0-9]{1,2}",
"num_in_range100"
))
# Check if successful
msg = await socket.receive()
event = client.receive(msg.data)
assert isinstance(event, GrammarDefined), f"Expected GrammarDefined, got {event}"
print("grammar alias defined!")

Define some convenience functions

To spare some typing, we're going to define some convenience functions (closures to be exact), now that you've learned the details.

Append these lines to your main function:

    # Shortcut to send a command as a text frame
    send = socket.send

    def expect(*event_classes):
        """Receive the next event and check it is an instance of *event_classes*.

        Returns the event; raises AssertionError on any other event type.
        """
        event = client.receive(socket.recv())
        assert isinstance(event, event_classes), f"Expected {event_classes} got {event}"
        return event
            # Shortcut to send a command as a text frame
            send = socket.send_str

            async def expect(*event_classes):
                """Receive the next event and check it is an instance of *event_classes*.

                Returns the event; raises AssertionError on any other event type.
                """
                msg = await socket.receive()
                event = client.receive(msg.data)
                assert isinstance(
                    event, event_classes
                ), f"expected {event_classes} got {event}"
                return event

Write your first interactions

Let's play the guessing game! As a scenario, the vocal bot will randomly choose a number between 0 and 99 (both ends included) and ask the user to guess it within five tries.

At each guess, the user may win, otherwise the bot will give a hint, telling whether the guess is above or under the secret number.

Append the scenario logic to your main function:

    play_again = True
    while play_again:
        secret = randint(0, 99)
        play_prompt("I chose a number between 0 and 99. Try to guess it in less than five turns.")
        for i in range(1, 6):
            play_prompt(f"Turn {i}: what's your guess?")
            send(client.recognize("session:num_in_range100"))
            expect(RecognitionInProgress)
            # StartOfInput is received when the user starts to speak (if that ever happens)
            # As we don't care here, we just ignore it and wait for the completion of the recognize request
            response = expect(RecognitionComplete, StartOfInput)
            if isinstance(response, StartOfInput):
                response = expect(RecognitionComplete)
            if response.completion_cause == CompletionCause.NoInputTimeout:
                play_prompt("You should answer faster, you lose your turn!")
                continue
            if response.completion_cause != CompletionCause.Success:
                play_prompt("That's not a number between 0 and 99. You lose your turn.")
                continue
            # It's safe to access the NLU value now
            guess = int(response.body.nlu.value)
            if guess == secret:
                play_prompt(f"You win! Congratulations! It was {secret}!")
                break
            elif guess > secret:
                play_prompt(f"Your guess, {guess}, is too high")
            else:
                play_prompt(f"Your guess, {guess}, is too low")
        else:
            # for/else: only reached when all turns were used without a winning guess
            play_prompt(f"You lose! My secret number was {secret}.")
        # Keep asking until we get a clear yes/no answer
        while True:
            play_prompt("Do you want to play again?")
            send(client.recognize("builtin:speech/boolean", recognition_mode="hotword"))
            expect(RecognitionInProgress)
            # No StartOfInput in hotword mode
            response = expect(RecognitionComplete)
            if response.completion_cause != CompletionCause.Success:
                play_prompt("Please, clearly answer the question.")
                continue
            play_again = response.body.nlu.value
            break
    # Game over: stop the microphone, close the session, then the transport
    voice.stop()
    send(client.close())
    socket.close()
            play_again = True
while play_again:
secret = randint(0,99)
await play_prompt("I chose a number between 0 and 99. Try to guess it in less than five turns.")
for i in range(1, 6):
await play_prompt(f"Turn {i}: what's your guess?")
await send(client.recognize("session:num_in_range100"))
await expect(RecognitionInProgress)
response = await expect(RecognitionComplete, StartOfInput)
# StartOfInput is received when the user starts to speak (it that ever happen)
# As we don't care here, we just ignore it and wait for the completion of the recognize request
if isinstance(response, StartOfInput):
response = await expect(RecognitionComplete)
if response.completion_cause == CompletionCause.NoInputTimeout:
await play_prompt("You should answer faster, you loose your turn!")
continue
if response.completion_cause != CompletionCause.Success:
await play_prompt("That's not a number between 0 and 99. You lose your turn.")
continue
# It's safe to access the NLU value now
guess = int(response.body.nlu.value)
if guess == secret:
await play_prompt(f"You win! Congratulations! It was {secret}!")
break
elif guess > secret:
await play_prompt(f"Your guess, {guess}, is too high")
else:
await play_prompt(f"Your guess, {guess}, is too low")
else:
await play_prompt(f"You lose! My secret number was {secret}.")
while True:
await play_prompt("Do you want to play again?")
await send(client.recognize("builtin:speech/boolean", recognition_mode="hotword"))
await expect(RecognitionInProgress)
# No StartOfInput in hotword mode
response = await expect(RecognitionComplete)
if response.completion_cause != CompletionCause.Success:
await play_prompt("Please, clearly answer the question.")
continue
play_again = response.body.nlu.value
break
voice.cancel()
await voice
await send(client.close())

Learn from the examples

Check our examples, including the complete source code of the Guess Game, to learn how to use the SDK.

You will need a Google Text to Speech token to run the desktop bot examples.