Table of contents
Manipulate transcript
What are you building here
In this example, we are going to manipulate the transcript a bit before saving it to a file.
Please refer to the previous example regarding authentication, pagination, and how to run such scripts.
Let's say that for some business purpose, we want to create a CSV file, one call per row, and display the unique identifier of the Call, a link to Scribr, our speech analytics tool, and the first 50 words a customer said during the Call.
Code
In this example, we're going to stop consuming the API after 3 pages. For more information about the resources of the API, please check the reference.
#!/usr/bin/env python3
import csv
import os
import requests
# Authenticate with the OAuth2 client-credentials grant. The client id and
# secret are read from the CLIENT_ID / CLIENT_SECRET environment variables.
payload = {
    "client_id": os.getenv("CLIENT_ID"),
    "client_secret": os.getenv("CLIENT_SECRET"),
    "grant_type": "client_credentials",
}
# Always pass a timeout: without one, requests can wait forever on a server
# that never answers.
r = requests.post(
    "https://id.uh.live/realms/uhlive/protocol/openid-connect/token",
    data=payload,
    timeout=30,
)
# Abort on any HTTP error status instead of trying to parse an error body.
r.raise_for_status()
access_token = r.json()["access_token"]
# The API requests below authenticate with this bearer token.
headers = {"Authorization": f"Bearer {access_token}"}
LIMIT = 20     # number of calls requested per page
MAX_PAGES = 3  # this example stops consuming the API after 3 pages
offset = 0
# First get the pages we want and store them in a variable
call_list = []
while offset < MAX_PAGES * LIMIT:
    # Let requests build the query string, and never wait indefinitely for
    # a response.
    r = requests.get(
        "https://activate.uh.live/calls",
        params={"limit": LIMIT, "offset": offset},
        headers=headers,
        timeout=30,
    )
    r.raise_for_status()
    data = r.json()
    # The calls of the current page are under the `data` key of the response.
    call_list += data["data"]
    offset += LIMIT
def _customer_words(call):
    """Return every word said by the customer in *call*, in transcript order."""
    # The transcript is stored under `transcript_json`; aside from metadata,
    # its `callData` key holds the transcript itself as a list of segments.
    segments = call["transcript_json"].get("callData", [])
    # A segment is the utterance of a given speaker; the customer's segments
    # are the ones named `in`.
    return [
        token["value"]
        for segment in segments
        if segment["from"] == "in"
        for token in segment["words"]
    ]


# Prepare the rows we'll save in CSV, one per call:
# - call unique identifier
# - a link to our website
# - 50 first words of the customer
output = []
for entry in call_list:
    spoken = _customer_words(entry)
    call_id = entry['unique_id']
    row = {
        'unique_id': call_id,
        'url': f"https://app.uh.live/scribr/{call_id}",
        '50words': " ".join(spoken[:50]),
    }
    output.append(row)
# Write the header and one row per call.
# `newline=""` lets the csv module control line endings itself, as its
# documentation requires; without it, rows end up separated by blank lines on
# Windows. The explicit UTF-8 encoding keeps any non-ASCII text in the
# transcripts from depending on the platform's default encoding.
with open("client_first_words.csv", "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['unique_id', 'url', '50words'])
    writer.writeheader()
    writer.writerows(output)