Tags: python, flask, websocket, twilio, speech-recognition

How can I dynamically handle a call on Twilio in real time and with Python?


I'm building an app that does the following:

  1. Make an outbound call.
  2. Start streaming when the callee picks it up
  3. Transcribe the stream in real time with Vosk
  4. Detect a keyword to tell apart two different automated responses
  5. Once the right automated response is identified, transfer the call to my personal number

I'm having trouble with point 5. I don't know how to dial my own number in an ongoing call.

I would have to use the <Dial> verb conditionally, and as far as I know, that's not possible.

For context, I'm using Twilio Functions, a Flask server with WebSocket support, and ngrok to expose localhost to Twilio. Here's my code.

The first module contains the logic to send requests to Twilio:

from twilio.rest import Client
from twilio.twiml.voice_response import VoiceResponse, Start
from twilio.request_validator import RequestValidator
from server import file_path
import os
import requests
import xml.etree.cElementTree as ET

# Global variables
# Twilio REST client.
# NOTE(review): account_sid and auth_token are not defined in the code shown
# here -- presumably they are set elsewhere; confirm before running.
client = Client(account_sid, auth_token)
# Functions
# URL that call_request() POSTs the call parameters to.
call_test_url = "https://my_function_url.com"
# Security
# Used by call_request() to compute the X-Twilio-Signature header.
validator = RequestValidator(auth_token)

def call_request():
    """Ask the Twilio Function to place the outbound call with streaming.

    Reads the ngrok public URL from ``file_path``, derives the WebSocket
    stream URL and the status-callback URL from it, signs the request
    parameters with ``validator`` and POSTs them to ``call_test_url``.

    Returns:
        The ``requests`` response object, or ``None`` when the file holding
        the public URL does not exist.
    """
    # We read ngrok public URL from a file. Opening it directly (instead of
    # checking for existence first) avoids a race with the file being removed.
    try:
        with open(file_path, "r") as file:
            public_url = file.read().strip()
    except FileNotFoundError:
        print("No file created to retrieve public endpoint.")
        return None

    # Only the leading scheme must change; count=1 keeps any later "https"
    # substring in the URL untouched.
    stream_url = public_url.replace("https", "wss", 1) + "/stream"
    # Util that dynamically generates the Twiml for my function
    # NOTE(review): the meaning of the "15" argument is not visible here.
    stream_twiml = generate_stream_twiml(stream_url, "15")

    request_params = {
        "to": "...",
        "from": "...",
        "callback_url": public_url + "/callstatus",
        "twiml": stream_twiml,
    }
    # Sign the exact URL and parameters that are sent in the POST below.
    twilio_signature = validator.compute_signature(call_test_url, request_params)
    request_headers = {
        "X-Twilio-Signature": twilio_signature,
    }
    my_call_request = requests.post(
        call_test_url, headers=request_headers, data=request_params
    )
    print(my_call_request.content)

    return my_call_request

# Place the call only when this module is executed as a script.
if __name__ == "__main__":
    call_request()

The second module is my server:

from flask import Flask, request
from flask_sock import Sock
from vosk import SetLogLevel
import time
import audioop
import base64
import json
import os
import vosk
import logging

# Local port used both for the Flask server and for the ngrok tunnel.
port = 1234
# File through which the ngrok public URL is shared with other modules.
file_path = "path-to-file-that-contains-ngrok-public-url"

app = Flask(__name__)
# WebSocket support for the Flask app (flask-sock).
sock = Sock(app)
# Disable the 'werkzeug' logger so its output does not mix with the
# transcription printed by the stream handler.
log = logging.getLogger('werkzeug')
log.disabled = True
# Vosk speech-recognition model, loaded once at import time.
model = vosk.Model('my-model')

# Terminal control sequences used when printing transcriptions:
# CL is the ANSI "erase to end of line" escape, BS is a backspace character.
CL = '\x1b[0K'
BS = '\x08'

@app.route('/callstatus', methods=['POST'])
def get_call_status():
    """Handle the call-status callback and redial when the callee is busy.

    Returns:
        The reported call status as the response body, or an empty body when
        the request carried no ``CallStatus`` field.
    """
    # Imported inside the view -- presumably to avoid a circular import with
    # the module that defines call_request(); confirm.
    from twilio_script import call_request

    call_status = request.form.get('CallStatus')
    # Twilio sends the call identifier as "CallSid"; form keys are
    # case-sensitive. Not used yet, but this is the identifier required to
    # update (e.g. transfer) the live call.
    call_sid = request.form.get('CallSid')
    print(f"Call status: {call_status}")
    if call_status == "busy":
        print("Callee busy. Calling again...")
        # Short pause before redialing.
        time.sleep(0.5)
        call_request()

    # A Flask view must not return None, so fall back to an empty body.
    return call_status or ""

@sock.route('/stream')
def stream(ws):
    """Receive and transcribe audio stream.

    Reads JSON messages from ``ws``. For every ``media`` message the payload
    is base64-decoded, converted from mu-law to 16-bit linear PCM, resampled
    from 8 kHz to 16 kHz and fed to a Vosk recognizer. The loop ends when the
    keyword shows up in a (partial or final) transcription, or when a
    ``stop`` message arrives.
    """
    rec = vosk.KaldiRecognizer(model, 16000)
    key_string = "..." # String to differentiate between automated responses
    # ratecv() returns its converter state; feeding it back on the next call
    # keeps the resampled audio continuous across consecutive chunks.
    resample_state = None
    while True:
        message = ws.receive()
        packet = json.loads(message)
        event = packet['event']
        if event == 'start':
            print('Streaming is starting')
        elif event == 'stop':
            print('\nStreaming has stopped')
            # No more audio will follow, so stop waiting for messages.
            break
        elif event == 'media':
            audio = base64.b64decode(packet['media']['payload'])
            audio = audioop.ulaw2lin(audio, 2)
            audio, resample_state = audioop.ratecv(
                audio, 2, 1, 8000, 16000, resample_state
            )
            if rec.AcceptWaveform(audio):
                # The recognizer finalized an utterance.
                r = json.loads(rec.Result())
                output = CL + r['text'] + ' '
            else:
                # Still mid-utterance: show the partial hypothesis and move
                # the cursor back so the next print overwrites it.
                r = json.loads(rec.PartialResult())
                output = CL + r['partial'] + BS * len(r['partial'])
            print("\n Transcription output: ", output)

            if key_string in output:
                print("We're in, transferring call...")
                # Logic to transfer call goes here
                break
    
if __name__ == "__main__":
    # Start from a clean slate: drop the URL file left by a previous run.
    if os.path.exists(file_path):
        print("Deleting file...")
        os.remove(file_path)

    from pyngrok import ngrok

    # Open a TLS tunnel to the local port and publish its URL through the
    # shared file so other modules can build their callback URLs from it.
    tunnel = ngrok.connect(port, bind_tls=True)
    public_url = tunnel.public_url
    with open(file_path, "w") as url_file:
        url_file.write(public_url)

    # NOTE(review): debug=True runs the Flask debugger on a server that is
    # reachable through the public tunnel -- confirm this is intended.
    app.run(port=port, debug=True, use_reloader=False)

Solution

  • The easiest way is to use the call SID to update the live call's TwiML so that it dials (<Dial>) your personal number:

    # Replace the TwiML of the in-progress call (identified by its call SID)
    # with a response that dials the target number.
    call = client.calls('CAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX') \
                 .update(twiml='<Response><Dial>415-123-4567</Dial></Response>')
    
    print(call.to)