Skip to content

Instantly share code, notes, and snippets.

@Aayush9029
Created January 8, 2024 17:00
Show Gist options
  • Select an option

  • Save Aayush9029/1b0aa8cfe61ac3d0f5c5bf5fc4a511c5 to your computer and use it in GitHub Desktop.

Select an option

Save Aayush9029/1b0aa8cfe61ac3d0f5c5bf5fc4a511c5 to your computer and use it in GitHub Desktop.
Assembly AI websocket swift
import AVFoundation
import Foundation
import SwiftUI
/// Streams 16-bit PCM audio to the AssemblyAI real-time WebSocket endpoint and
/// publishes the text of the transcripts that come back.
///
/// Typical use: create the client, call `connect()`, feed audio buffers through
/// `send(pcmBuffer:)`, observe `transcript`, and call `disconnect()` when done.
class AssemblyAIWebSocketClient: ObservableObject {
    /// The currently active socket, or `nil` when not connected.
    private var webSocketTask: URLSessionWebSocketTask?
    private let urlSession: URLSession
    /// Value sent verbatim in the `Authorization` header of the handshake.
    private let apiKey: String
    /// Sample rate advertised to the server via the `sample_rate` query item.
    private let audioSampleRate: Int

    /// Text of the most recent non-empty transcript message. Updated on the main queue.
    @Published var transcript: String = ""

    /// Creates a client.
    /// - Parameters:
    ///   - audioSampleRate: Sample rate of the audio that will be sent.
    ///   - apiKey: AssemblyAI API key. Defaults to an empty string so existing
    ///     call sites keep compiling; pass a real key to authenticate.
    init(audioSampleRate: Int = 16000, apiKey: String = "") {
        self.audioSampleRate = audioSampleRate
        self.apiKey = apiKey
        self.urlSession = URLSession(configuration: .default)
        print("AssemblyAIWebSocketClient initialized with sample rate: \(audioSampleRate)")
    }

    /// Opens the WebSocket and starts receiving messages.
    ///
    /// Any connection that is still open is closed first, so calling this
    /// repeatedly never leaves an orphaned socket behind.
    func connect() {
        print("Attempting to connect to AssemblyAI")
        if apiKey.isEmpty {
            print("Warning: AssemblyAI API key is empty")
        }
        guard let url = URL(string: "wss://api.assemblyai.com/v2/realtime/ws?sample_rate=\(audioSampleRate)") else {
            print("Invalid AssemblyAI URL for sample rate: \(audioSampleRate)")
            return
        }

        // Replacing the task without cancelling it would leak the old socket.
        webSocketTask?.cancel(with: .goingAway, reason: nil)

        var request = URLRequest(url: url)
        request.setValue(apiKey, forHTTPHeaderField: "Authorization")

        let task = urlSession.webSocketTask(with: request)
        webSocketTask = task
        task.resume()
        listenForMessages()
    }

    /// Sends the first channel of `pcmBuffer` as a base64 `audio_data` JSON message.
    /// - Parameter pcmBuffer: Buffer holding 16-bit integer samples. Buffers in
    ///   any other sample format, or with no frames, are ignored.
    func send(pcmBuffer: AVAudioPCMBuffer) {
        let frameCount = Int(pcmBuffer.frameLength)
        guard let samples = pcmBuffer.int16ChannelData?[0], frameCount > 0 else {
            print("No data in PCM buffer")
            return
        }

        let audioData = Data(bytes: samples, count: frameCount * MemoryLayout<Int16>.size)
        // Base64 output contains no characters that need JSON escaping,
        // so the payload can be assembled by interpolation.
        let payload = "{\"audio_data\": \"\(audioData.base64EncodedString())\"}"

        webSocketTask?.send(.string(payload)) { error in
            if let error = error {
                print("Error sending PCM buffer: \(error)")
            }
        }
    }

    /// Sends the session-termination message and then closes the socket.
    func disconnect() {
        print("Disconnecting from AssemblyAI")
        guard let task = webSocketTask else { return }
        // Later `send(pcmBuffer:)` calls become no-ops instead of failing on a closing socket.
        webSocketTask = nil

        let terminateMessage = URLSessionWebSocketTask.Message.string("{\"terminate_session\": true}")
        // The task is captured directly so the socket is closed even if this
        // client is deallocated before the send completes.
        task.send(terminateMessage) { error in
            if let error = error {
                print("Error sending termination message: \(error)")
            }
            task.cancel(with: .normalClosure, reason: nil)
        }
    }

    /// Starts the receive loop on the current socket, if there is one.
    private func listenForMessages() {
        guard let task = webSocketTask else { return }
        print("Listening for messages from AssemblyAI")
        receiveNextMessage(on: task)
    }

    /// Receives one message from `task` and re-arms itself after every success.
    ///
    /// The loop is pinned to the task it was started for, so a late callback
    /// from an old connection cannot start a second loop on a newer one.
    /// It ends on the first receive failure.
    private func receiveNextMessage(on task: URLSessionWebSocketTask) {
        task.receive { [weak self] result in
            guard let self = self else { return }
            switch result {
            case .failure(let error):
                print("Error receiving message: \(error)")
            case .success(let message):
                switch message {
                case .string(let text):
                    print("Received text: \(text)")
                    // `transcript` is @Published, so it is mutated on the main queue.
                    DispatchQueue.main.async {
                        self.handleReceivedMessage(text)
                    }
                case .data(let data):
                    print("Received data: \(data)")
                @unknown default:
                    break
                }
                self.receiveNextMessage(on: task)
            }
        }
    }

    /// Parses one JSON text message and publishes its transcript text.
    ///
    /// Messages that are not valid JSON objects, or that lack any of the
    /// `message_type`, `text`, `confidence` or `words` fields, are logged and dropped.
    /// - Parameter text: Raw JSON string received from the socket.
    private func handleReceivedMessage(_ text: String) {
        guard let data = text.data(using: .utf8),
              let json = try? JSONSerialization.jsonObject(with: data, options: []),
              let dictionary = json as? [String: Any]
        else {
            print("Error parsing JSON")
            return
        }

        guard let messageType = dictionary["message_type"] as? String,
              let transcriptText = dictionary["text"] as? String,
              let confidence = dictionary["confidence"] as? Double,
              let wordsArray = dictionary["words"] as? [[String: Any]]
        else {
            print("Error extracting data")
            return
        }

        // Word entries missing any field are skipped rather than failing the whole message.
        let words = wordsArray.compactMap { wordDict -> Word? in
            guard let start = wordDict["start"] as? Int,
                  let end = wordDict["end"] as? Int,
                  let wordConfidence = wordDict["confidence"] as? Double,
                  let wordText = wordDict["text"] as? String
            else {
                return nil
            }
            return Word(start: start, end: end, confidence: wordConfidence, text: wordText)
        }

        let parsed = Transcript(messageType: messageType, text: transcriptText, confidence: confidence, words: words)
        print(parsed.text)

        // Skip empty texts so a message carrying no words does not wipe what is already shown.
        if !parsed.text.isEmpty {
            transcript = parsed.text
        }
    }
}
/// One transcript message decoded from the WebSocket JSON payload.
struct Transcript {
    /// Value of the payload's `message_type` field.
    var messageType: String

    /// Transcribed text carried by the message.
    var text: String

    /// Value of the payload's top-level `confidence` field.
    var confidence: Double

    /// Per-word details decoded from the payload's `words` array.
    var words: [Word]
}
/// A single word entry from a transcript's `words` array.
struct Word {
    /// Value of the entry's `start` field.
    /// NOTE(review): presumably an offset in milliseconds — confirm against the API.
    var start: Int

    /// Value of the entry's `end` field (same unit as `start`).
    var end: Int

    /// Value of the entry's `confidence` field.
    var confidence: Double

    /// The word's text.
    var text: String
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment