Created
January 8, 2024 17:00
-
-
Save Aayush9029/1b0aa8cfe61ac3d0f5c5bf5fc4a511c5 to your computer and use it in GitHub Desktop.
AssemblyAI WebSocket client in Swift
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import AVFoundation | |
| import Foundation | |
| import SwiftUI | |
/// Streams 16-bit PCM audio to the AssemblyAI real-time WebSocket endpoint and
/// publishes the text of the transcript messages it receives.
///
/// Typical use: call `connect()`, feed audio with `send(pcmBuffer:)`, observe
/// `transcript`, and finish with `disconnect()`.
class AssemblyAIWebSocketClient: ObservableObject {
    /// The currently open WebSocket task, if `connect()` has been called.
    private var webSocketTask: URLSessionWebSocketTask?
    private let urlSession: URLSession
    /// Value sent verbatim in the `Authorization` header of the handshake request.
    private let apiKey: String
    /// Sample rate (Hz) advertised to the server through the `sample_rate` query item.
    private let audioSampleRate: Int

    /// Text of the most recently parsed transcript message.
    /// Assigned on the main queue so SwiftUI observers can bind to it directly.
    @Published var transcript: String = ""

    /// Creates a client.
    /// - Parameters:
    ///   - audioSampleRate: Sample rate of the audio that will be sent, in Hz.
    ///   - apiKey: AssemblyAI API key. Defaults to an empty string, so it must be
    ///     supplied for the connection to be authorized.
    init(audioSampleRate: Int = 16000, apiKey: String = "") {
        self.audioSampleRate = audioSampleRate
        self.apiKey = apiKey
        self.urlSession = URLSession(configuration: .default)
        print("AssemblyAIWebSocketClient initialized with sample rate: \(audioSampleRate)")
    }

    /// Opens the WebSocket connection and starts the receive loop.
    ///
    /// Any task left over from an earlier `connect()` is closed first so that
    /// reconnecting does not leak the previous socket.
    func connect() {
        print("Attempting to connect to AssemblyAI")
        guard let url = URL(string: "wss://api.assemblyai.com/v2/realtime/ws?sample_rate=\(audioSampleRate)") else {
            print("Invalid AssemblyAI URL")
            return
        }

        var request = URLRequest(url: url)
        // The key is sent as the raw header value (no "Bearer " prefix).
        request.addValue(apiKey, forHTTPHeaderField: "Authorization")

        webSocketTask?.cancel(with: .goingAway, reason: nil)
        let task = urlSession.webSocketTask(with: request)
        webSocketTask = task
        task.resume()
        listenForMessages()
    }

    /// Sends the first channel of `pcmBuffer` as a base64-encoded JSON text message.
    ///
    /// - Parameter pcmBuffer: Buffer whose `int16ChannelData` is non-nil.
    ///   NOTE(review): `int16ChannelData` is presumably nil for non-Int16 formats
    ///   (e.g. a Float32 microphone tap), in which case nothing is sent — confirm
    ///   the caller converts to 16-bit PCM first.
    func send(pcmBuffer: AVAudioPCMBuffer) {
        guard let samples = pcmBuffer.int16ChannelData?[0] else {
            print("No data in PCM buffer")
            return
        }
        // Nothing to transmit for an empty buffer; skip the round trip.
        guard pcmBuffer.frameLength > 0 else { return }

        let byteCount = Int(pcmBuffer.frameLength) * MemoryLayout<Int16>.size
        let data = Data(bytes: samples, count: byteCount)

        // Base64 output contains no characters that need JSON escaping, so the
        // payload can be interpolated directly into the JSON envelope.
        let base64EncodedString = data.base64EncodedString()
        let message = URLSessionWebSocketTask.Message.string("{\"audio_data\": \"\(base64EncodedString)\"}")

        webSocketTask?.send(message) { error in
            if let error = error {
                print("Error sending PCM buffer: \(error)")
            }
        }
    }

    /// Sends a `terminate_session` message and then closes the socket.
    ///
    /// The task is captured strongly by the completion handler so the socket is
    /// closed even if this client is released before the send completes.
    func disconnect() {
        print("Disconnecting from AssemblyAI")
        guard let task = webSocketTask else { return }

        let terminateMessage = URLSessionWebSocketTask.Message.string("{\"terminate_session\": true}")
        task.send(terminateMessage) { error in
            if let error = error {
                print("Error sending termination message: \(error)")
            }
            task.cancel(with: .normalClosure, reason: nil)
        }
    }

    /// Receives one message and re-arms itself after every successful receive.
    /// The loop ends on the first receive failure (for example after the socket closes).
    private func listenForMessages() {
        print("Listening for messages from AssemblyAI")
        webSocketTask?.receive { [weak self] result in
            switch result {
            case .failure(let error):
                print("Error receiving message: \(error)")
            case .success(let message):
                switch message {
                case .string(let text):
                    print("Received text: \(text)")
                    // Hop to the main queue because the handler mutates `@Published` state.
                    DispatchQueue.main.async {
                        self?.handleReceivedMessage(text)
                    }
                case .data(let data):
                    print("Received data: \(data)")
                @unknown default:
                    break
                }
                self?.listenForMessages()
            }
        }
    }

    /// Parses a JSON text message into a `Transcript` and publishes its text.
    ///
    /// Expected to run on the main queue (see `listenForMessages()`).
    /// - Parameter text: Raw JSON text received from the socket.
    private func handleReceivedMessage(_ text: String) {
        guard let data = text.data(using: .utf8),
              let json = try? JSONSerialization.jsonObject(with: data, options: []),
              let dictionary = json as? [String: Any]
        else {
            print("Error parsing JSON")
            return
        }

        // NOTE(review): any message lacking one of these fields is rejected here;
        // presumably that includes session-control messages — confirm against the API docs.
        guard let messageType = dictionary["message_type"] as? String,
              let transcriptText = dictionary["text"] as? String,
              let confidence = dictionary["confidence"] as? Double,
              let wordsArray = dictionary["words"] as? [[String: Any]]
        else {
            print("Error extracting data")
            return
        }

        // Entries missing any expected field are dropped rather than failing the whole message.
        let words = wordsArray.compactMap { wordDict -> Word? in
            guard let start = wordDict["start"] as? Int,
                  let end = wordDict["end"] as? Int,
                  let wordConfidence = wordDict["confidence"] as? Double,
                  let wordText = wordDict["text"] as? String
            else {
                return nil
            }
            return Word(start: start, end: end, confidence: wordConfidence, text: wordText)
        }

        let parsed = Transcript(messageType: messageType, text: transcriptText, confidence: confidence, words: words)
        print(parsed.text)

        // Publish the text so observers of `transcript` actually receive updates.
        transcript = parsed.text
    }
}
/// One transcript message decoded from the WebSocket stream.
///
/// - `messageType`: value of the message's `message_type` field.
/// - `text`: transcribed text carried by the message.
/// - `confidence`: confidence reported for the whole message.
/// - `words`: per-word details that were parsed successfully.
struct Transcript {
    var messageType, text: String
    var confidence: Double
    var words: [Word]
}
/// A single word entry from a transcript message.
///
/// - `start` / `end`: word boundaries as reported by the service
///   (presumably milliseconds — TODO confirm against the API docs).
/// - `confidence`: confidence reported for this word.
/// - `text`: the word itself.
struct Word {
    var start, end: Int
    var confidence: Double
    var text: String
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment