Created
December 30, 2025 10:35
-
-
Save ilovejs/c3f8538021c148abfdbe89d435161f51 to your computer and use it in GitHub Desktop.
whisper.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "gpuType": "T4", | |
| "authorship_tag": "ABX9TyNr89E/aBPpbqr5iLfeqRyr", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| }, | |
| "accelerator": "GPU" | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/ilovejs/c3f8538021c148abfdbe89d435161f51/whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "id": "Rnfvac1QAXRO", | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "!pip install git+https://github.com/openai/whisper.git\n", | |
| "!sudo apt install ffmpeg" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "from google.colab import files\n", | |
| "uploaded = files.upload()\n", | |
| "file_name = list(uploaded.keys())[0]\n", | |
| "print(f\"Uploaded: {file_name}\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 90 | |
| }, | |
| "id": "_SX1rRF0pC3u", | |
| "outputId": "693027c2-07e4-4928-b284-66bcaf5c9bc8" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ], | |
| "text/html": [ | |
| "\n", | |
| " <input type=\"file\" id=\"files-9f339a73-878c-4a06-ba3d-3a9abb0b9642\" name=\"files[]\" multiple disabled\n", | |
| " style=\"border:none\" />\n", | |
| " <output id=\"result-9f339a73-878c-4a06-ba3d-3a9abb0b9642\">\n", | |
| " Upload widget is only available when the cell has been executed in the\n", | |
| " current browser session. Please rerun this cell to enable.\n", | |
| " </output>\n", | |
| " <script>// Copyright 2017 Google LLC\n", | |
| "//\n", | |
| "// Licensed under the Apache License, Version 2.0 (the \"License\");\n", | |
| "// you may not use this file except in compliance with the License.\n", | |
| "// You may obtain a copy of the License at\n", | |
| "//\n", | |
| "// http://www.apache.org/licenses/LICENSE-2.0\n", | |
| "//\n", | |
| "// Unless required by applicable law or agreed to in writing, software\n", | |
| "// distributed under the License is distributed on an \"AS IS\" BASIS,\n", | |
| "// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", | |
| "// See the License for the specific language governing permissions and\n", | |
| "// limitations under the License.\n", | |
| "\n", | |
| "/**\n", | |
| " * @fileoverview Helpers for google.colab Python module.\n", | |
| " */\n", | |
| "(function(scope) {\n", | |
| "function span(text, styleAttributes = {}) {\n", | |
| " const element = document.createElement('span');\n", | |
| " element.textContent = text;\n", | |
| " for (const key of Object.keys(styleAttributes)) {\n", | |
| " element.style[key] = styleAttributes[key];\n", | |
| " }\n", | |
| " return element;\n", | |
| "}\n", | |
| "\n", | |
| "// Max number of bytes which will be uploaded at a time.\n", | |
| "const MAX_PAYLOAD_SIZE = 100 * 1024;\n", | |
| "\n", | |
| "function _uploadFiles(inputId, outputId) {\n", | |
| " const steps = uploadFilesStep(inputId, outputId);\n", | |
| " const outputElement = document.getElementById(outputId);\n", | |
| " // Cache steps on the outputElement to make it available for the next call\n", | |
| " // to uploadFilesContinue from Python.\n", | |
| " outputElement.steps = steps;\n", | |
| "\n", | |
| " return _uploadFilesContinue(outputId);\n", | |
| "}\n", | |
| "\n", | |
| "// This is roughly an async generator (not supported in the browser yet),\n", | |
| "// where there are multiple asynchronous steps and the Python side is going\n", | |
| "// to poll for completion of each step.\n", | |
| "// This uses a Promise to block the python side on completion of each step,\n", | |
| "// then passes the result of the previous step as the input to the next step.\n", | |
| "function _uploadFilesContinue(outputId) {\n", | |
| " const outputElement = document.getElementById(outputId);\n", | |
| " const steps = outputElement.steps;\n", | |
| "\n", | |
| " const next = steps.next(outputElement.lastPromiseValue);\n", | |
| " return Promise.resolve(next.value.promise).then((value) => {\n", | |
| " // Cache the last promise value to make it available to the next\n", | |
| " // step of the generator.\n", | |
| " outputElement.lastPromiseValue = value;\n", | |
| " return next.value.response;\n", | |
| " });\n", | |
| "}\n", | |
| "\n", | |
| "/**\n", | |
| " * Generator function which is called between each async step of the upload\n", | |
| " * process.\n", | |
| " * @param {string} inputId Element ID of the input file picker element.\n", | |
| " * @param {string} outputId Element ID of the output display.\n", | |
| " * @return {!Iterable<!Object>} Iterable of next steps.\n", | |
| " */\n", | |
| "function* uploadFilesStep(inputId, outputId) {\n", | |
| " const inputElement = document.getElementById(inputId);\n", | |
| " inputElement.disabled = false;\n", | |
| "\n", | |
| " const outputElement = document.getElementById(outputId);\n", | |
| " outputElement.innerHTML = '';\n", | |
| "\n", | |
| " const pickedPromise = new Promise((resolve) => {\n", | |
| " inputElement.addEventListener('change', (e) => {\n", | |
| " resolve(e.target.files);\n", | |
| " });\n", | |
| " });\n", | |
| "\n", | |
| " const cancel = document.createElement('button');\n", | |
| " inputElement.parentElement.appendChild(cancel);\n", | |
| " cancel.textContent = 'Cancel upload';\n", | |
| " const cancelPromise = new Promise((resolve) => {\n", | |
| " cancel.onclick = () => {\n", | |
| " resolve(null);\n", | |
| " };\n", | |
| " });\n", | |
| "\n", | |
| " // Wait for the user to pick the files.\n", | |
| " const files = yield {\n", | |
| " promise: Promise.race([pickedPromise, cancelPromise]),\n", | |
| " response: {\n", | |
| " action: 'starting',\n", | |
| " }\n", | |
| " };\n", | |
| "\n", | |
| " cancel.remove();\n", | |
| "\n", | |
| " // Disable the input element since further picks are not allowed.\n", | |
| " inputElement.disabled = true;\n", | |
| "\n", | |
| " if (!files) {\n", | |
| " return {\n", | |
| " response: {\n", | |
| " action: 'complete',\n", | |
| " }\n", | |
| " };\n", | |
| " }\n", | |
| "\n", | |
| " for (const file of files) {\n", | |
| " const li = document.createElement('li');\n", | |
| " li.append(span(file.name, {fontWeight: 'bold'}));\n", | |
| " li.append(span(\n", | |
| " `(${file.type || 'n/a'}) - ${file.size} bytes, ` +\n", | |
| " `last modified: ${\n", | |
| " file.lastModifiedDate ? file.lastModifiedDate.toLocaleDateString() :\n", | |
| " 'n/a'} - `));\n", | |
| " const percent = span('0% done');\n", | |
| " li.appendChild(percent);\n", | |
| "\n", | |
| " outputElement.appendChild(li);\n", | |
| "\n", | |
| " const fileDataPromise = new Promise((resolve) => {\n", | |
| " const reader = new FileReader();\n", | |
| " reader.onload = (e) => {\n", | |
| " resolve(e.target.result);\n", | |
| " };\n", | |
| " reader.readAsArrayBuffer(file);\n", | |
| " });\n", | |
| " // Wait for the data to be ready.\n", | |
| " let fileData = yield {\n", | |
| " promise: fileDataPromise,\n", | |
| " response: {\n", | |
| " action: 'continue',\n", | |
| " }\n", | |
| " };\n", | |
| "\n", | |
| " // Use a chunked sending to avoid message size limits. See b/62115660.\n", | |
| " let position = 0;\n", | |
| " do {\n", | |
| " const length = Math.min(fileData.byteLength - position, MAX_PAYLOAD_SIZE);\n", | |
| " const chunk = new Uint8Array(fileData, position, length);\n", | |
| " position += length;\n", | |
| "\n", | |
| " const base64 = btoa(String.fromCharCode.apply(null, chunk));\n", | |
| " yield {\n", | |
| " response: {\n", | |
| " action: 'append',\n", | |
| " file: file.name,\n", | |
| " data: base64,\n", | |
| " },\n", | |
| " };\n", | |
| "\n", | |
| " let percentDone = fileData.byteLength === 0 ?\n", | |
| " 100 :\n", | |
| " Math.round((position / fileData.byteLength) * 100);\n", | |
| " percent.textContent = `${percentDone}% done`;\n", | |
| "\n", | |
| " } while (position < fileData.byteLength);\n", | |
| " }\n", | |
| "\n", | |
| " // All done.\n", | |
| " yield {\n", | |
| " response: {\n", | |
| " action: 'complete',\n", | |
| " }\n", | |
| " };\n", | |
| "}\n", | |
| "\n", | |
| "scope.google = scope.google || {};\n", | |
| "scope.google.colab = scope.google.colab || {};\n", | |
| "scope.google.colab._files = {\n", | |
| " _uploadFiles,\n", | |
| " _uploadFilesContinue,\n", | |
| "};\n", | |
| "})(self);\n", | |
| "</script> " | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Saving audio_video.mp4 to audio_video.mp4\n", | |
| "Uploaded: audio_video.mp4\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "5a728360" | |
| }, | |
| "source": [ | |
| "## Modify Transcription for Word Timestamps\n", | |
| "\n", | |
| "### Subtask:\n", | |
| "Adjust the `whisper` transcription call to include `word_timestamps=True`. This will enable access to start and end times for individual words, which is crucial for accurate sentence splitting and timestamp adjustment.\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "e6f246c6", | |
| "outputId": "bbd3ddbd-cb10-4f0d-a858-17e763449df1" | |
| }, | |
| "source": [ | |
| "import whisper\n", | |
| "import math\n", | |
| "import subprocess # Import subprocess to run shell commands\n", | |
| "\n", | |
| "# 1. Load the model (Options: tiny, base, small, medium, large, turbo)\n", | |
| "print(\"Loading model...\")\n", | |
| "model = whisper.load_model(\"turbo\")\n", | |
| "\n", | |
| "# Get audio duration using ffprobe\n", | |
| "duration_seconds = None\n", | |
| "try:\n", | |
| " # Command to get duration in seconds\n", | |
| " cmd = ['ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', file_name]\n", | |
| " result_ffprobe = subprocess.run(cmd, capture_output=True, text=True, check=True)\n", | |
| " duration_seconds = float(result_ffprobe.stdout.strip())\n", | |
| "except FileNotFoundError:\n", | |
| " print(\"ffprobe not found. Please ensure ffmpeg is installed and in your PATH.\")\n", | |
| "except subprocess.CalledProcessError as e:\n", | |
| " print(f\"Error running ffprobe: {e}\\n{e.stderr}\")\n", | |
| "except ValueError:\n", | |
| " print(\"Could not parse duration from ffprobe output.\")\n", | |
| "\n", | |
| "duration_str = \"\"\n", | |
| "if duration_seconds:\n", | |
| " hours = int(duration_seconds // 3600)\n", | |
| " minutes = int((duration_seconds % 3600) // 60)\n", | |
| " seconds = duration_seconds % 60\n", | |
| " if hours > 0:\n", | |
| " duration_str = f\" (approx. {hours}h {minutes}m {seconds:.1f}s)\"\n", | |
| " elif minutes > 0:\n", | |
| " duration_str = f\" (approx. {minutes}m {seconds:.1f}s)\"\n", | |
| " else:\n", | |
| " duration_str = f\" (approx. {seconds:.1f}s)\"\n", | |
| "\n", | |
| "# 2. Transcribe the audio\n", | |
| "print(f\"Transcribing {file_name}{duration_str}... this may take a moment.\")\n", | |
| "result = model.transcribe(file_name, word_timestamps=True)\n", | |
| "\n", | |
| "# 3. Define function to format timestamps for SBV (H:MM:SS.mmm)\n", | |
| "def format_sbv_timestamp(seconds):\n", | |
| " hours = int(seconds // 3600)\n", | |
| " minutes = int((seconds % 3600) // 60)\n", | |
| " secs = seconds % 60\n", | |
| " # SBV format: 0:00:00.000 (Hours:Minutes:Seconds.Milliseconds)\n", | |
| " return f\"{hours}:{minutes:02d}:{secs:06.3f}\"\n" | |
| ], | |
| "execution_count": 25, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Loading model...\n", | |
| "Transcribing audio_video.mp4 (approx. 15m 52.0s)... this may take a moment.\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "01bdb3d8", | |
| "outputId": "e27fa291-68ce-41a4-86f2-00b11d7f92bf" | |
| }, | |
| "source": [ | |
| "import re\n", | |
| "\n", | |
| "# Configuration for sentence breaking\n", | |
| "# Add comma as a delimiter for natural breaks\n", | |
| "sentence_break_delimiters = \".?!,。?!\"\n", | |
| "\n", | |
| "# Generate the regex pattern dynamically from the configured delimiters\n", | |
| "# re.escape is used to handle any special regex characters if they were in the delimiters string\n", | |
| "escaped_delimiters = re.escape(sentence_break_delimiters)\n", | |
| "punctuation_pattern = rf'[{escaped_delimiters}]$'\n", | |
| "\n", | |
| "# 1. Initialize an empty list to store the processed sub-segments\n", | |
| "new_sbv_segments = []\n", | |
| "\n", | |
| "# Helper function to process and store a sub-segment\n", | |
| "def add_sub_segment(words_list):\n", | |
| " if not words_list:\n", | |
| " return\n", | |
| "\n", | |
| " text = \"\".join([w['word'] for w in words_list]).strip()\n", | |
| " start_time = words_list[0]['start']\n", | |
| " end_time = words_list[-1]['end']\n", | |
| " new_sbv_segments.append({\n", | |
| " 'start': start_time,\n", | |
| " 'end': end_time,\n", | |
| " 'text': text\n", | |
| " })\n", | |
| "\n", | |
| "# Iterate through each segment in the result['segments'] list\n", | |
| "for segment in result['segments']:\n", | |
| " current_sentence_words = []\n", | |
| " words_in_segment = segment.get('words', [])\n", | |
| "\n", | |
| " # Iterate through each word_info dictionary in the segment['words'] list\n", | |
| " for i, word_info in enumerate(words_in_segment):\n", | |
| " # Append the current word_info to current_sentence_words\n", | |
| " current_sentence_words.append(word_info)\n", | |
| "\n", | |
| " is_last_word_in_segment = (i == len(words_in_segment) - 1)\n", | |
| "\n", | |
| " # Check for conditions to create a new sub-segment:\n", | |
| " # a. current_sentence_words has reached 10 words.\n", | |
| " # b. word_info['word'] ends with one of the characters in\n", | |
| " # sentence_break_delimiters (matched via punctuation_pattern).\n", | |
| " # c. It is the last word in the segment['words'] list.\n", | |
| " # (current_sentence_words is never empty here: the word was just appended.)\n", | |
| "\n", | |
| " ends_with_punctuation = bool(re.search(punctuation_pattern, word_info['word']))\n", | |
| "\n", | |
| " if (len(current_sentence_words) >= 10 and current_sentence_words) or \\\n", | |
| " (ends_with_punctuation and current_sentence_words) or \\\n", | |
| " (is_last_word_in_segment and current_sentence_words):\n", | |
| "\n", | |
| " add_sub_segment(current_sentence_words)\n", | |
| " current_sentence_words = []\n", | |
| "\n", | |
| "# Now, generate the SBV content using the new_sbv_segments\n", | |
| "sbv_content_new = \"\"\n", | |
| "for seg in new_sbv_segments:\n", | |
| " start = format_sbv_timestamp(seg['start'])\n", | |
| " end = format_sbv_timestamp(seg['end'])\n", | |
| " text = seg['text'].strip()\n", | |
| "\n", | |
| " sbv_content_new += f\"{start},{end}\\n{text}\\n\\n\"\n", | |
| "\n", | |
| "# Save to a .sbv file named after the uploaded file, overwriting any existing file of that name\n", | |
| "output_filename = file_name.rsplit('.', 1)[0] + \".sbv\"\n", | |
| "with open(output_filename, \"w\", encoding=\"utf-8\") as f:\n", | |
| " f.write(sbv_content_new)\n", | |
| "\n", | |
| "print(f\"Success! Created: {output_filename} with broken sentences.\")" | |
| ], | |
| "execution_count": 23, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Success! Created: audio_video.sbv with broken sentences.\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 17 | |
| }, | |
| "id": "960e4ab5", | |
| "outputId": "d1e93cc1-bacd-486a-9eea-2afa6d901b53" | |
| }, | |
| "source": [ | |
| "# Download the generated SBV file\n", | |
| "files.download(output_filename)" | |
| ], | |
| "execution_count": 21, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "\n", | |
| " async function download(id, filename, size) {\n", | |
| " if (!google.colab.kernel.accessAllowed) {\n", | |
| " return;\n", | |
| " }\n", | |
| " const div = document.createElement('div');\n", | |
| " const label = document.createElement('label');\n", | |
| " label.textContent = `Downloading \"${filename}\": `;\n", | |
| " div.appendChild(label);\n", | |
| " const progress = document.createElement('progress');\n", | |
| " progress.max = size;\n", | |
| " div.appendChild(progress);\n", | |
| " document.body.appendChild(div);\n", | |
| "\n", | |
| " const buffers = [];\n", | |
| " let downloaded = 0;\n", | |
| "\n", | |
| " const channel = await google.colab.kernel.comms.open(id);\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| "\n", | |
| " for await (const message of channel.messages) {\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| " if (message.buffers) {\n", | |
| " for (const buffer of message.buffers) {\n", | |
| " buffers.push(buffer);\n", | |
| " downloaded += buffer.byteLength;\n", | |
| " progress.value = downloaded;\n", | |
| " }\n", | |
| " }\n", | |
| " }\n", | |
| " const blob = new Blob(buffers, {type: 'application/binary'});\n", | |
| " const a = document.createElement('a');\n", | |
| " a.href = window.URL.createObjectURL(blob);\n", | |
| " a.download = filename;\n", | |
| " div.appendChild(a);\n", | |
| " a.click();\n", | |
| " div.remove();\n", | |
| " }\n", | |
| " " | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "download(\"download_728cc862-8c45-4e4e-8612-94c5c5f1a429\", \"audio_video.sbv\", 20764)" | |
| ] | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Convert SBV (YouTube caption format) to SRT for local checking.\n", | |
| "# I'm on a Mac using IINA, so the subtitles should render there.\n", | |
| "\n", | |
| "import os\n", | |
| "\n", | |
| "input_filename = \"audio_video.sbv\"\n", | |
| "output_filename = \"audio_video.srt\"\n", | |
| "\n", | |
| "# 1. Check if file exists\n", | |
| "if not os.path.exists(input_filename):\n", | |
| " print(f\"Error: '{input_filename}' not found in the current directory.\")\n", | |
| "else:\n", | |
| " with open(input_filename, 'r', encoding='utf-8') as f:\n", | |
| " # Split by empty lines to isolate caption blocks\n", | |
| " blocks = f.read().strip().split('\\n\\n')\n", | |
| "\n", | |
| " with open(output_filename, 'w', encoding='utf-8') as f_out:\n", | |
| " for index, block in enumerate(blocks, 1):\n", | |
| " if not block.strip():\n", | |
| " continue\n", | |
| "\n", | |
| " lines = block.split('\\n')\n", | |
| "\n", | |
| " # The first line is the timestamp\n", | |
| " sbv_time = lines[0]\n", | |
| " # The rest is the text\n", | |
| " text = \"\\n\".join(lines[1:])\n", | |
| "\n", | |
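| " # Convert an SBV time line (0:00:00.000,0:00:05.000) to SRT style (0:00:00,000 --> 0:00:05,000). NOTE: only '.' is swapped for ','; hours are not zero-padded to the HH that strict SRT uses.\n", | |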
| " # 1. Split start and end\n", | |
| " start, end = sbv_time.split(',')\n", | |
| " # 2. Replace dots with commas\n", | |
| " srt_time = f\"{start.replace('.', ',')} --> {end.replace('.', ',')}\"\n", | |
| "\n", | |
| " # Write to file (Index, Time, Text, Empty Line)\n", | |
| " f_out.write(f\"{index}\\n{srt_time}\\n{text}\\n\\n\")\n", | |
| "\n", | |
| " print(f\"Done! Created: {output_filename}\")\n", | |
| "\n", | |
| " # Optional: Uncomment the line below to automatically download the file to your PC\n", | |
| " # from google.colab import files; files.download(output_filename)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "ktFjhz9Iqm8G", | |
| "outputId": "7552bbb0-e55f-483d-8ec2-6102c2a5f0bb" | |
| }, | |
| "execution_count": 24, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Done! Created: audio_video.srt\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "files.download(output_filename)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 17 | |
| }, | |
| "id": "ZINL0zEKwOZH", | |
| "outputId": "e8def8c0-cc3b-4060-9da0-48ec60a0af86" | |
| }, | |
| "execution_count": 14, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "\n", | |
| " async function download(id, filename, size) {\n", | |
| " if (!google.colab.kernel.accessAllowed) {\n", | |
| " return;\n", | |
| " }\n", | |
| " const div = document.createElement('div');\n", | |
| " const label = document.createElement('label');\n", | |
| " label.textContent = `Downloading \"${filename}\": `;\n", | |
| " div.appendChild(label);\n", | |
| " const progress = document.createElement('progress');\n", | |
| " progress.max = size;\n", | |
| " div.appendChild(progress);\n", | |
| " document.body.appendChild(div);\n", | |
| "\n", | |
| " const buffers = [];\n", | |
| " let downloaded = 0;\n", | |
| "\n", | |
| " const channel = await google.colab.kernel.comms.open(id);\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| "\n", | |
| " for await (const message of channel.messages) {\n", | |
| " // Send a message to notify the kernel that we're ready.\n", | |
| " channel.send({})\n", | |
| " if (message.buffers) {\n", | |
| " for (const buffer of message.buffers) {\n", | |
| " buffers.push(buffer);\n", | |
| " downloaded += buffer.byteLength;\n", | |
| " progress.value = downloaded;\n", | |
| " }\n", | |
| " }\n", | |
| " }\n", | |
| " const blob = new Blob(buffers, {type: 'application/binary'});\n", | |
| " const a = document.createElement('a');\n", | |
| " a.href = window.URL.createObjectURL(blob);\n", | |
| " a.download = filename;\n", | |
| " div.appendChild(a);\n", | |
| " a.click();\n", | |
| " div.remove();\n", | |
| " }\n", | |
| " " | |
| ] | |
| }, | |
| "metadata": {} | |
| }, | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<IPython.core.display.Javascript object>" | |
| ], | |
| "application/javascript": [ | |
| "download(\"download_959552eb-ddbd-482e-9179-2f80024edfd0\", \"audio_video.srt\", 21685)" | |
| ] | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment