|
# -*- coding: utf-8 -*- |
|
|
|
# my_youtube_download.ipynb |
|
# |
|
# Automatically generated by Colab. |
|
# |
|
# Original file is located at |
|
# https://colab.research.google.com/gist/shhommychon/759036d8f19f868407190ccf8ca75040 |
|
|
|
# Cell no.1 (id: R1zMGDzzzm8v) |
|
"""markdown |
|
[](https://colab.research.google.com/gist/shhommychon/759036d8f19f868407190ccf8ca75040) |
|
""" |
|
|
|
# Cell no.2 (id: cTCLqj-Wzv1w) |
|
"""markdown |
|
# 유튜브 다운로드 |
|
- 유튜브 다운로드 웹페이지들 광고가 너무 많고, 사무실에 있는데 성인 게임/웹툰 광고로 암살하려 드는 사이트들도 너무 많아 개빡쳐서 간단히 만든 페이지 |
|
""" |
|
|
|
# Cell no.3 (id: DCFI_fgT0O_y) |
|
"""markdown |
|
##### setup |
|
- 다시 시작하라는 경고 메세지가 떠도 놀라지 말고 다시 실행해보세요 |
|
""" |
|
|
|
# Cell no.4 (id: J4dArR1q0RQB) |
|
"""markdown |
|
###### dependencies |
|
""" |
|
|
|
# Cell no.5 (id: RKB3KQ1fzl8X) |
|
# !pip install yt-dlp --quiet # Google Colab shell command |
|
|
|
import yt_dlp

# Print only the installed yt-dlp version string. The previous
# `help(yt_dlp.version)` call rendered the whole module help page, unlike the
# pandas / natsort version checks in the following cells.
print(yt_dlp.version.__version__)
|
|
|
# Cell no.6 (id: 3yyyo7tjHjX6) |
|
from pandas import __version__ as pandas_version |
|
print(pandas_version) |
|
|
|
# Cell no.7 (id: cPXL9fQpO-UG) |
|
from natsort import __version__ as natsort_version |
|
print(natsort_version) |
|
|
|
# Cell no.8 (id: 8A8d2Lc31KK8) |
|
"""markdown |
|
###### utility functions |
|
""" |
|
|
|
# Cell no.9 (id: Q_c5Yf5918Gu) |
|
import gc |
|
import os |
|
import re |
|
import time |
|
import urllib.request |
|
import yt_dlp |
|
|
|
from yt_dlp.utils import UnsupportedError |
|
|
|
# Cell no.10 (id: mGXksrMh1OzW) |
|
def warn_cookie_use():
    """Print a hint on how to authenticate with a cookie file.

    The text explains, step by step, how to export YouTube cookies from a
    browser and pass them through `set_url_with_cookie`, and warns that doing
    so is risky. Nothing is returned; the message goes to standard output.
    """
    # One entry per printed line; an empty entry produces a blank line.
    lines = (
        "Are you trying to download content that requires an account to access,",
        " such as private playlists, age-restricted videos, or members-only contents?",
        "",
        " You can use cookies to authenticate, but this is risky and strongly not recommended.",
        " Follow the below method at your own risk.",
        "",
        " 1) Install a browser extension like `EditThisCookie`, `GetCookies.txt`, or any other.",
        " `EditThisCookie` - https://chromewebstore.google.com/detail/editthiscookie-v3/ojfebgpkimhlhcblbalbfjblapadhbol",
        " `GetCookies.txt` - https://chromewebstore.google.com/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc",
        " or any other exts - https://chromewebstore.google.com/",
        " 2) Export your YouTube cookies to a file (e.g., cookies.txt),",
        " and upload it on the left sidebar of this Colab notebook.",
        # Fixed: the code span was missing its closing backtick and ended in a stray colon.
        " 3) Try `YouTubeURLMemory.set_url_with_cookie(URL, cookie_fpath='./cookies.txt')`.",
        "",
        " Note that login with password is not supported for YouTube.",
        " For advanced usage,",
        " * See https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp for how to manually pass cookies.",
        " * Also see https://github.com/yt-dlp/yt-dlp/wiki/Extractors#exporting-youtube-cookies for tips on effectively exporting YouTube cookies.",
    )
    print("\n".join(lines))
|
|
|
def generate_naive_cookies_txt(filepath: str, expires_in_days: int = 7):
    """Write a minimal Netscape-format cookie file for `.youtube.com`.

    Args:
        filepath: Destination path of the cookie file (overwritten if present).
        expires_in_days: Lifetime of every cookie, counted from now.
            Defaults to 7 days, the value that used to be hard-coded.

    The file holds placeholder (non-authenticated) cookie values. A
    confirmation message is printed once the file has been written.
    """
    # Compute the expiry once so every row carries exactly the same timestamp
    # (previously `time.time()` was re-evaluated for each row).
    expiry = str(int(time.time()) + expires_in_days * 24 * 60 * 60)

    # (cookie name, cookie value) pairs; all share domain, path and flags.
    cookie_pairs = (
        ("PREF", "hl=ko&tz=Asia.Seoul"),
        ("GPS", "1"),
        ("YSC", "SomeHashVal"),
        ("VISITOR_INFO1_LIVE", "OthrHashVal"),
        ("VISITOR_PRIVACY_METADATA", "LongHaashValue%3D%3D"),
        ("__Secure-ROLLOUT_TOKEN", "SomeAnotherRandomVeryVeryLongHashValue%3D%3D"),
    )

    with open(filepath, "w", encoding="utf-8") as f:
        f.write("# Netscape HTTP Cookie File\n")
        f.write("# http://curl.haxx.se/rfc/cookie_spec.html\n")
        f.write("# This is a programmatically generated cookie file.\n\n")

        # Fields: domain, include-subdomains flag, path, secure, expiry, name, value
        f.writelines(
            "\t".join((".youtube.com", "TRUE", "/", "TRUE", expiry, name, value)) + "\n"
            for name, value in cookie_pairs
        )

    print(f"Naive cookies.txt file written to: {filepath} (expires in {expires_in_days} days)")
|
|
|
generate_naive_cookies_txt("./cookies.txt") |
|
|
|
# Cell no.11 (id: OSO7yETC2OGl) |
|
class YouTubeURLMemory:
    """Hold a YouTube URL together with the format metadata fetched for it.

    Assigning `url` (or calling `set_url_with_cookie`) normalises the URL,
    asks yt-dlp for its metadata and sorts the reported formats into four
    dictionaries keyed by `format_id`: video-only, audio-only, combined
    (video+audio) and image (storyboard) options.
    """

    __is_yt_url = False
    __url = "https://hommy.tistory.com/"
    __use_account = False
    __cookie_fpath = "./cookies.txt"

    # Scheme (see https://datatracker.ietf.org/doc/html/rfc3986#section-3.1)
    __regex_str_1 = r"(\b[a-zA-Z][a-zA-Z0-9+.-]*://)?"
    # Query parameters located before the video ID
    __regex_str_2 = r"([a-zA-Z0-9_-]+=[^=&]+&|&)*"
    # Query parameters located after the video ID
    __regex_str_3 = r"(&[a-zA-Z0-9_-]+=[^=&]+|&)*"

    # Patterns are tried in order; each exposes the video ID as group "vid".
    # Raw f-strings keep `\.` and `\?` as regex escapes instead of invalid
    # string escapes.
    __yt_patterns = (
        # youtube.com/watch?v=ID (bare, www., m. and music. hosts)
        re.compile(rf"{__regex_str_1}(?:www\.|m\.|music\.)?youtube\.com/watch\?{__regex_str_2}v=(?P<vid>[a-zA-Z0-9_-]+){__regex_str_3}"),
        # youtu.be/watch?v=ID
        re.compile(rf"{__regex_str_1}youtu\.be/watch\?{__regex_str_2}v=(?P<vid>[a-zA-Z0-9_-]+){__regex_str_3}"),
        # youtu.be/ID
        re.compile(rf"{__regex_str_1}youtu\.be/(?P<vid>[a-zA-Z0-9_-]+){__regex_str_3}"),
        # youtube.com/shorts/ID
        re.compile(rf"{__regex_str_1}(?:www\.|m\.)?youtube\.com/shorts/(?P<vid>[a-zA-Z0-9_-]+)(\?{__regex_str_2})?"),
    )

    __thumbnail_resolution_info = \
        ("default", "mqdefault", "sddefault", "maxresdefault")

    def __init__(self) -> None:
        # Per-instance containers, so instances never share mutable state
        # through class attributes.
        self.__video_options = dict()
        self.__audio_options = dict()
        self.__combined_options = dict()
        self.__image_options = dict()

    @property
    def is_yt_url(self):
        """Whether the last submitted URL was accepted and its metadata fetched."""
        return self.__is_yt_url

    @property
    def url(self):
        """The stored (normalised) URL."""
        return self.__url

    @url.setter
    def url(self, url: str):
        self.__input_youtube_url(url)

    def set_url_with_cookie(self, url: str, cookie_fpath=''):
        """Set the URL, authenticating with a cookie file when it is usable.

        If `cookie_fpath` is not a readable file, the problem is printed and
        the URL is processed without cookies.
        """
        try:
            if not os.path.isfile(cookie_fpath):
                raise FileNotFoundError(f"Your cookie file path '{cookie_fpath}' is invalid.")
            with open(cookie_fpath, 'r', encoding="utf-8") as f:
                f.read()  # only verifies that the cookie file can be read
            self.__use_account = True
            self.__cookie_fpath = cookie_fpath
        except (OSError, TypeError, ValueError) as e:
            # OSError: missing/unreadable file; TypeError: non-path argument;
            # ValueError: undecodable content or an invalid path string.
            print(f"{e.__class__.__name__}: {str(e)}")
            print()
            print("Ignoring cookie file...")
            self.__use_account = False
            self.__cookie_fpath = ''
        self.__input_youtube_url(url)

    def __repr__(self): return self.__str__()

    def __str__(self):
        return f"YouTubeURLMemory(url='{self.__url}', is_yt_url={self.__is_yt_url})"

    def refresh_memory(self):
        """Forget the stored URL and every cached format option."""
        self.__is_yt_url = False
        self.__url = ''
        self.__video_options = dict()
        self.__audio_options = dict()
        self.__combined_options = dict()
        self.__image_options = dict()

    def __extract_video_id(self, url):
        """Return the video ID found in `url`; raise UnsupportedError if none matches."""
        url_str = str(url)
        for pattern in self.__yt_patterns:
            if m := pattern.match(url_str):
                return m.group("vid")
        raise UnsupportedError(url)

    def __input_youtube_url(self, url: str):
        """Normalise `url`, fetch its metadata and fill the option dictionaries.

        Failures are reported with `print` and leave the instance in its
        freshly reset state (`is_yt_url` stays False).
        """
        self.refresh_memory()

        # Step 1: local URL normalisation. A failure here means the input
        # does not look like a YouTube URL at all.
        try:
            url = self.clean_youtube_url(url)
        except (AttributeError, TypeError, UnsupportedError) as e:
            print(f"UnsupportedError: '{str(url)}' {type(url)} is not a valid URL. {str(e)}")
            return

        # Step 2: metadata lookup through yt-dlp. Handled separately so the
        # cookie hint is reachable; it used to sit behind an earlier handler
        # that already caught UnsupportedError.
        try:
            info_dicts = self.validate_youtube_url(url)
        except KeyError as e:
            print(f"UnsupportedError: `yt-dlp` received no media-related metadata. Check if '{str(url)}' is a valid YouTube URL. {str(e)}")
            return
        except (UnsupportedError, yt_dlp.utils.DownloadError) as e:
            # yt-dlp refused or failed to extract the video; content that
            # needs a signed-in account is one possible cause, so show the hint.
            print(f"{e.__class__.__name__}: `yt-dlp` could not extract '{str(url)}'. {str(e)}")
            warn_cookie_use()
            return
        except Exception as e:
            print(f"{e.__class__.__name__}: {str(e)}")
            return

        self.__is_yt_url = True
        self.__url = url

        # Classify every reported format by what it contains.
        for info_dict in info_dicts["formats"]:
            if info_dict.get("format_note") == "storyboard":
                self.__image_options[info_dict["format_id"]] = info_dict
            elif info_dict.get("resolution") == "audio only":
                self.__audio_options[info_dict["format_id"]] = info_dict
            elif info_dict.get("acodec") == "none":
                self.__video_options[info_dict["format_id"]] = info_dict
            elif info_dict.get("acodec") and info_dict.get("vcodec"):
                self.__combined_options[info_dict["format_id"]] = info_dict
            else:
                print(f"UnsupportedError: Unrecognized metadata: \n{str(info_dict)}")

    def clean_youtube_url(self, url: str):
        """Return `url` rewritten as `https://www.youtube.com/watch?v=<id>`.

        Raises:
            UnsupportedError: if no known YouTube URL shape matches.
        """
        return f"https://www.youtube.com/watch?v={self.__extract_video_id(url)}"

    def validate_youtube_url(self, url: str):
        """Fetch and return yt-dlp's metadata for `url` without downloading.

        Raises:
            KeyError: if the metadata contains no formats.
        """
        ydl_opts = {
            "quiet": True,     # suppress download output
            "simulate": True,  # do not actually download
            "format": "best",
        }
        if self.__use_account:
            ydl_opts["cookiefile"] = self.__cookie_fpath

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dicts = ydl.extract_info(url, download=False)

        if not info_dicts.get("formats"):
            raise KeyError("formats")

        return info_dicts

    def scrap_thumbnail_urls(self, url: str):
        """Return the thumbnail URLs of the video that respond successfully.

        Every known resolution name is probed as `.jpg` and `.webp` on
        img.youtube.com. URLs answering 404 are skipped; any other HTTP
        error is re-raised.

        Raises:
            UnsupportedError: if `url` is not a recognised YouTube URL.
        """
        vid = self.__extract_video_id(url)

        urls = list()
        for res in self.__thumbnail_resolution_info:
            for ext in ("jpg", "webp"):
                target_url = f"https://img.youtube.com/vi/{vid}/{res}.{ext}"
                try:
                    # A timeout keeps one stalled request from hanging the UI.
                    with urllib.request.urlopen(target_url, timeout=10):
                        urls.append(target_url)
                except urllib.request.HTTPError as e:
                    if e.code != 404:
                        raise

        return urls

    @property
    def video_options(self):
        """Video-only formats keyed by format ID."""
        return self.__video_options

    @property
    def audio_options(self):
        """Audio-only formats keyed by format ID."""
        return self.__audio_options

    @property
    def combined_options(self):
        """Formats carrying both video and audio, keyed by format ID."""
        return self.__combined_options

    @property
    def image_options(self):
        """Image (storyboard / thumbnail) entries keyed by format ID."""
        return self.__image_options

    @image_options.setter
    def image_options(self, dict_obj: dict):
        # Replace the image table and collect the discarded one right away.
        self.__image_options = dict_obj
        gc.collect()
|
|
|
# Cell no.12 (id: Et87dYjY2Yy9) |
|
# Binary (IEC) prefixes, one step per factor of 1024.
bin_units = ('', "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi")
# Short decimal suffixes, one step per factor of 1000.
dec_units = ('', 'k', 'm', 'b', 't', 'q', "qu", 's', "sp", 'o')


def bytes_num_to_bin(num: "int | float | None") -> "str | None":
    """Format a byte count with a binary prefix, e.g. 1536 -> "1.50KiB".

    Returns None when `num` is None. Values beyond the largest known prefix
    stay expressed in that prefix instead of raising IndexError.
    """
    if num is None:
        return None
    index = 0
    last_index = len(bin_units) - 1
    while num >= 1024 and index < last_index:
        num /= 1024
        index += 1
    return f"{num:.2f}{bin_units[index]}B"


def natrl_num_to_dec(num: "int | float | None") -> "str | None":
    """Abbreviate a non-negative number with a decimal suffix, e.g. 10000 -> "10k".

    The value is divided by 1000 only while it is at least 10000, so up to
    four digits are kept before the suffix. Returns None when `num` is None.
    Values beyond the largest known suffix stay expressed in that suffix
    instead of raising IndexError.
    """
    if num is None:
        return None
    index = 0
    last_index = len(dec_units) - 1
    while num >= 10000 and index < last_index:
        num /= 1000
        index += 1
    return f"{round(num)}{dec_units[index]}"
|
|
|
# Cell no.13 (id: tOHgmwjQ4C_h) |
|
"""markdown |
|
###### widgets |
|
""" |
|
|
|
# Cell no.14 (id: 8b-oO4NB4J4w) |
|
from bs4 import BeautifulSoup |
|
from enum import Enum |
|
import gc |
|
from natsort import natsorted |
|
import os |
|
import pandas as pd |
|
import re |
|
import urllib.request |
|
import yt_dlp |
|
|
|
from IPython.display import display, clear_output |
|
import ipywidgets as widgets |
|
|
|
# Cell no.15 (id: DJ2yKWlpPkg7) |
|
def create_html_table(df):
    """Render `df` as an HTML table preceded by the stylesheet it relies on.

    The returned string is a `<style>` element (widget layout rules plus
    light- and dark-mode table themes) followed by the table markup, which
    carries the `options-table` class. Cell contents are not HTML-escaped,
    so cells may contain markup such as links.
    """
    stylesheet = """
    <style>
        /* widget layout */
        .subwidget-dropdown {
            margin-left: 80px !important;
            margin-top: 10px !important;
        }

        .subwidget-dropdown > label {
            width: 180px !important;
        }

        .subwidget-dropdown > select {
            width: 70px !important;
        }

        .subwidget-button {
            margin-top: 20px !important;
        }

        table.options-table {
            margin-bottom: 30px !important;
        }

        /* light mode styles */
        @media (prefers-color-scheme: light) {
            .options-table {
                border-collapse: collapse;
                width: 100%;
                margin: 10px 0;
                background-color: #ffffff;
            }
            .options-table td {
                border: 1px solid #ddd;
                padding: 8px;
                text-align: left;
            }
            .options-table th {
                border: 1px solid #ddd;
                padding: 8px;
                text-align: left;
                background-color: #f2f2f2;
                color: #333;
            }
            table.options-table thead tr:first-child th {
                background-color: transparent !important;
                position: relative;
                color: #333;
                text-align: center;
                transform: rotate(30deg);
                padding: 2em 0;
                white-space: nowrap;
            }
            .options-table tr:nth-child(even) {
                background-color: #f9f9f9;
            }
            .options-table tr:hover {
                background-color: #f5f5f5;
            }
        }

        /* dark mode styles */
        @media (prefers-color-scheme: dark) {
            .options-table {
                border-collapse: collapse;
                width: 100%;
                margin: 10px 0;
                background-color: #2d2d2d;
                color: #e0e0e0;
            }
            .options-table td {
                border: 1px solid #404040;
                padding: 8px;
                text-align: left;
            }
            .options-table th {
                border: 1px solid #404040;
                padding: 8px;
                text-align: left;
                background-color: #383838;
                color: #ffffff;
            }
            table.options-table thead tr:first-child th {
                background-color: transparent !important;
                position: relative;
                color: #fff;
                text-align: center;
                transform: rotate(30deg);
                padding: 2em 0;
                white-space: nowrap;
            }
            .options-table tr:nth-child(even) {
                background-color: #333333;
            }
            .options-table tr:hover {
                background-color: #404040;
            }
            .options-table a {
                color: #66b3ff;
            }
            .options-table a:hover {
                color: #99ccff;
            }
        }
    </style>
    """
    table_markup = df.to_html(classes="options-table", escape=False)
    return "".join((stylesheet, table_markup))
|
|
|
# Cell no.16 (id: eBJYsHe14Et5) |
|
YT_URL = YouTubeURLMemory() |
|
|
|
class DownloadMode(Enum):
    """Enumeration of the download modes offered by the mode dropdown."""
    # Video stream only
    VIDEO_ONLY = 1
    # Audio stream only
    AUDIO_ONLY = 2
    # Separately chosen video and audio streams ("video+audio (advanced)")
    SEPARATE_VIDEO_N_AUDIO = 3
    # A format that already contains video and audio ("video+audio (simple)")
    PRE_MERGED_VIDEO_N_AUDIO = 4
    # Thumbnail and storyboard images ("image")
    THUMBNAILS_N_STORYBOARDS = 5
|
|
|
# Cell no.17 (id: -Wnpobvm-Qkk) |
|
# IPython widgets

# Output area that the callbacks print and render into.
output_area = widgets.Output()

# Text box for the YouTube URL and the button that submits it.
text_input = widgets.Text(
    description="URL:",
    placeholder="Enter YouTube URL..."
)
submit_button = widgets.Button(
    description="CHECK!",
    button_style="info"
)

# Download-mode selector; each option maps a label to a DownloadMode member.
# It starts with no selection (value=None).
dropdown = widgets.Dropdown(
    options=[
        ("video only", DownloadMode.VIDEO_ONLY),
        ("audio only", DownloadMode.AUDIO_ONLY),
        ("video+audio (advanced)", DownloadMode.SEPARATE_VIDEO_N_AUDIO),
        ("video+audio (simple)", DownloadMode.PRE_MERGED_VIDEO_N_AUDIO),
        ("image", DownloadMode.THUMBNAILS_N_STORYBOARDS),
    ],
    description="download type:",
    value=None
)
|
|
|
class MediaChoiceWidget:
    """Base class of the per-mode media selection widgets.

    Provides a "DOWNLOAD!" button, an initially empty `options_view`
    container that subclasses fill, and `widget`, a vertical box stacking
    the two.
    """

    def __init__(self) -> None:
        super().__init__()

        button = widgets.Button(button_style="primary", description="DOWNLOAD!")
        button.add_class("subwidget-button")
        options_box = widgets.VBox(children=[])

        self.download_button = button
        self.options_view = options_box
        self.widget = widgets.VBox(children=[button, options_box])
|
|
|
class Mode1Widget(MediaChoiceWidget):
    """Widget for the video-only download mode."""

    def __init__(self) -> None:
        super().__init__()
        self.dropdown_video = widgets.Dropdown(
            options=[],
            description="chosen video option:",
            value=None,
            layout=widgets.Layout(width="300px")
        )
        self.dropdown_video.add_class("subwidget-dropdown")

        self.options_view.children = [
            widgets.HTML(value="<h4 style='margin-left: 20px; margin-top: 15px;'>Video Options:</h4>"),
            self.dropdown_video,
            widgets.HTML(value="")  # video table
        ]

    @staticmethod
    def _video_sort_key(fmt: dict) -> tuple:
        """Sort key: shorter picture side, then FPS, then (approximate) file size.

        Missing or None fields count as 0 so incomplete metadata cannot
        break the sort.
        """
        return (
            min(fmt.get("width") or 0, fmt.get("height") or 0),
            fmt.get("fps") or 0,
            fmt.get("filesize") or fmt.get("filesize_approx") or 0,
        )

    @staticmethod
    def _filesize_label(row: dict) -> str:
        """Exact size if known, else "~ <approximate size>", else "unknown"."""
        if row.get("filesize"):
            return bytes_num_to_bin(row["filesize"])
        if row.get("filesize_approx"):
            return "~ " + bytes_num_to_bin(row["filesize_approx"])
        return "unknown"

    def set_video(self, info_dicts: dict):
        """Fill the dropdown and the table with the given video formats.

        Args:
            info_dicts: Format metadata dictionaries keyed by format ID.
                Entries are listed from best to worst and the first one is
                pre-selected. An empty mapping clears the widget.
        """
        ordered = natsorted(info_dicts.values(), key=self._video_sort_key, reverse=True)
        if not ordered:
            self.refresh_memory()
            return

        format_ids = [fmt["format_id"] for fmt in ordered]
        self.dropdown_video.options = [(fid, fid) for fid in format_ids]
        self.dropdown_video.value = format_ids[0]

        rows = [
            {
                "ID": row["format_id"],
                "extension": row.get("ext"),
                "resolution": row.get("resolution"),
                "FPS": row.get("fps"),
                "video_codec": row.get("vcodec"),
                "protocol": row.get("protocol"),
                "filesize": self._filesize_label(row),
                "total_bitrate": natrl_num_to_dec(row.get("tbr")),
                "variable_bitrate": natrl_num_to_dec(row.get("vbr")),
            }
            for row in ordered
        ]
        df = pd.DataFrame(rows)
        df.set_index("ID", inplace=True)
        self.options_view.children[2].value = create_html_table(df)

    def refresh_memory(self):
        """Clear the selection, the dropdown options and the table."""
        self.dropdown_video.value = None
        self.dropdown_video.options = []
        self.options_view.children[2].value = ""
|
|
|
class Mode2Widget(MediaChoiceWidget):
    """Widget for the audio-only download mode."""

    def __init__(self) -> None:
        super().__init__()
        self.dropdown_audio = widgets.Dropdown(
            options=[],
            description="chosen audio option:",
            value=None,
            layout=widgets.Layout(width="300px")
        )
        self.dropdown_audio.add_class("subwidget-dropdown")

        self.options_view.children = [
            widgets.HTML(value="<h4 style='margin-left: 20px; margin-top: 15px;'>Audio Options:</h4>"),
            self.dropdown_audio,
            widgets.HTML(value="")  # audio table
        ]

    def set_audio(self, info_dicts: dict):
        """Fill the dropdown and the table with the given audio formats.

        Args:
            info_dicts: Format metadata dictionaries keyed by format ID.
                Entries are listed by file size, largest first, and the
                first one is pre-selected. An empty mapping clears the widget.
        """
        ordered = natsorted(
            info_dicts.values(),
            # A missing or None size counts as 0 so it cannot break the sort.
            key=lambda fmt: fmt.get("filesize") or 0,
            reverse=True,
        )
        if not ordered:
            self.refresh_memory()
            return

        format_ids = [fmt["format_id"] for fmt in ordered]
        self.dropdown_audio.options = [(fid, fid) for fid in format_ids]
        self.dropdown_audio.value = format_ids[0]

        rows = [
            {
                "ID": row["format_id"],
                "extension": row.get("ext"),
                "channels": row.get("audio_channels", None),
                "audio_codec": row.get("acodec", "unknown"),
                "protocol": row.get("protocol"),
                "filesize": bytes_num_to_bin(row["filesize"])
                            if row.get("filesize")
                            else None,
                "average_bitrate": natrl_num_to_dec(row.get("abr")),
                "audio_sample_rate": natrl_num_to_dec(row.get("asr")),
            }
            for row in ordered
        ]
        df = pd.DataFrame(rows)
        df.set_index("ID", inplace=True)
        self.options_view.children[2].value = create_html_table(df)

    def refresh_memory(self):
        """Clear the selection, the dropdown options and the table."""
        self.dropdown_audio.value = None
        self.dropdown_audio.options = []
        self.options_view.children[2].value = ""
|
|
|
class Mode3Widget(MediaChoiceWidget):
    """Widget for choosing a video format and an audio format separately."""

    def __init__(self) -> None:
        super().__init__()
        self.dropdown_video = widgets.Dropdown(
            options=[],
            description="chosen video option:",
            value=None,
            layout=widgets.Layout(width="300px")
        )
        self.dropdown_video.add_class("subwidget-dropdown")

        self.dropdown_audio = widgets.Dropdown(
            options=[],
            description="chosen audio option:",
            value=None,
            layout=widgets.Layout(width="300px")
        )
        self.dropdown_audio.add_class("subwidget-dropdown")

        self.options_view.children = [
            widgets.HTML(value="<h4 style='margin-left: 20px; margin-top: 15px;'>Video Options:</h4>"),
            self.dropdown_video,
            widgets.HTML(value=""),  # video table (index 2)
            widgets.HTML(value="<h4 style='margin-left: 20px; margin-top: 15px;'>Audio Options:</h4>"),
            self.dropdown_audio,
            widgets.HTML(value=""),  # audio table (index 5)
        ]

    @staticmethod
    def _video_sort_key(fmt: dict) -> tuple:
        """Sort key: shorter picture side, then FPS, then (approximate) file size.

        Missing or None fields count as 0 so incomplete metadata cannot
        break the sort.
        """
        return (
            min(fmt.get("width") or 0, fmt.get("height") or 0),
            fmt.get("fps") or 0,
            fmt.get("filesize") or fmt.get("filesize_approx") or 0,
        )

    @staticmethod
    def _filesize_label(row: dict) -> str:
        """Exact size if known, else "~ <approximate size>", else "unknown"."""
        if row.get("filesize"):
            return bytes_num_to_bin(row["filesize"])
        if row.get("filesize_approx"):
            return "~ " + bytes_num_to_bin(row["filesize_approx"])
        return "unknown"

    def set_video(self, info_dicts: dict):
        """Fill the video dropdown and table with the given video formats.

        Args:
            info_dicts: Format metadata dictionaries keyed by format ID.
                Entries are listed from best to worst and the first one is
                pre-selected. An empty mapping clears the video part.
        """
        ordered = natsorted(info_dicts.values(), key=self._video_sort_key, reverse=True)
        if not ordered:
            self.dropdown_video.value = None
            self.dropdown_video.options = []
            self.options_view.children[2].value = ""
            return

        format_ids = [fmt["format_id"] for fmt in ordered]
        self.dropdown_video.options = [(fid, fid) for fid in format_ids]
        self.dropdown_video.value = format_ids[0]

        rows = [
            {
                "ID": row["format_id"],
                "extension": row.get("ext"),
                "resolution": row.get("resolution"),
                "FPS": row.get("fps"),
                "video_codec": row.get("vcodec"),
                "protocol": row.get("protocol"),
                "filesize": self._filesize_label(row),
                "total_bitrate": natrl_num_to_dec(row.get("tbr")),
                "variable_bitrate": natrl_num_to_dec(row.get("vbr")),
            }
            for row in ordered
        ]
        df = pd.DataFrame(rows)
        df.set_index("ID", inplace=True)
        self.options_view.children[2].value = create_html_table(df)

    def set_audio(self, info_dicts: dict):
        """Fill the audio dropdown and table with the given audio formats.

        Args:
            info_dicts: Format metadata dictionaries keyed by format ID.
                Entries are listed by file size, largest first, and the
                first one is pre-selected. An empty mapping clears the
                audio part.
        """
        ordered = natsorted(
            info_dicts.values(),
            # A missing or None size counts as 0 so it cannot break the sort.
            key=lambda fmt: fmt.get("filesize") or 0,
            reverse=True,
        )
        if not ordered:
            self.dropdown_audio.value = None
            self.dropdown_audio.options = []
            self.options_view.children[5].value = ""
            return

        format_ids = [fmt["format_id"] for fmt in ordered]
        self.dropdown_audio.options = [(fid, fid) for fid in format_ids]
        self.dropdown_audio.value = format_ids[0]

        rows = [
            {
                "ID": row["format_id"],
                "extension": row.get("ext"),
                "channels": row.get("audio_channels", None),
                "audio_codec": row.get("acodec", "unknown"),
                "protocol": row.get("protocol"),
                "filesize": bytes_num_to_bin(row["filesize"])
                            if row.get("filesize")
                            else None,
                "average_bitrate": natrl_num_to_dec(row.get("abr")),
                "audio_sample_rate": natrl_num_to_dec(row.get("asr")),
            }
            for row in ordered
        ]
        df = pd.DataFrame(rows)
        df.set_index("ID", inplace=True)
        self.options_view.children[5].value = create_html_table(df)

    def refresh_memory(self):
        """Clear both selections, both dropdowns and both tables."""
        self.dropdown_video.value = None
        self.dropdown_video.options = []
        self.options_view.children[2].value = ""
        self.dropdown_audio.value = None
        self.dropdown_audio.options = []
        self.options_view.children[5].value = ""
|
|
|
class Mode4Widget(MediaChoiceWidget):
    """Widget for downloading a format that already contains video and audio."""

    def __init__(self) -> None:
        super().__init__()
        self.dropdown_media = widgets.Dropdown(
            options=[],
            description="chosen video+audio option:",
            value=None,
            layout=widgets.Layout(width="300px")
        )
        self.dropdown_media.add_class("subwidget-dropdown")

        self.options_view.children = [
            widgets.HTML(value="<h4 style='margin-left: 20px; margin-top: 15px;'>Merged Options:</h4>"),
            self.dropdown_media,
            widgets.HTML(value="")  # merged-format table
        ]

    @staticmethod
    def _media_sort_key(fmt: dict) -> tuple:
        """Sort key: shorter picture side, then FPS, then (approximate) file size.

        Missing or None fields count as 0 so incomplete metadata cannot
        break the sort.
        """
        return (
            min(fmt.get("width") or 0, fmt.get("height") or 0),
            fmt.get("fps") or 0,
            fmt.get("filesize") or fmt.get("filesize_approx") or 0,
        )

    @staticmethod
    def _filesize_label(row: dict) -> str:
        """Exact size if known, else "~ <approximate size>", else "unknown"."""
        if row.get("filesize"):
            return bytes_num_to_bin(row["filesize"])
        if row.get("filesize_approx"):
            return "~ " + bytes_num_to_bin(row["filesize_approx"])
        return "unknown"

    def set_media(self, info_dicts: dict):
        """Fill the dropdown and the table with the given combined formats.

        Args:
            info_dicts: Format metadata dictionaries keyed by format ID.
                Entries are listed from best to worst and the first one is
                pre-selected. An empty mapping clears the widget.
        """
        ordered = natsorted(info_dicts.values(), key=self._media_sort_key, reverse=True)
        if not ordered:
            self.refresh_memory()
            return

        format_ids = [fmt["format_id"] for fmt in ordered]
        self.dropdown_media.options = [(fid, fid) for fid in format_ids]
        self.dropdown_media.value = format_ids[0]

        rows = [
            {
                "ID": row["format_id"],
                "extension": row.get("ext"),
                "resolution": row.get("resolution"),
                "FPS": row.get("fps"),
                "video_codec": row.get("vcodec"),
                "audio_codec": row.get("acodec", "unknown"),
                "protocol": row.get("protocol"),
                "filesize": self._filesize_label(row),
                "audio_channels": row.get("audio_channels", None),
                "total_bitrate": natrl_num_to_dec(row.get("tbr")),
                "variable_bitrate": natrl_num_to_dec(row.get("vbr")),
                "audio_sample_rate": natrl_num_to_dec(row.get("asr")),
            }
            for row in ordered
        ]
        df = pd.DataFrame(rows)
        df.set_index("ID", inplace=True)
        self.options_view.children[2].value = create_html_table(df)

    def refresh_memory(self):
        """Clear the selection, the dropdown options and the table."""
        self.dropdown_media.value = None
        self.dropdown_media.options = []
        self.options_view.children[2].value = ""
|
|
|
class Mode5Widget(MediaChoiceWidget):
    """Widget for downloading thumbnails and storyboard images."""

    def __init__(self) -> None:
        super().__init__()
        # Known thumbnail resolution names, in the order they should be listed.
        self.thumbnail_resolution_info = \
            ("default", "mqdefault", "sddefault", "maxresdefault")
        self.dropdown_image = widgets.Dropdown(
            options=[],
            description="chosen image option:",
            value=None,
            layout=widgets.Layout(width="300px")
        )
        self.dropdown_image.add_class("subwidget-dropdown")

        self.options_view.children = [
            widgets.HTML(value="<h4 style='margin-left: 20px; margin-top: 15px;'>Image Options:</h4>"),
            self.dropdown_image,
            widgets.HTML(value="")  # image table
        ]

    def custom_order(self, value: str):
        """Return the sort key of a format ID.

        Purely numeric IDs come first (in numeric order), then the known
        thumbnail resolution names (in their declared order), then every
        other ID in case-insensitive alphabetical order.
        """
        if value.isdigit():
            return (0, int(value))
        if value in self.thumbnail_resolution_info:
            return (1, self.thumbnail_resolution_info.index(value))
        return (2, value.lower())

    def set_image(self, info_dicts: dict):
        """Fill the dropdown and the table with the given image entries.

        Args:
            info_dicts: Image metadata dictionaries keyed by format ID; each
                needs "format_id", "ext", "resolution" and "url". The first
                entry in `custom_order` order is pre-selected. An empty
                mapping clears the widget.
        """
        ordered = sorted(
            info_dicts.values(),
            key=lambda entry: self.custom_order(entry["format_id"]),
        )
        if not ordered:
            self.refresh_memory()
            return

        format_ids = [entry["format_id"] for entry in ordered]
        self.dropdown_image.options = [(fid, fid) for fid in format_ids]
        self.dropdown_image.value = format_ids[0]

        rows = []
        for row in ordered:
            link_target = row["url"]
            rows.append({
                "ID": row["format_id"],
                "extension": row["ext"],
                "resolution": row["resolution"],
                "url": f'<a href="{link_target}" target="_blank">Preview</a>',
            })
        df = pd.DataFrame(rows)
        df.set_index("ID", inplace=True)
        self.options_view.children[2].value = create_html_table(df)

    def refresh_memory(self):
        """Clear the selection, the dropdown options and the table."""
        self.dropdown_image.value = None
        self.dropdown_image.options = []
        self.options_view.children[2].value = ""
|
|
|
# One shared widget instance per download mode; the callbacks below refresh,
# fill and display these objects.
m1_widget = Mode1Widget()  # video only
m2_widget = Mode2Widget()  # audio only
m3_widget = Mode3Widget()  # separately chosen video + audio
m4_widget = Mode4Widget()  # pre-merged video + audio
m5_widget = Mode5Widget()  # thumbnails and storyboards
|
|
|
# Cell no.18 (id: LIB4jycb-QcV) |
|
# IPython 위젯 함수 |
|
|
|
def on_submit_button_click(b):
    """Handle a click on the "CHECK!" button.

    Clears the output area, resets every mode widget, then submits the text
    box content (together with `./cookies.txt`) to `YT_URL`. When the URL is
    accepted the mode dropdown is reset and displayed; otherwise a message
    is printed. `b` is the clicked button and is not used.
    """
    with output_area:
        clear_output()

        # Drop whatever the previous URL left behind in the mode widgets.
        for mode_widget in (m1_widget, m2_widget, m3_widget, m4_widget, m5_widget):
            mode_widget.refresh_memory()

        print(f"Submitted {text_input.value}\n")

        try:
            YT_URL.set_url_with_cookie(text_input.value, cookie_fpath="./cookies.txt")
            if not YT_URL.is_yt_url:
                print("Not a valid YouTube URL")
            else:
                dropdown.value = None
                print(f"\nFetched metadata of {YT_URL.url}\n")
                display(dropdown)
        except Exception as e:
            print(f"Error setting URL: {str(e)}")
|
|
|
def on_dropdown_change(change):
    """Handle a change of the download-mode dropdown.

    Clears the output area, re-displays the dropdown and, when a valid URL
    is stored and the widget of the chosen mode has no selection yet, fills
    that widget from the metadata held by `YT_URL` and displays it.

    Args:
        change: Change notification; `change["new"]` is the newly selected
            `DownloadMode` (or another value, in which case only the
            dropdown is shown).
    """
    with output_area:
        clear_output()
        display(dropdown)

        selected_value = change["new"]

        # Every mode needs fetched metadata; without it only the dropdown shows.
        if not YT_URL.is_yt_url:
            return

        if selected_value == DownloadMode.VIDEO_ONLY:
            if not m1_widget.dropdown_video.value:
                if YT_URL.video_options:
                    m1_widget.set_video(YT_URL.video_options)
                display(m1_widget.widget)

        elif selected_value == DownloadMode.AUDIO_ONLY:
            if not m2_widget.dropdown_audio.value:
                if YT_URL.audio_options:
                    m2_widget.set_audio(YT_URL.audio_options)
                display(m2_widget.widget)

        elif selected_value == DownloadMode.SEPARATE_VIDEO_N_AUDIO:
            if not m3_widget.dropdown_video.value:
                if YT_URL.video_options:
                    m3_widget.set_video(YT_URL.video_options)
                if YT_URL.audio_options:
                    m3_widget.set_audio(YT_URL.audio_options)
                display(m3_widget.widget)

        elif selected_value == DownloadMode.PRE_MERGED_VIDEO_N_AUDIO:
            if not m4_widget.dropdown_media.value:
                if YT_URL.combined_options:
                    m4_widget.set_media(YT_URL.combined_options)
                display(m4_widget.widget)

        elif selected_value == DownloadMode.THUMBNAILS_N_STORYBOARDS:
            if not m5_widget.dropdown_image.value:
                # Keys starting with 't' (e.g. "thumb1") mark an image table
                # that was already extended with thumbnails.
                includes_thumbnails = any(
                    str(k).startswith('t') for k in YT_URL.image_options
                )
                if not includes_thumbnails:
                    new_rows = dict()

                    # One row per storyboard. As before, when a storyboard
                    # has several fragments the last fragment's URL is the
                    # one that is kept.
                    for k, v in YT_URL.image_options.items():
                        fragments = v.get("fragments")
                        if not fragments:
                            continue
                        new_rows[k] = {
                            "format_id": v["format_id"],
                            "ext": "jpg",
                            "resolution": v["resolution"]+"xN",
                            "url": fragments[-1]["url"],
                        }

                    # Thumbnail probing needs the network; a failure must
                    # not hide the storyboards gathered above.
                    try:
                        thumbnail_urls = YT_URL.scrap_thumbnail_urls(YT_URL.url)
                    except Exception as e:
                        print(f"{e.__class__.__name__}: {str(e)}")
                        thumbnail_urls = []

                    for i, thumbnail_url in enumerate(thumbnail_urls, start=1):
                        # File name "<resolution name>.<extension>"
                        res, ext = thumbnail_url.split('/')[-1].split('.')
                        k = f"thumb{i}"
                        new_rows[k] = {
                            "format_id": k,
                            "ext": ext,
                            "resolution": res,
                            "url": thumbnail_url,
                        }
                    YT_URL.image_options = new_rows

                if YT_URL.image_options:
                    m5_widget.set_image(YT_URL.image_options)
                display(m5_widget.widget)
|
|
|
def on_m1_button_click(b):
    """Handle a click on the Mode1 (video only) download button.

    Downloads the selected video format of `YT_URL.url` into `downloads/`
    and renames the file to `<video id>_f<format id>.<ext>`. `b` is the
    clicked button and is not used.
    """
    video_format = m1_widget.dropdown_video.value
    if not video_format:
        print("ERROR: No video format selected.")
        return

    ydl_opts = {
        "format": video_format,
        "outtmpl": "downloads/%(title)s.%(ext)s",
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dicts = ydl.extract_info(YT_URL.url, download=True)
        # Ask yt-dlp for the file name it derived from the output template.
        # Rebuilding it from the raw title misses whenever the title holds
        # characters that yt-dlp sanitizes in file names.
        temp_video_path = ydl.prepare_filename(info_dicts)

    # Extension including the leading dot, e.g. ".mp4"
    video_ext = os.path.splitext(temp_video_path)[1]

    # Rename the downloaded file
    vid = info_dicts.get("id")
    new_video_path = f"downloads/{vid}_f{video_format}{video_ext}"
    if os.path.exists(temp_video_path):
        # os.replace also overwrites the result of an earlier identical download.
        os.replace(temp_video_path, new_video_path)
        print(f"\nSuccessfully downloaded video to {new_video_path}\n")
    else:
        print(f"ERROR: Expected downloaded file not found at: {temp_video_path}")
|
|
|
def on_m2_button_click(b):
    """Handle a click on the Mode2 (audio only) download button.

    Downloads the selected audio format of `YT_URL.url` into `downloads/`
    and renames the file to `<video id>_f<format id>.<ext>`. `b` is the
    clicked button and is not used.
    """
    audio_format = m2_widget.dropdown_audio.value
    if not audio_format:
        print("ERROR: No audio format selected.")
        return

    ydl_opts = {
        "format": audio_format,
        "outtmpl": "downloads/%(title)s.%(ext)s",
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dicts = ydl.extract_info(YT_URL.url, download=True)
        # Ask yt-dlp for the file name it derived from the output template.
        # Rebuilding it from the raw title misses whenever the title holds
        # characters that yt-dlp sanitizes in file names.
        temp_audio_path = ydl.prepare_filename(info_dicts)

    # Extension including the leading dot, e.g. ".m4a"
    audio_ext = os.path.splitext(temp_audio_path)[1]

    # Rename the downloaded file
    vid = info_dicts.get("id")
    new_audio_path = f"downloads/{vid}_f{audio_format}{audio_ext}"
    if os.path.exists(temp_audio_path):
        # os.replace also overwrites the result of an earlier identical download.
        os.replace(temp_audio_path, new_audio_path)
        print(f"\nSuccessfully downloaded audio to {new_audio_path}\n")
    else:
        print(f"ERROR: Expected downloaded file not found at: {temp_audio_path}")
|
|
|
def on_m3_button_click(b):
    """Handle a click on the Mode3 widget's download button.

    Downloads the video and audio formats selected in ``m3_widget`` for
    ``YT_URL.url``, has yt-dlp merge them into an mp4 under ``downloads/``,
    renames the merged file to
    ``downloads/<id>_f<video format>+<audio format>.mp4`` and finally removes
    the per-stream files that were kept during the merge.

    Args:
        b: The button instance supplied by the click callback (unused).
    """
    video_format = m3_widget.dropdown_video.value
    audio_format = m3_widget.dropdown_audio.value
    merge_output_format = "mp4"

    ydl_opts = {
        "format": f"{video_format}+{audio_format}",
        "outtmpl": "downloads/%(title)s.%(ext)s",
        "keepvideo": True,  # prevent deletion before merging
        "keepaudio": True,  # prevent deletion before merging
        "postprocessors": [{"key": "FFmpegMerger"}],
        "merge_output_format": merge_output_format,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dicts = ydl.extract_info(YT_URL.url, download=True)
        # Take the file-name stem from yt-dlp's own template expansion: it
        # may alter characters of the title, so paths assembled from the raw
        # title can miss the files that were actually written.
        path_stem = os.path.splitext(ydl.prepare_filename(info_dicts))[0]

    final_ext = merge_output_format
    final_path = f"{path_stem}.{final_ext}"

    # Rename the final merged file.
    vid = info_dicts.get("id")
    new_final_path = f"downloads/{vid}_f{video_format}+{audio_format}.{final_ext}"
    if os.path.exists(final_path):
        os.rename(final_path, new_final_path)
        print(f"\nSuccessfully downloaded video to {new_final_path}\n")
    else:
        print(f"ERROR: Expected merged file not found at: {final_path}")

    # Clean up the intermediate per-stream files, expected at
    # "<stem>.f<format id>.<ext>".
    for info_dict in info_dicts["formats"]:
        format_id = info_dict["format_id"]
        if format_id not in (video_format, audio_format):
            continue
        temp_stream_path = f"{path_stem}.f{format_id}.{info_dict['ext']}"
        if os.path.exists(temp_stream_path):
            os.remove(temp_stream_path)
|
|
|
def on_m4_button_click(b):
    """Handle a click on the Mode4 widget's download button.

    Downloads the format selected in ``m4_widget.dropdown_media`` for
    ``YT_URL.url`` into ``downloads/`` and then renames the file to
    ``downloads/<id>_f<format>.<ext>``.

    Args:
        b: The button instance supplied by the click callback (unused).
    """
    media_format = m4_widget.dropdown_media.value

    ydl_opts = {
        "format": media_format,
        "outtmpl": "downloads/%(title)s.%(ext)s",
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dicts = ydl.extract_info(YT_URL.url, download=True)
        # Let yt-dlp expand the output template itself: it may alter
        # characters of the title when building the file name, so a path
        # assembled from the raw title can miss the file that was written.
        temp_media_path = ydl.prepare_filename(info_dicts)

    # Rename the downloaded file to "<id>_f<format><.ext>".
    vid = info_dicts.get("id")
    media_ext = os.path.splitext(temp_media_path)[1]  # includes the leading dot
    new_media_path = f"downloads/{vid}_f{media_format}{media_ext}"
    if os.path.exists(temp_media_path):
        os.rename(temp_media_path, new_media_path)
        print(f"\nSuccessfully downloaded video to {new_media_path}\n")
    else:
        print(f"ERROR: Expected downloaded file not found at: {temp_media_path}")
|
|
|
def __get_row_data_from_html_table(html_table: str, row_id: str) -> list:
    """Extract the cell values of the table row whose header matches ``row_id``.

    Args:
        html_table: HTML markup containing a table with a ``<tbody>``.
        row_id: Expected text of the row's ``<th>`` cell; the cell text is
            stripped of surrounding whitespace before comparison.

    Returns:
        A list with one string per ``<td>`` cell of the first matching row:
        the stripped ``href`` of the cell's first link that has one, or the
        cell's stripped text when it contains no such link.

    Raises:
        ValueError: If no row with a matching ``<th>`` exists (including when
            the markup has no ``<tbody>`` at all).
    """
    soup = BeautifulSoup(html_table, "html.parser")

    # A table without <tbody> simply has no candidate rows.
    tbody = soup.find("tbody")
    rows = tbody.find_all("tr") if tbody is not None else []

    # Find the row whose header cell equals the requested ID.
    for row in rows:
        cell_header = row.find("th")
        if cell_header is None or cell_header.text.strip() != row_id:
            continue

        data_list = []
        for cell in row.find_all("td"):
            # Read the href from the very tag that was matched, so a leading
            # <a> without an href cannot cause a KeyError.
            link = cell.find("a", href=True)
            if link is not None:
                data_list.append(link["href"].strip())
            else:
                data_list.append(cell.text.strip())
        return data_list

    raise ValueError(f"Row with ID '{row_id}' not found in HTML table")
|
|
|
def on_m5_button_click(b):
    """Handle a click on the Mode5 widget's download button.

    Looks up the row for the option selected in ``m5_widget.dropdown_image``
    in the HTML options table, downloads the image URL found there and saves
    it as ``downloads/<resolution>.<ext>``. Problems are reported by printing
    an ``ERROR: ...`` line rather than by raising.

    Args:
        b: The button instance supplied by the click callback (unused).
    """
    selected_id = m5_widget.dropdown_image.value
    if not selected_id:
        print("ERROR: No image option selected!")
        return

    # NOTE(review): children[2] of options_view is presumably the HTML widget
    # holding the options table -- confirm against the widget definition.
    try:
        row_data = __get_row_data_from_html_table(
            m5_widget.options_view.children[2].value,
            selected_id,
        )
    except ValueError:
        # The lookup raises when the row is missing; report it through the
        # same message as an empty result instead of letting it escape.
        row_data = None

    # The row must supply at least extension, resolution and URL.
    if not row_data or len(row_data) < 3:
        print(f"ERROR: Could not find data for image option {selected_id}")
        return

    ext, resolution, url = row_data[:3]

    try:
        with urllib.request.urlopen(url) as response:
            image_data = response.read()

        # Use the resolution as the file name.
        output_path = f"downloads/{resolution}.{ext}"

        with open(output_path, "wb") as f:
            f.write(image_data)

        print(f"\nSuccessfully downloaded image to {output_path}\n")
    except Exception as e:
        # UI callback boundary: report any download/write failure to the user.
        print(f"ERROR: {str(e)}")
|
|
|
# Cell no.19 (id: 83SQgkVA-QUO)
# Wire up the widget callbacks
submit_button.on_click(on_submit_button_click)
dropdown.observe(on_dropdown_change, names="value")
# Register each mode widget's download button with its click handler.
for _mode_widget, _click_handler in (
    (m1_widget, on_m1_button_click),
    (m2_widget, on_m2_button_click),
    (m3_widget, on_m3_button_click),
    (m4_widget, on_m4_button_click),
    (m5_widget, on_m5_button_click),
):
    _mode_widget.download_button.on_click(_click_handler)

# Cell no.20 (id: 81V0B16x45bG)
# Make sure the output directory used by the download handlers exists.
os.makedirs("downloads", exist_ok=True)

# Cell no.21 (id: cjY8tnAP8LS6)
"""markdown
##### run
"""

# Cell no.21 (id: NeHl-Rs_8Qdi)
# Show the URL input row together with the output area.
display(widgets.HBox([text_input, submit_button]), output_area)