aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/util.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube/util.py')
-rw-r--r--youtube/util.py171
1 files changed, 108 insertions, 63 deletions
diff --git a/youtube/util.py b/youtube/util.py
index b9225d2..c59fae8 100644
--- a/youtube/util.py
+++ b/youtube/util.py
@@ -318,10 +318,11 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None,
cleanup_func(response) # release_connection for urllib3
content = decode_content(
content,
- response.getheader('Content-Encoding', default='identity'))
+ response.headers.get('Content-Encoding', default='identity'))
if (settings.debugging_save_responses
- and debug_name is not None and content):
+ and debug_name is not None
+ and content):
save_dir = os.path.join(settings.data_dir, 'debug')
if not os.path.exists(save_dir):
os.makedirs(save_dir)
@@ -394,23 +395,22 @@ def head(url, use_tor=False, report_text=None, max_redirects=10):
round(time.monotonic() - start_time, 3))
return response
-
-mobile_user_agent = 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.80 Mobile Safari/537.36'
+mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36'
mobile_ua = (('User-Agent', mobile_user_agent),)
-desktop_user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:124.0) Gecko/20100101 Firefox/124.0'
+desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0'
desktop_ua = (('User-Agent', desktop_user_agent),)
json_header = (('Content-Type', 'application/json'),)
desktop_xhr_headers = (
('Accept', '*/*'),
('Accept-Language', 'en-US,en;q=0.5'),
('X-YouTube-Client-Name', '1'),
- ('X-YouTube-Client-Version', '2.20240327.00.00'),
+ ('X-YouTube-Client-Version', '2.20240304.00.00'),
) + desktop_ua
mobile_xhr_headers = (
('Accept', '*/*'),
('Accept-Language', 'en-US,en;q=0.5'),
- ('X-YouTube-Client-Name', '1'),
- ('X-YouTube-Client-Version', '2.20240328.08.00'),
+ ('X-YouTube-Client-Name', '2'),
+ ('X-YouTube-Client-Version', '2.20240304.08.00'),
) + mobile_ua
@@ -431,29 +431,34 @@ class RateLimitedQueue(gevent.queue.Queue):
gevent.queue.Queue.__init__(self)
def get(self):
- with self.lock: # blocks if another greenlet currently has the lock
- if ((self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial) or
- (self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial)):
- self.surpassed_initial = True
- gevent.sleep(self.waiting_period)
- self.count_since_last_wait = 0
+ self.lock.acquire() # blocks if another greenlet currently has the lock
+ if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial:
+ gevent.sleep(self.waiting_period)
+ self.count_since_last_wait = 0
- self.count_since_last_wait += 1
+ elif self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial:
+ self.surpassed_initial = True
+ gevent.sleep(self.waiting_period)
+ self.count_since_last_wait = 0
- if not self.currently_empty and self.empty():
- self.currently_empty = True
- self.empty_start = time.monotonic()
+ self.count_since_last_wait += 1
- item = gevent.queue.Queue.get(self) # blocks when nothing left
+ if not self.currently_empty and self.empty():
+ self.currently_empty = True
+ self.empty_start = time.monotonic()
- if self.currently_empty:
- if time.monotonic() - self.empty_start >= self.waiting_period:
- self.count_since_last_wait = 0
- self.surpassed_initial = False
+ item = gevent.queue.Queue.get(self) # blocks when nothing left
- self.currently_empty = False
+ if self.currently_empty:
+ if time.monotonic() - self.empty_start >= self.waiting_period:
+ self.count_since_last_wait = 0
+ self.surpassed_initial = False
- return item
+ self.currently_empty = False
+
+ self.lock.release()
+
+ return item
def download_thumbnail(save_directory, video_id):
@@ -662,19 +667,19 @@ def to_valid_filename(name):
# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72
INNERTUBE_CLIENTS = {
- 'android-test-suite': {
+ 'android': {
'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
'INNERTUBE_CONTEXT': {
'client': {
'hl': 'en',
'gl': 'US',
- 'clientName': 'ANDROID_TESTSUITE',
- 'clientVersion': '1.9',
+ 'clientName': 'ANDROID',
+ 'clientVersion': '19.09.36',
'osName': 'Android',
'osVersion': '12',
'androidSdkVersion': 31,
'platform': 'MOBILE',
- 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 12; US) gzip'
+ 'userAgent': 'com.google.android.youtube/19.09.36 (Linux; U; Android 12; US) gzip'
},
# https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
#'thirdParty': {
@@ -685,57 +690,42 @@ INNERTUBE_CLIENTS = {
'REQUIRE_JS_PLAYER': False,
},
- 'ios': {
- 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
- 'INNERTUBE_CONTEXT': {
- 'client': {
- 'hl': 'en',
- 'gl': 'US',
- 'clientName': 'IOS',
- 'clientVersion': '19.12.3',
- 'deviceModel': 'iPhone14,3',
- 'userAgent': 'com.google.ios.youtube/19.12.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
- }
- },
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
- 'REQUIRE_JS_PLAYER': False
- },
-
- 'android': {
+ 'android-test-suite': {
'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
'INNERTUBE_CONTEXT': {
'client': {
'hl': 'en',
'gl': 'US',
- 'clientName': 'ANDROID',
- 'clientVersion': '19.15.35',
+ 'clientName': 'ANDROID_TESTSUITE',
+ 'clientVersion': '1.9',
'osName': 'Android',
- 'osVersion': '14',
- 'androidSdkVersion': 34,
+ 'osVersion': '12',
+ 'androidSdkVersion': 31,
'platform': 'MOBILE',
- 'userAgent': 'com.google.android.youtube/19.15.35 (Linux; U; Android 14; en_US; Google Pixel 6 Pro) gzip'
- }
+ 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 12; US) gzip'
+ },
+ # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
+ #'thirdParty': {
+ # 'embedUrl': 'https://google.com', # Can be any valid URL
+ #}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'REQUIRE_JS_PLAYER': False,
},
- 'android_music': {
- 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
+ 'ios': {
+ 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc',
'INNERTUBE_CONTEXT': {
'client': {
'hl': 'en',
'gl': 'US',
- 'clientName': 'ANDROID_MUSIC',
- 'clientVersion': '6.48.51',
- 'osName': 'Android',
- 'osVersion': '14',
- 'androidSdkVersion': 34,
- 'platform': 'MOBILE',
- 'userAgent': 'com.google.android.apps.youtube.music/6.48.51 (Linux; U; Android 14; US) gzip'
+ 'clientName': 'IOS',
+ 'clientVersion': '19.09.3',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
}
},
- 'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'REQUIRE_JS_PLAYER': False
},
@@ -766,14 +756,62 @@ INNERTUBE_CLIENTS = {
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'WEB',
- 'clientVersion': '2.20240327.00.00',
+ 'clientVersion': '2.20220801.00.00',
'userAgent': desktop_user_agent,
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 1
},
+ 'android_vr': {
+ 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID_VR',
+ 'clientVersion': '1.60.19',
+ 'deviceMake': 'Oculus',
+ 'deviceModel': 'Quest 3',
+ 'androidSdkVersion': 32,
+ 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip',
+ 'osName': 'Android',
+ 'osVersion': '12L',
+ },
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 28,
+ 'REQUIRE_JS_PLAYER': False,
+ },
}
+def get_visitor_data():
+ visitor_data = None
+ visitor_data_cache = os.path.join(settings.data_dir, 'visitorData.txt')
+ if not os.path.exists(settings.data_dir):
+ os.makedirs(settings.data_dir)
+ if os.path.isfile(visitor_data_cache):
+ with open(visitor_data_cache, 'r') as file:
+ print('Getting visitor_data from cache')
+ visitor_data = file.read()
+ max_age = 12*3600
+ file_age = time.time() - os.path.getmtime(visitor_data_cache)
+ if file_age > max_age:
+ print('visitor_data cache is too old. Removing file...')
+ os.remove(visitor_data_cache)
+ return visitor_data
+
+ print('Fetching youtube homepage to get visitor_data')
+ yt_homepage = 'https://www.youtube.com'
+ yt_resp = fetch_url(yt_homepage, headers={'User-Agent': mobile_user_agent}, report_text='Getting youtube homepage')
+ visitor_data_re = r'''"visitorData":\s*?"(.+?)"'''
+ visitor_data_match = re.search(visitor_data_re, yt_resp.decode())
+ if visitor_data_match:
+ visitor_data = visitor_data_match.group(1)
+ print(f'Got visitor_data: {len(visitor_data)}')
+ with open(visitor_data_cache, 'w') as file:
+ print('Saving visitor_data cache...')
+ file.write(visitor_data)
+ return visitor_data
+ else:
+ print('Unable to get visitor_data value')
+ return visitor_data
def call_youtube_api(client, api, data):
client_params = INNERTUBE_CLIENTS[client]
@@ -781,12 +819,17 @@ def call_youtube_api(client, api, data):
key = client_params['INNERTUBE_API_KEY']
host = client_params.get('INNERTUBE_HOST') or 'www.youtube.com'
user_agent = context['client'].get('userAgent') or mobile_user_agent
+ visitor_data = get_visitor_data()
url = 'https://' + host + '/youtubei/v1/' + api + '?key=' + key
+ if visitor_data:
+ context['client'].update({'visitorData': visitor_data})
data['context'] = context
data = json.dumps(data)
headers = (('Content-Type', 'application/json'),('User-Agent', user_agent))
+ if visitor_data:
+ headers = ( *headers, ('X-Goog-Visitor-Id', visitor_data ))
response = fetch_url(
url, data=data, headers=headers,
debug_name='youtubei_' + api + '_' + client,
@@ -797,6 +840,8 @@ def call_youtube_api(client, api, data):
def strip_non_ascii(string):
''' Returns the string without non ASCII characters'''
+ if string is None:
+ return ""
stripped = (c for c in string if 0 < ord(c) < 127)
return ''.join(stripped)