diff options
Diffstat (limited to 'youtube/util.py')
-rw-r--r-- | youtube/util.py | 171 |
1 files changed, 108 insertions, 63 deletions
diff --git a/youtube/util.py b/youtube/util.py index b9225d2..c59fae8 100644 --- a/youtube/util.py +++ b/youtube/util.py @@ -318,10 +318,11 @@ def fetch_url(url, headers=(), timeout=15, report_text=None, data=None, cleanup_func(response) # release_connection for urllib3 content = decode_content( content, - response.getheader('Content-Encoding', default='identity')) + response.headers.get('Content-Encoding', default='identity')) if (settings.debugging_save_responses - and debug_name is not None and content): + and debug_name is not None + and content): save_dir = os.path.join(settings.data_dir, 'debug') if not os.path.exists(save_dir): os.makedirs(save_dir) @@ -394,23 +395,22 @@ def head(url, use_tor=False, report_text=None, max_redirects=10): round(time.monotonic() - start_time, 3)) return response - -mobile_user_agent = 'Mozilla/5.0 (Linux; Android 14) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.80 Mobile Safari/537.36' +mobile_user_agent = 'Mozilla/5.0 (Linux; Android 7.0; Redmi Note 4 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36' mobile_ua = (('User-Agent', mobile_user_agent),) -desktop_user_agent = 'Mozilla/5.0 (Windows NT 10.0; rv:124.0) Gecko/20100101 Firefox/124.0' +desktop_user_agent = 'Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0' desktop_ua = (('User-Agent', desktop_user_agent),) json_header = (('Content-Type', 'application/json'),) desktop_xhr_headers = ( ('Accept', '*/*'), ('Accept-Language', 'en-US,en;q=0.5'), ('X-YouTube-Client-Name', '1'), - ('X-YouTube-Client-Version', '2.20240327.00.00'), + ('X-YouTube-Client-Version', '2.20240304.00.00'), ) + desktop_ua mobile_xhr_headers = ( ('Accept', '*/*'), ('Accept-Language', 'en-US,en;q=0.5'), - ('X-YouTube-Client-Name', '1'), - ('X-YouTube-Client-Version', '2.20240328.08.00'), + ('X-YouTube-Client-Name', '2'), + ('X-YouTube-Client-Version', '2.20240304.08.00'), ) + mobile_ua @@ -431,29 +431,34 @@ class RateLimitedQueue(gevent.queue.Queue): gevent.queue.Queue.__init__(self) def get(self): - with self.lock: # blocks if another greenlet currently has the lock - if ((self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial) or - (self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial)): - self.surpassed_initial = True - gevent.sleep(self.waiting_period) - self.count_since_last_wait = 0 + self.lock.acquire() # blocks if another greenlet currently has the lock + if self.count_since_last_wait >= self.subsequent_bursts and self.surpassed_initial: + gevent.sleep(self.waiting_period) + self.count_since_last_wait = 0 - self.count_since_last_wait += 1 + elif self.count_since_last_wait >= self.initial_burst and not self.surpassed_initial: + self.surpassed_initial = True + gevent.sleep(self.waiting_period) + self.count_since_last_wait = 0 - if not self.currently_empty and self.empty(): - self.currently_empty = True - self.empty_start = time.monotonic() + self.count_since_last_wait += 1 - item = gevent.queue.Queue.get(self) # blocks when nothing left + if not self.currently_empty and self.empty(): + self.currently_empty = True + self.empty_start = time.monotonic() - if self.currently_empty: - if time.monotonic() - self.empty_start >= self.waiting_period: - self.count_since_last_wait = 0 - self.surpassed_initial = False + item = gevent.queue.Queue.get(self) # blocks when nothing left - self.currently_empty = False + if self.currently_empty: + if time.monotonic() - self.empty_start >= self.waiting_period: + self.count_since_last_wait = 0 + self.surpassed_initial = False - return item + self.currently_empty = False + + self.lock.release() + + return item def download_thumbnail(save_directory, video_id): @@ -662,19 +667,19 @@ def to_valid_filename(name): # https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72 INNERTUBE_CLIENTS = { - 'android-test-suite': { + 'android': { 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', 'INNERTUBE_CONTEXT': { 'client': { 'hl': 'en', 'gl': 'US', - 'clientName': 'ANDROID_TESTSUITE', - 'clientVersion': '1.9', + 'clientName': 'ANDROID', + 'clientVersion': '19.09.36', 'osName': 'Android', 'osVersion': '12', 'androidSdkVersion': 31, 'platform': 'MOBILE', - 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 12; US) gzip' + 'userAgent': 'com.google.android.youtube/19.09.36 (Linux; U; Android 12; US) gzip' }, # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287 #'thirdParty': { @@ -685,57 +690,42 @@ INNERTUBE_CLIENTS = { 'REQUIRE_JS_PLAYER': False, }, - 'ios': { - 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc', - 'INNERTUBE_CONTEXT': { - 'client': { - 'hl': 'en', - 'gl': 'US', - 'clientName': 'IOS', - 'clientVersion': '19.12.3', - 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/19.12.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' - } - }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, - 'REQUIRE_JS_PLAYER': False - }, - - 'android': { + 'android-test-suite': { 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', 'INNERTUBE_CONTEXT': { 'client': { 'hl': 'en', 'gl': 'US', - 'clientName': 'ANDROID', - 'clientVersion': '19.15.35', + 'clientName': 'ANDROID_TESTSUITE', + 'clientVersion': '1.9', 'osName': 'Android', - 'osVersion': '14', - 'androidSdkVersion': 34, + 'osVersion': '12', + 'androidSdkVersion': 31, 'platform': 'MOBILE', - 'userAgent': 'com.google.android.youtube/19.15.35 (Linux; U; Android 14; en_US; Google Pixel 6 Pro) gzip' - } + 'userAgent': 'com.google.android.youtube/1.9 (Linux; U; Android 12; US) gzip' + }, + # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287 + #'thirdParty': { + # 'embedUrl': 'https://google.com', # Can be any valid URL + #} }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, 'REQUIRE_JS_PLAYER': False, }, - 'android_music': { - 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI', + 'ios': { + 'INNERTUBE_API_KEY': 'AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc', 'INNERTUBE_CONTEXT': { 'client': { 'hl': 'en', 'gl': 'US', - 'clientName': 'ANDROID_MUSIC', - 'clientVersion': '6.48.51', - 'osName': 'Android', - 'osVersion': '14', - 'androidSdkVersion': 34, - 'platform': 'MOBILE', - 'userAgent': 'com.google.android.apps.youtube.music/6.48.51 (Linux; U; Android 14; US) gzip' + 'clientName': 'IOS', + 'clientVersion': '19.09.3', + 'deviceModel': 'iPhone14,3', + 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' } }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, 'REQUIRE_JS_PLAYER': False }, @@ -766,14 +756,62 @@ INNERTUBE_CLIENTS = { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'WEB', - 'clientVersion': '2.20240327.00.00', + 'clientVersion': '2.20220801.00.00', 'userAgent': desktop_user_agent, } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 1 }, + 'android_vr': { + 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID_VR', + 'clientVersion': '1.60.19', + 'deviceMake': 'Oculus', + 'deviceModel': 'Quest 3', + 'androidSdkVersion': 32, + 'userAgent': 'com.google.android.apps.youtube.vr.oculus/1.60.19 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip', + 'osName': 'Android', + 'osVersion': '12L', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 28, + 'REQUIRE_JS_PLAYER': False, + }, } +def get_visitor_data(): + visitor_data = None + visitor_data_cache = os.path.join(settings.data_dir, 'visitorData.txt') + if not os.path.exists(settings.data_dir): + os.makedirs(settings.data_dir) + if os.path.isfile(visitor_data_cache): + with open(visitor_data_cache, 'r') as file: + print('Getting visitor_data from cache') + visitor_data = file.read() + max_age = 12*3600 + file_age = time.time() - os.path.getmtime(visitor_data_cache) + if file_age > max_age: + print('visitor_data cache is too old. Removing file...') + os.remove(visitor_data_cache) + return visitor_data + + print('Fetching youtube homepage to get visitor_data') + yt_homepage = 'https://www.youtube.com' + yt_resp = fetch_url(yt_homepage, headers={'User-Agent': mobile_user_agent}, report_text='Getting youtube homepage') + visitor_data_re = r'''"visitorData":\s*?"(.+?)"''' + visitor_data_match = re.search(visitor_data_re, yt_resp.decode()) + if visitor_data_match: + visitor_data = visitor_data_match.group(1) + print(f'Got visitor_data: {len(visitor_data)}') + with open(visitor_data_cache, 'w') as file: + print('Saving visitor_data cache...') + file.write(visitor_data) + return visitor_data + else: + print('Unable to get visitor_data value') + return visitor_data def call_youtube_api(client, api, data): client_params = INNERTUBE_CLIENTS[client] @@ -781,12 +819,17 @@ def call_youtube_api(client, api, data): key = client_params['INNERTUBE_API_KEY'] host = client_params.get('INNERTUBE_HOST') or 'www.youtube.com' user_agent = context['client'].get('userAgent') or mobile_user_agent + visitor_data = get_visitor_data() url = 'https://' + host + '/youtubei/v1/' + api + '?key=' + key + if visitor_data: + context['client'].update({'visitorData': visitor_data}) data['context'] = context data = json.dumps(data) headers = (('Content-Type', 'application/json'),('User-Agent', user_agent)) + if visitor_data: + headers = ( *headers, ('X-Goog-Visitor-Id', visitor_data )) response = fetch_url( url, data=data, headers=headers, debug_name='youtubei_' + api + '_' + client, @@ -797,6 +840,8 @@ def call_youtube_api(client, api, data): def strip_non_ascii(string): ''' Returns the string without non ASCII characters''' + if string is None: + return "" stripped = (c for c in string if 0 < ord(c) < 127) return ''.join(stripped) |