Last active
August 13, 2024 07:41
-
-
Save fqrouter/9602380 to your computer and use it in GitHub Desktop.
Revisions
-
fqrouter revised this gist
Mar 17, 2014 — 1 changed file with 126 additions and 1 deletion. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -162,4 +162,129 @@ def handle_css(upstream_url, environ, start_response): for k, v in response.headers.items(): if 'set-cookie' == k.lower(): v = v.replace('domain=.youtube.com;', '') headers.append((k, v)) start_response(httplib.OK, headers) body = response.read() body = RE_YTIMG.sub(replace_ytimg, body) return [body] def handle_watch(video_id, environ, start_response): video_url = REDIS.get(video_id) if video_url: LOGGER.info('%s hit cache' % video_id) else: LOGGER.info('get url for movie: %s' % video_id) try: video_url = proc_pool.spawn(get_url, video_id).get() except: LOGGER.exception('failed to get url') start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')]) return ['no valid url'] if 'googlevideo.com' not in video_url: LOGGER.error('googlevideo.com not in url: %s' % video_url) start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')]) return ['no valid url'] video_url = video_url.replace('https://', 'http://') history = set() success = False for i in range(3): worker = pick_worker(history) try_url = RE_GOOGLEVIDEO.sub(functools.partial(replace_googlevideo, worker), video_url) if is_url_correct(try_url): video_url = try_url success = True break # else: # worker[1] = False if not success: start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')]) return ['no valid url'] REDIS.set(video_id, video_url) REDIS.expire(video_id, 60 * 3) LOGGER.info('got url for movie: %s %s' % (video_id, video_url)) start_response(httplib.FOUND, [ ('Location', video_url), ('Content-Type', 'text/plain'), ('Cache-Control', 'max-age=180') ]) return ['you can use this link to download the movie'] def get_url(video_id): if '/' in video_id: 
raise Exception('evil') return subprocess.check_output( 'youtube-dl http://www.youtube.com/watch?v=%s -g' % video_id, shell=True).strip() def serve_forever(): try: server = WSGIServer((LISTEN_IP, LISTEN_PORT), handle_request) LOGGER.info('serving HTTP on port %s:%s...' % (LISTEN_IP, LISTEN_PORT)) except: LOGGER.exception('failed to start HTTP server on port %s:%s' % (LISTEN_IP, LISTEN_PORT)) os._exit(1) server.serve_forever() def pick_worker(history=()): if len(history) >= len(WORKERS): raise Exception('no worker') server_name = random.choice(WORKERS.keys()) worker = random.choice(WORKERS[server_name]) if not worker[1]: return pick_worker(set(list(history) + [server_name])) return worker def is_url_correct(url): class NoRedirectHandler(urllib2.HTTPRedirectHandler): def http_error_302(self, req, fp, code, msg, headers): infourl = urllib.addinfourl(fp, headers, req.get_full_url()) infourl.status = code infourl.code = code return infourl http_error_300 = http_error_302 http_error_301 = http_error_302 http_error_303 = http_error_302 http_error_307 = http_error_302 try: opener = urllib2.build_opener(NoRedirectHandler()) response = opener.open(url) response.close() if 200 == response.code: return True else: LOGGER.error('status code %s for url %s' % (response.code, url)) return False except: LOGGER.exception('try url failed: %s' % url) return False def refresh_workers(): while True: for workers in WORKERS.values(): for worker in workers: worker[1] = is_worker_alive(worker[0]) LOGGER.info('%s refreshed workers' % datetime.datetime.now()) gevent.sleep(60 * 60) def is_worker_alive(worker_host): try: urllib2.urlopen('http://%s/image/i1/vi/tLcfAnN2QgY/mqdefault.jpg' % worker_host, timeout=3).close() LOGGER.info('%s => OK' % worker_host) return True except: LOGGER.info('%s => FAILURE' % worker_host) return False signal.signal(signal.SIGINT, lambda signum, fame: os._exit(0)) logging.basicConfig(level=logging.DEBUG) gevent.spawn(refresh_workers) serve_forever() -
fqrouter revised this gist
Mar 17, 2014 — 1 changed file with 2 additions and 2 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -63,7 +63,7 @@ def get_http_response(code): def replace_ytimg_css(match): return '/your-reverse-proxy-ip/css/%s' % match.group(1) def replace_ytimg_esc(match): return '\\/%s\\/image/%s' % (pick_worker()[0], match.group(1)) @@ -153,7 +153,7 @@ def handle(environ, start_response): body = body.replace('class="premium-yva-unexpanded"', 'style="display: none;"') # body = body.replace('id="masthead-search"', 'style="position: relative; padding: 0; margin-top: 3px; overflow: hidden;"') body = body.replace('ad.doubleclick.net', '127.0.0.1') body = body.replace('www.youtube.com', 'your-reverse-proxy-ip') return [body] def handle_css(upstream_url, environ, start_response): -
fqrouter revised this gist
Mar 17, 2014 — 1 changed file with 1 addition and 127 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -146,7 +146,6 @@ def handle(environ, start_response): body = RE_YTIMG.sub(replace_ytimg, body) body = RE_GGPHT.sub(replace_ggpht, body) # body = body.replace('class="search-form', 'method="POST" class="search-form') body = body.replace('class="video-masthead">', 'style="display: none;">') body = body.replace('class="branded-page-v2-top-row">', 'style="display: none;">') body = body.replace('style="z-index: 1">', 'style="display: none;">') @@ -163,129 +162,4 @@ def handle_css(upstream_url, environ, start_response): for k, v in response.headers.items(): if 'set-cookie' == k.lower(): v = v.replace('domain=.youtube.com;', '')
-
fqrouter created this gist
Mar 17, 2014. There are no files selected for viewing.
# Worker-side nginx configuration for the YouTube reverse proxy.
# Each location maps /<kind>/<subdomain>/<path> to the real upstream host
# <subdomain>.<upstream-domain>/<path>, so the Python frontend can rewrite
# asset URLs to point at any worker.

# Upstream hostnames are built from request variables at runtime, so nginx
# needs a resolver (Google Public DNS here) to look them up.
resolver 8.8.8.8;

# /video/<sub>/<path> -> https://<sub>.googlevideo.com/<path>
location /video/ {
    if ($request_uri ~ "^/video/(.+?)/.+") {
        set $upstream_host $1.googlevideo.com;
        # Force a download prompt instead of inline playback.
        add_header Content-Disposition "attachment; filename=video.mp4;";
    }
    rewrite /video/.+?/(.+)$ /$1 break;
    # Stream the (large) video body through without buffering to disk.
    proxy_buffering off;
    proxy_pass https://$upstream_host;
    proxy_set_header Host $upstream_host;
}

# /image/<sub>/<path> -> http://<sub>.ytimg.com/<path>  (thumbnails, CSS assets)
location /image/ {
    if ($request_uri ~ "^/image/(.+?)/.+") {
        set $upstream_host $1.ytimg.com;
    }
    rewrite /image/.+?/(.+)$ /$1 break;
    proxy_buffering off;
    proxy_pass http://$upstream_host;
    proxy_set_header Host $upstream_host;
}

# /photo/<sub>/<path> -> http://<sub>.ggpht.com/<path>  (avatars / channel art)
location /photo/ {
    if ($request_uri ~ "^/photo/(.+?)/.+") {
        set $upstream_host $1.ggpht.com;
    }
    rewrite /photo/.+?/(.+)$ /$1 break;
    proxy_buffering off;
    proxy_pass http://$upstream_host;
    proxy_set_header Host $upstream_host;
}
#!/usr/bin/env python
"""A gevent-based reverse proxy for youtube.com (Python 2).

Serves a rewritten copy of youtube.com: HTML/CSS bodies are rewritten so
static assets (*.ytimg.com, *.ggpht.com) are fetched through a pool of
worker hosts running the companion nginx configuration, and /watch pages
are resolved to direct googlevideo.com URLs via youtube-dl (cached in
Redis for 3 minutes) and answered with a redirect.
"""
import logging
import httplib
import os
import subprocess
import socket
import datetime
import random
import signal
import urllib2
import urlparse
import urllib
import re
from gevent.wsgi import WSGIServer
import gevent.monkey
import gevent.pool
import gevent
import redis
import cgi
import functools
import Cookie

REDIS = redis.StrictRedis()
gevent.monkey.patch_all(subprocess=True)
# Bounds the number of concurrent youtube-dl subprocesses.
proc_pool = gevent.pool.Pool(size=16)
LOGGER = logging.getLogger(__name__)
# Mapping of server name -> list of [host, alive_flag] entries; the alive
# flag is refreshed hourly by refresh_workers().
WORKERS = {
}  # you need to fill this
LISTEN_IP = ''
LISTEN_PORT = 3000
RE_YTIMG_CSS = re.compile(r'/s.ytimg\.com(.*?\.css)', re.IGNORECASE)
RE_YTIMG_ESC = re.compile(r'\\/([a-zA-Z0-9-]+?)\.ytimg\.com', re.IGNORECASE)
RE_YTIMG = re.compile(r'/([a-zA-Z0-9-]+?)\.ytimg\.com', re.IGNORECASE)
RE_GGPHT = re.compile(r'https://([a-zA-Z0-9-]+?)\.ggpht\.com', re.IGNORECASE)
RE_GOOGLEVIDEO = re.compile(r'/([a-zA-Z0-9-]+?)\.googlevideo\.com', re.IGNORECASE)


def handle_request(environ, start_response):
    """WSGI entry point: delegate to handle() and turn any failure into a
    500 response instead of letting the exception kill the server."""
    method = environ.get('REQUEST_METHOD')
    try:
        # handle() receives a wrapper that converts numeric status codes
        # into full "code reason" status lines.
        lines = handle(
            environ,
            lambda status, headers: start_response(get_http_response(status), headers))
    except:
        path = environ.get('PATH_INFO', '').strip('/')
        LOGGER.exception('failed to handle request: %s %s' % (method, path))
        start_response('500 INTERNAL_SERVER_ERROR', [
            ('Content-Type', 'text/javascript'),
            ('Cache-Control', 'no-cache, no-store, must-revalidate'),
            ('Pragma', 'no-cache'),
            ('Expires', '0')])
        lines = ['Retry in 30 minutes']
    for line in lines:
        yield line


def get_http_response(code):
    """Map a numeric HTTP status to a 'code reason' line; values that are
    not plain numeric codes are passed through unchanged."""
    if code not in httplib.responses:
        return code
    return '%s %s' % (code, httplib.responses[code])


def replace_ytimg_css(match):
    # CSS files from s.ytimg.com are fetched through this frontend's /css/
    # path so their contents can also be rewritten (see handle_css).
    return '/23.226.226.92/css/%s' % match.group(1)


def replace_ytimg_esc(match):
    # Same as replace_ytimg, but for JSON-escaped URLs ("\/") embedded in
    # inline scripts.
    return '\\/%s\\/image/%s' % (pick_worker()[0], match.group(1))


def replace_ytimg(match):
    # Route *.ytimg.com assets through a live worker's /image/ proxy.
    return '/%s/image/%s' % (pick_worker()[0], match.group(1))


def replace_ggpht(match):
    # Route *.ggpht.com photos through a live worker's /photo/ proxy.
    return 'http://%s/photo/%s' % (pick_worker()[0], match.group(1))


def replace_googlevideo(worker, match):
    # Route *.googlevideo.com media through a specific worker's /video/
    # proxy; the worker is chosen by handle_watch, hence the extra arg.
    return '/%s/video/%s' % (worker[0], match.group(1))


def handle(environ, start_response):
    """Dispatch a request: /watch pages go to handle_watch, /css/ to
    handle_css, a few special paths get canned responses, everything else
    is proxied to youtube.com with the body rewritten."""
    host = 'youtube.com'
    path = environ.get('PATH_INFO', '')
    if '/watch' == path:
        video_id = urlparse.parse_qs(environ['QUERY_STRING'])['v'][0]
        return handle_watch(video_id, environ, start_response)
    if '/watch_videos' == path:
        # Playlist-style URL: resolve only the first video id.
        video_id = urlparse.parse_qs(environ['QUERY_STRING'])['video_ids'][0].split(',')[0]
        return handle_watch(video_id, environ, start_response)
    if path.startswith('/css/'):
        upstream_url = path.replace('/css/', '')
        upstream_url = 'http://s.ytimg.com/%s' % upstream_url
        return handle_css(upstream_url, environ, start_response)
    if path.startswith('/t/'):
        # /t/<domain>.js -> tiny redirect script, never cached.
        domain = path.replace('/t/', '').replace('.js', '')
        words = 'window.location.href="http://%s";' % domain
        start_response(httplib.OK, [
            ('Content-Type', 'text/javascript'),
            ('Cache-Control', 'no-cache, no-store, must-revalidate'),
            ('Pragma', 'no-cache'),
            ('Expires', '0')])
        return [words]
    if path.startswith('//'):
        # Protocol-relative path: bounce to the absolute http URL.
        start_response(httplib.FOUND, [
            ('Location', 'http:%s' % path)
        ])
        return []
    data = None
    if 'POST' == environ['REQUEST_METHOD']:
        if '/results' == path:
            # Search form posts: re-encode the query as a GET to youtube.
            post_body = cgi.FieldStorage(
                fp=environ['wsgi.input'], environ=environ, keep_blank_values=True)
            upstream_url = 'http://youtube.com/results?%s' % urllib.urlencode(
                {'search_query': post_body['search_query'].value})
        else:
            data = environ['wsgi.input'].readline()
            upstream_url = 'http://%s%s' % (host, path)
    else:
        upstream_url = 'http://%s%s' % (host, path)
        # NOTE(review): reconstructed nesting — the query string is appended
        # on the GET branch only; confirm against the original layout.
        if environ['QUERY_STRING']:
            upstream_url = '%s?%s' % (upstream_url, environ['QUERY_STRING'])
    LOGGER.info('upstream url: %s' % upstream_url)
    headers = {}
    if environ.get('HTTP_COOKIE'):
        LOGGER.info('cookie is: %s' % environ.get('HTTP_COOKIE'))
        headers['Cookie'] = environ.get('HTTP_COOKIE')
    try:
        response = urllib2.urlopen(urllib2.Request(upstream_url, data=data, headers=headers))
    except urllib2.HTTPError as e:
        # Propagate upstream HTTP errors verbatim.
        start_response(e.code, [(k, v) for k, v in e.hdrs.items()])
        return [e.msg]
    except:
        raise
    headers = []
    for k, v in response.headers.items():
        if 'set-cookie' == k.lower():
            # Scope cookies to this proxy instead of .youtube.com.
            v = v.replace('domain=.youtube.com;', '')
        if 'x-frame' in k.lower():
            # Drop X-Frame-Options so the page can be framed.
            continue
        headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    # Rewrite asset hosts to go through the workers, then strip ads and
    # chrome elements by hiding them with inline styles.
    body = RE_YTIMG_CSS.sub(replace_ytimg_css, body)
    body = RE_YTIMG_ESC.sub(replace_ytimg_esc, body)
    body = RE_YTIMG.sub(replace_ytimg, body)
    body = RE_GGPHT.sub(replace_ggpht, body)
    # body = body.replace('class="search-form', 'method="POST" class="search-form')
    body = body.replace('</head>', '<script src="http://exp.jiankongbao.com/loadtrace.php?host_id=12669&style=5&type=0" type="text/javascript"></script></head>')
    body = body.replace('class="video-masthead">', 'style="display: none;">')
    body = body.replace('class="branded-page-v2-top-row">', 'style="display: none;">')
    body = body.replace('style="z-index: 1">', 'style="display: none;">')
    body = body.replace('style="z-index: 1;">', 'style="display: none;">')
    body = body.replace('class="premium-yva-unexpanded"', 'style="display: none;"')
    # body = body.replace('id="masthead-search"', 'style="position: relative; padding: 0; margin-top: 3px; overflow: hidden;"')
    body = body.replace('ad.doubleclick.net', '127.0.0.1')
    body = body.replace('www.youtube.com', '23.226.226.92')
    return [body]


def handle_css(upstream_url, environ, start_response):
    """Fetch a CSS file from s.ytimg.com and rewrite any embedded
    *.ytimg.com URLs to go through a worker."""
    response = urllib2.urlopen(urllib2.Request(upstream_url))
    headers = []
    for k, v in response.headers.items():
        if 'set-cookie' == k.lower():
            v = v.replace('domain=.youtube.com;', '')
        headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    body = RE_YTIMG.sub(replace_ytimg, body)
    return [body]


def handle_watch(video_id, environ, start_response):
    """Resolve video_id to a direct googlevideo.com URL (via Redis cache or
    youtube-dl), pick a working proxy worker for it, and redirect."""
    video_url = REDIS.get(video_id)
    if video_url:
        LOGGER.info('%s hit cache' % video_id)
    else:
        LOGGER.info('get url for movie: %s' % video_id)
        try:
            video_url = proc_pool.spawn(get_url, video_id).get()
        except:
            LOGGER.exception('failed to get url')
            start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
            return ['no valid url']
        if 'googlevideo.com' not in video_url:
            LOGGER.error('googlevideo.com not in url: %s' % video_url)
            start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
            return ['no valid url']
        video_url = video_url.replace('https://', 'http://')
        # Try up to 3 workers until one serves the URL successfully.
        history = set()
        success = False
        for i in range(3):
            worker = pick_worker(history)
            try_url = RE_GOOGLEVIDEO.sub(
                functools.partial(replace_googlevideo, worker), video_url)
            if is_url_correct(try_url):
                video_url = try_url
                success = True
                break
            # else:
            #     worker[1] = False
        if not success:
            start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
            return ['no valid url']
        REDIS.set(video_id, video_url)
        REDIS.expire(video_id, 60 * 3)
    LOGGER.info('got url for movie: %s %s' % (video_id, video_url))
    start_response(httplib.FOUND, [
        ('Location', video_url),
        ('Content-Type', 'text/plain'),
        ('Cache-Control', 'max-age=180')
    ])
    return ['you can use this link to download the movie']


def get_url(video_id):
    """Shell out to youtube-dl to resolve video_id to a direct media URL.

    Security fix: the original interpolated video_id into a shell=True
    command string; the '/' check did not block ';', quotes or backticks,
    so a crafted id could inject arbitrary shell commands. Passing an argv
    list with shell=False makes the id inert. The '/' check is kept as
    defense in depth against path-like ids.
    """
    if '/' in video_id:
        raise Exception('evil')
    return subprocess.check_output(
        ['youtube-dl', 'http://www.youtube.com/watch?v=%s' % video_id, '-g']).strip()


def serve_forever():
    """Bind the WSGI server and serve until the process exits; a failed
    bind is fatal."""
    try:
        server = WSGIServer((LISTEN_IP, LISTEN_PORT), handle_request)
        LOGGER.info('serving HTTP on port %s:%s...' % (LISTEN_IP, LISTEN_PORT))
    except:
        LOGGER.exception('failed to start HTTP server on port %s:%s'
                         % (LISTEN_IP, LISTEN_PORT))
        os._exit(1)
    server.serve_forever()


def pick_worker(history=()):
    """Pick a random worker marked alive; server names already tried are
    accumulated in history, and we give up once every server was tried."""
    if len(history) >= len(WORKERS):
        raise Exception('no worker')
    server_name = random.choice(WORKERS.keys())
    worker = random.choice(WORKERS[server_name])
    if not worker[1]:
        return pick_worker(set(list(history) + [server_name]))
    return worker


def is_url_correct(url):
    """Return True if url answers 200 without following redirects (a
    redirect from a worker means it cannot serve the video itself)."""
    class NoRedirectHandler(urllib2.HTTPRedirectHandler):
        def http_error_302(self, req, fp, code, msg, headers):
            infourl = urllib.addinfourl(fp, headers, req.get_full_url())
            infourl.status = code
            infourl.code = code
            return infourl

        http_error_300 = http_error_302
        http_error_301 = http_error_302
        http_error_303 = http_error_302
        http_error_307 = http_error_302

    try:
        opener = urllib2.build_opener(NoRedirectHandler())
        response = opener.open(url)
        response.close()
        if 200 == response.code:
            return True
        else:
            LOGGER.error('status code %s for url %s' % (response.code, url))
            return False
    except:
        LOGGER.exception('try url failed: %s' % url)
        return False


def refresh_workers():
    """Background greenlet: re-probe every worker's liveness once an hour."""
    while True:
        for workers in WORKERS.values():
            for worker in workers:
                worker[1] = is_worker_alive(worker[0])
        LOGGER.info('%s refreshed workers' % datetime.datetime.now())
        gevent.sleep(60 * 60)


def is_worker_alive(worker_host):
    """Probe a worker by fetching a known thumbnail through its /image/
    proxy with a short timeout."""
    try:
        urllib2.urlopen(
            'http://%s/image/i1/vi/tLcfAnN2QgY/mqdefault.jpg' % worker_host,
            timeout=3).close()
        LOGGER.info('%s => OK' % worker_host)
        return True
    except:
        LOGGER.info('%s => FAILURE' % worker_host)
        return False


# Fixed typo: the second lambda parameter is the stack frame ("frame"),
# not "fame".
signal.signal(signal.SIGINT, lambda signum, frame: os._exit(0))
logging.basicConfig(level=logging.DEBUG)
gevent.spawn(refresh_workers)
serve_forever()