Skip to content

Instantly share code, notes, and snippets.

@cnsoft
Forked from fqrouter/worker nginx conf
Created March 26, 2014 15:27
Show Gist options
  • Save cnsoft/9785998 to your computer and use it in GitHub Desktop.
Save cnsoft/9785998 to your computer and use it in GitHub Desktop.

Revisions

  1. @fqrouter fqrouter revised this gist Mar 17, 2014. 1 changed file with 126 additions and 1 deletion.
    127 changes: 126 additions & 1 deletion youtube-reverse-proxy.py
    Original file line number Diff line number Diff line change
    @@ -162,4 +162,129 @@ def handle_css(upstream_url, environ, start_response):
    for k, v in response.headers.items():
    if 'set-cookie' == k.lower():
    v = v.replace('domain=.youtube.com;', '')

    headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    body = RE_YTIMG.sub(replace_ytimg, body)
    return [body]

    def handle_watch(video_id, environ, start_response):
    video_url = REDIS.get(video_id)
    if video_url:
    LOGGER.info('%s hit cache' % video_id)
    else:
    LOGGER.info('get url for movie: %s' % video_id)
    try:
    video_url = proc_pool.spawn(get_url, video_id).get()
    except:
    LOGGER.exception('failed to get url')
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    if 'googlevideo.com' not in video_url:
    LOGGER.error('googlevideo.com not in url: %s' % video_url)
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    video_url = video_url.replace('https://', 'http://')
    history = set()
    success = False
    for i in range(3):
    worker = pick_worker(history)
    try_url = RE_GOOGLEVIDEO.sub(functools.partial(replace_googlevideo, worker), video_url)
    if is_url_correct(try_url):
    video_url = try_url
    success = True
    break
    # else:
    # worker[1] = False
    if not success:
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    REDIS.set(video_id, video_url)
    REDIS.expire(video_id, 60 * 3)
    LOGGER.info('got url for movie: %s %s' % (video_id, video_url))
    start_response(httplib.FOUND, [
    ('Location', video_url),
    ('Content-Type', 'text/plain'),
    ('Cache-Control', 'max-age=180')
    ])
    return ['you can use this link to download the movie']


    def get_url(video_id):
    if '/' in video_id:
    raise Exception('evil')
    return subprocess.check_output(
    'youtube-dl http://www.youtube.com/watch?v=%s -g'
    % video_id, shell=True).strip()


    def serve_forever():
    try:
    server = WSGIServer((LISTEN_IP, LISTEN_PORT), handle_request)
    LOGGER.info('serving HTTP on port %s:%s...' % (LISTEN_IP, LISTEN_PORT))
    except:
    LOGGER.exception('failed to start HTTP server on port %s:%s' % (LISTEN_IP, LISTEN_PORT))
    os._exit(1)
    server.serve_forever()


    def pick_worker(history=()):
    if len(history) >= len(WORKERS):
    raise Exception('no worker')
    server_name = random.choice(WORKERS.keys())
    worker = random.choice(WORKERS[server_name])
    if not worker[1]:
    return pick_worker(set(list(history) + [server_name]))
    return worker


    def is_url_correct(url):
    class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl

    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

    try:
    opener = urllib2.build_opener(NoRedirectHandler())
    response = opener.open(url)
    response.close()
    if 200 == response.code:
    return True
    else:
    LOGGER.error('status code %s for url %s' % (response.code, url))
    return False
    except:
    LOGGER.exception('try url failed: %s' % url)
    return False


    def refresh_workers():
    while True:
    for workers in WORKERS.values():
    for worker in workers:
    worker[1] = is_worker_alive(worker[0])
    LOGGER.info('%s refreshed workers' % datetime.datetime.now())
    gevent.sleep(60 * 60)


    def is_worker_alive(worker_host):
    try:
    urllib2.urlopen('http://%s/image/i1/vi/tLcfAnN2QgY/mqdefault.jpg' % worker_host, timeout=3).close()
    LOGGER.info('%s => OK' % worker_host)
    return True
    except:
    LOGGER.info('%s => FAILURE' % worker_host)
    return False


    signal.signal(signal.SIGINT, lambda signum, fame: os._exit(0))
    logging.basicConfig(level=logging.DEBUG)
    gevent.spawn(refresh_workers)
    serve_forever()
  2. @fqrouter fqrouter revised this gist Mar 17, 2014. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions youtube-reverse-proxy.py
    Original file line number Diff line number Diff line change
    @@ -63,7 +63,7 @@ def get_http_response(code):


    def replace_ytimg_css(match):
    return '/23.226.226.92/css/%s' % match.group(1)
    return '/your-reverse-proxy-ip/css/%s' % match.group(1)

    def replace_ytimg_esc(match):
    return '\\/%s\\/image/%s' % (pick_worker()[0], match.group(1))
    @@ -153,7 +153,7 @@ def handle(environ, start_response):
    body = body.replace('class="premium-yva-unexpanded"', 'style="display: none;"')
    # body = body.replace('id="masthead-search"', 'style="position: relative; padding: 0; margin-top: 3px; overflow: hidden;"')
    body = body.replace('ad.doubleclick.net', '127.0.0.1')
    body = body.replace('www.youtube.com', '23.226.226.92')
    body = body.replace('www.youtube.com', 'your-reverse-proxy-ip')
    return [body]

    def handle_css(upstream_url, environ, start_response):
  3. @fqrouter fqrouter revised this gist Mar 17, 2014. 1 changed file with 1 addition and 127 deletions.
    128 changes: 1 addition & 127 deletions youtube-reverse-proxy.py
    Original file line number Diff line number Diff line change
    @@ -146,7 +146,6 @@ def handle(environ, start_response):
    body = RE_YTIMG.sub(replace_ytimg, body)
    body = RE_GGPHT.sub(replace_ggpht, body)
    # body = body.replace('class="search-form', 'method="POST" class="search-form')
    body = body.replace('</head>', '<script src="http://exp.jiankongbao.com/loadtrace.php?host_id=12669&style=5&type=0" type="text/javascript"></script></head>')
    body = body.replace('class="video-masthead">', 'style="display: none;">')
    body = body.replace('class="branded-page-v2-top-row">', 'style="display: none;">')
    body = body.replace('style="z-index: 1">', 'style="display: none;">')
    @@ -163,129 +162,4 @@ def handle_css(upstream_url, environ, start_response):
    for k, v in response.headers.items():
    if 'set-cookie' == k.lower():
    v = v.replace('domain=.youtube.com;', '')
    headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    body = RE_YTIMG.sub(replace_ytimg, body)
    return [body]

    def handle_watch(video_id, environ, start_response):
    video_url = REDIS.get(video_id)
    if video_url:
    LOGGER.info('%s hit cache' % video_id)
    else:
    LOGGER.info('get url for movie: %s' % video_id)
    try:
    video_url = proc_pool.spawn(get_url, video_id).get()
    except:
    LOGGER.exception('failed to get url')
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    if 'googlevideo.com' not in video_url:
    LOGGER.error('googlevideo.com not in url: %s' % video_url)
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    video_url = video_url.replace('https://', 'http://')
    history = set()
    success = False
    for i in range(3):
    worker = pick_worker(history)
    try_url = RE_GOOGLEVIDEO.sub(functools.partial(replace_googlevideo, worker), video_url)
    if is_url_correct(try_url):
    video_url = try_url
    success = True
    break
    # else:
    # worker[1] = False
    if not success:
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    REDIS.set(video_id, video_url)
    REDIS.expire(video_id, 60 * 3)
    LOGGER.info('got url for movie: %s %s' % (video_id, video_url))
    start_response(httplib.FOUND, [
    ('Location', video_url),
    ('Content-Type', 'text/plain'),
    ('Cache-Control', 'max-age=180')
    ])
    return ['you can use this link to download the movie']


    def get_url(video_id):
    if '/' in video_id:
    raise Exception('evil')
    return subprocess.check_output(
    'youtube-dl http://www.youtube.com/watch?v=%s -g'
    % video_id, shell=True).strip()


    def serve_forever():
    try:
    server = WSGIServer((LISTEN_IP, LISTEN_PORT), handle_request)
    LOGGER.info('serving HTTP on port %s:%s...' % (LISTEN_IP, LISTEN_PORT))
    except:
    LOGGER.exception('failed to start HTTP server on port %s:%s' % (LISTEN_IP, LISTEN_PORT))
    os._exit(1)
    server.serve_forever()


    def pick_worker(history=()):
    if len(history) >= len(WORKERS):
    raise Exception('no worker')
    server_name = random.choice(WORKERS.keys())
    worker = random.choice(WORKERS[server_name])
    if not worker[1]:
    return pick_worker(set(list(history) + [server_name]))
    return worker


    def is_url_correct(url):
    class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl

    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

    try:
    opener = urllib2.build_opener(NoRedirectHandler())
    response = opener.open(url)
    response.close()
    if 200 == response.code:
    return True
    else:
    LOGGER.error('status code %s for url %s' % (response.code, url))
    return False
    except:
    LOGGER.exception('try url failed: %s' % url)
    return False


    def refresh_workers():
    while True:
    for workers in WORKERS.values():
    for worker in workers:
    worker[1] = is_worker_alive(worker[0])
    LOGGER.info('%s refreshed workers' % datetime.datetime.now())
    gevent.sleep(60 * 60)


    def is_worker_alive(worker_host):
    try:
    urllib2.urlopen('http://%s/image/i1/vi/tLcfAnN2QgY/mqdefault.jpg' % worker_host, timeout=3).close()
    LOGGER.info('%s => OK' % worker_host)
    return True
    except:
    LOGGER.info('%s => FAILURE' % worker_host)
    return False


    signal.signal(signal.SIGINT, lambda signum, fame: os._exit(0))
    logging.basicConfig(level=logging.DEBUG)
    gevent.spawn(refresh_workers)
    serve_forever()

  4. @fqrouter fqrouter created this gist Mar 17, 2014.
    34 changes: 34 additions & 0 deletions worker nginx conf
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,34 @@
# Worker-side nginx config: each location turns /<kind>/<host-prefix>/<path>
# into a pass-through proxy request to <host-prefix>.<upstream-domain>/<path>.
# DNS resolver is required because proxy_pass uses a runtime variable.
resolver 8.8.8.8;
# /video/<prefix>/<path> -> https://<prefix>.googlevideo.com/<path>
location /video/ {
if ($request_uri ~ "^/video/(.+?)/.+") {
set $upstream_host $1.googlevideo.com;
# Force a download prompt instead of inline playback.
add_header Content-Disposition "attachment; filename=video.mp4;";
}
rewrite /video/.+?/(.+)$ /$1 break;
# Stream bytes straight through; buffering large videos wastes disk/RAM.
proxy_buffering off;
proxy_pass https://$upstream_host;
proxy_set_header Host $upstream_host;
}


# /image/<prefix>/<path> -> http://<prefix>.ytimg.com/<path> (thumbnails)
location /image/ {
if ($request_uri ~ "^/image/(.+?)/.+") {
set $upstream_host $1.ytimg.com;
}
rewrite /image/.+?/(.+)$ /$1 break;
proxy_buffering off;
proxy_pass http://$upstream_host;
proxy_set_header Host $upstream_host;

}


# /photo/<prefix>/<path> -> http://<prefix>.ggpht.com/<path> (avatars)
location /photo/ {
if ($request_uri ~ "^/photo/(.+?)/.+") {
set $upstream_host $1.ggpht.com;
}
rewrite /photo/.+?/(.+)$ /$1 break;
proxy_buffering off;
proxy_pass http://$upstream_host;
proxy_set_header Host $upstream_host;
}
    291 changes: 291 additions & 0 deletions youtube-reverse-proxy.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,291 @@
    #!/usr/bin/env python
    import logging
    import httplib
    import os
    import subprocess
    import socket
    import datetime
    import random
    import signal
    import urllib2
    import urlparse
    import urllib
    import re

    from gevent.wsgi import WSGIServer
    import gevent.monkey
    import gevent.pool
    import gevent
    import redis
    import cgi
    import functools
    import Cookie

    REDIS = redis.StrictRedis()

    gevent.monkey.patch_all(subprocess=True)
    proc_pool = gevent.pool.Pool(size=16)

    LOGGER = logging.getLogger(__name__)

    WORKERS = {
    } # you need to fill this

    LISTEN_IP = ''
    LISTEN_PORT = 3000
    RE_YTIMG_CSS = re.compile(r'/s.ytimg\.com(.*?\.css)', re.IGNORECASE)
    RE_YTIMG_ESC = re.compile(r'\\/([a-zA-Z0-9-]+?)\.ytimg\.com', re.IGNORECASE)
    RE_YTIMG = re.compile(r'/([a-zA-Z0-9-]+?)\.ytimg\.com', re.IGNORECASE)
    RE_GGPHT = re.compile(r'https://([a-zA-Z0-9-]+?)\.ggpht\.com', re.IGNORECASE)
    RE_GOOGLEVIDEO = re.compile(r'/([a-zA-Z0-9-]+?)\.googlevideo\.com', re.IGNORECASE)

    def handle_request(environ, start_response):
    method = environ.get('REQUEST_METHOD')
    try:
    lines = handle(environ, lambda status, headers: start_response(get_http_response(status), headers))
    except:
    path = environ.get('PATH_INFO', '').strip('/')
    LOGGER.exception('failed to handle request: %s %s' % (method, path))
    start_response('500 INTERNAL_SERVER_ERROR', [
    ('Content-Type', 'text/javascript'),
    ('Cache-Control', 'no-cache, no-store, must-revalidate'),
    ('Pragma', 'no-cache'),
    ('Expires', '0')])
    lines = ['Retry in 30 minutes']
    for line in lines:
    yield line


    def get_http_response(code):
    if code not in httplib.responses:
    return code
    return '%s %s' % (code, httplib.responses[code])


    def replace_ytimg_css(match):
    return '/23.226.226.92/css/%s' % match.group(1)

    def replace_ytimg_esc(match):
    return '\\/%s\\/image/%s' % (pick_worker()[0], match.group(1))

    def replace_ytimg(match):
    return '/%s/image/%s' % (pick_worker()[0], match.group(1))

    def replace_ggpht(match):
    return 'http://%s/photo/%s' % (pick_worker()[0], match.group(1))

    def replace_googlevideo(worker, match):
    return '/%s/video/%s' % (worker[0], match.group(1))


    def handle(environ, start_response):
    host = 'youtube.com'
    path = environ.get('PATH_INFO', '')
    if '/watch' == path:
    video_id = urlparse.parse_qs(environ['QUERY_STRING'])['v'][0]
    return handle_watch(video_id, environ, start_response)
    if '/watch_videos' == path:
    video_id = urlparse.parse_qs(environ['QUERY_STRING'])['video_ids'][0].split(',')[0]
    return handle_watch(video_id, environ, start_response)
    if path.startswith('/css/'):
    upstream_url = path.replace('/css/', '')
    upstream_url = 'http://s.ytimg.com/%s' % upstream_url
    return handle_css(upstream_url, environ, start_response)
    if path.startswith('/t/'):
    domain = path.replace('/t/', '').replace('.js', '')
    words = 'window.location.href="http://%s";' % domain
    start_response(httplib.OK, [
    ('Content-Type', 'text/javascript'),
    ('Cache-Control', 'no-cache, no-store, must-revalidate'),
    ('Pragma', 'no-cache'),
    ('Expires', '0')])
    return [words]
    if path.startswith('//'):
    start_response(httplib.FOUND, [
    ('Location', 'http:%s' % path)
    ])
    return []
    data = None
    if 'POST' == environ['REQUEST_METHOD']:
    if '/results' == path:
    post_body = cgi.FieldStorage(
    fp=environ['wsgi.input'],
    environ=environ,
    keep_blank_values=True)
    upstream_url = 'http://youtube.com/results?%s' % urllib.urlencode({'search_query': post_body['search_query'].value})
    else:
    data = environ['wsgi.input'].readline()
    upstream_url = 'http://%s%s' % (host, path)
    else:
    upstream_url = 'http://%s%s' % (host, path)
    if environ['QUERY_STRING']:
    upstream_url = '%s?%s' % (upstream_url, environ['QUERY_STRING'])
    LOGGER.info('upstream url: %s' % upstream_url)
    headers = {}
    if environ.get('HTTP_COOKIE'):
    LOGGER.info('cookie is: %s' % environ.get('HTTP_COOKIE'))
    headers['Cookie'] = environ.get('HTTP_COOKIE')
    try:
    response = urllib2.urlopen(urllib2.Request(upstream_url, data=data, headers=headers))
    except urllib2.HTTPError as e:
    start_response(e.code, [(k, v) for k, v in e.hdrs.items()])
    return [e.msg]
    except:
    raise
    headers = []
    for k, v in response.headers.items():
    if 'set-cookie' == k.lower():
    v = v.replace('domain=.youtube.com;', '')
    if 'x-frame' in k.lower():
    continue
    headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    body = RE_YTIMG_CSS.sub(replace_ytimg_css, body)
    body = RE_YTIMG_ESC.sub(replace_ytimg_esc, body)
    body = RE_YTIMG.sub(replace_ytimg, body)
    body = RE_GGPHT.sub(replace_ggpht, body)
    # body = body.replace('class="search-form', 'method="POST" class="search-form')
    body = body.replace('</head>', '<script src="http://exp.jiankongbao.com/loadtrace.php?host_id=12669&style=5&type=0" type="text/javascript"></script></head>')
    body = body.replace('class="video-masthead">', 'style="display: none;">')
    body = body.replace('class="branded-page-v2-top-row">', 'style="display: none;">')
    body = body.replace('style="z-index: 1">', 'style="display: none;">')
    body = body.replace('style="z-index: 1;">', 'style="display: none;">')
    body = body.replace('class="premium-yva-unexpanded"', 'style="display: none;"')
    # body = body.replace('id="masthead-search"', 'style="position: relative; padding: 0; margin-top: 3px; overflow: hidden;"')
    body = body.replace('ad.doubleclick.net', '127.0.0.1')
    body = body.replace('www.youtube.com', '23.226.226.92')
    return [body]

    def handle_css(upstream_url, environ, start_response):
    response = urllib2.urlopen(urllib2.Request(upstream_url))
    headers = []
    for k, v in response.headers.items():
    if 'set-cookie' == k.lower():
    v = v.replace('domain=.youtube.com;', '')
    headers.append((k, v))
    start_response(httplib.OK, headers)
    body = response.read()
    body = RE_YTIMG.sub(replace_ytimg, body)
    return [body]

    def handle_watch(video_id, environ, start_response):
    video_url = REDIS.get(video_id)
    if video_url:
    LOGGER.info('%s hit cache' % video_id)
    else:
    LOGGER.info('get url for movie: %s' % video_id)
    try:
    video_url = proc_pool.spawn(get_url, video_id).get()
    except:
    LOGGER.exception('failed to get url')
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    if 'googlevideo.com' not in video_url:
    LOGGER.error('googlevideo.com not in url: %s' % video_url)
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    video_url = video_url.replace('https://', 'http://')
    history = set()
    success = False
    for i in range(3):
    worker = pick_worker(history)
    try_url = RE_GOOGLEVIDEO.sub(functools.partial(replace_googlevideo, worker), video_url)
    if is_url_correct(try_url):
    video_url = try_url
    success = True
    break
    # else:
    # worker[1] = False
    if not success:
    start_response(httplib.BAD_GATEWAY, [('Content-Type', 'text/plain')])
    return ['no valid url']
    REDIS.set(video_id, video_url)
    REDIS.expire(video_id, 60 * 3)
    LOGGER.info('got url for movie: %s %s' % (video_id, video_url))
    start_response(httplib.FOUND, [
    ('Location', video_url),
    ('Content-Type', 'text/plain'),
    ('Cache-Control', 'max-age=180')
    ])
    return ['you can use this link to download the movie']


    def get_url(video_id):
    if '/' in video_id:
    raise Exception('evil')
    return subprocess.check_output(
    'youtube-dl http://www.youtube.com/watch?v=%s -g'
    % video_id, shell=True).strip()


    def serve_forever():
    try:
    server = WSGIServer((LISTEN_IP, LISTEN_PORT), handle_request)
    LOGGER.info('serving HTTP on port %s:%s...' % (LISTEN_IP, LISTEN_PORT))
    except:
    LOGGER.exception('failed to start HTTP server on port %s:%s' % (LISTEN_IP, LISTEN_PORT))
    os._exit(1)
    server.serve_forever()


    def pick_worker(history=()):
    if len(history) >= len(WORKERS):
    raise Exception('no worker')
    server_name = random.choice(WORKERS.keys())
    worker = random.choice(WORKERS[server_name])
    if not worker[1]:
    return pick_worker(set(list(history) + [server_name]))
    return worker


    def is_url_correct(url):
    class NoRedirectHandler(urllib2.HTTPRedirectHandler):
    def http_error_302(self, req, fp, code, msg, headers):
    infourl = urllib.addinfourl(fp, headers, req.get_full_url())
    infourl.status = code
    infourl.code = code
    return infourl

    http_error_300 = http_error_302
    http_error_301 = http_error_302
    http_error_303 = http_error_302
    http_error_307 = http_error_302

    try:
    opener = urllib2.build_opener(NoRedirectHandler())
    response = opener.open(url)
    response.close()
    if 200 == response.code:
    return True
    else:
    LOGGER.error('status code %s for url %s' % (response.code, url))
    return False
    except:
    LOGGER.exception('try url failed: %s' % url)
    return False


    def refresh_workers():
    while True:
    for workers in WORKERS.values():
    for worker in workers:
    worker[1] = is_worker_alive(worker[0])
    LOGGER.info('%s refreshed workers' % datetime.datetime.now())
    gevent.sleep(60 * 60)


    def is_worker_alive(worker_host):
    try:
    urllib2.urlopen('http://%s/image/i1/vi/tLcfAnN2QgY/mqdefault.jpg' % worker_host, timeout=3).close()
    LOGGER.info('%s => OK' % worker_host)
    return True
    except:
    LOGGER.info('%s => FAILURE' % worker_host)
    return False


    signal.signal(signal.SIGINT, lambda signum, fame: os._exit(0))
    logging.basicConfig(level=logging.DEBUG)
    gevent.spawn(refresh_workers)
    serve_forever()