# coding:utf-8
# Copyright 2011 litl, LLC. All Rights Reserved.

"""Flask blueprint that proxies GET/POST requests to another HTTP host.

Requests to ``/proxy/<host>/<file>`` are replayed against ``host`` (plain
HTTP) and the response is returned with root-relative URLs rewritten so that
they keep pointing at the proxy.  ``check_login`` is registered as a
``before_request`` hook, so it runs ahead of every proxied request.
"""

import httplib
import re
import urllib
import urlparse

from flask import Blueprint, request, Response, url_for
from werkzeug.datastructures import Headers
from werkzeug.exceptions import NotFound

from jiffy.admin.login import check_login

proxy = Blueprint('proxy', __name__)
proxy.before_request(check_login)

# Each pattern captures everything up to (but not including) the leading "/"
# of a root-relative URL, so the slash can be replaced by the proxy root.
# src="/...", action="/..." and href="/..." attributes.
HTML_REGEX = re.compile(r'((?:src|action|href)=["\'])/')
# $.get("/...") and $.post("/...") calls.
JQUERY_REGEX = re.compile(r'(\$\.(?:get|post)\(["\'])/')
# Assignments to window.location / document.location.
JS_LOCATION_REGEX = re.compile(r'((?:window|document)\.location.*=.*["\'])/')
# CSS url(/...) references, with or without quotes.
CSS_REGEX = re.compile(r'(url\(["\']?)/')

REGEXES = [HTML_REGEX, JQUERY_REGEX, JS_LOCATION_REGEX, CSS_REGEX]


def iterform(multidict):
    """Yield each (key, value) pair of *multidict* encoded as UTF-8.

    A key that holds several values yields one pair per value, which is the
    shape ``urllib.urlencode`` accepts for repeated fields.
    """
    for key in multidict:
        encoded_key = key.encode("utf8")
        for value in multidict.getlist(key):
            yield (encoded_key, value.encode("utf8"))


def parse_host_port(h):
    """Parses strings in the form host[:port]

    Returns a ``(host, port)`` tuple; the port defaults to 80 when *h*
    contains no colon.  Raises ``ValueError`` when the port part is not an
    integer.
    """
    host, colon, port = h.partition(":")
    if not colon:
        return (h, 80)
    return (host, int(port))


@proxy.route('/proxy/<host>/', methods=["GET", "POST"])
@proxy.route('/proxy/<host>/<path:file>', methods=["GET", "POST"])
def proxy_request(host, file=""):
    """Forward the current request to *host* and relay the response.

    ``host`` is a ``host[:port]`` string and ``file`` is the path to request
    on it (without the leading slash).  The query string and, for POST, the
    form fields of the incoming request are passed along.  Redirects and
    root-relative URLs in the response body are rewritten to go back through
    this view.

    Raises ``NotFound`` when the port in *host* is not a number.
    """
    # NOTE(review): any host reachable from this server can be requested
    # here; access control rests entirely on the before_request hook.
    try:
        hostname, port = parse_host_port(host)
    except ValueError:
        # A malformed port is a bad URL, not a server error.
        raise NotFound()

    # Whitelist a few headers to pass on
    request_headers = {}
    for name in ("Cookie", "Referer", "X-Csrf-Token"):
        if name in request.headers:
            request_headers[name] = request.headers[name]

    path = "/" + file
    if request.query_string:
        path = "%s?%s" % (path, request.query_string)

    form_data = None
    if request.method == "POST":
        form_data = urllib.urlencode(list(iterform(request.form)))
        request_headers["Content-Length"] = str(len(form_data))
        # httplib does not add a Content-Type on its own; without it the
        # upstream server has no way to know the body is a urlencoded form.
        request_headers["Content-Type"] = "application/x-www-form-urlencoded"

    conn = httplib.HTTPConnection(hostname, port)
    try:
        conn.request(request.method, path, body=form_data,
                     headers=request_headers)
        resp = conn.getresponse()
        # Read the body before the connection is closed; the parsed headers
        # and status stay available on ``resp`` afterwards.
        contents = resp.read()
    finally:
        conn.close()

    # Clean up response headers for forwarding
    response_headers = Headers()
    for key, value in resp.getheaders():
        # The body is rewritten below, so the upstream length no longer
        # applies; content-type is passed to Response separately.
        if key in ("content-length", "connection", "content-type"):
            continue
        if key == "set-cookie":
            # getheaders() on the response folds repeated headers into one
            # comma-joined value, and cookies may themselves contain commas
            # (e.g. in an Expires date).  Read the individual header lines
            # from the underlying message instead of splitting on ",".
            for cookie in resp.msg.getheaders(key):
                response_headers.add(key, cookie)
        else:
            response_headers.add(key, value)

    # If this is a redirect, munge the Location URL
    if "location" in response_headers:
        redirect = response_headers["location"]
        parsed = urlparse.urlparse(request.url)
        redirect_parsed = urlparse.urlparse(redirect)

        # A relative redirect stays on the host we just talked to.
        redirect_host = redirect_parsed.netloc
        if not redirect_host:
            redirect_host = "%s:%d" % (hostname, port)

        redirect_path = redirect_parsed.path
        if redirect_parsed.query:
            redirect_path += "?" + redirect_parsed.query

        # Strip the leading "/" because the route supplies its own.
        munged_path = url_for(".proxy_request", host=redirect_host,
                              file=redirect_path[1:])
        url = "%s://%s%s" % (parsed.scheme, parsed.netloc, munged_path)
        response_headers["location"] = url

    # Rewrite URLs in the content to point to our URL scheme instead.
    # Ugly, but seems to mostly work.
    # NOTE(review): this runs on every response regardless of content type,
    # so binary payloads are rewritten too -- confirm that is acceptable.
    root = url_for(".proxy_request", host=host)
    replacement = r'\1%s' % root
    for regex in REGEXES:
        contents = regex.sub(replacement, contents)

    flask_response = Response(response=contents,
                              status=resp.status,
                              headers=response_headers,
                              content_type=resp.getheader('content-type'))
    return flask_response