"""Utils from django-revproxy, slightly adjusted"""

import logging
import re

from wsgiref.util import is_hop_by_hop

try:
    # Python 3
    from http.cookies import SimpleCookie
    COOKIE_PREFIX = ''
except ImportError:
    # Python 2: SimpleCookie.load expects the 'Set-Cookie: ' prefix
    from Cookie import SimpleCookie
    COOKIE_PREFIX = 'Set-Cookie: '

logger = logging.getLogger('revproxy.cookies')

#: Headers that can be ignored by the required_header function
IGNORE_HEADERS = (
    'HTTP_ACCEPT_ENCODING',  # We want content to be uncompressed, so
                             # we remove the Accept-Encoding header from
                             # the original request
    'HTTP_HOST',
    'HTTP_REMOTE_USER',
)

# Default from HTTP RFC 2616
# See: http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.7.1
#: Default charset, used when the Content-Type header carries none
DEFAULT_CHARSET = 'latin-1'

#: Content types that are treated as HTML
HTML_CONTENT_TYPES = (
    'text/html',
    'application/xhtml+xml',
)

#: Minimum content length (in bytes) required for a response to be
#: turned into a stream
MIN_STREAMING_LENGTH = 4 * 1024  # 4KB

#: Regex used to find the charset in an HTML content type
_get_charset_re = re.compile(r';\s*charset=(?P<charset>[^\s;]+)', re.I)


def is_html_content_type(content_type):
    """Check whether the given string is an HTML content type.

    :param content_type: String representing a Content-Type header value
    :returns: True if content_type is a valid HTML content type
    """
    for html_content_type in HTML_CONTENT_TYPES:
        if content_type.startswith(html_content_type):
            return True

    return False
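
# Illustrative usage (not part of the original module): the check is a
# simple prefix match, so charset parameters after the media type still
# count as HTML.
#
#   >>> is_html_content_type('text/html; charset=utf-8')
#   True
#   >>> is_html_content_type('application/json')
#   False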


def should_stream(proxy_response):
    """Decide whether proxy_response should be converted into a stream.

    A response is streamed unless it is HTML, or unless its
    Content-Length is known and no larger than MIN_STREAMING_LENGTH.

    :param proxy_response: An instance of urllib3.response.HTTPResponse
    :returns: True if proxy_response should be treated as a stream
    """
    # Default to '' so a missing Content-Type header doesn't break the
    # HTML check below
    content_type = proxy_response.headers.get('Content-Type', '')

    if is_html_content_type(content_type):
        return False

    try:
        content_length = int(proxy_response.headers.get('Content-Length', 0))
    except ValueError:
        content_length = 0

    if not content_length or content_length > MIN_STREAMING_LENGTH:
        return True

    return False
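
# A minimal sketch of the streaming decision (the _FakeResponse class is a
# hypothetical stand-in for urllib3.response.HTTPResponse):
#
#   >>> class _FakeResponse(object):
#   ...     headers = {'Content-Type': 'application/pdf'}
#   >>> should_stream(_FakeResponse())  # unknown length -> stream
#   True
#   >>> _FakeResponse.headers = {'Content-Type': 'text/html'}
#   >>> should_stream(_FakeResponse())  # HTML is buffered, not streamed
#   False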


def get_charset(content_type):
    """Retrieve the charset from a Content-Type header value.

    If the content type carries no charset, the charset defined in
    DEFAULT_CHARSET is returned.

    :param content_type: A string containing a Content-Type header value
    :returns: A string containing the charset
    """
    if not content_type:
        return DEFAULT_CHARSET

    matched = _get_charset_re.search(content_type)
    if matched:
        # Extract the charset and strip its double quotes
        return matched.group('charset').replace('"', '')
    return DEFAULT_CHARSET
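
# Illustrative usage (not part of the original module):
#
#   >>> get_charset('text/html; charset="utf-8"')
#   'utf-8'
#   >>> get_charset('application/json')  # falls back to DEFAULT_CHARSET
#   'latin-1'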


def required_header(header):
    """Check whether the given WSGI META key is an essential header.

    :param header: A string representing a WSGI META key
    :returns: True if the header is required, False otherwise
    """
    if header in IGNORE_HEADERS:
        return False

    if header.startswith('HTTP_') or header == 'CONTENT_TYPE':
        return True

    return False
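
# Illustrative usage (not part of the original module): META keys are kept
# only if they are real headers and not listed in IGNORE_HEADERS.
#
#   >>> required_header('HTTP_X_FORWARDED_FOR')
#   True
#   >>> required_header('HTTP_HOST')  # listed in IGNORE_HEADERS
#   False
#   >>> required_header('SERVER_PORT')  # WSGI variable, not a header
#   False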


def set_response_headers(response, response_headers):
    """Copy response_headers onto response, skipping hop-by-hop headers
    and Set-Cookie."""
    for header, value in response_headers.items():
        if is_hop_by_hop(header) or header.lower() == 'set-cookie':
            continue

        response[header.title()] = value

    # HttpResponse.headers replaced the private _headers attribute in
    # Django 3.2; fall back to _headers on older versions
    logger.debug('Response headers: %s',
                 getattr(response, 'headers',
                         getattr(response, '_headers', None)))
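
# A minimal usage sketch, assuming a django.http.HttpResponse (which
# supports item assignment for headers):
#
#   response = HttpResponse()
#   set_response_headers(response, {'Content-Type': 'text/plain',
#                                   'Connection': 'keep-alive'})
#   # Connection is hop-by-hop and is dropped; Content-Type is copied.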


def normalize_request_headers(request):
    """Build a dictionary of headers from request.META, stripping the
    'HTTP_' prefix and replacing '_' with '-'.

    :param request: The HttpRequest whose headers will be normalized
    :returns: A dictionary with the normalized headers
    """
    norm_headers = {}
    for header, value in request.META.items():
        if required_header(header):
            norm_header = header.replace('HTTP_', '').title().replace('_', '-')
            norm_headers[norm_header] = value

    return norm_headers
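
# Illustrative mapping (hypothetical request.META values):
#
#   {'HTTP_X_FORWARDED_FOR': '10.0.0.1', 'CONTENT_TYPE': 'text/plain'}
#   is normalized to
#   {'X-Forwarded-For': '10.0.0.1', 'Content-Type': 'text/plain'}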


def encode_items(items):
    """Encode all elements of the given list of items as UTF-8.

    :param items: A list of (key, values) tuples, where values is itself
        a list of strings
    :returns: A list of (key, value) tuples with both members encoded
        as UTF-8 bytes
    """
    encoded = []
    for key, values in items:
        for value in values:
            encoded.append((key.encode('utf-8'), value.encode('utf-8')))
    return encoded
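
# Illustrative usage (not part of the original module); note that each
# value list is flattened into one (key, value) pair per value:
#
#   >>> encode_items([('tag', ['a', 'b']), ('q', ['x'])])
#   [(b'tag', b'a'), (b'tag', b'b'), (b'q', b'x')]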


def cookie_from_string(cookie_string, strict_cookies=False):
    """Parse a Set-Cookie HTTP header value.

    The return value of this function is used as the parameters for
    Django's response.set_cookie method. Because set_cookie doesn't
    have a comment parameter, this cookie attribute is ignored.

    :param cookie_string: A string representing a valid cookie
    :param strict_cookies: Whether to only accept RFC-compliant cookies
    :returns: A dictionary containing the cookie_string attributes,
        or None if the cookie could not be parsed
    """
    if strict_cookies:
        cookies = SimpleCookie(COOKIE_PREFIX + cookie_string)
        if not cookies.keys():
            return None
        cookie_name, = cookies.keys()
        cookie_dict = {k: v for k, v in cookies[cookie_name].items()
                       if v and k != 'comment'}
        cookie_dict['key'] = cookie_name
        cookie_dict['value'] = cookies[cookie_name].value
        return cookie_dict

    valid_attrs = ('path', 'domain', 'comment', 'expires',
                   'max_age', 'httponly', 'secure')

    cookie_dict = {}

    cookie_parts = cookie_string.split(';')
    try:
        cookie_dict['key'], cookie_dict['value'] = \
            cookie_parts[0].split('=', 1)
        cookie_dict['value'] = cookie_dict['value'].replace('"', '')
    except ValueError:
        logger.warning('Invalid cookie: `%s`', cookie_string)
        return None

    if cookie_dict['value'].startswith('='):
        logger.warning('Invalid cookie: `%s`', cookie_string)
        return None

    for part in cookie_parts[1:]:
        if '=' in part:
            attr, value = part.split('=', 1)
            value = value.strip()
        else:
            attr = part
            value = ''

        attr = attr.strip().lower()
        if not attr:
            continue

        if attr in valid_attrs:
            if attr in ('httponly', 'secure'):
                cookie_dict[attr] = True
            elif attr == 'comment':
                # Ignore the comment attribute, as explained in the
                # function docstring
                continue
            else:
                cookie_dict[attr] = value
        else:
            logger.warning('Unknown cookie attribute %s', attr)

    return cookie_dict
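
# Illustrative behaviour of the lax parser above (not part of the original
# module); the attribute names mirror Django's set_cookie parameters:
#
#   >>> cookie_from_string('sessionid=abc123; Path=/; HttpOnly')
#   {'key': 'sessionid', 'value': 'abc123', 'path': '/', 'httponly': True}
#   >>> cookie_from_string('no-equals-sign') is None
#   True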