pavan538 · May 27, 2019 11:10
diff --git a/.gitignore b/.gitignore
 *.pyc

 /bin
 /include
 /lib
diff --git a/README.rst b/README.rst
diff --git a/streamingjson.py b/streamingjson.py
 from django.utils import simplejson
 from django.utils.simplejson.encoder import (
    encode_basestring, encode_basestring_ascii, FLOAT_REPR, INFINITY)
 from types import GeneratorType


 class JSONEncoder(simplejson.JSONEncoder):
    """JSON encoder whose :meth:`iterencode` is driven by this module's
    :func:`_make_iterencode`, which encodes generators as JSON lists.
    """

    def iterencode(self, o, _one_shot=False):
        """Lazily encode ``o``, yielding JSON text fragments one at a time.

        Typical use::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        The body mirrors
        :meth:`django.utils.simplejson.encoder.JSONEncoder.iterencode`; it is
        repeated here only so that the patched
        :func:`streamingjson._make_iterencode` is the one that gets called.
        """
        # A dict of in-progress containers enables circular-reference checks;
        # None switches the check off.
        markers = {} if self.check_circular else None

        if self.ensure_ascii:
            string_encoder = encode_basestring_ascii
        else:
            string_encoder = encode_basestring

        if self.encoding != 'utf-8':
            source_encoding = self.encoding
            plain_encoder = string_encoder

            def string_encoder(text):
                # Byte strings are decoded with the configured encoding before
                # being handed to the underlying string encoder.
                if isinstance(text, str):
                    text = text.decode(source_encoding)
                return plain_encoder(text)

        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR,
                _inf=INFINITY, _neginf=-INFINITY):
            # Special values are detected by comparison only, so the check
            # does not depend on the platform's float internals.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                special = None

            if special is None:
                return _repr(o)
            if allow_nan:
                return special
            raise ValueError("Out of range float values are not JSON compliant: %r"
                % (o,))

        encode = _make_iterencode(
            markers, self.default, string_encoder, self.indent, floatstr,
            self.key_separator, self.item_separator, self.sort_keys,
            self.skipkeys, _one_shot)
        return encode(o, 0)


 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        GeneratorType=GeneratorType,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """
    This is a patched version of
    :func:`django.utils.simplejson.encoder.iterencode`.  Whenever it encounters
    a generator in the data structure, it encodes it as a JSON list.
    """
    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple, GeneratorType)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif isinstance(key, (int, long)):
                key = str(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif _skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple, GeneratorType)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple, GeneratorType)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode
diff --git a/tests.py b/tests.py
 from streamingjson import JSONEncoder


 data = {'ints': (x for x in xrange(100000))}
 for s in JSONEncoder().iterencode(data):
    print s
	from django.utils import simplejson
	from django.utils.simplejson.encoder import (
	encode_basestring, encode_basestring_ascii, FLOAT_REPR, INFINITY)
	from types import GeneratorType


	class JSONEncoder(simplejson.JSONEncoder):
	    """JSON encoder whose :meth:`iterencode` is driven by this module's
	    :func:`_make_iterencode`, which encodes generators as JSON lists.
	    """

	    def iterencode(self, o, _one_shot=False):
	        """Lazily encode ``o``, yielding JSON text fragments one at a time.

	        Typical use::

	            for chunk in JSONEncoder().iterencode(bigobject):
	                mysocket.write(chunk)

	        The body mirrors
	        :meth:`django.utils.simplejson.encoder.JSONEncoder.iterencode`; it is
	        repeated here only so that the patched
	        :func:`streamingjson._make_iterencode` is the one that gets called.
	        """
	        # A dict of in-progress containers enables circular-reference checks;
	        # None switches the check off.
	        markers = {} if self.check_circular else None

	        if self.ensure_ascii:
	            string_encoder = encode_basestring_ascii
	        else:
	            string_encoder = encode_basestring

	        if self.encoding != 'utf-8':
	            source_encoding = self.encoding
	            plain_encoder = string_encoder

	            def string_encoder(text):
	                # Byte strings are decoded with the configured encoding before
	                # being handed to the underlying string encoder.
	                if isinstance(text, str):
	                    text = text.decode(source_encoding)
	                return plain_encoder(text)

	        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR,
	                _inf=INFINITY, _neginf=-INFINITY):
	            # Special values are detected by comparison only, so the check
	            # does not depend on the platform's float internals.
	            if o != o:
	                special = 'NaN'
	            elif o == _inf:
	                special = 'Infinity'
	            elif o == _neginf:
	                special = '-Infinity'
	            else:
	                special = None

	            if special is None:
	                return _repr(o)
	            if allow_nan:
	                return special
	            raise ValueError("Out of range float values are not JSON compliant: %r"
	                % (o,))

	        encode = _make_iterencode(
	            markers, self.default, string_encoder, self.indent, floatstr,
	            self.key_separator, self.item_separator, self.sort_keys,
	            self.skipkeys, _one_shot)
	        return encode(o, 0)


	def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
	## HACK: hand-optimized bytecode; turn globals into locals
	False=False,
	True=True,
	ValueError=ValueError,
	basestring=basestring,
	dict=dict,
	float=float,
	GeneratorType=GeneratorType,
	id=id,
	int=int,
	isinstance=isinstance,
	list=list,
	long=long,
	str=str,
	tuple=tuple,
	):
	"""
	This is a patched version of
	:func:`django.utils.simplejson.encoder.iterencode`. Whenever it encounters
	a generator in the data structure, it encodes it as a JSON list.
	"""
	def _iterencode_list(lst, _current_indent_level):
	if not lst:
	yield '[]'
	return
	if markers is not None:
	markerid = id(lst)
	if markerid in markers:
	raise ValueError("Circular reference detected")
	markers[markerid] = lst
	buf = '['
	if _indent is not None:
	_current_indent_level += 1
	newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
	separator = _item_separator + newline_indent
	buf += newline_indent
	else:
	newline_indent = None
	separator = _item_separator
	first = True
	for value in lst:
	if first:
	first = False
	else:
	buf = separator
	if isinstance(value, basestring):
	yield buf + _encoder(value)
	elif value is None:
	yield buf + 'null'
	elif value is True:
	yield buf + 'true'
	elif value is False:
	yield buf + 'false'
	elif isinstance(value, (int, long)):
	yield buf + str(value)
	elif isinstance(value, float):
	yield buf + _floatstr(value)
	else:
	yield buf
	if isinstance(value, (list, tuple, GeneratorType)):
	chunks = _iterencode_list(value, _current_indent_level)
	elif isinstance(value, dict):
	chunks = _iterencode_dict(value, _current_indent_level)
	else:
	chunks = _iterencode(value, _current_indent_level)
	for chunk in chunks:
	yield chunk
	if newline_indent is not None:
	_current_indent_level -= 1
	yield '\n' + (' ' * (_indent * _current_indent_level))
	yield ']'
	if markers is not None:
	del markers[markerid]

	def _iterencode_dict(dct, _current_indent_level):
	if not dct:
	yield '{}'
	return
	if markers is not None:
	markerid = id(dct)
	if markerid in markers:
	raise ValueError("Circular reference detected")
	markers[markerid] = dct
	yield '{'
	if _indent is not None:
	_current_indent_level += 1
	newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
	item_separator = _item_separator + newline_indent
	yield newline_indent
	else:
	newline_indent = None
	item_separator = _item_separator
	first = True
	if _sort_keys:
	items = dct.items()
	items.sort(key=lambda kv: kv[0])
	else:
	items = dct.iteritems()
	for key, value in items:
	if isinstance(key, basestring):
	pass
	# JavaScript is weakly typed for these, so it makes sense to
	# also allow them. Many encoders seem to do something like this.
	elif isinstance(key, float):
	key = _floatstr(key)
	elif isinstance(key, (int, long)):
	key = str(key)
	elif key is True:
	key = 'true'
	elif key is False:
	key = 'false'
	elif key is None:
	key = 'null'
	elif _skipkeys:
	continue
	else:
	raise TypeError("key %r is not a string" % (key,))
	if first:
	first = False
	else:
	yield item_separator
	yield _encoder(key)
	yield _key_separator
	if isinstance(value, basestring):
	yield _encoder(value)
	elif value is None:
	yield 'null'
	elif value is True:
	yield 'true'
	elif value is False:
	yield 'false'
	elif isinstance(value, (int, long)):
	yield str(value)
	elif isinstance(value, float):
	yield _floatstr(value)
	else:
	if isinstance(value, (list, tuple, GeneratorType)):
	chunks = _iterencode_list(value, _current_indent_level)
	elif isinstance(value, dict):
	chunks = _iterencode_dict(value, _current_indent_level)
	else:
	chunks = _iterencode(value, _current_indent_level)
	for chunk in chunks:
	yield chunk
	if newline_indent is not None:
	_current_indent_level -= 1
	yield '\n' + (' ' * (_indent * _current_indent_level))
	yield '}'
	if markers is not None:
	del markers[markerid]

	def _iterencode(o, _current_indent_level):
	if isinstance(o, basestring):
	yield _encoder(o)
	elif o is None:
	yield 'null'
	elif o is True:
	yield 'true'
	elif o is False:
	yield 'false'
	elif isinstance(o, (int, long)):
	yield str(o)
	elif isinstance(o, float):
	yield _floatstr(o)
	elif isinstance(o, (list, tuple, GeneratorType)):
	for chunk in _iterencode_list(o, _current_indent_level):
	yield chunk
	elif isinstance(o, dict):
	for chunk in _iterencode_dict(o, _current_indent_level):
	yield chunk
	else:
	if markers is not None:
	markerid = id(o)
	if markerid in markers:
	raise ValueError("Circular reference detected")
	markers[markerid] = o
	o = _default(o)
	for chunk in _iterencode(o, _current_indent_level):
	yield chunk
	if markers is not None:
	del markers[markerid]

	return _iterencode
	from streamingjson import JSONEncoder


	data = {'ints': (x for x in xrange(100000))}
	for s in JSONEncoder().iterencode(data):
	print s