xflr6 · November 1, 2025 10:48
diff --git a/read_text.py b/read_text.py
 """Compare different stdlib methods to read a UTF-8 encoded text file."""

 import codecs
 import io
 import os
 import pathlib
 import time

 TEST_FILE = pathlib.Path('read_text.txt')

 ENCODING = 'utf-8'

 LINE = '\u201eFix, Schwyz!\u201c, qu\xe4kt J\xfcrgen bl\xf6d vom Pa\xdf'

 N_LINES = 1_000_000


 def setup_test_file(path: os.PathLike[str] | str = TEST_FILE, /, *,
                    n_lines: int = N_LINES,
                    encoding: str = ENCODING) -> pathlib.Path:
    """Create the benchmark input file if it is missing and check its size.

    The file consists of ``n_lines`` copies of ``LINE``, each followed by
    a line terminator. An existing file is never rewritten; it only has
    to pass the size check.

    Args:
        path: Location of the test file (positional-only).
        n_lines: Number of lines the file is expected to contain.
        encoding: Text encoding used for writing and for the size check.

    Returns:
        The test file location as a ``pathlib.Path``.

    Raises:
        AssertionError: If the file is missing after setup or its size in
            bytes differs from the expected size.
    """
    target = path if isinstance(path, pathlib.Path) else pathlib.Path(path)

    if not target.exists():
        with target.open('w', encoding=encoding) as f:
            # Text mode turns each '\n' into os.linesep on disk, which is
            # why the size check below is based on os.linesep.
            f.writelines(LINE + '\n' for _ in range(n_lines))

    assert target.exists()
    bytes_per_line = len((LINE + os.linesep).encode(encoding))
    expected_size = bytes_per_line * n_lines
    size = target.stat().st_size
    assert size == expected_size, f'{size=} should be {expected_size}'
    return target


 def read_text_open(path: os.PathLike[str] | str, /, *,
                   encoding: str = ENCODING) -> str:
    """Return the whole file as text, read via the built-in ``open()``.

    Args:
        path: File to read (positional-only).
        encoding: Text encoding used to decode the file.

    Returns:
        The decoded file contents.
    """
    with open(path, mode='r', encoding=encoding) as text_file:
        content = text_file.read()
    return content


 def read_text_pathlib(path: os.PathLike[str] | str, /, *,
                      encoding: str = ENCODING) -> str:
    """Return the whole file as text, read via ``pathlib.Path.read_text()``.

    Args:
        path: File to read (positional-only).
        encoding: Text encoding used to decode the file.

    Returns:
        The decoded file contents.
    """
    source = pathlib.Path(path)
    return source.read_text(encoding=encoding)


 def read_text_io(path: os.PathLike[str] | str, /, *,
                 encoding: str = ENCODING) -> str:
    """Return the whole file as text, decoded by an ``io.TextIOWrapper``.

    The file is opened in binary mode and wrapped explicitly; the wrapper's
    ``newline`` argument is left at its default.

    Args:
        path: File to read (positional-only).
        encoding: Text encoding handed to the wrapper.

    Returns:
        The decoded file contents.
    """
    # The binary handle gets its own context manager so that it is closed
    # even if constructing the wrapper raises (e.g. unknown encoding).
    with open(path, mode='rb') as raw, io.TextIOWrapper(raw, encoding) as f:
        result = f.read()
    # Closing the wrapper closes the underlying binary handle as well.
    assert f.closed
    return result


 def read_text_codecs(path: os.PathLike[str] | str, /, *,
                     encoding: str = ENCODING) -> str:
    """Return the whole file as text, decoded by a ``codecs`` stream reader.

    The file is opened in binary mode and wrapped in the stream reader
    class that ``codecs.getreader()`` returns for ``encoding``.

    Args:
        path: File to read (positional-only).
        encoding: Name of the codec used to decode the file.

    Returns:
        The decoded file contents.
    """
    # Look up the codec first, as before, so an unknown encoding fails
    # before any file is opened.
    make_reader = codecs.getreader(encoding)
    # The binary handle owns its own context manager, so it is closed no
    # matter what happens while building or using the reader.
    with open(path, mode='rb') as raw:
        result = make_reader(raw).read()
    assert raw.closed
    return result


 if __name__ == '__main__':
    # Time every reader once on the same file and sanity-check the length
    # of the text it returns.
    test_path = setup_test_file()
    readers = (read_text_open,
               read_text_pathlib,
               read_text_io,
               read_text_codecs)
    for read_func in readers:
        print(read_func, end=' ')
        started_ns = time.perf_counter_ns()
        result = read_func(test_path)
        elapsed_ns = time.perf_counter_ns() - started_ns
        print(elapsed_ns / 1_000_000_000, 'seconds')

        assert isinstance(result, str)
        # stdlib codecs module lacks universal newline support (PEP 278),
        # so its result is expected to keep the platform line separator.
        if read_func is read_text_codecs:
            linesep = os.linesep
        else:
            linesep = '\n'
        expected = N_LINES * len(LINE + linesep)
        assert len(result) == expected, f'{len(result)=} should be {expected}'
	"""Compare different stdlib methods to read a UTF-8 encoded text file."""

	import codecs
	import io
	import os
	import pathlib
	import time

	TEST_FILE = pathlib.Path('read_text.txt')

	ENCODING = 'utf-8'

	LINE = '\u201eFix, Schwyz!\u201c, qu\xe4kt J\xfcrgen bl\xf6d vom Pa\xdf'

	N_LINES = 1_000_000


	def setup_test_file(path: os.PathLike[str] | str = TEST_FILE, /, *,
	                    n_lines: int = N_LINES,
	                    encoding: str = ENCODING) -> pathlib.Path:
	    """Create the benchmark input file if it is missing and check its size.

	    The file consists of ``n_lines`` copies of ``LINE``, each followed by
	    a line terminator. An existing file is never rewritten; it only has
	    to pass the size check.

	    Args:
	        path: Location of the test file (positional-only).
	        n_lines: Number of lines the file is expected to contain.
	        encoding: Text encoding used for writing and for the size check.

	    Returns:
	        The test file location as a ``pathlib.Path``.

	    Raises:
	        AssertionError: If the file is missing after setup or its size in
	            bytes differs from the expected size.
	    """
	    if not isinstance(path, pathlib.Path):
	        path = pathlib.Path(path)
	    if not path.exists():
	        with path.open('w', encoding=encoding) as f:
	            for _ in range(n_lines):
	                print(LINE, file=f)

	    assert path.exists()
	    # Text mode wrote os.linesep for every newline, so the expected size
	    # is computed with os.linesep as well.
	    expected_size = len((LINE + os.linesep).encode(encoding)) * n_lines
	    size = path.stat().st_size
	    assert size == expected_size, f'{size=} should be {expected_size}'
	    return path


	def read_text_open(path: os.PathLike[str] | str, /, *,
	                   encoding: str = ENCODING) -> str:
	    """Return the whole file as text, read via the built-in ``open()``.

	    Args:
	        path: File to read (positional-only).
	        encoding: Text encoding used to decode the file.

	    Returns:
	        The decoded file contents.
	    """
	    with open(path, encoding=encoding) as f:
	        return f.read()


	def read_text_pathlib(path: os.PathLike[str] | str, /, *,
	                      encoding: str = ENCODING) -> str:
	    """Return the whole file as text, read via ``pathlib.Path.read_text()``.

	    Args:
	        path: File to read (positional-only).
	        encoding: Text encoding used to decode the file.

	    Returns:
	        The decoded file contents.
	    """
	    return pathlib.Path(path).read_text(encoding=encoding)


	def read_text_io(path: os.PathLike[str] | str, /, *,
	                 encoding: str = ENCODING) -> str:
	    """Return the whole file as text, decoded by an ``io.TextIOWrapper``.

	    The file is opened in binary mode and wrapped explicitly; the wrapper's
	    ``newline`` argument is left at its default.

	    Args:
	        path: File to read (positional-only).
	        encoding: Text encoding handed to the wrapper.

	    Returns:
	        The decoded file contents.
	    """
	    # The binary handle gets its own context manager so that it is closed
	    # even if constructing the wrapper raises (e.g. unknown encoding).
	    with open(path, mode='rb') as raw, io.TextIOWrapper(raw, encoding) as f:
	        result = f.read()
	    # Closing the wrapper closes the underlying binary handle as well.
	    assert f.closed
	    return result


	def read_text_codecs(path: os.PathLike[str] | str, /, *,
	                     encoding: str = ENCODING) -> str:
	    """Return the whole file as text, decoded by a ``codecs`` stream reader.

	    The file is opened in binary mode and wrapped in the stream reader
	    class that ``codecs.getreader()`` returns for ``encoding``.

	    Args:
	        path: File to read (positional-only).
	        encoding: Name of the codec used to decode the file.

	    Returns:
	        The decoded file contents.
	    """
	    # Look up the codec first, as before, so an unknown encoding fails
	    # before any file is opened.
	    make_reader = codecs.getreader(encoding)
	    # The binary handle owns its own context manager, so it is closed no
	    # matter what happens while building or using the reader.
	    with open(path, mode='rb') as raw:
	        result = make_reader(raw).read()
	    assert raw.closed
	    return result


	if __name__ == '__main__':
	    # Time every reader once on the same file and sanity-check the length
	    # of the text it returns.
	    path = setup_test_file()
	    for read_func in [read_text_open,
	                      read_text_pathlib,
	                      read_text_io,
	                      read_text_codecs]:
	        print(read_func, end=' ')
	        start = time.perf_counter_ns()
	        result = read_func(path)
	        duration = (time.perf_counter_ns() - start) / 1_000_000_000
	        print(duration, 'seconds')

	        assert isinstance(result, str)
	        # stdlib codecs module lacks universal newline support (PEP 278),
	        # so its result is expected to keep the platform line separator.
	        linesep = os.linesep if read_func is read_text_codecs else '\n'
	        expected = len(LINE + linesep) * N_LINES
	        assert len(result) == expected, f'{len(result)=} should be {expected}'
No results found