# Source code for pycounter.csvhelper

"""Read CSV as unicode from both python 2 and 3 transparently."""
from __future__ import absolute_import

import csv
import warnings

import six


# noinspection PyUnusedLocal
class UnicodeReader(six.Iterator):
    """CSV reader that can handle unicode.

    Must be used as a context manager::

        with UnicodeReader('myfile.csv') as reader:
            pass # do things with reader

    Iterating over the reader yields one list of text cells per CSV row.

    :param filename: path to file to open
    :param dialect: a csv.Dialect instance or dialect name
    :param encoding: text encoding of file
    :param fallback_encoding: encoding to fall back to if default
             encoding fails; gives warning if it's used.

    All other parameters will be passed through to csv.reader()
    """

    def __init__(
        self,
        filename,
        dialect=csv.excel,
        encoding="utf-8",
        fallback_encoding="latin-1",
        **kwargs
    ):
        self.filename = filename
        self.dialect = dialect
        self.encoding = encoding
        self.kwargs = kwargs
        # Both are populated by __enter__; None until the context is entered.
        self.fileobj = None
        self.reader = None
        self.fallback_encoding = fallback_encoding

    def __enter__(self):
        """Open the file, settle on an encoding, and build the csv reader.

        The whole file is read once up front to check that it decodes with
        ``self.encoding``. If it does not, a warning is issued and
        ``self.encoding`` is replaced by ``self.fallback_encoding``.

        If anything fails while setting up, the file that was opened is
        closed before the exception propagates (``__exit__`` is not called
        when ``__enter__`` raises, so nothing else would close it).

        :return: this reader, ready for iteration
        """
        try:
            if six.PY3:
                self.fileobj = open(
                    self.filename, "rt", encoding=self.encoding, newline=""
                )
                try:
                    self.fileobj.read()
                except UnicodeDecodeError:
                    # Release the handle opened with the failed encoding
                    # before it is replaced; otherwise it would leak.
                    self.fileobj.close()
                    warnings.warn(
                        "Decoding with '%s' codec failed; falling "
                        "back to '%s'" % (self.encoding, self.fallback_encoding)
                    )
                    self.fileobj = open(
                        self.filename,
                        "rt",
                        encoding=self.fallback_encoding,
                        newline="",
                    )
                    self.encoding = self.fallback_encoding
                else:
                    # Rewind after the trial read so the csv reader starts
                    # at the first row.
                    self.fileobj.seek(0)
            else:
                # Python 2: csv works on bytes, so the file stays binary and
                # cells are decoded one by one in __next__.
                self.fileobj = open(self.filename, "rb")
                try:
                    self.fileobj.read().decode(self.encoding)
                except UnicodeDecodeError:
                    warnings.warn(
                        "Decoding with '%s' codec failed; falling "
                        "back to '%s'" % (self.encoding, self.fallback_encoding)
                    )
                    self.encoding = self.fallback_encoding
                self.fileobj.seek(0)
            self.reader = csv.reader(
                self.fileobj, dialect=self.dialect, **self.kwargs
            )
        except BaseException:
            self._close()
            raise
        return self

    def __exit__(self, type_, value, traceback):
        """Close the underlying file; exceptions are not suppressed."""
        self._close()

    def _close(self):
        """Close the underlying file if one has been opened."""
        if self.fileobj is not None:
            self.fileobj.close()

    def __next__(self):
        """Return the next row as a list of text cells.

        On Python 2 the csv module yields byte strings, so each cell is
        decoded with ``self.encoding``; on Python 3 the row is returned
        as-is.
        """
        row = next(self.reader)
        if six.PY3:
            return row
        return [s.decode(self.encoding) for s in row]

    def __iter__(self):
        """Return self; the reader is its own iterator."""
        return self


# noinspection PyUnusedLocal
class UnicodeWriter(object):
    """CSV writer that can handle unicode.

    Must be used as a context manager::

        with UnicodeWriter('myfile.csv') as writer:
            pass # do things with writer

    :param filename: path to file to open
    :param dialect: a csv.Dialect instance or dialect name
    :param encoding: text encoding of file
    :param lineterminator: string written at the end of each row;
             passed to csv.writer()

    All other parameters will be passed through to csv.writer()
    """

    def __init__(
        self,
        filename,
        dialect=csv.excel,
        encoding="utf-8",
        lineterminator="\n",
        **kwargs
    ):
        self.filename = filename
        self.dialect = dialect
        self.encoding = encoding
        self.lineterminator = lineterminator
        self.kwargs = kwargs
        # Both are populated by __enter__; None until the context is entered.
        self.writer = None
        self.fileobj = None

    def __enter__(self):
        """Open the output file and build the csv writer.

        If constructing the csv writer fails (for example because of an
        unknown dialect name), the file that was just opened is closed
        before the exception propagates; ``__exit__`` is not called when
        ``__enter__`` raises, so nothing else would close it.

        :return: this writer
        """
        if six.PY3:
            self.fileobj = open(
                self.filename, "wt", encoding=self.encoding, newline=""
            )
        else:
            # Python 2: csv writes bytes, so cells are encoded in writerow.
            self.fileobj = open(self.filename, "wb")
        try:
            self.writer = csv.writer(
                self.fileobj,
                dialect=self.dialect,
                lineterminator=self.lineterminator,
                **self.kwargs
            )
        except BaseException:
            self.fileobj.close()
            raise
        return self

    def __exit__(self, type_, value, traceback):
        """Close the underlying file; exceptions are not suppressed."""
        self.fileobj.close()

    def writerow(self, row):
        """Write a row to the output.

        On Python 2 every cell is encoded with ``self.encoding`` first, and
        falsy cells (such as None) are written as empty strings.

        :param row: list of cells to write to the file
        """
        if not six.PY3:
            row = [(s or "").encode(self.encoding) for s in row]
        self.writer.writerow(row)

    def writerows(self, rows):
        """Write many rows to the output.

        :param rows: list of lists of cells to write
        """
        for row in rows:
            self.writerow(row)