~sschwarzer/ftputil

927259c707a43adfcf4bf45b9e5939b3d83db40d — Stefan Schwarzer 12 years ago ade5a94
Fail as early as possible if a method gets a non-ASCII unicode path.

See ticket #53 at http://ftputil.sschwarzer.net/trac/ticket/53 and
the long comment in `test_real_ftp.TestUnicodePaths` for the
motivation for this change.
2 files changed, 142 insertions(+), 2 deletions(-)

M ftputil.py
M test/test_real_ftp.py
M ftputil.py => ftputil.py +41 -0
@@ 200,6 200,8 @@ class FTPHost(object):
        This method tries to reuse a child but will generate a new one
        if none is available.
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        host = self._available_child()
        if host is None:
            host = self._copy()


@@ 460,6 462,11 @@ class FTPHost(object):
        target (name). The argument `mode` is an empty string or 'a' for
        text copies, or 'b' for binary copies.
        """
        # Fail early if we get a unicode path which can't be encoded.
        # Only attempt to convert the remote `target` name to a
        # bytestring. We leave it to the local filesystem whether it
        # wants to support unicode filenames or not.
        target = str(target)
        source_file, target_file = self._upload_files(source, target, mode)
        file_transfer.copy_file(source_file, target_file,
                                conditional=False, callback=callback)


@@ 473,6 480,8 @@ class FTPHost(object):
        If an upload was necessary, return `True`, else return
        `False`.
        """
        # See comment in `upload`.
        target = str(target)
        source_file, target_file = self._upload_files(source, target, mode)
        return file_transfer.copy_file(source_file, target_file,
                                       conditional=True, callback=callback)


@@ 493,6 502,11 @@ class FTPHost(object):
        target (name). The argument mode is an empty string or 'a' for
        text copies, or 'b' for binary copies.
        """
        # Fail early if we get a unicode path which can't be encoded.
        # Only attempt to convert the remote `source` name to a
        # bytestring. We leave it to the local filesystem whether it
        # wants to support unicode filenames or not.
        source = str(source)
        source_file, target_file = self._download_files(source, target, mode)
        file_transfer.copy_file(source_file, target_file,
                                conditional=False, callback=callback)


@@ 506,6 520,8 @@ class FTPHost(object):
        If a download was necessary, return `True`, else return
        `False`.
        """
        # See comment in `download`.
        source = str(source)
        source_file, target_file = self._download_files(source, target, mode)
        return file_transfer.copy_file(source_file, target_file,
                                       conditional=True, callback=callback)


@@ 574,6 590,8 @@ class FTPHost(object):

    def chdir(self, path):
        """Change the directory on the host."""
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        ftp_error._try_with_oserror(self._session.cwd, path)
        # The path given as the argument is relative to the old current
        #  directory, therefore join them.


@@ 586,6 604,8 @@ class FTPHost(object):
        `mode` is ignored and only "supported" for similarity with
        `os.mkdir`.
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        # Ignore unused argument `mode`
        # pylint: disable=W0613
        def command(self, path):


@@ 600,6 620,8 @@ class FTPHost(object):
        of `mode` is only accepted for compatibility with
        `os.makedirs` but otherwise ignored.
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        # Ignore unused argument `mode`
        # pylint: disable=W0613
        path = self.path.abspath(path)


@@ 629,6 651,8 @@ class FTPHost(object):
        empty directories as well, - if the server allowed it. This
        is no longer supported.
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        path = self.path.abspath(path)
        if self.listdir(path):
            raise ftp_error.PermanentError("directory '%s' not empty" % path)


@@ 641,6 665,8 @@ class FTPHost(object):

    def remove(self, path):
        """Remove the given file or link."""
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        path = self.path.abspath(path)
        # Though `isfile` also includes links to files, `islink`
        #  is needed to include links to directories.


@@ 690,6 716,8 @@ class FTPHost(object):
        Implementation note: The code is copied from `shutil.rmtree`
        in Python 2.4 and adapted to ftputil.
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        # The following code is an adapted version of Python 2.4's
        #  `shutil.rmtree` function.
        if ignore_errors:


@@ 731,6 759,9 @@ class FTPHost(object):

    def rename(self, source, target):
        """Rename the source on the FTP host to target."""
        # Fail early if we get a unicode path which can't be encoded.
        source = str(source)
        target = str(target)
        # The following code is in spirit similar to the code in the
        #  method `_robust_ftp_command`, though we do _not_ do
        #  _everything_ imaginable.


@@ 783,6 814,8 @@ class FTPHost(object):
        If the directory listing from the server can't be parsed with
        any of the available parsers raise a `ParserError`.
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        return self._stat.listdir(path)

    def lstat(self, path, _exception_for_missing_path=True):


@@ 797,6 830,8 @@ class FTPHost(object):
        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        return self._stat.lstat(path, _exception_for_missing_path)

    def stat(self, path, _exception_for_missing_path=True):


@@ 812,6 847,8 @@ class FTPHost(object):
        (`_exception_for_missing_path` is an implementation aid and
        _not_ intended for use by ftputil clients.)
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        return self._stat.stat(path, _exception_for_missing_path)

    def walk(self, top, topdown=True, onerror=None):


@@ 820,6 857,8 @@ class FTPHost(object):
        dirnames, filenames) on each iteration, like the `os.walk`
        function (see http://docs.python.org/lib/os-file-dir.html ).
        """
        # Fail early if we get a unicode path which can't be encoded.
        top = str(top)
        # The following code is copied from `os.walk` in Python 2.4
        #  and adapted to ftputil.
        try:


@@ 855,6 894,8 @@ class FTPHost(object):
        the server. In particular, a non-existent path usually
        causes a `PermanentError`.
        """
        # Fail early if we get a unicode path which can't be encoded.
        path = str(path)
        path = self.path.abspath(path)
        def command(self, path):
            """Callback function."""

M test/test_real_ftp.py => test/test_real_ftp.py +101 -2
@@ 1,3 1,5 @@
# encoding: UTF-8

# Copyright (C) 2003-2010, Stefan Schwarzer <sschwarzer@sschwarzer.net>
# See the file LICENSE for licensing terms.



@@ 638,6 640,104 @@ class TestChmod(RealFTPTest):
        self.assert_mode(file_name, 0646)


class TestUnicodePaths(RealFTPTest):
    """Test if using unicode paths fails if they contain non-ASCII
    characters (see ticket #53).
    """

    # Actually, all of these methods will raise a `UnicodeEncodeError`
    #  at some point, at the latest when a unicode string is tried to
    #  be sent over a socket. However, it can be rather confusing to
    #  get an encoding error from deep inside of ftputil or even
    #  modules used by it (see ticket #53). Therefore, I added tests
    #  to fail as early as possible if a path is a unicode path that
    #  can't be converted to ASCII. Moreover, the code won't try to
    #  use unicode strings which come into existence intermediately.
 
    def assert_non_unicode(self, s):
        self.assertFalse(isinstance(s, unicode))

    def assert_unicode_error(self, function, *args):
        self.assertRaises(UnicodeEncodeError, function, *args)

    def test_open(self):
        host = self.host
        # Check if the name attribute is a bytestring, no matter if we
        #  passed in a bytestring or not beforehand.
        fobj = host.file("CONTENTS")
        try:
            self.assert_non_unicode(fobj.name)
        finally:
            fobj.close()
        fobj = host.file(u"CONTENTS")
        try:
            self.assert_non_unicode(fobj.name)
        finally:
            fobj.close()
        # Check if non-encodable unicode strings are refused.
        self.assert_unicode_error(host.file, u"ä")

    def test_upload(self):
        self.assert_unicode_error(self.host.upload, "ftputil.py", u"ä")

    def test_upload_if_newer(self):
        self.assert_unicode_error(self.host.upload_if_newer,
                                  "ftputil.py", u"ä")

    def test_download(self):
        self.assert_unicode_error(self.host.download, u"ä", "ok")

    def test_download_if_newer(self):
        self.assert_unicode_error(self.host.download_if_newer, u"ä", "ok")

    def test_chdir(self):
        # Unicode strings are ok if they can be encoded to ASCII.
        host = self.host
        host.chdir(".")
        self.assert_non_unicode(host.getcwd())
        host.chdir(u".")
        self.assert_non_unicode(host.getcwd())
        # Fail early if string can't be encoded to ASCII.
        self.assert_unicode_error(host.chdir, u"ä")

    def test_mkdir(self):
        self.assert_unicode_error(self.host.mkdir, u"ä")

    def test_makedirs(self):
        self.assert_unicode_error(self.host.makedirs, u"b/ä")

    def test_rmdir(self):
        self.assert_unicode_error(self.host.rmdir, u"ä")

    def test_remove(self):
        self.assert_unicode_error(self.host.remove, u"ä")

    def test_rmtree(self):
        self.assert_unicode_error(self.host.rmtree, u"ä")

    def test_rename(self):
        self.assert_unicode_error(self.host.rename, u"ä", "b")
        self.assert_unicode_error(self.host.rename, "b", u"ä")

    def test_listdir(self):
        self.assert_unicode_error(self.host.listdir, u"ä")

    def test_lstat(self):
        self.assert_unicode_error(self.host.lstat, u"ä")

    def test_stat(self):
        self.assert_unicode_error(self.host.stat, u"ä")

    def test_walk(self):
        generator = self.host.walk(u"ä")
        # The string test is only executed when the first item is
        #  requested from the generator.
        self.assert_unicode_error(generator.next)

    def test_chmod(self):
        self.assert_unicode_error(self.host.chmod, u"ä", 0644)


class TestOther(RealFTPTest):

    def test_open_for_reading(self):


@@ 700,6 800,5 @@ minutes because it has to wait to test the timezone calculation.
    server, user, password = get_login_data()
    unittest.main()
    import __main__
    #unittest.main(__main__,
    #              "TestFTPFiles.test_no_timed_out_children")
    #unittest.main(__main__, "TestUnicodePaths")