Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
1a72c01
have shutil.copyfileobj use sendfile() if possible
giampaolo May 22, 2018
77c4bfa
refactoring: use ctx manager
giampaolo May 22, 2018
2afa04a
add test with non-regular file obj
giampaolo May 22, 2018
542cd17
emulate case where file size can't be determined
giampaolo May 22, 2018
3520c6c
reference _copyfileobj_sendfile directly
giampaolo May 22, 2018
050a722
add test for offset() at certain position
giampaolo May 22, 2018
c1fd38a
add test for empty file
giampaolo May 22, 2018
2ab6317
add test for non regular file dst
giampaolo May 22, 2018
dacc3b6
small refactoring
giampaolo May 22, 2018
29d5881
leave copyfileobj() alone in order to not introduce any incompatibility
giampaolo May 24, 2018
114c4de
minor refactoring
giampaolo May 24, 2018
501c0dd
remove old test
giampaolo May 24, 2018
41b4506
update docstring
giampaolo May 24, 2018
fdb0973
update docstring; rename exception class
giampaolo May 24, 2018
64d2bc5
detect platforms which only support file to socket zero copy
giampaolo May 24, 2018
3a3c8ef
don't run test on platforms where file-to-file zero copy is not suppo…
giampaolo May 24, 2018
7861737
use tempfiles
giampaolo May 24, 2018
f3eecfd
reset verbosity
giampaolo May 24, 2018
f67ce57
add test for smaller chunks
giampaolo May 24, 2018
d457254
add big file size test
giampaolo May 24, 2018
8eb211d
add comment
giampaolo May 24, 2018
a0fe703
update doc
giampaolo May 24, 2018
7296147
update whatsnew doc
giampaolo May 24, 2018
d0c3bba
update doc
giampaolo May 24, 2018
2cafd80
catch Exception
giampaolo May 24, 2018
bb2a75f
remove unused import
giampaolo May 24, 2018
e5025dc
add test case for error on second sendfile() call
giampaolo May 24, 2018
a36a534
turn docstring into comment
giampaolo May 24, 2018
e9da3fa
add one more test
giampaolo May 24, 2018
9fcc2e7
update comment
giampaolo May 24, 2018
4f32242
add Misc/NEWS entry
giampaolo May 24, 2018
24ad25a
get rid of COPY_BUFSIZE; it belongs to another PR
giampaolo May 25, 2018
24d20e6
update doc
giampaolo May 25, 2018
8380b9b
set min bufsize to either 8MB or 128MB
giampaolo May 25, 2018
081f907
fix test
giampaolo May 25, 2018
5c242f3
make sendfile() raise immediately if filesystem is full on first call
giampaolo May 26, 2018
470dba8
use sendfile() only on Linux
giampaolo May 26, 2018
cbc79e1
raise err from None
giampaolo May 26, 2018
5ac745a
remove 'total' variable; it's not necessary
giampaolo May 27, 2018
eb1edd3
check out file position as an extra method to determine whether some …
giampaolo May 27, 2018
f83a990
refactoring
giampaolo May 27, 2018
7905991
test refactoring: move utility function out of test class
giampaolo May 27, 2018
6ac06c1
add assert
giampaolo May 27, 2018
9373b4c
remove unused class attribute
giampaolo May 27, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Doc/library/shutil.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ Directory and files operations
Raise :exc:`SameFileError` instead of :exc:`Error`. Since the former is
a subclass of the latter, this change is backward compatible.

.. versionchanged:: 3.8
Uses high-performance :func:`os.sendfile` (Linux only).

.. exception:: SameFileError

Expand Down Expand Up @@ -163,6 +165,9 @@ Directory and files operations
Added *follow_symlinks* argument.
Now returns path to the newly created file.

.. versionchanged:: 3.8
Uses high-performance :func:`os.sendfile` (Linux only).

.. function:: copy2(src, dst, *, follow_symlinks=True)

Identical to :func:`~shutil.copy` except that :func:`copy2`
Expand All @@ -185,6 +190,9 @@ Directory and files operations
file system attributes too (currently Linux only).
Now returns path to the newly created file.

.. versionchanged:: 3.8
Uses high-performance :func:`os.sendfile` (Linux only).

.. function:: ignore_patterns(\*patterns)

This factory function creates a function that can be used as a callable for
Expand Down
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.8.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,11 @@ Optimizations
first introduced in Python 3.4. It offers better performance and smaller
size compared to Protocol 3 available since Python 3.0.

* :func:`shutil.copyfile`, :func:`shutil.copy` and :func:`shutil.copy2` use
high-performance :func:`os.sendfile` on Linux, resulting in roughly a 20-25%
speedup of the copying operation and considerably lower CPU cycle
consumption. (Contributed by Giampaolo Rodola' and desbma in :issue:`33639`.)

Build and C API Changes
=======================

Expand Down
74 changes: 73 additions & 1 deletion Lib/shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
except ImportError:
getgrnam = None

# True when the zero-copy path may be attempted: os.sendfile() must exist
# and the interpreter must be running on Linux.
_HAS_LINUX_SENDFILE = (
    sys.platform.startswith("linux") and hasattr(os, "sendfile")
)

__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2",
"copytree", "move", "rmtree", "Error", "SpecialFileError",
"ExecError", "make_archive", "get_archive_formats",
Expand Down Expand Up @@ -72,6 +75,9 @@ class RegistryError(Exception):
"""Raised when a registry operation with the archiving
and unpacking registries fails"""

class _GiveupOnZeroCopy(Exception):
    """Signal that the os.sendfile() fast path is unusable for this copy.

    The caller is expected to catch it and fall back to a regular
    read()/write() copy.
    """


def copyfileobj(fsrc, fdst, length=16*1024):
"""copy data from file-like object fsrc to file-like object fdst"""
Expand All @@ -81,6 +87,72 @@ def copyfileobj(fsrc, fdst, length=16*1024):
break
fdst.write(buf)

def _copyfileobj_sendfile(fsrc, fdst):
    """Copy data from one regular file object to another by using the
    high-performance sendfile(2) syscall.

    Linux >= 2.6.33 is apparently the only platform able to do this.

    *fsrc* and *fdst* must be file objects exposing a real file
    descriptor through fileno().

    Raise _GiveupOnZeroCopy if sendfile() cannot be used and no data has
    been copied yet, so that the caller can safely fall back to a plain
    read()/write() copy.  Any other OSError (including ENOSPC) is
    propagated to the caller.
    """
    global _HAS_LINUX_SENDFILE
    try:
        infd = fsrc.fileno()
        outfd = fdst.fileno()
    except Exception as err:
        # File-like objects may fail here in many different ways
        # (AttributeError, io.UnsupportedOperation, ValueError...).
        raise _GiveupOnZeroCopy(err)  # not a regular file

    # Hopefully the whole file will be copied in a single call.
    # sendfile() is called in a loop until EOF is reached (0 return)
    # so a bufsize smaller or bigger than the actual file size
    # should not make any difference, also in case the file content
    # changes while being copied.
    try:
        blocksize = max(os.fstat(infd).st_size, 2 ** 23)  # min 8MB
    except OSError:
        blocksize = 2 ** 27  # 128MB
    # On 32-bit builds a count this large does not fit the C type used
    # by os.sendfile() and raises OverflowError; cap it at 1GB there.
    if sys.maxsize < 2 ** 32:
        blocksize = min(blocksize, 2 ** 30)

    offset = 0
    while True:
        try:
            sent = os.sendfile(outfd, infd, offset, blocksize)
        except OSError as err:
            if err.errno == errno.ENOTSOCK:
                # sendfile() on this platform (probably Linux < 2.6.33)
                # does not support copies between regular files (only
                # sockets).  Remember that for subsequent calls and let
                # the caller fall back right away.
                _HAS_LINUX_SENDFILE = False
                raise _GiveupOnZeroCopy(err)

            if err.errno == errno.ENOSPC:  # filesystem is full
                raise

            # Give up on first call and if no data was copied.
            if offset == 0 and os.lseek(outfd, 0, os.SEEK_CUR) == 0:
                raise _GiveupOnZeroCopy(err)

            # Some data was already written: falling back would corrupt
            # the destination, so let the error bubble up.
            raise
        else:
            if sent == 0:
                break  # EOF
            offset += sent

def _copyfileobj2(fsrc, fdst):
    # Copy between two filesystem files, preferring the zero-copy
    # sendfile(2) syscall (faster).  copyfile(), copy() and copy2() go
    # through this helper so that copyfileobj() itself stays untouched
    # and no unexpected breakage is introduced.  Using sendfile()
    # directly in copyfileobj() would be risky because:
    # - fdst cannot be open in "a"(ppend) mode
    # - fsrc and fdst may be opened in text mode
    # - fdst offset doesn't get updated
    # - fsrc may be a BufferedReader (which hides unread data in a buffer),
    #   GzipFile (which decompresses data), HTTPResponse (which decodes
    #   chunks).
    # - possibly others...
    if _HAS_LINUX_SENDFILE:
        try:
            outcome = _copyfileobj_sendfile(fsrc, fdst)
        except _GiveupOnZeroCopy:
            # Zero-copy is not possible here; use the generic copy below.
            pass
        else:
            return outcome

    return copyfileobj(fsrc, fdst)

def _samefile(src, dst):
# Macintosh, Unix.
if hasattr(os.path, 'samefile'):
Expand Down Expand Up @@ -119,7 +191,7 @@ def copyfile(src, dst, *, follow_symlinks=True):
else:
with open(src, 'rb') as fsrc:
with open(dst, 'wb') as fdst:
copyfileobj(fsrc, fdst)
_copyfileobj2(fsrc, fdst)
return dst

def copymode(src, dst, *, follow_symlinks=True):
Expand Down
218 changes: 217 additions & 1 deletion Lib/test/test_shutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@
import functools
import pathlib
import subprocess
import random
import string
import contextlib
import io
from shutil import (make_archive,
register_archive_format, unregister_archive_format,
get_archive_formats, Error, unpack_archive,
register_unpack_format, RegistryError,
unregister_unpack_format, get_unpack_formats,
SameFileError)
SameFileError, _GiveupOnZeroCopy)
import tarfile
import zipfile

Expand Down Expand Up @@ -60,6 +64,24 @@ def write_file(path, content, binary=False):
with open(path, 'wb' if binary else 'w') as fp:
fp.write(content)

def write_test_file(path, size):
    """Create a test file with an arbitrary size and random text content.

    *path* is the file to create (it is truncated if it exists) and
    *size* is the exact number of bytes to write.  The content is a block
    of random ASCII letters of at most 8192 bytes, repeated as needed; the
    final repetition is truncated so that sizes which are not a multiple
    of the block length are honoured too.
    """
    bufsize = min(size, 8192)
    chunk = "".join(random.choice(string.ascii_letters)
                    for _ in range(bufsize)).encode()
    with open(path, 'wb') as f:
        remaining = size
        while remaining > 0:
            # Slice so the last write never overshoots the requested size.
            piece = chunk[:remaining]
            f.write(piece)
            remaining -= len(piece)
    # Sanity check on the helper itself.
    assert os.path.getsize(path) == size

def read_file(path, binary=False):
"""Return contents from a file located at *path*.

Expand All @@ -84,6 +106,37 @@ def rlistdir(path):
res.append(name)
return res

def supports_file2file_sendfile():
    """Return True if os.sendfile() can copy between two regular files.

    The check is done for real: a small temporary source file is created
    and os.sendfile() is asked to copy 2 bytes of it into a temporary
    destination file.  Both temporary files are removed before returning.
    """
    # ...apparently Linux is the only one.
    if not hasattr(os, "sendfile"):
        return False
    srcname = None
    dstname = None
    try:
        with tempfile.NamedTemporaryFile("wb", delete=False) as f:
            srcname = f.name
            f.write(b"0123456789")

        with open(srcname, "rb") as src:
            with tempfile.NamedTemporaryFile("wb", delete=False) as dst:
                # Record the destination's own name so that it is the
                # file removed in the cleanup below.
                dstname = dst.name
                infd = src.fileno()
                outfd = dst.fileno()
                try:
                    os.sendfile(outfd, infd, 0, 2)
                except OSError:
                    return False
                else:
                    return True
    finally:
        if srcname is not None:
            support.unlink(srcname)
        if dstname is not None:
            support.unlink(dstname)


# Probed once when the module is loaded; used to skip
# TestCopyFileObjSendfile where file-to-file sendfile() does not work.
SUPPORTS_SENDFILE = supports_file2file_sendfile()


class TestShutil(unittest.TestCase):

Expand Down Expand Up @@ -1829,6 +1882,169 @@ def test_move_dir_caseinsensitive(self):
finally:
os.rmdir(dst_dir)


class _CopyFileTest(object):
    # Mixin for the file copy tests: creates a FILESIZE-bytes source file
    # (TESTFN) once per class, caches its content in FILEDATA and removes
    # the destination file (TESTFN2) after every test.
    FILESIZE = 10 * 1024 * 1024  # 10 MiB
    FILEDATA = b""

    @classmethod
    def setUpClass(cls):
        write_test_file(TESTFN, cls.FILESIZE)
        with open(TESTFN, 'rb') as source:
            content = source.read()
        cls.FILEDATA = content
        assert len(cls.FILEDATA) == cls.FILESIZE

    @classmethod
    def tearDownClass(cls):
        support.unlink(TESTFN)

    def tearDown(self):
        support.unlink(TESTFN2)

    @contextlib.contextmanager
    def get_files(self):
        # Yield (src, dst): TESTFN opened for binary reading and TESTFN2
        # opened for binary writing; both are closed on exit.
        with open(TESTFN, "rb") as src, open(TESTFN2, "wb") as dst:
            yield (src, dst)


@unittest.skipIf(not SUPPORTS_SENDFILE, 'os.sendfile() not supported')
class TestCopyFileObjSendfile(_CopyFileTest, unittest.TestCase):
    # Tests for shutil._copyfileobj_sendfile() and for its integration
    # in shutil.copyfile().  Checks use self.assert*() rather than bare
    # assert statements so that they still run under "python -O".

    def test_regular_copy(self):
        with self.get_files() as (src, dst):
            shutil._copyfileobj_sendfile(src, dst)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_non_regular_file_src(self):
        # A source without a file descriptor makes the zero-copy path
        # give up; the regular copy must still work afterwards.
        with io.BytesIO(self.FILEDATA) as src:
            with open(TESTFN2, "wb") as dst:
                with self.assertRaises(_GiveupOnZeroCopy):
                    shutil._copyfileobj_sendfile(src, dst)
                shutil.copyfileobj(src, dst)

        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_non_regular_file_dst(self):
        # Same as above, but it is the destination which has no fd.
        with open(TESTFN, "rb") as src:
            with io.BytesIO() as dst:
                with self.assertRaises(_GiveupOnZeroCopy):
                    shutil._copyfileobj_sendfile(src, dst)
                shutil.copyfileobj(src, dst)
                dst.seek(0)
                self.assertEqual(dst.read(), self.FILEDATA)

    def test_empty_file(self):
        srcname = TESTFN + 'src'
        dstname = TESTFN + 'dst'
        self.addCleanup(support.unlink, srcname)
        self.addCleanup(support.unlink, dstname)
        with open(srcname, "wb"):
            pass

        with open(srcname, "rb") as src:
            with open(dstname, "wb") as dst:
                shutil._copyfileobj_sendfile(src, dst)

        self.assertEqual(read_file(dstname, binary=True), b"")

    def test_unhandled_exception(self):
        # Exceptions other than OSError must not be swallowed.
        with unittest.mock.patch('os.sendfile',
                                 side_effect=ZeroDivisionError):
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN, TESTFN2)

    def test_exception_on_first_call(self):
        # Emulate a case where the first call to sendfile() raises
        # an exception in which case the function is supposed to
        # give up immediately.
        with unittest.mock.patch('os.sendfile',
                                 side_effect=OSError):
            with self.get_files() as (src, dst):
                with self.assertRaises(_GiveupOnZeroCopy):
                    shutil._copyfileobj_sendfile(src, dst)

    def test_exception_on_second_call(self):
        # ...but on subsequent calls we expect the exception to bubble up.
        def sendfile(*args, **kwargs):
            if not flag:
                flag.append(None)
                return orig_sendfile(*args, **kwargs)
            else:
                raise OSError(errno.EBADF, "yo")

        flag = []
        orig_sendfile = os.sendfile
        with unittest.mock.patch('os.sendfile', create=True,
                                 side_effect=sendfile):
            with self.get_files() as (src, dst):
                with self.assertRaises(OSError) as cm:
                    shutil._copyfileobj_sendfile(src, dst)
                # The real sendfile() must have been called once first.
                self.assertTrue(flag)
                self.assertEqual(cm.exception.errno, errno.EBADF)

    def test_cant_get_size(self):
        # Emulate a case where src file size cannot be determined.
        # Internally a default bufsize will be used and sendfile()
        # will be called repeatedly until EOF.
        with unittest.mock.patch('os.fstat', side_effect=OSError) as m:
            with self.get_files() as (src, dst):
                shutil._copyfileobj_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_small_chunks(self):
        # Force internal file size detection to be smaller than the
        # actual file size. We want to force sendfile() to be called
        # multiple times, also in order to emulate a src fd which gets
        # bigger while it is being copied.
        mock = unittest.mock.Mock()
        mock.st_size = 65536 + 1
        with unittest.mock.patch('os.fstat', return_value=mock) as m:
            with self.get_files() as (src, dst):
                shutil._copyfileobj_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_big_chunk(self):
        # Force internal file size detection to be +100MB bigger than
        # the actual file size. Make sure sendfile() does not rely on
        # file size value except for (maybe) a better throughput /
        # performance.
        mock = unittest.mock.Mock()
        mock.st_size = self.FILESIZE + (100 * 1024 * 1024)
        with unittest.mock.patch('os.fstat', return_value=mock) as m:
            with self.get_files() as (src, dst):
                shutil._copyfileobj_sendfile(src, dst)
                self.assertTrue(m.called)
        self.assertEqual(read_file(TESTFN2, binary=True), self.FILEDATA)

    def test_blocksize_arg(self):
        with unittest.mock.patch('os.sendfile',
                                 side_effect=ZeroDivisionError) as m:
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN, TESTFN2)
            blocksize = m.call_args[0][3]
            # Make sure file size and the block size arg passed to
            # sendfile() are the same.
            self.assertEqual(blocksize, os.path.getsize(TESTFN))
            # ...unless we're dealing with a small file.
            support.unlink(TESTFN2)
            write_file(TESTFN2, b"hello", binary=True)
            self.addCleanup(support.unlink, TESTFN2 + '3')
            self.assertRaises(ZeroDivisionError,
                              shutil.copyfile, TESTFN2, TESTFN2 + '3')
            blocksize = m.call_args[0][3]
            self.assertEqual(blocksize, 2 ** 23)

    def test_filesystem_full(self):
        # Emulate a case where filesystem is full and sendfile() fails
        # on first call: the error must reach the caller instead of
        # triggering the fallback copy.
        with unittest.mock.patch('os.sendfile',
                                 side_effect=OSError(errno.ENOSPC, "yo")):
            with self.assertRaises(OSError) as cm:
                shutil.copyfile(TESTFN, TESTFN2)
            self.assertEqual(cm.exception.errno, errno.ENOSPC)


class TermsizeTests(unittest.TestCase):
def test_does_not_crash(self):
"""Check if get_terminal_size() returns a meaningful value.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
shutil.copyfile(), shutil.copy() and shutil.copy2() use high-performance
os.sendfile() on Linux, resulting in roughly a 20-25% speedup of the copying
operation and considerably lower CPU cycle consumption.
(Contributed by Giampaolo Rodola' and desbma in :issue:`33639`.)