Bug report
Bug description:
Several places in `FileIO` (e.g. `_io_FileIO_read_impl`, `_io_FileIO_write_impl`, and `portable_lseek`) read `self->fd` with no synchronisation, while `internal_close` may write `self->fd = -1` and call `close(fd)`, also without holding any lock.
`internal_close` (`Modules/_io/fileio.c`, lines 122 to 130):

```c
internal_close(fileio *self)
{
    int err = 0;
    int save_errno = 0;
    if (self->fd >= 0) {
        int fd = self->fd;
        self->fd = -1;
        /* fd is accessible and someone else may have closed it */
        Py_BEGIN_ALLOW_THREADS
```

`_io_FileIO_read_impl` (`Modules/_io/fileio.c`, lines 878 to 898):

```c
    if (self->fd < 0)
        return err_closed();
    if (!self->readable) {
        _PyIO_State *state = get_io_state_by_cls(cls);
        return err_mode(state, "reading");
    }

    if (size < 0)
        return _io_FileIO_readall_impl(self, cls);

    if (size > _PY_READ_MAX) {
        size = _PY_READ_MAX;
    }

    PyBytesWriter *writer = PyBytesWriter_Create(size);
    if (writer == NULL) {
        return NULL;
    }
    char *ptr = PyBytesWriter_GetData(writer);

    Py_ssize_t n = _Py_read(self->fd, ptr, size);
```

`portable_lseek` (`Modules/_io/fileio.c`, lines 959 to 963):

```c
portable_lseek(fileio *self, PyObject *posobj, int whence, bool suppress_pipe_error)
{
    Py_off_t pos, res;
    int fd = self->fd;
```
|
When a FileIO object is shared across threads, the following races can occur.
_io_FileIO_read_impl does:
if (self->fd < 0) // check -- no lock
return err_closed();
...
n = _Py_read(self->fd, ptr, size); // use -- no lock
Between the check and the use, another thread can call close, which writes self->fd = -1 in internal_close. The reader then calls _Py_read(-1, ...), gets EBADF, and raises an unexpected OSError.
Alternatively, the file descriptor number may already have been reused, in which case the reader would read from a different file than the one it "thinks" it is reading.
This is also the case in portable_lseek which makes an explicit local copy:
int fd = self->fd;
...
lseek(fd, pos, whence);
Reproducer
import os
import threading
import tempfile
def test_concurrent_read_close() -> None:
    """Hammer one unbuffered file object with concurrent reads and closes.

    Four reader threads repeatedly ``seek(0)`` and ``read(10)`` on a shared
    object opened with ``buffering=0`` while a fifth thread repeatedly calls
    ``close()`` on it.  Every exception raised in a reader is recorded; any
    whose message does not contain ``"closed file"`` is reported on stdout
    with a ``BUG:`` prefix.  The temporary file is always removed.
    """
    errors: list[str] = []
    lock = threading.Lock()

    # delete=False: the file is reopened by path after this ``with`` exits,
    # and is removed explicitly in the ``finally`` below.
    with tempfile.NamedTemporaryFile(delete=False) as tf:
        path = tf.name
        tf.write(b"hello" * 1000)

    try:
        # buffering=0 gives the raw FileIO object.  The ``with`` guarantees
        # the descriptor is released even if thread setup fails before the
        # closer runs; closing an already-closed file is a no-op.
        with open(path, "rb", buffering=0) as fio:

            def reader() -> None:
                for _ in range(5000):
                    try:
                        fio.seek(0)
                        fio.read(10)
                    except Exception as e:
                        # list.append is guarded explicitly so the result
                        # does not depend on interpreter-level locking.
                        with lock:
                            errors.append(str(e))

            def closer() -> None:
                for _ in range(100):
                    try:
                        fio.close()
                    except Exception:
                        # Deliberately best-effort: only the readers'
                        # failures are inspected by this reproducer.
                        pass

            threads = [
                *[threading.Thread(target=reader) for _ in range(4)],
                threading.Thread(target=closer),
            ]
            for t in threads:
                t.start()
            for t in threads:
                t.join()

            # Messages mentioning "closed file" are treated as the expected
            # outcome of reading after close(); anything else is reported.
            unexpected = [e for e in errors if "closed file" not in e]
            if unexpected:
                print(f"BUG: {unexpected[:3]}")
    finally:
        os.unlink(path)
# Run the scenario repeatedly to give the race more chances to occur.
for _ in range(20):
    test_concurrent_read_close()
Under TSAN (CPython main, free-threaded build):
==================
WARNING: ThreadSanitizer: data race (pid=77467)
Read of size 4 at 0x000302acd320 by thread T1:
#0 _io_FileIO_read_impl fileio.c:878 (python.exe:arm64+0x1004053cc)
#1 _io_FileIO_read fileio.c.h:353 (python.exe:arm64+0x1004053cc)
#2 method_vectorcall_FASTCALL_KEYWORDS_METHOD descrobject.c:381 (python.exe:arm64+0x1000a8a78)
#3 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100090e80)
#4 PyObject_Vectorcall call.c:327 (python.exe:arm64+0x100090e80)
#5 _Py_VectorCall_StackRefSteal ceval.c:724 (python.exe:arm64+0x100290228)
#6 _PyEval_EvalFrameDefault generated_cases.c.h:4362 (python.exe:arm64+0x10029b364)
#7 _PyEval_EvalFrame pycore_ceval.h:122 (python.exe:arm64+0x10028fe5c)
#8 _PyEval_Vector ceval.c:2134 (python.exe:arm64+0x10028fe5c)
#9 _PyFunction_Vectorcall call.c (python.exe:arm64+0x1000914bc)
#10 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100092c3c)
#11 _PyObject_VectorcallPrepend call.c:855 (python.exe:arm64+0x100092c3c)
#12 method_vectorcall classobject.c:55 (python.exe:arm64+0x100095fdc)
#13 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x1002dce38)
#14 context_run context.c:728 (python.exe:arm64+0x1002dce38)
#15 method_vectorcall_FASTCALL_KEYWORDS descrobject.c:421 (python.exe:arm64+0x1000a8644)
#16 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100090e80)
#17 PyObject_Vectorcall call.c:327 (python.exe:arm64+0x100090e80)
#18 _Py_VectorCallInstrumentation_StackRefSteal ceval.c:766 (python.exe:arm64+0x100290968)
#19 _PyEval_EvalFrameDefault generated_cases.c.h:1846 (python.exe:arm64+0x100295ff8)
#20 _PyEval_EvalFrame pycore_ceval.h:122 (python.exe:arm64+0x10028fe5c)
#21 _PyEval_Vector ceval.c:2134 (python.exe:arm64+0x10028fe5c)
#22 _PyFunction_Vectorcall call.c (python.exe:arm64+0x1000914bc)
#23 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100092c3c)
#24 _PyObject_VectorcallPrepend call.c:855 (python.exe:arm64+0x100092c3c)
#25 method_vectorcall classobject.c:55 (python.exe:arm64+0x100095fdc)
#26 _PyVectorcall_Call call.c:273 (python.exe:arm64+0x10009112c)
#27 _PyObject_Call call.c:348 (python.exe:arm64+0x10009112c)
#28 PyObject_Call call.c:373 (python.exe:arm64+0x1000911a4)
#29 thread_run _threadmodule.c:388 (python.exe:arm64+0x10044df04)
#30 pythread_wrapper thread_pthread.h:234 (python.exe:arm64+0x10038a12c)
Previous write of size 4 at 0x000302acd320 by thread T5:
#0 internal_close fileio.c:128 (python.exe:arm64+0x100406bd8)
#1 _io_FileIO_close_impl fileio.c:187 (python.exe:arm64+0x10040627c)
#2 _io_FileIO_close fileio.c.h:34 (python.exe:arm64+0x10040627c)
#3 method_vectorcall_FASTCALL_KEYWORDS_METHOD descrobject.c:381 (python.exe:arm64+0x1000a8a78)
#4 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100090e80)
#5 PyObject_Vectorcall call.c:327 (python.exe:arm64+0x100090e80)
#6 _Py_VectorCallInstrumentation_StackRefSteal ceval.c:766 (python.exe:arm64+0x100290968)
#7 _PyEval_EvalFrameDefault generated_cases.c.h:1846 (python.exe:arm64+0x100295ff8)
#8 _PyEval_EvalFrame pycore_ceval.h:122 (python.exe:arm64+0x10028fe5c)
#9 _PyEval_Vector ceval.c:2134 (python.exe:arm64+0x10028fe5c)
#10 _PyFunction_Vectorcall call.c (python.exe:arm64+0x1000914bc)
#11 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100092c3c)
#12 _PyObject_VectorcallPrepend call.c:855 (python.exe:arm64+0x100092c3c)
#13 method_vectorcall classobject.c:55 (python.exe:arm64+0x100095fdc)
#14 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x1002dce38)
#15 context_run context.c:728 (python.exe:arm64+0x1002dce38)
#16 method_vectorcall_FASTCALL_KEYWORDS descrobject.c:421 (python.exe:arm64+0x1000a8644)
#17 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100090e80)
#18 PyObject_Vectorcall call.c:327 (python.exe:arm64+0x100090e80)
#19 _Py_VectorCallInstrumentation_StackRefSteal ceval.c:766 (python.exe:arm64+0x100290968)
#20 _PyEval_EvalFrameDefault generated_cases.c.h:1846 (python.exe:arm64+0x100295ff8)
#21 _PyEval_EvalFrame pycore_ceval.h:122 (python.exe:arm64+0x10028fe5c)
#22 _PyEval_Vector ceval.c:2134 (python.exe:arm64+0x10028fe5c)
#23 _PyFunction_Vectorcall call.c (python.exe:arm64+0x1000914bc)
#24 _PyObject_VectorcallTstate pycore_call.h:144 (python.exe:arm64+0x100092c3c)
#25 _PyObject_VectorcallPrepend call.c:855 (python.exe:arm64+0x100092c3c)
#26 method_vectorcall classobject.c:55 (python.exe:arm64+0x100095fdc)
#27 _PyVectorcall_Call call.c:273 (python.exe:arm64+0x10009112c)
#28 _PyObject_Call call.c:348 (python.exe:arm64+0x10009112c)
#29 PyObject_Call call.c:373 (python.exe:arm64+0x1000911a4)
#30 thread_run _threadmodule.c:388 (python.exe:arm64+0x10044df04)
#31 pythread_wrapper thread_pthread.h:234 (python.exe:arm64+0x10038a12c)
Thread T1 (tid=19700764, running) created by main thread at:
#0 pthread_create <null> (libclang_rt.tsan_osx_dynamic.dylib:arm64+0x31d84)
#1 do_start_joinable_thread thread_pthread.h:281 (python.exe:arm64+0x100389300)
#2 PyThread_start_joinable_thread thread_pthread.h:323 (python.exe:arm64+0x100389138)
#3 ThreadHandle_start _threadmodule.c:475 (python.exe:arm64+0x10044dd10)
#4 do_start_new_thread _threadmodule.c:1919 (python.exe:arm64+0x10044d7dc)
#5 thread_PyThread_start_joinable_thread _threadmodule.c:2042 (python.exe:arm64+0x10044c844)
#6 cfunction_call methodobject.c:564 (python.exe:arm64+0x100138744)
#7 _PyObject_MakeTpCall call.c:242 (python.exe:arm64+0x10009030c)
#8 _PyObject_VectorcallTstate pycore_call.h:142 (python.exe:arm64+0x100090f14)
#9 PyObject_Vectorcall call.c:327 (python.exe:arm64+0x100090f14)
#10 _Py_VectorCall_StackRefSteal ceval.c:724 (python.exe:arm64+0x100290228)
#11 _PyEval_EvalFrameDefault generated_cases.c.h:3325 (python.exe:arm64+0x10029964c)
#12 _PyEval_EvalFrame pycore_ceval.h:122 (python.exe:arm64+0x10028fa34)
#13 _PyEval_Vector ceval.c:2134 (python.exe:arm64+0x10028fa34)
#14 PyEval_EvalCode ceval.c:677 (python.exe:arm64+0x10028fa34)
#15 run_eval_code_obj pythonrun.c:1369 (python.exe:arm64+0x100366a70)
#16 run_mod pythonrun.c:1472 (python.exe:arm64+0x1003667cc)
#17 pyrun_file pythonrun.c:1296 (python.exe:arm64+0x100361d38)
#18 _PyRun_SimpleFileObject pythonrun.c:518 (python.exe:arm64+0x100361d38)
#19 _PyRun_AnyFileObject pythonrun.c:81 (python.exe:arm64+0x1003614c8)
#20 pymain_run_file_obj main.c:411 (python.exe:arm64+0x1003a3328)
#21 pymain_run_file main.c:430 (python.exe:arm64+0x1003a3328)
#22 pymain_run_python main.c:715 (python.exe:arm64+0x1003a2658)
#23 Py_RunMain main.c:796 (python.exe:arm64+0x1003a2658)
#24 pymain_main main.c:826 (python.exe:arm64+0x1003a2bd0)
#25 Py_BytesMain main.c:850 (python.exe:arm64+0x1003a2cd0)
#26 main python.c:15 (python.exe:arm64+0x100000a78)
Thread T5 (tid=19700768, running) created by main thread at:
#0 pthread_create <null> (libclang_rt.tsan_osx_dynamic.dylib:arm64+0x31d84)
#1 do_start_joinable_thread thread_pthread.h:281 (python.exe:arm64+0x100389300)
#2 PyThread_start_joinable_thread thread_pthread.h:323 (python.exe:arm64+0x100389138)
#3 ThreadHandle_start _threadmodule.c:475 (python.exe:arm64+0x10044dd10)
#4 do_start_new_thread _threadmodule.c:1919 (python.exe:arm64+0x10044d7dc)
#5 thread_PyThread_start_joinable_thread _threadmodule.c:2042 (python.exe:arm64+0x10044c844)
#6 cfunction_call methodobject.c:564 (python.exe:arm64+0x100138744)
#7 _PyObject_MakeTpCall call.c:242 (python.exe:arm64+0x10009030c)
#8 _PyObject_VectorcallTstate pycore_call.h:142 (python.exe:arm64+0x100090f14)
#9 PyObject_Vectorcall call.c:327 (python.exe:arm64+0x100090f14)
#10 _Py_VectorCall_StackRefSteal ceval.c:724 (python.exe:arm64+0x100290228)
#11 _PyEval_EvalFrameDefault generated_cases.c.h:3528 (python.exe:arm64+0x100299d1c)
#12 _PyEval_EvalFrame pycore_ceval.h:122 (python.exe:arm64+0x10028fa34)
#13 _PyEval_Vector ceval.c:2134 (python.exe:arm64+0x10028fa34)
#14 PyEval_EvalCode ceval.c:677 (python.exe:arm64+0x10028fa34)
#15 run_eval_code_obj pythonrun.c:1369 (python.exe:arm64+0x100366a70)
#16 run_mod pythonrun.c:1472 (python.exe:arm64+0x1003667cc)
#17 pyrun_file pythonrun.c:1296 (python.exe:arm64+0x100361d38)
#18 _PyRun_SimpleFileObject pythonrun.c:518 (python.exe:arm64+0x100361d38)
#19 _PyRun_AnyFileObject pythonrun.c:81 (python.exe:arm64+0x1003614c8)
#20 pymain_run_file_obj main.c:411 (python.exe:arm64+0x1003a3328)
#21 pymain_run_file main.c:430 (python.exe:arm64+0x1003a3328)
#22 pymain_run_python main.c:715 (python.exe:arm64+0x1003a2658)
#23 Py_RunMain main.c:796 (python.exe:arm64+0x1003a2658)
#24 pymain_main main.c:826 (python.exe:arm64+0x1003a2bd0)
#25 Py_BytesMain main.c:850 (python.exe:arm64+0x1003a2cd0)
#26 main python.c:15 (python.exe:arm64+0x100000a78)
SUMMARY: ThreadSanitizer: data race fileio.c:878 in _io_FileIO_read_impl
==================
CPython versions tested on:
CPython main branch
Operating systems tested on:
macOS
Linked PRs
Bug report
Bug description:
Several places in `FileIO` (e.g.
`_io_FileIO_read_impl`, `_io_FileIO_write_impl`, and `portable_lseek`) read `self->fd` with no synchronisation, while `internal_close` may write `self->fd = -1` and call `close(fd)` with no lock either.
cpython/Modules/_io/fileio.c
Lines 122 to 130 in d986124
cpython/Modules/_io/fileio.c
Lines 878 to 898 in d986124
cpython/Modules/_io/fileio.c
Lines 959 to 963 in d986124
When a
`FileIO` object is shared across threads, the following races can occur. `_io_FileIO_read_impl` does a check followed by a use of `self->fd`, with no lock around either. Between the check and the use, another thread can call
`close`, which writes `self->fd = -1` in `internal_close`. The reader then calls `_Py_read(-1, ...)`, gets `EBADF`, and raises an unexpected `OSError`. Alternatively, the file descriptor may also be reused, in which case it would read from a different file than the one it "thinks".
This is also the case in
`portable_lseek`, which makes an explicit local copy of `self->fd`.
Reproducer
Under TSAN (CPython main, free-threaded build):
CPython versions tested on:
CPython main branch
Operating systems tested on:
macOS
Linked PRs
FileIO#151708