I am doing an os.walk() over a certain part of my OneDrive synced folder structure. It all worked fine until recently. Now ALL files from one specific directory are ignored. I tested several possible reasons and narrowed it down to this: The directory that is ignored is the one that holds the most files (897 at this point).
If I remove two of the files from said directory (it does not matter which two), it works and all files are recognized. When I add the files again, the result is the same: No files from that directory turn up in my os.walk() result list.
I did check Microsoft's "Restrictions and limitations in OneDrive and SharePoint", but I am far from any of the file size and file count limits (1, 2) mentioned there.
My code looks like this
# Collect the full path of every file that lives below a folder whose path
# contains the marker text 'Common part'.
files = []
# NOTE: os.walk() ignores errors raised while listing a directory unless an
# ``onerror`` callback is supplied, so a directory that cannot be listed
# silently contributes no files to the result.
for root, dirnames, filenames in os.walk(mainDirectory):
    # Only folders whose path contains the shared marker text are relevant.
    if 'Common part' not in root:
        continue
    # The per-directory name list gets its own name (``filenames``) so that it
    # no longer rebinds -- and thereby discards -- the ``files`` result list.
    for f in filenames:
        files.append(os.path.join(root, f))
'Common part' is a text string that all relevant folders in the mainDirectory have in common.
The directory itself is recognized every time; it is just that its files are not added to my list. So I tried another approach featuring glob.glob(). Here, the results are a bit different but still not satisfactory:
# Step 1: collect every sub-folder whose name starts with the marker text.
folders = []
for root, dirnames, _ in os.walk(mainDirectory):
    for d in dirnames:
        if d.startswith('Common part'):
            folders.append(os.path.join(root, d))
# Step 2: glob the Excel workbooks inside each collected folder.
# ``files`` holds one list of matching paths per folder (a list of lists).
files = [glob.glob(os.path.join(f, '*.xlsx')) for f in folders]
This does give me approximately half the files from the problematic folder. Again, when I remove two files, it gives me the full list.
When I copy/move the files to a local (not OneDrive synced) path, it works. So I guess it does have to do with OneDrive. Having the files outside of OneDrive is not an option.
The directory in question is not directly in my OneDrive but a "Sync"/"Shortcut" from SharePoint.
All files can be opened; they are downloaded, not on-demand. I have removed the sync and re-synced the folder. I have restarted OneDrive (and my machine) several times.
I am really at a loss here. Any hints welcome!
Update: Thanks to the help of @GordonAitchJay, we could establish that, once the threshold number of files (or sum of file sizes?) is reached, functions like os.listdir() and win32file.FindFilesW() stop returning their usual output and instead fail with OSError: [WinError 87] The parameter is incorrect.
Also, in the meantime, we reproduced the same behaviour on another machine within the same organization. This was conducted after a full reset of my OneDrive did not result in any improvement.
Though I can't prove it, it seems that OneDrive is up to some sort of tomfoolery that causes win32's FindNextFileW to fail with an ERROR_INVALID_PARAMETER error, but apparently only when it is called by Python's os.walk, os.listdir, and win32file.FindFilesW, and when some files have been deleted from the OneDrive directory syncing a SharePoint folder. Utterly bizarre. I'm thinking maybe OneDrive hooks FindNextFileW, and that the hook remains in place even after ending the OneDrive process and services with Task Manager.
A workaround is to use ctypes to call the lower-level NtQueryDirectoryFile function (which is ultimately what FindNextFileW calls anyway).
Eryk Sun's answer to another question has a working example. I have copied it below, and have only changed the last couple lines:
import os
import msvcrt
import ctypes
from ctypes import wintypes
# Handle to ntdll.dll, which exports the native functions called in this file
# (RtlNtStatusToDosError, NtQueryInformationFile, NtQueryDirectoryFile).
ntdll = ctypes.WinDLL('ntdll')
# Handle to kernel32.dll (CreateFileW). use_last_error=True makes ctypes keep
# the Win32 last-error code of each call so ctypes.get_last_error() can read it.
kernel32 = ctypes.WinDLL('kernel32', use_last_error=True)
def NtError(status):
    """Build the exception object that corresponds to an NTSTATUS code.

    The status is first translated to a Win32 error code by ntdll's
    RtlNtStatusToDosError and then wrapped with ctypes.WinError. The
    exception is returned, not raised; the caller decides when to raise it.
    """
    return ctypes.WinError(ntdll.RtlNtStatusToDosError(status))
# NTSTATUS codes are handled as signed LONG values. Each STATUS_* constant is
# built via NTSTATUS(...).value so that it is stored in the same signed form
# that functions declared with ``restype = NTSTATUS`` return, which allows a
# plain ``==`` comparison against their results.
NTSTATUS = wintypes.LONG
STATUS_BUFFER_OVERFLOW = NTSTATUS(0x80000005).value
STATUS_NO_MORE_FILES = NTSTATUS(0x80000006).value
# NOTE(review): not referenced anywhere in the visible code.
STATUS_INFO_LENGTH_MISMATCH = NTSTATUS(0xC0000004).value
# Win32 error code ntlistdir() raises when the opened path is not a directory.
ERROR_DIRECTORY = 0x010B
# Value the CreateFileW result is compared against to detect a failed open.
INVALID_HANDLE_VALUE = wintypes.HANDLE(-1).value
# Arguments ntlistdir() passes to CreateFileW (access, share mode, creation
# disposition, flags).
# NOTE(review): FILE_FLAG_BACKUP_SEMANTICS is presumably what allows
# CreateFileW to open a directory -- confirm against the CreateFileW docs.
GENERIC_READ = 0x80000000
FILE_SHARE_READ = 1
OPEN_EXISTING = 3
FILE_FLAG_BACKUP_SEMANTICS = 0x02000000
# Bit that isdir() tests in FILE_BASIC_INFORMATION.FileAttributes.
FILE_ATTRIBUTE_DIRECTORY = 0x0010
# Type and values of the FileInformationClass argument: FileBasicInformation
# is passed to NtQueryInformationFile, FileDirectoryInformation to
# NtQueryDirectoryFile.
FILE_INFORMATION_CLASS = wintypes.ULONG
FileDirectoryInformation = 1
FileBasicInformation = 4
# Untyped pointer aliases used in the prototypes; the visible code only ever
# passes None for these arguments.
LPSECURITY_ATTRIBUTES = wintypes.LPVOID
PIO_APC_ROUTINE = wintypes.LPVOID
# Type of IO_STATUS_BLOCK.Information (WPARAM is used as a stand-in).
ULONG_PTR = wintypes.WPARAM
class UNICODE_STRING(ctypes.Structure):
    """ctypes layout of the native UNICODE_STRING structure.

    Its pointer type is declared as the optional FileName argument of
    NtQueryDirectoryFile.
    """

    _fields_ = (
        ('Length', wintypes.USHORT),
        ('MaximumLength', wintypes.USHORT),
        ('Buffer', wintypes.LPWSTR),
    )


# Pointer-to-UNICODE_STRING type used in function prototypes.
PUNICODE_STRING = ctypes.POINTER(UNICODE_STRING)
class IO_STATUS_BLOCK(ctypes.Structure):
    """ctypes layout of the I/O status block filled in by the Nt* calls.

    The leading union is anonymous, so ``Status`` and ``Pointer`` are
    reachable directly on the structure; ``Information`` follows it.
    """

    class _STATUS(ctypes.Union):
        """Overlay of the NTSTATUS result and an untyped pointer."""

        _fields_ = (
            ('Status', NTSTATUS),
            ('Pointer', wintypes.LPVOID),
        )

    # Expose the union members as attributes of IO_STATUS_BLOCK itself.
    _anonymous_ = ('_Status',)
    _fields_ = (
        ('_Status', _STATUS),
        ('Information', ULONG_PTR),
    )


# Pointer-to-IO_STATUS_BLOCK type used in function prototypes.
PIO_STATUS_BLOCK = ctypes.POINTER(IO_STATUS_BLOCK)
# ctypes prototypes. Declaring restype/argtypes makes ctypes convert the
# arguments and the result with the listed types instead of its defaults.

# NtQueryInformationFile: called by isdir() with FileBasicInformation to read
# the attributes of an already opened handle.
ntdll.NtQueryInformationFile.restype = NTSTATUS
ntdll.NtQueryInformationFile.argtypes = (
wintypes.HANDLE, # In FileHandle
PIO_STATUS_BLOCK, # Out IoStatusBlock
wintypes.LPVOID, # Out FileInformation
wintypes.ULONG, # In Length
FILE_INFORMATION_CLASS) # In FileInformationClass
# NtQueryDirectoryFile: called by ntlistdir() in a loop to fill a buffer with
# directory entries.
ntdll.NtQueryDirectoryFile.restype = NTSTATUS
ntdll.NtQueryDirectoryFile.argtypes = (
wintypes.HANDLE, # In FileHandle
wintypes.HANDLE, # In_opt Event
PIO_APC_ROUTINE, # In_opt ApcRoutine
wintypes.LPVOID, # In_opt ApcContext
PIO_STATUS_BLOCK, # Out IoStatusBlock
wintypes.LPVOID, # Out FileInformation
wintypes.ULONG, # In Length
FILE_INFORMATION_CLASS, # In FileInformationClass
wintypes.BOOLEAN, # In ReturnSingleEntry
PUNICODE_STRING, # In_opt FileName
wintypes.BOOLEAN) # In RestartScan
# CreateFileW: called by ntlistdir() to open the directory when it is given a
# path instead of a file descriptor.
kernel32.CreateFileW.restype = wintypes.HANDLE
kernel32.CreateFileW.argtypes = (
wintypes.LPCWSTR, # In lpFileName
wintypes.DWORD, # In dwDesiredAccess
wintypes.DWORD, # In dwShareMode
LPSECURITY_ATTRIBUTES, # In_opt lpSecurityAttributes
wintypes.DWORD, # In dwCreationDisposition
wintypes.DWORD, # In dwFlagsAndAttributes
wintypes.HANDLE) # In_opt hTemplateFile
class FILE_BASIC_INFORMATION(ctypes.Structure):
    """ctypes layout of the buffer filled by NtQueryInformationFile when it
    is called with the FileBasicInformation class: four timestamps followed
    by the attribute flags.
    """

    _fields_ = (
        ('CreationTime', wintypes.LARGE_INTEGER),
        ('LastAccessTime', wintypes.LARGE_INTEGER),
        ('LastWriteTime', wintypes.LARGE_INTEGER),
        ('ChangeTime', wintypes.LARGE_INTEGER),
        ('FileAttributes', wintypes.ULONG),
    )
class FILE_DIRECTORY_INFORMATION(ctypes.Structure):
    """ctypes layout of one directory entry as parsed from the buffer that
    NtQueryDirectoryFile fills for the FileDirectoryInformation class.

    The structure is variable-length: ``_FileName`` is declared with a single
    character, and the real name extends past the declared end of the
    structure. Use the ``FileName`` property to read it.
    """

    _fields_ = (
        ('_Next', wintypes.ULONG),
        ('FileIndex', wintypes.ULONG),
        ('CreationTime', wintypes.LARGE_INTEGER),
        ('LastAccessTime', wintypes.LARGE_INTEGER),
        ('LastWriteTime', wintypes.LARGE_INTEGER),
        ('ChangeTime', wintypes.LARGE_INTEGER),
        ('EndOfFile', wintypes.LARGE_INTEGER),
        ('AllocationSize', wintypes.LARGE_INTEGER),
        ('FileAttributes', wintypes.ULONG),
        ('FileNameLength', wintypes.ULONG),
        ('_FileName', wintypes.WCHAR * 1),
    )

    @property
    def FileName(self):
        """Return the entry's name as a str.

        ``FileNameLength`` is a byte count; it is converted to a character
        count and that many WCHARs are read starting at the address of
        ``_FileName``.
        """
        char_count = self.FileNameLength // ctypes.sizeof(wintypes.WCHAR)
        name_addr = ctypes.addressof(self) + type(self)._FileName.offset
        name_chars = (wintypes.WCHAR * char_count).from_address(name_addr)
        return name_chars.value
class DirEntry(FILE_DIRECTORY_INFORMATION):
    """A directory entry that owns its own copy of the entry data.

    Instances are produced by ``listbuf`` and remain valid after the query
    buffer they were parsed from is reused or released.
    """

    def __repr__(self):
        return '<{} {!r}>'.format(self.__class__.__name__, self.FileName)

    @classmethod
    def listbuf(cls, buf):
        """Parse a buffer of chained directory entries into a list.

        Args:
            buf: writable ctypes buffer holding entries linked by their
                ``_Next`` byte offsets; a ``_Next`` of 0 ends the chain.

        Returns:
            A list of independent ``cls`` instances, one per entry, skipping
            entries with an empty name and the '.' and '..' entries.
        """
        result = []
        # Length of the fixed part of an entry: everything up to the name.
        # Using the field offset (rather than sizeof(cls) minus one WCHAR)
        # keeps trailing alignment padding out of the copy length, so the
        # memmove below never reads beyond the entry it is copying.
        header_size = cls._FileName.offset
        # ctypes.resize() cannot shrink an object below its declared size.
        declared_size = ctypes.sizeof(cls)
        offset = 0
        while True:
            fdi = cls.from_buffer(buf, offset)
            if fdi.FileNameLength and fdi.FileName not in ('.', '..'):
                entry_size = header_size + fdi.FileNameLength
                cfdi = cls()
                if entry_size > declared_size:
                    # Grow the copy so the full name fits behind the header.
                    ctypes.resize(cfdi, entry_size)
                ctypes.memmove(ctypes.byref(cfdi), ctypes.byref(fdi),
                               entry_size)
                result.append(cfdi)
            if not fdi._Next:
                break
            offset += fdi._Next
        return result
def isdir(path):
    """Return True if *path* refers to a directory.

    *path* may be a filesystem path, which is delegated to os.path.isdir, or
    an integer C file descriptor. For a descriptor the underlying OS handle
    is queried with NtQueryInformationFile and the directory attribute bit is
    tested. An invalid descriptor or a failed query yields False.
    """
    if isinstance(path, int):
        try:
            handle = msvcrt.get_osfhandle(path)
        except IOError:
            return False
        io_status = IO_STATUS_BLOCK()
        basic_info = FILE_BASIC_INFORMATION()
        status = ntdll.NtQueryInformationFile(
            handle,
            ctypes.byref(io_status),
            ctypes.byref(basic_info),
            ctypes.sizeof(basic_info),
            FileBasicInformation)
        # Negative NTSTATUS values signal failure.
        if status < 0:
            return False
        return bool(basic_info.FileAttributes & FILE_ATTRIBUTE_DIRECTORY)
    return os.path.isdir(path)
def ntlistdir(path=None):
    """List a directory by calling NtQueryDirectoryFile directly.

    Args:
        path: directory path, an integer file descriptor of an already opened
            directory, or None for the current working directory.

    Returns:
        A list of DirEntry objects (the '.' and '..' entries are excluded).

    Raises:
        OSError: if the path cannot be opened, does not refer to a directory,
            or the directory query fails.
    """
    if path is None:
        path = os.getcwd()

    # A descriptor supplied by the caller stays open; one opened here is
    # closed again before returning.
    owns_fd = not isinstance(path, int)
    if owns_fd:
        hFile = kernel32.CreateFileW(
            path, GENERIC_READ, FILE_SHARE_READ, None, OPEN_EXISTING,
            FILE_FLAG_BACKUP_SEMANTICS, None)
        if hFile == INVALID_HANDLE_VALUE:
            raise ctypes.WinError(ctypes.get_last_error())
        fd = msvcrt.open_osfhandle(hFile, os.O_RDONLY)
    else:
        fd = path
        hFile = msvcrt.get_osfhandle(fd)

    entries = []
    try:
        if not isdir(fd):
            raise ctypes.WinError(ERROR_DIRECTORY)

        io_status = IO_STATUS_BLOCK()
        buf = (ctypes.c_char * 4096)()
        while True:
            status = ntdll.NtQueryDirectoryFile(
                hFile, None, None, None,
                ctypes.byref(io_status), ctypes.byref(buf),
                ctypes.sizeof(buf), FileDirectoryInformation,
                False, None, False)

            if status == STATUS_NO_MORE_FILES:
                break

            # The buffer was too small, or the call succeeded without
            # writing anything: retry with a buffer twice the size.
            needs_bigger_buffer = (
                status == STATUS_BUFFER_OVERFLOW
                or (status >= 0 and io_status.Information == 0))
            if needs_bigger_buffer:
                buf = (ctypes.c_char * (ctypes.sizeof(buf) * 2))()
                continue

            if status < 0:
                raise NtError(status)

            entries.extend(DirEntry.listbuf(buf))
    finally:
        if owns_fd:
            os.close(fd)

    return entries
# Example usage: print the name of every entry in the OneDrive-synced folder.
big_folder = r"C:\Users\UserName\OneDriveFolder\BigFolder"
for entry in ntlistdir(big_folder):
    print(entry.FileName)