Skip to content

Commit 96d54e2

Browse files
author
Jon Olick
committed
MT dir search
1 parent 5541005 commit 96d54e2

File tree

1 file changed

+71
-12
lines changed

1 file changed

+71
-12
lines changed

aisearch.py

Lines changed: 71 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -515,7 +515,7 @@ def is_comment(line: str, file_ext: str) -> bool:
515515

516516
def fast_walk(directory: str, skip_dirs: Optional[Set[str]] = None) -> Tuple[str, List[str], List[str]]:
517517
"""
518-
Faster alternative to os.walk using scandir with Windows-specific optimizations.
518+
Faster alternative to os.walk using scandir with Windows-specific optimizations and multithreading.
519519
520520
Args:
521521
directory: Directory to walk
@@ -533,17 +533,23 @@ def fast_walk(directory: str, skip_dirs: Optional[Set[str]] = None) -> Tuple[str
533533
directory = os.path.abspath(directory)
534534
if not directory.startswith('\\\\'): # Not a UNC path
535535
directory = '\\\\?\\' + directory
536+
537+
# Use a thread-safe queue for directories to process
538+
from queue import Queue, Empty
539+
from threading import Lock, Event
540+
dir_queue = Queue()
541+
result_queue = Queue()
542+
dir_queue.put(directory)
543+
active_threads = 0
544+
threads_done = Event()
536545

537-
# Use a stack instead of a queue for better memory locality
538-
dirs = [directory]
539-
while dirs:
540-
current = dirs.pop() # Depth-first is more memory efficient
546+
# Function to process a single directory
547+
def process_directory(current_dir: str) -> None:
541548
try:
542-
# Get subdirectories and files in one pass
543549
subdirs = []
544550
files = []
545551

546-
with os.scandir(current) as it:
552+
with os.scandir(current_dir) as it:
547553
for entry in it:
548554
try:
549555
name = entry.name
@@ -571,14 +577,67 @@ def fast_walk(directory: str, skip_dirs: Optional[Set[str]] = None) -> Tuple[str
571577
# Skip entries we can't access
572578
continue
573579

574-
# Yield the current directory, subdirectory basenames, and files
575-
yield current, [os.path.basename(d) for d in subdirs], files
580+
# Put the current directory results in the result queue
581+
result_queue.put((current_dir, [os.path.basename(d) for d in subdirs], files))
576582

577-
# Add subdirs to the stack in reverse order to maintain expected traversal order
578-
dirs.extend(reversed(subdirs))
583+
# Add subdirectories to the queue
584+
for subdir in subdirs:
585+
dir_queue.put(subdir)
586+
579587
except (PermissionError, OSError, Exception):
580588
# Skip any directories we can't access
581-
continue
589+
pass
590+
591+
# Worker function for threads
592+
def worker():
593+
nonlocal active_threads
594+
active_threads += 1
595+
try:
596+
while True:
597+
try:
598+
current_dir = dir_queue.get_nowait()
599+
process_directory(current_dir)
600+
dir_queue.task_done()
601+
except Empty:
602+
break
603+
except Exception:
604+
dir_queue.task_done()
605+
continue
606+
finally:
607+
active_threads -= 1
608+
if active_threads == 0:
609+
threads_done.set()
610+
611+
# Create and start worker threads
612+
import threading
613+
num_workers = min(32, os.cpu_count() * 4) # Limit max threads
614+
threads = []
615+
for _ in range(num_workers):
616+
t = threading.Thread(target=worker)
617+
t.daemon = True
618+
t.start()
619+
threads.append(t)
620+
621+
# Wait for all directories to be processed
622+
dir_queue.join()
623+
624+
# Wait for all threads to complete
625+
for t in threads:
626+
t.join()
627+
628+
# Signal that all threads are done
629+
threads_done.set()
630+
631+
# Yield all results from the result queue
632+
while True:
633+
try:
634+
result = result_queue.get_nowait()
635+
yield result
636+
except Empty:
637+
if threads_done.is_set():
638+
break
639+
# Give other threads a chance to add results
640+
time.sleep(0.1)
582641

583642

584643
def search_file(path: str, file_ext: str, search_terms: List[str],

0 commit comments

Comments
 (0)