@@ -515,7 +515,7 @@ def is_comment(line: str, file_ext: str) -> bool:
515
515
516
516
def fast_walk(directory: str, skip_dirs: Optional[Set[str]] = None) -> Tuple[str, List[str], List[str]]:
    """
    Faster alternative to os.walk using scandir with Windows-specific
    optimizations and multithreaded directory scanning.

    Unlike os.walk, traversal order is nondeterministic because directories
    are scanned concurrently by a small thread pool.  Results are streamed —
    each (dirpath, subdir_names, file_names) triple is yielded as soon as
    that directory has been scanned, not after the whole tree is walked.

    Args:
        directory: Directory to walk.
        skip_dirs: Directory basenames to prune from the traversal.
            NOTE(review): assumed to be exact, case-sensitive name matches —
            confirm against callers (Windows filesystems are case-insensitive).

    Yields:
        (dirpath, subdir_names, file_names) tuples, like os.walk.
        NOTE: the declared return annotation is kept for backward
        compatibility, but this function is a generator.

    Directories and entries that cannot be accessed are silently skipped.
    """
    from queue import Queue
    from threading import Lock, Thread

    if skip_dirs is None:
        skip_dirs = set()

    directory = os.path.abspath(directory)
    # The long-path ("\\?\") prefix is only meaningful on Windows; applying
    # it unconditionally would corrupt POSIX paths.
    if os.name == 'nt' and not directory.startswith('\\\\'):  # not already UNC/extended
        directory = '\\\\?\\' + directory

    _SENTINEL = object()      # poison pill used to shut the pool down
    dir_queue = Queue()       # directories awaiting a scan
    result_queue = Queue()    # finished (dirpath, dirs, files) triples
    pending_lock = Lock()
    pending = [1]             # dirs queued or in-flight; root counts as 1
    dir_queue.put(directory)

    def _scan(current: str) -> None:
        """Scan one directory; queue its results and its subdirectories."""
        subdirs: List[str] = []
        files: List[str] = []
        try:
            with os.scandir(current) as it:
                for entry in it:
                    try:
                        # follow_symlinks=False prevents symlink loops and
                        # uses scandir's cached type info (no extra stat).
                        if entry.is_dir(follow_symlinks=False):
                            if entry.name not in skip_dirs:
                                subdirs.append(entry.path)
                        else:
                            files.append(entry.name)
                    except OSError:
                        continue  # entry we can't stat — skip it
        except OSError:
            return  # directory we can't open — skip it
        result_queue.put(
            (current, [os.path.basename(d) for d in subdirs], files))
        # Register children *before* this directory is marked done (in the
        # worker's finally) so the pending count can never reach zero while
        # work remains.
        with pending_lock:
            pending[0] += len(subdirs)
        for sub in subdirs:
            dir_queue.put(sub)

    def _worker() -> None:
        """Pull directories off the queue until the pool is shut down."""
        while True:
            current = dir_queue.get()  # blocking: never exits on a transient lull
            if current is _SENTINEL:
                break
            try:
                _scan(current)
            finally:
                with pending_lock:
                    pending[0] -= 1
                    finished = pending[0] == 0
                if finished:
                    # Last directory processed: wake every worker and the
                    # consuming generator so everything shuts down cleanly.
                    for _ in range(num_workers):
                        dir_queue.put(_SENTINEL)
                    result_queue.put(_SENTINEL)

    num_workers = min(32, (os.cpu_count() or 1) * 4)  # cap thread count
    threads = [Thread(target=_worker, daemon=True) for _ in range(num_workers)]
    for t in threads:
        t.start()

    # Stream results as they arrive instead of materializing the whole tree.
    while True:
        item = result_queue.get()
        if item is _SENTINEL:
            break
        yield item

    for t in threads:
        t.join()
641
583
642
584
643
def search_file (path : str , file_ext : str , search_terms : List [str ],
0 commit comments