|
1 | 1 | import os
|
2 | 2 | import re
|
3 | 3 | import sys
|
| 4 | +import ast |
4 | 5 | import argparse
|
5 | 6 | import concurrent.futures
|
6 | 7 | import time
|
|
21 | 22 | # Cache for file lists when directory and extensions don't change
|
22 | 23 | _file_cache = {}
|
23 | 24 |
|
| 25 | +# Cache for file contents to avoid re-reading files |
| 26 | +_content_cache = {} |
| 27 | +_content_cache_stats = {"hits": 0, "misses": 0, "max_size": 1000} |
| 28 | + |
24 | 29 | # Maps file extensions to programming languages
|
25 | 30 | LANGUAGE_MAP = {
|
26 | 31 | '.py': 'Python',
|
@@ -695,8 +700,10 @@ def search_file(path: str, file_ext: str, search_terms: List[str],
|
695 | 700 | file_matches = []
|
696 | 701 |
|
697 | 702 | try:
|
698 |
| - with open(path, 'r', encoding='utf-8', errors='ignore') as f: |
699 |
| - lines = f.readlines() |
| 703 | + # Use cached content if available |
| 704 | + lines = _read_file_with_cache(path) |
| 705 | + if lines is None: |
| 706 | + return file_matches # Return empty list if file can't be read |
700 | 707 |
|
701 | 708 | if multiline and use_regex:
|
702 | 709 | # Process the entire file as a single string for multiline mode
|
@@ -1328,6 +1335,103 @@ def clear_file_cache() -> None:
|
1328 | 1335 | _file_cache.clear()
|
1329 | 1336 |
|
1330 | 1337 |
|
def clear_content_cache() -> None:
    """Empty the file-content cache and zero its hit/miss counters."""
    # Both operations mutate the existing objects in place, so no
    # ``global`` declaration is needed (we never rebind the names).
    _content_cache.clear()
    for counter in ("hits", "misses"):
        _content_cache_stats[counter] = 0
| 1344 | + |
| 1345 | + |
def get_content_cache_stats() -> Dict[str, Any]:
    """Return a snapshot of content-cache metrics.

    Keys: ``cache_size`` (entries currently cached), ``hits``, ``misses``,
    ``hit_rate`` (fraction of lookups served from cache), and ``max_size``
    (configured capacity).
    """
    hits = _content_cache_stats["hits"]
    misses = _content_cache_stats["misses"]
    # max(1, ...) guards against division by zero before any lookups.
    lookups = max(1, hits + misses)
    return {
        "cache_size": len(_content_cache),
        "hits": hits,
        "misses": misses,
        "hit_rate": hits / lookups,
        "max_size": _content_cache_stats["max_size"],
    }
| 1355 | + |
| 1356 | + |
def set_content_cache_max_size(max_size: int) -> None:
    """Set the content-cache capacity and trim the cache to fit.

    Args:
        max_size: Maximum number of files whose contents may be cached.
    """
    # Item assignment mutates the shared stats dict in place; no ``global``
    # declaration is required.
    _content_cache_stats["max_size"] = max_size
    # Shrinking the limit can leave the cache over capacity — trim now.
    _evict_content_cache_if_needed()
| 1362 | + |
| 1363 | + |
def _evict_content_cache_if_needed() -> None:
    """Trim the content cache down to its configured maximum size.

    Eviction is FIFO: dicts preserve insertion order, so the earliest
    cached files are dropped first.
    """
    excess = len(_content_cache) - _content_cache_stats["max_size"]
    if excess > 0:
        # Materialize the key list first — deleting while iterating the
        # live dict view would raise RuntimeError.
        for stale_key in list(_content_cache)[:excess]:
            del _content_cache[stale_key]
| 1375 | + |
| 1376 | + |
def _get_file_cache_key(path: str) -> Tuple[str, float]:
    """
    Generate a cache key for a file based on path and modification time.

    Including the mtime means a file that changes on disk automatically
    produces a new key (a cache miss) instead of serving stale content.

    Args:
        path: File path

    Returns:
        Tuple of (absolute_path, modification_time)
    """
    # abspath is pure string manipulation and cannot raise here, so it is
    # hoisted out of the try block and computed only once.
    abs_path = os.path.abspath(path)
    try:
        return (abs_path, os.path.getmtime(path))
    except OSError:
        # FileNotFoundError is a subclass of OSError, so a single clause
        # covers both.  Use the current time so this key never matches a
        # cached entry, forcing a (failing) re-read attempt downstream.
        return (abs_path, time.time())
| 1394 | + |
| 1395 | + |
def _read_file_with_cache(path: str) -> Optional[List[str]]:
    """
    Read file contents with caching support.

    The cache key includes the file's mtime (see _get_file_cache_key),
    so files modified on disk are re-read rather than served stale.

    Args:
        path: Path to file to read

    Returns:
        List of lines from the file, or None if file cannot be read
    """
    cache_key = _get_file_cache_key(path)

    # Fast path: this exact (path, mtime) pair was read before.
    if cache_key in _content_cache:
        _content_cache_stats["hits"] += 1
        return _content_cache[cache_key]

    _content_cache_stats["misses"] += 1

    # Keep the try body minimal: only the read can reasonably fail.
    # OSError covers missing files, permission errors, etc.; decoding
    # errors are already suppressed by errors='ignore'.
    try:
        with open(path, 'r', encoding='utf-8', errors='ignore') as f:
            lines = f.readlines()
    except OSError:
        return None

    # Insert first, then trim.  The original evicted down to max_size
    # *before* inserting, which let the cache grow to max_size + 1
    # entries; inserting and then evicting keeps it at or below the
    # configured limit.
    _content_cache[cache_key] = lines
    _evict_content_cache_if_needed()

    return lines
| 1433 | + |
| 1434 | + |
1331 | 1435 | def get_refined_search_terms(prompt: str, matches: List[Dict[str, Any]],
|
1332 | 1436 | max_terms: int = 10, extensions: Optional[List[str]] = None,
|
1333 | 1437 | context_lines: int = 3, provider: str = "anthropic",
|
@@ -1465,9 +1569,21 @@ def get_refined_search_terms(prompt: str, matches: List[Dict[str, Any]],
|
1465 | 1569 | parser.add_argument("--workers", type=int, help="Number of parallel workers")
|
1466 | 1570 | parser.add_argument("--provider", choices=["anthropic", "openai", "azure"], default="anthropic", help="AI provider to use")
|
1467 | 1571 | parser.add_argument("--single-line", action="store_true", help="Disable multi-line regex mode (uses single-line mode)")
|
| 1572 | + parser.add_argument("--cache-size", type=int, default=1000, help="Maximum number of files to cache in memory (default: 1000)") |
| 1573 | + parser.add_argument("--clear-cache", action="store_true", help="Clear both file list and content caches before searching") |
| 1574 | + parser.add_argument("--cache-stats", action="store_true", help="Show cache statistics after search") |
1468 | 1575 | args = parser.parse_args()
|
1469 | 1576 |
|
1470 | 1577 | try:
|
| 1578 | + # Handle cache management |
| 1579 | + if args.clear_cache: |
| 1580 | + clear_file_cache() |
| 1581 | + clear_content_cache() |
| 1582 | + print("Cleared file list and content caches") |
| 1583 | + |
| 1584 | + # Set content cache size |
| 1585 | + set_content_cache_max_size(args.cache_size) |
| 1586 | + |
1471 | 1587 | # Get search terms and anti-patterns
|
1472 | 1588 | search_terms, ai_anti_patterns = get_search_terms_from_prompt(
|
1473 | 1589 | args.prompt,
|
@@ -1531,6 +1647,16 @@ def get_refined_search_terms(prompt: str, matches: List[Dict[str, Any]],
|
1531 | 1647 | # Chat about results if enabled
|
1532 | 1648 | if not args.no_chat and matches:
|
1533 | 1649 | chat_about_matches(matches, args.prompt, args.provider)
|
| 1650 | + |
| 1651 | + # Show cache statistics if requested |
| 1652 | + if args.cache_stats: |
| 1653 | + stats = get_content_cache_stats() |
| 1654 | + print(f"\nContent Cache Statistics:") |
| 1655 | + print(f" Cache size: {stats['cache_size']} files") |
| 1656 | + print(f" Cache hits: {stats['hits']}") |
| 1657 | + print(f" Cache misses: {stats['misses']}") |
| 1658 | + print(f" Hit rate: {stats['hit_rate']:.2%}") |
| 1659 | + print(f" Max cache size: {stats['max_size']}") |
1534 | 1660 |
|
1535 | 1661 | except Exception as e:
|
1536 | 1662 | print(f"Error: {type(e).__name__}: {e}")
|
|
0 commit comments