Skip to content

Conversation

@divinity76
Copy link

@divinity76 divinity76 commented May 7, 2025

Caching it is about 60 times faster than repeatedly calling it in my test:

$ python3 test.py 
CUDA is available
Benchmarking torch.cuda.is_available()...
     Iters     Uncached (s)    Cached (s)   Speedup
----------------------------------------------------
      1000         0.000576      0.000010     57.95x
     10000         0.005846      0.000097     60.43x
    100000         0.057846      0.000881     65.65x
   1000000         0.578599      0.008649     66.89x

$ cat test.py 
import torch
import time
import argparse

def benchmark_uncached(num_iters: int) -> float:
    """Time *num_iters* repeated calls to torch.cuda.is_available().

    Returns:
        Elapsed wall-clock seconds, measured with time.perf_counter().
    """
    remaining = num_iters
    t0 = time.perf_counter()
    while remaining > 0:
        # Re-query CUDA availability on every pass — the "uncached" case.
        torch.cuda.is_available()
        remaining -= 1
    return time.perf_counter() - t0

def benchmark_cached(num_iters: int) -> float:
    """Time *num_iters* reads of a pre-computed availability boolean.

    Queries torch.cuda.is_available() exactly once before the timed loop,
    then only reads the cached local inside it.

    Returns:
        Elapsed wall-clock seconds, measured with time.perf_counter().
    """
    cached = torch.cuda.is_available()
    t0 = time.perf_counter()
    for _ in range(num_iters):
        _ = cached  # read the cached value instead of re-querying CUDA
    return time.perf_counter() - t0

def main():
    """Parse CLI iteration counts and print an uncached-vs-cached table.

    For each requested iteration count, runs both benchmark variants and
    prints the timings plus their speedup ratio.
    """
    parser = argparse.ArgumentParser(description="Benchmark torch.cuda.is_available()")
    parser.add_argument(
        "--iters", "-n",
        type=int,
        nargs="*",
        default=[10**3, 10**4, 10**5, 10**6],
        help="List of iteration counts to test (default: 1e3,1e4,1e5,1e6)",
    )
    opts = parser.parse_args()

    print(f"{'Iters':>10}  {'Uncached (s)':>15}  {'Cached (s)':>12}  {'Speedup':>8}")
    print("-" * 52)
    for count in opts.iters:
        uncached_s = benchmark_uncached(count)
        cached_s = benchmark_cached(count)
        # Guard against division by zero on extremely fast cached runs.
        if cached_s > 0:
            ratio = uncached_s / cached_s
        else:
            ratio = float('inf')
        print(f"{count:10d}  {uncached_s:15.6f}  {cached_s:12.6f}  {ratio:8.2f}x")

if __name__ == "__main__":
    # Report whether CUDA is present before running the benchmark.
    status = "CUDA is available" if torch.cuda.is_available() else "CUDA is not available"
    print(status)
    print("Benchmarking torch.cuda.is_available()...")
    main()

Caching it is about 60 times faster than repeatedly calling it in my test:
$ python3 test.py 
CUDA is available
Benchmarking torch.cuda.is_available()...
     Iters     Uncached (s)    Cached (s)   Speedup
----------------------------------------------------
      1000         0.000576      0.000010     57.95x
     10000         0.005846      0.000097     60.43x
    100000         0.057846      0.000881     65.65x
   1000000         0.578599      0.008649     66.89x

$ cat test.py 
import torch
import time
import argparse

def benchmark_uncached(num_iters: int) -> float:
    """Call torch.cuda.is_available() on every iteration."""
    # Wall-clock the whole loop; each pass re-queries CUDA availability,
    # so the per-call overhead of is_available() is included num_iters times.
    start = time.perf_counter()
    for _ in range(num_iters):
        torch.cuda.is_available()
    return time.perf_counter() - start

def benchmark_cached(num_iters: int) -> float:
    """Call torch.cuda.is_available() once, then re-use the boolean."""
    # Single query outside the timed region — this is the "cached" case.
    available = torch.cuda.is_available()
    start = time.perf_counter()
    for _ in range(num_iters):
        available  # just read the local variable
    return time.perf_counter() - start

def main():
    """Parse iteration counts from the CLI and print a benchmark table.

    For each count, times both the uncached and cached variants and
    prints the speedup ratio between them.
    """
    p = argparse.ArgumentParser(description="Benchmark torch.cuda.is_available()")
    p.add_argument(
        "--iters", "-n",
        type=int,
        nargs="*",
        default=[10**3, 10**4, 10**5, 10**6],
        help="List of iteration counts to test (default: 1e3,1e4,1e5,1e6)"
    )
    args = p.parse_args()

    # Right-aligned column headers matching the row format below.
    print(f"{'Iters':>10}  {'Uncached (s)':>15}  {'Cached (s)':>12}  {'Speedup':>8}")
    print("-" * 52)
    for n in args.iters:
        t_unc = benchmark_uncached(n)
        t_cac = benchmark_cached(n)
        # Avoid ZeroDivisionError if the cached loop is too fast to measure.
        speedup = t_unc / t_cac if t_cac > 0 else float('inf')
        print(f"{n:10d}  {t_unc:15.6f}  {t_cac:12.6f}  {speedup:8.2f}x")

if __name__ == "__main__":
    # Announce CUDA availability once up front, then run the benchmark.
    if torch.cuda.is_available():
        print("CUDA is available")
    else:
        print("CUDA is not available")
    print("Benchmarking torch.cuda.is_available()...")
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant