Test Kpu 🎯

print(f"Tensor core feature test complete") print(f"Time for 100x 4096x4096 FP16 matmuls: {elapsed:.3f}s") print(f"Throughput: {(4096**3 * 2 * 100) / elapsed / 1e12:.2f} TFLOPS") test_tensor_core_feature()

# Timed test start = time.time() for _ in range(100): c = torch.mm(a, b) torch.cuda.synchronize() elapsed = time.time() - start

import torch import time def test_tensor_core_feature(): # Check if CUDA and tensor cores are available (Volta+ GPUs) if not torch.cuda.is_available(): print("CUDA not available") return

# Large matrix multiplication to utilize tensor cores a = torch.randn(4096, 4096, device=device, dtype=dtype) b = torch.randn(4096, 4096, device=device, dtype=dtype)

Do you have a question?

Our team is happy to advise you personally.

╳

Event ID* Order ID* First Name* Last Name* Email* Subject* Message*

If you have a support question, please add your event ID!

I have read the data privacy terms.

Test Kpu 🎯

Contact us!