Custom kernels unused bits
Option 2:
@cp.fuse()
def primeFactorizationSum(n):
    """Return the sum of the prime factors of ``n``, counted with multiplicity.

    Example: 12 = 2 * 2 * 3, so the result is 2 + 2 + 3 = 7. An input of 1
    has no prime factors and yields 0.

    Args:
        n: Integer to factorize by trial division. The result is only
            meaningful for ``n >= 1``.

    Returns:
        The sum of all prime factors of ``n``, each repeated as often as it
        divides ``n``.

    Raises:
        ValueError: If ``n`` is 0. Zero stays divisible by 2 forever, so the
            halving loop below would never terminate.
    """
    # NOTE(review): when called with plain Python ints, cp.fuse presumably
    # just runs this body on the host; the data-dependent loops look unlikely
    # to be fusable for CuPy array inputs -- confirm before relying on it.
    if n == 0:
        raise ValueError("primeFactorizationSum is undefined for n == 0")

    remaining = n
    total = 0

    # Strip every factor of 2 first so only odd candidates need testing.
    while remaining % 2 == 0:
        remaining //= 2
        total += 2

    # Trial division by odd candidates; stop once candidate^2 exceeds what
    # is left, because any remaining cofactor must then be prime (or 1).
    candidate = 3
    while candidate * candidate <= remaining:
        while remaining % candidate == 0:
            remaining //= candidate
            total += candidate
        candidate += 2

    # Whatever is left above 1 is a single remaining prime factor.
    if remaining > 1:
        total += remaining
    return total
# Benchmark driver: evaluate primeFactorizationSum for every integer 2..n.
n = 1_000_000
# One output slot per input value 2..n, i.e. n - 1 entries.
# NOTE(review): assuming `cp` is CuPy, cp.empty does not initialise the
# contents; they are only defined after the loop below has filled them.
A = cp.empty(n - 1, dtype=cp.int64)
# timeit.timeit returns the TOTAL seconds for all `number` executions of the
# statement, so exec_time covers 20 complete passes over the array, not one.
exec_time = timeit.timeit(
    "for i in range(0, n - 1): A[i] = primeFactorizationSum(i + 2)",
    number=20,
    globals=globals()
)
# NOTE(review): `benchmark` is presumably cupyx.profiler.benchmark -- confirm.
# It is given the single scalar argument n, so it appears to time one call
# rather than the array-filling loop measured above.
print(benchmark(primeFactorizationSum, (n,), n_repeat=20))
print(f"Execution in {round(exec_time, 3)} s")
print(f"\nPrimes sum array: {A}")

General concepts
Input and output format: each parameter is declared as a type followed by a name.
Examples:
float32 a # NumPy data types can be used
T x # T = generic type

Elementwise kernels
<kernel name> = cp.ElementwiseKernel(
    '<list of inputs>',
    '<list of outputs>',
    '<operation to perform>',
    '<kernel name>'
)
Example:
# Element-wise kernel: for each pair of float32 inputs (x, y) it writes the
# squared difference (x - y) * (x - y) to the float32 output z.
squared_diff = cp.ElementwiseKernel(
    in_params='float32 x, float32 y',
    out_params='float32 z',
    operation='z = (x - y) * (x - y)',
    name='squared_diff',
)