init: Beep Boop

master
qhga 3 years ago
commit 6e15d25210
Signed by: phga
GPG Key ID: 5249548AA705F019

@ -0,0 +1 @@
((nil . ((projectile-project-install-cmd . "mvn -B clean compile assembly:single && java -jar target/*.jar"))))

4
.gitignore vendored

@ -0,0 +1,4 @@
target/
.settings/
.project
.classpath

@ -0,0 +1,26 @@
# How to run
Right now, I am too lazy to create a proper CLI, so running the project is a bit hacky. Sorry...
```sh
# Device selected by Devices.java (One has to know the selection in advance though...)
# DEFAULT = 1
DEVICE=1
# The different targets in App.java
# 0 = Informational output
# 10 = All PrimeNumber related implementations at once
# 20 = All Reduce related implementations at once
# 21 - 25 = Reduce1 - Reduce5 respectively
# 30 = All Prefix related implementations at once
# 31 - 32 = Prefix1 - Prefix2 respectively
# DEFAULT = 0
TARGET=31
# How many shifts for N (e.g. if N should be 8, N_LSHIFTS should be 3)
# Targets 2X and 3X depend on this parameter
# DEFAULT = 25
N_LSHIFTS=3
mvn -B clean compile assembly:single && java -jar target/*.jar $DEVICE $TARGET $N_LSHIFTS
```

@ -0,0 +1,103 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>edu.thi.phga</groupId>
<artifactId>aparapi-test</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>aparapi-test</name>
<!-- FIXME change it to the project's website -->
<url>http://www.example.com</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/com.aparapi/aparapi -->
<dependency>
<groupId>com.aparapi</groupId>
<artifactId>aparapi</artifactId>
<version>3.0.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<!-- clean lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#clean_Lifecycle -->
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.1.0</version>
</plugin>
<!-- default lifecycle, jar packaging: see https://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.8.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.22.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>edu.thi.phga.aparapi_test.App</mainClass>
</manifest>
</archive>
</configuration>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<!-- Pinned like every other plugin in this section; the README build command invokes assembly:single directly. -->
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>edu.thi.phga.aparapi_test.App</mainClass>
</manifest>
</archive>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
<!-- site lifecycle, see https://maven.apache.org/ref/current/maven-core/lifecycles.html#site_Lifecycle -->
<plugin>
<artifactId>maven-site-plugin</artifactId>
<version>3.7.1</version>
</plugin>
<plugin>
<artifactId>maven-project-info-reports-plugin</artifactId>
<version>3.0.0</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>

@ -0,0 +1,100 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.device.OpenCLDevice;
/**
 * Command-line entry point that dispatches to the individual benchmarks.
 *
 * <p>Arguments (all optional, positional):
 * <ol>
 *   <li>device number, 1-based, as printed by {@link Devices#selectDevice()} (default 1)</li>
 *   <li>target: 0 = info, 10 = primes, 20 = all reduces, 21-25 = Reduce1-5,
 *       30 = all prefixes, 31-32 = Prefix1-2 (default 0)</li>
 *   <li>number of left shifts used to size the input, N = 1 &lt;&lt; shift (default 25)</li>
 * </ol>
 */
public class App {
    /** Zero-based index into the device list built by {@link Devices#selectDevice()}. */
    public static int choice;

    /** Device selected for this run; set once in {@link #main(String[])}. */
    public static OpenCLDevice device;

    private static final int DEFAULT_SHIFT = 25;

    /** Largest shift for which {@code 1 << shift} is still a positive int. */
    private static final int MAX_SHIFT = 30;

    private static void printHeader(final String txt) {
        final String spacer =
            "######################################################################";
        System.out.printf("%s\n%s\n", spacer, txt);
    }

    /**
     * Parses the arguments, selects the device and runs the requested target.
     *
     * @param args see the class description
     * @throws NumberFormatException if an argument is not an integer
     * @throws IllegalArgumentException if the shift is outside 0..30
     */
    public static void main(String[] args) {
        // The command line is 1-based, the device list is 0-based.
        App.choice = args.length > 0 ? Integer.parseInt(args[0]) - 1 : 0;
        App.device = Devices.selectDevice();

        final int target = args.length > 1 ? Integer.parseInt(args[1]) : 0;
        final int shift = args.length > 2 ? Integer.parseInt(args[2]) : DEFAULT_SHIFT;

        switch (target) {
            case 0:
                OpenCLGetMemoryInfo.getInfo();
                OpenCLSizeTest.start(8);
                break;
            // PRIMES
            case 10:
                printHeader("FindPrimes (Seriell, Parallel, OpenCL)");
                // Original note: "from 1 << 14 on it is no longer correct"; CPU is faster than GPU...
                FindPrimes.start();
                FindPrimesThreads.start();
                FindPrimesOpenCL.start();
                break;
            // REDUCE
            case 20: {
                // One buffer, refilled before every variant because the kernels modify it.
                final int[] data = new int[arrayLength(shift)];
                for (int variant = 1; variant <= 5; variant++) {
                    runReduce(variant, data);
                }
                break;
            }
            case 21:
            case 22:
            case 23:
            case 24:
            case 25:
                runReduce(target - 20, new int[arrayLength(shift)]);
                break;
            // PREFIX
            case 30: {
                final int[] data = new int[arrayLength(shift)];
                runPrefix(1, data); // Up to 1 << 27
                runPrefix(2, data); // Up to 1 << 28
                break;
            }
            case 31:
            case 32:
                runPrefix(target - 30, new int[arrayLength(shift)]);
                break;
            default:
                System.err.println("Unknown target " + target
                    + " (expected 0, 10, 20-25 or 30-32)");
                break;
        }
    }

    /** Returns {@code 1 << shift} after checking that it is a valid array length. */
    private static int arrayLength(int shift) {
        if (shift < 0 || shift > MAX_SHIFT) {
            throw new IllegalArgumentException(
                "N_LSHIFTS must be between 0 and " + MAX_SHIFT + " but was " + shift);
        }
        return 1 << shift;
    }

    /** Fills {@code data} with ones and runs the requested reduce variant (1-5). */
    private static void runReduce(int variant, int[] data) {
        java.util.Arrays.fill(data, 1);
        switch (variant) {
            case 1:
                OpenCLReduce1.start(data);
                break;
            case 2:
                OpenCLReduce2.start(data);
                break;
            case 3:
                OpenCLReduce3.start(data);
                break;
            case 4:
                OpenCLReduce4.start(data);
                break;
            case 5:
                OpenCLReduce5.start(data);
                break;
            default:
                throw new IllegalArgumentException("Unknown reduce variant " + variant);
        }
    }

    /** Fills {@code data} with ones and runs the requested prefix variant (1-2). */
    private static void runPrefix(int variant, int[] data) {
        java.util.Arrays.fill(data, 1);
        switch (variant) {
            case 1:
                OpenCLPrefix1.start(data);
                break;
            case 2:
                OpenCLPrefix2.start(data);
                break;
            default:
                throw new IllegalArgumentException("Unknown prefix variant " + variant);
        }
    }
}

@ -0,0 +1,49 @@
package edu.thi.phga.aparapi_test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.aparapi.device.OpenCLDevice;
import com.aparapi.internal.opencl.OpenCLPlatform;
/** Enumerates the available OpenCL platform/device combinations and picks one. */
public class Devices {
    /** Matches the first "major.minor" number in a platform version string. */
    private static final Pattern VERSION_PATTERN = Pattern.compile("\\d+\\.\\d+");

    /** One platform/device combination together with its printable description. */
    private static final class Processor {
        private final String name;
        private final String version;
        private final String typ;
        private final OpenCLDevice device;

        Processor(OpenCLPlatform platform, OpenCLDevice device) {
            this.device = device;
            this.name = platform.getName();
            Matcher m = VERSION_PATTERN.matcher(platform.getVersion());
            this.version = m.find() ? m.group() : "";
            this.typ = device.getType().name();
        }

        @Override
        public String toString() {
            return typ + " - OpenCL " + version + " - " + name;
        }
    }

    /**
     * Prints a numbered list of all platform/device combinations and returns the
     * one at index {@link App#choice} (zero-based).
     *
     * @return the selected device
     * @throws IndexOutOfBoundsException if {@code App.choice} does not refer to a
     *         listed device (including the case that no device was found)
     */
    public static OpenCLDevice selectDevice() {
        // Build all platform/device combinations.
        List<Processor> processors = new ArrayList<>();
        for (var platform : OpenCLPlatform.getUncachedOpenCLPlatforms()) {
            for (var device : platform.getOpenCLDevices()) {
                processors.add(new Processor(platform, device));
            }
        }
        for (int i = 0; i < processors.size(); i++) {
            System.out.println(i + 1 + ") " + processors.get(i));
        }

        if (App.choice < 0 || App.choice >= processors.size()) {
            throw new IndexOutOfBoundsException(
                "Device " + (App.choice + 1) + " is not available; "
                    + processors.size() + " OpenCL device(s) found");
        }
        var processor = processors.get(App.choice);
        System.out.println("\n" + processor + "\n");
        return processor.device;
    }
}

@ -0,0 +1,35 @@
package edu.thi.phga.aparapi_test;
/**
 * Single-threaded baseline: tests the N odd numbers START, START + 2, ... for
 * primality by trial division and reports the elapsed time and the prime count.
 */
public class FindPrimes {
    private static final int N = 10_000_000;
    private static final int START = 123_456_789;
    private static final boolean[] istPrime = new boolean[N];

    /** Runs the benchmark and prints "Single: &lt;ms&gt; ms: &lt;number of primes&gt;". */
    public static void start() {
        long t1 = System.nanoTime();
        primeTest();
        long t2 = System.nanoTime();
        int a = 0;
        for (var b : istPrime) {
            if (b) {
                a++;
            }
        }
        System.out.println("Single: " + (t2 - t1) / 1000000 + " ms: " + a);
    }

    /**
     * Trial-division primality test.
     *
     * @param z candidate value
     * @return {@code true} if {@code z} is prime
     */
    static boolean isPrime(int z) {
        if (z < 2) {
            return false;
        }
        if (z < 4) {
            return true; // 2 and 3
        }
        if (z % 2 == 0) {
            return false;
        }
        // Widen the square to long so it cannot overflow for candidates near Integer.MAX_VALUE.
        for (int teiler = 3; (long) teiler * teiler <= z; teiler += 2) {
            if (z % teiler == 0) {
                return false;
            }
        }
        return true;
    }

    /** Fills {@code istPrime[i]} with the result for the candidate START + 2 * i. */
    private static void primeTest() {
        for (int z = START, i = 0; i < N; z += 2, i++) {
            istPrime[i] = isPrime(z);
        }
    }
}

@ -0,0 +1,25 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
/**
 * Aparapi kernel that tests one candidate number per work-item by trial division
 * with odd divisors and stores the verdict in a shared boolean array.
 */
public class FindPrimesKernel extends Kernel {
// First candidate; work-item i tests start + 2 * i.
private int start;
// Output: istPrime[i] is set to true when no divisor was found for candidate i.
private boolean[] istPrime;
/**
 * @param start first candidate (NOTE(review): presumably expected to be odd,
 *              since only odd divisors are tried — confirm with callers)
 * @param istPrime result array, indexed by global work-item id
 */
public FindPrimesKernel(int start, boolean[] istPrime) {
this.start = start;
this.istPrime = istPrime;
}
/** Tests the candidate that belongs to this work-item's global id. */
@Override public void run() {
int index = getGlobalId(0);
int zahl = start + index * 2;
int teiler = 3;
// Advance through odd divisors until one divides zahl or its square reaches zahl.
while (teiler * teiler < zahl && zahl % teiler != 0) {
teiler += 2;
}
// The loop stops either on a divisor (remainder 0) or once teiler * teiler >= zahl.
istPrime[index] = zahl % teiler != 0;
}
}

@ -0,0 +1,28 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
/**
 * OpenCL variant of the prime benchmark: runs {@link FindPrimesKernel} over N
 * candidates and prints the kernel execution time and the prime count.
 */
public class FindPrimesOpenCL {
    private static final int N = 10_000_000;
    private static final int START = 123_456_789;
    /** Work-group size; N is a multiple of it. */
    private static final int LOCAL_SIZE = 250;
    private static final boolean[] istPrime = new boolean[N];

    /** Runs the kernel twice and prints "OpenCL: &lt;ms&gt; ms: &lt;number of primes&gt;". */
    public static void start() {
        // Reuse the device App already selected; only enumerate again when run stand-alone.
        var device = App.device != null ? App.device : Devices.selectDevice();
        Range r = Range.create(device, N, LOCAL_SIZE);
        Kernel k = new FindPrimesKernel(START, istPrime);
        try {
            // Executed twice; the time printed below is taken after the second run.
            // NOTE(review): presumably the first run is a warm-up so the reported time
            // excludes the one-off conversion cost (cf. VAdd) — confirm.
            k.execute(r);
            k.execute(r);
            int a = 0;
            for (var b : istPrime) {
                if (b) {
                    a++;
                }
            }
            System.out.println("OpenCL: " + k.getExecutionTime() + " ms: " + a);
        } finally {
            // Release the OpenCL resources held by the kernel.
            k.dispose();
        }
    }
}

@ -0,0 +1,69 @@
package edu.thi.phga.aparapi_test;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
 * Multi-threaded variant of the prime benchmark: splits the N candidates into
 * P contiguous chunks (P = number of available processors) and tests each
 * chunk in its own task.
 */
public class FindPrimesThreads {
    private static final int N = 10_000_000;
    private static final int P = Runtime.getRuntime().availableProcessors();
    private static final int START = 123_456_789;
    private static final boolean[] istPrime = new boolean[N];

    /** Runs the benchmark and prints "Threads(P): &lt;ms&gt; ms: &lt;number of primes&gt;". */
    public static void start() {
        long t1 = System.nanoTime();
        primeTest();
        long t2 = System.nanoTime();
        int a = 0;
        for (var b : istPrime) {
            if (b) {
                a++;
            }
        }
        System.out.println("Threads(" + P + "): " + (t2 - t1) / 1000000 + " ms: " + a);
    }

    /**
     * Computes the half-open index range {@code [from, to)} handled by one task.
     * All chunks have {@code n / parts} elements except the last one, which also
     * takes the remainder so that the chunks cover {@code 0..n} completely.
     *
     * @param n total number of elements
     * @param parts number of chunks
     * @param index zero-based chunk number
     * @return a two-element array {@code {from, to}}
     */
    static int[] chunkBounds(int n, int parts, int index) {
        int span = n / parts;
        int from = index * span;
        int to = (index == parts - 1) ? n : from + span;
        return new int[] {from, to};
    }

    private static void primeTest() {
        // Build the task list.
        List<Callable<Object>> tasks = IntStream
            .range(0, P)
            .mapToObj(Task::new)
            .map(Executors::callable)
            .collect(Collectors.toList());
        var pool = Executors.newCachedThreadPool();
        try {
            pool.invokeAll(tasks);
        } catch (InterruptedException e) {
            // Preserve the interrupt for the caller instead of swallowing it.
            Thread.currentThread().interrupt();
        } finally {
            // Without this the idle worker threads keep the JVM alive after main returns.
            pool.shutdown();
        }
    }

    /** Tests the candidates of one chunk. */
    private static class Task implements Runnable {
        private final int index;

        Task(int index) {
            this.index = index;
        }

        @Override
        public void run() {
            int[] bounds = chunkBounds(N, P, index);
            for (int i = bounds[0]; i < bounds[1]; i++) {
                int z = START + 2 * i;
                int teiler = 3;
                while (z > teiler * teiler && z % teiler != 0) {
                    teiler += 2;
                }
                istPrime[i] = z % teiler != 0;
            }
        }
    }
}

@ -0,0 +1,15 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.device.Device;
/** Prints the local-memory and work-group limits of the device selected in {@link App}. */
public class OpenCLGetMemoryInfo {
    /**
     * Queries {@code App.device} and prints its local memory size, its maximum
     * work-group size and the smaller of that maximum and a quarter of the
     * local memory size.
     */
    public static void getInfo() {
        final int localMemBytes = (int) App.device.getLocalMemSize();
        final int workGroupLimit = App.device.getMaxWorkGroupSize();
        // NOTE(review): the division by 4 is presumably the size of an int in bytes — confirm.
        final int intsInLocalMem = localMemBytes / 4;
        final int usableLocalSize = Math.min(workGroupLimit, intsInLocalMem);
        final String report = String.format(
            "Memsize: %d, Max-WGSize: %d, LocalSize: %d\n",
            localMemBytes, workGroupLimit, usableLocalSize);
        System.out.print(report);
    }
}

@ -0,0 +1,77 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
/**
 * Parallel prefix sum that ping-pongs between the two halves of a single
 * buffer of twice the input length.
 */
public class OpenCLPrefix1 {
    /** Work-group size used for every pass. */
    private static final int LOCAL_SIZE = 4;

    /**
     * Computes the inclusive prefix sum of {@code a} on {@code App.device} and
     * prints the last three results together with the accumulated execution time
     * (minus the conversion time measured after the first pass).
     *
     * @param a input values; not modified
     */
    public static void start(final int[] a) {
        final int N = a.length;
        final int[] b = new int[N * 2];
        // The first half holds the input; the second half is overwritten by the first pass.
        System.arraycopy(a, 0, b, 0, N);
        Range r = Range.create(App.device, N, LOCAL_SIZE);
        OpenCLPrefix1Kernel k = new OpenCLPrefix1Kernel(b);
        try {
            k.setExplicit(true);
            k.setStepSize(1);
            k.execute(r);
            k.toggleIndex();
            double ct = k.getConversionTime();
            // Step size doubles every pass; input and output halves swap after each one.
            for (int n = 2; n < N; n *= 2) {
                k.setStepSize(n);
                k.execute(r);
                k.toggleIndex();
            }
            k.get(b);
            double et = k.getAccumulatedExecutionTime() - ct;
            // After the last toggle, index is the offset of the half written last.
            int n = N + k.getIndex();
            System.out.printf("P1 - GOT: %d, %d, %d, TIME: %.2f ms\n",
                              b[n - 3], b[n - 2], b[n - 1], et);
        } finally {
            // Release the device buffers before the next benchmark runs.
            k.dispose();
        }
    }

    /** One prefix pass: reads the half starting at {@code index}, writes the other half. */
    private static class OpenCLPrefix1Kernel extends Kernel {
        private int stepSize;
        // Offset of the half that is read in the next pass (0 or N).
        private int index;
        private int N;
        private int[] a;

        public OpenCLPrefix1Kernel(int[] a) {
            this.a = a;
            this.N = a.length / 2;
        }

        public void setStepSize(int s) {
            this.stepSize = s;
        }

        /** Swaps the roles of the two halves. */
        public void toggleIndex() {
            index = N - index;
        }

        public int getIndex() {
            return index;
        }

        @Override
        public void run() {
            int i = getGlobalId();
            int out = N - index;
            if (i < stepSize) {
                a[out + i] = a[index + i];
            } else {
                a[out + i] = a[index + i] + a[index + i - stepSize];
            }
        }
    }
}

@ -0,0 +1,112 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
/**
 * Parallel prefix implementation with 2 arrays instead of one array twice the
 * size of the requested input. Accumulated execution time was equally good.
 * Works up to 1 << 28 before OutOfMemoryError.
 *
 * <p>Rewrite of the original idea by: Prof. Dr. Schmidt &lt;Ulrich.Schmidt@thi.de&gt;
 */
public class OpenCLPrefix2 {
    /** Work-group size used for every pass. */
    private static final int LOCAL_SIZE = 4;

    /**
     * Computes the inclusive prefix sum of {@code a} on {@code App.device} and
     * prints the last three results together with the accumulated execution time
     * (minus the conversion time measured after the first pass).
     *
     * @param a input values; used as one of the two working buffers
     */
    public static void start(int[] a) {
        final int N = a.length;
        // Second working buffer; the first pass (A -> B) writes every element of it.
        final int[] b = new int[N];
        Range r = Range.create(App.device, N, LOCAL_SIZE);
        OpenCLPrefix2Kernel k = new OpenCLPrefix2Kernel(a, b);
        try {
            k.setExplicit(true);
            k.setStepSize(1);
            k.execute(r);
            k.toggleIndex();
            double ct = k.getConversionTime();
            // For debugging, fetch both arrays with k.get(a) / k.get(b) after a pass
            // and print them (try with 1 << 3).
            for (int n = 2; n < N; n *= 2) {
                k.setStepSize(n);
                k.execute(r);
                k.toggleIndex();
            }
            double et = k.getAccumulatedExecutionTime() - ct;
            final int[] result;
            if (k.getIndex() > 0) {
                // Last write was to A.
                k.get(a);
                result = a;
            } else {
                // Last write was to B.
                k.get(b);
                result = b;
            }
            System.out.printf("P2 - GOT: %d, %d, %d, TIME: %.2f ms\n",
                              result[N - 3], result[N - 2], result[N - 1], et);
        } finally {
            // Release the device buffers before the next benchmark runs.
            k.dispose();
        }
    }

    /** One prefix pass from A to B (index &gt; 0) or from B to A (otherwise). */
    private static class OpenCLPrefix2Kernel extends Kernel {
        private int stepSize;
        // Direction flag: positive means read A / write B, negative the reverse.
        private int index = 1;
        private int[] a, b;

        public OpenCLPrefix2Kernel(int[] a, int[] b) {
            this.a = a;
            this.b = b;
        }

        public void setStepSize(int s) {
            this.stepSize = s;
        }

        /** Reverses the copy direction for the next pass. */
        public void toggleIndex() {
            index = -index;
        }

        public int getIndex() {
            return index;
        }

        @Override
        public void run() {
            int i = getGlobalId();
            // A -> B
            if (index > 0) {
                if (i < stepSize) {
                    b[i] = a[i];
                } else {
                    b[i] = a[i] + a[i - stepSize];
                }
            // B -> A
            } else {
                if (i < stepSize) {
                    a[i] = b[i];
                } else {
                    a[i] = b[i] + b[i - stepSize];
                }
            }
        }
    }
}

@ -0,0 +1,40 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
public class OpenCLReduce1 {
public static void start(final int[] a) {
int optimalSize = a.length / 2;
int[] gs = new int[optimalSize];
Kernel k = new Kernel() {
@Override
public void run() {
int i = getGlobalId();
int size = getGlobalSize();
gs[i] = size;
for (int s = 1; s <= size; s *= 2) {
if (i % s == 0) {
a[2 * i] += a[2 * i + s];
}
}
}
};
k.execute(Range.create(optimalSize, optimalSize <= 256 ? optimalSize : 256));
double ct = k.getConversionTime();
double et = k.getExecutionTime() - ct;
// if (a.length <= 64) {
// for (int x = 0; x < optimalSize; x++) {
// System.out.printf("GI: %d, GSize: %d\n", x, gs[x]);
// }
// } else {
// System.out.printf("GI: %d, GSize: %d\n", 0, gs[0]);
// System.out.printf("GI: %d, GSize: %d\n", optimalSize - 1, gs[optimalSize - 1]);
// }
System.out.printf("R1 - WANT: %d, GOT: %d, TIME: %.2f ms\n", a.length, a[0], et);
}
}

@ -0,0 +1,35 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
public class OpenCLReduce2 {
public static void start(int[] a) {
int optimalSize = a.length / 2;
int N = a.length;
int[] gs = new int[optimalSize];
Kernel k = new Kernel() {
@Override
public void run() {
int size = N / getGlobalSize(); // 2, 4, 8, ...
int i = getGlobalId() * size; // 2: 0 -> 0, 1 -> 2, 2 -> 4, 3 -> 6, ...
gs[getGlobalId()] = size;
a[i] += a[i + size / 2]; // 1, 2, 4, ..
}
};
k.execute(Range.create(N / 2, optimalSize < 256 ? optimalSize : 256));
double ct = k.getConversionTime();
for (int n = N / 4; n >= 1; n /= 2) {
k.execute(Range.create(n, n < 256 ? n : 256));
}
double et = k.getAccumulatedExecutionTime() - ct;
System.out.printf("R2 - WANT: %d, GOT: %d, TIME: %.2f ms\n", a.length, a[0], et);
}
}

@ -0,0 +1,51 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
public class OpenCLReduce3 {
public static void start(int[] a) {
int optimalSize = a.length / 2;
int N = a.length;
int[] gs = new int[optimalSize];
Kernel k = new Kernel() {
@Override
public void run() {
int size = N / getGlobalSize(); // 2, 4, 8, ...
int i = getGlobalId() * size; // 2: 0 -> 0, 1 -> 2, 2 -> 4, 3 -> 6, ...
gs[getGlobalId()] = size;
a[i] += a[i + size / 2]; // 1, 2, 4, ..
}
};
// Manually manage variables that are accessible and returned by kernel
// Anything that is passed to the constructor is still automatically pushed once
// to the Memory on the gpu. This is done to reduce the amount of times
// the data has to be transferred to the gpu memory via slow pci bus
k.setExplicit(true);
try {
System.out.printf("MAX WG SIZE: %d\n", k.getKernelMaxWorkGroupSize(App.device));
} catch(Exception e) {
}
// Not necessary (So maybe anything used by the kernel is put once?)
// k.put(a);
k.execute(Range.create(N / 2, optimalSize < 256 ? optimalSize : 256));
double ct = k.getConversionTime();
for (int n = N / 4; n >= 1; n /= 2) {
k.execute(Range.create(n, n < 256 ? n : 256));
}
k.get(a);
double et = k.getAccumulatedExecutionTime() - ct;
System.out.printf("R3 - WANT: %d, GOT: %d, TIME: %.2f ms\n", a.length, a[0], et);
}
}

@ -0,0 +1,51 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
public class OpenCLReduce4 {
public static void start(int[] a) {
int optimalSize = a.length / 2;
int N = a.length;
int[] gs = new int[optimalSize];
Kernel k = new Kernel() {
@Override
public void run() {
int size = getGlobalSize(); // 2, 4, 8, ...
int i = getGlobalId(); // 2: 0 -> 0, 1 -> 2, 2 -> 4, 3 -> 6, ...
gs[getGlobalId()] = size;
a[i] += a[i + size]; // 1, 2, 4, ..
}
};
// Manually manage variables that are accessible and returned by kernel
// Anything that is passed to the constructor is still automatically pushed once
// to the Memory on the gpu. This is done to reduce the amount of times
// the data has to be transferred to the gpu memory via slow pci bus
k.setExplicit(true);
try {
System.out.printf("MAX WG SIZE: %d\n", k.getKernelMaxWorkGroupSize(App.device));
} catch(Exception e) {
}
// Not necessary (So maybe anything used by the kernel is put once?)
// k.put(a);
k.execute(Range.create(N / 2, optimalSize < 256 ? optimalSize : 256));
double ct = k.getConversionTime();
for (int n = N / 4; n >= 1; n /= 2) {
k.execute(Range.create(n, n < 256 ? n : 256));
}
k.get(a);
double et = k.getAccumulatedExecutionTime() - ct;
System.out.printf("R4 - WANT: %d, GOT: %d, TIME: %.2f ms\n", a.length, a[0], et);
}
}

@ -0,0 +1,52 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Range;
/**
 * Reduce variant 5: two kernel launches — the first produces one partial sum
 * per work-group, the second combines the partial sums in a single work-group.
 */
public class OpenCLReduce5 {
    /** Upper bound for the work-group size. */
    private static final int MAX_LOCAL_SIZE = 256;

    /**
     * Sums {@code a} on {@code App.device} and prints the expected value
     * ({@code a.length}), the obtained value and the accumulated execution time
     * minus the conversion time of the first launch.
     *
     * @param a input values
     */
    public static void start(int[] a) {
        int N = a.length;
        int localMem = (int) App.device.getLocalMemSize();
        int maxWGSize = Math.min(App.device.getMaxWorkGroupSize(), MAX_LOCAL_SIZE);
        int localSize = Math.min(maxWGSize, localMem / 4);
        // At most 2 * localSize work-groups, so their partial sums fit into result[].
        int globalSize = Math.min(N, 2 * localSize * localSize);
        localSize = Math.min(localSize, globalSize);
        // These have to be created on the host side for the kernel to access them later
        // (slide 180 of the lecture: a = input array, result = partial sums, summe = total).
        int[] result = new int[2 * localSize];
        int[] summe = { 0 };
        OpenCLReduce5Kernel k = new OpenCLReduce5Kernel(a, localSize, result, summe);
        try {
            try {
                System.out.printf("KERNEL MAX WG SIZE: %d\n",
                                  k.getKernelMaxWorkGroupSize(App.device));
            } catch (Exception e) {
                // Purely informational; report the failure and carry on with the benchmark.
                System.err.println("Could not query kernel max work-group size: " + e);
            }
            // Manually manage variables that are accessible and returned by kernel
            // Anything that is passed to the constructor is still automatically pushed once
            // to the Memory on the gpu. This is done to reduce the amount of times
            // the data has to be transferred to the gpu memory via slow pci bus
            k.setExplicit(true);
            Range r1 = Range.create(App.device, globalSize, localSize);
            Range r2 = Range.create(App.device, localSize, localSize);
            // An explicit k.put(a) was found not to be necessary
            // (so maybe anything used by the kernel is put once?).
            k.execute(r1);
            double ct = k.getConversionTime();
            k.setStep(2);
            k.execute(r2);
            k.get(summe);
            double et = k.getAccumulatedExecutionTime() - ct;
            System.out.printf("R5 - WANT: %d, GOT: %d, TIME: %.2f ms\n", a.length, summe[0], et);
        } finally {
            // Release the OpenCL resources held by the kernel.
            k.dispose();
        }
    }
}

@ -0,0 +1,76 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
/**
 * Two-step sum reduction kernel. Step 1 produces one partial sum per work-group
 * in {@code result}; step 2 combines the entries of {@code result} into
 * {@code summe[0]}. The step to run is chosen with {@link #setStep(int)}.
 */
public class OpenCLReduce5Kernel extends Kernel {
// a: input values, result: per-group partial sums, summe: single-element total.
private int[] a, result, summe;
// 1 selects step1(); any other value selects step2().
private int step = 1;
// Per-work-group scratch buffer of localSize ints, annotated as @Local.
@Local
private int[] scratch;
/**
 * @param a input values
 * @param localSize number of ints to allocate for the scratch buffer
 * @param result receives one partial sum per work-group in step 1
 * @param summe receives the total in element 0 in step 2
 */
public OpenCLReduce5Kernel(int[] a, int localSize, int[] result, int[] summe) {
this.a = a;
this.result = result;
this.summe = summe;
scratch = new int[localSize];
}
/** Dispatches to step 1 or step 2 depending on the current step value. */
@Override
public void run() {
if (this.step == 1) {
step1();
} else {
step2();
}
}
/** Selects which step the next execution runs (1 = step1, otherwise step2). */
public void setStep(int step) {
this.step = step;
}
// Step 1: each work-item sums the input elements at globalId, globalId + globalSize, ...,
// then the work-group reduces these sums and stores its total in result[groupId].
private void step1() {
int globalId = getGlobalId();
int localId = getLocalId();
int globalSize = getGlobalSize();
int sum = 0;
for (int i = globalId; i < a.length; i += globalSize) {
sum += a[i];
}
scratch[localId] = sum;
localBarrier();
// Pairwise reduction: the number of active work-items is halved in every round.
for (int s = getLocalSize() / 2; s > 0; s /= 2) {
if (localId < s) {
scratch[localId] += scratch[localId + s];
}
localBarrier();
}
if (localId == 0) {
result[getGroupId()] = scratch[0];
}
}
// Step 2: processing of the individual elements in result[].
// First sequentially once more (each work-item adds two entries), then in parallel.
private void step2() {
int localId = getLocalId();
int localSize = getLocalSize();
// Reads entries localId and localId + localSize, i.e. 2 * localSize entries in total.
scratch[localId] = result[localId] + result[localId + localSize];
localBarrier(); // Wait for all to finish (locally)
for (int s = localSize / 2; s > 0; s /= 2) {
if (localId < s) {
scratch[localId] += scratch[localId + s];
}
localBarrier();
}
if (localId == 0) {
summe[0] = scratch[0];
}
}
}

@ -0,0 +1,47 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
/** Small demo that records and prints the ids and sizes seen by each work-item. */
public class OpenCLSizeTest {
    /** Work-group size of the demo range. */
    private static final int LOCAL_SIZE = 4;

    /**
     * Runs a kernel with {@code n} work-items in groups of 4 and prints, per
     * work-item, its global id, local id, global size, local size and the value
     * {@code 10 + id}. For {@code n >= 3000} only the first and last work-item
     * are printed.
     *
     * @param n number of work-items
     */
    public static void start(int n) {
        final int N = n;
        int[] res = new int[N];
        int[] gids = new int[N];
        int[] lids = new int[N];
        int[] gsizes = new int[N];
        int[] lsizes = new int[N];
        Kernel k = new Kernel() {
            @Override
            public void run() {
                int i = getGlobalId();
                int gi = getGlobalId();
                int li = getLocalId();
                int gs = getGlobalSize();
                int ls = getLocalSize();
                gids[i] = gi;
                lids[i] = li;
                gsizes[i] = gs;
                lsizes[i] = ls;
                res[i] = 10 + i;
            }
        };
        try {
            // Run on the device selected in App; fall back to the default device otherwise.
            Range range = App.device == null
                ? Range.create(N, LOCAL_SIZE)
                : Range.create(App.device, N, LOCAL_SIZE);
            k.execute(range);
            if (N < 3000) {
                for (int x = 0; x < N; x++) {
                    System.out.printf("GI: %d, LI: %d, GSize: %d, LSize: %d, RES: %d\n",
                                      gids[x], lids[x], gsizes[x], lsizes[x], res[x]);
                }
            } else {
                System.out.printf("GI: %d, LI: %d, GSize: %d, LSize: %d, RES: %d\n",
                                  gids[0], lids[0], gsizes[0], lsizes[0], res[0]);
                System.out.printf("GI: %d, LI: %d, GSize: %d, LSize: %d, RES: %d\n",
                                  gids[N - 1], lids[N - 1], gsizes[N - 1], lsizes[N - 1],
                                  res[N - 1]);
            }
        } finally {
            // Release the OpenCL resources held by the kernel.
            k.dispose();
        }
    }
}

@ -0,0 +1,46 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
import com.aparapi.Range;
import com.aparapi.device.Device;
/**
 * Vector-addition demo: constructing an instance adds two four-element vectors
 * with {@link VAddKernel} and prints conversion and execution times.
 */
public class VAdd {
    /** Runs the demo on the device returned by {@link Devices#selectDevice()}. */
    public VAdd() {
        // System.setProperty("com.aparapi.enableShowGeneratedOpenCL", "true");
        final int[] lhs = {1, 2, 3, 4};
        final int[] rhs = {5, 6, 7, 8};
        final int[] sums = new int[lhs.length];

        final Device target = Devices.selectDevice();
        final Range range = Range.create(target, lhs.length);
        final Kernel kernel = new VAddKernel(lhs, rhs, sums);

        // Force an execution so that the conversion time becomes available.
        kernel.execute(range);
        final double conversionMs = kernel.getConversionTime();
        final double firstRunMs = kernel.getExecutionTime();
        final double firstRunNetMs = firstRunMs - conversionMs;
        System.out.println("Conversion Time: " + conversionMs + " ms");
        System.out.println("Execution Time: " + firstRunMs + " ms");
        System.out.println("Execution w/o Conversion Time: " + firstRunNetMs + " ms");

        kernel.execute(range);
        final double secondRunMs = kernel.getExecutionTime();
        System.out.println("Execution Time(2): " + secondRunMs + " ms");

        int idx = 0;
        while (idx < lhs.length) {
            System.out.printf("%d + %d = %2d\n", lhs[idx], rhs[idx], sums[idx]);
            idx++;
        }
    }
    // Sample measurements recorded by the author:
    // CPU
    // Conversion Time: 309.714392 ms
    // Execution Time: 310.215247 ms
    // Execution w/o Conversion Time: 0.5008550000000014 ms
    // Execution Time(2): 0.103835 ms
    // GPU
    // Conversion Time: 249.313076 ms
    // Execution Time: 250.076624 ms
    // Execution w/o Conversion Time: 0.7635480000000143 ms
    // Execution Time(2): 0.075589 ms
}

@ -0,0 +1,19 @@
package edu.thi.phga.aparapi_test;
import com.aparapi.Kernel;
/** Aparapi kernel that adds two int arrays element by element. */
public class VAddKernel extends Kernel {
// a and b are the operands; c receives the element-wise sums.
private int[] a, b, c;
/**
 * @param a first operand
 * @param b second operand
 * @param c output array; element i is set to a[i] + b[i]
 */
public VAddKernel(int[] a, int[] b, int[] c) {
this.a = a;
this.b = b;
this.c = c;
}
/** Computes the sum for the element whose index equals this work-item's global id. */
@Override
public void run() {
int i = getGlobalId(0);
c[i] = a[i] + b[i];
}
}

@ -0,0 +1,20 @@
package edu.thi.phga.aparapi_test;
import static org.junit.Assert.assertTrue;
import org.junit.Test;
/**
 * Placeholder unit test for the application; it does not exercise any
 * production code.
 */
public class AppTest {
    /** Asserts a constant {@code true} so the test always passes. */
    @Test
    public void shouldAnswerWithTrue() {
        final boolean alwaysTrue = true;
        assertTrue(alwaysTrue);
    }
}
Loading…
Cancel
Save