#include #include #include #include #include #include #include #include #include #include #include #include static const char* sumShader = "#version 310 es\n" "precision lowp float;\n" "layout(local_size_x = %d) in;\n" "layout(std430) buffer;\n" "layout(binding = 0) buffer Input0\n" "{\n" " float elements[];\n" "} input_data0;\n" "layout(binding = 1) readonly buffer Input1\n" "{\n" " float elements[];\n" "} input_data1;\n" "layout(binding = 2) writeonly buffer Output\n" "{\n" " float elements[];\n" "} output_data;\n" "void main()\n" "{\n" " int idx = int(gl_LocalInvocationID.x);\n" " int i, j, k, l, m, n, o, p, q;\n" " m = %d;\n" " l = input_data0.elements.length();\n" " i = 0;\n" " while ((j = (i * m + idx)) < l) {\n" " input_data0.elements[j] = input_data0.elements[j] * input_data1.elements[j];\n" " i++;\n" " }\n" " barrier();\n" " j = 1;\n" " k = 2;\n" " while (j < l) {\n" " i = 0;\n" " p = idx * k;\n" " q = m * k;\n" " n = p;\n" " o = n + j;\n" " while (o < l) {\n" " input_data0.elements[n] = input_data0.elements[n] + input_data0.elements[o];\n" " i = i + q;\n" " n = i + p;\n" " o = n + j;\n" " }\n" " j = k;\n" " k = k * 2;\n" " barrier();\n" " }\n" " if (idx == 0)\n" " output_data.elements[0] = input_data0.elements[0];\n" "}\n"; int main(int argc, char **argv) { float *A; float *B; float *C; float sum; int i, j, k; struct timeval now; double a, b; GLint status; GLint length; char *log; GLuint BO[3]; float *data; char *sumShaderString; int threads; int32_t fd = open ("/dev/dri/renderD128", O_RDWR); if (fd <= 0) return -3; struct gbm_device *gbm = gbm_create_device (fd); if (gbm == NULL) return -4; EGLDisplay dpy = eglGetPlatformDisplay (EGL_PLATFORM_GBM_MESA, gbm, NULL); if (dpy == NULL) return -5; EGLBoolean returnValue = eglInitialize(dpy, NULL, NULL); if (returnValue != EGL_TRUE) { printf("eglInitialize failed\n"); return 0; } EGLConfig cfg; EGLint count; EGLint s_configAttribs[] = { EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT_KHR, EGL_NONE }; if (eglChooseConfig(dpy, s_configAttribs, &cfg, 1, &count) == EGL_FALSE) { printf("eglChooseConfig failed\n"); return 0; } EGLint context_attribs[] = { EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE }; EGLContext context = eglCreateContext(dpy, cfg, EGL_NO_CONTEXT, context_attribs); if (context == EGL_NO_CONTEXT) { printf("eglCreateContext failed\n"); return 0; } returnValue = eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, context); if (returnValue != EGL_TRUE) { printf("eglMakeCurrent failed returned %d\n", returnValue); return 0; } GLuint sumShaderID = glCreateShader(GL_COMPUTE_SHADER); if (sumShaderID == 0) { printf("glGetError %d\n", glGetError()); return -2; } glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0, &threads); fprintf(stderr, "GL_MAX_COMPUTE_WORK_GROUP_SIZE: %d\n", threads); sumShaderString = malloc(strlen(sumShader) + 256); snprintf(sumShaderString, strlen(sumShader) + 256 - 1, sumShader, threads, threads); glShaderSource(sumShaderID, 1, (const char**)&sumShaderString, NULL); glCompileShader(sumShaderID); glGetShaderiv(sumShaderID, GL_COMPILE_STATUS, &status); if (status == GL_FALSE) { fprintf(stderr, "COMPUTE\n"); glGetShaderiv(sumShaderID, GL_INFO_LOG_LENGTH, &length); log = malloc(length+1); glGetShaderInfoLog(sumShaderID, length, &length, log); write(2, log, length); return -1; } GLuint sumShaderProgram = glCreateProgram(); glAttachShader(sumShaderProgram, sumShaderID); glLinkProgram(sumShaderProgram); glGetShaderiv(sumShaderID, GL_LINK_STATUS, &status); if (status == GL_FALSE) { fprintf(stderr, "LINK\n"); glGetProgramiv(sumShaderID, GL_INFO_LOG_LENGTH, &length); log = malloc(length+1); glGetProgramInfoLog(sumShaderID, length, &length, log); write(2, log, length); return -1; } srand(time(NULL)); for (i = 10; i < 50; i++) { j = i * 1000; A = malloc(j * sizeof(float)); B = malloc(j * sizeof(float)); C = malloc(1 * sizeof(float)); for (k = 0; k < j; k++) { A[k] = 0.125; //((float)rand() / RAND_MAX) - 0.5; B[k] = 8.0; //((float)rand() / RAND_MAX) - 0.5; } sum = 0; gettimeofday(&now, NULL); a = now.tv_sec * 1000000; a += now.tv_usec; for (k = 0; k < j; k++) sum += A[k] * B[k]; gettimeofday(&now, NULL); b = now.tv_sec * 1000000; b += now.tv_usec; printf("CPU: %f, %f microseconds\n", sum, b - a); gettimeofday(&now, NULL); a = now.tv_sec * 1000000; a += now.tv_usec; glGenBuffers(3, BO); glBindBuffer(GL_SHADER_STORAGE_BUFFER, BO[0]); glBufferData(GL_SHADER_STORAGE_BUFFER, j * sizeof(GLfloat), A, GL_STATIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, BO[0]); glBindBuffer(GL_SHADER_STORAGE_BUFFER, BO[1]); glBufferData(GL_SHADER_STORAGE_BUFFER, j * sizeof(GLfloat), B, GL_STATIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, BO[1]); glBindBuffer(GL_SHADER_STORAGE_BUFFER, BO[2]); glBufferData(GL_SHADER_STORAGE_BUFFER, 1 * sizeof(GLfloat), C, GL_STATIC_DRAW); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, BO[2]); glUseProgram(sumShaderProgram); glDispatchCompute(1, 1, 1); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glBindBuffer(GL_SHADER_STORAGE_BUFFER, BO[2]); float *out = (float*)glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, 1 * sizeof(float), GL_MAP_READ_BIT); sum = out[0]; glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); glDeleteProgram(sumShaderProgram); glDeleteBuffers(3, BO); gettimeofday(&now, NULL); b = now.tv_sec * 1000000; b += now.tv_usec; printf("GPU: %f, %f microseconds\n", sum, b - a); free(A); free(B); free(C); } return 0; }