pocl rebuild for newer clang include paths

Rebase to latest master branch for LLVM 5.0 support
2018-05-23 16:40:44 +10:00 · 2017-12-13 03:45:18 +00:00
5 changed files with 269 additions and 48 deletions
--- a/0001-Fix-build-on-i686.patch
+++ b/0001-Fix-build-on-i686.patch
@ -0,0 +1,192 @@
+From 14c2f22ae98f854e927b24ba4c4c6c8dcd2ba90c Mon Sep 17 00:00:00 2001
+From: Tom Stellard <tstellar@redhat.com>
+Date: Tue, 24 Oct 2017 14:44:56 -0700
+Subject: [PATCH] Fix build on i686
+
+Use the clang builtin names for intrinsics in vload_store_half_f16c.c
+so we don't need to include x86intrin.h which redefines the size_t
+typedef which is also defined in pocl_types.h
+
+In file included from /builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/lib/kernel/vload_store_half_f16c.c:50:
+In file included from /usr/lib/clang/5.0.0/include/x86intrin.h:29:
+In file included from /usr/lib/clang/5.0.0/include/immintrin.h:32:
+In file included from /usr/lib/clang/5.0.0/include/xmmintrin.h:39:
+In file included from /usr/lib/clang/5.0.0/include/mm_malloc.h:27:
+In file included from /usr/include/stdlib.h:31:
+/usr/lib/clang/5.0.0/include/stddef.h:62:23: error: typedef redefinition with different types ('unsigned int' vs 'ulong' (aka 'unsigned long'))
+typedef __SIZE_TYPE__ size_t;
+                      ^
+/builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/include/pocl_types.h:23:15: note: previous definition is here
+typedef ulong size_t;
+              ^
+---
+ lib/kernel/vload_store_half_f16c.c | 38 ++++++++++++++++++--------------------
+ 1 file changed, 18 insertions(+), 20 deletions(-)
+
+diff --git a/lib/kernel/vload_store_half_f16c.c b/lib/kernel/vload_store_half_f16c.c
+index ae21e1f..a494380 100644
+--- a/lib/kernel/vload_store_half_f16c.c
+++ b/lib/kernel/vload_store_half_f16c.c
+@@ -47,19 +47,17 @@
+ 
+ 
+ 
+-#include <x86intrin.h>
+-
+ /** FLOAT -> HALF vec4 ************************************************/
+ 
+ typedef union
+ {
+-  __m128 i;
+  float4 i;
+   float4 low, hi;
+ } f2h4_i;
+ 
+ typedef union
+ {
+-  __m128i o;
+  short8 o;
+   ushort4 low, hi;
+ } f2h4_o;
+ 
+@@ -69,7 +67,7 @@ _cl_float2half4_rte (const float4 data)
+   f2h4_i ui;
+   f2h4_o uo;
+   ui.low = data;
+-  uo.o = _mm_cvtps_ph (ui.i, 0);
+  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 0);
+   return uo.low;
+ }
+ 
+@@ -79,7 +77,7 @@ _cl_float2half4_rtn (const float4 data)
+   f2h4_i ui;
+   f2h4_o uo;
+   ui.low = data;
+-  uo.o = _mm_cvtps_ph (ui.i, 1);
+  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 1);
+   return uo.low;
+ }
+ 
+@@ -89,7 +87,7 @@ _cl_float2half4_rtp (const float4 data)
+   f2h4_i ui;
+   f2h4_o uo;
+   ui.low = data;
+-  uo.o = _mm_cvtps_ph (ui.i, 2);
+  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 2);
+   return uo.low;
+ }
+ 
+@@ -99,7 +97,7 @@ _cl_float2half4_rtz (const float4 data)
+   f2h4_i ui;
+   f2h4_o uo;
+   ui.low = data;
+-  uo.o = _mm_cvtps_ph (ui.i, 3);
+  uo.o = __builtin_ia32_vcvtps2ph (ui.i, 3);
+   return uo.low;
+ }
+ 
+@@ -113,13 +111,13 @@ _cl_float2half4 (const float4 data)
+ 
+ typedef union
+ {
+-  __m256 i;
+  float8 i;
+   float8 f;
+ } f2h8_i;
+ 
+ typedef union
+ {
+-  __m128i o;
+  ushort8 o;
+   ushort8 f;
+ } f2h8_o;
+ 
+@@ -129,7 +127,7 @@ _cl_float2half8_rte (const float8 data)
+   f2h8_i ui;
+   f2h8_o uo;
+   ui.f = data;
+-  uo.o = _mm256_cvtps_ph (ui.i, 0);
+  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 0);
+   return uo.f;
+ }
+ 
+@@ -139,7 +137,7 @@ _cl_float2half8_rtn (const float8 data)
+   f2h8_i ui;
+   f2h8_o uo;
+   ui.f = data;
+-  uo.o = _mm256_cvtps_ph (ui.i, 1);
+  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 1);
+   return uo.f;
+ }
+ 
+@@ -149,7 +147,7 @@ _cl_float2half8_rtp (const float8 data)
+   f2h8_i ui;
+   f2h8_o uo;
+   ui.f = data;
+-  uo.o = _mm256_cvtps_ph (ui.i, 2);
+  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 2);
+   return uo.f;
+ }
+ 
+@@ -159,7 +157,7 @@ _cl_float2half8_rtz (const float8 data)
+   f2h8_i ui;
+   f2h8_o uo;
+   ui.f = data;
+-  uo.o = _mm256_cvtps_ph (ui.i, 3);
+  uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 3);
+   return uo.f;
+ }
+ 
+@@ -173,13 +171,13 @@ _cl_float2half8 (const float8 data)
+ 
+ typedef union
+ {
+-  __m128i i;
+  short8 i;
+   ushort4 low, hi;
+ } h2f4_i;
+ 
+ typedef union
+ {
+-  __m128 o;
+  float4 o;
+   float4 f;
+ } h2f4_o;
+ 
+@@ -189,7 +187,7 @@ _cl_half2float4 (const ushort4 data)
+   h2f4_i ui;
+   h2f4_o uo;
+   ui.low = data;
+-  uo.o = _mm_cvtph_ps (ui.i);
+  uo.o = __builtin_ia32_vcvtph2ps (ui.i);
+   return uo.f;
+ }
+ 
+@@ -197,13 +195,13 @@ _cl_half2float4 (const ushort4 data)
+ 
+ typedef union
+ {
+-  __m128i i;
+  short8 i;
+   ushort8 u;
+ } h2f8_i;
+ 
+ typedef union
+ {
+-  __m256 o;
+  float8 o;
+   float8 f;
+ } h2f8_o;
+ 
+@@ -213,7 +211,7 @@ _cl_half2float8 (const ushort8 data)
+   h2f8_i ui;
+   h2f8_o uo;
+   ui.u = data;
+-  uo.o = _mm256_cvtph_ps (ui.i);
+  uo.o = __builtin_ia32_vcvtph2ps256 (ui.i);
+   return uo.f;
+ }
+ 
+-- 
+1.8.3.1
+
--- a/0001-Revert-lib-kernel-printf.c-fix-nan-on-output.patch
+++ b/0001-Revert-lib-kernel-printf.c-fix-nan-on-output.patch
@ -0,0 +1,56 @@
+From e9bab88cffde2871c3f7ca4339552fb3dcb66154 Mon Sep 17 00:00:00 2001
+From: Tom Stellard <tstellar@redhat.com>
+Date: Wed, 25 Oct 2017 15:04:43 -0700
+Subject: [PATCH] Revert "lib/kernel/printf.c: fix "-nan" on output"
+
+This reverts commit 3877d6316985fdf36a54cfee745952436744ebcc.
+---
+ lib/kernel/printf.c | 11 +++++------
+ 1 file changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/lib/kernel/printf.c b/lib/kernel/printf.c
+index bd522fb..1564121 100644
+--- a/lib/kernel/printf.c
+++ b/lib/kernel/printf.c
+@@ -28,7 +28,6 @@
+ #include <limits.h>
+ #include <stdarg.h>
+ #include <stdbool.h>
+-#include <math.h>
+ 
+ // We implement the OpenCL printf by calling the C99 printf. This is
+ // not very efficient, but is easy to implement.
+@@ -111,6 +110,9 @@ float __attribute__((overloadable)) vload_half(size_t offset,
+ 
+ // Note: To simplify implementation, we print double values with %lf,
+ // although %f would suffice as well
+#define FLOAT_CONV_half   "h"
+#define FLOAT_CONV_float  ""
+#define FLOAT_CONV_double "l"
+ #define FLOAT_GET_half(ptr)   vload_half(0, ptr)
+ #define FLOAT_GET_float(ptr)  (*(ptr))
+ #define FLOAT_GET_double(ptr) (*(ptr))
+@@ -121,7 +123,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,
+   {                                                                     \
+     DEBUG_PRINTF(("[printf:floats:n=%dd]\n", n));                       \
+     char outfmt[1000];                                                  \
+-    OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" "%c";                \
+    OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" FLOAT_CONV_##WIDTH "%c"; \
+     snprintf(outfmt, sizeof outfmt,                                     \
+              str,                                                       \
+              flags.left ? "-" : "",                                     \
+@@ -138,10 +140,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,
+     for (int d=0; d<n; ++d) {                                           \
+       DEBUG_PRINTF(("[printf:floats:d=%d]\n", d));                      \
+       if (d != 0) printf(comma);                                        \
+-      WIDTH val = (FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d));   \
+-      if (isnan (val))                                                  \
+-        val = NAN;                                                      \
+-      printf(outfmt, (double)val);                                      \
+      printf(outfmt, FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d)); \
+     }                                                                   \
+     DEBUG_PRINTF(("[printf:floats:done]\n"));                           \
+   }
+-- 
+1.8.3.1
+
--- a/pocl-fix-arm-detection.patch
+++ b/pocl-fix-arm-detection.patch
@ -1,42 +0,0 @@
-From d7f89ad2696a253296a5116bd89fe3031de1fb1c Mon Sep 17 00:00:00 2001
-From: Tom Stellard <tstellar@redhat.com>
-Date: Thu, 25 May 2017 19:46:47 +0000
-Subject: [PATCH] Use generic cpu when LLVM can't detect the host processor
-
-LLVM's CPU detection is very basic, it parses /proc/cpuinfo and uses
-information from their to look up CPUs in its own internal tables.
-These tables are incomplete, so it's not uncommon for LLVM to not
-be able to detect the CPU.
-
-Ideally pocl would use something better than llc --version for CPU
-detection, but for now we can at least fallback to the generic
-cpu when LLVM can't detect the host.
---
- cmake/LLVM.cmake | 7 ++++++-
- 1 file changed, 6 insertions(+), 1 deletion(-)
-
-diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake
-index 22e9b851..440fc0ca 100644
--- a/cmake/LLVM.cmake
-+++ b/cmake/LLVM.cmake
-@@ -567,6 +567,10 @@ endif()
- 
- set_cache_var(LLC_TRIPLE "LLC_TRIPLE")
- 
-+# FIXME: The cpu name printed by llc --version is the same cpu that will be
-+# targeted if you pass -mcpu=native to llc, so we could replace this auto-detection
-+# with just: set(LLC_HOST_CPU "native"), however, we can't do this at the moment
-+# because of the work-around for arm1176jz-s.
- if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)
-   message(STATUS "Find out LLC host CPU with ${LLVM_LLC}")
-   execute_process(COMMAND ${LLVM_LLC} "--version" RESULT_VARIABLE RES_VAR OUTPUT_VARIABLE OUTPUT_VAR)
-@@ -589,7 +593,8 @@ if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)
- endif()
- 
- if(LLC_HOST_CPU MATCHES "unknown")
-  message(FATAL_ERROR "LLVM could not recognize your CPU model automatically. Please rerun cmake with -DLLC_HOST_CPU=<model> (to see a list of models, try: llc -mcpu help)")
-+  message(WARNING "LLVM could not recognize your CPU model automatically.  Using a generic CPU target.")
-+  set(LLC_HOST_CPU "generic")
- endif()
- 
- set(LLC_HOST_CPU "${LLC_HOST_CPU}" CACHE STRING "The Host CPU to use with llc")
--- a/pocl.spec
+++ b/pocl.spec
@ -1,22 +1,29 @@
 %global sover 1

+%global commit 53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f
+%global shortcommit %(c=%{commit}; echo ${c:0:7})
+
 # hardened builds are breaking the pocl tests with llvm 3.9
 #undefine _hardened_build

 Summary:        Portable Computing Language - an OpenCL implementation
 Name:           pocl
-Version:        0.14
-Release:        4%{?dist}
+Version:        0.15
+Release:        0.2.20171023git%{shortcommit}%{?dist}

 # The whole code is under MIT
 # except include/utlist.h which is under BSD (and unbundled) and
 # except lib/kernel/vecmath which is under GPLv3+ or LGPLv3+ (and unbundled in future)
 License:        MIT and BSD and (GPLv3+ or LGPLv3+)
 URL:            http://pocl.sourceforge.net
-Source0:        http://portablecl.org/downloads/%{name}-%{version}.tar.gz
+#Source0:        http://portablecl.org/downloads/%{name}-%{version}.tar.gz
+Source0:        https://github.com/pocl/pocl/archive/%{commit}/%{name}-%{shortcommit}.tar.gz

 Patch1:         0001-wrap-more-stuff-for-64-bit-atomics.patch
-Patch2:         pocl-fix-arm-detection.patch
+Patch2:         0001-Fix-build-on-i686.patch
+#https://github.com/pocl/pocl/issues/544
+Patch3:         0001-Revert-lib-kernel-printf.c-fix-nan-on-output.patch
+
 # aarch64 seems to have a build system problem that makes it pass -march=(unknown).
 # ppc64 builds fine, but: 41% tests passed, 64 tests failed out of 108.
 # ppc64le fails with compilation errors.
@ -69,7 +76,7 @@ Requires:       uthash-devel
 Portable Computing Language development files.

 %prep
-%autosetup -p1
+%autosetup -p1 -n %{name}-%{commit}

 # Unbundle uthash
 find . -depth -name utlist* | xargs rm -f
@ -78,6 +85,7 @@ find . -depth -name utlist* | xargs rm -f
 %build
 mkdir %{_target_platform}
 pushd %{_target_platform}
+# CPU detection fails on ARM, so we need to manually specify the CPU as generic.
  %cmake ../ \
    -DENABLE_ICD=1 \
    -DPOCL_INSTALL_ICD_VENDORDIR=%{_sysconfdir}/OpenCL/vendors \
@ -86,6 +94,7 @@ pushd %{_target_platform}
    -DKERNELLIB_HOST_CPU_VARIANTS=distro
 %endif
 %ifarch %{arm}
+    -DLLC_HOST_CPU="generic" \
    -DCL_DISABLE_LONG=true
 %endif
    # -DENABLE_TESTSUITES=all Requires clBLAS
@ -129,6 +138,12 @@ popd
 %{_includedir}/%{name}u.h

 %changelog
+* Wed May 23 2018 Dave Airlie <airlied@redhat.com> - 0.15-0.2.20171023git53ef5e8
+- pocl rebuild for newer clang include paths
+
+* Tue Oct 24 2017 Tom Stellard <tstellar@redhat.com> - 0.15-0.1.20171023git53ef5e8
+- Rebase to latest master branch for LLVM 5.0 support
+
 * Thu Aug 03 2017 Fedora Release Engineering <releng@fedoraproject.org> - 0.14-4
 - Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild

--- a/2
+++ b/2
@ -1 +1 @@
-SHA512 (pocl-0.14.tar.gz) = 36bf41222315e13bac6c37cc942b9c09e22af0374abf63052fa8deb1ffe383da0ed9b5dc3548844d52749642d0b1288af5e0128e8dd4deb38e70128adb28c066
+SHA512 (pocl-53ef5e8.tar.gz) = 313c3dd937825178823bcba0254be462748c221025df2d79ea93ae0201bc7e04c0e7fa965d39cac0e05ae8e1105f604c48177b4cb54d08a1d38a78e756f783d1
Author	SHA1	Message	Date
Dave Airlie	0ba206e014	pocl rebuild for newer clang include paths	2018-05-23 16:40:44 +10:00
Tom Stellard	f1c4214843	Rebase to latest master branch for LLVM 5.0 support	2017-12-13 03:45:18 +00:00