Compare commits

...

2 Commits
rawhide ... f27

Author SHA1 Message Date
Dave Airlie 0ba206e014 pocl rebuild for newer clang include paths 2018-05-23 16:40:44 +10:00
Tom Stellard f1c4214843 Rebase to latest master branch for LLVM 5.0 support 2017-12-13 03:45:18 +00:00
5 changed files with 269 additions and 48 deletions

View File

@ -0,0 +1,192 @@
From 14c2f22ae98f854e927b24ba4c4c6c8dcd2ba90c Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Tue, 24 Oct 2017 14:44:56 -0700
Subject: [PATCH] Fix build on i686
Use the clang builtin names for intrinsics in vload_store_half_f16c.c
so we don't need to include x86intrin.h which redefines the size_t
typedef which is also defined in pocl_types.h
In file included from /builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/lib/kernel/vload_store_half_f16c.c:50:
In file included from /usr/lib/clang/5.0.0/include/x86intrin.h:29:
In file included from /usr/lib/clang/5.0.0/include/immintrin.h:32:
In file included from /usr/lib/clang/5.0.0/include/xmmintrin.h:39:
In file included from /usr/lib/clang/5.0.0/include/mm_malloc.h:27:
In file included from /usr/include/stdlib.h:31:
/usr/lib/clang/5.0.0/include/stddef.h:62:23: error: typedef redefinition with different types ('unsigned int' vs 'ulong' (aka 'unsigned long'))
typedef __SIZE_TYPE__ size_t;
^
/builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/include/pocl_types.h:23:15: note: previous definition is here
typedef ulong size_t;
^
---
lib/kernel/vload_store_half_f16c.c | 38 ++++++++++++++++++--------------------
1 file changed, 18 insertions(+), 20 deletions(-)
diff --git a/lib/kernel/vload_store_half_f16c.c b/lib/kernel/vload_store_half_f16c.c
index ae21e1f..a494380 100644
--- a/lib/kernel/vload_store_half_f16c.c
+++ b/lib/kernel/vload_store_half_f16c.c
@@ -47,19 +47,17 @@
-#include <x86intrin.h>
-
/** FLOAT -> HALF vec4 ************************************************/
typedef union
{
- __m128 i;
+ float4 i;
float4 low, hi;
} f2h4_i;
typedef union
{
- __m128i o;
+ short8 o;
ushort4 low, hi;
} f2h4_o;
@@ -69,7 +67,7 @@ _cl_float2half4_rte (const float4 data)
f2h4_i ui;
f2h4_o uo;
ui.low = data;
- uo.o = _mm_cvtps_ph (ui.i, 0);
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 0);
return uo.low;
}
@@ -79,7 +77,7 @@ _cl_float2half4_rtn (const float4 data)
f2h4_i ui;
f2h4_o uo;
ui.low = data;
- uo.o = _mm_cvtps_ph (ui.i, 1);
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 1);
return uo.low;
}
@@ -89,7 +87,7 @@ _cl_float2half4_rtp (const float4 data)
f2h4_i ui;
f2h4_o uo;
ui.low = data;
- uo.o = _mm_cvtps_ph (ui.i, 2);
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 2);
return uo.low;
}
@@ -99,7 +97,7 @@ _cl_float2half4_rtz (const float4 data)
f2h4_i ui;
f2h4_o uo;
ui.low = data;
- uo.o = _mm_cvtps_ph (ui.i, 3);
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 3);
return uo.low;
}
@@ -113,13 +111,13 @@ _cl_float2half4 (const float4 data)
typedef union
{
- __m256 i;
+ float8 i;
float8 f;
} f2h8_i;
typedef union
{
- __m128i o;
+ ushort8 o;
ushort8 f;
} f2h8_o;
@@ -129,7 +127,7 @@ _cl_float2half8_rte (const float8 data)
f2h8_i ui;
f2h8_o uo;
ui.f = data;
- uo.o = _mm256_cvtps_ph (ui.i, 0);
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 0);
return uo.f;
}
@@ -139,7 +137,7 @@ _cl_float2half8_rtn (const float8 data)
f2h8_i ui;
f2h8_o uo;
ui.f = data;
- uo.o = _mm256_cvtps_ph (ui.i, 1);
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 1);
return uo.f;
}
@@ -149,7 +147,7 @@ _cl_float2half8_rtp (const float8 data)
f2h8_i ui;
f2h8_o uo;
ui.f = data;
- uo.o = _mm256_cvtps_ph (ui.i, 2);
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 2);
return uo.f;
}
@@ -159,7 +157,7 @@ _cl_float2half8_rtz (const float8 data)
f2h8_i ui;
f2h8_o uo;
ui.f = data;
- uo.o = _mm256_cvtps_ph (ui.i, 3);
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 3);
return uo.f;
}
@@ -173,13 +171,13 @@ _cl_float2half8 (const float8 data)
typedef union
{
- __m128i i;
+ short8 i;
ushort4 low, hi;
} h2f4_i;
typedef union
{
- __m128 o;
+ float4 o;
float4 f;
} h2f4_o;
@@ -189,7 +187,7 @@ _cl_half2float4 (const ushort4 data)
h2f4_i ui;
h2f4_o uo;
ui.low = data;
- uo.o = _mm_cvtph_ps (ui.i);
+ uo.o = __builtin_ia32_vcvtph2ps (ui.i);
return uo.f;
}
@@ -197,13 +195,13 @@ _cl_half2float4 (const ushort4 data)
typedef union
{
- __m128i i;
+ short8 i;
ushort8 u;
} h2f8_i;
typedef union
{
- __m256 o;
+ float8 o;
float8 f;
} h2f8_o;
@@ -213,7 +211,7 @@ _cl_half2float8 (const ushort8 data)
h2f8_i ui;
h2f8_o uo;
ui.u = data;
- uo.o = _mm256_cvtph_ps (ui.i);
+ uo.o = __builtin_ia32_vcvtph2ps256 (ui.i);
return uo.f;
}
--
1.8.3.1

View File

@ -0,0 +1,56 @@
From e9bab88cffde2871c3f7ca4339552fb3dcb66154 Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Wed, 25 Oct 2017 15:04:43 -0700
Subject: [PATCH] Revert "lib/kernel/printf.c: fix "-nan" on output"
This reverts commit 3877d6316985fdf36a54cfee745952436744ebcc.
---
lib/kernel/printf.c | 11 +++++------
1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/lib/kernel/printf.c b/lib/kernel/printf.c
index bd522fb..1564121 100644
--- a/lib/kernel/printf.c
+++ b/lib/kernel/printf.c
@@ -28,7 +28,6 @@
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
-#include <math.h>
// We implement the OpenCL printf by calling the C99 printf. This is
// not very efficient, but is easy to implement.
@@ -111,6 +110,9 @@ float __attribute__((overloadable)) vload_half(size_t offset,
// Note: To simplify implementation, we print double values with %lf,
// although %f would suffice as well
+#define FLOAT_CONV_half "h"
+#define FLOAT_CONV_float ""
+#define FLOAT_CONV_double "l"
#define FLOAT_GET_half(ptr) vload_half(0, ptr)
#define FLOAT_GET_float(ptr) (*(ptr))
#define FLOAT_GET_double(ptr) (*(ptr))
@@ -121,7 +123,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,
{ \
DEBUG_PRINTF(("[printf:floats:n=%dd]\n", n)); \
char outfmt[1000]; \
- OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" "%c"; \
+ OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" FLOAT_CONV_##WIDTH "%c"; \
snprintf(outfmt, sizeof outfmt, \
str, \
flags.left ? "-" : "", \
@@ -138,10 +140,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,
for (int d=0; d<n; ++d) { \
DEBUG_PRINTF(("[printf:floats:d=%d]\n", d)); \
if (d != 0) printf(comma); \
- WIDTH val = (FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d)); \
- if (isnan (val)) \
- val = NAN; \
- printf(outfmt, (double)val); \
+ printf(outfmt, FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d)); \
} \
DEBUG_PRINTF(("[printf:floats:done]\n")); \
}
--
1.8.3.1

View File

@ -1,42 +0,0 @@
From d7f89ad2696a253296a5116bd89fe3031de1fb1c Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Thu, 25 May 2017 19:46:47 +0000
Subject: [PATCH] Use generic cpu when LLVM can't detect the host processor
LLVM's CPU detection is very basic, it parses /proc/cpuinfo and uses
information from there to look up CPUs in its own internal tables.
These tables are incomplete, so it's not uncommon for LLVM to not
be able to detect the CPU.
Ideally pocl would use something better than llc --version for CPU
detection, but for now we can at least fallback to the generic
cpu when LLVM can't detect the host.
---
cmake/LLVM.cmake | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake
index 22e9b851..440fc0ca 100644
--- a/cmake/LLVM.cmake
+++ b/cmake/LLVM.cmake
@@ -567,6 +567,10 @@ endif()
set_cache_var(LLC_TRIPLE "LLC_TRIPLE")
+# FIXME: The cpu name printed by llc --version is the same cpu that will be
+# targeted if you pass -mcpu=native to llc, so we could replace this auto-detection
+# with just: set(LLC_HOST_CPU "native"), however, we can't do this at the moment
+# because of the work-around for arm1176jz-s.
if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)
message(STATUS "Find out LLC host CPU with ${LLVM_LLC}")
execute_process(COMMAND ${LLVM_LLC} "--version" RESULT_VARIABLE RES_VAR OUTPUT_VARIABLE OUTPUT_VAR)
@@ -589,7 +593,8 @@ if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)
endif()
if(LLC_HOST_CPU MATCHES "unknown")
- message(FATAL_ERROR "LLVM could not recognize your CPU model automatically. Please rerun cmake with -DLLC_HOST_CPU=<model> (to see a list of models, try: llc -mcpu help)")
+ message(WARNING "LLVM could not recognize your CPU model automatically. Using a generic CPU target.")
+ set(LLC_HOST_CPU "generic")
endif()
set(LLC_HOST_CPU "${LLC_HOST_CPU}" CACHE STRING "The Host CPU to use with llc")

View File

@ -1,22 +1,29 @@
%global sover 1
%global commit 53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f
%global shortcommit %(c=%{commit}; echo ${c:0:7})
# hardened builds are breaking the pocl tests with llvm 3.9
#undefine _hardened_build
Summary: Portable Computing Language - an OpenCL implementation
Name: pocl
Version: 0.14
Release: 4%{?dist}
Version: 0.15
Release: 0.2.20171023git%{shortcommit}%{?dist}
# The whole code is under MIT
# except include/utlist.h which is under BSD (and unbundled) and
# except lib/kernel/vecmath which is under GPLv3+ or LGPLv3+ (and unbundled in future)
License: MIT and BSD and (GPLv3+ or LGPLv3+)
URL: http://pocl.sourceforge.net
Source0: http://portablecl.org/downloads/%{name}-%{version}.tar.gz
#Source0: http://portablecl.org/downloads/%{name}-%{version}.tar.gz
Source0: https://github.com/pocl/pocl/archive/%{commit}/%{name}-%{shortcommit}.tar.gz
Patch1: 0001-wrap-more-stuff-for-64-bit-atomics.patch
Patch2: pocl-fix-arm-detection.patch
Patch2: 0001-Fix-build-on-i686.patch
#https://github.com/pocl/pocl/issues/544
Patch3: 0001-Revert-lib-kernel-printf.c-fix-nan-on-output.patch
# aarch64 seems to have a build system problem that makes it pass -march=(unknown).
# ppc64 builds fine, but: 41% tests passed, 64 tests failed out of 108.
# ppc64le fails with compilation errors.
@ -69,7 +76,7 @@ Requires: uthash-devel
Portable Computing Language development files.
%prep
%autosetup -p1
%autosetup -p1 -n %{name}-%{commit}
# Unbundle uthash
find . -depth -name utlist* | xargs rm -f
@ -78,6 +85,7 @@ find . -depth -name utlist* | xargs rm -f
%build
mkdir %{_target_platform}
pushd %{_target_platform}
# CPU detection fails on ARM, so we need to manually specify the CPU as generic.
%cmake ../ \
-DENABLE_ICD=1 \
-DPOCL_INSTALL_ICD_VENDORDIR=%{_sysconfdir}/OpenCL/vendors \
@ -86,6 +94,7 @@ pushd %{_target_platform}
-DKERNELLIB_HOST_CPU_VARIANTS=distro
%endif
%ifarch %{arm}
-DLLC_HOST_CPU="generic" \
-DCL_DISABLE_LONG=true
%endif
# -DENABLE_TESTSUITES=all Requires clBLAS
@ -129,6 +138,12 @@ popd
%{_includedir}/%{name}u.h
%changelog
* Wed May 23 2018 Dave Airlie <airlied@redhat.com> - 0.15-0.2.20171023git53ef5e8
- pocl rebuild for newer clang include paths
* Tue Oct 24 2017 Tom Stellard <tstellar@redhat.com> - 0.15-0.1.20171023git53ef5e8
- Rebase to latest master branch for LLVM 5.0 support
* Thu Aug 03 2017 Fedora Release Engineering <releng@fedoraproject.org> - 0.14-4
- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild

View File

@ -1 +1 @@
SHA512 (pocl-0.14.tar.gz) = 36bf41222315e13bac6c37cc942b9c09e22af0374abf63052fa8deb1ffe383da0ed9b5dc3548844d52749642d0b1288af5e0128e8dd4deb38e70128adb28c066
SHA512 (pocl-53ef5e8.tar.gz) = 313c3dd937825178823bcba0254be462748c221025df2d79ea93ae0201bc7e04c0e7fa965d39cac0e05ae8e1105f604c48177b4cb54d08a1d38a78e756f783d1