Compare commits
2 Commits
Author | SHA1 | Date |
---|---|---|
Dave Airlie | 0ba206e014 | |
Tom Stellard | f1c4214843 |
|
@ -0,0 +1,192 @@
|
|||
From 14c2f22ae98f854e927b24ba4c4c6c8dcd2ba90c Mon Sep 17 00:00:00 2001
|
||||
From: Tom Stellard <tstellar@redhat.com>
|
||||
Date: Tue, 24 Oct 2017 14:44:56 -0700
|
||||
Subject: [PATCH] Fix build on i686
|
||||
|
||||
Use the clang builtin names for intrinsics in vload_store_half_f16c.c
|
||||
so we don't need to include x86intrin.h which redefines the size_t
|
||||
typedef which is also defined in pocl_types.h
|
||||
|
||||
In file included from /builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/lib/kernel/vload_store_half_f16c.c:50:
|
||||
In file included from /usr/lib/clang/5.0.0/include/x86intrin.h:29:
|
||||
In file included from /usr/lib/clang/5.0.0/include/immintrin.h:32:
|
||||
In file included from /usr/lib/clang/5.0.0/include/xmmintrin.h:39:
|
||||
In file included from /usr/lib/clang/5.0.0/include/mm_malloc.h:27:
|
||||
In file included from /usr/include/stdlib.h:31:
|
||||
/usr/lib/clang/5.0.0/include/stddef.h:62:23: error: typedef redefinition with different types ('unsigned int' vs 'ulong' (aka 'unsigned long'))
|
||||
typedef __SIZE_TYPE__ size_t;
|
||||
^
|
||||
/builddir/build/BUILD/pocl-53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f/include/pocl_types.h:23:15: note: previous definition is here
|
||||
typedef ulong size_t;
|
||||
^
|
||||
---
|
||||
lib/kernel/vload_store_half_f16c.c | 38 ++++++++++++++++++--------------------
|
||||
1 file changed, 18 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/lib/kernel/vload_store_half_f16c.c b/lib/kernel/vload_store_half_f16c.c
|
||||
index ae21e1f..a494380 100644
|
||||
--- a/lib/kernel/vload_store_half_f16c.c
|
||||
+++ b/lib/kernel/vload_store_half_f16c.c
|
||||
@@ -47,19 +47,17 @@
|
||||
|
||||
|
||||
|
||||
-#include <x86intrin.h>
|
||||
-
|
||||
/** FLOAT -> HALF vec4 ************************************************/
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m128 i;
|
||||
+ float4 i;
|
||||
float4 low, hi;
|
||||
} f2h4_i;
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m128i o;
|
||||
+ short8 o;
|
||||
ushort4 low, hi;
|
||||
} f2h4_o;
|
||||
|
||||
@@ -69,7 +67,7 @@ _cl_float2half4_rte (const float4 data)
|
||||
f2h4_i ui;
|
||||
f2h4_o uo;
|
||||
ui.low = data;
|
||||
- uo.o = _mm_cvtps_ph (ui.i, 0);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 0);
|
||||
return uo.low;
|
||||
}
|
||||
|
||||
@@ -79,7 +77,7 @@ _cl_float2half4_rtn (const float4 data)
|
||||
f2h4_i ui;
|
||||
f2h4_o uo;
|
||||
ui.low = data;
|
||||
- uo.o = _mm_cvtps_ph (ui.i, 1);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 1);
|
||||
return uo.low;
|
||||
}
|
||||
|
||||
@@ -89,7 +87,7 @@ _cl_float2half4_rtp (const float4 data)
|
||||
f2h4_i ui;
|
||||
f2h4_o uo;
|
||||
ui.low = data;
|
||||
- uo.o = _mm_cvtps_ph (ui.i, 2);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 2);
|
||||
return uo.low;
|
||||
}
|
||||
|
||||
@@ -99,7 +97,7 @@ _cl_float2half4_rtz (const float4 data)
|
||||
f2h4_i ui;
|
||||
f2h4_o uo;
|
||||
ui.low = data;
|
||||
- uo.o = _mm_cvtps_ph (ui.i, 3);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph (ui.i, 3);
|
||||
return uo.low;
|
||||
}
|
||||
|
||||
@@ -113,13 +111,13 @@ _cl_float2half4 (const float4 data)
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m256 i;
|
||||
+ float8 i;
|
||||
float8 f;
|
||||
} f2h8_i;
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m128i o;
|
||||
+ ushort8 o;
|
||||
ushort8 f;
|
||||
} f2h8_o;
|
||||
|
||||
@@ -129,7 +127,7 @@ _cl_float2half8_rte (const float8 data)
|
||||
f2h8_i ui;
|
||||
f2h8_o uo;
|
||||
ui.f = data;
|
||||
- uo.o = _mm256_cvtps_ph (ui.i, 0);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 0);
|
||||
return uo.f;
|
||||
}
|
||||
|
||||
@@ -139,7 +137,7 @@ _cl_float2half8_rtn (const float8 data)
|
||||
f2h8_i ui;
|
||||
f2h8_o uo;
|
||||
ui.f = data;
|
||||
- uo.o = _mm256_cvtps_ph (ui.i, 1);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 1);
|
||||
return uo.f;
|
||||
}
|
||||
|
||||
@@ -149,7 +147,7 @@ _cl_float2half8_rtp (const float8 data)
|
||||
f2h8_i ui;
|
||||
f2h8_o uo;
|
||||
ui.f = data;
|
||||
- uo.o = _mm256_cvtps_ph (ui.i, 2);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 2);
|
||||
return uo.f;
|
||||
}
|
||||
|
||||
@@ -159,7 +157,7 @@ _cl_float2half8_rtz (const float8 data)
|
||||
f2h8_i ui;
|
||||
f2h8_o uo;
|
||||
ui.f = data;
|
||||
- uo.o = _mm256_cvtps_ph (ui.i, 3);
|
||||
+ uo.o = __builtin_ia32_vcvtps2ph256 (ui.i, 3);
|
||||
return uo.f;
|
||||
}
|
||||
|
||||
@@ -173,13 +171,13 @@ _cl_float2half8 (const float8 data)
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m128i i;
|
||||
+ short8 i;
|
||||
ushort4 low, hi;
|
||||
} h2f4_i;
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m128 o;
|
||||
+ float4 o;
|
||||
float4 f;
|
||||
} h2f4_o;
|
||||
|
||||
@@ -189,7 +187,7 @@ _cl_half2float4 (const ushort4 data)
|
||||
h2f4_i ui;
|
||||
h2f4_o uo;
|
||||
ui.low = data;
|
||||
- uo.o = _mm_cvtph_ps (ui.i);
|
||||
+ uo.o = __builtin_ia32_vcvtph2ps (ui.i);
|
||||
return uo.f;
|
||||
}
|
||||
|
||||
@@ -197,13 +195,13 @@ _cl_half2float4 (const ushort4 data)
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m128i i;
|
||||
+ short8 i;
|
||||
ushort8 u;
|
||||
} h2f8_i;
|
||||
|
||||
typedef union
|
||||
{
|
||||
- __m256 o;
|
||||
+ float8 o;
|
||||
float8 f;
|
||||
} h2f8_o;
|
||||
|
||||
@@ -213,7 +211,7 @@ _cl_half2float8 (const ushort8 data)
|
||||
h2f8_i ui;
|
||||
h2f8_o uo;
|
||||
ui.u = data;
|
||||
- uo.o = _mm256_cvtph_ps (ui.i);
|
||||
+ uo.o = __builtin_ia32_vcvtph2ps256 (ui.i);
|
||||
return uo.f;
|
||||
}
|
||||
|
||||
--
|
||||
1.8.3.1
|
||||
|
|
@ -0,0 +1,56 @@
|
|||
From e9bab88cffde2871c3f7ca4339552fb3dcb66154 Mon Sep 17 00:00:00 2001
|
||||
From: Tom Stellard <tstellar@redhat.com>
|
||||
Date: Wed, 25 Oct 2017 15:04:43 -0700
|
||||
Subject: [PATCH] Revert "lib/kernel/printf.c: fix "-nan" on output"
|
||||
|
||||
This reverts commit 3877d6316985fdf36a54cfee745952436744ebcc.
|
||||
---
|
||||
lib/kernel/printf.c | 11 +++++------
|
||||
1 file changed, 5 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/lib/kernel/printf.c b/lib/kernel/printf.c
|
||||
index bd522fb..1564121 100644
|
||||
--- a/lib/kernel/printf.c
|
||||
+++ b/lib/kernel/printf.c
|
||||
@@ -28,7 +28,6 @@
|
||||
#include <limits.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
-#include <math.h>
|
||||
|
||||
// We implement the OpenCL printf by calling the C99 printf. This is
|
||||
// not very efficient, but is easy to implement.
|
||||
@@ -111,6 +110,9 @@ float __attribute__((overloadable)) vload_half(size_t offset,
|
||||
|
||||
// Note: To simplify implementation, we print double values with %lf,
|
||||
// although %f would suffice as well
|
||||
+#define FLOAT_CONV_half "h"
|
||||
+#define FLOAT_CONV_float ""
|
||||
+#define FLOAT_CONV_double "l"
|
||||
#define FLOAT_GET_half(ptr) vload_half(0, ptr)
|
||||
#define FLOAT_GET_float(ptr) (*(ptr))
|
||||
#define FLOAT_GET_double(ptr) (*(ptr))
|
||||
@@ -121,7 +123,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,
|
||||
{ \
|
||||
DEBUG_PRINTF(("[printf:floats:n=%dd]\n", n)); \
|
||||
char outfmt[1000]; \
|
||||
- OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" "%c"; \
|
||||
+ OCL_C_AS char str[] = "%%%s%s%s%s%s%.0d%s%.0d" FLOAT_CONV_##WIDTH "%c"; \
|
||||
snprintf(outfmt, sizeof outfmt, \
|
||||
str, \
|
||||
flags.left ? "-" : "", \
|
||||
@@ -138,10 +140,7 @@ float __attribute__((overloadable)) vload_half(size_t offset,
|
||||
for (int d=0; d<n; ++d) { \
|
||||
DEBUG_PRINTF(("[printf:floats:d=%d]\n", d)); \
|
||||
if (d != 0) printf(comma); \
|
||||
- WIDTH val = (FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d)); \
|
||||
- if (isnan (val)) \
|
||||
- val = NAN; \
|
||||
- printf(outfmt, (double)val); \
|
||||
+ printf(outfmt, FLOAT_GET_##WIDTH((OCL_C_AS const WIDTH*)vals+d)); \
|
||||
} \
|
||||
DEBUG_PRINTF(("[printf:floats:done]\n")); \
|
||||
}
|
||||
--
|
||||
1.8.3.1
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
From d7f89ad2696a253296a5116bd89fe3031de1fb1c Mon Sep 17 00:00:00 2001
|
||||
From: Tom Stellard <tstellar@redhat.com>
|
||||
Date: Thu, 25 May 2017 19:46:47 +0000
|
||||
Subject: [PATCH] Use generic cpu when LLVM can't detect the host processor
|
||||
|
||||
LLVM's CPU detection is very basic, it parses /proc/cpuinfo and uses
|
||||
information from there to look up CPUs in its own internal tables.
|
||||
These tables are incomplete, so it's not uncommon for LLVM to not
|
||||
be able to detect the CPU.
|
||||
|
||||
Ideally pocl would use something better than llc --version for CPU
|
||||
detection, but for now we can at least fall back to the generic
|
||||
cpu when LLVM can't detect the host.
|
||||
---
|
||||
cmake/LLVM.cmake | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake
|
||||
index 22e9b851..440fc0ca 100644
|
||||
--- a/cmake/LLVM.cmake
|
||||
+++ b/cmake/LLVM.cmake
|
||||
@@ -567,6 +567,10 @@ endif()
|
||||
|
||||
set_cache_var(LLC_TRIPLE "LLC_TRIPLE")
|
||||
|
||||
+# FIXME: The cpu name printed by llc --version is the same cpu that will be
|
||||
+# targeted if you pass -mcpu=native to llc, so we could replace this auto-detection
|
||||
+# with just: set(LLC_HOST_CPU "native"), however, we can't do this at the moment
|
||||
+# because of the work-around for arm1176jz-s.
|
||||
if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)
|
||||
message(STATUS "Find out LLC host CPU with ${LLVM_LLC}")
|
||||
execute_process(COMMAND ${LLVM_LLC} "--version" RESULT_VARIABLE RES_VAR OUTPUT_VARIABLE OUTPUT_VAR)
|
||||
@@ -589,7 +593,8 @@ if(NOT DEFINED LLC_HOST_CPU AND NOT CMAKE_CROSSCOMPILING)
|
||||
endif()
|
||||
|
||||
if(LLC_HOST_CPU MATCHES "unknown")
|
||||
- message(FATAL_ERROR "LLVM could not recognize your CPU model automatically. Please rerun cmake with -DLLC_HOST_CPU=<model> (to see a list of models, try: llc -mcpu help)")
|
||||
+ message(WARNING "LLVM could not recognize your CPU model automatically. Using a generic CPU target.")
|
||||
+ set(LLC_HOST_CPU "generic")
|
||||
endif()
|
||||
|
||||
set(LLC_HOST_CPU "${LLC_HOST_CPU}" CACHE STRING "The Host CPU to use with llc")
|
25
pocl.spec
25
pocl.spec
|
@ -1,22 +1,29 @@
|
|||
%global sover 1
|
||||
|
||||
%global commit 53ef5e89aa99b5e85ad512ff46aa8f38b36acf4f
|
||||
%global shortcommit %(c=%{commit}; echo ${c:0:7})
|
||||
|
||||
# hardened builds are breaking the pocl tests with llvm 3.9
|
||||
#undefine _hardened_build
|
||||
|
||||
Summary: Portable Computing Language - an OpenCL implementation
|
||||
Name: pocl
|
||||
Version: 0.14
|
||||
Release: 4%{?dist}
|
||||
Version: 0.15
|
||||
Release: 0.2.20171023git%{shortcommit}%{?dist}
|
||||
|
||||
# The whole code is under MIT
|
||||
# except include/utlist.h which is under BSD (and unbundled) and
|
||||
# except lib/kernel/vecmath which is under GPLv3+ or LGPLv3+ (and unbundled in future)
|
||||
License: MIT and BSD and (GPLv3+ or LGPLv3+)
|
||||
URL: http://pocl.sourceforge.net
|
||||
Source0: http://portablecl.org/downloads/%{name}-%{version}.tar.gz
|
||||
#Source0: http://portablecl.org/downloads/%{name}-%{version}.tar.gz
|
||||
Source0: https://github.com/pocl/pocl/archive/%{commit}/%{name}-%{shortcommit}.tar.gz
|
||||
|
||||
Patch1: 0001-wrap-more-stuff-for-64-bit-atomics.patch
|
||||
Patch2: pocl-fix-arm-detection.patch
|
||||
Patch2: 0001-Fix-build-on-i686.patch
|
||||
#https://github.com/pocl/pocl/issues/544
|
||||
Patch3: 0001-Revert-lib-kernel-printf.c-fix-nan-on-output.patch
|
||||
|
||||
# aarch64 seems to have a build system problem that makes it pass -march=(unknown).
|
||||
# ppc64 builds fine, but: 41% tests passed, 64 tests failed out of 108.
|
||||
# ppc64le fails with compilation errors.
|
||||
|
@ -69,7 +76,7 @@ Requires: uthash-devel
|
|||
Portable Computing Language development files.
|
||||
|
||||
%prep
|
||||
%autosetup -p1
|
||||
%autosetup -p1 -n %{name}-%{commit}
|
||||
|
||||
# Unbundle uthash
|
||||
find . -depth -name utlist* | xargs rm -f
|
||||
|
@ -78,6 +85,7 @@ find . -depth -name utlist* | xargs rm -f
|
|||
%build
|
||||
mkdir %{_target_platform}
|
||||
pushd %{_target_platform}
|
||||
# CPU detection fails on ARM, so we need to manually specify the CPU as generic.
|
||||
%cmake ../ \
|
||||
-DENABLE_ICD=1 \
|
||||
-DPOCL_INSTALL_ICD_VENDORDIR=%{_sysconfdir}/OpenCL/vendors \
|
||||
|
@ -86,6 +94,7 @@ pushd %{_target_platform}
|
|||
-DKERNELLIB_HOST_CPU_VARIANTS=distro
|
||||
%endif
|
||||
%ifarch %{arm}
|
||||
-DLLC_HOST_CPU="generic" \
|
||||
-DCL_DISABLE_LONG=true
|
||||
%endif
|
||||
# -DENABLE_TESTSUITES=all Requires clBLAS
|
||||
|
@ -129,6 +138,12 @@ popd
|
|||
%{_includedir}/%{name}u.h
|
||||
|
||||
%changelog
|
||||
* Wed May 23 2018 Dave Airlie <airlied@redhat.com> - 0.15-0.2.20171023git53ef5e8
|
||||
- pocl rebuild for newer clang include paths
|
||||
|
||||
* Tue Oct 24 2017 Tom Stellard <tstellar@redhat.com> - 0.15-0.1.20171023git53ef5e8
|
||||
- Rebase to latest master branch for LLVM 5.0 support
|
||||
|
||||
* Thu Aug 03 2017 Fedora Release Engineering <releng@fedoraproject.org> - 0.14-4
|
||||
- Rebuilt for https://fedoraproject.org/wiki/Fedora_27_Binutils_Mass_Rebuild
|
||||
|
||||
|
|
2
sources
2
sources
|
@ -1 +1 @@
|
|||
SHA512 (pocl-0.14.tar.gz) = 36bf41222315e13bac6c37cc942b9c09e22af0374abf63052fa8deb1ffe383da0ed9b5dc3548844d52749642d0b1288af5e0128e8dd4deb38e70128adb28c066
|
||||
SHA512 (pocl-53ef5e8.tar.gz) = 313c3dd937825178823bcba0254be462748c221025df2d79ea93ae0201bc7e04c0e7fa965d39cac0e05ae8e1105f604c48177b4cb54d08a1d38a78e756f783d1
|
||||
|
|
Loading…
Reference in New Issue