embree: Update to 4.4.0

This commit is contained in:
Rémi Verschelde
2025-04-24 17:51:03 +02:00
parent 1f787b63a5
commit 57640dd806
53 changed files with 1448 additions and 753 deletions

View File

@@ -7,7 +7,7 @@ import subprocess
import sys
from typing import Any, Callable
git_tag = "v4.3.1"
git_tag = "v4.4.0"
include_dirs = [
"common/tasking",

View File

@@ -199,7 +199,7 @@ Files extracted from upstream source:
## embree
- Upstream: https://github.com/embree/embree
- Version: 4.3.1 (daa8de0e714e18ad5e5c9841b67c1950d9c91c51, 2024)
- Version: 4.4.0 (ff9381774dc99fea81a932ad276677aad6a3d4dd, 2025)
- License: Apache 2.0
Files extracted from upstream:
@@ -216,7 +216,6 @@ Patches:
- `0003-emscripten-nthreads.patch` (GH-69799)
- `0004-mingw-no-cpuidex.patch` (GH-92488)
- `0005-mingw-llvm-arm64.patch` (GH-93364)
- `0006-include-order-dllexport.patch` (GH-94256)
The `modules/raycast/godot_update_embree.py` script can be used to pull the
relevant files from the latest Embree release and apply patches automatically.

View File

@@ -39,10 +39,7 @@ namespace embree
std::vector<char> str; str.reserve(64);
while (cin->peek() != EOF && !isSeparator(cin->peek())) {
int c = cin->get();
//if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
if (!isValidChar(c)) {
abort();
}
if (!isValidChar(c)) abort(); //throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
str.push_back((char)c);
}
str.push_back(0);

View File

@@ -95,8 +95,8 @@ namespace embree
__forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); }
__forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); }
//__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); }
__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y)); }
//__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::native::recip(a.x),sycl::native::recip(a.y)); }
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); }
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); }

View File

@@ -92,7 +92,7 @@ namespace embree
__forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
//__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); }
__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z)); }
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
@@ -393,7 +393,7 @@ namespace embree
__forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); }
//__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); }
__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::native::recip(a.x),sycl::native::recip(a.y),sycl::native::recip(a.z),sycl::native::recip(a.w)); }
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
@@ -614,4 +614,12 @@ namespace embree
//__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
// : x((int)a.x), y((int)a.y), z((int)a.z) {}
}
#if __SYCL_COMPILER_VERSION >= 20210801
namespace sycl {
template<> struct is_device_copyable<embree::Vec3fa> : std::true_type {};
template<> struct is_device_copyable<const embree::Vec3fa> : std::true_type {};
}
#endif

View File

@@ -12,69 +12,6 @@
namespace embree
{
size_t total_allocations = 0;
#if defined(EMBREE_SYCL_SUPPORT)
__thread sycl::context* tls_context_tutorial = nullptr;
__thread sycl::device* tls_device_tutorial = nullptr;
__thread sycl::context* tls_context_embree = nullptr;
__thread sycl::device* tls_device_embree = nullptr;
void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
{
//if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
//if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
if (tls_context_embree != nullptr) {
abort();
}
if (tls_device_embree != nullptr) {
abort();
}
tls_context_embree = context;
tls_device_embree = device;
}
void disableUSMAllocEmbree()
{
//if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
//if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
if (tls_context_embree == nullptr) {
abort();
}
if (tls_device_embree == nullptr) {
abort();
}
tls_context_embree = nullptr;
tls_device_embree = nullptr;
}
void enableUSMAllocTutorial(sycl::context* context, sycl::device* device)
{
//if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
//if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
tls_context_tutorial = context;
tls_device_tutorial = device;
}
void disableUSMAllocTutorial()
{
//if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
//if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
if (tls_context_tutorial == nullptr) {
abort();
}
if (tls_device_tutorial == nullptr) {
abort();
}
tls_context_tutorial = nullptr;
tls_device_tutorial = nullptr;
}
#endif
void* alignedMalloc(size_t size, size_t align)
{
if (size == 0)
@@ -82,18 +19,16 @@ namespace embree
assert((align & (align-1)) == 0);
void* ptr = _mm_malloc(size,align);
//if (size != 0 && ptr == nullptr)
// throw std::bad_alloc();
if (size != 0 && ptr == nullptr) {
abort();
}
if (size != 0 && ptr == nullptr)
abort(); //throw std::bad_alloc();
return ptr;
}
void alignedFree(void* ptr)
{
if (ptr)
if (ptr) {
_mm_free(ptr);
}
}
#if defined(EMBREE_SYCL_SUPPORT)
@@ -107,68 +42,67 @@ namespace embree
return nullptr;
assert((align & (align-1)) == 0);
total_allocations++;
void* ptr = nullptr;
if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY)
if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
else
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
//if (size != 0 && ptr == nullptr)
// throw std::bad_alloc();
if (size != 0 && ptr == nullptr) {
abort();
if (size != 0 && ptr == nullptr)
abort(); //throw std::bad_alloc();
return ptr;
}
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type)
{
assert(context);
assert(device);
if (size == 0)
return nullptr;
assert((align & (align-1)) == 0);
void* ptr = nullptr;
if (type == EmbreeMemoryType::USM_SHARED) {
if (mode == EmbreeUSMMode::DEVICE_READ_ONLY)
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
else
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
}
else if (type == EmbreeMemoryType::USM_HOST) {
ptr = sycl::aligned_alloc_host(align,size,*context);
}
else if (type == EmbreeMemoryType::USM_DEVICE) {
ptr = sycl::aligned_alloc_device(align,size,*device,*context);
}
else {
ptr = alignedMalloc(size,align);
}
if (size != 0 && ptr == nullptr)
abort(); //throw std::bad_alloc();
return ptr;
}
static MutexSys g_alloc_mutex;
void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode)
{
if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode);
if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode);
return nullptr;
}
void alignedSYCLFree(sycl::context* context, void* ptr)
{
assert(context);
if (ptr) {
sycl::free(ptr,*context);
sycl::usm::alloc type = sycl::get_pointer_type(ptr, *context);
if (type == sycl::usm::alloc::host || type == sycl::usm::alloc::device || type == sycl::usm::alloc::shared)
sycl::free(ptr,*context);
else {
alignedFree(ptr);
}
}
}
void alignedSYCLFree(void* ptr)
{
if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr);
if (tls_context_embree ) return alignedSYCLFree(tls_context_embree, ptr);
}
#endif
void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode)
{
#if defined(EMBREE_SYCL_SUPPORT)
if (tls_context_embree || tls_context_tutorial)
return alignedSYCLMalloc(size,align,mode);
else
#endif
return alignedMalloc(size,align);
}
void alignedUSMFree(void* ptr)
{
#if defined(EMBREE_SYCL_SUPPORT)
if (tls_context_embree || tls_context_tutorial)
return alignedSYCLFree(ptr);
else
#endif
return alignedFree(ptr);
}
static bool huge_pages_enabled = false;
static MutexSys os_init_mutex;
@@ -265,10 +199,7 @@ namespace embree
/* fall back to 4k pages */
int flags = MEM_COMMIT | MEM_RESERVE;
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
//if (ptr == nullptr) throw std::bad_alloc();
if (ptr == nullptr) {
abort();
}
if (ptr == nullptr) abort(); //throw std::bad_alloc();
hugepages = false;
return ptr;
}
@@ -284,11 +215,8 @@ namespace embree
if (bytesNew >= bytesOld)
return bytesOld;
//if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
// throw std::bad_alloc();
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
abort();
}
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
abort(); //throw std::bad_alloc();
return bytesNew;
}
@@ -298,11 +226,8 @@ namespace embree
if (bytes == 0)
return;
//if (!VirtualFree(ptr,0,MEM_RELEASE))
// throw std::bad_alloc();
if (!VirtualFree(ptr,0,MEM_RELEASE)) {
abort();
}
if (!VirtualFree(ptr,0,MEM_RELEASE))
abort(); //throw std::bad_alloc();
}
void os_advise(void *ptr, size_t bytes)
@@ -406,10 +331,7 @@ namespace embree
/* fallback to 4k pages */
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
//if (ptr == MAP_FAILED) throw std::bad_alloc();
if (ptr == MAP_FAILED) {
abort();
}
if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc();
hugepages = false;
/* advise huge page hint for THP */
@@ -425,11 +347,8 @@ namespace embree
if (bytesNew >= bytesOld)
return bytesOld;
//if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
// throw std::bad_alloc();
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
abort();
}
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
abort(); //throw std::bad_alloc();
return bytesNew;
}
@@ -442,11 +361,8 @@ namespace embree
/* for hugepages we need to also align the size */
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
bytes = (bytes+pageSize-1) & ~(pageSize-1);
//if (munmap(ptr,bytes) == -1)
// throw std::bad_alloc();
if (munmap(ptr,bytes) == -1) {
abort();
}
if (munmap(ptr,bytes) == -1)
abort(); //throw std::bad_alloc();
}
/* hint for transparent huge pages (THP) */

View File

@@ -9,71 +9,42 @@
namespace embree
{
#if defined(EMBREE_SYCL_SUPPORT)
/* enables SYCL USM allocation */
void enableUSMAllocEmbree(sycl::context* context, sycl::device* device);
void enableUSMAllocTutorial(sycl::context* context, sycl::device* device);
/* disables SYCL USM allocation */
void disableUSMAllocEmbree();
void disableUSMAllocTutorial();
#endif
#define ALIGNED_STRUCT_(align) \
void* operator new(size_t size) { return alignedMalloc(size,align); } \
void operator delete(void* ptr) { alignedFree(ptr); } \
void* operator new[](size_t size) { return alignedMalloc(size,align); } \
void operator delete[](void* ptr) { alignedFree(ptr); }
#define ALIGNED_STRUCT_USM_(align) \
void* operator new(size_t size) { return alignedUSMMalloc(size,align); } \
void operator delete(void* ptr) { alignedUSMFree(ptr); } \
void* operator new[](size_t size) { return alignedUSMMalloc(size,align); } \
void operator delete[](void* ptr) { alignedUSMFree(ptr); }
#define ALIGNED_CLASS_(align) \
public: \
ALIGNED_STRUCT_(align) \
private:
#define ALIGNED_CLASS_USM_(align) \
public: \
ALIGNED_STRUCT_USM_(align) \
private:
enum EmbreeUSMMode {
EMBREE_USM_SHARED = 0,
EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0,
EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1
};
/*! aligned allocation */
void* alignedMalloc(size_t size, size_t align);
void alignedFree(void* ptr);
/*! aligned allocation using SYCL USM */
void* alignedUSMMalloc(size_t size, size_t align = 16, EmbreeUSMMode mode = EMBREE_USM_SHARED_DEVICE_READ_ONLY);
void alignedUSMFree(void* ptr);
enum class EmbreeUSMMode {
DEFAULT = 0,
DEVICE_READ_WRITE = 0,
DEVICE_READ_ONLY = 1
};
enum class EmbreeMemoryType {
USM_HOST = 0,
USM_DEVICE = 1,
USM_SHARED = 2,
MALLOC = 3
};
#if defined(EMBREE_SYCL_SUPPORT)
/*! aligned allocation using SYCL USM */
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode, EmbreeMemoryType type);
void alignedSYCLFree(sycl::context* context, void* ptr);
// deleter functor to use as deleter in std unique or shared pointers that
// capture raw pointers created by sycl::malloc and it's variants
template<typename T>
struct sycl_deleter
{
void operator()(T const* ptr)
{
alignedUSMFree((void*)ptr);
}
};
#endif
/*! allocator that performs aligned allocations */
@@ -160,8 +131,7 @@ namespace embree
typedef std::ptrdiff_t difference_type;
__forceinline pointer allocate( size_type n ) {
//throw std::runtime_error("no allocation supported");
abort();
abort(); //throw std::runtime_error("no allocation supported");
}
__forceinline void deallocate( pointer p, size_type n ) {

View File

@@ -35,18 +35,6 @@ namespace embree
filename.resize(filename.size()-1);
}
/*! returns path to home folder */
FileName FileName::homeFolder()
{
#ifdef __WIN32__
const char* home = getenv("UserProfile");
#else
const char* home = getenv("HOME");
#endif
if (home) return home;
return "";
}
/*! returns path to executable */
FileName FileName::executableFolder() {
return FileName(getExecutableFileName()).path();

View File

@@ -20,9 +20,6 @@ namespace embree
/*! create a valid filename from a string */
FileName (const std::string& filename);
/*! returns path to home folder */
static FileName homeFolder();
/*! returns path to executable */
static FileName executableFolder();

View File

@@ -213,15 +213,12 @@
#define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
#if defined(DEBUG) // only report file and line in debug mode
//#define THROW_RUNTIME_ERROR(str) \
// throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#define THROW_RUNTIME_ERROR(str) \
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
//throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#else
//#define THROW_RUNTIME_ERROR(str) \
// throw std::runtime_error(str);
#define THROW_RUNTIME_ERROR(str) \
abort();
abort(); //throw std::runtime_error(str);
#endif
#define FATAL(x) THROW_RUNTIME_ERROR(x)

View File

@@ -181,8 +181,15 @@ namespace embree
return sycl::select(b,a,mask);
}
#define XSTR(x) STR(x)
#define STR(x) #x
__forceinline const sycl::sub_group this_sub_group() {
return sycl::ext::oneapi::experimental::this_sub_group();
#if __LIBSYCL_MAJOR_VERSION >= 8
return sycl::ext::oneapi::this_work_item::get_sub_group();
#else
return sycl::ext::oneapi::experimental::this_sub_group();
#endif
}
__forceinline const uint32_t get_sub_group_local_id() {
@@ -275,18 +282,6 @@ namespace embree
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init);
}
template <typename T> __forceinline T sub_group_shuffle(T x, sycl::id<1> local_id) {
return this_sub_group().shuffle(x, local_id);
}
template <typename T> __forceinline T sub_group_shuffle_down(T x, uint32_t delta) {
return this_sub_group().shuffle_down(x, delta);
}
template <typename T> __forceinline T sub_group_shuffle_up(T x, uint32_t delta) {
return this_sub_group().shuffle_up(x, delta);
}
template <typename T> __forceinline T sub_group_load(const void* src) {
return this_sub_group().load(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)src));
}

View File

@@ -344,6 +344,19 @@ namespace embree
if (cpuid_leaf_7[EBX] & CPU_FEATURE_BIT_AVX512VL ) cpu_features |= CPU_FEATURE_AVX512VL;
if (cpuid_leaf_7[ECX] & CPU_FEATURE_BIT_AVX512VBMI) cpu_features |= CPU_FEATURE_AVX512VBMI;
#if defined(__MACOSX__)
if ( (cpu_features & CPU_FEATURE_AVX512F)
|| (cpu_features & CPU_FEATURE_AVX512DQ)
|| (cpu_features & CPU_FEATURE_AVX512CD)
|| (cpu_features & CPU_FEATURE_AVX512BW)
|| (cpu_features & CPU_FEATURE_AVX512VL) )
{
// on macOS AVX512 will be enabled automatically by the kernel when the first AVX512 instruction is called
// see https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L176
// therefore we ignore the state of XCR0
cpu_features |= CPU_FEATURE_ZMM_ENABLED;
}
#endif
return cpu_features;
#elif defined(__ARM_NEON) || defined(__EMSCRIPTEN__)

View File

@@ -158,7 +158,7 @@ namespace embree
static const int SSE41 = SSSE3 | CPU_FEATURE_SSE41;
static const int SSE42 = SSE41 | CPU_FEATURE_SSE42 | CPU_FEATURE_POPCNT;
static const int AVX = SSE42 | CPU_FEATURE_AVX | CPU_FEATURE_YMM_ENABLED;
static const int AVXI = AVX | CPU_FEATURE_F16C | CPU_FEATURE_RDRAND;
static const int AVXI = AVX | CPU_FEATURE_F16C;
static const int AVX2 = AVXI | CPU_FEATURE_AVX2 | CPU_FEATURE_FMA3 | CPU_FEATURE_BMI1 | CPU_FEATURE_BMI2 | CPU_FEATURE_LZCNT;
static const int AVX512 = AVX2 | CPU_FEATURE_AVX512F | CPU_FEATURE_AVX512DQ | CPU_FEATURE_AVX512CD | CPU_FEATURE_AVX512BW | CPU_FEATURE_AVX512VL | CPU_FEATURE_ZMM_ENABLED;
static const int NEON = CPU_FEATURE_NEON | CPU_FEATURE_SSE | CPU_FEATURE_SSE2;

View File

@@ -124,8 +124,10 @@ namespace embree
__forceinline T* data() { return items; };
__forceinline const T* data() const { return items; };
/* dangerous only use if you know what you're doing */
__forceinline void setDataPtr(T* data) { items = data; }
/******************** Modifiers **************************/
__forceinline void push_back(const T& nt)

View File

@@ -130,11 +130,8 @@ namespace embree
__forceinline void* alloc(size_t bytes, size_t align = 64)
{
size_t ofs = bytes + ((align - stackPtr) & (align-1));
//if (stackPtr + ofs > CLOSURE_STACK_SIZE)
// throw std::runtime_error("closure stack overflow");
if (stackPtr + ofs > CLOSURE_STACK_SIZE) {
abort();
}
if (stackPtr + ofs > CLOSURE_STACK_SIZE)
abort(); //throw std::runtime_error("closure stack overflow");
stackPtr += ofs;
return &stack[stackPtr-bytes];
}
@@ -142,11 +139,8 @@ namespace embree
template<typename Closure>
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context)
{
//if (right >= TASK_STACK_SIZE)
// throw std::runtime_error("task stack overflow");
if (right >= TASK_STACK_SIZE) {
abort();
}
if (right >= TASK_STACK_SIZE)
abort(); //throw std::runtime_error("task stack overflow");
/* allocate new task on right side of stack */
size_t oldStackPtr = stackPtr;

View File

@@ -38,12 +38,32 @@ typedef struct RTCBufferTy* RTCBuffer;
/* Creates a new buffer. */
RTC_API RTCBuffer rtcNewBuffer(RTCDevice device, size_t byteSize);
/* Creates a new buffer using explicit host device memory. */
RTC_API RTCBuffer rtcNewBufferHostDevice(RTCDevice device, size_t byteSize);
/* Creates a new shared buffer. */
RTC_API RTCBuffer rtcNewSharedBuffer(RTCDevice device, void* ptr, size_t byteSize);
/* Creates a new shared buffer using explicit host device memory. */
RTC_API RTCBuffer rtcNewSharedBufferHostDevice(RTCDevice device, void* ptr, size_t byteSize);
/* Synchronize host and device memory by copying data from host to device. */
RTC_API void rtcCommitBuffer(RTCBuffer buffer);
#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer buffer, sycl::queue queue);
#endif
/* Returns a pointer to the buffer data. */
RTC_API void* rtcGetBufferData(RTCBuffer buffer);
/* Returns a pointer to the buffer data on the device. Returns the same pointer as
rtcGetBufferData if the device is no SYCL device or if Embree is executed on a
system with unified memory (e.g., iGPUs). */
RTC_API void* rtcGetBufferDataDevice(RTCBuffer buffer);
/* Retains the buffer (increments the reference count). */
RTC_API void rtcRetainBuffer(RTCBuffer buffer);

View File

@@ -8,10 +8,10 @@
#endif
#define RTC_VERSION_MAJOR 4
#define RTC_VERSION_MINOR 3
#define RTC_VERSION_PATCH 1
#define RTC_VERSION 40301
#define RTC_VERSION_STRING "4.3.1"
#define RTC_VERSION_MINOR 4
#define RTC_VERSION_PATCH 0
#define RTC_VERSION 40400
#define RTC_VERSION_STRING "4.4.0"
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
@@ -36,6 +36,7 @@
# define RTC_NAMESPACE_END }
# define RTC_NAMESPACE_USE using namespace;
# define RTC_API_EXTERN_C
# define RTC_API_EXTERN_CPP
# undef EMBREE_API_NAMESPACE
#else
# define RTC_NAMESPACE_BEGIN
@@ -43,6 +44,7 @@
# define RTC_NAMESPACE_USE
# if defined(__cplusplus)
# define RTC_API_EXTERN_C extern "C"
# define RTC_API_EXTERN_CPP extern "C++"
# else
# define RTC_API_EXTERN_C
# endif
@@ -62,12 +64,32 @@
# define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default")))
#endif
#if defined(ISPC)
# define RTC_API_IMPORT_CPP extern "C++" unmasked
# define RTC_API_EXPORT_CPP extern "C++" unmasked
#elif defined(EMBREE_STATIC_LIB)
# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP
# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP
#elif defined(_WIN32)
# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP __declspec(dllimport)
# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __declspec(dllexport)
#else
# define RTC_API_IMPORT_CPP RTC_API_EXTERN_CPP
# define RTC_API_EXPORT_CPP RTC_API_EXTERN_CPP __attribute__ ((visibility ("default")))
#endif
#if defined(RTC_EXPORT_API)
# define RTC_API RTC_API_EXPORT
#else
# define RTC_API RTC_API_IMPORT
#endif
#if defined(RTC_EXPORT_API)
# define RTC_API_CPP RTC_API_EXPORT_CPP
#else
# define RTC_API_CPP RTC_API_IMPORT_CPP
#endif
#if defined(ISPC)
# define RTC_SYCL_INDIRECTLY_CALLABLE
#elif defined(__SYCL_DEVICE_ONLY__)

View File

@@ -9,14 +9,18 @@ RTC_NAMESPACE_BEGIN
/* Opaque device type */
typedef struct RTCDeviceTy* RTCDevice;
typedef struct RTCSceneTy* RTCScene;
/* Creates a new Embree device. */
RTC_API RTCDevice rtcNewDevice(const char* config);
#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
/* Creates a new Embree SYCL device. */
/*
Creates a new Embree SYCL device. It will internally select the first SYCL device of
the SYCL context as the default device for memory allocations. You can set a specific
SYCL device that's part of the SYCL context by calling rtcSetDeviceSYCLDevice.
*/
RTC_API_EXTERN_C RTCDevice rtcNewSYCLDevice(sycl::context context, const char* config);
/* Checks if SYCL device is supported by Embree. */
@@ -28,6 +32,10 @@ RTC_API int rtcSYCLDeviceSelector(const sycl::device sycl_device);
/* Set the SYCL device to be used to allocate data */
RTC_API void rtcSetDeviceSYCLDevice(RTCDevice device, const sycl::device sycl_device);
/* rtcCommitSceneWithQueue is asynchronous, user has to call queue.wait()
for synchronization. rtcCommitScene is blocking. */
RTC_API_CPP sycl::event rtcCommitSceneWithQueue(RTCScene scene, sycl::queue queue);
#endif
@@ -66,7 +74,10 @@ enum RTCDeviceProperty
RTC_DEVICE_PROPERTY_TASKING_SYSTEM = 128,
RTC_DEVICE_PROPERTY_JOIN_COMMIT_SUPPORTED = 129,
RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED = 130
RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED = 130,
RTC_DEVICE_PROPERTY_CPU_DEVICE = 140,
RTC_DEVICE_PROPERTY_SYCL_DEVICE = 141
};
/* Gets a device property. */
@@ -78,18 +89,27 @@ RTC_API void rtcSetDeviceProperty(RTCDevice device, const enum RTCDeviceProperty
/* Error codes */
enum RTCError
{
RTC_ERROR_NONE = 0,
RTC_ERROR_UNKNOWN = 1,
RTC_ERROR_INVALID_ARGUMENT = 2,
RTC_ERROR_INVALID_OPERATION = 3,
RTC_ERROR_OUT_OF_MEMORY = 4,
RTC_ERROR_UNSUPPORTED_CPU = 5,
RTC_ERROR_CANCELLED = 6,
RTC_ERROR_NONE = 0,
RTC_ERROR_UNKNOWN = 1,
RTC_ERROR_INVALID_ARGUMENT = 2,
RTC_ERROR_INVALID_OPERATION = 3,
RTC_ERROR_OUT_OF_MEMORY = 4,
RTC_ERROR_UNSUPPORTED_CPU = 5,
RTC_ERROR_CANCELLED = 6,
RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING = 7,
};
/* Returns the string representation for the error code. For example, for RTC_ERROR_UNKNOWN the string "RTC_ERROR_UNKNOWN" will be returned. */
RTC_API const char* rtcGetErrorString(enum RTCError error);
/* Returns the error code. */
RTC_API enum RTCError rtcGetDeviceError(RTCDevice device);
/* Returns a message corresponding to the last error code (returned by rtcGetDeviceError) which provides details about the error that happened.
The same message will be written to console when verbosity is > 0 or when an error callback function is set for the device.
However, when device creation itself fails this is the only way to get additional information about the error. */
RTC_API const char* rtcGetDeviceLastErrorMessage(RTCDevice device);
/* Error callback function */
typedef void (*RTCErrorFunction)(void* userPtr, enum RTCError code, const char* str);

View File

@@ -171,16 +171,26 @@ RTC_API void rtcSetGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type,
/* Sets a shared geometry buffer. */
RTC_API void rtcSetSharedGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, const void* ptr, size_t byteOffset, size_t byteStride, size_t itemCount);
/* Sets a shared host/device geometry buffer pair. */
RTC_API void rtcSetSharedGeometryBufferHostDevice(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount);
/* Creates and sets a new geometry buffer. */
RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount);
/* Creates and sets a new host/device geometry buffer pair. */
RTC_API void rtcSetNewGeometryBufferHostDevice(RTCGeometry geometry, enum RTCBufferType bufferType, unsigned int slot, enum RTCFormat format, size_t byteStride, size_t itemCount, void** ptr, void** dptr);
/* Returns the pointer to the data of a buffer. */
RTC_API void* rtcGetGeometryBufferData(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot);
/* Returns a pointer to the buffer data on the device. Returns the same pointer as
rtcGetGeometryBufferData if the device is no SYCL device or if Embree is executed on a
system with unified memory (e.g., iGPUs). */
RTC_API void* rtcGetGeometryBufferDataDevice(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot);
/* Updates a geometry buffer. */
RTC_API void rtcUpdateGeometryBuffer(RTCGeometry geometry, enum RTCBufferType type, unsigned int slot);
/* Sets the intersection filter callback function of the geometry. */
RTC_API void rtcSetGeometryIntersectFilterFunction(RTCGeometry geometry, RTCFilterFunctionN filter);

View File

@@ -225,8 +225,8 @@ RTC_FORCEINLINE RTCRayN* RTCRayHitN_RayN(RTCRayHitN* rayhit, unsigned int N) { r
RTC_FORCEINLINE RTCHitN* RTCRayHitN_HitN(RTCRayHitN* rayhit, unsigned int N) { return (RTCHitN*)&((float*)rayhit)[12*N]; }
/* Helper structure for a ray packet of compile-time size N */
template<int N>
struct RTCRayNt
template<unsigned int N>
struct RTC_ALIGN((N && !(N & (N - 1)) ? (N * 4 > 16 ? N * 4 : 16) : 16)) RTCRayNt
{
float org_x[N];
float org_y[N];
@@ -245,8 +245,8 @@ struct RTCRayNt
};
/* Helper structure for a hit packet of compile-time size N */
template<int N>
struct RTCHitNt
template<unsigned int N>
struct RTC_ALIGN((N && !(N & (N - 1)) ? (N * 4 > 16 ? N * 4 : 16) : 16)) RTCHitNt
{
float Ng_x[N];
float Ng_y[N];

View File

@@ -6,7 +6,10 @@
#include "rtcore_device.h"
RTC_NAMESPACE_BEGIN
/* Opaque traversable type */
typedef struct RTCTraversableTy* RTCTraversable;
/* Forward declarations for ray structures */
struct RTCRayHit;
struct RTCRayHit4;
@@ -16,11 +19,12 @@ struct RTCRayHit16;
/* Scene flags */
enum RTCSceneFlags
{
RTC_SCENE_FLAG_NONE = 0,
RTC_SCENE_FLAG_DYNAMIC = (1 << 0),
RTC_SCENE_FLAG_COMPACT = (1 << 1),
RTC_SCENE_FLAG_ROBUST = (1 << 2),
RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = (1 << 3)
RTC_SCENE_FLAG_NONE = 0,
RTC_SCENE_FLAG_DYNAMIC = (1 << 0),
RTC_SCENE_FLAG_COMPACT = (1 << 1),
RTC_SCENE_FLAG_ROBUST = (1 << 2),
RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = (1 << 3),
RTC_SCENE_FLAG_PREFETCH_USM_SHARED_ON_GPU = (1 << 4),
};
/* Additional arguments for rtcIntersect1/4/8/16 calls */
@@ -91,6 +95,8 @@ RTC_API void rtcRetainScene(RTCScene scene);
/* Releases the scene (decrements the reference count). */
RTC_API void rtcReleaseScene(RTCScene scene);
/* Returns the traversable object of the scene which can be passed to ray queries. */
RTC_API RTCTraversable rtcGetSceneTraversable(RTCScene scene);
/* Attaches the geometry to a scene. */
RTC_API unsigned int rtcAttachGeometry(RTCScene scene, RTCGeometry geometry);
@@ -107,12 +113,6 @@ RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID);
/* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */
RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID);
/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID);
/* Returns the interpolated transformation of an instance for the specified time. */
RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
/* Commits the scene. */
RTC_API void rtcCommitScene(RTCScene scene);
@@ -142,6 +142,13 @@ RTC_API void rtcGetSceneBounds(RTCScene scene, struct RTCBounds* bounds_o);
/* Returns the linear axis-aligned bounds of the scene. */
RTC_API void rtcGetSceneLinearBounds(RTCScene scene, struct RTCLinearBounds* bounds_o);
#if !defined(__SYCL_DEVICE_ONLY__)
/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID);
/* Returns the interpolated transformation of an instance for the specified time. */
RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
/* Perform a closest point query of the scene. */
RTC_API bool rtcPointQuery(RTCScene scene, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
@@ -231,6 +238,102 @@ RTC_API void rtcForwardOccluded16(const int* valid, const struct RTCOccludedFunc
/* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID);
#endif
/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
RTC_SYCL_API void* rtcGetGeometryUserDataFromTraversable(RTCTraversable traversable, unsigned int geomID);
/* Returns the interpolated transformation of an instance for the specified time. */
RTC_SYCL_API void rtcGetGeometryTransformFromTraversable(RTCTraversable traversable, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
/* Perform a closest point query of the scene. */
RTC_API bool rtcTraversablePointQuery(RTCTraversable traversable, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcTraversablePointQuery4(const int* valid, RTCTraversable traversable, struct RTCPointQuery4* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcTraversablePointQuery8(const int* valid, RTCTraversable traversable, struct RTCPointQuery8* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcTraversablePointQuery16(const int* valid, RTCTraversable traversable, struct RTCPointQuery16* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Intersects a single ray with the scene. */
RTC_SYCL_API void rtcTraversableIntersect1(RTCTraversable traversable, struct RTCRayHit* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Intersects a packet of 4 rays with the scene. */
RTC_API void rtcTraversableIntersect4(const int* valid, RTCTraversable traversable, struct RTCRayHit4* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Intersects a packet of 8 rays with the scene. */
RTC_API void rtcTraversableIntersect8(const int* valid, RTCTraversable traversable, struct RTCRayHit8* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Intersects a packet of 16 rays with the scene. */
RTC_API void rtcTraversableIntersect16(const int* valid, RTCTraversable traversable, struct RTCRayHit16* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Forwards ray inside user geometry callback. */
RTC_SYCL_API void rtcTraversableForwardIntersect1(const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID);
/* Forwards ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_SYCL_API void rtcTraversableForwardIntersect1Ex(const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards ray packet of size 4 inside user geometry callback. */
RTC_API void rtcTraversableForwardIntersect4(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID);
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcTraversableForwardIntersect4Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID, unsigned int primInstID);
/* Forwards ray packet of size 8 inside user geometry callback. */
RTC_API void rtcTraversableForwardIntersect8(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID);
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcTraversableForwardIntersect8Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID, unsigned int primInstID);
/* Forwards ray packet of size 16 inside user geometry callback. */
RTC_API void rtcTraversableForwardIntersect16(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID);
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcTraversableForwardIntersect16Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID, unsigned int primInstID);
/* Tests a single ray for occlusion with the scene. */
RTC_SYCL_API void rtcTraversableOccluded1(RTCTraversable traversable, struct RTCRay* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Tests a packet of 4 rays for occlusion occluded with the scene. */
RTC_API void rtcTraversableOccluded4(const int* valid, RTCTraversable traversable, struct RTCRay4* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Tests a packet of 8 rays for occlusion with the scene. */
RTC_API void rtcTraversableOccluded8(const int* valid, RTCTraversable traversable, struct RTCRay8* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Tests a packet of 16 rays for occlusion with the scene. */
RTC_API void rtcTraversableOccluded16(const int* valid, RTCTraversable traversable, struct RTCRay16* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Forwards single occlusion ray inside user geometry callback. */
RTC_SYCL_API void rtcTraversableForwardOccluded1(const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID);
/* Forwards single occlusion ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_SYCL_API void rtcTraversableForwardOccluded1Ex(const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards occlusion ray packet of size 4 inside user geometry callback. */
RTC_API void rtcTraversableForwardOccluded4(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID);
/* Forwards occlusion ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcTraversableForwardOccluded4Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay4* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards occlusion ray packet of size 8 inside user geometry callback. */
RTC_API void rtcTraversableForwardOccluded8(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID);
/* Forwards occlusion ray packet of size 8 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcTraversableForwardOccluded8Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay8* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards occlusion ray packet of size 16 inside user geometry callback. */
RTC_API void rtcTraversableForwardOccluded16(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID);
/* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcTraversableForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCTraversable traversable, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID);
/*! collision callback */
struct RTCCollision { unsigned int geomID0; unsigned int primID0; unsigned int geomID1; unsigned int primID1; };
@@ -238,7 +341,7 @@ typedef void (*RTCCollideFunc) (void* userPtr, struct RTCCollision* collisions,
/*! Performs collision detection of two scenes */
RTC_API void rtcCollide (RTCScene scene0, RTCScene scene1, RTCCollideFunc callback, void* userPtr);
#if defined(__cplusplus)
/* Helper for easily combining scene flags */

View File

@@ -48,10 +48,9 @@ namespace embree
}
Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(min(minLeafSize,maxLeafSize)), maxLeafSize(maxLeafSize),
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)
{
minLeafSize = min(minLeafSize,maxLeafSize);
}
public:

View File

@@ -283,11 +283,9 @@ namespace embree
if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */
//int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
//int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
//if (unlikely(bin0 < split.pos && bin1 >= split.pos))
if (unlikely(prims0[i].lower[split.dim] < fpos && prims0[i].upper[split.dim] > fpos))
const int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];
const int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];
if (unlikely(bin0 < split.pos && bin1 >= split.pos))
{
assert(splits > 1);

View File

@@ -179,9 +179,9 @@ namespace embree
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,inf,mode); }
Builder* BVH4Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
Builder* BVH4Triangle4vSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4v,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
Builder* BVH4Triangle4iSceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<4,TriangleMesh,Triangle4i,TriangleSplitterFactory>((BVH4*)bvh,scene,4,1.0f,4,scene->device->max_triangles_per_leaf,mode); }
#if defined(__AVX__)
Builder* BVH8Triangle4SceneBuilderFastSpatialSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderFastSpatialSAH<8,TriangleMesh,Triangle4,TriangleSplitterFactory>((BVH8*)bvh,scene,4,1.0f,4,inf,mode); }

View File

@@ -150,8 +150,7 @@ namespace embree
}
}
else {
//throw std::runtime_error("not supported node type in bvh_statistics");
abort();
abort(); //throw std::runtime_error("not supported node type in bvh_statistics");
}
return s;
}

View File

@@ -189,11 +189,8 @@ namespace embree
, atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC)
, primrefarray(device,0)
{
//if (osAllocation && useUSM)
// throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
if (osAllocation && useUSM) {
abort();
}
if (osAllocation && useUSM)
abort(); //throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
{
@@ -505,11 +502,8 @@ namespace embree
Block* myUsedBlocks = threadUsedBlocks[slot];
if (myUsedBlocks) {
void* ptr = myUsedBlocks->malloc(device,bytes,align,partial);
//if (ptr == nullptr && !blockAllocation)
// throw std::bad_alloc();
if (ptr == nullptr && !blockAllocation) {
abort();
}
if (ptr == nullptr && !blockAllocation)
abort(); //throw std::bad_alloc();
if (ptr) return ptr;
}

View File

@@ -8,120 +8,232 @@
namespace embree
{
enum class BufferDataPointerType {
HOST = 0,
DEVICE = 1,
UNKNOWN = 2
};
/*! Implements an API data buffer object. This class may or may not own the data. */
class Buffer : public RefCount
{
public:
/*! Buffer construction */
//Buffer()
//: device(nullptr), ptr(nullptr), numBytes(0), shared(false) {}
/*! Buffer construction */
Buffer(Device* device, size_t numBytes_in, void* ptr_in = nullptr)
: device(device), numBytes(numBytes_in)
private:
char* alloc(void* ptr_in, bool &shared, EmbreeMemoryType memoryType)
{
device->refInc();
if (ptr_in)
{
shared = true;
ptr = (char*)ptr_in;
return (char*)ptr_in;
}
else
{
shared = false;
alloc();
device->memoryMonitor(this->bytes(), false);
size_t b = (this->bytes()+15) & ssize_t(-16);
return (char*)device->malloc(b,16,memoryType);
}
}
public:
Buffer(Device* device, size_t numBytes_in, void* ptr_in)
: device(device), numBytes(numBytes_in)
{
device->refInc();
ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED);
#if defined(EMBREE_SYCL_SUPPORT)
dshared = true;
dptr = ptr;
modified = true;
#endif
}
Buffer(Device* device, size_t numBytes_in, void* ptr_in, void* dptr_in)
: device(device), numBytes(numBytes_in)
{
device->refInc();
#if defined(EMBREE_SYCL_SUPPORT)
modified = true;
if (device->is_gpu() && !device->has_unified_memory())
{
ptr = alloc( ptr_in, shared, EmbreeMemoryType::MALLOC);
dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE);
}
else if (device->is_gpu() && device->has_unified_memory())
{
ptr = alloc(ptr_in, shared, EmbreeMemoryType::USM_SHARED);
if (device->get_memory_type(ptr) != EmbreeMemoryType::USM_SHARED)
{
dptr = alloc(dptr_in, dshared, EmbreeMemoryType::USM_DEVICE);
}
else
{
dshared = true;
dptr = ptr;
}
}
else
#endif
{
ptr = alloc(ptr_in, shared, EmbreeMemoryType::MALLOC);
#if defined(EMBREE_SYCL_SUPPORT)
dshared = true;
dptr = ptr;
#endif
}
}
/*! Buffer destruction */
~Buffer() {
virtual ~Buffer() {
free();
device->refDec();
}
/*! this class is not copyable */
private:
Buffer(const Buffer& other) DELETED; // do not implement
Buffer& operator =(const Buffer& other) DELETED; // do not implement
public:
/* inits and allocates the buffer */
void create(Device* device_in, size_t numBytes_in)
/*! frees the buffer */
virtual void free()
{
init(device_in, numBytes_in);
alloc();
}
/* inits the buffer */
void init(Device* device_in, size_t numBytes_in)
{
free();
device = device_in;
ptr = nullptr;
numBytes = numBytes_in;
shared = false;
if (!shared && ptr) {
#if defined(EMBREE_SYCL_SUPPORT)
if (dptr == ptr) {
dptr = nullptr;
}
#endif
device->free(ptr);
device->memoryMonitor(-ssize_t(this->bytes()), true);
ptr = nullptr;
}
#if defined(EMBREE_SYCL_SUPPORT)
if (!dshared && dptr) {
device->free(dptr);
device->memoryMonitor(-ssize_t(this->bytes()), true);
dptr = nullptr;
}
#endif
}
/*! sets shared buffer */
void set(Device* device_in, void* ptr_in, size_t numBytes_in)
{
free();
device = device_in;
ptr = (char*)ptr_in;
if (numBytes_in != (size_t)-1)
numBytes = numBytes_in;
shared = true;
}
/*! allocated buffer */
void alloc()
{
device->memoryMonitor(this->bytes(), false);
size_t b = (this->bytes()+15) & ssize_t(-16);
ptr = (char*)device->malloc(b,16);
}
/*! frees the buffer */
void free()
{
if (shared) return;
device->free(ptr);
device->memoryMonitor(-ssize_t(this->bytes()), true);
ptr = nullptr;
}
/*! gets buffer pointer */
void* data()
{
/* report error if buffer is not existing */
if (!device)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
/* return buffer */
return ptr;
}
/*! gets buffer pointer */
void* dataDevice()
{
/* report error if buffer is not existing */
if (!device)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer specified");
/* return buffer */
#if defined(EMBREE_SYCL_SUPPORT)
return dptr;
#else
return ptr;
#endif
}
/*! returns pointer to first element */
__forceinline char* getPtr() const {
__forceinline char* getPtr(BufferDataPointerType type) const
{
if (type == BufferDataPointerType::HOST) return getHostPtr();
else if (type == BufferDataPointerType::DEVICE) return getDevicePtr();
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified");
return nullptr;
}
/*! returns pointer to first element */
__forceinline virtual char* getHostPtr() const {
return ptr;
}
/*! returns pointer to first element */
__forceinline virtual char* getDevicePtr() const {
#if defined(EMBREE_SYCL_SUPPORT)
return dptr;
#else
return ptr;
#endif
}
/*! returns the number of bytes of the buffer */
__forceinline size_t bytes() const {
__forceinline size_t bytes() const {
return numBytes;
}
/*! returns true of the buffer is not empty */
__forceinline operator bool() const {
return ptr;
__forceinline operator bool() const {
return ptr;
}
__forceinline void commit() {
#if defined(EMBREE_SYCL_SUPPORT)
DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device);
if (gpu_device) {
sycl::queue queue(gpu_device->getGPUDevice());
commit(queue);
queue.wait_and_throw();
}
modified = false;
#endif
}
#if defined(EMBREE_SYCL_SUPPORT)
__forceinline sycl::event commit(sycl::queue queue) {
if (dptr == ptr)
return sycl::event();
modified = false;
return queue.memcpy(dptr, ptr, numBytes);
}
#endif
__forceinline bool needsCommit() const {
#if defined(EMBREE_SYCL_SUPPORT)
return (dptr == ptr) ? false : modified;
#else
return false;
#endif
}
__forceinline void setNeedsCommit(bool isModified = true) {
#if defined(EMBREE_SYCL_SUPPORT)
modified = isModified;
#endif
}
__forceinline void commitIfNeeded() {
if (needsCommit()) {
commit();
}
}
public:
Device* device; //!< device to report memory usage to
char* ptr; //!< pointer to buffer data
size_t numBytes; //!< number of bytes in the buffer
bool shared; //!< set if memory is shared with application
Device* device; //!< device to report memory usage to
size_t numBytes; //!< number of bytes in the buffer
char* ptr; //!< pointer to buffer data
#if defined(EMBREE_SYCL_SUPPORT)
char* dptr; //!< pointer to buffer data on device
#endif
bool shared; //!< set if memory is shared with application
#if defined(EMBREE_SYCL_SUPPORT)
bool dshared; //!< set if device memory is shared with application
bool modified; //!< to be set when host memory has been modified and dev needs update
#endif
};
/*! An untyped contiguous range of a buffer. This class does not own the buffer content. */
@@ -130,7 +242,7 @@ namespace embree
public:
/*! Buffer construction */
RawBufferView()
: ptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
: ptr_ofs(nullptr), dptr_ofs(nullptr), stride(0), num(0), format(RTC_FORMAT_UNDEFINED), modCounter(1), modified(true), userData(0) {}
public:
/*! sets the buffer view */
@@ -139,7 +251,8 @@ namespace embree
if ((offset_in + stride_in * num_in) > (stride_in * buffer_in->numBytes))
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "buffer range out of bounds");
ptr_ofs = buffer_in->ptr + offset_in;
ptr_ofs = buffer_in->getHostPtr() + offset_in;
dptr_ofs = buffer_in->getDevicePtr() + offset_in;
stride = stride_in;
num = num_in;
format = format_in;
@@ -148,28 +261,48 @@ namespace embree
buffer = buffer_in;
}
/*! returns pointer to the i'th element */
__forceinline char* getPtr(BufferDataPointerType pointerType) const
{
if (pointerType == BufferDataPointerType::HOST)
return ptr_ofs;
else if (pointerType == BufferDataPointerType::DEVICE)
return dptr_ofs;
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer data pointer type specified");
return nullptr;
}
/*! returns pointer to the first element */
__forceinline char* getPtr() const {
return ptr_ofs;
#if defined(__SYCL_DEVICE_ONLY__)
return dptr_ofs;
#else
return ptr_ofs;
#endif
}
/*! returns pointer to the i'th element */
__forceinline char* getPtr(size_t i) const
{
assert(i<num);
return ptr_ofs + i*stride;
#if defined(__SYCL_DEVICE_ONLY__)
assert(i<num);
return dptr_ofs + i*stride;
#else
return ptr_ofs + i*stride;
#endif
}
/*! returns the number of elements of the buffer */
__forceinline size_t size() const {
return num;
__forceinline size_t size() const {
return num;
}
/*! returns the number of bytes of the buffer */
__forceinline size_t bytes() const {
return num*stride;
__forceinline size_t bytes() const {
return num*stride;
}
/*! returns the buffer stride */
__forceinline unsigned getStride() const
{
@@ -186,6 +319,7 @@ namespace embree
__forceinline void setModified() {
modCounter++;
modified = true;
if (buffer) buffer->setNeedsCommit();
}
/*! mark buffer as modified or unmodified */
@@ -205,7 +339,7 @@ namespace embree
/*! returns true of the buffer is not empty */
__forceinline operator bool() const {
return ptr_ofs;
return ptr_ofs;
}
/*! checks padding to 16 byte check, fails hard */
@@ -217,6 +351,7 @@ namespace embree
public:
char* ptr_ofs; //!< base pointer plus offset
char* dptr_ofs; //!< base pointer plus offset in device memory
size_t stride; //!< stride of the buffer in bytes
size_t num; //!< number of elements in the buffer
RTCFormat format; //!< format of the buffer
@@ -233,9 +368,15 @@ namespace embree
public:
typedef T value_type;
#if defined(__SYCL_DEVICE_ONLY__)
/*! access to the ith element of the buffer */
__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(dptr_ofs + i*stride); }
__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(dptr_ofs + i*stride); }
#else
/*! access to the ith element of the buffer */
__forceinline T& operator [](size_t i) { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
__forceinline const T& operator [](size_t i) const { assert(i<num); return *(T*)(ptr_ofs + i*stride); }
#endif
};
template<>
@@ -250,14 +391,14 @@ namespace embree
__forceinline const Vec3fa operator [](size_t i) const
{
assert(i<num);
return Vec3fa::loadu(ptr_ofs + i*stride);
return Vec3fa::loadu(dptr_ofs + i*stride);
}
/*! writes the i'th element */
__forceinline void store(size_t i, const Vec3fa& v)
{
assert(i<num);
Vec3fa::storeu(ptr_ofs + i*stride, v);
Vec3fa::storeu(dptr_ofs + i*stride, v);
}
#else

View File

@@ -229,6 +229,7 @@ namespace embree
#endif
std::cout << std::endl;
#if defined(__X86_64__)
/* check of FTZ and DAZ flags are set in CSR */
if (!hasFTZ || !hasDAZ)
{
@@ -252,57 +253,68 @@ namespace embree
std::cout << std::endl;
}
}
#endif
std::cout << std::endl;
}
void Device::setDeviceErrorCode(RTCError error)
void Device::setDeviceErrorCode(RTCError error, std::string const& msg)
{
RTCError* stored_error = errorHandler.error();
if (*stored_error == RTC_ERROR_NONE)
*stored_error = error;
RTCErrorMessage* stored_error = errorHandler.error();
if (stored_error->error == RTC_ERROR_NONE) {
stored_error->error = error;
if (msg != "")
stored_error->msg = msg;
}
}
RTCError Device::getDeviceErrorCode()
{
RTCError* stored_error = errorHandler.error();
RTCError error = *stored_error;
*stored_error = RTC_ERROR_NONE;
return error;
RTCErrorMessage* stored_error = errorHandler.error();
RTCErrorMessage error = *stored_error;
stored_error->error = RTC_ERROR_NONE;
return error.error;
}
void Device::setThreadErrorCode(RTCError error)
const char* Device::getDeviceLastErrorMessage()
{
RTCError* stored_error = g_errorHandler.error();
if (*stored_error == RTC_ERROR_NONE)
*stored_error = error;
RTCErrorMessage* stored_error = errorHandler.error();
return stored_error->msg.c_str();
}
void Device::setThreadErrorCode(RTCError error, std::string const& msg)
{
RTCErrorMessage* stored_error = g_errorHandler.error();
if (stored_error->error == RTC_ERROR_NONE) {
stored_error->error = error;
if (msg != "")
stored_error->msg = msg;
}
}
RTCError Device::getThreadErrorCode()
{
RTCError* stored_error = g_errorHandler.error();
RTCError error = *stored_error;
*stored_error = RTC_ERROR_NONE;
return error;
RTCErrorMessage* stored_error = g_errorHandler.error();
RTCErrorMessage error = *stored_error;
stored_error->error = RTC_ERROR_NONE;
return error.error;
}
const char* Device::getThreadLastErrorMessage()
{
RTCErrorMessage* stored_error = g_errorHandler.error();
return stored_error->msg.c_str();
}
void Device::process_error(Device* device, RTCError error, const char* str)
{
{
/* store global error code when device construction failed */
if (!device)
return setThreadErrorCode(error);
return setThreadErrorCode(error, str ? std::string(str) : std::string());
/* print error when in verbose mode */
if (device->verbosity(1))
if (device->verbosity(1))
{
switch (error) {
case RTC_ERROR_NONE : std::cerr << "Embree: No error"; break;
case RTC_ERROR_UNKNOWN : std::cerr << "Embree: Unknown error"; break;
case RTC_ERROR_INVALID_ARGUMENT : std::cerr << "Embree: Invalid argument"; break;
case RTC_ERROR_INVALID_OPERATION: std::cerr << "Embree: Invalid operation"; break;
case RTC_ERROR_OUT_OF_MEMORY : std::cerr << "Embree: Out of memory"; break;
case RTC_ERROR_UNSUPPORTED_CPU : std::cerr << "Embree: Unsupported CPU"; break;
default : std::cerr << "Embree: Invalid error code"; break;
};
std::cerr << "Embree: " << getErrorString(error);
if (str) std::cerr << ", (" << str << ")";
std::cerr << std::endl;
}
@@ -312,7 +324,7 @@ namespace embree
device->error_function(device->error_function_userptr,error,str);
/* record error code */
device->setDeviceErrorCode(error);
device->setDeviceErrorCode(error, str ? std::string(str) : std::string());
}
void Device::memoryMonitor(ssize_t bytes, bool post)
@@ -570,6 +582,22 @@ namespace embree
case RTC_DEVICE_PROPERTY_PARALLEL_COMMIT_SUPPORTED: return 0;
#endif
#if defined(EMBREE_SYCL_SUPPORT)
case RTC_DEVICE_PROPERTY_CPU_DEVICE: {
if (!dynamic_cast<DeviceGPU*>(this))
return 1;
return 0;
};
case RTC_DEVICE_PROPERTY_SYCL_DEVICE: {
if (!dynamic_cast<DeviceGPU*>(this))
return 0;
return 1;
};
#else
case RTC_DEVICE_PROPERTY_CPU_DEVICE: return 1;
case RTC_DEVICE_PROPERTY_SYCL_DEVICE: return 0;
#endif
default: throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown readable property"); break;
};
}
@@ -578,10 +606,31 @@ namespace embree
return alignedMalloc(size,align);
}
void* Device::malloc(size_t size, size_t align, EmbreeMemoryType type) {
return alignedMalloc(size,align);
}
void Device::free(void* ptr) {
alignedFree(ptr);
}
const std::vector<std::string> Device::error_strings = {
"No Error",
"Unknown error",
"Invalid argument",
"Invalid operation",
"Out of Memory",
"Unsupported CPU",
"Build cancelled",
"Level Zero raytracing support missing"
};
const char* Device::getErrorString(RTCError error) {
if (error >= 0 && error < error_strings.size()) {
return error_strings.at(error).c_str();
}
return "Invalid error code";
}
#if defined(EMBREE_SYCL_SUPPORT)
@@ -613,7 +662,6 @@ namespace embree
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "zeDriverGetExtensionProperties failed");
#if defined(EMBREE_SYCL_L0_RTAS_BUILDER)
bool ze_rtas_builder = false;
for (uint32_t i=0; i<extensions.size(); i++)
{
@@ -621,23 +669,18 @@ namespace embree
ze_rtas_builder = true;
}
if (!ze_rtas_builder)
throw_RTCError(RTC_ERROR_UNKNOWN, "ZE_experimental_rtas_builder extension not found");
throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "ZE_experimental_rtas_builder extension not found. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
result = ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::LEVEL_ZERO);
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot load ZE_experimental_rtas_builder extension");
result = ZeWrapper::initRTASBuilder(hDriver);
if (result == ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE) {
throw_RTCError(RTC_ERROR_LEVEL_ZERO_RAYTRACING_SUPPORT_MISSING, "cannot load ZE_experimental_rtas_builder extension. Please install a recent driver. On Linux, make sure that the package intel-level-zero-gpu-raytracing is installed");
}
if (result != ZE_RESULT_SUCCESS)
throw_RTCError(RTC_ERROR_UNKNOWN, "cannot initialize ZE_experimental_rtas_builder extension");
#else
ZeWrapper::initRTASBuilder(hDriver,ZeWrapper::INTERNAL);
#endif
if (State::verbosity(1))
{
if (ZeWrapper::rtas_builder == ZeWrapper::INTERNAL)
std::cout << " Internal RTAS Builder" << std::endl;
else
std::cout << " Level Zero RTAS Builder" << std::endl;
std::cout << " Level Zero RTAS Builder" << std::endl;
}
/* check if extension library can get loaded */
@@ -670,15 +713,17 @@ namespace embree
}
void DeviceGPU::enter() {
enableUSMAllocEmbree(&gpu_context,&gpu_device);
}
void DeviceGPU::leave() {
disableUSMAllocEmbree();
}
void* DeviceGPU::malloc(size_t size, size_t align) {
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EMBREE_USM_SHARED_DEVICE_READ_ONLY);
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY);
}
void* DeviceGPU::malloc(size_t size, size_t align, EmbreeMemoryType type) {
return alignedSYCLMalloc(&gpu_context,&gpu_device,size,align,EmbreeUSMMode::DEVICE_READ_ONLY,type);
}
void DeviceGPU::free(void* ptr) {
@@ -688,7 +733,16 @@ namespace embree
void DeviceGPU::setSYCLDevice(const sycl::device sycl_device_in) {
gpu_device = sycl_device_in;
}
// turn off deprecation warning for host_unified_memory property usage.
// there is currently no equivalent SYCL aspect that replaces this property.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
bool DeviceGPU::has_unified_memory() const {
return gpu_device.get_info<sycl::info::device::host_unified_memory>();
}
#pragma GCC diagnostic pop
#endif
DeviceEnterLeave::DeviceEnterLeave (RTCDevice hdevice)

View File

@@ -38,7 +38,7 @@ namespace embree
__forceinline pointer allocate( size_type n ) {
assert(device);
return (pointer) device->malloc(n*sizeof(T),alignment);
return (pointer) device->malloc(n*sizeof(T),alignment,EmbreeMemoryType::MALLOC);
}
__forceinline void deallocate( pointer p, size_type n ) {
@@ -75,17 +75,27 @@ namespace embree
void print();
/*! sets the error code */
void setDeviceErrorCode(RTCError error);
void setDeviceErrorCode(RTCError error, std::string const& msg = "");
/*! returns and clears the error code */
RTCError getDeviceErrorCode();
/*! Returns the string representation for the error code. For example, for RTC_ERROR_UNKNOWN the string "RTC_ERROR_UNKNOWN" will be returned. */
static char* getDeviceErrorString();
/*! returns the last error message */
const char* getDeviceLastErrorMessage();
/*! sets the error code */
static void setThreadErrorCode(RTCError error);
static void setThreadErrorCode(RTCError error, std::string const& msg = "");
/*! returns and clears the error code */
static RTCError getThreadErrorCode();
/*! returns the last error message */
static const char* getThreadLastErrorMessage();
/*! processes error codes, do not call directly */
static void process_error(Device* device, RTCError error, const char* str);
@@ -107,12 +117,23 @@ namespace embree
/*! leave device by setting up some global state */
virtual void leave() {}
/*! buffer allocation */
/*! buffer allocation - using USM shared */
virtual void* malloc(size_t size, size_t align);
/*! buffer allocation */
virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type);
/*! buffer deallocation */
virtual void free(void* ptr);
/*! returns true if device is of type DeviceGPU */
virtual bool is_gpu() const { return false; }
/*! returns true if device and host have shared memory system (e.g., integrated GPU) */
virtual bool has_unified_memory() const { return true; }
virtual EmbreeMemoryType get_memory_type(void* ptr) const { return EmbreeMemoryType::MALLOC; }
private:
/*! initializes the tasking system */
@@ -140,6 +161,13 @@ namespace embree
#if defined(EMBREE_TARGET_SIMD8)
std::unique_ptr<BVH8Factory> bvh8_factory;
#endif
private:
static const std::vector<std::string> error_strings;
public:
static const char* getErrorString(RTCError error);
};
#if defined(EMBREE_SYCL_SUPPORT)
@@ -154,11 +182,27 @@ namespace embree
virtual void enter() override;
virtual void leave() override;
virtual void* malloc(size_t size, size_t align) override;
virtual void* malloc(size_t size, size_t align, EmbreeMemoryType type) override;
virtual void free(void* ptr) override;
/* set SYCL device */
void setSYCLDevice(const sycl::device sycl_device);
/*! returns true if device is of type DeviceGPU */
virtual bool is_gpu() const override { return true; }
/*! returns true if device and host have shared memory system (e.g., integrated GPU) */
virtual bool has_unified_memory() const override;
virtual EmbreeMemoryType get_memory_type(void* ptr) const override {
switch(sycl::get_pointer_type(ptr, gpu_context)) {
case sycl::usm::alloc::host: return EmbreeMemoryType::USM_HOST;
case sycl::usm::alloc::device: return EmbreeMemoryType::USM_DEVICE;
case sycl::usm::alloc::shared: return EmbreeMemoryType::USM_SHARED;
default: return EmbreeMemoryType::MALLOC;
}
}
private:
sycl::context gpu_context;
sycl::device gpu_device;

View File

@@ -116,7 +116,7 @@ namespace embree
{
}
void Geometry::enable ()
void Geometry::enable ()
{
if (isEnabled())
return;

View File

@@ -126,10 +126,8 @@ namespace embree
};
/*! Base class all geometries are derived from */
class Geometry : public RefCount
class __aligned(16) Geometry : public RefCount
{
ALIGNED_CLASS_USM_(16);
friend class Scene;
public:
@@ -372,7 +370,7 @@ namespace embree
/*! called before every build */
virtual void preCommit();
/*! called after every build */
virtual void postCommit();
@@ -469,7 +467,7 @@ namespace embree
}
/*! Gets specified buffer. */
virtual void* getBuffer(RTCBufferType type, unsigned int slot) {
virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"operation not supported for this geometry");
}
@@ -543,6 +541,17 @@ namespace embree
return numTimeSteps-1;
}
public:
/*! methods for converting host geometry data to device geometry data */
virtual size_t getGeometryDataDeviceByteSize() const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"getGeometryDataDeviceByteSize not implemented for this geometry");
}
virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const {
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"convertToDeviceRepresentation not implemented for this geometry");
}
public:
virtual PrimInfo createPrimRefArray(PrimRef* prims, const range<size_t>& r, size_t k, unsigned int geomID) const {

View File

@@ -143,7 +143,7 @@ namespace embree
vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);
const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
const vbool<K> vv = (abs(u) <= vfloat<K>(FLT_LARGE));
const vbool<K> vv = (abs(v) <= vfloat<K>(FLT_LARGE));
const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);

View File

@@ -46,19 +46,23 @@ RTC_NAMESPACE_BEGIN;
RTC_API bool rtcIsSYCLDeviceSupported(const sycl::device device)
{
RTC_CATCH_BEGIN;
RTC_TRACE(rtcIsSYCLDeviceSupported);
return rthwifIsSYCLDeviceSupported(device) > 0;
RTC_CATCH_END(nullptr);
try {
RTC_TRACE(rtcIsSYCLDeviceSupported);
return rthwifIsSYCLDeviceSupported(device) > 0;
} catch (...) {
return false;
}
return false;
}
RTC_API int rtcSYCLDeviceSelector(const sycl::device device)
{
RTC_CATCH_BEGIN;
RTC_TRACE(rtcSYCLDeviceSelector);
return rthwifIsSYCLDeviceSupported(device);
RTC_CATCH_END(nullptr);
try {
RTC_TRACE(rtcSYCLDeviceSelector);
return rthwifIsSYCLDeviceSupported(device);
} catch (...) {
return -1;
}
return -1;
}
@@ -79,6 +83,29 @@ RTC_NAMESPACE_BEGIN;
RTC_CATCH_END(nullptr);
}
RTC_API_CPP sycl::event rtcCommitSceneWithQueue (RTCScene hscene, sycl::queue queue)
{
Scene* scene = (Scene*) hscene;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcCommitSceneWithQueue);
RTC_VERIFY_HANDLE(hscene);
RTC_ENTER_DEVICE(hscene);
return scene->commit(false, queue);
RTC_CATCH_END2(scene);
return sycl::event();
}
RTC_API_CPP sycl::event rtcCommitBufferWithQueue(RTCBuffer hbuffer, sycl::queue queue) {
Buffer* buffer = (Buffer*)hbuffer;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcCommitBufferWithQueue);
RTC_VERIFY_HANDLE(hbuffer);
RTC_ENTER_DEVICE(hbuffer);
return buffer->commit(queue);
RTC_CATCH_END2(buffer);
return sycl::event();
}
#endif
RTC_API void rtcRetainDevice(RTCDevice hdevice)
@@ -138,6 +165,17 @@ RTC_NAMESPACE_BEGIN;
return RTC_ERROR_UNKNOWN;
}
RTC_API const char* rtcGetDeviceLastErrorMessage(RTCDevice hdevice)
{
Device* device = (Device*) hdevice;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcGetDeviceLastErrorMessage);
if (device == nullptr) return Device::getThreadLastErrorMessage();
else return device->getDeviceLastErrorMessage();
RTC_CATCH_END(device);
return "";
}
RTC_API void rtcSetDeviceErrorFunction(RTCDevice hdevice, RTCErrorFunction error, void* userPtr)
{
Device* device = (Device*) hdevice;
@@ -163,7 +201,19 @@ RTC_NAMESPACE_BEGIN;
RTC_TRACE(rtcNewBuffer);
RTC_VERIFY_HANDLE(hdevice);
RTC_ENTER_DEVICE(hdevice);
Buffer* buffer = new Buffer((Device*)hdevice, byteSize);
Buffer* buffer = new Buffer((Device*)hdevice, byteSize, nullptr);
return (RTCBuffer)buffer->refInc();
RTC_CATCH_END((Device*)hdevice);
return nullptr;
}
RTC_API RTCBuffer rtcNewBufferHostDevice(RTCDevice hdevice, size_t byteSize)
{
RTC_CATCH_BEGIN;
RTC_TRACE(rtcNewBufferHostDevice);
RTC_VERIFY_HANDLE(hdevice);
RTC_ENTER_DEVICE(hdevice);
Buffer* buffer = new Buffer((Device*)hdevice, byteSize, nullptr, nullptr);
return (RTCBuffer)buffer->refInc();
RTC_CATCH_END((Device*)hdevice);
return nullptr;
@@ -181,6 +231,30 @@ RTC_NAMESPACE_BEGIN;
return nullptr;
}
RTC_API RTCBuffer rtcNewSharedBufferHostDevice(RTCDevice hdevice, void* ptr, size_t byteSize)
{
RTC_CATCH_BEGIN;
RTC_TRACE(rtcNewSharedBufferHostDevice);
RTC_VERIFY_HANDLE(hdevice);
RTC_ENTER_DEVICE(hdevice);
Buffer* buffer = new Buffer((Device*)hdevice, byteSize, ptr, nullptr);
return (RTCBuffer)buffer->refInc();
RTC_CATCH_END((Device*)hdevice);
return nullptr;
}
RTC_API void* rtcGetBufferDataDevice(RTCBuffer hbuffer)
{
Buffer* buffer = (Buffer*)hbuffer;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcGetBufferDataDevice);
RTC_VERIFY_HANDLE(hbuffer);
RTC_ENTER_DEVICE(hbuffer);
return buffer->dataDevice();
RTC_CATCH_END2(buffer);
return nullptr;
}
RTC_API void* rtcGetBufferData(RTCBuffer hbuffer)
{
Buffer* buffer = (Buffer*)hbuffer;
@@ -215,6 +289,16 @@ RTC_NAMESPACE_BEGIN;
RTC_CATCH_END2(buffer);
}
RTC_API void rtcCommitBuffer(RTCBuffer hbuffer) {
Buffer* buffer = (Buffer*)hbuffer;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcCommitBuffer);
RTC_VERIFY_HANDLE(hbuffer);
RTC_ENTER_DEVICE(hbuffer);
buffer->commit();
RTC_CATCH_END2(buffer);
}
RTC_API RTCScene rtcNewScene (RTCDevice hdevice)
{
RTC_CATCH_BEGIN;
@@ -238,6 +322,20 @@ RTC_NAMESPACE_BEGIN;
return (RTCDevice)nullptr;
}
RTC_API RTCTraversable rtcGetSceneTraversable(RTCScene hscene)
{
Scene* scene = (Scene*) hscene;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcGetSceneTraversable);
RTC_VERIFY_HANDLE(hscene);
RTCTraversable traversable = (RTCTraversable)scene->getTraversable();
if (!traversable)
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"Traversable is NULL. The scene has to be committed first.");
return traversable;
RTC_CATCH_END2(scene);
return (RTCTraversable)nullptr;
}
RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene hscene, RTCProgressMonitorFunction progress, void* ptr)
{
Scene* scene = (Scene*) hscene;
@@ -257,15 +355,10 @@ RTC_NAMESPACE_BEGIN;
RTC_TRACE(rtcSetSceneBuildQuality);
RTC_VERIFY_HANDLE(hscene);
RTC_ENTER_DEVICE(hscene);
//if (quality != RTC_BUILD_QUALITY_LOW &&
// quality != RTC_BUILD_QUALITY_MEDIUM &&
// quality != RTC_BUILD_QUALITY_HIGH)
// throw std::runtime_error("invalid build quality");
if (quality != RTC_BUILD_QUALITY_LOW &&
quality != RTC_BUILD_QUALITY_MEDIUM &&
quality != RTC_BUILD_QUALITY_HIGH) {
abort();
}
quality != RTC_BUILD_QUALITY_HIGH)
abort(); //throw std::runtime_error("invalid build quality");
scene->setBuildQuality(quality);
RTC_CATCH_END2(scene);
}
@@ -287,20 +380,28 @@ RTC_NAMESPACE_BEGIN;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcGetSceneFlags);
RTC_VERIFY_HANDLE(hscene);
RTC_ENTER_DEVICE(hscene);
//RTC_ENTER_DEVICE(hscene);
return scene->getSceneFlags();
RTC_CATCH_END2(scene);
return RTC_SCENE_FLAG_NONE;
}
RTC_API void rtcCommitScene (RTCScene hscene)
RTC_API_EXTERN_C bool prefetchUSMSharedOnGPU(RTCScene scene);
RTC_API void rtcCommitScene (RTCScene hscene)
{
Scene* scene = (Scene*) hscene;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcCommitScene);
RTC_VERIFY_HANDLE(hscene);
RTC_ENTER_DEVICE(hscene);
scene->commit(false);
#if defined(EMBREE_SYCL_SUPPORT)
//prefetchUSMSharedOnGPU(hscene);
#endif
RTC_CATCH_END2(scene);
}
@@ -311,6 +412,7 @@ RTC_NAMESPACE_BEGIN;
RTC_TRACE(rtcJoinCommitScene);
RTC_VERIFY_HANDLE(hscene);
RTC_ENTER_DEVICE(hscene);
scene->commit(true);
RTC_CATCH_END2(scene);
}
@@ -1130,7 +1232,159 @@ RTC_NAMESPACE_BEGIN;
rtcForwardOccludedN<RTCRay16,16>(valid, args, hscene, iray, instID, instPrimID);
RTC_CATCH_END2(scene);
}
RTC_API bool rtcTraversablePointQuery(RTCTraversable htraversable, RTCPointQuery* query, RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void* userPtr)
{
return rtcPointQuery((RTCScene)htraversable, query, userContext, queryFunc, userPtr);
}
RTC_API bool rtcTraversablePointQuery4 (const int* valid, RTCTraversable htraversable, RTCPointQuery4* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
{
return rtcPointQuery4(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN);
}
RTC_API bool rtcTraversablePointQuery8 (const int* valid, RTCTraversable htraversable, RTCPointQuery8* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
{
return rtcPointQuery8(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN);
}
RTC_API bool rtcTraversablePointQuery16 (const int* valid, RTCTraversable htraversable, RTCPointQuery16* query, struct RTCPointQueryContext* userContext, RTCPointQueryFunction queryFunc, void** userPtrN)
{
return rtcPointQuery16(valid, (RTCScene)htraversable, query, userContext, queryFunc, userPtrN);
}
RTC_API void rtcTraversableIntersect1 (RTCTraversable htraversable, RTCRayHit* rayhit, RTCIntersectArguments* args)
{
rtcIntersect1((RTCScene)htraversable, rayhit, args);
}
RTC_API void rtcTraversableForwardIntersect1 (const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID)
{
rtcForwardIntersect1(args, (RTCScene)htraversable, iray_, instID);
}
RTC_API void rtcTraversableForwardIntersect1Ex(const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID, unsigned int instPrimID)
{
rtcForwardIntersect1Ex(args, (RTCScene)htraversable, iray_, instID, instPrimID);
}
RTC_API void rtcTraversableIntersect4 (const int* valid, RTCTraversable htraversable, RTCRayHit4* rayhit, RTCIntersectArguments* args)
{
rtcIntersect4(valid, (RTCScene)htraversable, rayhit, args);
}
template<typename RTCRay, typename RTCRayHit, int N>
__forceinline void rtcTraversableForwardIntersectN(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardIntersetN(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcTraversableForwardIntersect4(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID)
{
rtcForwardIntersect4(valid, args, (RTCScene)htraversable, iray, instID);
}
RTC_API void rtcTraversableForwardIntersect4Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardIntersect4Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcTraversableIntersect8 (const int* valid, RTCTraversable htraversable, RTCRayHit8* rayhit, RTCIntersectArguments* args)
{
rtcIntersect8(valid, (RTCScene)htraversable, rayhit, args);
}
RTC_API void rtcTraversableForwardIntersect8(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID)
{
rtcForwardIntersect8(valid, args, (RTCScene)htraversable, iray, instID);
}
RTC_API void rtcTraversableForwardIntersect8Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardIntersect8Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcTraversableIntersect16 (const int* valid, RTCTraversable htraversable, RTCRayHit16* rayhit, RTCIntersectArguments* args)
{
rtcIntersect16(valid, (RTCScene)htraversable, rayhit, args);
}
RTC_API void rtcTraversableForwardIntersect16(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID)
{
rtcForwardIntersect16(valid, args, (RTCScene)htraversable, iray, instID);
}
RTC_API void rtcTraversableForwardIntersect16Ex(const int* valid, const RTCIntersectFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardIntersect16Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcTraversableOccluded1 (RTCTraversable htraversable, RTCRay* ray, RTCOccludedArguments* args)
{
rtcOccluded1((RTCScene)htraversable, ray, args);
}
RTC_API void rtcTraversableForwardOccluded1 (const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID)
{
rtcForwardOccluded1(args, (RTCScene)htraversable, iray_, instID);
}
RTC_API void rtcTraversableForwardOccluded1Ex(const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray_, unsigned int instID, unsigned int instPrimID)
{
rtcForwardOccluded1Ex(args, (RTCScene)htraversable, iray_, instID, instPrimID);
}
RTC_API void rtcTraversableOccluded4 (const int* valid, RTCTraversable htraversable, RTCRay4* ray, RTCOccludedArguments* args)
{
rtcOccluded4(valid, (RTCScene)htraversable, ray, args);
}
template<typename RTCRay, int N>
__forceinline void rtcTraversableForwardOccludedN (const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardOccludedN(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcTraversableForwardOccluded4(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID)
{
rtcForwardOccluded4(valid, args, (RTCScene)htraversable, iray, instID);
}
RTC_API void rtcTraversableForwardOccluded4Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay4* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardOccluded4Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcTraversableOccluded8 (const int* valid, RTCTraversable htraversable, RTCRay8* ray, RTCOccludedArguments* args)
{
rtcOccluded8(valid, (RTCScene)htraversable, ray, args);
}
RTC_API void rtcTraversableForwardOccluded8(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID)
{
rtcForwardOccluded8(valid, args, (RTCScene)htraversable, iray, instID);
}
RTC_API void rtcTraversableForwardOccluded8Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay8* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardOccluded8Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcTraversableOccluded16 (const int* valid, RTCTraversable htraversable, RTCRay16* ray, RTCOccludedArguments* args)
{
rtcOccluded16(valid, (RTCScene)htraversable, ray, args);
}
RTC_API void rtcTraversableForwardOccluded16(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID)
{
rtcForwardOccluded16(valid, args, (RTCScene)htraversable, iray, instID);
}
RTC_API void rtcTraversableForwardOccluded16Ex(const int* valid, const RTCOccludedFunctionNArguments* args, RTCTraversable htraversable, RTCRay16* iray, unsigned int instID, unsigned int instPrimID)
{
rtcForwardOccluded16Ex(valid, args, (RTCScene)htraversable, iray, instID, instPrimID);
}
RTC_API void rtcRetainScene (RTCScene hscene)
{
Scene* scene = (Scene*) hscene;
@@ -1292,6 +1546,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
RTC_CATCH_END2(scene);
}
RTC_API void rtcGetGeometryTransformFromTraversable(RTCTraversable htraversable, unsigned int geomID, float time, RTCFormat format, void* xfm)
{
rtcGetGeometryTransformFromScene((RTCScene)htraversable, geomID, time, format, xfm);
}
RTC_API void rtcInvokeIntersectFilterFromGeometry(const struct RTCIntersectFunctionNArguments* const args_i, const struct RTCFilterFunctionNArguments* filter_args)
{
IntersectFunctionNArguments* args = (IntersectFunctionNArguments*) args_i;
@@ -1568,17 +1827,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
RTC_TRACE(rtcSetGeometryBuildQuality);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);
//if (quality != RTC_BUILD_QUALITY_LOW &&
// quality != RTC_BUILD_QUALITY_MEDIUM &&
// quality != RTC_BUILD_QUALITY_HIGH &&
// quality != RTC_BUILD_QUALITY_REFIT)
// throw std::runtime_error("invalid build quality");
if (quality != RTC_BUILD_QUALITY_LOW &&
quality != RTC_BUILD_QUALITY_MEDIUM &&
quality != RTC_BUILD_QUALITY_HIGH &&
quality != RTC_BUILD_QUALITY_REFIT) {
abort();
}
quality != RTC_BUILD_QUALITY_REFIT)
abort(); //throw std::runtime_error("invalid build quality");
geometry->setBuildQuality(quality);
RTC_CATCH_END2(geometry);
}
@@ -1667,6 +1920,37 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
RTC_CATCH_END2(geometry);
}
RTC_API void rtcSetSharedGeometryBufferHostDevice(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, const void* ptr, const void* dptr, size_t byteOffset, size_t byteStride, size_t itemCount)
{
Geometry* geometry = (Geometry*) hgeometry;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcSetSharedGeometryBufferHostDevice);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);
#if defined(EMBREE_SYCL_SUPPORT)
if (geometry->device->is_gpu())
{
if (itemCount > 0xFFFFFFFFu)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
if ((ptr == nullptr) || (dptr == nullptr))
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"host and device pointer may not be NULL pointers when using SYCL devices");
Ref<Buffer> buffer = new Buffer(geometry->device, itemCount*byteStride, (char*)ptr + byteOffset, (char*)dptr + byteOffset);
geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
}
else
#endif
{
if (dptr != nullptr)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"Embree device is no SYCL device. Device pointer argument must not be valid, pass NULL instead");
rtcSetSharedGeometryBuffer(hgeometry, type, slot, format, ptr, byteOffset, byteStride, itemCount);
}
RTC_CATCH_END2(geometry);
}
RTC_API void* rtcSetNewGeometryBuffer(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount)
{
Geometry* geometry = (Geometry*) hgeometry;
@@ -1683,13 +1967,54 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
if (type == RTC_BUFFER_TYPE_VERTEX || type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
bytes += (16 - (byteStride%16))%16;
Ref<Buffer> buffer = new Buffer(geometry->device, bytes);
Ref<Buffer> buffer = new Buffer(geometry->device, bytes, nullptr);
geometry->setBuffer(type, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
return buffer->data();
RTC_CATCH_END2(geometry);
return nullptr;
}
RTC_API void rtcSetNewGeometryBufferHostDevice(RTCGeometry hgeometry, RTCBufferType bufferType, unsigned int slot, RTCFormat format, size_t byteStride, size_t itemCount, void** ptr, void** dptr)
{
Geometry* geometry = (Geometry*) hgeometry;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcSetNewGeometryBufferHostDevice);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);
#if defined(EMBREE_SYCL_SUPPORT)
if (geometry->device->is_gpu())
{
if (itemCount > 0xFFFFFFFFu)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"buffer too large");
/* vertex buffers need to get overallocated slightly as elements are accessed using SSE loads */
size_t bytes = itemCount*byteStride;
if (bufferType == RTC_BUFFER_TYPE_VERTEX || bufferType == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
bytes += (16 - (byteStride%16))%16;
Ref<Buffer> buffer = new Buffer(geometry->device, bytes, nullptr, nullptr);
geometry->setBuffer(bufferType, slot, format, buffer, 0, byteStride, (unsigned int)itemCount);
if(ptr)
*ptr = buffer->getHostPtr();
if (dptr)
*dptr = buffer->getDevicePtr();
}
else
#endif
{
void* tmp = rtcSetNewGeometryBuffer(hgeometry, bufferType, slot, format, byteStride, itemCount);
if(ptr)
*ptr = tmp;
if (dptr) {
*dptr = tmp;
}
}
RTC_CATCH_END2(geometry);
}
RTC_API void* rtcGetGeometryBufferData(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
{
Geometry* geometry = (Geometry*) hgeometry;
@@ -1697,7 +2022,19 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
RTC_TRACE(rtcGetGeometryBufferData);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);
return geometry->getBuffer(type, slot);
return geometry->getBufferData(type, slot, BufferDataPointerType::HOST);
RTC_CATCH_END2(geometry);
return nullptr;
}
RTC_API void* rtcGetGeometryBufferDataDevice(RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
{
Geometry* geometry = (Geometry*) hgeometry;
RTC_CATCH_BEGIN;
RTC_TRACE(rtcGetGeometryBufferDataDevice);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);
return geometry->getBufferData(type, slot, BufferDataPointerType::DEVICE);
RTC_CATCH_END2(geometry);
return nullptr;
}
@@ -1713,7 +2050,7 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
RTC_CATCH_END2(geometry);
}
RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
RTC_API void rtcUpdateGeometryBuffer (RTCGeometry hgeometry, RTCBufferType type, unsigned int slot)
{
Geometry* geometry = (Geometry*) hgeometry;
RTC_CATCH_BEGIN;
@@ -1784,6 +2121,11 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
return nullptr;
}
RTC_API void* rtcGetGeometryUserDataFromTraversable (RTCTraversable htraversable, unsigned int geomID)
{
return rtcGetGeometryUserDataFromScene((RTCScene)htraversable, geomID);
}
RTC_API void rtcSetGeometryBoundsFunction (RTCGeometry hgeometry, RTCBoundsFunction bounds, void* userPtr)
{
Geometry* geometry = (Geometry*) hgeometry;
@@ -2060,4 +2402,14 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
return nullptr;
}
RTC_API const char* rtcGetErrorString(RTCError error)
{
RTC_CATCH_BEGIN;
RTC_TRACE(rtcGetErrorString);
return Device::getErrorString(error);
RTC_CATCH_END(nullptr);
return nullptr;
}
RTC_NAMESPACE_END

View File

@@ -42,10 +42,21 @@ namespace embree
Scene::Scene (Device* device)
: device(device),
scene_device(nullptr),
flags_modified(true), enabled_geometry_types(0),
scene_flags(RTC_SCENE_FLAG_NONE),
quality_flags(RTC_BUILD_QUALITY_MEDIUM),
modified(true),
maxTimeSegments(0),
#if defined(EMBREE_SYCL_SUPPORT)
geometries_device(nullptr),
geometry_data_device(nullptr),
num_geometries(0),
geometry_data_byte_size(0),
offsets(nullptr),
geometries_host(nullptr),
geometry_data_host(nullptr),
#endif
taskGroup(new TaskGroup()),
progressInterface(this), progress_monitor_function(nullptr), progress_monitor_ptr(nullptr), progress_monitor_counter(0)
{
@@ -55,8 +66,8 @@ namespace embree
/* use proper device and context for SYCL allocations */
#if defined(EMBREE_SYCL_SUPPORT)
if (DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device))
hwaccel = AccelBuffer(AccelAllocator<char>(device,gpu_device->getGPUDevice(),gpu_device->getGPUContext()),0);
if (dynamic_cast<DeviceGPU*>(device))
accelBuffer = AccelBuffer(device);
#endif
/* one can overwrite flags through device for debugging */
@@ -68,6 +79,27 @@ namespace embree
Scene::~Scene() noexcept
{
#if defined(EMBREE_SYCL_SUPPORT)
if (geometry_data_device) {
device->free(geometry_data_device);
}
if (geometries_device) {
device->free(geometries_device);
}
if (scene_device) {
device->free(scene_device);
}
if (offsets) {
device->free(offsets);
}
if (geometries_host) {
device->free(geometries_host);
}
if (geometry_data_host) {
device->free(geometry_data_host);
}
#endif
device->refDec();
}
@@ -788,9 +820,8 @@ namespace embree
void Scene::build_gpu_accels()
{
#if defined(EMBREE_SYCL_SUPPORT)
const BBox3f aabb = rthwifBuild(this,hwaccel);
bounds = LBBox<embree::Vec3fa>(aabb);
hwaccel_bounds = aabb;
accelBuffer.build(this);
bounds = LBBox<embree::Vec3fa>(accelBuffer.getBounds());
#endif
}
@@ -799,6 +830,7 @@ namespace embree
checkIfModifiedAndSet();
if (!isModified()) return;
/* print scene statistics */
if (device->verbosity(2))
printStatistics();
@@ -825,8 +857,18 @@ namespace embree
std::plus<GeometryCounts>()
);
/* calculate maximal number of motion blur time segments in scene */
maxTimeSegments = 1;
for (size_t geomID=0; geomID<size(); geomID++)
{
Geometry* geom = get(geomID);
if (geom == nullptr) continue;
maxTimeSegments = std::max(maxTimeSegments, geom->numTimeSegments());
}
#if defined(EMBREE_SYCL_SUPPORT)
if (DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device))
DeviceGPU* gpu_device = dynamic_cast<DeviceGPU*>(device);
if (gpu_device)
build_gpu_accels();
else
#endif
@@ -865,10 +907,36 @@ namespace embree
RTCSceneFlags Scene::getSceneFlags() const {
return scene_flags;
}
#if defined(EMBREE_SYCL_SUPPORT)
sycl::event Scene::commit (bool join, sycl::queue queue)
{
commit_internal(join);
return syncWithDevice(queue);
}
#endif
void Scene::commit (bool join)
{
commit_internal(join);
#if defined(EMBREE_SYCL_SUPPORT)
syncWithDevice();
#endif
}
Scene* Scene::getTraversable() {
#if defined(EMBREE_SYCL_SUPPORT)
if(device->is_gpu()) {
return scene_device;
}
#endif
return this;
}
#if defined(TASKING_INTERNAL)
void Scene::commit (bool join)
void Scene::commit_internal (bool join)
{
Lock<MutexSys> buildLock(buildMutex,false);
@@ -910,7 +978,7 @@ namespace embree
#if defined(TASKING_TBB)
void Scene::commit (bool join)
void Scene::commit_internal (bool join)
{
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR < 8)
if (join)
@@ -974,7 +1042,7 @@ namespace embree
#if defined(TASKING_PPL)
void Scene::commit (bool join)
void Scene::commit_internal (bool join)
{
#if defined(TASKING_PPL)
if (join)
@@ -1011,6 +1079,7 @@ namespace embree
accels_clear();
throw;
}
}
#endif
@@ -1029,4 +1098,5 @@ namespace embree
}
}
}
}

View File

@@ -25,15 +25,18 @@
#include "../sycl/rthwif_embree_builder.h"
#endif
#if !defined(EMBREE_SYCL_SUPPORT)
namespace sycl {
struct queue;
}
#endif
namespace embree
{
struct TaskGroup;
/*! Base class all scenes are derived from */
class Scene : public AccelN
class __aligned(16) Scene : public AccelN
{
ALIGNED_CLASS_USM_(std::alignment_of<Scene>::value);
public:
template<typename Ty, bool mblur = false>
class Iterator
@@ -59,10 +62,6 @@ namespace embree
return at(i);
}
__forceinline size_t size() const {
return scene->size();
}
__forceinline size_t numPrimitives() const {
return scene->getNumPrimitives(Ty::geom_type,mblur);
}
@@ -99,6 +98,11 @@ namespace embree
}
return ret;
}
__forceinline size_t size() const {
return scene->size();
}
private:
Scene* scene;
@@ -188,12 +192,22 @@ namespace embree
void build_cpu_accels();
void build_gpu_accels();
void commit_internal (bool join);
#if defined(EMBREE_SYCL_SUPPORT)
sycl::event commit (bool join, sycl::queue queue);
#endif
void commit (bool join);
void commit_task ();
void build () {}
Scene* getTraversable();
/* return number of geometries */
#if defined(__SYCL_DEVICE_ONLY__)
__forceinline size_t size() const { return num_geometries; }
#else
__forceinline size_t size() const { return geometries.size(); }
#endif
/* bind geometry to the scene */
unsigned int bind (unsigned geomID, Ref<Geometry> geometry);
@@ -206,12 +220,18 @@ namespace embree
modified = f;
}
__forceinline bool hasMotionBlur() const { return maxTimeSegments > 1; };
__forceinline uint32_t getMaxTimeSegments() const { return maxTimeSegments; };
#if !defined(__SYCL_DEVICE_ONLY__)
__forceinline bool isGeometryModified(size_t geomID)
{
Ref<Geometry>& g = geometries[geomID];
if (!g) return false;
return g->getModCounter() > geometryModCounters_[geomID];
}
#endif
protected:
@@ -219,6 +239,27 @@ namespace embree
public:
#if defined(__SYCL_DEVICE_ONLY__)
/* get mesh by ID */
__forceinline Geometry* get(size_t i) { return geometries_device[i]; }
__forceinline const Geometry* get(size_t i) const { return geometries_device[i]; }
template<typename Mesh>
__forceinline Mesh* get(size_t i) {
return (Mesh*)geometries_device[i];
}
template<typename Mesh>
__forceinline const Mesh* get(size_t i) const {
return (Mesh*)geometries_device[i];
}
template<typename Mesh>
__forceinline Mesh* getSafe(size_t i) {
if (geometries_device[i] == nullptr) return nullptr;
if (!(geometries_device[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
else return (Mesh*) geometries_device[i];
}
#else
/* get mesh by ID */
__forceinline Geometry* get(size_t i) { assert(i < geometries.size()); return geometries[i].ptr; }
__forceinline const Geometry* get(size_t i) const { assert(i < geometries.size()); return geometries[i].ptr; }
@@ -243,12 +284,16 @@ namespace embree
if (!(geometries[i]->getTypeMask() & Mesh::geom_type)) return nullptr;
else return (Mesh*) geometries[i].ptr;
}
#endif
#if !defined(__SYCL_DEVICE_ONLY__)
__forceinline Ref<Geometry> get_locked(size_t i) {
Lock<MutexSys> lock(geometriesMutex);
assert(i < geometries.size());
return geometries[i];
}
#endif
/* flag decoding */
__forceinline bool isFastAccel() const { return !isCompactAccel() && !isRobustAccel(); }
@@ -270,9 +315,16 @@ namespace embree
}
void* createQBVH6Accel();
#if defined(EMBREE_SYCL_SUPPORT)
private:
void syncWithDevice();
sycl::event syncWithDevice(sycl::queue queue);
#endif
public:
Device* device;
Scene* scene_device;
public:
IDPool<unsigned,0xFFFFFFFE> id_pool;
@@ -292,12 +344,26 @@ namespace embree
#if defined(EMBREE_SYCL_SUPPORT)
public:
BBox3f hwaccel_bounds = empty;
AccelBuffer hwaccel;
AccelBuffer accelBuffer;
#endif
private:
bool modified; //!< true if scene got modified
bool modified; //!< true if scene got modified
uint32_t maxTimeSegments; //!< maximal number of motion blur time segments in scene
#if defined(EMBREE_SYCL_SUPPORT)
Geometry** geometries_device; //!< list of all geometries on device
char* geometry_data_device; //!< data buffer of all geometries on device
size_t num_geometries;
size_t geometry_data_byte_size;
// host buffers used for creating representation of scene/geometry for device
// will be freed after scene commit if the scene is static, otherwise the
// buffers will stay for quicker rebuild.
size_t *offsets;
Geometry **geometries_host;
char *geometry_data_host;
#endif
public:

View File

@@ -31,13 +31,15 @@ namespace embree
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
void setTessellationRate(float N);
void setMaxRadiusScale(float s);
void addElementsToCount (GeometryCounts & counts) const;
size_t getGeometryDataDeviceByteSize() const;
void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
public:

View File

@@ -50,12 +50,19 @@ namespace embree
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
void interpolate(const RTCInterpolateArguments* const args);
#if defined(EMBREE_SYCL_SUPPORT)
size_t getGeometryDataDeviceByteSize() const;
void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
#endif
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)
{

View File

@@ -13,7 +13,6 @@ namespace embree
/*! Instanced acceleration structure */
struct Instance : public Geometry
{
//ALIGNED_STRUCT_(16);
static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE;
public:
@@ -55,6 +54,8 @@ namespace embree
virtual void build() {}
virtual void addElementsToCount (GeometryCounts & counts) const override;
virtual void commit() override;
virtual size_t getGeometryDataDeviceByteSize() const override;
virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
public:

View File

@@ -13,7 +13,6 @@ namespace embree
/*! Instanced acceleration structure */
struct InstanceArray : public Geometry
{
//ALIGNED_STRUCT_(16);
static const Geometry::GTypeMask geom_type = Geometry::MTY_INSTANCE_ARRAY;
public:
@@ -48,7 +47,7 @@ namespace embree
public:
virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) override;
virtual void* getBuffer(RTCBufferType type, unsigned int slot) override;
virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) override;
virtual void updateBuffer(RTCBufferType type, unsigned int slot) override;
virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
@@ -59,6 +58,8 @@ namespace embree
virtual void build() {}
virtual void addElementsToCount (GeometryCounts & counts) const override;
virtual void commit() override;
size_t getGeometryDataDeviceByteSize() const override;
void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
public:

View File

@@ -25,7 +25,7 @@ namespace embree
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify ();
@@ -33,6 +33,8 @@ namespace embree
void setTessellationRate(float N);
void setMaxRadiusScale(float s);
void addElementsToCount (GeometryCounts & counts) const;
size_t getGeometryDataDeviceByteSize() const;
void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)

View File

@@ -30,12 +30,14 @@ namespace embree
size_t offset,
size_t stride,
unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
void setMaxRadiusScale(float s);
void addElementsToCount (GeometryCounts & counts) const;
size_t getGeometryDataDeviceByteSize() const;
void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
public:
/*! returns the number of vertices */

View File

@@ -42,12 +42,14 @@ namespace embree
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
void interpolate(const RTCInterpolateArguments* const args);
void addElementsToCount (GeometryCounts & counts) const;
size_t getGeometryDataDeviceByteSize() const;
void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const;
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)

View File

@@ -61,7 +61,7 @@ namespace embree
void setVertexAttributeCount (unsigned int N);
void setTopologyCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType);
void updateBuffer(RTCBufferType type, unsigned int slot);
void setTessellationRate(float N);
bool verify();

View File

@@ -35,7 +35,7 @@ namespace embree
void TriangleMesh::setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num)
{
/* verify that all accesses are 4 bytes aligned */
if (((size_t(buffer->getPtr()) + offset) & 0x3) || (stride & 0x3))
if (((size_t(buffer->getHostPtr()) + offset) & 0x3) || (stride & 0x3))
throw_RTCError(RTC_ERROR_INVALID_OPERATION, "data must be 4 bytes aligned");
if (type == RTC_BUFFER_TYPE_VERTEX)
@@ -79,25 +79,25 @@ namespace embree
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "unknown buffer type");
}
void* TriangleMesh::getBuffer(RTCBufferType type, unsigned int slot)
void* TriangleMesh::getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType)
{
if (type == RTC_BUFFER_TYPE_INDEX)
{
if (slot != 0)
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
return triangles.getPtr();
return triangles.getPtr(pointerType);
}
else if (type == RTC_BUFFER_TYPE_VERTEX)
{
if (slot >= vertices.size())
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
return vertices[slot].getPtr();
return vertices[slot].getPtr(pointerType);
}
else if (type == RTC_BUFFER_TYPE_VERTEX_ATTRIBUTE)
{
if (slot >= vertexAttribs.size())
throw_RTCError(RTC_ERROR_INVALID_ARGUMENT, "invalid buffer slot");
return vertexAttribs[slot].getPtr();
return vertexAttribs[slot].getPtr(pointerType);
}
else
{
@@ -137,10 +137,12 @@ namespace embree
void TriangleMesh::commit()
{
/* verify that stride of all time steps are identical */
for (unsigned int t=0; t<numTimeSteps; t++)
for (unsigned int t=0; t<numTimeSteps; t++) {
if (vertices[t].getStride() != vertices[0].getStride())
throw_RTCError(RTC_ERROR_INVALID_OPERATION,"stride of vertex buffers have to be identical for each time step");
if (vertices[t]) vertices[t].buffer->commitIfNeeded();
}
if (triangles) triangles.buffer->commitIfNeeded();
Geometry::commit();
}
@@ -182,7 +184,29 @@ namespace embree
void TriangleMesh::interpolate(const RTCInterpolateArguments* const args) {
interpolate_impl<4>(args);
}
size_t TriangleMesh::getGeometryDataDeviceByteSize() const {
size_t byte_size = sizeof(TriangleMesh);
byte_size += numTimeSteps * sizeof(BufferView<Vec3fa>);
return 16 * ((byte_size + 15) / 16);
}
void TriangleMesh::convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const {
TriangleMesh* mesh = (TriangleMesh*)(data_host + offset);
std::memcpy(data_host + offset, (void*)this, sizeof(TriangleMesh));
offset += sizeof(TriangleMesh);
// store offset for overriding vertices pointer with device pointer after copying
const size_t offsetVertices = offset;
// copy vertices BufferViews for each time step
for (size_t t = 0; t < numTimeSteps; ++t) {
std::memcpy(data_host + offset, &(vertices[t]), sizeof(BufferView<Vec3fa>));
offset += sizeof(BufferView<Vec3fa>);
}
// override vertices pointer with device ptr
mesh->vertices.setDataPtr((BufferView<Vec3fa>*)(data_device + offsetVertices));
}
#endif
namespace isa

View File

@@ -32,16 +32,18 @@ namespace embree
/* geometry interface */
public:
void setMask(unsigned mask);
void setNumTimeSteps (unsigned int numTimeSteps);
void setVertexAttributeCount (unsigned int N);
void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num);
void* getBuffer(RTCBufferType type, unsigned int slot);
void updateBuffer(RTCBufferType type, unsigned int slot);
void commit();
bool verify();
void interpolate(const RTCInterpolateArguments* const args);
void addElementsToCount (GeometryCounts & counts) const;
virtual void setMask(unsigned mask) override;
virtual void setNumTimeSteps (unsigned int numTimeSteps) override;
virtual void setVertexAttributeCount (unsigned int N) override;
virtual void setBuffer(RTCBufferType type, unsigned int slot, RTCFormat format, const Ref<Buffer>& buffer, size_t offset, size_t stride, unsigned int num) override;
virtual void* getBufferData(RTCBufferType type, unsigned int slot, BufferDataPointerType pointerType) override;
virtual void updateBuffer(RTCBufferType type, unsigned int slot) override;
virtual void commit() override;
virtual bool verify() override;
virtual void interpolate(const RTCInterpolateArguments* const args) override;
virtual void addElementsToCount (GeometryCounts & counts) const override;
virtual size_t getGeometryDataDeviceByteSize() const override;
virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
template<int N>
void interpolate_impl(const RTCInterpolateArguments* const args)
@@ -98,12 +100,12 @@ namespace embree
}
public:
/*! returns number of vertices */
__forceinline size_t numVertices() const {
return vertices[0].size();
}
/*! returns i'th triangle*/
__forceinline const Triangle& triangle(size_t i) const {
return triangles[i];
@@ -246,7 +248,7 @@ namespace embree
}
/*! get fast access to first vertex buffer */
__forceinline float * getCompactVertexArray () const {
__forceinline float * getCompactVertexArray () const override {
return (float*) vertices0.getPtr();
}
@@ -283,6 +285,7 @@ namespace embree
TriangleMeshISA (Device* device)
: TriangleMesh(device) {}
#if !defined(__SYCL_DEVICE_ONLY__)
LBBox3fa vlinearBounds(size_t primID, const BBox1f& time_range) const {
return linearBounds(primID,time_range);
}
@@ -344,6 +347,7 @@ namespace embree
}
return pinfo;
}
#endif
};
}

View File

@@ -15,12 +15,14 @@ namespace embree
public:
UserGeometry (Device* device, unsigned int items = 0, unsigned int numTimeSteps = 1);
virtual void setMask (unsigned mask);
virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr);
virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect);
virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded);
virtual void build() {}
virtual void addElementsToCount (GeometryCounts & counts) const;
virtual void setMask (unsigned mask) override;
virtual void setBoundsFunction (RTCBoundsFunction bounds, void* userPtr) override;
virtual void setIntersectFunctionN (RTCIntersectFunctionN intersect) override;
virtual void setOccludedFunctionN (RTCOccludedFunctionN occluded) override;
virtual void build() override {}
virtual void addElementsToCount (GeometryCounts & counts) const override;
virtual size_t getGeometryDataDeviceByteSize() const override;
virtual void convertToDeviceRepresentation(size_t offset, char* data_host, char* data_device) const override;
__forceinline float projectedPrimitiveArea(const size_t i) const { return 0.0f; }
};

View File

@@ -16,19 +16,22 @@ namespace embree
State::ErrorHandler::~ErrorHandler()
{
Lock<MutexSys> lock(errors_mutex);
for (size_t i=0; i<thread_errors.size(); i++)
for (size_t i=0; i<thread_errors.size(); i++) {
delete thread_errors[i];
}
destroyTls(thread_error);
thread_errors.clear();
}
RTCError* State::ErrorHandler::error()
RTCErrorMessage* State::ErrorHandler::error()
{
RTCError* stored_error = (RTCError*) getTls(thread_error);
if (stored_error) return stored_error;
RTCErrorMessage* stored_error = (RTCErrorMessage*) getTls(thread_error);
if (stored_error) {
return stored_error;
}
Lock<MutexSys> lock(errors_mutex);
stored_error = new RTCError(RTC_ERROR_NONE);
stored_error = new RTCErrorMessage(RTC_ERROR_NONE, "");
thread_errors.push_back(stored_error);
setTls(thread_error,stored_error);
return stored_error;
@@ -84,6 +87,8 @@ namespace embree
max_spatial_split_replications = 1.2f;
useSpatialPreSplits = false;
max_triangles_per_leaf = inf;
tessellation_cache_size = 128*1024*1024;
subdiv_accel = "default";
@@ -428,6 +433,9 @@ namespace embree
else if (tok == Token::Id("max_spatial_split_replications") && cin->trySymbol("="))
max_spatial_split_replications = cin->get().Float();
else if (tok == Token::Id("max_triangles_per_leaf") && cin->trySymbol("="))
max_triangles_per_leaf = cin->get().Float();
else if (tok == Token::Id("presplits") && cin->trySymbol("="))
useSpatialPreSplits = cin->get().Int() != 0 ? true : false;

View File

@@ -9,6 +9,14 @@ namespace embree
{
/* mutex to make printing to cout thread safe */
extern MutexSys g_printMutex;
struct RTCErrorMessage
{
RTCErrorMessage(RTCError error, std::string const& msg)
: error(error), msg(msg) {}
RTCError error;
std::string msg;
};
struct State : public RefCount
{
@@ -109,6 +117,7 @@ namespace embree
float max_spatial_split_replications; //!< maximally replications*N many primitives in accel for spatial splits
bool useSpatialPreSplits; //!< use spatial pre-splits instead of the full spatial split builder
size_t tessellation_cache_size; //!< size of the shared tessellation cache
size_t max_triangles_per_leaf;
public:
size_t instancing_open_min; //!< instancing opens tree to minimally that number of subtrees
@@ -163,11 +172,11 @@ namespace embree
public:
ErrorHandler();
~ErrorHandler();
RTCError* error();
RTCErrorMessage* error();
public:
tls_t thread_error;
std::vector<RTCError*> thread_errors;
std::vector<RTCErrorMessage*> thread_errors;
MutexSys errors_mutex;
};
ErrorHandler errorHandler;

View File

@@ -1,4 +1,4 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#define RTC_HASH "daa8de0e714e18ad5e5c9841b67c1950d9c91c51"
#define RTC_HASH "ff9381774dc99fea81a932ad276677aad6a3d4dd"

View File

@@ -23,216 +23,131 @@ index b52b1e2e13..fbff38f660 100644
#endif
#else // TASKING_PPL
diff --git a/thirdparty/embree/common/lexers/stringstream.cpp b/thirdparty/embree/common/lexers/stringstream.cpp
index 42ffb10176..fa4266d0b9 100644
index 42ffb10176..9779fc74c7 100644
--- a/thirdparty/embree/common/lexers/stringstream.cpp
+++ b/thirdparty/embree/common/lexers/stringstream.cpp
@@ -39,7 +39,10 @@ namespace embree
@@ -39,7 +39,7 @@ namespace embree
std::vector<char> str; str.reserve(64);
while (cin->peek() != EOF && !isSeparator(cin->peek())) {
int c = cin->get();
- if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
+ //if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
+ if (!isValidChar(c)) {
+ abort();
+ }
+ if (!isValidChar(c)) abort(); //throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
str.push_back((char)c);
}
str.push_back(0);
diff --git a/thirdparty/embree/common/sys/alloc.cpp b/thirdparty/embree/common/sys/alloc.cpp
index de225fafc6..8e83646031 100644
index c92bb99ece..2288df76ef 100644
--- a/thirdparty/embree/common/sys/alloc.cpp
+++ b/thirdparty/embree/common/sys/alloc.cpp
@@ -24,16 +24,28 @@ namespace embree
void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
{
- if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
- if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
+ //if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
+ //if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
+ if (tls_context_embree != nullptr) {
+ abort();
+ }
+ if (tls_device_embree != nullptr) {
+ abort();
+ }
tls_context_embree = context;
tls_device_embree = device;
}
void disableUSMAllocEmbree()
{
- if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
- if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
+ //if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
+ //if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
+ if (tls_context_embree == nullptr) {
+ abort();
+ }
+ if (tls_device_embree == nullptr) {
+ abort();
+ }
tls_context_embree = nullptr;
tls_device_embree = nullptr;
}
@@ -48,8 +60,14 @@ namespace embree
void disableUSMAllocTutorial()
{
- if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
- if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
+ //if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
+ //if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
+ if (tls_context_tutorial == nullptr) {
+ abort();
+ }
+ if (tls_device_tutorial == nullptr) {
+ abort();
+ }
tls_context_tutorial = nullptr;
tls_device_tutorial = nullptr;
@@ -64,8 +82,11 @@ namespace embree
@@ -20,7 +20,7 @@ namespace embree
assert((align & (align-1)) == 0);
void* ptr = _mm_malloc(size,align);
- if (size != 0 && ptr == nullptr)
if (size != 0 && ptr == nullptr)
- throw std::bad_alloc();
+ //if (size != 0 && ptr == nullptr)
+ // throw std::bad_alloc();
+ if (size != 0 && ptr == nullptr) {
+ abort();
+ }
+ abort(); //throw std::bad_alloc();
return ptr;
}
@@ -94,8 +115,11 @@ namespace embree
else
@@ -50,7 +50,7 @@ namespace embree
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
- if (size != 0 && ptr == nullptr)
if (size != 0 && ptr == nullptr)
- throw std::bad_alloc();
+ //if (size != 0 && ptr == nullptr)
+ // throw std::bad_alloc();
+ if (size != 0 && ptr == nullptr) {
+ abort();
+ }
+ abort(); //throw std::bad_alloc();
return ptr;
}
@@ -241,7 +265,10 @@ namespace embree
@@ -83,7 +83,7 @@ namespace embree
}
if (size != 0 && ptr == nullptr)
- throw std::bad_alloc();
+ abort(); //throw std::bad_alloc();
return ptr;
}
@@ -199,7 +199,7 @@ namespace embree
/* fall back to 4k pages */
int flags = MEM_COMMIT | MEM_RESERVE;
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
- if (ptr == nullptr) throw std::bad_alloc();
+ //if (ptr == nullptr) throw std::bad_alloc();
+ if (ptr == nullptr) {
+ abort();
+ }
+ if (ptr == nullptr) abort(); //throw std::bad_alloc();
hugepages = false;
return ptr;
}
@@ -257,8 +284,11 @@ namespace embree
if (bytesNew >= bytesOld)
@@ -216,7 +216,7 @@ namespace embree
return bytesOld;
- if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
- throw std::bad_alloc();
+ //if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
+ // throw std::bad_alloc();
+ if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
+ abort();
+ }
+ abort(); //throw std::bad_alloc();
return bytesNew;
}
@@ -268,8 +298,11 @@ namespace embree
if (bytes == 0)
@@ -227,7 +227,7 @@ namespace embree
return;
- if (!VirtualFree(ptr,0,MEM_RELEASE))
if (!VirtualFree(ptr,0,MEM_RELEASE))
- throw std::bad_alloc();
+ //if (!VirtualFree(ptr,0,MEM_RELEASE))
+ // throw std::bad_alloc();
+ if (!VirtualFree(ptr,0,MEM_RELEASE)) {
+ abort();
+ }
+ abort(); //throw std::bad_alloc();
}
void os_advise(void *ptr, size_t bytes)
@@ -373,7 +406,10 @@ namespace embree
@@ -331,7 +331,7 @@ namespace embree
/* fallback to 4k pages */
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
- if (ptr == MAP_FAILED) throw std::bad_alloc();
+ //if (ptr == MAP_FAILED) throw std::bad_alloc();
+ if (ptr == MAP_FAILED) {
+ abort();
+ }
+ if (ptr == MAP_FAILED) abort(); //throw std::bad_alloc();
hugepages = false;
/* advise huge page hint for THP */
@@ -389,8 +425,11 @@ namespace embree
if (bytesNew >= bytesOld)
@@ -348,7 +348,7 @@ namespace embree
return bytesOld;
- if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
- throw std::bad_alloc();
+ //if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
+ // throw std::bad_alloc();
+ if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
+ abort();
+ }
+ abort(); //throw std::bad_alloc();
return bytesNew;
}
@@ -403,8 +442,11 @@ namespace embree
/* for hugepages we need to also align the size */
@@ -362,7 +362,7 @@ namespace embree
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
bytes = (bytes+pageSize-1) & ~(pageSize-1);
- if (munmap(ptr,bytes) == -1)
if (munmap(ptr,bytes) == -1)
- throw std::bad_alloc();
+ //if (munmap(ptr,bytes) == -1)
+ // throw std::bad_alloc();
+ if (munmap(ptr,bytes) == -1) {
+ abort();
+ }
+ abort(); //throw std::bad_alloc();
}
/* hint for transparent huge pages (THP) */
diff --git a/thirdparty/embree/common/sys/alloc.h b/thirdparty/embree/common/sys/alloc.h
index e19c2c221a..e2c942049a 100644
index 5c63d0bfaf..1f6f230ed3 100644
--- a/thirdparty/embree/common/sys/alloc.h
+++ b/thirdparty/embree/common/sys/alloc.h
@@ -160,7 +160,8 @@ namespace embree
@@ -131,7 +131,7 @@ namespace embree
typedef std::ptrdiff_t difference_type;
__forceinline pointer allocate( size_type n ) {
- throw std::runtime_error("no allocation supported");
+ //throw std::runtime_error("no allocation supported");
+ abort();
+ abort(); //throw std::runtime_error("no allocation supported");
}
__forceinline void deallocate( pointer p, size_type n ) {
diff --git a/thirdparty/embree/common/sys/platform.h b/thirdparty/embree/common/sys/platform.h
index 6dc0cf3318..9f08cd1516 100644
index 6dc0cf3318..1e5b02550e 100644
--- a/thirdparty/embree/common/sys/platform.h
+++ b/thirdparty/embree/common/sys/platform.h
@@ -213,11 +213,15 @@
#define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
@@ -214,10 +214,11 @@
#if defined(DEBUG) // only report file and line in debug mode
+ //#define THROW_RUNTIME_ERROR(str) \
+ // throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#define THROW_RUNTIME_ERROR(str) \
- throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
+ printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
+ //throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#else
+ //#define THROW_RUNTIME_ERROR(str) \
+ // throw std::runtime_error(str);
#define THROW_RUNTIME_ERROR(str) \
- throw std::runtime_error(str);
+ abort();
+ abort(); //throw std::runtime_error(str);
#endif
#define FATAL(x) THROW_RUNTIME_ERROR(x)
@@ -261,122 +176,81 @@ index 83ead95122..e89ae04f8b 100644
add_dependencies(-1);
}
diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h
index b01bebf7c3..4a04323b80 100644
index b01bebf7c3..d4e0c7386b 100644
--- a/thirdparty/embree/common/tasking/taskschedulerinternal.h
+++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h
@@ -130,8 +130,11 @@ namespace embree
__forceinline void* alloc(size_t bytes, size_t align = 64)
@@ -131,7 +131,7 @@ namespace embree
{
size_t ofs = bytes + ((align - stackPtr) & (align-1));
- if (stackPtr + ofs > CLOSURE_STACK_SIZE)
if (stackPtr + ofs > CLOSURE_STACK_SIZE)
- throw std::runtime_error("closure stack overflow");
+ //if (stackPtr + ofs > CLOSURE_STACK_SIZE)
+ // throw std::runtime_error("closure stack overflow");
+ if (stackPtr + ofs > CLOSURE_STACK_SIZE) {
+ abort();
+ }
+ abort(); //throw std::runtime_error("closure stack overflow");
stackPtr += ofs;
return &stack[stackPtr-bytes];
}
@@ -139,8 +142,11 @@ namespace embree
template<typename Closure>
@@ -140,7 +140,7 @@ namespace embree
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context)
{
- if (right >= TASK_STACK_SIZE)
if (right >= TASK_STACK_SIZE)
- throw std::runtime_error("task stack overflow");
+ //if (right >= TASK_STACK_SIZE)
+ // throw std::runtime_error("task stack overflow");
+ if (right >= TASK_STACK_SIZE) {
+ abort();
+ }
+ abort(); //throw std::runtime_error("task stack overflow");
/* allocate new task on right side of stack */
size_t oldStackPtr = stackPtr;
diff --git a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
index 40f9043736..7ea9736c5c 100644
index 40f9043736..f3b93e5925 100644
--- a/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
+++ b/thirdparty/embree/kernels/bvh/bvh_statistics.cpp
@@ -150,7 +150,8 @@ namespace embree
@@ -150,7 +150,7 @@ namespace embree
}
}
else {
- throw std::runtime_error("not supported node type in bvh_statistics");
+ //throw std::runtime_error("not supported node type in bvh_statistics");
+ abort();
+ abort(); //throw std::runtime_error("not supported node type in bvh_statistics");
}
return s;
}
diff --git a/thirdparty/embree/kernels/common/alloc.h b/thirdparty/embree/kernels/common/alloc.h
index 2bd292de4d..8ac22e53ec 100644
index 2bd292de4d..10f629a244 100644
--- a/thirdparty/embree/kernels/common/alloc.h
+++ b/thirdparty/embree/kernels/common/alloc.h
@@ -189,8 +189,11 @@ namespace embree
, atype(osAllocation ? EMBREE_OS_MALLOC : ALIGNED_MALLOC)
@@ -190,7 +190,7 @@ namespace embree
, primrefarray(device,0)
{
- if (osAllocation && useUSM)
if (osAllocation && useUSM)
- throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
+ //if (osAllocation && useUSM)
+ // throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
+ if (osAllocation && useUSM) {
+ abort();
+ }
+ abort(); //throw std::runtime_error("USM allocation cannot be combined with OS allocation.");
for (size_t i=0; i<MAX_THREAD_USED_BLOCK_SLOTS; i++)
{
@@ -502,8 +505,11 @@ namespace embree
Block* myUsedBlocks = threadUsedBlocks[slot];
@@ -503,7 +503,7 @@ namespace embree
if (myUsedBlocks) {
void* ptr = myUsedBlocks->malloc(device,bytes,align,partial);
- if (ptr == nullptr && !blockAllocation)
if (ptr == nullptr && !blockAllocation)
- throw std::bad_alloc();
+ //if (ptr == nullptr && !blockAllocation)
+ // throw std::bad_alloc();
+ if (ptr == nullptr && !blockAllocation) {
+ abort();
+ }
+ abort(); //throw std::bad_alloc();
if (ptr) return ptr;
}
diff --git a/thirdparty/embree/kernels/common/rtcore.cpp b/thirdparty/embree/kernels/common/rtcore.cpp
index 8dc5d7045b..e19c243bf6 100644
index 8da5c9d86c..a34eb2a0c4 100644
--- a/thirdparty/embree/kernels/common/rtcore.cpp
+++ b/thirdparty/embree/kernels/common/rtcore.cpp
@@ -257,10 +257,15 @@ RTC_NAMESPACE_BEGIN;
RTC_TRACE(rtcSetSceneBuildQuality);
RTC_VERIFY_HANDLE(hscene);
RTC_ENTER_DEVICE(hscene);
+ //if (quality != RTC_BUILD_QUALITY_LOW &&
+ // quality != RTC_BUILD_QUALITY_MEDIUM &&
+ // quality != RTC_BUILD_QUALITY_HIGH)
+ // throw std::runtime_error("invalid build quality");
@@ -358,7 +358,7 @@ RTC_NAMESPACE_BEGIN;
if (quality != RTC_BUILD_QUALITY_LOW &&
quality != RTC_BUILD_QUALITY_MEDIUM &&
- quality != RTC_BUILD_QUALITY_HIGH)
quality != RTC_BUILD_QUALITY_HIGH)
- throw std::runtime_error("invalid build quality");
+ quality != RTC_BUILD_QUALITY_HIGH) {
+ abort();
+ }
+ abort(); //throw std::runtime_error("invalid build quality");
scene->setBuildQuality(quality);
RTC_CATCH_END2(scene);
}
@@ -1563,11 +1568,17 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
RTC_TRACE(rtcSetGeometryBuildQuality);
RTC_VERIFY_HANDLE(hgeometry);
RTC_ENTER_DEVICE(hgeometry);
+ //if (quality != RTC_BUILD_QUALITY_LOW &&
+ // quality != RTC_BUILD_QUALITY_MEDIUM &&
+ // quality != RTC_BUILD_QUALITY_HIGH &&
+ // quality != RTC_BUILD_QUALITY_REFIT)
+ // throw std::runtime_error("invalid build quality");
if (quality != RTC_BUILD_QUALITY_LOW &&
@@ -1831,7 +1831,7 @@ RTC_API void rtcSetGeometryTransform(RTCGeometry hgeometry, unsigned int timeSte
quality != RTC_BUILD_QUALITY_MEDIUM &&
quality != RTC_BUILD_QUALITY_HIGH &&
- quality != RTC_BUILD_QUALITY_REFIT)
quality != RTC_BUILD_QUALITY_REFIT)
- throw std::runtime_error("invalid build quality");
+ quality != RTC_BUILD_QUALITY_REFIT) {
+ abort();
+ }
+ abort(); //throw std::runtime_error("invalid build quality");
geometry->setBuildQuality(quality);
RTC_CATCH_END2(geometry);
}
@@ -421,10 +295,10 @@ index 73a061de11..cd7a6f4395 100644
#define RTC_BUILD_ARGUMENTS_HAS(settings,member) \
diff --git a/thirdparty/embree/kernels/common/scene.cpp b/thirdparty/embree/kernels/common/scene.cpp
index fda8dd938a..706cc512df 100644
index 84a84f8c69..3bfcebd298 100644
--- a/thirdparty/embree/kernels/common/scene.cpp
+++ b/thirdparty/embree/kernels/common/scene.cpp
@@ -894,16 +894,16 @@ namespace embree
@@ -962,16 +962,16 @@ namespace embree
}
/* initiate build */
@@ -450,10 +324,10 @@ index fda8dd938a..706cc512df 100644
#endif
diff --git a/thirdparty/embree/kernels/common/state.cpp b/thirdparty/embree/kernels/common/state.cpp
index 4e3ab6ddfb..8e83c95bd7 100644
index cce5eafce1..509dbc7120 100644
--- a/thirdparty/embree/kernels/common/state.cpp
+++ b/thirdparty/embree/kernels/common/state.cpp
@@ -194,13 +194,13 @@ namespace embree
@@ -199,13 +199,13 @@ namespace embree
bool State::parseFile(const FileName& fileName)
{
Ref<Stream<int> > file;

View File

@@ -1,5 +1,5 @@
diff --git a/thirdparty/embree/include/embree4/rtcore_config.h b/thirdparty/embree/include/embree4/rtcore_config.h
index 8abd6954c3..cb3a8678a7 100644
index eba966989e..91cf129dc6 100644
--- a/thirdparty/embree/include/embree4/rtcore_config.h
+++ b/thirdparty/embree/include/embree4/rtcore_config.h
@@ -4,7 +4,7 @@
@@ -12,8 +12,8 @@ index 8abd6954c3..cb3a8678a7 100644
#define RTC_VERSION_MAJOR 4
@@ -13,28 +13,28 @@
#define RTC_VERSION 40301
#define RTC_VERSION_STRING "4.3.1"
#define RTC_VERSION 40400
#define RTC_VERSION_STRING "4.4.0"
-#define RTC_MAX_INSTANCE_LEVEL_COUNT @EMBREE_MAX_INSTANCE_LEVEL_COUNT@
+#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
@@ -47,8 +47,8 @@ index 8abd6954c3..cb3a8678a7 100644
-# define RTC_NAMESPACE_USE using namespace @EMBREE_API_NAMESPACE@;
+# define RTC_NAMESPACE_USE using namespace;
# define RTC_API_EXTERN_C
# define RTC_API_EXTERN_CPP
# undef EMBREE_API_NAMESPACE
#else
diff --git a/thirdparty/embree/kernels/config.h b/thirdparty/embree/kernels/config.h
index 1669c4af72..5979b543c9 100644
--- a/thirdparty/embree/kernels/config.h

View File

@@ -1,8 +1,8 @@
diff --git a/thirdparty/embree/common/sys/sysinfo.cpp b/thirdparty/embree/common/sys/sysinfo.cpp
index 61256b34f2..4583e49b1c 100644
index 5f375cd95c..3c23fb1610 100644
--- a/thirdparty/embree/common/sys/sysinfo.cpp
+++ b/thirdparty/embree/common/sys/sysinfo.cpp
@@ -646,6 +646,10 @@ namespace embree
@@ -659,6 +659,10 @@ namespace embree
#if defined(__EMSCRIPTEN__)
#include <emscripten.h>
@@ -13,7 +13,7 @@ index 61256b34f2..4583e49b1c 100644
#endif
namespace embree
@@ -659,6 +663,8 @@ namespace embree
@@ -672,6 +676,8 @@ namespace embree
nThreads = sysconf(_SC_NPROCESSORS_ONLN); // does not work in Linux LXC container
assert(nThreads);
#elif defined(__EMSCRIPTEN__)
@@ -22,7 +22,7 @@ index 61256b34f2..4583e49b1c 100644
// WebAssembly supports pthreads, but not pthread_getaffinity_np. Get the number of logical
// threads from the browser or Node.js using JavaScript.
nThreads = MAIN_THREAD_EM_ASM_INT({
@@ -674,6 +680,7 @@ namespace embree
@@ -687,6 +693,7 @@ namespace embree
return 1;
}
});

View File

@@ -1,20 +0,0 @@
diff --git a/thirdparty/embree/common/tasking/taskschedulerinternal.h b/thirdparty/embree/common/tasking/taskschedulerinternal.h
index e72d3b72ba..8e3befb739 100644
--- a/thirdparty/embree/common/tasking/taskschedulerinternal.h
+++ b/thirdparty/embree/common/tasking/taskschedulerinternal.h
@@ -3,6 +3,7 @@
#pragma once
+#include "../../include/embree4/rtcore.h"
#include "../sys/platform.h"
#include "../sys/alloc.h"
#include "../sys/barrier.h"
@@ -12,7 +13,6 @@
#include "../sys/ref.h"
#include "../sys/atomic.h"
#include "../math/range.h"
-#include "../../include/embree4/rtcore.h"
#include <list>