Josh Posted Friday at 11:06 PM Posted Friday at 11:06 PM

Using Intel Embree, I get some pretty impressive results. 100,000 raycasts with half of them hitting, on a 16,000-triangle mesh, in 5-7 milliseconds. There's a lot of variation each time it runs, so I won't bother posting the printed output. It looks like the batched calls are probably all falling back to the non-SIMD code. If we can perform 200,000 raycasts in one frame, that means over one second we could perform 12 million raycasts on a single core, or 50 million using four CPU cores. It makes one wonder what one could do with such things...

```cpp
#include "Leadwerks.h"

using namespace UltraEngine;

int main(int argc, const char* argv[])
{
    // Create raytrace device
    auto rt = CreateRayTracer();

    // Create RT scene
    auto scene = CreateRTScene(rt);

    // Create RT mesh
    auto box = CreateSphere(NULL, 0.5, 64);
    //auto box = CreateBox(NULL);
    Print(String(box->GetMesh(0)->CountPrimitives()) + " polys");
    auto mesh = CreateRTMesh(rt, box->GetMesh(0));

    // Create RT instance
    Mat4 m;
    auto instance = CreateRTInstance(scene, m);
    instance->AddMesh(mesh);

    // Perform single ray trace
    RTRay ray;
    ray.origin = Vec3(-10, 0, 0);
    ray.dir = Vec3(1, 0, 0);
    ray.length = 20.0f;
    auto r = instance->TraceRay(ray);// this must be done on the instance, won't work yet on the scene (instances ARE scenes)
    //Print("Hit position: " + String(r.position.x) + String("\n")); // verify it's working, seems to work with a box but misses the sphere

    // Trace a bunch of rays
    int count = 100000; // must be divisible by 16, 8, and 4
    std::vector<RTRay> rays(count);
    std::vector<RTTrace> results;
    results.resize(count);
    for (int n = 0; n < count; ++n)
    {
        rays[n].origin.x = -10;
        rays[n].origin.y = Random(-1.0f, 1.0f);
        rays[n].origin.z = Random(-1.0f, 1.0f);
        Vec3 p1 = Vec3(10, Random(-1.0f, 1.0f), Random(-1.0f, 1.0f));
        rays[n].dir = p1 - rays[n].origin;
        rays[n].length = rays[n].dir.Length();
        rays[n].dir /= rays[n].length;
    }
    Print(String(count) + " rays\n");

    // Slower method
    auto tm = Millisecs();
    count = instance->TraceRays(rays, results);
    tm = Millisecs() - tm;
    Print("1x rays");
    Print("Time: " + String(tm));
    Print(String(count) + " hits\n");

    // Faster batched method - requires AVX-256 support
    tm = Millisecs();
    count = instance->TraceRays4(rays, results);
    tm = Millisecs() - tm;
    Print("4x rays");
    Print("Time: " + String(tm));
    Print(String(count) + " hits\n");

    // Even faster batched method - requires AVX-256 support
    tm = Millisecs();
    count = instance->TraceRays8(rays, results);
    tm = Millisecs() - tm;
    Print("8x rays");
    Print("Time: " + String(tm));
    Print(String(count) + " hits\n");

    // Fastest batched method - requires AVX-512 support
    tm = Millisecs();
    count = instance->TraceRays16(rays, results);
    tm = Millisecs() - tm;
    Print("16x rays");
    Print("Time: " + String(tm));
    Print(String(count) + " hits\n");

    return 0;
}
```

1 Quote My job is to make tools you love, with the features you want, and performance you can't live without.
Josh Posted Sunday at 04:38 PM Author Posted Sunday at 04:38 PM This example has been replaced with the code here, which I think is actually the final API. Quote My job is to make tools you love, with the features you want, and performance you can't live without.
Recommended Posts
Join the conversation
You can post now and register later. If you have an account, sign in now to post with your account.
Note: Your post will require moderator approval before it will be visible.