My ultimate goal is to render 1 million spheres of different sizes and colors at 60 fps. I want to be able to move the camera around the screen as well.
I have modified the code on this page of the tutorial I am studying to try to instance many spheres. However, I find that at as little as 64 spheres my fps falls below 60, and at 900 spheres my fps is a measly 4. My understanding of instancing is naive, but I believe that I should be getting more frames-per-second than this. 60 fps should be attainable with only 64 spheres. I believe that I am, in some way, causing the CPU and GPU to communicate more often than they should have to. So my question is: How do I instance so many objects (ideally millions) without causing the fps to fall low (ideally 60 fps)?
I am calculating fps by calculating (10 / time_elapsed)
every 10 frames, where time_elapsed
is the time that has elapsed since the last fps call. I am printing this out using printf
on line 118 of my code.
I have been learning OpenGL through this tutorial and so I use 32-bit GLEW and 32-bit GLFW in Visual Studio 2013. I have 8 GB of RAM on a 64-bit operating system (Windows 7) with a 2.30 GHz CPU.
I have tried coding my own example based on the tutorial above. Source code:
(set line #2 to be the number of spheres to be instanced. Make sure line#2 has a whole-number square root. Set line 4 to be the detail of the sphere, the lowest it can go is 0. Higher number = more detailed.)
// Make sure NUM_INS is a square number
#define NUM_INS 1
// Detail up to 4 is probably good enough
#define SPHERE_DETAIL 4
#include <vector>
// GLEW
#define GLEW_STATIC
#include <GL/glew.h>
// GLFW
#include <GLFW/glfw3.h>
// GL includes
#include "Shader.h"
// GLM Mathemtics
#include <glm/glm.hpp>
#include <glm/gtc/matrix_transform.hpp>
#include <glm/gtc/type_ptr.hpp>
// Properties
GLuint screenWidth = 800, screenHeight = 600;
// Function prototypes
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode);
std::vector<GLfloat> create_sphere(int recursion);
// The MAIN function, from here we start our application and run the Game loop
int main()
{
// Init GLFW
glfwInit();
glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 3);
glfwWindowHint(GLFW_CONTEXT_VERSION_MINOR, 3);
glfwWindowHint(GLFW_OPENGL_PROFILE, GLFW_OPENGL_CORE_PROFILE);
glfwWindowHint(GLFW_RESIZABLE, GL_FALSE);
GLFWwindow* window = glfwCreateWindow(screenWidth, screenHeight, "LearnOpenGL", nullptr, nullptr); // Windowed
glfwMakeContextCurrent(window);
// Set the required callback functions
glfwSetKeyCallback(window, key_callback);
// Initialize GLEW to setup the OpenGL Function pointers
glewExperimental = GL_TRUE;
glewInit();
// Define the viewport dimensions
glViewport(0, 0, screenWidth, screenHeight);
glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); // Comment to remove wireframe mode
// Setup OpenGL options
glEnable(GL_DEPTH_TEST);
// Setup and compile our shader(s)
Shader shader("core.vs", "core.frag");
// Generate a list of 100 quad locations/translation-vectors
std::vector<glm::vec2> translations(NUM_INS);
//glm::vec2 translations[NUM_INS];
int index = 0;
GLfloat offset = 1.0f / (float)sqrt(NUM_INS);
for (GLint y = -(float)sqrt(NUM_INS); y < (float)sqrt(NUM_INS); y += 2)
{
for (GLint x = -(float)sqrt(NUM_INS); x < (float)sqrt(NUM_INS); x += 2)
{
glm::vec2 translation;
translation.x = (GLfloat)x / (float)sqrt(NUM_INS) + offset;
translation.y = (GLfloat)y / (float)sqrt(NUM_INS) + offset;
translations[index++] = translation;
}
}
// Store instance data in an array buffer
GLuint instanceVBO;
glGenBuffers(1, &instanceVBO);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glBufferData(GL_ARRAY_BUFFER, sizeof(glm::vec2) * NUM_INS, &translations[0], GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
// create 12 vertices of a icosahedron
std::vector<GLfloat> vv = create_sphere(SPHERE_DETAIL);
GLuint quadVAO, quadVBO;
glGenVertexArrays(1, &quadVAO);
glGenBuffers(1, &quadVBO);
glBindVertexArray(quadVAO);
glBindBuffer(GL_ARRAY_BUFFER, quadVBO);
glBufferData(GL_ARRAY_BUFFER, vv.size() * sizeof(GLfloat), &vv[0], GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glVertexAttribPointer(0, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)0);
glEnableVertexAttribArray(1);
glVertexAttribPointer(1, 3, GL_FLOAT, GL_FALSE, 6 * sizeof(GLfloat), (GLvoid*)(2 * sizeof(GLfloat)));
// Also set instance data
glEnableVertexAttribArray(2);
glBindBuffer(GL_ARRAY_BUFFER, instanceVBO);
glVertexAttribPointer(2, 2, GL_FLOAT, GL_FALSE, 2 * sizeof(GLfloat), (GLvoid*)0);
glBindBuffer(GL_ARRAY_BUFFER, 0);
glVertexAttribDivisor(2, 1); // Tell OpenGL this is an instanced vertex attribute.
glBindVertexArray(0);
// For printing frames-per-second
float counter = 0;
double get_time = 0;
double new_time;
// Game loop
while (!glfwWindowShouldClose(window))
{
// Print fps by printing (number_of_frames / time_elapsed)
counter += 1;
if (counter > 10) {
counter -= 10;
new_time = glfwGetTime();
printf("fps: %.2f ", (10/(new_time - get_time)));
get_time = new_time;
}
// Check and call events
glfwPollEvents();
// Clear buffers
//glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClearColor(0.2f, 0.3f, 0.3f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
// Draw 100 instanced quads
shader.Use();
glm::mat4 model;
model = glm::rotate(model, 0.0f, glm::vec3(1.0f, 0.0f, 0.0f));
// Camera/View transformation
glm::mat4 view;
GLfloat radius = 10.0f;
GLfloat camX = sin(glfwGetTime()) * radius;
GLfloat camZ = cos(glfwGetTime()) * radius;
view = glm::lookAt(glm::vec3(camX, 0.0f, camZ), glm::vec3(0.0f, 0.0f, 0.0f), glm::vec3(0.0f, 1.0f, 0.0f));
// Projection
glm::mat4 projection;
projection = glm::perspective(45.0f, (GLfloat)screenWidth / (GLfloat)screenHeight, 0.1f, 100.0f);
// Get the uniform locations
GLint modelLoc = glGetUniformLocation(shader.Program, "model");
GLint viewLoc = glGetUniformLocation(shader.Program, "view");
GLint projLoc = glGetUniformLocation(shader.Program, "projection");
// Pass the matrices to the shader
glUniformMatrix4fv(modelLoc, 1, GL_FALSE, glm::value_ptr(model));
glUniformMatrix4fv(viewLoc, 1, GL_FALSE, glm::value_ptr(view));
glUniformMatrix4fv(projLoc, 1, GL_FALSE, glm::value_ptr(projection));
glBindVertexArray(quadVAO);
glDrawArraysInstanced(GL_TRIANGLES, 0, vv.size() / 3, NUM_INS); // 100 triangles of 6 vertices each
glBindVertexArray(0);
// Swap the buffers
glfwSwapBuffers(window);
}
glfwTerminate();
return 0;
}
// Is called whenever a key is pressed/released via GLFW
void key_callback(GLFWwindow* window, int key, int scancode, int action, int mode)
{
if (key == GLFW_KEY_ESCAPE && action == GLFW_PRESS)
glfwSetWindowShouldClose(window, GL_TRUE);
}
std::vector<GLfloat> add_color(std::vector<GLfloat> sphere) {
// Add color
std::vector<GLfloat> colored_sphere;
for (GLint i = 0; i < sphere.size(); i+=9) {
colored_sphere.push_back(sphere[i]);
colored_sphere.push_back(sphere[i+1]);
colored_sphere.push_back(sphere[i+2]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(sphere[i+3]);
colored_sphere.push_back(sphere[i+4]);
colored_sphere.push_back(sphere[i+5]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(sphere[i+6]);
colored_sphere.push_back(sphere[i+7]);
colored_sphere.push_back(sphere[i+8]);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
colored_sphere.push_back(0.0f);
}
return colored_sphere;
}
std::vector<GLfloat> tesselate(std::vector<GLfloat> shape, int recursion) {
if (recursion > 0) {
std::vector<GLfloat> new_sphere = {};
for (GLint i = 0; i < shape.size(); i += 9) {
// 1.902113 approximately
GLfloat radius = sqrt(1.0f + pow((1.0f + sqrt(5.0f)) / 2.0f, 2));
// Every 9 points is a triangle. Take 1 triangle and turn it into 4 triangles.
GLfloat p_one[] = {shape[i], shape[i + 1], shape[i + 2]};
GLfloat p_two[] = {shape[i + 3], shape[i + 4], shape[i + 5]};
GLfloat p_thr[] = {shape[i + 6], shape[i + 7], shape[i + 8]};
GLfloat p_one_two[] = { (p_one[0] + p_two[0]) / 2.0f, (p_one[1] + p_two[1]) / 2.0f, (p_one[2] + p_two[2]) / 2.0f };
GLfloat p_one_thr[] = { (p_one[0] + p_thr[0]) / 2.0f, (p_one[1] + p_thr[1]) / 2.0f, (p_one[2] + p_thr[2]) / 2.0f };
GLfloat p_two_thr[] = { (p_two[0] + p_thr[0]) / 2.0f, (p_two[1] + p_thr[1]) / 2.0f, (p_two[2] + p_thr[2]) / 2.0f };
GLfloat r_one_two = sqrt((p_one_two[0]*p_one_two[0]) + (p_one_two[1]*p_one_two[1]) + (p_one_two[2]*p_one_two[2]));
GLfloat r_one_thr = sqrt((p_one_thr[0]*p_one_thr[0]) + (p_one_thr[1]*p_one_thr[1]) + (p_one_thr[2]*p_one_thr[2]));
GLfloat r_two_thr = sqrt((p_two_thr[0]*p_two_thr[0]) + (p_two_thr[1]*p_two_thr[1]) + (p_two_thr[2]*p_two_thr[2]));
GLfloat t_one_two[] = { radius * p_one_two[0] / r_one_two, radius * p_one_two[1] / r_one_two, radius * p_one_two[2] / r_one_two };
GLfloat t_one_thr[] = { radius * p_one_thr[0] / r_one_thr, radius * p_one_thr[1] / r_one_thr, radius * p_one_thr[2] / r_one_thr };
GLfloat t_two_thr[] = { radius * p_two_thr[0] / r_two_thr, radius * p_two_thr[1] / r_two_thr, radius * p_two_thr[2] / r_two_thr };
// Triangle 1:
new_sphere.push_back(p_one[0]);
new_sphere.push_back(p_one[1]);
new_sphere.push_back(p_one[2]);
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
// Triangle 2:
new_sphere.push_back(p_two[0]);
new_sphere.push_back(p_two[1]);
new_sphere.push_back(p_two[2]);
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
// Triangle 3:
new_sphere.push_back(p_thr[0]);
new_sphere.push_back(p_thr[1]);
new_sphere.push_back(p_thr[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
// Center Triangle:
new_sphere.push_back(t_one_two[0]);
new_sphere.push_back(t_one_two[1]);
new_sphere.push_back(t_one_two[2]);
new_sphere.push_back(t_one_thr[0]);
new_sphere.push_back(t_one_thr[1]);
new_sphere.push_back(t_one_thr[2]);
new_sphere.push_back(t_two_thr[0]);
new_sphere.push_back(t_two_thr[1]);
new_sphere.push_back(t_two_thr[2]);
}
return tesselate(new_sphere, recursion - 1);
}
printf("number of vertices to be rendered: %d || ", shape.size());
return shape;
}
std::vector<GLfloat> create_sphere(int recursion) {
// Define the starting icosahedron
GLfloat t_ = (1.0f + sqrt(5.0f)) / 2.0f;
std::vector<GLfloat> icosahedron = {
-1.0f, t_, 0.0f, -t_, 0.0f, 1.0f, 0.0f, 1.0f, t_,
-1.0f, t_, 0.0f, 0.0f, 1.0f, t_, 1.0f, t_, 0.0f,
-1.0f, t_, 0.0f, 1.0f, t_, 0.0f, 0.0f, 1.0f, -t_,
-1.0f, t_, 0.0f, 0.0f, 1.0f, -t_, -t_, 0.0f, -1.0f,
-1.0f, t_, 0.0f, -t_, 0.0f, -1.0f, -t_, 0.0f, 1.0f,
1.0f, t_, 0.0f, 0.0f, 1.0f, t_, t_, 0.0f, 1.0f,
0.0f, 1.0f, t_, -t_, 0.0f, 1.0f, 0.0f, -1.0f, t_,
-t_, 0.0f, 1.0f, -t_, 0.0f, -1.0f, -1.0f, -t_, 0.0f,
-t_, 0.0f, -1.0f, 0.0f, 1.0f, -t_, 0.0f, -1.0f, -t_,
0.0f, 1.0f, -t_, 1.0f, t_, 0.0f, t_, 0.0f, -1.0f,
1.0f, -t_, 0.0f, t_, 0.0f, 1.0f, 0.0f, -1.0f, t_,
1.0f, -t_, 0.0f, 0.0f, -1.0f, t_,-1.0f, -t_, 0.0f,
1.0f, -t_, 0.0f,-1.0f, -t_, 0.0f, 0.0f, -1.0f, -t_,
1.0f, -t_, 0.0f, 0.0f, -1.0f, -t_, t_, 0.0f, -1.0f,
1.0f, -t_, 0.0f, t_, 0.0f, -1.0f, t_, 0.0f, 1.0f,
0.0f, -1.0f, t_, t_, 0.0f, 1.0f, 0.0f, 1.0f, t_,
-1.0f, -t_, 0.0f, 0.0f, -1.0f, t_,-t_, 0.0f, 1.0f,
0.0f, -1.0f, -t_,-1.0f, -t_, 0.0f,-t_, 0.0f, -1.0f,
t_, 0.0f, -1.0f, 0.0f, -1.0f, -t_, 0.0f, 1.0f, -t_,
t_, 0.0f, 1.0f, t_, 0.0f, -1.0f, 1.0f, t_, 0.0f,
};
// Tesselate the icososphere the number of times recursion
std::vector<GLfloat> colorless_sphere = tesselate(icosahedron, recursion);
// Add color and return
return add_color(colorless_sphere);
}
Vertex Shader: (named core.vs)
#version 330 core
layout (location = 0) in vec3 position;
layout (location = 1) in vec3 color;
layout (location = 2) in vec2 offset;
out vec3 fColor;
uniform mat4 model;
uniform mat4 view;
uniform mat4 projection;
void main()
{
gl_Position = projection * view * model * vec4(position.x + offset.x, position.y + offset.y, position.z, 1.0f);
fColor = color;
}
Fragment Shader: (named core.frag)
#version 330 core
in vec3 fColor;
out vec4 color;
void main()
{
color = vec4(fColor, 1.0f);
}
Shader class: (named Shader.h)
#ifndef SHADER_H
#define SHADER_H
#include <string>
#include <fstream>
#include <sstream>
#include <iostream>
#include <GL/glew.h>
class Shader
{
public:
GLuint Program;
// Constructor generates the shader on the fly
Shader(const GLchar* vertexPath, const GLchar* fragmentPath)
{
// 1. Retrieve the vertex/fragment source code from filePath
std::string vertexCode;
std::string fragmentCode;
std::ifstream vShaderFile;
std::ifstream fShaderFile;
// ensures ifstream objects can throw exceptions:
vShaderFile.exceptions(std::ifstream::badbit);
fShaderFile.exceptions(std::ifstream::badbit);
try
{
// Open files
vShaderFile.open(vertexPath);
fShaderFile.open(fragmentPath);
std::stringstream vShaderStream, fShaderStream;
// Read file's buffer contents into streams
vShaderStream << vShaderFile.rdbuf();
fShaderStream << fShaderFile.rdbuf();
// close file handlers
vShaderFile.close();
fShaderFile.close();
// Convert stream into string
vertexCode = vShaderStream.str();
fragmentCode = fShaderStream.str();
}
catch (std::ifstream::failure e)
{
std::cout << "ERROR::SHADER::FILE_NOT_SUCCESFULLY_READ" << std::endl;
}
const GLchar* vShaderCode = vertexCode.c_str();
const GLchar * fShaderCode = fragmentCode.c_str();
// 2. Compile shaders
GLuint vertex, fragment;
GLint success;
GLchar infoLog[512];
// Vertex Shader
vertex = glCreateShader(GL_VERTEX_SHADER);
glShaderSource(vertex, 1, &vShaderCode, NULL);
glCompileShader(vertex);
// Print compile errors if any
glGetShaderiv(vertex, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(vertex, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::VERTEX::COMPILATION_FAILED\n" << infoLog << std::endl;
}
// Fragment Shader
fragment = glCreateShader(GL_FRAGMENT_SHADER);
glShaderSource(fragment, 1, &fShaderCode, NULL);
glCompileShader(fragment);
// Print compile errors if any
glGetShaderiv(fragment, GL_COMPILE_STATUS, &success);
if (!success)
{
glGetShaderInfoLog(fragment, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::FRAGMENT::COMPILATION_FAILED\n" << infoLog << std::endl;
}
// Shader Program
this->Program = glCreateProgram();
glAttachShader(this->Program, vertex);
glAttachShader(this->Program, fragment);
glLinkProgram(this->Program);
// Print linking errors if any
glGetProgramiv(this->Program, GL_LINK_STATUS, &success);
if (!success)
{
glGetProgramInfoLog(this->Program, 512, NULL, infoLog);
std::cout << "ERROR::SHADER::PROGRAM::LINKING_FAILED\n" << infoLog << std::endl;
}
// Delete the shaders as they're linked into our program now and no longer necessery
glDeleteShader(vertex);
glDeleteShader(fragment);
}
// Uses the current shader
void Use()
{
glUseProgram(this->Program);
}
};
#endif
My ultimate goal is to render 1 million spheres of different sizes and colors at 60 fps.
This is an unreasonable expectation.
Let's say that each sphere consists of 50 triangles. Kinda small for a good sphere shape, but lets assume they're that small.
1 million spheres at 50 tris per sphere is 50 million triangles per frame. At 60 FPS, that's 3 billion triangles per second.
No commercially available GPU is good enough to do that. And that's just a 50 triangle sphere; your 4x tessellated icosahedron will be over 5,000 triangles.
Now yes, drawing 60 such spheres is only ~300,000 triangles per frame. But even that at 60 FPS is ~18 million triangles per second. Hardware does exist that can handle that many triangles, but it's very clearly a lot. And you're definitely not going to get 1 million of them.
This is not a matter of GPU/CPU communication or overhead. You're simply throwing more work at your GPU than it could handle. You might be able to improve a couple of things here and there, but nothing that's going to get you even one tenth of what you want.
At least, not with this overall approach.
For your particular case of wanting to draw millions of spheres, I would use raytraced impostors rather than actual geometry of spheres. That is, you draw quads, who's positions are generated by the vertex (or geometry) shader. You generate a quad per sphere, such that the quad circumscribes the sphere. Then the fragment shader does a simple ray-sphere intersection test to see if the fragment in question (from the direction of the camera view) hits the sphere or not. If the ray doesn't hit the sphere, you discard the fragment.
You would also need to modify gl_FragDepth
to give the impostor the proper depth value, so that intersecting spheres can work.