Cocos中,对于使用相同贴图、没有自定义uniform变量且采用默认绘制指令TrianglesCommand或QuadCommand的节点,可以进行自动合批绘制。(其实自定义uniform的值如果相同,理论上也是可以合批的,就相当于把这个自定义uniform也当作内置的uniform)

合批绘制的本质是将两个模型动态合并为一个模型,并使用相同的材质在一个DrawCall中进行绘制,以减少DrawCall数量,提高运行效率。

Cocos每个节点都会关联零个或一个RenderCommand(渲染指令对象)。

Cocos判断两个节点是否能够合批,靠的是GLProgram(着色器)、TextureID和BlendState联合计算的一个Hash值,这个Hash值是渲染指令对象中的一个叫_materialID的成员。材质ID的生成函数如下:

/**
 * Computes the material ID used to decide whether this command can be
 * batched with its neighbours.
 *
 * When the GL program state reports at least one uniform
 * (getUniformCount() != 0) the command is marked as non-batchable.
 * Otherwise the ID is a 32-bit hash over the GL program handle, the
 * texture ID and the blend source/destination factors.
 */
void QuadCommand::generateMaterialID()
{
    // Guard: a program state that carries uniforms is never batched.
    if (_glProgramState->getUniformCount() != 0)
    {
        _materialID = Renderer::MATERIAL_ID_DO_NOT_BATCH;
        _skipBatching = true;
        return;
    }

    _skipBatching = false;

    // Pack everything that identifies the material into a flat int array
    // so that it can be hashed in one call.
    const int programHandle = (int)_glProgramState->getGLProgram()->getProgram();
    const int hashInput[4] = {
        programHandle,
        (int)_textureID,
        (int)_blendType.src,
        (int)_blendType.dst
    };

    _materialID = XXH32((const void*)hashInput, sizeof(hashInput), 0);
}

合批相关逻辑:在遍历渲染指令进行glDrawElements渲染的时候,不是每个指令都会调用一次glDrawElements渲染(也就是一次drawcall)。遍历的时候判断当前指令的材质ID(_materialID)是否跟上次的材质ID相同,如果相同则不会进行drawcall而是继续遍历并累加索引数量,直到_lastMaterialID != newMaterialID(或当前指令被标记为不可合批),才会执行上一批渲染指令的drawcall。遍历结束后,会执行剩余的最后一批drawcall。代码里注释: //Draw any remaining triangles

在渲染器类Renderer的成员函数drawBatchedTriangles()、drawBatchedQuads()中实现:


/**
 * Uploads the accumulated vertex and index data (_verts / _indices) to the
 * GL buffers and draws all commands queued in _batchedCommands.
 *
 * Consecutive commands that share the same material ID are merged into a
 * single glDrawElements call; a draw call is issued whenever the material
 * changes or a command is flagged MATERIAL_ID_DO_NOT_BATCH.
 *
 * Returns immediately when there is nothing to draw. After drawing, the
 * batched command list and the fill counters are reset.
 */
void Renderer::drawBatchedTriangles()
{
    //TODO: we can improve the draw performance by insert material switching command before hand.
    int indexToDraw = 0;   // number of indices accumulated for the pending draw call
    int startIndex = 0;    // offset (in indices) of the pending draw call inside the index buffer

    // Nothing was filled or queued: no upload and no draw call needed.
    if(_filledVertex <= 0 || _filledIndex <= 0 || _batchedCommands.empty())
    {
        return;
    }

    //Upload buffer to VBO
    if (Configuration::getInstance()->supportsShareableVAO())
    {
        //Bind VAO
        GL::bindVAO(_buffersVAO);
        //Set VBO data
        glBindBuffer(GL_ARRAY_BUFFER, _buffersVBO[0]);
        // Allocate the data store first, then fill it through a mapping.
        glBufferData(GL_ARRAY_BUFFER, sizeof(_verts[0]) * _filledVertex, nullptr, GL_DYNAMIC_DRAW);
        void *buf = glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY);
        if (buf != nullptr)
        {
            memcpy(buf, _verts, sizeof(_verts[0])* _filledVertex);
            glUnmapBuffer(GL_ARRAY_BUFFER);
        }
        else
        {
            // glMapBuffer may return NULL on failure; writing through it would
            // crash. Fall back to a direct upload of the same data instead.
            glBufferData(GL_ARRAY_BUFFER, sizeof(_verts[0]) * _filledVertex, _verts, GL_DYNAMIC_DRAW);
        }

        glBindBuffer(GL_ARRAY_BUFFER, 0);

        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _buffersVBO[1]);
        glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(_indices[0]) * _filledIndex, _indices, GL_STATIC_DRAW);
    }
    else
    {
        // NOTE(review): kQuadSize is not used in the visible part of this
        // function; kept because other code in the file may rely on the macro.
#define kQuadSize sizeof(_verts[0])
        glBindBuffer(GL_ARRAY_BUFFER, _buffersVBO[0]);
        glBufferData(GL_ARRAY_BUFFER, sizeof(_verts[0]) * _filledVertex , _verts, GL_DYNAMIC_DRAW);
        GL::enableVertexAttribs(GL::VERTEX_ATTRIB_FLAG_POS_COLOR_TEX);
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _buffersVBO[1]);
        glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(_indices[0]) * _filledIndex, _indices, GL_STATIC_DRAW);
    }

    //Start drawing vertices in batch (this loop is the batching logic)
    for(const auto& cmd : _batchedCommands)
    {
        auto newMaterialID = cmd->getMaterialID();
        // A material change, or a command that must not be batched, closes
        // the current batch.
        if(_lastMaterialID != newMaterialID || newMaterialID == MATERIAL_ID_DO_NOT_BATCH)
        {
            //Draw everything accumulated so far with the previous material
            if(indexToDraw > 0)
            {
                glDrawElements(GL_TRIANGLES, (GLsizei) indexToDraw, GL_UNSIGNED_SHORT, (GLvoid*) (startIndex*sizeof(_indices[0])) );
                _drawnBatches++;
                _drawnVertices += indexToDraw;

                startIndex += indexToDraw;
                indexToDraw = 0;
            }

            //Use new material
            cmd->useMaterial();
            _lastMaterialID = newMaterialID;
        }

        // Same material as before: only extend the pending index range.
        indexToDraw += cmd->getIndexCount();
    }

    //Draw any remaining triangles
    if(indexToDraw > 0)
    {
        glDrawElements(GL_TRIANGLES, (GLsizei) indexToDraw, GL_UNSIGNED_SHORT, (GLvoid*) (startIndex*sizeof(_indices[0])) );
        _drawnBatches++;
        _drawnVertices += indexToDraw;
    }

    if (Configuration::getInstance()->supportsShareableVAO())
    {
        //Unbind VAO
        GL::bindVAO(0);
    }
    else
    {
        glBindBuffer(GL_ARRAY_BUFFER, 0);
        glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
    }

    // Reset the batch so the next round of commands starts from scratch.
    _batchedCommands.clear();
    _filledVertex = 0;
    _filledIndex = 0;
}

在cocos2d-x中,绘制的顺序通常是这样的: 场景中的所有可见节点按照渲染顺序排序。 引擎遍历所有节点并将它们的顶点数据、纹理信息等打包成一批数据发送到显存。 GPU 根据这批数据执行渲染操作,绘制整个场景。 这个过程中,会将场景中所有节点的顶点数据打包成一批数据一次性发送给显存。这样可以减少 CPU 和 GPU 之间的通信次数,提高渲染效率。

主循环中收集完场景内所有节点的渲染指令后,调用Renderer::render()会将所有节点的顶点数据和顶点索引复制添加到_verts[]和_indices[]中,然后把_verts[]和_indices[]上传到GPU(通过glBufferData/glMapBuffer写入VBO),这些数据准备好后就可以调用glDrawElements一次性绘制多个物体,实现合批绘制。

代码如下:

Renderer::render() -> visitRenderQueue(_renderGroups[0]) -> processRenderCommand() -> fillVerticesAndIndices()

void Renderer::fillVerticesAndIndices(const TrianglesCommand* cmd)
{
    memcpy(_verts + _filledVertex, cmd->getVertices(), sizeof(V3F_C4B_T2F) * cmd->getVertexCount());
    const Mat4& modelView = cmd->getModelView();
    
    for(ssize_t i=0; i< cmd->getVertexCount(); ++i)
    {
        V3F_C4B_T2F *q = &_verts[i + _filledVertex];
        Vec3 *vec1 = (Vec3*)&q->vertices;
        modelView.transformPoint(vec1);
    }
    
    const unsigned short* indices = cmd->getIndices();
    //fill index
    for(ssize_t i=0; i< cmd->getIndexCount(); ++i)
    {
        _indices[_filledIndex + i] = _filledVertex + indices[i];
    }
    
    _filledVertex += cmd->getVertexCount();
    _filledIndex += cmd->getIndexCount();
}

/**
 * Dispatches a single render command according to its type.
 *
 * Only the TRIANGLES_COMMAND branch is shown in this excerpt: the command is
 * appended to the batch list, its geometry is copied into the shared batch
 * arrays, and the batch is drawn right away when the command is flagged as
 * non-batchable.
 *
 * @param command The render command to process.
 */
void Renderer::processRenderCommand(RenderCommand* command)
{
    auto commandType = command->getType();
    if( RenderCommand::Type::TRIANGLES_COMMAND == commandType)
    {
        //Draw if we have batched other commands which are not triangle command
        flush3D();
        flushQuads();
        
        //Process triangle command
        auto cmd = static_cast<TrianglesCommand*>(command);
        
        //... (code omitted in this excerpt)
        
        //Batch Triangles
        _batchedCommands.push_back(cmd);
        
        // Copy the command's vertices/indices into the shared batch arrays
        fillVerticesAndIndices(cmd);
        
        // A command that must not be batched forces the batch to be drawn now
        if(cmd->isSkipBatching())
        {
            drawBatchedTriangles();
        }
        
    }
    //else if (RenderCommand::Type::MESH_COMMAND == commandType)
    //... (remaining code omitted in this excerpt)
    
}

/**
 * Processes every command of a render queue, one sub-queue at a time, in the
 * order: global Z < 0, global Z == 0, global Z > 0.
 *
 * Each non-empty sub-queue is walked front to back through
 * processRenderCommand() and followed by a flush() call (presumably drawing
 * whatever is still batched - confirm against flush()'s definition).
 * The queue's render state is saved on entry and restored on exit.
 *
 * @param queue The render queue to visit.
 */
void Renderer::visitRenderQueue(RenderQueue& queue)
{
    queue.saveRenderState();
    
    //
    //Process Global-Z < 0 Objects
    //
    const auto& zNegQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_NEG);
    if (zNegQueue.size() > 0)
    {
      
           //... (code omitted in this excerpt)
        for (auto it = zNegQueue.cbegin(); it != zNegQueue.cend(); ++it)
        {
            processRenderCommand(*it);
        }
        flush();
    }

    //
    //Process Global-Z = 0 Queue
    //
    const auto& zZeroQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_ZERO);
    if (zZeroQueue.size() > 0)
    {
           //... (code omitted in this excerpt)
        for (auto it = zZeroQueue.cbegin(); it != zZeroQueue.cend(); ++it)
        {
            processRenderCommand(*it);
        }
        flush();
    }
    
    //
    //Process Global-Z > 0 Queue
    //
    const auto& zPosQueue = queue.getSubQueue(RenderQueue::QUEUE_GROUP::GLOBALZ_POS);
    if (zPosQueue.size() > 0)
    {
         //... (code omitted in this excerpt)
        for (auto it = zPosQueue.cbegin(); it != zPosQueue.cend(); ++it)
        {
            processRenderCommand(*it);
        }
        flush();
    }
    //... (code omitted in this excerpt)
    queue.restoreRenderState();

}
/**
 * Renders one frame's worth of collected commands.
 *
 * While a GL view is assigned, every render group is sorted and the first
 * group (_renderGroups[0]) is visited. The _isRendering flag is set for the
 * duration of the call, and clean() is invoked at the end regardless of
 * whether anything was visited.
 */
void Renderer::render()
{
    //TODO: once everything is rendered by the new renderer, clear here with
    //glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);

    //TODO: setup camera or MVP
    _isRendering = true;

    if (_glViewAssigned)
    {
        // Step 1: order the commands inside every render group.
        for (auto& group : _renderGroups)
        {
            group.sort();
        }

        // Step 2: walk the first group, which processes its commands.
        visitRenderQueue(_renderGroups[0]);
    }

    clean();
    _isRendering = false;
}

Logo

这里是一个专注于游戏开发的社区,我们致力于为广大游戏爱好者提供一个良好的学习和交流平台。我们的专区包含了各大流行引擎的技术博文,涵盖了从入门到进阶的各个阶段,无论你是初学者还是资深开发者,都能在这里找到适合自己的内容。除此之外,我们还会不定期举办游戏开发相关的活动,让大家更好地交流互动。加入我们,一起探索游戏开发的奥秘吧!

更多推荐