c++ optimization vector

Why are these functions performing so differently?


I made two functions for adding vectors together so that I can add sine waves. One of them adds with the + operator; the other takes a lambda that performs the addition, which makes it easier for me to write functions for multiple operations. The + version takes about 5 microseconds; the lambda version takes around 17000.

#include <iostream>
#include <vector>
#include <stdint.h>
#include <numbers>
#include <functional>
#include <chrono>
#include <cmath>

// Element type of every waveform vector in this file: a signed 16-bit integer.
using i16 = int16_t;

// Samples per unit of duration; sineWave uses it to size its output and to convert a sample index into a time.
constexpr float sample_rate = 44100;

std::vector<i16> sineWave(float frequency, float amplitude, float duration){
    int sampleCount = duration*sample_rate;
    std::vector<i16> samples(sampleCount);
    
    for(float i = 0;i<sampleCount;i++){
        samples[i] = static_cast<i16>(std::sin(2.0f*std::numbers::pi*frequency*(i/sample_rate))*amplitude);
    }
    
    return samples;
    
}

std::vector<i16> waveAdd(std::vector<i16>& wave1, std::vector<i16>& wave2){
    if(wave1.size() > wave2.size()){
        wave1.resize(wave2.size());
    }
    else if(wave2.size() > wave1.size()){
        wave2.resize(wave1.size());
    }
    
    std::vector<i16> final(wave1.size());
    for(size_t i = 0;i<wave1.size();i++){
        final[i] = wave1[i]+wave2[i];
    }
    return final;
}

std::vector<i16> waveOperation(std::vector<i16>& wave1, std::vector<i16>& wave2, std::function<i16(i16,i16)> operation){
    if(wave1.size() > wave2.size()){
        wave1.resize(wave2.size());
    }
    else if(wave2.size() > wave1.size()){
        wave2.resize(wave1.size());
    }
    
    std::vector<i16> final(wave1.size());
    for(size_t i = 0;i<wave1.size();i++){
        final[i] = operation(wave1[i],wave2[i]);
    }
    return final;
}

// Builds two sine waves, times one call to waveAdd and one call to waveOperation
// on them, and prints both elapsed times in microseconds.
//
// NOTE(review): std::chrono::high_resolution_clock is not guaranteed to be steady;
// std::chrono::steady_clock is the usual choice for measuring intervals.
int main(){
    using Clock = std::chrono::high_resolution_clock;
    using Micros = std::chrono::microseconds;

    auto firstWave = sineWave(10000,1000,20);
    auto secondWave = sineWave(1000,2000,20);

    // Direct addition.
    const auto addBegin = Clock::now();
    auto added = waveAdd(firstWave,secondWave);
    const auto addEnd = Clock::now();

    // The same addition, supplied as a lambda to the generic entry point.
    const auto opBegin = Clock::now();
    auto combined = waveOperation(firstWave,secondWave,[] (i16 lhs, i16 rhs) { return (lhs+rhs); } );
    const auto opEnd = Clock::now();

    const auto addElapsed = std::chrono::duration_cast<Micros>(addEnd-addBegin);
    const auto opElapsed = std::chrono::duration_cast<Micros>(opEnd-opBegin);

    std::cout << "+ operator: " << addElapsed.count() << std::endl;
    std::cout << "lambda: " << opElapsed.count() << std::endl;
}

Is there a way that I can even this out, or would it be better to just make an equalize length helper function then copy and paste and change the operator?

I did run this in cpp.sh and not on my machine, but I don't think that makes much of a difference on the time it takes.


Solution

  • is there a way that I can even this out, or would it be better to just make an equalize length helper function then copy and paste and change the operator?

    First, understand that std::function carries a runtime performance cost. Its type-erasure is not free!

    Second, wrapping your lambda in std::function moves compile-time information to runtime. The compiler can optimize a loop of plain adds, but will have a much harder time optimizing repeated calls through an arbitrary std::function::operator().

    If you know what lambda you want at compile-time, pass the specific lambda you are using.

    Either as a compile-time template parameter:

    template<auto operation>
    std::vector<i16> waveOperation(std::vector<i16>& wave1, std::vector<i16>& wave2){
        if(wave1.size() > wave2.size()){
            wave1.resize(wave2.size());
        }
        else if(wave2.size() > wave1.size()){
            wave2.resize(wave1.size());
        }
        
        std::vector<i16> final(wave1.size());
        for(size_t i = 0;i<wave1.size();i++){
            final[i] = operation(wave1[i],wave2[i]);
        }
        return final;
    }
    

    See the new results

    Or as a deduced template type:

    std::vector<i16> waveOperation(std::vector<i16>& wave1, auto& wave2, auto && operation ){
        if(wave1.size() > wave2.size()){
            wave1.resize(wave2.size());
        }
        else if(wave2.size() > wave1.size()){
            wave2.resize(wave1.size());
        }
        
        std::vector<i16> final(wave1.size());
        for(size_t i = 0;i<wave1.size();i++){
            final[i] = operation(wave1[i],wave2[i]);
        }
        return final;
    }
    

    See the new results

    (Third, std::chrono::high_resolution_clock is rarely the right choice: it is not guaranteed to be steady and may simply be an alias for std::chrono::system_clock. The clock for timing intervals is std::chrono::steady_clock.)