I noticed when implementing the dot product between two sparse vectors in a multi-threaded for loop, that the efficiency of the multi-threading (as measured by the ratio between the CPU time and wall-time) is very bad.
I tried to implement the dot product myself as below, in a multi-threaded way, but still with no improvement! float Dot(Svec v1, Svec& v2) // Svec is the GMM Sparse vector
{
typename gmm::linalg_traits<Svec>::const_iterator its = gmm::vect_const_begin(v1);
typename gmm::linalg_traits<Svec>::const_iterator ite = gmm::vect_const_end(v1);
typename gmm::linalg_traits<Svec>::const_iterator it;
float result=0.0f;
omp_set_num_threads(8);
#pragma omp parallel for default(shared)
for(it=its;it!=ite;it++)
{
int i=it.index() ;
#pragma omp atomic
result += real( conj(v1[i]) * v2[i] );
}
return result;
}