[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Getfem-commits] r5376 - in /trunk/getfem: contrib/opt_assembly/opt_asse
From: |
Yves . Renard |
Subject: |
[Getfem-commits] r5376 - in /trunk/getfem: contrib/opt_assembly/opt_assembly.cc src/getfem_generic_assembly.cc src/gmm/gmm_vector.h |
Date: |
Mon, 03 Oct 2016 13:40:46 -0000 |
Author: renard
Date: Mon Oct 3 15:40:45 2016
New Revision: 5376
URL: http://svn.gna.org/viewcvs/getfem?rev=5376&view=rev
Log:
small optimizations
Modified:
trunk/getfem/contrib/opt_assembly/opt_assembly.cc
trunk/getfem/src/getfem_generic_assembly.cc
trunk/getfem/src/gmm/gmm_vector.h
Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5376&r1=5375&r2=5376&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc Mon Oct 3 15:40:45 2016
@@ -128,15 +128,17 @@
#define MAT_TEST_1(title, ndof1, ndof2, expr, mim_, I1_, I2_, old_asm) \
cout << "\n" << title << endl; \
+ getfem::model_real_sparse_matrix K(ndof1, ndof2), K2(ndof1, ndof2); \
+ getfem::model_real_sparse_matrix K3(I1_.last()+1, I2_.last()+1); \
workspace.clear_expressions(); \
- workspace.add_expression(expr, mim_); \
- ch.init(); ch.tic(); workspace.assembly(2); ch.toc(); \
+ workspace.set_assembled_matrix(K3); \
+ workspace.add_expression(expr, mim_);
\
+ ch.init(); ch.tic(); workspace.assembly(2); ch.toc();
\
cout << "Elapsed time for new assembly " << ch.elapsed() << endl; \
- getfem::model_real_sparse_matrix K(ndof1, ndof2), K2(ndof1, ndof2); \
ch.init(); ch.tic(); old_asm; ch.toc(); \
gmm::copy(K, K2); \
cout << "Elapsed time for old assembly " << ch.elapsed() << endl; \
- gmm::add(gmm::scaled(gmm::sub_matrix(workspace.assembled_matrix(), \
+ gmm::add(gmm::scaled(gmm::sub_matrix(K3, \
I1_, I2_), scalar_type(-1)), K); \
scalar_type norm_error = gmm::mat_norminf(K); \
cout << "Error : " << norm_error << endl;
@@ -410,22 +412,59 @@
}
-// 1 + Evaluation of the matrix storage computational time
-// 2 + Evaluation of the assembly time
-// 3 + Evaluation of the global instruction time
-// 4 + Resizing opération computational cost
-// 6 - fem_interpolation_context cost
-
int main(int /* argc */, char * /* argv */[]) {
GMM_SET_EXCEPTION_DEBUG; // Exceptions make a memory fault, to debug.
FE_ENABLE_EXCEPT; // Enable floating point exception for Nan.
- // Mesured times for new assembly, old one, alterantive new assembly,
+ // Mesured times for new assembly, old one,
// storage estimate part for the new assembly, global assembly part,
// ga_exec cost (instructions not executed), J computation, resizing
// instruction cost.
+ // new | old | sto | asse | exec | J |resize|
+ test_new_assembly(2, 400, 1); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
+ // Mass : 0.87 | 0.86 | 0.19 | 0.32 | 0.26 | 0.09 | 0.16 |
+ // Laplacian : 0.47 | 0.85 | 0.10 | 0.16 | 0.19 | 0.08 | 0.03 |
+ // Homogeneous elas : 0.77 | 1.91 | 0.23 | 0.30 | 0.18 | 0.07 | 0.08 |
+ // Non-homogeneous elast: 0.94 | 2.32 | 0.26 | 0.32 | 0.18 | 0.08 | 0.08 |
+ test_new_assembly(3, 36, 1); // ndofu = 151959 ndofp = 50653 ndofchi = 6553
+ // Mass : 1.46 | 1.68 | 0.34 | 0.54 | 0.31 | 0.15 | 0.12 |
+ // Laplacian : 0.90 | 1.51 | 0.10 | 0.17 | 0.24 | 0.14 | 0.06 |
+ // Homogeneous elas : 1.94 | 4.77 | 0.88 | 0.95 | 0.24 | 0.14 | 0.11 |
+ // Non-homogeneous elast: 2.06 | 6.81 | 0.74 | 0.86 | 0.24 | 0.14 | 0.11 |
+ test_new_assembly(2, 200, 2); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
+ // Mass : 0.49 | 0.45 | 0.14 | 0.22 | 0.07 | 0.03 | 0.01 |
+ // Laplacian : 0.24 | 0.38 | 0.06 | 0.10 | 0.06 | 0.03 | 0.03 |
+ // Homogeneous elas : 0.62 | 1.28 | 0.22 | 0.25 | 0.05 | 0.02 | 0.11 |
+ // Non-homogeneous elast: 0.72 | 2.43 | 0.23 | 0.28 | 0.05 | 0.02 | 0.11 |
+ test_new_assembly(3, 18, 2); // ndofu = 151959 ndofp = 50653 ndofchi = 6553
+ // Mass : 2.00 | 0.90 | 0.27 | 0.63 | 0.05 | 0.02 | 0.27 |
+ // Laplacian : 0.30 | 0.58 | 0.16 | 0.17 | 0.04 | 0.02 | 0.02 |
+ // Homogeneous elas : 2.50 | 3.48 | 1.53 | 1.72 | 0.04 | 0.02 | 0.35 |
+ // Non-homogeneous elast: 2.55 | 9.32 | 1.48 | 1.68 | 0.04 | 0.02 | 0.37 |
+ test_new_assembly(3, 9, 4); // ndofu = 151959 ndofp = 50653 ndofchi = 6553
+ // Mass : 6.64 | 1.33 | 0.65 | 1.77 | 0.01 | .005 | 0.21 |
+ // Laplacian : 0.78 | 0.79 | 0.32 | 0.43 | 0.01 | .005 | 0.07 |
+ // Homogeneous elas : 10.2 | 5.52 | 0.90 | 1.69 | 0.01 | .005 | 2.50 |
+ // Non-homogeneous elast: 10.1 | 48.0 | 0.95 | 1.48 | 0.01 | .005 | 2.45 |
+
+ // Conclusions :
+ // Desactivation of debug test has no sensible effect.
+ // Compile time of assembly strings is negligible (< 0.0004)
+ // J computation takes half the computational time of the exec part
+ // The optimized instruction call is negligible
+
+
+ // Possible optimizations (focusing on a case)
+ // - Optimization of J computation (especially in 2D and standard cases)
+ // - Unroll loop in used instructions
+ // - Assembly optimization
+ // - storage optimization (matrices ...)
+ // - Fem interpolation context optimization.
+
+ // Original table :
+#if 0
// new | old | sto | asse | exec | J |resize|
test_new_assembly(2, 400, 1);
// Mass : 0.94 | 0.93 | 0.19 | 0.32 | 0.26 | 0.09 | 0.16 |
@@ -452,20 +491,7 @@
// Laplacian : 0.90 | 0.89 | 0.32 | 0.43 | 0.01 | .005 | 0.07 |
// Homogeneous elas : 11.1 | 6.65 | 0.90 | 1.69 | 0.01 | .005 | 2.50 |
// Non-homogeneous elast: 11.0 | 49.1 | 0.95 | 1.48 | 0.01 | .005 | 2.45 |
-
- // Conclusions :
- // Desactivation of debug test has no sensible effect.
- // Compile time is negligible (< 0.0004)
- // J computation takes half the computational time of the exec part
- // The optimized instruction call is negligible
-
-
- // Possible optimizations (focusing on a case)
- // - Optimization of J computation (especially in 2D and standard cases)
- // - Unroll lop in used instructions
- // - Assembly optimization
- // - storage optimization (matrices ...)
- // - Fem interpolation context optimization.
+#endif
return 0;
}
Modified: trunk/getfem/src/getfem_generic_assembly.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_generic_assembly.cc?rev=5376&r1=5375&r2=5376&view=diff
==============================================================================
--- trunk/getfem/src/getfem_generic_assembly.cc (original)
+++ trunk/getfem/src/getfem_generic_assembly.cc Mon Oct 3 15:40:45 2016
@@ -5210,6 +5210,7 @@
for (const size_type &dof1 : dofs1) {
if (gmm::abs(*it) > threshold)
K(dof1, dof2) += *it;
+ // K.col(dof2).wa(dof1, *it);
++it;
}
}
Modified: trunk/getfem/src/gmm/gmm_vector.h
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/gmm/gmm_vector.h?rev=5376&r1=5375&r2=5376&view=diff
==============================================================================
--- trunk/getfem/src/gmm/gmm_vector.h (original)
+++ trunk/getfem/src/gmm/gmm_vector.h Mon Oct 3 15:40:45 2016
@@ -65,9 +65,9 @@
inline bool operator !=(std::complex<T> v) const
{ return ((*pm).r(l) != v); }
inline ref_elt_vector &operator +=(T v)
- { (*pm).w(l,(*pm).r(l) + v); return *this; }
+ { (*pm).wa(l, v); return *this; }
inline ref_elt_vector &operator -=(T v)
- { (*pm).w(l,(*pm).r(l) - v); return *this; }
+ { (*pm).wa(l, -v); return *this; }
inline ref_elt_vector &operator /=(T v)
{ (*pm).w(l,(*pm).r(l) / v); return *this; }
inline ref_elt_vector &operator *=(T v)
@@ -572,6 +572,9 @@
else *(write_access(c)) = e;
}
+ inline void wa(size_type c, const T &e)
+ { if (e != T(0)) { if (read_access(c)) *(write_access(c)) += e; } }
+
inline T r(size_type c) const
{ const T *p = read_access(c); if (p) return *p; else return T(0); }
@@ -743,6 +746,15 @@
else base_type::operator [](c) = e;
}
+ inline void wa(size_type c, const T &e) {
+ GMM_ASSERT2(c < nbl, "out of range");
+ if (e != T(0)) {
+ iterator it = this->lower_bound(c);
+ if (it != this->end()) it->second += e;
+ else base_type::operator [](c) = e;
+ }
+ }
+
inline T r(size_type c) const {
GMM_ASSERT2(c < nbl, "out of range");
const_iterator it = this->lower_bound(c);
@@ -866,7 +878,7 @@
template<typename T> struct elt_rsvector_ {
size_type c; T e;
- /* e is initialized by default to avoid some false warnings of valgrind..
+ /* e is initialized by default to avoid some false warnings of valgrind.
(from http://valgrind.org/docs/manual/mc-manual.html:
When memory is read into the CPU's floating point registers, the
@@ -968,6 +980,7 @@
{ return ref_elt_vector<T, rsvector<T> >(this, c); }
void w(size_type c, const T &e);
+ void wa(size_type c, const T &e);
T r(size_type c) const;
void swap_indices(size_type i, size_type j);
@@ -1020,7 +1033,7 @@
iterator it = std::lower_bound(this->begin(), this->end(), ev);
if (it != this->end() && it->c == j) {
for (iterator ite = this->end() - 1; it != ite; ++it) *it = *(it+1);
- base_type_::resize(nb_stored()-1);
+ base_resize(nb_stored()-1);
}
}
}
@@ -1039,7 +1052,7 @@
else {
elt_rsvector_<T> ev(c, e);
if (nb_stored() == 0) {
- base_type_::resize(1,ev);
+ base_type_::reserve(16); base_type_::resize(1,ev);
}
else {
iterator it = std::lower_bound(this->begin(), this->end(), ev);
@@ -1052,8 +1065,35 @@
base_type_::resize(nb_stored()+1, ev);
if (ind != this->nb_stored() - 1) {
it = this->begin() + ind;
- for (iterator ite = this->end() - 1; ite != it; --ite)
- *ite = *(ite-1);
+ iterator ite = this->end(); --ite; iterator itee = ite; --itee;
+ for (; ite != it; --ite, --itee) *ite = *itee;
+ *it = ev;
+ }
+ }
+ }
+ }
+ }
+
+ template <typename T> void rsvector<T>::wa(size_type c, const T &e) {
+ GMM_ASSERT2(c < nbl, "out of range");
+ if (e != T(0)) {
+ elt_rsvector_<T> ev(c, e);
+ if (nb_stored() == 0) {
+ base_type_::reserve(16); base_type_::resize(1, ev);
+ }
+ else {
+ iterator it = std::lower_bound(this->begin(), this->end(), ev);
+ if (it != this->end() && it->c == c) it->e += e;
+ else {
+ size_type ind = it - this->begin();
+ if (this->nb_stored() - ind > 1100)
+ GMM_WARNING2("Inefficient addition of element in rsvector with "
+ << this->nb_stored() - ind << " non-zero entries");
+ base_type_::resize(nb_stored()+1, ev);
+ if (ind != this->nb_stored() - 1) {
+ it = this->begin() + ind;
+ iterator ite = this->end(); --ite; iterator itee = ite; --itee;
+ for (; ite != it; --ite, --itee) *ite = *itee;
*it = ev;
}
}
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Getfem-commits] r5376 - in /trunk/getfem: contrib/opt_assembly/opt_assembly.cc src/getfem_generic_assembly.cc src/gmm/gmm_vector.h,
Yves . Renard <=