[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Getfem-commits] r5429 - in /trunk/getfem: contrib/opt_assembly/ src/ te
From: |
Yves . Renard |
Subject: |
[Getfem-commits] r5429 - in /trunk/getfem: contrib/opt_assembly/ src/ tests/ |
Date: |
Thu, 20 Oct 2016 11:27:47 -0000 |
Author: renard
Date: Thu Oct 20 13:27:46 2016
New Revision: 5429
URL: http://svn.gna.org/viewcvs/getfem?rev=5429&view=rev
Log:
further small opitmizations and a fiw on assembly assignmenta
Modified:
trunk/getfem/contrib/opt_assembly/opt_assembly.cc
trunk/getfem/src/bgeot_geometric_trans.cc
trunk/getfem/src/getfem_generic_assembly.cc
trunk/getfem/tests/test_interpolated_fem.cc
Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc Thu Oct 20 13:27:46 2016
@@ -438,35 +438,35 @@
// - Instructions execution except for assembly ones
// new | old | sto | asse | exec | Ins |
test_new_assembly(2, 400, 1); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
- // Mass (scalar) : 0.23 | 0.61 | 0.04 | 0.07 | 0.10 | 0.06 |
- // Mass (vector) : 0.36 | 0.82 | 0.09 | 0.17 | 0.10 | 0.09 |
- // Laplacian : 0.20 | 0.83 | 0.04 | 0.05 | 0.10 | 0.05 |
- // Homogeneous elas : 0.34 | 1.88 | 0.08 | 0.13 | 0.10 | 0.11 |
- // Non-homogeneous elast: 0.40 | 2.26 | 0.09 | 0.16 | 0.10 | 0.14 |
+ // Mass (scalar) : 0.18 | 0.61 | 0.04 | 0.06 | 0.06 | 0.06 |
+ // Mass (vector) : 0.30 | 0.82 | 0.09 | 0.15 | 0.06 | 0.09 |
+ // Laplacian : 0.16 | 0.83 | 0.04 | 0.05 | 0.06 | 0.05 |
+ // Homogeneous elas : 0.31 | 1.88 | 0.08 | 0.13 | 0.06 | 0.10 |
+ // Non-homogeneous elast: 0.37 | 2.26 | 0.09 | 0.16 | 0.06 | 0.15 |
test_new_assembly(3, 36, 1); // ndofu = 151959 ndofp = 50653 ndofchi = 6553
- // Mass (scalar) : 0.33 | 0.77 | 0.05 | 0.10 | 0.17 | 0.06 |
- // Mass (vector) : 0.86 | 1.54 | 0.17 | 0.34 | 0.17 | 0.35 |
- // Laplacian : 0.37 | 1.38 | 0.03 | 0.06 | 0.17 | 0.14 |
- // Homogeneous elas : 0.98 | 4.58 | 0.26 | 0.34 | 0.17 | 0.47 |
- // Non-homogeneous elast: 1.05 | 6.72 | 0.26 | 0.34 | 0.17 | 0.54 |
+ // Mass (scalar) : 0.28 | 0.77 | 0.05 | 0.09 | 0.13 | 0.06 |
+ // Mass (vector) : 0.76 | 1.54 | 0.17 | 0.29 | 0.13 | 0.34 |
+ // Laplacian : 0.32 | 1.38 | 0.03 | 0.06 | 0.13 | 0.13 |
+ // Homogeneous elas : 0.94 | 4.58 | 0.26 | 0.33 | 0.14 | 0.47 |
+ // Non-homogeneous elast: 1.01 | 6.72 | 0.26 | 0.33 | 0.14 | 0.54 |
test_new_assembly(2, 200, 2); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
- // Mass (scalar) : 0.11 | 0.25 | 0.02 | 0.04 | 0.04 | 0.03 |
- // Mass (vector) : 0.30 | 0.44 | 0.05 | 0.12 | 0.04 | 0.14 |
- // Laplacian : 0.10 | 0.37 | 0.02 | 0.03 | 0.04 | 0.03 |
- // Homogeneous elas : 0.28 | 1.28 | 0.06 | 0.10 | 0.04 | 0.14 |
- // Non-homogeneous elast: 0.34 | 2.40 | 0.07 | 0.11 | 0.04 | 0.19 |
+ // Mass (scalar) : 0.09 | 0.25 | 0.02 | 0.03 | 0.03 | 0.03 |
+ // Mass (vector) : 0.26 | 0.44 | 0.05 | 0.09 | 0.03 | 0.14 |
+ // Laplacian : 0.09 | 0.37 | 0.02 | 0.03 | 0.03 | 0.03 |
+ // Homogeneous elas : 0.26 | 1.28 | 0.06 | 0.09 | 0.03 | 0.14 |
+ // Non-homogeneous elast: 0.31 | 2.40 | 0.07 | 0.10 | 0.03 | 0.18 |
test_new_assembly(3, 18, 2); // ndofu = 151959 ndofp = 50653 ndofchi = 6553
- // Mass (scalar) : 0.15 | 0.29 | 0.05 | 0.09 | 0.03 | 0.03 |
- // Mass (vector) : 1.32 | 0.90 | 0.21 | 0.51 | 0.03 | 0.78 |
+ // Mass (scalar) : 0.13 | 0.29 | 0.05 | 0.07 | 0.03 | 0.03 |
+ // Mass (vector) : 1.18 | 0.90 | 0.21 | 0.37 | 0.03 | 0.78 |
// Laplacian : 0.11 | 0.55 | 0.03 | 0.05 | 0.03 | 0.05 |
- // Homogeneous elas : 1.73 | 3.47 | 0.59 | 0.76 | 0.03 | 0.94 |
- // Non-homogeneous elast: 1.80 | 9.25 | 0.59 | 0.76 | 0.03 | 1.01 |
+ // Homogeneous elas : 1.70 | 3.47 | 0.59 | 0.73 | 0.03 | 0.94 |
+ // Non-homogeneous elast: 1.77 | 9.25 | 0.59 | 0.73 | 0.03 | 1.01 |
test_new_assembly(3, 9, 4); // ndofu = 151959 ndofp = 50653 ndofchi = 6553
- // Mass (scalar) : 0.59 | 0.34 | 0.09 | 0.23 | 0.01 | 0.34 |
- // Mass (vector) : 4.87 | 1.32 | 0.41 | 1.71 | 0.01 | 3.15 |
- // Laplacian : 0.44 | 0.77 | 0.09 | 0.18 | 0.01 | 0.25 |
- // Homogeneous elas : 9.22 | 5.26 | 0.88 | 1.66 | 0.01 | 7.55 |
- // Non-homogeneous elast: 9.29 | 48.0 | 0.76 | 1.56 | 0.01 | 7.72 |
+ // Mass (scalar) : 0.52 | 0.34 | 0.09 | 0.16 | 0.01 | 0.34 |
+ // Mass (vector) : 4.46 | 1.32 | 0.41 | 1.30 | 0.01 | 3.15 |
+ // Laplacian : 0.41 | 0.77 | 0.09 | 0.15 | 0.01 | 0.25 |
+ // Homogeneous elas : 8.99 | 5.26 | 0.88 | 1.43 | 0.01 | 7.55 |
+ // Non-homogeneous elast: 9.14 | 48.0 | 0.76 | 1.41 | 0.01 | 7.72 |
// Conclusions :
// - Desactivation of debug test has no sensible effect.
Modified: trunk/getfem/src/bgeot_geometric_trans.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/bgeot_geometric_trans.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/src/bgeot_geometric_trans.cc (original)
+++ trunk/getfem/src/bgeot_geometric_trans.cc Thu Oct 20 13:27:46 2016
@@ -29,24 +29,23 @@
namespace bgeot {
void geometric_trans::compute_K_matrix
- (const base_matrix &G, const base_matrix &pc, base_matrix &K) const{
+ (const base_matrix &G, const base_matrix &pc, base_matrix &K) const {
// gmm::mult(G, pc, K);
-
// Faster than the lapack call on my config ...
size_type N=gmm::mat_nrows(G), P=gmm::mat_nrows(pc), Q=gmm::mat_ncols(pc);
if (N && P && Q) {
auto itK = K.begin();
- for (size_type i = 0; i < N; ++i)
- for (size_type j = 0; j < Q; ++j) {
- auto itG = G.begin() + j;
- auto itpc = pc.begin() + i*P;
- *itK = *(itG) * (*itpc++); itG += N;
+ for (size_type j = 0; j < Q; ++j)
+ for (size_type i = 0; i < N; ++i) {
+ auto itG = G.begin() + i;
+ auto itpc = pc.begin() + j*P;
+ register scalar_type a = *(itG) * (*itpc++); itG += N;
for (size_type k = 1; k < P; ++k, itG += N)
- *itK += *(itG) * (*itpc++);
- ++itK;
+ a += *(itG) * (*itpc++);
+ *itK++ = a;
}
- }
+ } else gmm::clear(K);
}
const base_node& geotrans_interpolation_context::xref() const {
@@ -81,9 +80,9 @@
else {
// J_ = gmm::abs(gmm::lu_det(KK));
if (P <= 2) {
- const scalar_type *p = &(KK(0,0));
- if (P == 1) J_ = gmm::abs(*p);
- else J_ = gmm::abs((*p) * (*(p+3)) - (*(p+1)) * (*(p+2)));
+ auto it = KK.begin();
+ if (P == 1) J_ = gmm::abs(*it);
+ else J_ = gmm::abs((*it) * (it[3]) - (it[1]) * (it[2]));
} else {
B_factors.base_resize(P, P);
gmm::copy(gmm::transposed(KK), B_factors);
Modified: trunk/getfem/src/getfem_generic_assembly.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_generic_assembly.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/src/getfem_generic_assembly.cc (original)
+++ trunk/getfem/src/getfem_generic_assembly.cc Thu Oct 20 13:27:46 2016
@@ -2025,6 +2025,7 @@
std::list<ga_tree> trees; // The trees are stored mainly because they
// contain the intermediary tensors.
+ std::list<ga_tree> interpolation_trees;
typedef std::map<region_mim, region_mim_instructions> instructions_set;
@@ -5594,9 +5595,26 @@
GA_DEBUG_INFO("Instruction: matrix term assembly");
if (ipt == 0 || interpolate) {
elem.resize(t.size());
- gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+ auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+ scalar_type e = coeff*alpha1*alpha2;
+ size_type nd = t.size() / 4;
+ for (size_type i = 0; i < nd; ++i) {
+ *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+ *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+ }
+ for (; it != ite;) *it++ = (*itt++) * e;
+ // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
} else {
- gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+ // Faster than a daxpy blas call on my config
+ auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+ scalar_type e = coeff*alpha1*alpha2;
+ size_type nd = t.size() / 4;
+ for (size_type i = 0; i < nd; ++i) {
+ *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+ *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+ }
+ for (; it != ite;) *it++ += (*itt++) * e;
+ // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
}
if (ipt == nbpt-1 || interpolate) {
const mesh_fem *pmf1 = mfg1 ? *mfg1 : mfn1;
@@ -5704,9 +5722,26 @@
"scalar fems");
if (ipt == 0) {
elem.resize(t.size());
- gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+ auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+ scalar_type e = coeff*alpha1*alpha2;
+ size_type nd = t.size() / 4;
+ for (size_type i = 0; i < nd; ++i) {
+ *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+ *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+ }
+ for (; it != ite;) *it++ = (*itt++) * e;
+ // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
} else {
- gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+ // Faster than a daxpy blas call on my config
+ auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+ scalar_type e = coeff*alpha1*alpha2;
+ size_type nd = t.size() / 4;
+ for (size_type i = 0; i < nd; ++i) {
+ *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+ *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+ }
+ for (; it != ite;) *it++ += (*itt++) * e;
+ // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
}
if (ipt == nbpt-1) {
GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
@@ -5774,9 +5809,26 @@
"vector fems");
if (ipt == 0) {
elem.resize(t.size());
- gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+ auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+ scalar_type e = coeff*alpha1*alpha2;
+ size_type nd = t.size() / 4;
+ for (size_type i = 0; i < nd; ++i) {
+ *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+ *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+ }
+ for (; it != ite;) *it++ = (*itt++) * e;
+ // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
} else {
- gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+ // (Far) faster than a daxpy blas call on my config.
+ auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+ scalar_type e = coeff*alpha1*alpha2;
+ size_type nd = t.size() / 4;
+ for (size_type i = 0; i < nd; ++i) {
+ *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+ *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+ }
+ for (; it != ite;) *it++ += (*itt++) * e;
+ // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
}
if (ipt == nbpt-1) {
GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
@@ -11627,14 +11679,21 @@
((version == 0 && td.order == order) || // Assembly
((version > 0 && (td.order == size_type(-1) || // Assignment
td.order == size_type(-2) - order))))) {
- gis.trees.push_back(*(td.ptree));
-
+ ga_tree *added_tree = 0;
+ if (td.interpolation) {
+ gis.interpolation_trees.push_back(*(td.ptree));
+ added_tree = &(gis.interpolation_trees.back());
+ } else {
+ gis.trees.push_back(*(td.ptree));
+ added_tree = &(gis.trees.back());
+ }
+
// Semantic analysis mainly to evaluate fixed size variables and data
- ga_semantic_analysis("", gis.trees.back(), workspace,
+ ga_semantic_analysis("", *added_tree, workspace,
td.mim->linked_mesh().dim(),
ref_elt_dim_of_mesh(td.mim->linked_mesh()),
true, false);
- pga_tree_node root = gis.trees.back().root;
+ pga_tree_node root = added_tree->root;
if (root) {
// Compile tree
// cout << "Will compile "; ga_print_node(root, cout); cout << endl;
Modified: trunk/getfem/tests/test_interpolated_fem.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/tests/test_interpolated_fem.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/tests/test_interpolated_fem.cc (original)
+++ trunk/getfem/tests/test_interpolated_fem.cc Thu Oct 20 13:27:46 2016
@@ -41,7 +41,7 @@
using bgeot::dim_type;
typedef gmm::wsvector<scalar_type> sparse_vector_type;
-typedef gmm::row_matrix<sparse_vector_type> sparse_matrix_type;
+typedef gmm::col_matrix<sparse_vector_type> sparse_matrix_type;
typedef std::vector<scalar_type> linalg_vector;
/**************************************************************************/
@@ -302,7 +302,7 @@
cout << "MM=" << MM << "\n";
cout << "mflnk.nb_dof()=" << mflnk.nb_dof() << ", mf2.nb_dof()="
<< mf2.nb_dof() << ", mf1.nb_dof=" << mf1.nb_dof() << "\n";
- cout << "Matrice de masse\n";
+ cout << "Mass matrix\n";
scalar_type sum = 0.0;
for (size_type i = 0; i < MM.nrows(); i++) {
scalar_type slig = 0;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Getfem-commits] r5429 - in /trunk/getfem: contrib/opt_assembly/ src/ tests/,
Yves . Renard <=