getfem-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Getfem-commits] r5429 - in /trunk/getfem: contrib/opt_assembly/ src/ te


From: Yves . Renard
Subject: [Getfem-commits] r5429 - in /trunk/getfem: contrib/opt_assembly/ src/ tests/
Date: Thu, 20 Oct 2016 11:27:47 -0000

Author: renard
Date: Thu Oct 20 13:27:46 2016
New Revision: 5429

URL: http://svn.gna.org/viewcvs/getfem?rev=5429&view=rev
Log:
further small opitmizations and a fiw on assembly assignmenta

Modified:
    trunk/getfem/contrib/opt_assembly/opt_assembly.cc
    trunk/getfem/src/bgeot_geometric_trans.cc
    trunk/getfem/src/getfem_generic_assembly.cc
    trunk/getfem/tests/test_interpolated_fem.cc

Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc   (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc   Thu Oct 20 13:27:46 2016
@@ -438,35 +438,35 @@
   // - Instructions execution except for assembly ones
   //                        new  | old  | sto  | asse | exec | Ins  |
   test_new_assembly(2, 400, 1); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
-  // Mass (scalar)        : 0.23 | 0.61 | 0.04 | 0.07 | 0.10 | 0.06 |
-  // Mass (vector)        : 0.36 | 0.82 | 0.09 | 0.17 | 0.10 | 0.09 |
-  // Laplacian            : 0.20 | 0.83 | 0.04 | 0.05 | 0.10 | 0.05 |
-  // Homogeneous elas     : 0.34 | 1.88 | 0.08 | 0.13 | 0.10 | 0.11 |
-  // Non-homogeneous elast: 0.40 | 2.26 | 0.09 | 0.16 | 0.10 | 0.14 |
+  // Mass (scalar)        : 0.18 | 0.61 | 0.04 | 0.06 | 0.06 | 0.06 |
+  // Mass (vector)        : 0.30 | 0.82 | 0.09 | 0.15 | 0.06 | 0.09 |
+  // Laplacian            : 0.16 | 0.83 | 0.04 | 0.05 | 0.06 | 0.05 |
+  // Homogeneous elas     : 0.31 | 1.88 | 0.08 | 0.13 | 0.06 | 0.10 |
+  // Non-homogeneous elast: 0.37 | 2.26 | 0.09 | 0.16 | 0.06 | 0.15 |
   test_new_assembly(3, 36, 1);  // ndofu = 151959 ndofp =  50653 ndofchi = 6553
-  // Mass (scalar)        : 0.33 | 0.77 | 0.05 | 0.10 | 0.17 | 0.06 |
-  // Mass (vector)        : 0.86 | 1.54 | 0.17 | 0.34 | 0.17 | 0.35 |
-  // Laplacian            : 0.37 | 1.38 | 0.03 | 0.06 | 0.17 | 0.14 |
-  // Homogeneous elas     : 0.98 | 4.58 | 0.26 | 0.34 | 0.17 | 0.47 |
-  // Non-homogeneous elast: 1.05 | 6.72 | 0.26 | 0.34 | 0.17 | 0.54 |
+  // Mass (scalar)        : 0.28 | 0.77 | 0.05 | 0.09 | 0.13 | 0.06 |
+  // Mass (vector)        : 0.76 | 1.54 | 0.17 | 0.29 | 0.13 | 0.34 |
+  // Laplacian            : 0.32 | 1.38 | 0.03 | 0.06 | 0.13 | 0.13 |
+  // Homogeneous elas     : 0.94 | 4.58 | 0.26 | 0.33 | 0.14 | 0.47 |
+  // Non-homogeneous elast: 1.01 | 6.72 | 0.26 | 0.33 | 0.14 | 0.54 |
   test_new_assembly(2, 200, 2); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
-  // Mass (scalar)        : 0.11 | 0.25 | 0.02 | 0.04 | 0.04 | 0.03 |
-  // Mass (vector)        : 0.30 | 0.44 | 0.05 | 0.12 | 0.04 | 0.14 |
-  // Laplacian            : 0.10 | 0.37 | 0.02 | 0.03 | 0.04 | 0.03 |
-  // Homogeneous elas     : 0.28 | 1.28 | 0.06 | 0.10 | 0.04 | 0.14 |
-  // Non-homogeneous elast: 0.34 | 2.40 | 0.07 | 0.11 | 0.04 | 0.19 |
+  // Mass (scalar)        : 0.09 | 0.25 | 0.02 | 0.03 | 0.03 | 0.03 |
+  // Mass (vector)        : 0.26 | 0.44 | 0.05 | 0.09 | 0.03 | 0.14 |
+  // Laplacian            : 0.09 | 0.37 | 0.02 | 0.03 | 0.03 | 0.03 |
+  // Homogeneous elas     : 0.26 | 1.28 | 0.06 | 0.09 | 0.03 | 0.14 |
+  // Non-homogeneous elast: 0.31 | 2.40 | 0.07 | 0.10 | 0.03 | 0.18 |
   test_new_assembly(3, 18, 2);  // ndofu = 151959 ndofp =  50653 ndofchi = 6553
-  // Mass (scalar)        : 0.15 | 0.29 | 0.05 | 0.09 | 0.03 | 0.03 |
-  // Mass (vector)        : 1.32 | 0.90 | 0.21 | 0.51 | 0.03 | 0.78 |
+  // Mass (scalar)        : 0.13 | 0.29 | 0.05 | 0.07 | 0.03 | 0.03 |
+  // Mass (vector)        : 1.18 | 0.90 | 0.21 | 0.37 | 0.03 | 0.78 |
   // Laplacian            : 0.11 | 0.55 | 0.03 | 0.05 | 0.03 | 0.05 |
-  // Homogeneous elas     : 1.73 | 3.47 | 0.59 | 0.76 | 0.03 | 0.94 |
-  // Non-homogeneous elast: 1.80 | 9.25 | 0.59 | 0.76 | 0.03 | 1.01 |
+  // Homogeneous elas     : 1.70 | 3.47 | 0.59 | 0.73 | 0.03 | 0.94 |
+  // Non-homogeneous elast: 1.77 | 9.25 | 0.59 | 0.73 | 0.03 | 1.01 |
   test_new_assembly(3, 9, 4);   // ndofu = 151959 ndofp =  50653 ndofchi = 6553
-  // Mass (scalar)        : 0.59 | 0.34 | 0.09 | 0.23 | 0.01 | 0.34 |
-  // Mass (vector)        : 4.87 | 1.32 | 0.41 | 1.71 | 0.01 | 3.15 |
-  // Laplacian            : 0.44 | 0.77 | 0.09 | 0.18 | 0.01 | 0.25 |
-  // Homogeneous elas     : 9.22 | 5.26 | 0.88 | 1.66 | 0.01 | 7.55 |
-  // Non-homogeneous elast: 9.29 | 48.0 | 0.76 | 1.56 | 0.01 | 7.72 |
+  // Mass (scalar)        : 0.52 | 0.34 | 0.09 | 0.16 | 0.01 | 0.34 |
+  // Mass (vector)        : 4.46 | 1.32 | 0.41 | 1.30 | 0.01 | 3.15 |
+  // Laplacian            : 0.41 | 0.77 | 0.09 | 0.15 | 0.01 | 0.25 |
+  // Homogeneous elas     : 8.99 | 5.26 | 0.88 | 1.43 | 0.01 | 7.55 |
+  // Non-homogeneous elast: 9.14 | 48.0 | 0.76 | 1.41 | 0.01 | 7.72 |
 
   // Conclusions :
   // - Desactivation of debug test has no sensible effect.

Modified: trunk/getfem/src/bgeot_geometric_trans.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/bgeot_geometric_trans.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/src/bgeot_geometric_trans.cc   (original)
+++ trunk/getfem/src/bgeot_geometric_trans.cc   Thu Oct 20 13:27:46 2016
@@ -29,24 +29,23 @@
 namespace bgeot {
 
   void geometric_trans::compute_K_matrix
-    (const base_matrix &G, const base_matrix &pc, base_matrix &K) const{
+    (const base_matrix &G, const base_matrix &pc, base_matrix &K) const {
     // gmm::mult(G, pc, K);
-
     // Faster than the lapack call on my config ...
     size_type N=gmm::mat_nrows(G), P=gmm::mat_nrows(pc), Q=gmm::mat_ncols(pc);
     if (N && P && Q) {
       auto itK = K.begin();
       
-      for (size_type i = 0; i < N; ++i)
-       for (size_type j = 0; j < Q; ++j) {
-         auto itG = G.begin() + j;
-         auto itpc = pc.begin() + i*P;
-         *itK = *(itG) * (*itpc++); itG += N;
+      for (size_type j = 0; j < Q; ++j)
+       for (size_type i = 0; i < N; ++i) {
+         auto itG = G.begin() + i;
+         auto itpc = pc.begin() + j*P;
+         register scalar_type a = *(itG) * (*itpc++); itG += N;
          for (size_type k = 1; k < P; ++k, itG += N)
-           *itK += *(itG) * (*itpc++);
-         ++itK;
+           a += *(itG) * (*itpc++);
+         *itK++ = a;
        }
-    }
+    } else gmm::clear(K);
   }
 
   const base_node& geotrans_interpolation_context::xref() const {
@@ -81,9 +80,9 @@
     else {
       // J_ = gmm::abs(gmm::lu_det(KK));
       if (P <= 2) {
-       const scalar_type *p = &(KK(0,0));
-       if (P == 1) J_ = gmm::abs(*p);
-       else J_ = gmm::abs((*p) * (*(p+3)) - (*(p+1)) * (*(p+2)));
+       auto it = KK.begin();
+       if (P == 1) J_ = gmm::abs(*it);
+       else J_ = gmm::abs((*it) * (it[3]) - (it[1]) * (it[2]));
       } else {
        B_factors.base_resize(P, P);
        gmm::copy(gmm::transposed(KK), B_factors);

Modified: trunk/getfem/src/getfem_generic_assembly.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_generic_assembly.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/src/getfem_generic_assembly.cc (original)
+++ trunk/getfem/src/getfem_generic_assembly.cc Thu Oct 20 13:27:46 2016
@@ -2025,6 +2025,7 @@
 
     std::list<ga_tree> trees; // The trees are stored mainly because they
                               // contain the intermediary tensors.
+    std::list<ga_tree> interpolation_trees;
 
     typedef std::map<region_mim, region_mim_instructions> instructions_set;
 
@@ -5594,9 +5595,26 @@
       GA_DEBUG_INFO("Instruction: matrix term assembly");
       if (ipt == 0 || interpolate) {
         elem.resize(t.size());
-        gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+       auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+       scalar_type e = coeff*alpha1*alpha2;
+       size_type nd = t.size() / 4;
+       for (size_type i = 0; i < nd; ++i) {
+         *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+         *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+       }
+       for (; it != ite;) *it++ = (*itt++) * e;
+        // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
       } else {
-        gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+       // Faster than a daxpy blas call on my config
+       auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+       scalar_type e = coeff*alpha1*alpha2;
+       size_type nd = t.size() / 4;
+       for (size_type i = 0; i < nd; ++i) {
+         *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+         *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+       }
+       for (; it != ite;) *it++ += (*itt++) * e;
+        // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
       }
       if (ipt == nbpt-1 || interpolate) {
         const mesh_fem *pmf1 = mfg1 ? *mfg1 : mfn1;
@@ -5704,9 +5722,26 @@
                     "scalar fems");
       if (ipt == 0) {
         elem.resize(t.size());
-        gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+       auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+       scalar_type e = coeff*alpha1*alpha2;
+       size_type nd = t.size() / 4;
+       for (size_type i = 0; i < nd; ++i) {
+         *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+         *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+       }
+       for (; it != ite;) *it++ = (*itt++) * e;
+        // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
       } else {
-        gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+       // Faster than a daxpy blas call on my config
+       auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+       scalar_type e = coeff*alpha1*alpha2;
+       size_type nd = t.size() / 4;
+       for (size_type i = 0; i < nd; ++i) {
+         *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+         *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+       }
+       for (; it != ite;) *it++ += (*itt++) * e;
+        // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
       }
       if (ipt == nbpt-1) {
         GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
@@ -5774,9 +5809,26 @@
                         "vector fems");
       if (ipt == 0) {
         elem.resize(t.size());
-        gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+       auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+       scalar_type e = coeff*alpha1*alpha2;
+       size_type nd = t.size() / 4;
+       for (size_type i = 0; i < nd; ++i) {
+         *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+         *it++ = (*itt++) * e; *it++ = (*itt++) * e;
+       }
+       for (; it != ite;) *it++ = (*itt++) * e;
+        // gmm::copy(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
       } else {
-        gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
+       // (Far) faster than a daxpy blas call on my config.
+       auto itt = t.begin(); auto it = elem.begin(), ite = elem.end();
+       scalar_type e = coeff*alpha1*alpha2;
+       size_type nd = t.size() / 4;
+       for (size_type i = 0; i < nd; ++i) {
+         *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+         *it++ += (*itt++) * e; *it++ += (*itt++) * e;
+       }
+       for (; it != ite;) *it++ += (*itt++) * e;
+        // gmm::add(gmm::scaled(t.as_vector(), coeff*alpha1*alpha2), elem);
       }
       if (ipt == nbpt-1) {
         GA_DEBUG_ASSERT(I1.size() && I2.size(), "Internal error");
@@ -11627,14 +11679,21 @@
            ((version == 0 && td.order == order) || // Assembly
             ((version > 0 && (td.order == size_type(-1) || // Assignment
                                td.order == size_type(-2) - order))))) {
-         gis.trees.push_back(*(td.ptree));
-         
+         ga_tree *added_tree = 0;
+         if (td.interpolation) {
+           gis.interpolation_trees.push_back(*(td.ptree));
+           added_tree = &(gis.interpolation_trees.back());
+         } else {
+           gis.trees.push_back(*(td.ptree));
+           added_tree = &(gis.trees.back());
+         }
+
          // Semantic analysis mainly to evaluate fixed size variables and data
-         ga_semantic_analysis("", gis.trees.back(), workspace,
+         ga_semantic_analysis("", *added_tree, workspace,
                               td.mim->linked_mesh().dim(),
                               ref_elt_dim_of_mesh(td.mim->linked_mesh()),
                               true, false);
-         pga_tree_node root = gis.trees.back().root;
+         pga_tree_node root = added_tree->root;
          if (root) {
            // Compile tree
            // cout << "Will compile "; ga_print_node(root, cout); cout << endl;

Modified: trunk/getfem/tests/test_interpolated_fem.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/tests/test_interpolated_fem.cc?rev=5429&r1=5428&r2=5429&view=diff
==============================================================================
--- trunk/getfem/tests/test_interpolated_fem.cc (original)
+++ trunk/getfem/tests/test_interpolated_fem.cc Thu Oct 20 13:27:46 2016
@@ -41,7 +41,7 @@
 using bgeot::dim_type;
 
 typedef gmm::wsvector<scalar_type> sparse_vector_type;
-typedef gmm::row_matrix<sparse_vector_type> sparse_matrix_type;
+typedef gmm::col_matrix<sparse_vector_type> sparse_matrix_type;
 typedef std::vector<scalar_type> linalg_vector;
 
 /**************************************************************************/
@@ -302,7 +302,7 @@
   cout << "MM=" << MM << "\n";
   cout << "mflnk.nb_dof()=" << mflnk.nb_dof() << ", mf2.nb_dof()="
        << mf2.nb_dof() << ", mf1.nb_dof=" << mf1.nb_dof() << "\n";
-  cout << "Matrice de masse\n";
+  cout << "Mass matrix\n";
   scalar_type sum = 0.0; 
   for (size_type i = 0; i < MM.nrows(); i++) { 
     scalar_type slig = 0;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]