getfem-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Getfem-commits] r5381 - in /trunk/getfem: contrib/opt_assembly/opt_assembly.cc src/getfem_generic_assembly.cc


From: Yves . Renard
Subject: [Getfem-commits] r5381 - in /trunk/getfem: contrib/opt_assembly/opt_assembly.cc src/getfem_generic_assembly.cc
Date: Tue, 04 Oct 2016 08:13:59 -0000

Author: renard
Date: Tue Oct  4 10:13:57 2016
New Revision: 5381

URL: http://svn.gna.org/viewcvs/getfem?rev=5381&view=rev
Log:
another small fix

Modified:
    trunk/getfem/contrib/opt_assembly/opt_assembly.cc
    trunk/getfem/src/getfem_generic_assembly.cc

Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL: http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5381&r1=5380&r2=5381&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc   (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc   Tue Oct  4 10:13:57 2016
@@ -419,42 +419,46 @@
   GMM_SET_EXCEPTION_DEBUG; // Exceptions make a memory fault, to debug.
   FE_ENABLE_EXCEPT;        // Enable floating point exception for Nan.
   
-  // Mesured times for new assembly, old one,
-  // storage estimate part for the new assembly, global assembly part,
-  // ga_exec cost (instructions not executed), J computation, resizing
-  // instruction cost.
+  // Mesured times for
+  // - new assembly,
+  // - old one,
+  // - estimate of the storage in sparse matrices part for the new assembly,
+  // - global assembly part (assembly instruction),
+  // - ga_exec cost (instructions not executed),
+  // - J computation.
   //                        new  | old  | sto  | asse | exec |  J   |
   test_new_assembly(2, 400, 1); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
-  // Mass                 : 0.79 | 0.86 | 0.19 | 0.32 | 0.26 | 0.09 |
-  // Laplacian            : 0.43 | 0.85 | 0.10 | 0.16 | 0.19 | 0.08 |
-  // Homogeneous elas     : 0.67 | 1.91 | 0.23 | 0.30 | 0.18 | 0.07 |
-  // Non-homogeneous elast: 0.83 | 2.32 | 0.26 | 0.32 | 0.18 | 0.08 |
+  // Mass                 : 0.78 | 0.84 | 0.11 | 0.22 | 0.26 | 0.09 |
+  // Laplacian            : 0.42 | 0.83 | 0.05 | 0.11 | 0.19 | 0.08 |
+  // Homogeneous elas     : 0.65 | 1.89 | 0.14 | 0.21 | 0.18 | 0.07 |
+  // Non-homogeneous elast: 0.82 | 2.32 | 0.13 | 0.23 | 0.18 | 0.08 |
   test_new_assembly(3, 36, 1);  // ndofu = 151959 ndofp = 50653 ndofchi = 6553
-  // Mass                 : 1.36 | 1.68 | 0.34 | 0.54 | 0.31 | 0.15 |
-  // Laplacian            : 0.89 | 1.51 | 0.10 | 0.17 | 0.24 | 0.14 |
-  // Homogeneous elas     : 1.92 | 4.77 | 0.88 | 0.95 | 0.24 | 0.14 |
-  // Non-homogeneous elast: 2.05 | 6.81 | 0.74 | 0.86 | 0.24 | 0.14 |
+  // Mass                 : 1.36 | 1.68 | 0.12 | 0.34 | 0.31 | 0.15 |
+  // Laplacian            : 0.87 | 1.49 | 0.05 | 0.11 | 0.24 | 0.14 |
+  // Homogeneous elas     : 1.87 | 4.73 | 0.50 | 0.62 | 0.24 | 0.14 |
+  // Non-homogeneous elast: 2.03 | 6.81 | 0.45 | 0.63 | 0.24 | 0.14 |
   test_new_assembly(2, 200, 2); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
-  // Mass                 : 0.49 | 0.45 | 0.14 | 0.22 | 0.07 | 0.03 |
-  // Laplacian            : 0.21 | 0.38 | 0.06 | 0.10 | 0.06 | 0.03 |
-  // Homogeneous elas     : 0.53 | 1.28 | 0.22 | 0.25 | 0.05 | 0.02 |
-  // Non-homogeneous elast: 0.64 | 2.43 | 0.23 | 0.28 | 0.05 | 0.02 |
+  // Mass                 : 0.44 | 0.45 | 0.07 | 0.14 | 0.07 | 0.03 |
+  // Laplacian            : 0.21 | 0.38 | 0.03 | 0.06 | 0.06 | 0.03 |
+  // Homogeneous elas     : 0.53 | 1.28 | 0.13 | 0.14 | 0.05 | 0.02 |
+  // Non-homogeneous elast: 0.63 | 2.42 | 0.12 | 0.17 | 0.05 | 0.02 |
   test_new_assembly(3, 18, 2);  // ndofu = 151959 ndofp = 50653 ndofchi = 6553
-  // Mass                 : 1.95 | 0.90 | 0.27 | 0.63 | 0.05 | 0.02 |
-  // Laplacian            : 0.29 | 0.58 | 0.16 | 0.17 | 0.04 | 0.02 |
-  // Homogeneous elas     : 2.48 | 3.48 | 1.53 | 1.72 | 0.04 | 0.02 |
-  // Non-homogeneous elast: 2.55 | 9.32 | 1.48 | 1.68 | 0.04 | 0.02 |
+  // Mass                 : 1.95 | 0.90 | 0.22 | 0.53 | 0.05 | 0.02 |
+  // Laplacian            : 0.29 | 0.57 | 0.09 | 0.11 | 0.04 | 0.02 |
+  // Homogeneous elas     : 2.47 | 3.48 | 0.99 | 1.19 | 0.04 | 0.02 |
+  // Non-homogeneous elast: 2.55 | 9.25 | 0.99 | 1.18 | 0.04 | 0.02 |
   test_new_assembly(3, 9, 4);   // ndofu = 151959 ndofp = 50653 ndofchi = 6553
-  // Mass                 : 6.64 | 1.33 | 0.65 | 1.77 | 0.01 | .005 |
-  // Laplacian            : 0.78 | 0.79 | 0.32 | 0.43 | 0.01 | .005 |
-  // Homogeneous elas     : 10.2 | 5.52 | 0.90 | 1.69 | 0.01 | .005 |
-  // Non-homogeneous elast: 10.1 | 48.0 | 0.95 | 1.48 | 0.01 | .005 |
+  // Mass                 : 6.64 | 1.33 | 0.41 | 1.69 | 0.01 | .005 |
+  // Laplacian            : 0.78 | 0.79 | 0.23 | 0.32 | 0.01 | .005 |
+  // Homogeneous elas     : 10.3 | 5.52 | 0.30 | 0.84 | 0.01 | .005 |
+  // Non-homogeneous elast: 10.2 | 48.0 | 0.30 | 0.70 | 0.01 | .005 |
 
   // Conclusions :
-  // Desactivation of debug test has no sensible effect.
-  // Compile time of assembly strings is negligible (< 0.0004)
-  // J computation takes half the computational time of the exec part
-  // The optimized instruction call is negligible
+  // - Desactivation of debug test has no sensible effect.
+  // - Compile time of assembly strings is negligible (< 0.0004)
+  // - J computation takes half the computational time of the exec part
+  // - The optimized instruction call is negligible
+  // - The resize operations has been suppressed for uniform fems
 
 
   // Possible optimizations (focusing on a case)
@@ -468,7 +472,7 @@
 #if 0
   //                        new  | old  | sto  | asse | exec |  J   |resize|
   test_new_assembly(2, 400, 1);
-  // Mass                 : 0.94 | 0.93 | 0.19 | 0.32 | 0.26 | 0.09 | 0.16 |
+  // Mass                 : 0.94 | 0.93 | 0.19 | 0.32 | 0.26 | 0.09 | 0.08 |
   // Laplacian            : 0.49 | 0.88 | 0.10 | 0.16 | 0.19 | 0.08 | 0.03 |
   // Homogeneous elas     : 0.85 | 2.06 | 0.23 | 0.30 | 0.18 | 0.07 | 0.08 |
   // Non-homogeneous elast: 1.04 | 2.43 | 0.26 | 0.32 | 0.18 | 0.08 | 0.08 |

Modified: trunk/getfem/src/getfem_generic_assembly.cc
URL: http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_generic_assembly.cc?rev=5381&r1=5380&r2=5381&view=diff
==============================================================================
--- trunk/getfem/src/getfem_generic_assembly.cc (original)
+++ trunk/getfem/src/getfem_generic_assembly.cc Tue Oct  4 10:13:57 2016
@@ -4416,9 +4416,8 @@
     virtual int exec() {
       GA_DEBUG_INFO("Instruction: reduction operation of size 2 optimized ");
       size_type s1 = tc1.size()/2, s2 = tc2.size()/2;
-      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error " << t.size()
+      GA_DEBUG_ASSERT(t.size() == s1*s2, "Internal error, " << t.size()
                      << " != " << s1 << "*" << s2);
-
       base_tensor::iterator it1=tc1.begin(), it2=tc2.begin(), it2end=it2 + s2;
       for (base_tensor::iterator it = t.begin(); it != t.end(); ++it) {
         *it = (*it1)*(*it2) + it1[s1] * it2[s2];
@@ -5210,8 +5209,7 @@
         for (const size_type &dof2 : dofs2)
           for (const size_type &dof1 : dofs1) {
             if (gmm::abs(*it) > threshold)
-              K(dof1, dof2) += *it;
-             // K.col(dof2).wa(dof1, *it);
+             K(dof1, dof2) += *it;
             ++it;
           }
       }
@@ -9470,7 +9468,6 @@
         pnode->node_type == GA_NODE_SPEC_FUNC ||
         pnode->node_type == GA_NODE_CONSTANT ||
         pnode->node_type == GA_NODE_ALLINDICES ||
-        // pnode->node_type == GA_NODE_ZERO ||   // zero nodes can still have test functions
         pnode->node_type == GA_NODE_RESHAPE) return;
 
     // cout << "compiling "; ga_print_node(pnode, cout); cout << endl;
@@ -10302,8 +10299,6 @@
 
        case GA_MINUS:
          if (pnode->t.size() == 1) {
-           // GA_DEBUG_ASSERT(pnode->nb_test_functions() == 0,
-           //              "Internal error: non zero number of test functions");
            GA_DEBUG_ASSERT(child0->t.size() == 1,
                            "Internal error: child0 not scalar");
            GA_DEBUG_ASSERT(child1->t.size() == 1,
@@ -10332,7 +10327,9 @@
 
        case GA_DOT: case GA_COLON: case GA_MULT:
          {
-           size_type s1 = (child0->t.size()*child1->t.size())/pnode->t.size();
+          size_type tps1 = child0->tensor_proper_size();
+          size_type tps2 = child1->tensor_proper_size();
+           size_type s1 = (tps1 * tps2) / pnode->tensor_proper_size();
            size_type s2 = size_type(round(sqrt(scalar_type(s1))));
 
            pgai = pga_instruction();
@@ -10496,11 +10493,9 @@
          break;
 
        case GA_SKEW:
-         {
-           pgai = std::make_shared<ga_instruction_skew>
-             (pnode->t, child0->t);
-           rmi.instructions.push_back(std::move(pgai));
-         }
+        pgai = std::make_shared<ga_instruction_skew>
+          (pnode->t, child0->t);
+        rmi.instructions.push_back(std::move(pgai));
          break;
 
        case GA_TRACE:
@@ -10682,17 +10677,17 @@
                 (pnode->t[0], child1->t[0], child2->t[0], F);
           } else if (child1->t.size() == 1) {
             if (F.ftype() == 0)
-              pgai = std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
+              pgai=std::make_shared<ga_instruction_eval_func_2arg_first_scalar>
                 (pnode->t, child1->t, child2->t, F.f2());
             else
-              pgai = std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
+              pgai=std::make_shared<ga_instruction_eval_func_2arg_first_scalar_expr>
                 (pnode->t, child1->t, child2->t, F);
           } else if (child2->t.size() == 1) {
             if (F.ftype() == 0)
-              pgai = std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
+              pgai=std::make_shared<ga_instruction_eval_func_2arg_second_scalar>
                 (pnode->t, child1->t, child2->t, F.f2());
             else
-              pgai = std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
+              pgai=std::make_shared<ga_instruction_eval_func_2arg_second_scalar_expr>
                 (pnode->t, child1->t, child2->t, F);
           } else {
             if (F.ftype() == 0)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]