From: Yves . Renard
Subject: [Getfem-commits] r5477 - in /trunk/getfem: contrib/opt_assembly/ doc/sphinx/source/project/ src/
Date: Thu, 10 Nov 2016 16:40:52 -0000
Author: renard
Date: Thu Nov 10 17:40:51 2016
New Revision: 5477
URL: http://svn.gna.org/viewcvs/getfem?rev=5477&view=rev
Log:
an optimisation for vector source term
Modified:
trunk/getfem/contrib/opt_assembly/opt_assembly.cc
trunk/getfem/doc/sphinx/source/project/libdesc_high_gen_assemb.rst
trunk/getfem/src/getfem_generic_assembly.cc
Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5477&r1=5476&r2=5477&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc Thu Nov 10 17:40:51 2016
@@ -677,7 +677,7 @@
// Non-homogeneous elast: 0.34 | 2.26 | 0.09 | 0.15 | 0.06 | 0.13 |
if (all || only_one == 2) // ndofu = 151959 ndofp = 50653 ndofchi = 6553
test_new_assembly(3, 36, 1);
- // Vector source term : 0.25 | 0.79 |
+ // Vector source term : 0.23 | 0.79 |
// Nonlinear residual : 0.46 | |
// Mass (scalar) : 0.21 | 0.58 | 0.05 | 0.09 | 0.08 | 0.05 |
// Mass (vector) : 0.36 | 1.37 | 0.12 | 0.17 | 0.08 | 0.11 |
@@ -695,7 +695,7 @@
// Non-homogeneous elast: 0.26 | 2.38 | 0.06 | 0.10 | 0.03 | 0.13 |
if (all || only_one == 4) // ndofu = 151959 ndofp = 50653 ndofchi = 6553
test_new_assembly(3, 18, 2);
- // Vector source term : 0.12 | 0.23 |
+ // Vector source term : 0.10 | 0.23 |
// Nonlinear residual : 0.28 | |
// Mass (scalar) : 0.11 | 0.25 | 0.05 | 0.05 | 0.03 | 0.03 |
// Mass (vector) : 0.29 | 0.89 | 0.11 | 0.16 | 0.03 | 0.10 |
@@ -704,7 +704,7 @@
// Non-homogeneous elast: 1.68 | 9.08 | 0.59 | 0.73 | 0.03 | 0.92 |
if (all || only_one == 5) // ndofu = 151959 ndofp = 50653 ndofchi = 6553
test_new_assembly(3, 9, 4);
- // Vector source term : 0.10 | 0.19 |
+ // Vector source term : 0.08 | 0.19 |
// Nonlinear residual : 0.27 | |
// Mass (scalar) : 0.51 | 0.34 | 0.09 | 0.16 | 0.01 | 0.34 |
// Mass (vector) : 1.29 | 1.31 | 0.23 | 0.41 | 0.01 | 0.87 |
@@ -716,7 +716,9 @@
// - Deactivation of the debug tests has no noticeable effect.
// - Compile time of assembly strings is negligible (< 0.0004)
// - (J, K, B) computation takes half the computational time of the exec part
- // - The optimized instruction call is negligible
+ // - The call itself of the optimized instructions' virtual functions is
+ //   negligible. This means that a real compilation (g++) avoiding these
+ //   calls is not necessary and would be far more expensive in compilation time.
// - For uniform mesh_fem, the resize operations have been suppressed and
//   the "update pfp" step has been isolated in a set of instructions
//   executed only on a change of integration method.
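For readers outside the code base: the generic assembly executes each term as a list of small instruction objects, each reached through one virtual exec() call (the ga_instruction pattern visible in the diff below). The following minimal, self-contained C++ sketch of that interpreter pattern illustrates the point above about negligible call overhead; the instruction and axpy_instruction names and the axpy body are illustrative assumptions, not GetFEM code.

#include <cstddef>
#include <memory>
#include <vector>

// Simplified stand-in for GetFEM's ga_instruction: each assembly step is a
// small object executed through one virtual exec() call.
struct instruction {
  virtual int exec() = 0;
  virtual ~instruction() {}
};

// An instruction whose body loops over a whole vector: the work done inside
// exec() dwarfs the cost of the single virtual dispatch that reaches it.
struct axpy_instruction : public instruction {
  std::vector<double> &y;
  const std::vector<double> &x;
  double a;
  axpy_instruction(std::vector<double> &y_, const std::vector<double> &x_,
                   double a_) : y(y_), x(x_), a(a_) {}
  virtual int exec() {
    for (std::size_t i = 0; i < y.size(); ++i) y[i] += a * x[i];
    return 0;
  }
};

int main() {
  std::vector<double> y(1000, 0.0), x(1000, 1.0);
  std::vector<std::shared_ptr<instruction>> program;
  program.push_back(std::make_shared<axpy_instruction>(y, x, 2.0));
  for (std::size_t i = 0; i < program.size(); ++i)
    program[i]->exec();  // one virtual call per instruction and evaluation
  return 0;
}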
Modified: trunk/getfem/doc/sphinx/source/project/libdesc_high_gen_assemb.rst
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/doc/sphinx/source/project/libdesc_high_gen_assemb.rst?rev=5477&r1=5476&r2=5477&view=diff
==============================================================================
--- trunk/getfem/doc/sphinx/source/project/libdesc_high_gen_assemb.rst (original)
+++ trunk/getfem/doc/sphinx/source/project/libdesc_high_gen_assemb.rst Thu Nov 10 17:40:51 2016
@@ -102,10 +102,8 @@
:header: "Scalar tensor", "Vectorized tensor"
:widths: 5, 12
- ":math:`\bar{t}(i) = \varphi_i(x)`",
- ":math:`t(j,k) = \varphi_{j/Q}(x) \delta_{k, (j \mbox{ mod } Q)}`"
- ":math:`[\varphi_0(x), \varphi_1(x)]`",
- ":math:`[\varphi_0(x), 0, \varphi_1(x), 0, 0, \varphi_0(x), 0,
\varphi_1(x)]`"
+ ":math:`\bar{t}(i) = \varphi_i(x)`", ":math:`t(j,k) = \varphi_{j/Q}(x)
\delta_{k, (j \mbox{ mod } Q)}`"
+ ":math:`[\varphi_0(x), \varphi_1(x)]`", ":math:`[\varphi_0(x), 0,
\varphi_1(x), 0, 0, \varphi_0(x), 0, \varphi_1(x)]`"
- 2: Grad condensed format
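As a concrete check of the rule in the first table row, the following minimal sketch expands scalar base function values into the vectorized tensor t(j,k) = phi(j/Q) * delta(k, j mod Q), assuming column-major storage (first index runs fastest); for Q = 2 it reproduces the flattened second row. The vectorize helper is hypothetical and not part of GetFEM.

#include <cstddef>
#include <iostream>
#include <vector>

// Expands scalar base values phi[0..nd-1] into the vectorized tensor
// t(j,k) = phi[j/Q] * delta(k, j % Q), with j = 0..nd*Q-1 and k = 0..Q-1,
// stored column-major (assumption; illustrative helper only).
std::vector<double> vectorize(const std::vector<double> &phi, std::size_t Q) {
  std::size_t ndof = phi.size() * Q;          // vectorized dof count
  std::vector<double> t(ndof * Q, 0.0);
  for (std::size_t k = 0; k < Q; ++k)
    for (std::size_t j = 0; j < ndof; ++j)
      if (j % Q == k) t[j + ndof * k] = phi[j / Q];
  return t;
}

int main() {
  // Q = 2 with base values (phi0, phi1) = (1, 2) prints 1 0 2 0 0 1 0 2,
  // i.e. [phi0, 0, phi1, 0, 0, phi0, 0, phi1] as in the table above.
  std::vector<double> t = vectorize(std::vector<double>{1.0, 2.0}, 2);
  for (std::size_t i = 0; i < t.size(); ++i) std::cout << t[i] << ' ';
  return 0;
}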
Modified: trunk/getfem/src/getfem_generic_assembly.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_generic_assembly.cc?rev=5477&r1=5476&r2=5477&view=diff
==============================================================================
--- trunk/getfem/src/getfem_generic_assembly.cc (original)
+++ trunk/getfem/src/getfem_generic_assembly.cc Thu Nov 10 17:40:51 2016
@@ -4761,14 +4761,71 @@
: t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
};
-
// Performs Ani Bmi -> Cmn
- struct ga_instruction_reduction_opt1 : public ga_instruction {
+ struct ga_instruction_reduction_opt0_1 : public ga_instruction {
+ base_tensor &t, &tc1, &tc2;
+ size_type nn;
+ virtual int exec() {
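+ // Debug trace: temporary output while this variant is disabled at its call site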
+ cout << "Not unrolled " << nn << " tc1 = " << tc1 << endl;
+ cout << "tc2 = " << tc2 << endl;
+ GA_DEBUG_INFO("Instruction: reduction operation of size " << nn <<
+               " optimized for vectorized second tensor of type 1");
+ size_type ss1=tc1.size(), s2=tc2.size()/nn, s2_n=s2/nn;
+ auto it = t.begin(), it2 = tc2.begin();
+ for (size_type i = 0; i < s2_n; ++i, it2 += nn) {
+ auto it1 = tc1.begin();
+ for (size_type j = 0; j < ss1; ++j) *it++ = (*it1++) * (*it2);
+ }
+ cout << "t = " << t << endl;
+ ga_instruction_reduction toto(t, tc1, tc2, nn);
+ toto.exec();
+ cout << "t = " << t << endl;
+
+ getchar();
+ return 0;
+ }
+ ga_instruction_reduction_opt0_1(base_tensor &t_, base_tensor &tc1_,
+ base_tensor &tc2_, size_type n_)
+ : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
+ };
+
+ template<int N> inline void reduc_elem_unrolled_opt1_
+ (base_vector::iterator &it, base_vector::iterator &it1, scalar_type a) {
+ *it++ = (*it1++) * a;
+ reduc_elem_unrolled_opt1_<N-1>(it, it1, a);
+ }
+ template<> inline void reduc_elem_unrolled_opt1_<1>
+ (base_vector::iterator &it, base_vector::iterator &it1, scalar_type a)
+ { *it++ = (*it1++) * a; }
+
+ // Performs Ani Bmi -> Cmn
+ template <int N>
+ struct ga_instruction_reduction_opt0_1_unrolled : public ga_instruction {
+ base_tensor &t, &tc1, &tc2;
+ size_type nn;
+ virtual int exec() {
+ GA_DEBUG_INFO("Instruction: unrolled reduction operation of size " << nn <<
+               " optimized for vectorized second tensor of type 1");
+ size_type s2=tc2.size()/nn, s2_n=s2/nn;
+ base_vector::iterator it = t.begin(), it2 = tc2.begin();
+ for (size_type i = 0; i < s2_n; ++i, it2 += nn) {
+ auto it1 = tc1.begin();
+ reduc_elem_unrolled_opt1_<N>(it, it1, *it2);
+ }
+ return 0;
+ }
+ ga_instruction_reduction_opt0_1_unrolled(base_tensor &t_, base_tensor &tc1_,
+ base_tensor &tc2_, size_type n_)
+ : t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
+ };
+
+ // Performs Ani Bmi -> Cmn
+ struct ga_instruction_reduction_opt1_1 : public ga_instruction {
base_tensor &t, &tc1, &tc2;
size_type nn;
virtual int exec() {
GA_DEBUG_INFO("Instruction: reduction operation of size " << nn <<
- " optimized for vectorized tensor");
+ " optimized for both vectorized tensor of type 1");
size_type s1 = tc1.size()/nn, s2 = tc2.size()/nn, s2_1 = s2+1;
GA_DEBUG_ASSERT(t.size() == s2*s1, "Internal error");
size_type ss1 = s1/nn, ss2 = s2/nn;
@@ -4786,8 +4843,8 @@
}
return 0;
}
- ga_instruction_reduction_opt1(base_tensor &t_, base_tensor &tc1_,
- base_tensor &tc2_, size_type n_)
+ ga_instruction_reduction_opt1_1(base_tensor &t_, base_tensor &tc1_,
+ base_tensor &tc2_, size_type n_)
: t(t_), tc1(tc1_), tc2(tc2_), nn(n_) {}
};
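Both new structs repeat the comment "Performs Ani Bmi -> Cmn". For reference, a plain, unoptimized version of that contraction is sketched below, assuming column-major storage (first index runs fastest) as in GetFEM's base_tensor; the optimized instructions in this commit specialize it by exploiting the Kronecker-delta structure of vectorized tensors to skip the zero entries.

#include <cstddef>
#include <iostream>
#include <vector>

// Reference contraction C(m,n) = sum_i A(n,i) * B(m,i), i.e. "Ani Bmi -> Cmn".
// A is N x I, B is M x I, C is M x N, all stored column-major (assumption).
void reduction(std::vector<double> &C, const std::vector<double> &A,
               const std::vector<double> &B,
               std::size_t N, std::size_t M, std::size_t I) {
  for (std::size_t n = 0; n < N; ++n)
    for (std::size_t m = 0; m < M; ++m) {
      double s = 0.0;
      for (std::size_t i = 0; i < I; ++i)
        s += A[n + N * i] * B[m + M * i];
      C[m + M * n] = s;
    }
}

int main() {
  // N = M = 1, I = 2: C(0,0) = 1*3 + 2*4 = 11.
  std::vector<double> A, B, C(1);
  A.push_back(1.0); A.push_back(2.0);
  B.push_back(3.0); B.push_back(4.0);
  reduction(C, A, B, 1, 1, 2);
  std::cout << C[0] << std::endl;  // prints 11
  return 0;
}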
@@ -4909,50 +4966,52 @@
pga_instruction ga_instruction_reduction_switch
- (assembly_tensor &t, assembly_tensor &tc1, assembly_tensor &tc2,
+ (assembly_tensor &t_, assembly_tensor &tc1_, assembly_tensor &tc2_,
size_type n, bool &to_clear) {
-
- if (tc1.sparsity() == 1 && tc2.sparsity() == 1 &&
- tc1.qdim() == n && tc2.qdim() == n) {
+ base_tensor &t = t_.tensor(), &tc1 = tc1_.tensor(), &tc2 = tc2_.tensor();
+
+ if (tc1_.sparsity() == 1 && tc2_.sparsity() == 1 &&
+ tc1_.qdim() == n && tc2_.qdim() == n) {
to_clear = true;
- t.set_sparsity(10, tc1.qdim());
- return std::make_shared<ga_instruction_reduction_opt1>
- (t.tensor(), tc1.tensor(), tc2.tensor(), n);
- }
-
+ t_.set_sparsity(10, tc1_.qdim());
+ return std::make_shared<ga_instruction_reduction_opt1_1>(t, tc1, tc2, n);
+ }
+ if (tc2_.sparsity() == 1 && false) // "&& false": variant deliberately disabled for now
+ return std::make_shared<ga_instruction_reduction_opt0_1>(t, tc1, tc2, n);
+
switch(n) {
case 2 : return std::make_shared<ga_instruction_reduction_unrolled< 2>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 3 : return std::make_shared<ga_instruction_reduction_unrolled< 3>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 4 : return std::make_shared<ga_instruction_reduction_unrolled< 4>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 5 : return std::make_shared<ga_instruction_reduction_unrolled< 5>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 6 : return std::make_shared<ga_instruction_reduction_unrolled< 6>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 7 : return std::make_shared<ga_instruction_reduction_unrolled< 7>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 8 : return std::make_shared<ga_instruction_reduction_unrolled< 8>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 9 : return std::make_shared<ga_instruction_reduction_unrolled< 9>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 10 : return std::make_shared<ga_instruction_reduction_unrolled<10>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 11 : return std::make_shared<ga_instruction_reduction_unrolled<11>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 12 : return std::make_shared<ga_instruction_reduction_unrolled<12>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 13 : return std::make_shared<ga_instruction_reduction_unrolled<13>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 14 : return std::make_shared<ga_instruction_reduction_unrolled<14>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 15 : return std::make_shared<ga_instruction_reduction_unrolled<15>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
case 16 : return std::make_shared<ga_instruction_reduction_unrolled<16>>
- (t.tensor(), tc1.tensor(), tc2.tensor());
+ (t, tc1, tc2);
default : return std::make_shared<ga_instruction_reduction>
- (t.tensor(), tc1.tensor(), tc2.tensor(), n);
+ (t, tc1, tc2, n);
}
}
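The reduc_elem_unrolled_opt1_<N> template added above and the switch in this hunk use the classic recursive-template idiom: the inner loop is unrolled at compile time and a runtime switch picks the instantiation. Below is a self-contained miniature of the same pattern, with illustrative names (scale, scale_unrolled) that are not GetFEM's.

#include <cstddef>
#include <iostream>

// Recursive template: the loop body is repeated N times at compile time;
// the <1> specialization terminates the recursion.
template <int N>
inline void scale_unrolled(double *dst, const double *src, double a) {
  *dst = a * (*src);
  scale_unrolled<N - 1>(dst + 1, src + 1, a);
}
template <>
inline void scale_unrolled<1>(double *dst, const double *src, double a) {
  *dst = a * (*src);
}

// Runtime dispatch: unrolled bodies for small sizes, a generic loop otherwise.
void scale(double *dst, const double *src, double a, std::size_t n) {
  switch (n) {
  case 2 : scale_unrolled<2>(dst, src, a); return;
  case 3 : scale_unrolled<3>(dst, src, a); return;
  case 4 : scale_unrolled<4>(dst, src, a); return;
  default:
    for (std::size_t i = 0; i < n; ++i) dst[i] = a * src[i];
  }
}

int main() {
  double src[3] = {1.0, 2.0, 3.0}, dst[3];
  scale(dst, src, 2.0, 3);  // selects the <3> instantiation
  std::cout << dst[0] << ' ' << dst[1] << ' ' << dst[2] << std::endl;  // 2 4 6
  return 0;
}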
@@ -4965,8 +5024,40 @@
tc1_.qdim() == n && tc2_.qdim() == n) {
to_clear = true;
t_.set_sparsity(10, tc1_.qdim());
- return std::make_shared<ga_instruction_reduction_opt1>(t,tc1,tc2,n);
- }
+ return std::make_shared<ga_instruction_reduction_opt1_1>(t,tc1,tc2,n);
+ }
+ if (tc2_.sparsity() == 1 && false) { // "&& false": variant deliberately disabled for now
+ cout << "tc1.size() = " << tc1.size() << endl;
+ switch(tc1.size()) {
+ case 1 : break;
+ case 2:
+ if (tc2.size() > 16)
+ return std::make_shared<ga_instruction_reduction_opt0_1_unrolled<2>>
+ (t, tc1, tc2, n);
+ break;
+ case 3:
+ return std::make_shared<ga_instruction_reduction_opt0_1_unrolled<3>>
+ (t, tc1, tc2, n);
+ case 4:
+ return std::make_shared<ga_instruction_reduction_opt0_1_unrolled<4>>
+ (t, tc1, tc2, n);
+ case 5:
+ return std::make_shared<ga_instruction_reduction_opt0_1_unrolled<5>>
+ (t, tc1, tc2, n);
+ case 6:
+ return std::make_shared<ga_instruction_reduction_opt0_1_unrolled<6>>
+ (t, tc1, tc2, n);
+ case 7:
+ return std::make_shared<ga_instruction_reduction_opt0_1_unrolled<7>>
+ (t, tc1, tc2, n);
+ case 8:
+ return std::make_shared<ga_instruction_reduction_opt0_1_unrolled<8>>
+ (t, tc1, tc2, n);
+ default:
+ return std::make_shared<ga_instruction_reduction_opt0_1>(t,tc1,tc2, n);
+ }
+ }
+
// Only specialized for certain values
size_type s2 = tc2.size()/n;
switch(s2) {
@@ -12662,6 +12753,17 @@
const ga_instruction_list &gilb = instr.second.begin_instructions;
const ga_instruction_list &gile = instr.second.elt_instructions;
const ga_instruction_list &gil = instr.second.instructions;
+#if 0
+ if (gilb.size()) cout << "Begin instructions\n";
+ for (size_type j = 0; j < gilb.size(); ++j)
+ cout << typeid(*(gilb[j])).name() << endl;
+ if (gile.size()) cout << "\nElement instructions\n";
+ for (size_type j = 0; j < gile.size(); ++j)
+ cout << typeid(*(gile[j])).name() << endl;
+ cout << "\nGauss pt instructions\n";
+ for (size_type j = 0; j < gil.size(); ++j)
+ cout << typeid(*(gil[j])).name() << endl;
+#endif
const mesh_region ®ion = *(instr.first.region());
// iteration on elements (or faces of elements)
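The #if 0 block added above dumps the dynamic type of every compiled instruction with typeid, a quick way to inspect what the assembly-string compiler produced for each phase. A standalone sketch of the same diagnostic follows; the instruction and nop_instruction names are illustrative, and on GCC the printed names are mangled (abi::__cxa_demangle can pretty-print them).

#include <cstddef>
#include <iostream>
#include <memory>
#include <typeinfo>
#include <vector>

struct instruction { virtual int exec() = 0; virtual ~instruction() {} };
struct nop_instruction : public instruction { virtual int exec() { return 0; } };

int main() {
  std::vector<std::shared_ptr<instruction>> gil;
  gil.push_back(std::make_shared<nop_instruction>());
  for (std::size_t j = 0; j < gil.size(); ++j)
    std::cout << typeid(*(gil[j])).name() << std::endl;  // dynamic type via RTTI
  return 0;
}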