commit-gnuradio
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Commit-gnuradio] r8118 - gnuradio/branches/developers/ngoergen/spe_fir_fff


From: ngoergen
Subject: [Commit-gnuradio] r8118 - gnuradio/branches/developers/ngoergen/spe_fir_fff
Date: Thu, 27 Mar 2008 15:45:07 -0600 (MDT)

Author: ngoergen
Date: 2008-03-27 15:45:06 -0600 (Thu, 27 Mar 2008)
New Revision: 8118

Added:
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc
   gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h
Removed:
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp
   gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
   gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h
   gnuradio/branches/developers/ngoergen/spe_fir_fff/src/
Modified:
   gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
   gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
   gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c
   gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
Log:
spe_fir_xxx: New instruction scheduling for the _ccc and _fff variants, based
on static timing analysis, for maximum dual issue with the fewest stalls.
Decrementer-based timing analysis. Renamed files (spe_* to spu_*, .cpp to .cc).

Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile  2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile  2008-03-27 21:45:06 UTC (rev 8118)
@@ -4,19 +4,23 @@
 CFLAGS     = -Wall
 
 SPU_CC     = spu-gcc
-SPU_CINCS  = -I.
+SPU_CINCS  = -I. -I /usr/lib/gcc/spu/4.1.1/include
 SPU_CLIBS  =
 SPU_CFLAGS = -Wall
 
 SPU_AS    = spu-as
 SPU_CPP           = spu-g++
 
-all: multi_fir_fff_ppe multi_fir_fff64_ppe multi_fir_ccc_ppe fir_fff_spe.elf 
fir_fff64_spe.elf fir_ccc_spe.elf
+OUTPUTS =      multi_fir_fff_ppe \
+                       multi_fir_fff64_ppe \
+                       multi_fir_ccc_ppe \
+                       fir_fff_spu.elf \
+                       fir_fff64_spu.elf \
+                       fir_ccc_spu.elf
 
-asm: fir_fff_spe.s
+all: $(OUTPUTS)
 
-fir_fff_spe.s: fir_fff_spe.cpp
-       $(SPU_CC) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) -S $^
+time: spu_fir_fff_as.time spu_fir_fff64_as.time spu_fir_ccc_as.time
 
 multi_fir_fff_ppe: multi_fir_fff_ppe.c
        $(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
@@ -27,20 +31,21 @@
 multi_fir_ccc_ppe: multi_fir_ccc_ppe.c
        $(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
 
-%.o: %.cpp
+%.o: %.cc
        $(SPU_CC) -c $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
 
 %.o: %.S
        $(SPU_AS) -o $@ $^      
 
-fir_fff_spe.elf: fir_fff_spe.o spe_fir_fff_as.o
+%.time: %.S
+       /opt/cell/sdk/usr/bin/spu_timing -o $@ $^
+       
+fir_fff_spu.elf: fir_fff_spu.o spu_fir_fff_as.o
        $(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
-
-fir_fff64_spe.elf: fir_fff64_spe.o spe_fir_fff64_as.o
+fir_fff64_spu.elf: fir_fff64_spu.o spu_fir_fff64_as.o
        $(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
-       
-fir_ccc_spe.elf: fir_ccc_spe.o spe_fir_ccc_as.o
+fir_ccc_spu.elf: fir_ccc_spu.o spu_fir_ccc_as.o
        $(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
        
 clean:
-       rm -f multi_fir_fff_ppe fir_fff_spe.elf fir_fff_spe.s 
multi_fir_fff64_ppe multi_fir_ccc_ppe fir_fff64_spe.elf fir_ccc_spe.elf *.o
+       rm -f $(OUTPUTS) *.o *.time

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc    (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc    2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,76 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_ccc_as.h"
+#include <spu_mfcio.h>
+
+#define MAX_BUFSIZE (128*100)
+
+//float inputs[MAX_BUFSIZE]  __attribute__((aligned(16))) = {1,  2,  1,  2, 1, 
 2, 1,  2, 1,  2, 1,  2, 1,  2, 1,  2};
+//float taps[MAX_BUFSIZE]  __attribute__((aligned(16))) = {2.32, -23.6563, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0};
+float inputs[MAX_BUFSIZE]  __attribute__((aligned(16)));
+float taps[MAX_BUFSIZE]  __attribute__((aligned(16)));
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+
+int main(unsigned long long spu, unsigned long long argp,
+    unsigned long long envp)
+{
+  int tag = 1;
+  spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+  {
+    // NOTE: spu_fir_fff_params_t can be an _IN only param,
+    // it is sent back now for the time value only.
+    gr_spu_dma_lock_inout<spu_fir_fff_params_t> argp_lock(argp,
+        &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+    gr_spu_dma_lock_in<__vector float> inputs_lock(spu_fir_fff_params.ea_in1,
+        reinterpret_cast<__vector float *>(&inputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_in<__vector float> taps_lock(spu_fir_fff_params.ea_in2,
+        reinterpret_cast<__vector float *>(&taps), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_out<__vector float> outputs_lock(spu_fir_fff_params.ea_out,
+        reinterpret_cast<__vector float *>(&outputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    //
+    //         spu_fir_fff_params.offset = 0; 
+    //         spu_fir_fff_params.nsamples = 7; 
+    //         spu_fir_fff_params.ntaps = 16;
+
+    spu_write_decrementer(~0);
+    int start(spu_read_decrementer());
+    
+    spu_fir_ccc( reinterpret_cast<__vector float *>(&inputs),
+        reinterpret_cast<__vector float *>(&taps),
+        reinterpret_cast<__vector float *>(&outputs), 0,
+        spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+        spu_fir_fff_params.ntaps);
+    
+    spu_fir_fff_params.pad[0] = start - spu_read_decrementer();
+
+  }
+
+  return 0;
+}

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc  (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc  2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_fff64_as.h"
+
+#define MAX_BUFSIZE (64*100)
+
+double inputs[MAX_BUFSIZE]  __attribute__((aligned(16)));
+double taps[MAX_BUFSIZE]  __attribute__((aligned(16)));
+double outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+
+int main(unsigned long long spu, unsigned long long argp,
+    unsigned long long envp)
+{
+  int tag = 1;
+  spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+  {
+    gr_spu_dma_lock_in<spu_fir_fff_params_t> argp_lock(argp,
+        &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+    gr_spu_dma_lock_in<__vector double> inputs_lock(spu_fir_fff_params.ea_in1,
+        reinterpret_cast<__vector double *>(&inputs), spu_fir_fff_params.size
+            * sizeof(double), tag);
+    gr_spu_dma_lock_in<__vector double> taps_lock(spu_fir_fff_params.ea_in2,
+        reinterpret_cast<__vector double *>(&taps), spu_fir_fff_params.size
+            * sizeof(double), tag);
+    gr_spu_dma_lock_out<__vector double> outputs_lock(
+        spu_fir_fff_params.ea_out,
+        reinterpret_cast<__vector double *>(&outputs), spu_fir_fff_params.size
+            * sizeof(double), tag);
+
+    spu_fir_fff64( reinterpret_cast<__vector double *>(&inputs),
+        reinterpret_cast<__vector double *>(&taps),
+        reinterpret_cast<__vector double *>(&outputs), 0,
+        spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+        spu_fir_fff_params.ntaps);
+
+  }
+
+  return 0;
+}

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc    (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc    2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,70 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_fff_as.h"
+#include <spu_mfcio.h>
+
+#define MAX_BUFSIZE (128*100)
+
+float inputs[MAX_BUFSIZE]  __attribute__((aligned(16)));
+float taps[MAX_BUFSIZE]  __attribute__((aligned(16)));
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+
+int main(unsigned long long spu, unsigned long long argp,
+    unsigned long long envp)
+{
+  int tag = 1;
+  spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+  {
+    // NOTE: spu_fir_fff_params_t can be an _IN only param,
+    // it is sent back now for the time value only.
+    gr_spu_dma_lock_inout<spu_fir_fff_params_t> argp_lock(argp,
+        &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+    gr_spu_dma_lock_in<__vector float> inputs_lock(spu_fir_fff_params.ea_in1,
+        reinterpret_cast<__vector float *>(&inputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_in<__vector float> taps_lock(spu_fir_fff_params.ea_in2,
+        reinterpret_cast<__vector float *>(&taps), spu_fir_fff_params.size
+            * sizeof(float), tag);
+    gr_spu_dma_lock_out<__vector float> outputs_lock(spu_fir_fff_params.ea_out,
+        reinterpret_cast<__vector float *>(&outputs), spu_fir_fff_params.size
+            * sizeof(float), tag);
+
+    spu_write_decrementer(~0);
+    int start(spu_read_decrementer());
+    
+    spu_fir_fff( reinterpret_cast<__vector float *>(&inputs),
+        reinterpret_cast<__vector float *>(&taps),
+        reinterpret_cast<__vector float *>(&outputs), 0,
+        spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+        spu_fir_fff_params.ntaps);
+    
+    spu_fir_fff_params.pad[0] = start - spu_read_decrementer();
+  
+  }
+
+  return 0;
+}

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h 2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,97 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef GR_SPU_DMA_LOCK_H_
+#define GR_SPU_DMA_LOCK_H_
+
+#include <spu_mfcio.h>
+
+template <class Tout>
+class gr_spu_dma_lock {
+protected:
+       gr_spu_dma_lock(unsigned long long in, Tout* target, unsigned int size, 
int tag) :
+               d_in(in), d_target(target), d_size(size), d_tag(tag) {
+       }
+       
+  ~gr_spu_dma_lock() {};
+  
+       void pull() {
+           spu_mfcdma64(d_target, mfc_ea2h(d_in), mfc_ea2l(d_in),
+                       d_size, d_tag, MFC_GET_CMD);
+           spu_writech(MFC_WrTagMask, 1 << d_tag);
+           spu_mfcstat(MFC_TAG_UPDATE_ALL);
+       }
+       
+       void push() {
+           spu_mfcdma64(d_target, mfc_ea2h(d_in), mfc_ea2l(d_in), d_size, 
d_tag, MFC_PUT_CMD);
+           spu_writech(MFC_WrTagMask, 1 << d_tag);
+           spu_mfcstat(MFC_TAG_UPDATE_ALL);
+       }
+       
+private:
+       unsigned long long      d_in;
+       Tout*   d_target;
+       unsigned int d_size;
+       int d_tag;
+};
+
+template <class Tout>
+class gr_spu_dma_lock_in : gr_spu_dma_lock<Tout> {
+public:
+       gr_spu_dma_lock_in(unsigned long long in, Tout* target, unsigned int 
size, int tag) :
+               gr_spu_dma_lock<Tout>(in, target, size, tag) {
+           pull();
+       }
+       
+       void pull() { gr_spu_dma_lock<Tout>::pull(); }
+
+       ~gr_spu_dma_lock_in() {}
+};
+
+template <class Tout>
+class gr_spu_dma_lock_out : gr_spu_dma_lock<Tout> {
+public:
+       gr_spu_dma_lock_out(unsigned long long in, Tout* target, unsigned int 
size, int tag) :
+               gr_spu_dma_lock<Tout>(in, target, size, tag) {
+       }
+       
+       void push() { gr_spu_dma_lock<Tout>::push(); }
+
+       ~gr_spu_dma_lock_out() { push(); }
+};
+
+template <class Tout>
+class gr_spu_dma_lock_inout : gr_spu_dma_lock<Tout> {
+public:
+       gr_spu_dma_lock_inout(unsigned long long in, Tout* target, unsigned int 
size, int tag) :
+               gr_spu_dma_lock<Tout>(in, target, size, tag) {
+               pull();
+       }
+       
+       void push() { gr_spu_dma_lock<Tout>::push(); }
+
+       void pull() { gr_spu_dma_lock<Tout>::pull(); }
+
+       ~gr_spu_dma_lock_inout() { push(); }
+};
+
+#endif /*GR_SPU_DMA_LOCK_H_*/

Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c       2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c       2008-03-27 21:45:06 UTC (rev 8118)
@@ -24,29 +24,11 @@
 #include <stdlib.h>
 #include <libspe2.h>
 #include <pthread.h>
-#include <spe_fir_fff_params.h>
+#include <spu_fir_fff_params.h>
 
-#define NUM_SPE 1
+#define NUM_SPU 1
 #define SIZE    (64*1 )
 
-//#define MYMATRIX 1,  2,  3,   4,  5,   6,  7,   8, \
-                  9,  10, 11,  12, 13,  14, 15,  16, \
-                 17,  18, 19,  20, 21,  22, 23,  24, \
-                 25,  26, 27,  28, 29,  30, 31,  32, \
-                 33,  34, 35,  36, 37,  38, 39,  40, \
-                 41,  42, 43,  44, 45,  46, 47,  48, \
-                 49,  50, 51,  52, 53,  54, 55,  56, \
-                 57,  58, 59,  60, 61,  62, 63,  64
-
-//#define MYMATRIX      0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,1,2,3,4
-
 #define MYMATRIX        1,    2,  3,    4,   5,   6,  7,   8, \
                         9,    10,  1,    -1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
@@ -56,24 +38,6 @@
                         1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1
 
-//#define MYMATRIX2    1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23
-
-//#define MYMATRIX2     -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2
-
 #define MYMATRIX2       1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
@@ -96,8 +60,9 @@
                                        MYMATRIX210, MYMATRIX210, MYMATRIX210, 
MYMATRIX210, MYMATRIX210
 
 #define TESTMATRIX 1,  2,  3,  4, -5,  6, -7,  -8, 9,  -10, -11.11,  -12.22, 
13.33,  14.44, 15.55,  16.66
-//#define TESTMATRIX 1,    2,  3,    4,   5,   6,  7,   8, \
-                        9,    10, 234, 234, 234, 234, 234, 234 
+
+//#define TESTMATRIX 1,    2,  3,    4,   5,   6,  7,   8, 9, 10, 234, 234, 
234, 234, 234, 234 
+
 #define TESTTAPS 2.32, -23.6563, -432.5434, -34.323, 0, 0, 0, 0, 0, 0, 0, 0,0, 
0, 0, 0
 
 float in1[16] =
@@ -113,15 +78,15 @@
 
 float out[SIZE] __attribute__((aligned(16)));
 
-spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
+spu_fir_fff_params_t spu_fir_fff_params[NUM_SPU] __attribute__((aligned(16)));
 
 typedef struct
 {
-  spe_context_ptr_t spe;
-  spe_fir_fff_params_t *spe_fir_fff_params;
+  spe_context_ptr_t spu;
+  spu_fir_fff_params_t *spu_fir_fff_params;
 } thread_arg_t;
 
-void *run_abs_spe(void *thread_arg)
+void *run_abs_spu(void *thread_arg)
 {
   int ret;
   thread_arg_t *arg = (thread_arg_t *) thread_arg;
@@ -129,10 +94,10 @@
   spe_stop_info_t stop_info;
 
   entry = SPE_DEFAULT_ENTRY;
-  ret = spe_context_run(arg->spe, &entry, 0, arg->spe_fir_fff_params, NULL,
+  ret = spe_context_run(arg->spu, &entry, 0, arg->spu_fir_fff_params, NULL,
       &stop_info);
   if (ret < 0) {
-    perror("spe_context_run");
+    perror("spu_context_run");
     return NULL;
   }
 
@@ -145,50 +110,50 @@
   int ret;
 
   spe_program_handle_t *prog;
-  spe_context_ptr_t spe[NUM_SPE];
-  pthread_t thread[NUM_SPE];
-  thread_arg_t arg[NUM_SPE];
+  spe_context_ptr_t spu[NUM_SPU];
+  pthread_t thread[NUM_SPU];
+  thread_arg_t arg[NUM_SPU];
 
-  printf("size: spe_fir_ccc_params_t: %02d\n", sizeof(spe_fir_fff_params_t));
+  printf("size: spu_fir_ccc_params_t: %02d\n", sizeof(spu_fir_fff_params_t));
 
-  prog = spe_image_open("fir_ccc_spe.elf");
+  prog = spe_image_open("fir_ccc_spu.elf");
   if (!prog) {
-    perror("spe_image_open");
+    perror("spu_image_open");
     exit(1);
   }
 
-  for (i = 0; i < NUM_SPE; ++i) {
-    spe[i] = spe_context_create(0, NULL);
-    if (!spe[i]) {
-      perror("spe_context_create");
+  for (i = 0; i < NUM_SPU; ++i) {
+    spu[i] = spe_context_create(0, NULL);
+    if (!spu[i]) {
+      perror("spu_context_create");
       exit(1);
     }
 
-    ret = spe_program_load(spe[i], prog);
+    ret = spe_program_load(spu[i], prog);
     if (ret) {
-      perror("spe_program_load");
+      perror("spu_program_load");
       exit(1);
     }
   }
 
   unsigned int j;
-  int size= SIZE/NUM_SPE;
+  int size= SIZE/NUM_SPU;
 
   for (j = 0; j < 1; ++j) {
 
-    for (i = 0; i < NUM_SPE; ++i) {
-      spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
-      spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
-      spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
-      spe_fir_fff_params[i].ntaps = 16;
-      spe_fir_fff_params[i].nsamples = 7;
-      spe_fir_fff_params[i].offset = 0;
-      spe_fir_fff_params[i].size = size;
+    for (i = 0; i < NUM_SPU; ++i) {
+      spu_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
+      spu_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
+      spu_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
+      spu_fir_fff_params[i].ntaps = 16;
+      spu_fir_fff_params[i].nsamples = 7;
+      spu_fir_fff_params[i].offset = 0;
+      spu_fir_fff_params[i].size = size;
 
-      arg[i].spe = spe[i];
-      arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
+      arg[i].spu = spu[i];
+      arg[i].spu_fir_fff_params = &spu_fir_fff_params[i];
 
-      ret = pthread_create(&thread[i], NULL, run_abs_spe, &arg[i]);
+      ret = pthread_create(&thread[i], NULL, run_abs_spu, &arg[i]);
       if (ret) {
         perror("pthread_create");
         exit(1);
@@ -197,24 +162,25 @@
     pthread_join(thread[i], NULL);
   }
 
-  for (i = 0; i < NUM_SPE; ++i) {
+  for (i = 0; i < NUM_SPU; ++i) {
     //pthread_join(thread[i], NULL);
-    ret = spe_context_destroy(spe[i]);
+    ret = spe_context_destroy(spu[i]);
     if (ret) {
-      perror("spe_context_destroy");
+      perror("spu_context_destroy");
       exit(1);
     }
   }
 
   ret = spe_image_close(prog);
   if (ret) {
-    perror("spe_image_close");
+    perror("spu_image_close");
     exit(1);
   }
 
   for (i = 0; i < 32; i +=2) {
     printf("out[%02d]=%f, %fi\n", i>>1, out[i], out[i+1]);
   }
+  printf("time result=%d\n", spu_fir_fff_params[0].pad[0]);
 
   return 0;
 }

Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c     2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c     2008-03-27 21:45:06 UTC (rev 8118)
@@ -24,29 +24,11 @@
 #include <stdlib.h>
 #include <libspe2.h>
 #include <pthread.h>
-#include <spe_fir_fff_params.h>
+#include <spu_fir_fff_params.h>
 
-#define NUM_SPE 1
+#define NUM_SPU 1
 #define SIZE    (64*1 )
 
-//#define MYMATRIX 1,  2,  3,   4,  5,   6,  7,   8, \
-                  9,  10, 11,  12, 13,  14, 15,  16, \
-                 17,  18, 19,  20, 21,  22, 23,  24, \
-                 25,  26, 27,  28, 29,  30, 31,  32, \
-                 33,  34, 35,  36, 37,  38, 39,  40, \
-                 41,  42, 43,  44, 45,  46, 47,  48, \
-                 49,  50, 51,  52, 53,  54, 55,  56, \
-                 57,  58, 59,  60, 61,  62, 63,  64
-
-//#define MYMATRIX      0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,1,2,3,4
-
 #define MYMATRIX        1,    2,  3,    4,   5,   6,  7,   8, \
                         9,    10,  1,    -1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
@@ -56,24 +38,6 @@
                         1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1
 
-//#define MYMATRIX2    1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23
-
-//#define MYMATRIX2     -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2
-
 #define MYMATRIX2       1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
@@ -94,9 +58,11 @@
 
 #define MYMATRIX2100 MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, 
MYMATRIX210, \
                                        MYMATRIX210, MYMATRIX210, MYMATRIX210, 
MYMATRIX210, MYMATRIX210
+
 #define TESTMATRIX 234,  -4,  23,  -56,  45,    98,  -23,  -7, 0, 0, 0, 0, 0, 
0, 0, 0 
-//#define TESTMATRIX 1,    2,  3,    4,   5,   6,  7,   8, \
-                        9,    10, 234, 234, 234, 234, 234, 234 
+
+//#define TESTMATRIX 1,    2,  3,    4,   5,   6,  7,   8, 9,    10, 234, 234, 
234, 234, 234, 234 
+
 #define TESTTAPS 5, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 
 double in1[16] =
@@ -111,15 +77,15 @@
 
 double out[SIZE] __attribute__((aligned(16)));
 
-spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
+spu_fir_fff_params_t spu_fir_fff_params[NUM_SPU] __attribute__((aligned(16)));
 
 typedef struct
 {
-  spe_context_ptr_t spe;
-  spe_fir_fff_params_t *spe_fir_fff_params;
+  spe_context_ptr_t spu;
+  spu_fir_fff_params_t *spu_fir_fff_params;
 } thread_arg_t;
 
-void *run_abs_spe(void *thread_arg)
+void *run_abs_spu(void *thread_arg)
 {
   int ret;
   thread_arg_t *arg = (thread_arg_t *) thread_arg;
@@ -127,10 +93,10 @@
   spe_stop_info_t stop_info;
 
   entry = SPE_DEFAULT_ENTRY;
-  ret = spe_context_run(arg->spe, &entry, 0, arg->spe_fir_fff_params, NULL,
+  ret = spe_context_run(arg->spu, &entry, 0, arg->spu_fir_fff_params, NULL,
       &stop_info);
   if (ret < 0) {
-    perror("spe_context_run");
+    perror("spu_context_run");
     return NULL;
   }
 
@@ -143,50 +109,50 @@
   int ret;
 
   spe_program_handle_t *prog;
-  spe_context_ptr_t spe[NUM_SPE];
-  pthread_t thread[NUM_SPE];
-  thread_arg_t arg[NUM_SPE];
+  spe_context_ptr_t spu[NUM_SPU];
+  pthread_t thread[NUM_SPU];
+  thread_arg_t arg[NUM_SPU];
 
-  printf("size: spe_fir_fff_params_t: %02d\n", sizeof(spe_fir_fff_params_t));
+  printf("size: spu_fir_fff_params_t: %02d\n", sizeof(spu_fir_fff_params_t));
 
-  prog = spe_image_open("fir_fff64_spe.elf");
+  prog = spe_image_open("fir_fff64_spu.elf");
   if (!prog) {
-    perror("spe_image_open");
+    perror("spu_image_open");
     exit(1);
   }
 
-  for (i = 0; i < NUM_SPE; ++i) {
-    spe[i] = spe_context_create(0, NULL);
-    if (!spe[i]) {
-      perror("spe_context_create");
+  for (i = 0; i < NUM_SPU; ++i) {
+    spu[i] = spe_context_create(0, NULL);
+    if (!spu[i]) {
+      perror("spu_context_create");
       exit(1);
     }
 
-    ret = spe_program_load(spe[i], prog);
+    ret = spe_program_load(spu[i], prog);
     if (ret) {
-      perror("spe_program_load");
+      perror("spu_program_load");
       exit(1);
     }
   }
 
   unsigned int j;
-  int size= SIZE/NUM_SPE;
+  int size= SIZE/NUM_SPU;
 
   for (j = 0; j < 1; ++j) {
 
-    for (i = 0; i < NUM_SPE; ++i) {
-      spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
-      spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
-      spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
-      spe_fir_fff_params[i].ntaps = 16;
-      spe_fir_fff_params[i].nsamples = 7;
-      spe_fir_fff_params[i].offset = 0;
-      spe_fir_fff_params[i].size = size;
+    for (i = 0; i < NUM_SPU; ++i) {
+      spu_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
+      spu_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
+      spu_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
+      spu_fir_fff_params[i].ntaps = 16;
+      spu_fir_fff_params[i].nsamples = 7;
+      spu_fir_fff_params[i].offset = 0;
+      spu_fir_fff_params[i].size = size;
 
-      arg[i].spe = spe[i];
-      arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
+      arg[i].spu = spu[i];
+      arg[i].spu_fir_fff_params = &spu_fir_fff_params[i];
 
-      ret = pthread_create(&thread[i], NULL, run_abs_spe, &arg[i]);
+      ret = pthread_create(&thread[i], NULL, run_abs_spu, &arg[i]);
       if (ret) {
         perror("pthread_create");
         exit(1);
@@ -195,18 +161,18 @@
     pthread_join(thread[i], NULL);
   }
 
-  for (i = 0; i < NUM_SPE; ++i) {
+  for (i = 0; i < NUM_SPU; ++i) {
     //pthread_join(thread[i], NULL);
-    ret = spe_context_destroy(spe[i]);
+    ret = spe_context_destroy(spu[i]);
     if (ret) {
-      perror("spe_context_destroy");
+      perror("spu_context_destroy");
       exit(1);
     }
   }
 
   ret = spe_image_close(prog);
   if (ret) {
-    perror("spe_image_close");
+    perror("spu_image_close");
     exit(1);
   }
 

Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c       
2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c       
2008-03-27 21:45:06 UTC (rev 8118)
@@ -24,29 +24,11 @@
 #include <stdlib.h>
 #include <libspe2.h>
 #include <pthread.h>
-#include <spe_fir_fff_params.h>
+#include <spu_fir_fff_params.h>
 
-#define NUM_SPE 1
+#define NUM_SPU 1
 #define SIZE    (64*1 )
 
-//#define MYMATRIX 1,  2,  3,   4,  5,   6,  7,   8, \
-                  9,  10, 11,  12, 13,  14, 15,  16, \
-                 17,  18, 19,  20, 21,  22, 23,  24, \
-                 25,  26, 27,  28, 29,  30, 31,  32, \
-                 33,  34, 35,  36, 37,  38, 39,  40, \
-                 41,  42, 43,  44, 45,  46, 47,  48, \
-                 49,  50, 51,  52, 53,  54, 55,  56, \
-                 57,  58, 59,  60, 61,  62, 63,  64
-
-//#define MYMATRIX      0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,0,0,0,0, \
-                        0,0,0,0,1,2,3,4
-
 #define MYMATRIX        1,    2,  3,    4,   5,   6,  7,   8, \
                         9,    10,  1,    -1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
@@ -56,24 +38,6 @@
                         1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1
 
-//#define MYMATRIX2    1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23, \
-                       1.23,   1.23,  1.23,   1.23,  1.23,   1.23,  1.23,   
1.23
-
-//#define MYMATRIX2     -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2, \
-                        -2,    -2,  -2,    -2,   -2,   -2,  -2,   -2
-
 #define MYMATRIX2       1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
                         1,    1,  1,    1,   1,   1,  1,   1, \
@@ -94,12 +58,18 @@
 
 #define MYMATRIX2100 MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, 
MYMATRIX210, \
                                        MYMATRIX210, MYMATRIX210, MYMATRIX210, 
MYMATRIX210, MYMATRIX210
-#define TESTMATRIX 234,  -4,  23,  -56,  45,    98,  -23,  -7, 0, 0, 0, 0, 0, 
0, 0, 0 
-//#define TESTMATRIX 1,    2,  3,    4,   5,   6,  7,   8, \
-                        9,    10, 234, 234, 234, 234, 234, 234 
-#define TESTTAPS 5, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 
-float in1[16] =
+//#define TESTMATRIX 234, -4, 23, -56, 45, 98, -23, -7, 0, 0, 0, 0, 0, 0, 0, 0
+
+// TESTMATRIX: 32 sample values followed by 16 zeros (48 values in total,
+// the same count as the in1[48] array it initialises).
+#define TESTMATRIX -43.34,   -23.4,    3.23,    5.66,     \
+                   754.564,  345.23,   -23.34,  -65.45,   \
+                   -34.65,   32.3,     23.454,  3456.334, \
+                   34.234,   34.65,    -765.56, 23.87,    \
+                   -23.54,   3.65,     -234.2,  -223.234, \
+                   -3.0,     -12.34,   -23.53,  211.231,  \
+                   -23.214,  645.45,   23.34,   -2.45,    \
+                   -345.23,  12.453,   -23.45,  234.645,  \
+                   0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+
+// TESTTAPS: 8 non-zero taps padded with zeros to 16 values.
+#define TESTTAPS 234, -4, 23, -56, 45, 98, -23, -7, \
+                 0, 0, 0, 0, 0, 0, 0, 0
+
+float in1[48] =
 {
 // MYMATRIX10 
     TESTMATRIX};
@@ -112,15 +82,15 @@
 
 float out[SIZE] __attribute__((aligned(16)));
 
-spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
+spu_fir_fff_params_t spu_fir_fff_params[NUM_SPU] __attribute__((aligned(16)));
 
 typedef struct
 {
-  spe_context_ptr_t spe;
-  spe_fir_fff_params_t *spe_fir_fff_params;
+  spe_context_ptr_t spu;
+  spu_fir_fff_params_t *spu_fir_fff_params;
 } thread_arg_t;
 
-void *run_abs_spe(void *thread_arg)
+void *run_abs_spu(void *thread_arg)
 {
   int ret;
   thread_arg_t *arg = (thread_arg_t *) thread_arg;
@@ -128,10 +98,10 @@
   spe_stop_info_t stop_info;
 
   entry = SPE_DEFAULT_ENTRY;
-  ret = spe_context_run(arg->spe, &entry, 0, arg->spe_fir_fff_params, NULL,
+  ret = spe_context_run(arg->spu, &entry, 0, arg->spu_fir_fff_params, NULL,
       &stop_info);
   if (ret < 0) {
-    perror("spe_context_run");
+    perror("spu_context_run");
     return NULL;
   }
 
@@ -144,50 +114,50 @@
   int ret;
 
   spe_program_handle_t *prog;
-  spe_context_ptr_t spe[NUM_SPE];
-  pthread_t thread[NUM_SPE];
-  thread_arg_t arg[NUM_SPE];
+  spe_context_ptr_t spu[NUM_SPU];
+  pthread_t thread[NUM_SPU];
+  thread_arg_t arg[NUM_SPU];
 
-  printf("size: spe_fir_fff_params_t: %02d\n", sizeof(spe_fir_fff_params_t));
+  printf("size: spu_fir_fff_params_t: %02d\n", sizeof(spu_fir_fff_params_t));
 
-  prog = spe_image_open("fir_fff_spe.elf");
+  prog = spe_image_open("fir_fff_spu.elf");
   if (!prog) {
-    perror("spe_image_open");
+    perror("spu_image_open");
     exit(1);
   }
 
-  for (i = 0; i < NUM_SPE; ++i) {
-    spe[i] = spe_context_create(0, NULL);
-    if (!spe[i]) {
-      perror("spe_context_create");
+  for (i = 0; i < NUM_SPU; ++i) {
+    spu[i] = spe_context_create(0, NULL);
+    if (!spu[i]) {
+      perror("spu_context_create");
       exit(1);
     }
 
-    ret = spe_program_load(spe[i], prog);
+    ret = spe_program_load(spu[i], prog);
     if (ret) {
-      perror("spe_program_load");
+      perror("spu_program_load");
       exit(1);
     }
   }
 
   unsigned int j;
-  int size= SIZE/NUM_SPE;
+  int size= SIZE/NUM_SPU;
 
   for (j = 0; j < 1; ++j) {
 
-    for (i = 0; i < NUM_SPE; ++i) {
-      spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
-      spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
-      spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
-      spe_fir_fff_params[i].ntaps = 16;
-      spe_fir_fff_params[i].nsamples = 7;
-      spe_fir_fff_params[i].offset = 0;
-      spe_fir_fff_params[i].size = size;
+    for (i = 0; i < NUM_SPU; ++i) {
+      spu_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
+      spu_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
+      spu_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
+      spu_fir_fff_params[i].ntaps = 16;
+      spu_fir_fff_params[i].nsamples = 32;
+      spu_fir_fff_params[i].offset = 0;
+      spu_fir_fff_params[i].size = size;
 
-      arg[i].spe = spe[i];
-      arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
+      arg[i].spu = spu[i];
+      arg[i].spu_fir_fff_params = &spu_fir_fff_params[i];
 
-      ret = pthread_create(&thread[i], NULL, run_abs_spe, &arg[i]);
+      ret = pthread_create(&thread[i], NULL, run_abs_spu, &arg[i]);
       if (ret) {
         perror("pthread_create");
         exit(1);
@@ -196,25 +166,25 @@
     pthread_join(thread[i], NULL);
   }
 
-  for (i = 0; i < NUM_SPE; ++i) {
+  for (i = 0; i < NUM_SPU; ++i) {
     //pthread_join(thread[i], NULL);
-    ret = spe_context_destroy(spe[i]);
+    ret = spe_context_destroy(spu[i]);
     if (ret) {
-      perror("spe_context_destroy");
+      perror("spu_context_destroy");
       exit(1);
     }
   }
 
   ret = spe_image_close(prog);
   if (ret) {
-    perror("spe_image_close");
+    perror("spu_image_close");
     exit(1);
   }
 
   for (i = 0; i < 32; ++i) {
     printf("out[%02d]=%f\n", i, out[i]);
   }
-  printf("size %0d result=%f\n", size, out[0]);
+  printf("time result=%d\n", spu_fir_fff_params[0].pad[0]);
 
   return 0;
 }

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.h

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h

Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S 
(from rev 8117, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S          
                (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S  
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,277 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+
+# taps are guaranteed to be 16 byte aligned.
+# ntaps != 0
+
+#  void spu_fir_ccc (  
+#      const float *input,
+#      const float *taps, 
+#      float *output,
+#      const float *delayline,
+#      const unsigned int offset,
+#      const unsigned int nsamples,
+#      const unsigned int ntaps
+#  )
+#  {
+#    unsigned int o_index = 0;
+#
+#    do {  
+#      float sum0 = 0;
+#      float sum1 = 0;
+#      float sum2 = 0;
+#      float sum3 = 0;
+#
+#         unsigned int n_2_complex_blocks = ntaps / 2;
+#
+#         unsigned int cur_tap = 0;
+#      unsigned int cur_input = input + offset + o_index;
+#
+#      do {
+#  
+#        sum0 += cur_input[0] * cur_tap[0] - cur_input[1] * cur_tap[1];
+#        sum1 += cur_input[0] * cur_tap[1] + cur_input[1] * cur_tap[0];
+#        sum2 += cur_input[2] * cur_tap[2] - cur_input[3] * cur_tap[3];
+#        sum3 += cur_input[2] * cur_tap[3] + cur_input[3] * cur_tap[2];
+#  
+#        cur_input += 4;
+#        cur_tap += 4;
+#  
+#      } while ((n_2_complex_blocks -= 2) != 0);
+#
+#              output[o_index] = sum0 + sum2;
+#              output[o_index+1] = sum1 + sum3;
+#
+#    } while ((o_index += 2) != nsamples * 2);
+#  
+#  }
+#      
+
+       .file   "fir_ccc_spu.S"
+.text
+        .align  3
+        .global spu_fir_ccc
+        .type   spu_fir_ccc, @function
+#
+# spu_fir_ccc -- FIR kernel for interleaved (re, im) single-precision data.
+#
+# Register use, as established by the code below.  The mapping of the
+# prototype's arguments onto $3..$9 is assumed from the SPU calling
+# convention -- confirm against the ABI document.
+#   $3   input base address          $4   taps base address
+#   $5   output base address         $6   delayline (overwritten before use)
+#   $7   offset, added to the input address
+#   $8   nsamples                    $9   ntaps
+#   $30  output quadword being assembled (two complex results)
+#   $31  results still to produce
+#   $32  output store address, advanced by 16 per stored quadword
+#   $33  running offset into the input, advanced by 8 per result
+#   $34  rotate count that places a result inside $30 (16, then 8)
+#   $35  current input load address  $38  current tap load address
+#   $36  taps left for the current result, reduced by 4 per inner pass
+#   $39, $40  input shuffle patterns chosen from the *shiftmasks tables
+#   $41..$44  hightapmask, lowtapmask, xormask, leftexpand
+#   $5..$8    the four partial-sum accumulators
+#
+# The trailing "# p-n" notes are the original author's; they appear to give
+# the issue pipeline (0 or 1) and the latency of each instruction.
+#
+spu_fir_ccc:
+        ori     $32,$5,0        # 0-2 keep the output pointer
+        lqa     $41, spu_fir_ccc_hightapmask    # 1-6 expand masks for taps
+        ori     $33,$7,0        # 0-2 keep the offset
+        lqa     $42, spu_fir_ccc_lowtapmask     # 1-6 expand masks for taps
+        ori     $31,$8,0        # 0-2 keep the sample count
+        lqa     $43, spu_fir_ccc_xormask        # 1-6 mask for inverse of bd
+        nop
+        lqa     $44, spu_fir_ccc_leftexpand     # 1-6 mask to shift bd bc to preferred slot
+
+# Start a new output quadword.
+.spu_fir_ccc_start2:
+        xor     $30,$30,$30     # 0-2 initialize the current output vector
+        il      $34, 16         # 0-2 rotate count for output insertion
+
+# Start one result: clear the accumulators and select the shuffle patterns
+# that match the current offset.
+.spu_fir_ccc_start1:
+        andi    $37,$33,0x0c    # 0-2 offset within the quadword (0, 4, 8 or 12)
+        xor     $5,$5,$5        # 0-2
+        shlqbii $38,$37,2       # 1-4 mult by 4 -> table offset 0/16/32/48
+        xor     $6,$6,$6        # 0-2
+        xor     $7,$7,$7        # 0-2
+        ori     $36,$9,0        # 0-2 reload the tap counter
+        xor     $8,$8,$8        # 0-2
+        lqd     $39,spu_fir_ccc_highshiftmasks($38)     # 1-6 load the right shift mask
+        a       $35, $33, $3    # 0-2 input address = input + offset
+        lqd     $40,spu_fir_ccc_lowshiftmasks($38)      # 1-6 load the right shift mask
+        ori     $38,$4,0        # 0-2 tap address (reuses $38 after the mask loads)
+
+        hbra    .spu_fir_ccc_inner_loop_branch, .spu_fir_ccc_inner_loop  # inner-loop hint
+
+        lqd     $10,0($35)      # 1-6 first three input quadwords
+        lqd     $11,16($35)     # 1-6
+        nop
+        lqd     $12,32($35)     # 1-6
+
+# Inner loop: each pass consumes 32 bytes of taps and 32 bytes of input and
+# lowers $36 by 4.  The input quadwords for the following pass are loaded
+# before the loop test, so the final pass loads beyond the data it uses.
+.spu_fir_ccc_inner_loop:
+        ai      $36,$36,-4      # 0-2
+        lqd     $14,0($38)      # 1-6
+        ai      $35,$35,32      # 0-2
+        lqd     $15,16($38)     # 1-6
+        shufb   $13,$10,$11,$40 # 1-4
+        shufb   $16,$14,$14,$42 # 1-4
+        ai      $38,$38,32      # 0-2
+
+        shufb   $10,$10,$11,$39 # 1-4
+        nop
+        # expensive, but needed
+        shufb   $14,$14,$14,$41 # 1-4
+        fma     $6, $13, $16, $6        # 0-6
+        shufb   $18,$11,$12,$40 # 1-4
+        fma     $5, $10, $14, $5        # 0-6
+
+        shufb   $17,$15,$15,$42 # 1-4
+        lqd     $10,0($35)      # 1-6 input for the next pass
+        shufb   $15,$15,$15,$41 # 1-4
+        nop
+        shufb   $11,$11,$12,$39 # 1-4
+        fma     $8, $18, $17, $8        # 0-6
+
+        lqd     $12,32($35)     # 1-6 input for the next pass
+        fma     $7, $11, $15, $7        # 0-6
+
+        lqd     $11,16($35)     # 1-6 input for the next pass
+
+.spu_fir_ccc_inner_loop_branch:
+        brnz    $36,.spu_fir_ccc_inner_loop
+
+# Reduce the four accumulators to one complex result and insert it in $30.
+        fsmbi   $10,0xFF00      # 1-4 byte mask used below to keep one result
+        fa      $18,$5,$6       # 0-6
+        hbra    .outter_loop_branch, .spu_fir_ccc_start1        # 1-
+        fa      $19,$7,$8       # 0-6
+        hbra    .spu_fir_ccc_finish_branch, .spu_fir_ccc_finish_branch_targ     # 1-
+        fa      $5,$18,$19      # 0-6
+
+        shufb   $6, $5, $5, $44 # 1-4 expand 5 to 6: words 2,3 of $5 -> words 0,1 of $6
+        xor     $6,$6,$43       # 0-2 flip the sign bit of word 0 (xormask)
+        fa      $11, $5, $6     # 0-6
+
+        and     $12,$10,$11     # 0-2 apply the fsmbi mask
+
+        rotqby  $11, $12, $34   # 1-4 rotate into this result's position
+        or      $30,$11,$30     # 0-2 merge into the output vector
+        ai      $31,$31,-1      # 0-2 one result produced
+
+.spu_fir_ccc_finish_branch:
+        brz     $31,.spu_fir_ccc_finish4
+.spu_fir_ccc_finish_branch_targ:
+        ai      $33,$33,8       # 0-2 next result starts 8 bytes further on
+        ai      $34,$34,-8      # 0-2 next position inside the output vector
+
+.outter_loop_branch:
+        brnz    $34, .spu_fir_ccc_start1
+        hbra    .spu_fir_ccc_outter_outter_loop_branch, .spu_fir_ccc_start2
+# Store the assembled output vector ($30).
+.spu_fir_ccc_finish4:
+        stqd    $30,0($32)
+        ai      $32,$32,16      # increment output pointer by 1 new vector.
+
+.spu_fir_ccc_outter_outter_loop_branch:
+        brnz    $31,.spu_fir_ccc_start2         # start another output vector if needed
+
+        bi      $lr
+        .size   spu_fir_ccc, .-spu_fir_ccc
+       
+.text
+# Constant quadwords loaded by spu_fir_ccc.  All but spu_fir_ccc_xormask are
+# used as the pattern operand of shufb; each pattern byte names a byte of the
+# 32-byte pair formed by the two shufb source registers.
+
+# Four 16-byte patterns, one per 4-byte input offset.
+# Pattern k replicates source words k, k, k+1, k+1.
+        .align  4
+        .global spu_fir_ccc_highshiftmasks
+        .type   spu_fir_ccc_highshiftmasks, @object
+        .size   spu_fir_ccc_highshiftmasks, 64
+spu_fir_ccc_highshiftmasks:
+        .long 0x00010203, 0x00010203, 0x04050607, 0x04050607
+        .long 0x04050607, 0x04050607, 0x08090a0b, 0x08090a0b
+        .long 0x08090a0b, 0x08090a0b, 0x0c0d0e0f, 0x0c0d0e0f
+        .long 0x0c0d0e0f, 0x0c0d0e0f, 0x10111213, 0x10111213
+
+# Same layout, two words further on: pattern k replicates source words
+# k+2, k+2, k+3, k+3.
+        .align  4
+        .global spu_fir_ccc_lowshiftmasks
+        .type   spu_fir_ccc_lowshiftmasks, @object
+        .size   spu_fir_ccc_lowshiftmasks, 64
+spu_fir_ccc_lowshiftmasks:
+        .long 0x08090a0b, 0x08090a0b, 0x0c0d0e0f, 0x0c0d0e0f
+        .long 0x0c0d0e0f, 0x0c0d0e0f, 0x10111213, 0x10111213
+        .long 0x10111213, 0x10111213, 0x14151617, 0x14151617
+        .long 0x14151617, 0x14151617, 0x18191a1b, 0x18191a1b
+
+# Selects words 0, 1, 1, 0 of a tap quadword.
+        .align  4
+        .global spu_fir_ccc_hightapmask
+        .type   spu_fir_ccc_hightapmask, @object
+        .size   spu_fir_ccc_hightapmask, 16
+spu_fir_ccc_hightapmask:
+        .long 0x00010203, 0x04050607, 0x04050607, 0x00010203
+
+# Selects words 2, 3, 3, 2 of a tap quadword.
+        .align  4
+        .global spu_fir_ccc_lowtapmask
+        .type   spu_fir_ccc_lowtapmask, @object
+        .size   spu_fir_ccc_lowtapmask, 16
+spu_fir_ccc_lowtapmask:
+        .long 0x08090a0b, 0x0c0d0e0f, 0x0c0d0e0f, 0x08090a0b
+
+# XOR operand with only the top bit of word 0 set.
+        .align  4
+        .global spu_fir_ccc_xormask
+        .type   spu_fir_ccc_xormask, @object
+        .size   spu_fir_ccc_xormask, 16
+spu_fir_ccc_xormask:
+        .long 0x80000000, 0x00000000, 0x00000000, 0x00000000
+
+# Moves source words 2, 3 into words 0, 1; the 0x80 pattern bytes in the
+# remaining two words are the shufb encoding for a zero result byte.
+        .align  4
+        .global spu_fir_ccc_leftexpand
+        .type   spu_fir_ccc_leftexpand, @object
+        .size   spu_fir_ccc_leftexpand, 16
+spu_fir_ccc_leftexpand:
+        .long 0x08090a0b, 0x0c0d0e0f, 0x80808080, 0x80808080
+       
+       .ident  "Hand coded Cell SPU assembly"

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h 
(from rev 8117, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h          
                (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h  
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_CCC_AS_H_
+#define SPU_FIR_CCC_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief compute an array of N interleaved complex output values from 
+ * interleaved complex inputs and taps.
+ *
+ * \param input must have (nsamples - 1 + ntaps) valid entries.  input does 
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps - 1] are 
+ * referenced to compute the output values.
+ * NOTE(review): the assembly implementation loads the input quadwords for
+ * the next inner-loop pass before testing the loop counter, so it reads
+ * somewhat beyond the last entry it uses -- confirm the buffer is padded.
+ * 
+ * \param taps a pointer to the pre-reversed and aligned set of taps.  Taps 
+ * must be a multiple of 4 and padded accordingly.
+ * 
+ * \param output a pointer to the filter output buffer.
+ * NOTE(review): the assembly implementation stores whole 16-byte quadwords
+ * (two complex results each), so the buffer presumably has to be 16-byte
+ * aligned and padded to a whole quadword -- confirm.
+ * 
+ * \param delayline NOTE: currently not used
+ * 
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero.  If input is not aligned, this is
+ * the difference in alignment.
+ * NOTE(review): the assembly implementation adds this value directly to the
+ * input address, which suggests it is expressed in bytes -- confirm.
+ * 
+ * \param nsamples number of samples to produce for output
+ * 
+ * \param ntaps length of the tap vector.  Must be a multiple of 4 (the
+ * assembly inner loop counts it down by 4 until it reaches zero).
+ */
+
+extern void spu_fir_ccc(const __vector float *input,
+    const __vector float *taps, __vector float *output,
+    const __vector float *delayline, const unsigned int offset,
+    const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_CCC_AS_H_

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S 
(from rev 8117, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S        
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S        
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,180 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+
+# taps are guaranteed to be 16 byte aligned.
+# ntaps != 0
+
+#  void spu_fir_fff64 (        
+#      const __vector double *input,
+#      const __vector double *taps, 
+#      __vector double *output,
+#      const __vector double *delayline,
+#      const unsigned int offset,
+#      const unsigned int nsamples,
+#      const unsigned int ntaps
+#  )
+#  {
+#    unsigned int o_index = 0;
+#
+#    do {  
+#      double sum0 = 0;
+#      double sum1 = 0;
+#      double sum2 = 0;
+#      double sum3 = 0;
+#
+#         unsigned int n_2_float_blocks = ntaps / 2;
+#
+#         unsigned int cur_tap = 0;
+#      unsigned int cur_input = input + offset + o_index;
+#
+#      do {
+#  
+#        sum0 += cur_input[0] * cur_tap[0];
+#        sum1 += cur_input[1] * cur_tap[1];
+#        sum2 += cur_input[2] * cur_tap[2];
+#        sum3 += cur_input[3] * cur_tap[3];
+#  
+#        cur_input += 4;
+#        cur_tap += 4;
+#  
+#      } while ((n_2_float_blocks -= 4) != 0);
+#
+#              output[o_index] = sum0 + sum1 + sum2 + sum3;
+#
+#    } while (o_index++ != nsamples);
+#  
+#  }
+#      
+
+       .file   "fir_fff64_spu.S"
+.text
+        .align  3
+        .global spu_fir_fff64
+        .type   spu_fir_fff64, @function
+#
+# spu_fir_fff64 -- FIR kernel using the double-precision dfma/dfa operations.
+#
+# Register use, as established by the code below.  The mapping of the
+# prototype's arguments onto $3..$9 is assumed from the SPU calling
+# convention -- confirm against the ABI document.
+#   $3   input base address          $4   taps base address
+#   $5   output base address         $6   delayline (overwritten before use)
+#   $7   offset, added to the input address
+#   $8   nsamples                    $9   ntaps
+#   $30  output quadword being assembled (two results)
+#   $31  results still to produce
+#   $32  output store address, advanced by 16 per stored quadword
+#   $33  running offset into the input, advanced by 8 per result
+#   $34  rotate count that places a result inside $30 (16, then 8)
+#   $35  current input load address  $38  current tap load address
+#   $36  taps left for the current result, reduced by 8 per inner pass
+#   $39  input shuffle pattern chosen from spu_fir_fff64_shiftmasks32
+#   $5..$8  the four partial-sum accumulators
+#
+# The trailing "# p-n" notes are the original author's; they appear to give
+# the issue pipeline (0 or 1) and the latency of each instruction.
+#
+spu_fir_fff64:
+        ori     $32,$5,0        # 0-2 keep the output pointer
+        ori     $33,$7,0        # 0-2 keep the offset
+        ori     $31,$8,0        # 0-2 keep the sample count
+
+# Start a new output quadword.
+.spu_fir_fff64_start2:
+        xor     $30,$30,$30     # 0-2 initialize the current output vector
+        il      $34, 16         # 0-2 rotate count for output insertion
+
+# Start one result: clear the accumulators and select the shuffle pattern.
+# NOTE(review): the pattern index below still uses 4-byte steps (0x0c) even
+# though this kernel works on 8-byte elements; the original author marked
+# the line TODO.
+.spu_fir_fff64_start1:
+        andi    $37,$33,0x0c    # 0-2 find index into masks [0-4]  TODO!!!!!
+        xor     $5,$5,$5        # 0-2
+        xor     $6,$6,$6        # 0-2
+        shlqbii $38,$37,2       # 1-4 mult by 4 -> table offset 0/16/32/48
+        xor     $7,$7,$7        # 0-2
+        ori     $36,$9,0        # 0-2 reload the tap counter
+        lqd     $39,spu_fir_fff64_shiftmasks32($38)     # 1-6 load the right shift mask
+        xor     $8,$8,$8        # 0-2
+        a       $35, $33, $3    # 0-2 input address = input + offset
+        ori     $38,$4,0        # 0-2 tap address (reuses $38 after the mask load)
+
+        hbra    .spu_fir_fff64_inner_loop_branch, .spu_fir_fff64_inner_loop      # inner-loop hint
+
+# Inner loop: each pass loads five input quadwords and four tap quadwords,
+# then advances both pointers by 64 bytes and lowers $36 by 8.
+.spu_fir_fff64_inner_loop:
+        lqd     $10,0($35)      # 1-6
+        lqd     $11,16($35)     # 1-6
+        lqd     $12,32($35)     # 1-6
+        lqd     $13,48($35)     # 1-6
+        lqd     $18,64($35)     # 1-6
+        lqd     $14,0($38)      # 1-6
+        lqd     $15,16($38)     # 1-6
+        ai      $36,$36,-8      # 0-2 consume 8 taps every time
+        lqd     $16,32($38)     # 1-6
+        ai      $35,$35,64      # 0-2 consume 64 bytes of input data each loop
+        lqd     $17,48($38)     # 1-6
+        ai      $38,$38,64      # 0-2 consume 64 bytes of taps each loop
+
+        shufb   $10,$10,$11,$39 # 1-4
+        dfma    $5, $10, $14    # 0-6
+        shufb   $11,$11,$12,$39 # 1-4
+        dfma    $6, $11, $15    # 0-6
+        shufb   $12,$12,$13,$39 # 1-4
+        dfma    $7, $12, $16    # 0-6
+        shufb   $13,$13,$18,$39 # 1-4
+        dfma    $8, $13, $17    # 0-6
+
+# NOTE(review): this label, named as the branch address in the hbra above,
+# sits on the fsmbi one instruction before the brnz -- confirm the hint
+# still applies to the branch.
+.spu_fir_fff64_inner_loop_branch:
+        fsmbi   $10,0xFF00      # 1-4 byte mask used below to keep one result
+        brnz    $36,.spu_fir_fff64_inner_loop
+        dfa     $18,$5,$6       # 0-6
+        hbra    .spu_fir_fff64_outter_loop_branch, .spu_fir_fff64_start1        # 1-
+        dfa     $19,$7,$8       # 0-6
+        hbra    .spu_fir_fff64_finish_branch, .spu_fir_fff64_finish_branch_targ # 1-
+        dfa     $5,$18,$19      # 0-6
+
+# accumulate both doubleword elements of r5 into the first element of r5
+        ori     $6,$5,0         # 0-2 copy the sums
+        shlqbyi $6,$6,8         # 1-4 shift the copy left by 8 bytes
+        dfa     $5,$6,$5        # 0-2
+        and     $11,$10,$5      # 0-2 apply the fsmbi mask
+        rotqby  $12, $11, $34   # 1-4 rotate into this result's position
+        or      $30,$12,$30     # 0-2 merge into the output vector
+
+        ai      $31,$31,-1      # 0-2 produce 1 sample each inner-loop
+.spu_fir_fff64_finish_branch:
+        brz     $31,.spu_fir_fff64_finish2
+.spu_fir_fff64_finish_branch_targ:
+        ai      $33,$33,8       # 0-2 offset into input data moves 8 bytes each loop
+        ai      $34,$34,-8      # 0-2 shiftmask moves twice each loop
+
+.spu_fir_fff64_outter_loop_branch:
+        brnz    $34, .spu_fir_fff64_start1
+        hbra    .spu_fir_fff64_outter_outter_loop_branch, .spu_fir_fff64_start2
+# Store the assembled output vector ($30).
+.spu_fir_fff64_finish2:
+        stqd    $30,0($32)
+        ai      $32,$32,16      # increment output pointer by 1 new vector.
+
+.spu_fir_fff64_outter_outter_loop_branch:
+        brnz    $31,.spu_fir_fff64_start2       # start another output vector if needed
+
+        bi      $lr
+        .size   spu_fir_fff64, .-spu_fir_fff64
+.text
+# Four 16-byte shufb patterns loaded by spu_fir_fff64.  Pattern k selects
+# the 16 consecutive bytes that start 4*k bytes into the 32-byte pair formed
+# by the two shufb source registers.
+        .align  4
+        .global spu_fir_fff64_shiftmasks32
+        .type   spu_fir_fff64_shiftmasks32, @object
+        .size   spu_fir_fff64_shiftmasks32, 64
+spu_fir_fff64_shiftmasks32:
+        .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+        .long 0x04050607, 0x08090a0b, 0x0c0d0e0f, 0x10111213
+        .long 0x08090a0b, 0x0c0d0e0f, 0x10111213, 0x14151617
+        .long 0x0c0d0e0f, 0x10111213, 0x14151617, 0x18191a1b
+
+       .ident  "Hand coded Cell SPU assembly"

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h 
(from rev 8117, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h        
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h        
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_FFF64_AS_H_
+#define SPU_FIR_FFF64_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief compute an array of nsamples output values.
+ *
+ * \param input must have (nsamples - 1 + ntaps) valid entries.  input does 
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps - 1] are 
+ * referenced to compute the output values.
+ * 
+ * \param taps a pointer to the pre-reversed and aligned set of taps.  Taps 
+ * must be a multiple of 8 and padded accordingly.
+ * 
+ * \param output a pointer to the filter output buffer
+ * 
+ * \param delayline NOTE: currently not used
+ * 
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero.  If input is not aligned, this is
+ * the difference in alignment.
+ * 
+ * \param nsamples number of samples to produce for output
+ * 
+ * \param ntaps length of the tap vector.  Must be a multiple of 8. 
+ */
+
+extern void spu_fir_fff64(const __vector double *input,
+    const __vector double *taps, __vector double *output,
+    const __vector double *delayline, const unsigned int offset,
+    const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_FFF64_AS_H_

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S 
(from rev 8117, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S          
                (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S  
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,197 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+# 
+# This file is part of GNU Radio
+# 
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+# 
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING.  If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+# 
+
+
+# taps are guaranteed to be 16 byte aligned.
+# ntaps != 0
+
+#  void spu_fir_fff (  
+#      const __vector float *input,
+#      const __vector float *taps, 
+#      __vector float *output,
+#      const __vector float *delayline,
+#      const unsigned int offset,
+#      const unsigned int nsamples,
+#      const unsigned int ntaps
+#  )
+#  {
+#    unsigned int o_index = 0;
+#
+#    do {  
+#      float sum0 = 0;
+#      float sum1 = 0;
+#      float sum2 = 0;
+#      float sum3 = 0;
+#
+#         unsigned int n_4_float_blocks = ntaps / 4;
+#
+#         unsigned int cur_tap = 0;
+#      unsigned int cur_input = input + offset + o_index;
+#
+#      do {
+#  
+#        sum0 += cur_input[0] * cur_tap[0];
+#        sum1 += cur_input[1] * cur_tap[1];
+#        sum2 += cur_input[2] * cur_tap[2];
+#        sum3 += cur_input[3] * cur_tap[3];
+#  
+#        cur_input += 4;
+#        cur_tap += 4;
+#  
+#      } while ((n_4_float_blocks -= 4) != 0);
+#
+#              output[o_index] = sum0 + sum1 + sum2 + sum3;
+#
+#    } while (++o_index != nsamples);
+#  
+#  }
+#      
+
+       .file   "fir_fff_spu.S"
+.text
+       .align  3
+       .global spu_fir_fff
+       .type   spu_fir_fff, @function
+spu_fir_fff:                    # args, assuming the SPU ABI passes them in order from $3: $3 input, $4 taps, $5 output, $6 delayline (never read), $7 offset, $8 nsamples, $9 ntaps
+                               ori     $32,$5,0        # 0-2 $32 = output pointer (copy of $5, which is reused as an accumulator below)
+                               ori     $33,$7,0        # 0-2 $33 = running byte offset into input (copy of $7)
+                               ori     $31,$8,0        # 0-2 $31 = number of outputs still to produce (copy of $8)
+                       .spu_fir_fff_start4: 
+                               xor $30,$30,$30         # 0-2 initialize the 
current output vector                               
+                               il  $34, 16             # 0-2 shift mask for 
output insertion
+# Register $34 is the rotqby byte count for the next result: 16, 12, 8, 4 rotate it into word 0, 1, 2, 3 of $30.
+                       .spu_fir_fff_start1: 
+                               andi    $37,$33,0x0c     # 0-2 find index into 
masks [0-4]
+                               xor     $5,$5,$5         # 0-2 clear partial-sum accumulator 0
+                               xor     $6,$6,$6         # 0-2 clear partial-sum accumulator 1
+                                       shlqbii $38,$37,2        # 1-4 mult by 4
+                               xor     $7,$7,$7         # 0-2 clear partial-sum accumulator 2
+                               ori     $36,$9,0         # 0-2 $36 = taps left to process for this output (copy of $9)
+                       
+                               xor     $8,$8,$8         # 0-2 clear partial-sum accumulator 3
+                                       lqd     $39,spu_fir_fff_shiftmasks($38) 
 # 1-6 load the right shift mask
+                               a       $35, $33, $3     # 0-2 $35 = input + byte offset (address of the first sample for this output)
+                               ori     $38,$4,0         # 0-2 $38 = tap pointer (copy of $4), restarted for every output
+               
+                               hbra    .spu_fir_fff_inner_loop_branch, 
.spu_fir_fff_inner_loop         # inner-loop hint       
+# Preload three input quadwords. Per the SPU ISA lqd ignores the low four address bits, so $35 may be unaligned; shufb with $39 realigns.
+                                       lqd     $10,0($35)       # 1-6
+                                       lqd     $11,16($35)      # 1-6
+                                       lqd     $12,32($35)      # 1-6
+                       .spu_fir_fff_inner_loop:                
+# Each pass consumes 16 taps: four realigned input windows times four tap quadwords, accumulated separately in $5-$8.
+                                       lqd     $13,48($35)      # 1-6          
                
+                                       lqd     $18,64($35)      # 1-6 fifth input quadword, needed to realign the fourth window
+                                       lqd     $14,0($38)       # 1-6 tap quadword 0
+                               nop
+                                       lqd     $15,16($38)      # 1-6 tap quadword 1
+
+                               ai      $36,$36,-16      # 0-2 sixteen taps consumed by this pass
+                                       shufb   $10,$10,$11,$39         # 1-4 window 0 = selected bytes of $10:$11
+                                       lqd     $16,32($38)      # 1-6 tap quadword 2
+                               nop
+                               ai      $35,$35,64       # 0-2 advance input address by four quadwords
+                                       shufb   $11,$11,$12,$39         # 1-4
+                                       lqd     $17,48($38)      # 1-6 tap quadword 3
+                                       shufb   $12,$12,$13,$39         # 1-4
+                               ai      $38,$38,64       # 0-2 advance tap pointer by four quadwords
+
+                                       shufb   $13,$13,$18,$39         # 1-4
+                               fma     $5, $10, $14, $5        # 0-6 
+                               nop                             
+                               fma     $6, $11, $15, $6        # 0-6
+                                       lqd     $10,0($35)       # 1-6 reload the first three input quadwords for the next pass
+                               fma     $7, $12, $16, $7        # 0-6           
        
+                                       lqd     $11,16($35)      # 1-6
+                               fma     $8, $13, $17, $8        # 0-6
+                                       lqd     $12,32($35)      # 1-6
+# Loop until the tap counter $36 reaches zero; ntaps must therefore be a nonzero multiple of 16.
+                               
+                               
+                       .spu_fir_fff_inner_loop_branch: 
+                               brnz    $36,.spu_fir_fff_inner_loop     
+                                       fsmbi   $10,0xF000       # 1-4 $10 = mask with bytes 0-3 set, i.e. word 0 only
+                               fa      $18,$5,$6                # 0-6 combine the four accumulators into $5
+                                       hbra    
.spu_fir_fff_outter_loop_branch, .spu_fir_fff_start1     # 1-
+                               fa      $19,$7,$8                # 0-6
+                                       hbra    .spu_fir_fff_finish_branch, 
.spu_fir_fff_finish_branch_targ      # 1-
+                               fa      $5,$18,$19               # 0-6
+
+# accumulate word elements in r5 into first element in r5      
+                               ori     $6,$5,0          # 0-2          
+                                       shlqbyi $6,$6,4          # 1-4 shift left 4 bytes: next word moves into word 0
+                               ai      $31,$31,-1       # 0-2 one fewer output left to produce
+                               fa      $5,$6,$5         # 0-2
+                                       shlqbyi $6,$6,4          # 1-4
+                               fa      $5,$6,$5         # 0-2
+                                       shlqbyi $6,$6,4          # 1-4
+                               fa      $5,$6,$5         # 0-2 word 0 of $5 now holds the sum of all four words
+                               and     $11,$10,$5       # 0-2 keep only word 0 (the finished sum)
+                                       rotqby  $12, $11, $34    # 1-4 rotate the sum into its slot of the output vector
+                               or      $30,$12,$30      # 0-2 merge it into the output vector being assembled
+                       
+                               ai      $34,$34,-4       # 0-2 next result goes into the next word of $30
+                       .spu_fir_fff_finish_branch:     
+                               brz     $31,.spu_fir_fff_finish4         # all outputs produced: store the (possibly partial) vector
+                       .spu_fir_fff_finish_branch_targ:        
+
+                               ai      $33,$33,4        # 0-2 advance the input byte offset by one float
+
+                       .spu_fir_fff_outter_loop_branch:                
+                               brnz    $34, .spu_fir_fff_start1        # output vector not yet full: compute the next sample
+                               hbra    .spu_fir_fff_outter_outter_loop_branch, 
.spu_fir_fff_start4     
+# Stores the assembled output vector ($30) in output
+                       .spu_fir_fff_finish4:   
+                               stqd    $30,0($32)              # always a full 16-byte store; words not yet filled are zero
+                               ai      $32,$32,16                              
# increment output pointer by 1 new vector.
+
+                       .spu_fir_fff_outter_outter_loop_branch: 
+                               brnz    $31,.spu_fir_fff_start4                 
# start another output vector if needed
+
+       bi      $lr             # return to caller
+       .size   spu_fir_fff, .-spu_fir_fff
+.text
+        .global spu_fir_fff_shiftmasks
+        .align  4
+        .type   spu_fir_fff_shiftmasks, @object
+        .size   spu_fir_fff_shiftmasks, 64      # four 16-byte shufb control words
+spu_fir_fff_shiftmasks:
+       .long 0x00010203     # entry 0 (input offset & 0x0c == 0): bytes 0x00-0x0f, i.e. the first register unchanged
+       .long 0x04050607
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+
+       .long 0x04050607     # entry 1 (misaligned by 4 bytes): bytes 0x04-0x13 of the register pair
+       .long 0x08090a0b
+       .long 0x0c0d0e0f
+       .long 0x10111213
+
+       .long 0x08090a0b     # entry 2 (misaligned by 8 bytes): bytes 0x08-0x17 of the register pair
+       .long 0x0c0d0e0f
+       .long 0x10111213
+       .long 0x14151617
+
+       .long 0x0c0d0e0f     # entry 3 (misaligned by 12 bytes): bytes 0x0c-0x1b of the register pair
+       .long 0x10111213
+       .long 0x14151617
+       .long 0x18191a1b     # spu_fir_fff indexes this table with (offset & 0x0c) * 4 and uses the entry as the shufb selector
+
+       .ident  "Hand coded Cell SPU assembly"

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h 
(from rev 8117, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h          
                (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h  
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_FFF_AS_H_
+#define SPU_FIR_FFF_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief compute an array of nsamples output values.
+ *
+ * \param input must have (nsamples - 1 + ntaps) valid entries.  input does 
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps - 1] are 
+ * referenced to compute the output values.
+ * 
+ * \param taps a pointer to the pre-reversed and aligned set of taps.  Taps 
+ * must be a multiple of 16 and padded accordingly.
+ * 
+ * \param output a pointer to the filter output buffer
+ * 
+ * \param delayline NOTE: currently not used
+ * 
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero.  If input is not aligned, this is
+ * the difference in alignment.
+ * 
+ * \param nsamples number of samples to produce for output
+ * 
+ * \param ntaps length of the tap vector.  Must be a multiple of 16. 
+ */
+
+extern void spu_fir_fff(const __vector float *input,
+    const __vector float *taps, __vector float *output,
+    const __vector float *delayline, const unsigned int offset,
+    const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_FFF_AS_H_

Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h 
(from rev 8117, 
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h      
                        (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h      
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,38 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ * 
+ * This file is part of GNU Radio
+ * 
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ * 
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * 
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_FFF_PARAMS_H_
+#define SPU_FIR_FFF_PARAMS_H_
+
+typedef struct   /* parameter block for the SPU FIR filter; NOTE(review): presumably transferred to the SPU by DMA - confirm against the users of this type */
+{
+    unsigned long long ea_in1;   /* 64-bit value; presumably the effective address of the first input buffer - TODO confirm */
+    unsigned long long ea_in2;   /* 64-bit value; presumably the effective address of the second input buffer (taps?) - TODO confirm */
+    unsigned long long ea_out;   /* 64-bit value; presumably the effective address of the output buffer - TODO confirm */
+    unsigned int nsamples;       /* same name as the nsamples argument of spu_fir_fff; presumably forwarded to it */
+    unsigned int ntaps;          /* same name as the ntaps argument of spu_fir_fff; presumably forwarded to it */
+    unsigned int offset;         /* same name as the offset argument of spu_fir_fff; presumably forwarded to it */
+    unsigned int size;           /* NOTE(review): meaning not visible in this file - document at the point of use */
+    int pad[6];                  /* brings the struct to 3*8 + 4*4 + 6*4 = 64 bytes, assuming 8-byte long long and 4-byte int */
+} spu_fir_fff_params_t;
+
+#endif /*SPU_FIR_FFF_PARAMS_H_*/





reply via email to

[Prev in Thread] Current Thread [Next in Thread]