[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r7934 - gnuradio/branches/developers/ngoergen/spe_fir_
From: |
ngoergen |
Subject: |
[Commit-gnuradio] r7934 - gnuradio/branches/developers/ngoergen/spe_fir_fff |
Date: |
Wed, 5 Mar 2008 10:04:16 -0700 (MST) |
Author: ngoergen
Date: 2008-03-05 10:04:16 -0700 (Wed, 05 Mar 2008)
New Revision: 7934
Added:
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h
Modified:
gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
Log:
ngoergen: spe_fir - finished interleaved complex float SPE fir routine
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-03-05
16:49:51 UTC (rev 7933)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-03-05
17:04:16 UTC (rev 7934)
@@ -11,7 +11,7 @@
SPU_AS = spu-as
SPU_CPP = spu-g++
-all: multi_fir_fff_ppe multi_fir_fff64_ppe fir_fff_spe.elf fir_fff64_spe.elf
+all: multi_fir_fff_ppe multi_fir_fff64_ppe multi_fir_ccc_ppe fir_fff_spe.elf
fir_fff64_spe.elf fir_ccc_spe.elf
asm: fir_fff_spe.s
@@ -23,6 +23,9 @@
multi_fir_fff64_ppe: multi_fir_fff64_ppe.c
$(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
+
+multi_fir_ccc_ppe: multi_fir_ccc_ppe.c
+ $(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
%.o: %.cpp
$(SPU_CC) -c $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
@@ -35,5 +38,9 @@
fir_fff64_spe.elf: fir_fff64_spe.o spe_fir_fff64_as.o
$(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
+
+fir_ccc_spe.elf: fir_ccc_spe.o spe_fir_ccc_as.o
+ $(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
+
clean:
- rm -f multi_fir_fff_ppe fir_fff_spe.elf fir_fff_spe.s
multi_fir_fff64_ppe fir_fff64_spe.elf *.o
+ rm -f multi_fir_fff_ppe fir_fff_spe.elf fir_fff_spe.s
multi_fir_fff64_ppe multi_fir_ccc_ppe fir_fff64_spe.elf fir_ccc_spe.elf *.o
Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
2008-03-05 17:04:16 UTC (rev 7934)
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include "gr_spe_dma_lock.h"
+#include "spe_fir_fff_params.h"
+#include "spe_fir_ccc_as.h"
+
+#define MAX_BUFSIZE (128*100)
+
+//float inputs[MAX_BUFSIZE] __attribute__((aligned(16))) = {1, 2, 1, 2, 1,
2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2};
+//float taps[MAX_BUFSIZE] __attribute__((aligned(16))) = {2.32, -23.6563, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0};
+// Statically allocated, 16-byte-aligned buffers of MAX_BUFSIZE floats each.
+// main() hands them to the DMA locks and to spe_fir_ccc() as the input,
+// tap and output vectors respectively.
+float inputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+float taps[MAX_BUFSIZE] __attribute__((aligned(16)));
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+
+
+// SPE-side entry point for the interleaved complex-float FIR test program.
+//
+// argp is the effective address of a spe_fir_fff_params_t block supplied by
+// the PPE; spe and envp are not used.  The parameter block is fetched first,
+// then DMA locks are set up for the input, tap and output buffers and
+// spe_fir_ccc() is run on them.
+//
+// Returns 0 on success, or 1 when the requested element count would not fit
+// in the MAX_BUFSIZE-float local buffers.
+int main(unsigned long long spe, unsigned long long argp, unsigned long long envp)
+{
+  int tag = 1;
+  spe_fir_fff_params_t spe_fir_fff_params __attribute__((aligned(16)));
+
+  {
+    // NOTE(review): the fields of the parameter block are read immediately
+    // below, so this lock is presumably synchronous (transfer complete when
+    // the constructor returns) -- confirm against gr_spe_dma_lock.h.
+    gr_spe_dma_lock_in<spe_fir_fff_params_t> argp_lock(
+        argp, &spe_fir_fff_params, sizeof(spe_fir_fff_params_t), tag);
+
+    // `size` is chosen by the PPE and is used as the DMA length for all three
+    // fixed-size buffers; refuse anything that would run past their end.
+    if (static_cast<unsigned long long>(spe_fir_fff_params.size) > MAX_BUFSIZE)
+      return 1;
+
+    __vector float *vin   = reinterpret_cast<__vector float *>(&inputs);
+    __vector float *vtaps = reinterpret_cast<__vector float *>(&taps);
+    __vector float *vout  = reinterpret_cast<__vector float *>(&outputs);
+
+    gr_spe_dma_lock_in<__vector float> inputs_lock(
+        spe_fir_fff_params.ea_in1, vin,
+        spe_fir_fff_params.size * sizeof(float), tag);
+    gr_spe_dma_lock_in<__vector float> taps_lock(
+        spe_fir_fff_params.ea_in2, vtaps,
+        spe_fir_fff_params.size * sizeof(float), tag);
+    gr_spe_dma_lock_out<__vector float> outputs_lock(
+        spe_fir_fff_params.ea_out, vout,
+        spe_fir_fff_params.size * sizeof(float), tag);
+    //
+    // spe_fir_fff_params.offset = 0;
+    // spe_fir_fff_params.nsamples = 7;
+    // spe_fir_fff_params.ntaps = 16;
+
+    // The fourth argument (delayline) is passed as a null pointer.
+    spe_fir_ccc(
+        vin,
+        vtaps,
+        vout,
+        0,
+        spe_fir_fff_params.offset,
+        spe_fir_fff_params.nsamples,
+        spe_fir_fff_params.ntaps);
+
+  }
+
+  return 0;
+}
Property changes on:
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
___________________________________________________________________
Name: svn:executable
+ *
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp
2008-03-05 16:49:51 UTC (rev 7933)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp
2008-03-05 17:04:16 UTC (rev 7934)
@@ -1,43 +1,41 @@
#include <stdio.h>
-#include <spu_intrinsics.h>
#include "gr_spe_dma_lock.h"
#include "spe_fir_fff_params.h"
#include "spe_fir_fff64_as.h"
#define MAX_BUFSIZE (64*100)
-double in1_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
-double in2_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
-double out_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
+double inputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+double taps[MAX_BUFSIZE] __attribute__((aligned(16)));
+double outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
-spe_fir_fff_params_t spe_fir_fff_params __attribute__((aligned(16)));
-
int main(unsigned long long spe, unsigned long long argp, unsigned long long
envp)
{
int tag = 1;
- __vector double *vin1 = (__vector double *) in1_spe;
- __vector double *vin2 = (__vector double *) in2_spe;
- __vector double *vout = (__vector double *) out_spe;
+ spe_fir_fff_params_t spe_fir_fff_params __attribute__((aligned(16)));
{
gr_spe_dma_lock_in<spe_fir_fff_params_t> argp_lock(
- argp, &spe_fir_fff_params, sizeof(spe_fir_fff_params_t), tag);
- gr_spe_dma_lock_in<__vector double> vin1_lock(
- spe_fir_fff_params.ea_in1, vin1, spe_fir_fff_params.size *
sizeof(double), tag);
- gr_spe_dma_lock_in<__vector double> vin2_lock(
- spe_fir_fff_params.ea_in2, vin2, spe_fir_fff_params.size *
sizeof(double), tag);
- gr_spe_dma_lock_out<__vector double> out_lock(
- spe_fir_fff_params.ea_out, vout, spe_fir_fff_params.size *
sizeof(double), tag);
+ argp, &spe_fir_fff_params, sizeof(spe_fir_fff_params_t), tag);
+ gr_spe_dma_lock_in<__vector double> inputs_lock(
+ spe_fir_fff_params.ea_in1, reinterpret_cast<__vector
double *>(&inputs),
+ spe_fir_fff_params.size * sizeof(double), tag);
+ gr_spe_dma_lock_in<__vector double> taps_lock(
+ spe_fir_fff_params.ea_in2, reinterpret_cast<__vector
double *>(&taps),
+ spe_fir_fff_params.size * sizeof(double), tag);
+ gr_spe_dma_lock_out<__vector double> outputs_lock(
+ spe_fir_fff_params.ea_out, reinterpret_cast<__vector
double *>(&outputs),
+ spe_fir_fff_params.size * sizeof(double), tag);
+
+ spe_fir_fff64(
+ reinterpret_cast<__vector double *>(&inputs),
+ reinterpret_cast<__vector double *>(&taps),
+ reinterpret_cast<__vector double *>(&outputs),
+ 0,
+ spe_fir_fff_params.offset,
+ spe_fir_fff_params.nsamples,
+ spe_fir_fff_params.ntaps);
- spe_fir_fff64(
- vin1,
- vin2,
- vout,
- 0,
- spe_fir_fff_params.offset,
- spe_fir_fff_params.nsamples,
- spe_fir_fff_params.ntaps);
-
}
return 0;
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
2008-03-05 16:49:51 UTC (rev 7933)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
2008-03-05 17:04:16 UTC (rev 7934)
@@ -1,43 +1,42 @@
#include <stdio.h>
-#include <spu_intrinsics.h>
#include "gr_spe_dma_lock.h"
#include "spe_fir_fff_params.h"
#include "spe_fir_fff_as.h"
#define MAX_BUFSIZE (128*100)
-float in1_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
-float in2_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
-float out_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
+float inputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+float taps[MAX_BUFSIZE] __attribute__((aligned(16)));
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
-spe_fir_fff_params_t spe_fir_fff_params __attribute__((aligned(16)));
int main(unsigned long long spe, unsigned long long argp, unsigned long long
envp)
{
int tag = 1;
- __vector float *vin1 = (__vector float *) in1_spe;
- __vector float *vin2 = (__vector float *) in2_spe;
- __vector float *vout = (__vector float *) out_spe;
+ spe_fir_fff_params_t spe_fir_fff_params __attribute__((aligned(16)));
{
- gr_spe_dma_lock_in<spe_fir_fff_params_t> argp_lock(
- argp, &spe_fir_fff_params, sizeof(spe_fir_fff_params_t), tag);
- gr_spe_dma_lock_in<__vector float> vin1_lock(
- spe_fir_fff_params.ea_in1, vin1, spe_fir_fff_params.size *
sizeof(float), tag);
- gr_spe_dma_lock_in<__vector float> vin2_lock(
- spe_fir_fff_params.ea_in2, vin2, spe_fir_fff_params.size *
sizeof(float), tag);
- gr_spe_dma_lock_out<__vector float> out_lock(
- spe_fir_fff_params.ea_out, vout, spe_fir_fff_params.size *
sizeof(float), tag);
+ gr_spe_dma_lock_in<spe_fir_fff_params_t> argp_lock(
+ argp, &spe_fir_fff_params,
sizeof(spe_fir_fff_params_t), tag);
+ gr_spe_dma_lock_in<__vector float> inputs_lock(
+ spe_fir_fff_params.ea_in1, reinterpret_cast<__vector
float *>(&inputs),
+ spe_fir_fff_params.size * sizeof(float), tag);
+ gr_spe_dma_lock_in<__vector float> taps_lock(
+ spe_fir_fff_params.ea_in2, reinterpret_cast<__vector
float *>(&taps),
+ spe_fir_fff_params.size * sizeof(float), tag);
+ gr_spe_dma_lock_out<__vector float> outputs_lock(
+ spe_fir_fff_params.ea_out, reinterpret_cast<__vector
float *>(&outputs),
+ spe_fir_fff_params.size * sizeof(float), tag);
+
+ spe_fir_fff(
+ reinterpret_cast<__vector float *>(&inputs),
+ reinterpret_cast<__vector float *>(&taps),
+ reinterpret_cast<__vector float *>(&outputs),
+ 0,
+ spe_fir_fff_params.offset,
+ spe_fir_fff_params.nsamples,
+ spe_fir_fff_params.ntaps);
- spe_fir_fff(
- vin1,
- vin2,
- vout,
- 0,
- spe_fir_fff_params.offset,
- spe_fir_fff_params.nsamples,
- spe_fir_fff_params.ntaps);
-
}
return 0;
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h
2008-03-05 16:49:51 UTC (rev 7933)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h
2008-03-05 17:04:16 UTC (rev 7934)
@@ -5,14 +5,13 @@
template <class Tout>
class gr_spe_dma_lock {
-public:
+protected:
gr_spe_dma_lock(unsigned long long in, Tout* target, unsigned int size,
int tag) :
d_in(in), d_target(target), d_size(size), d_tag(tag) {
}
~gr_spe_dma_lock() {};
-protected:
void pull() {
spu_mfcdma64(d_target, mfc_ea2h(d_in), mfc_ea2l(d_in),
d_size, d_tag, MFC_GET_CMD);
Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
2008-03-05 17:04:16 UTC (rev 7934)
@@ -0,0 +1,198 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <libspe2.h>
+#include <pthread.h>
+#include <spe_fir_fff_params.h>
+
+#define NUM_SPE 1
+#define SIZE (64*1 )
+
+//#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 11, 12, 13, 14, 15, 16, \
+ 17, 18, 19, 20, 21, 22, 23, 24, \
+ 25, 26, 27, 28, 29, 30, 31, 32, \
+ 33, 34, 35, 36, 37, 38, 39, 40, \
+ 41, 42, 43, 44, 45, 46, 47, 48, \
+ 49, 50, 51, 52, 53, 54, 55, 56, \
+ 57, 58, 59, 60, 61, 62, 63, 64
+
+//#define MYMATRIX 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,0,0,0,0, \
+ 0,0,0,0,1,2,3,4
+
+#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 1, -1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1
+
+//#define MYMATRIX2 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
+ 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
+ 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
+ 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
+ 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
+ 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
+ 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
+ 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23
+
+//#define MYMATRIX2 -2, -2, -2, -2, -2, -2, -2, -2, \
+ -2, -2, -2, -2, -2, -2, -2, -2, \
+ -2, -2, -2, -2, -2, -2, -2, -2, \
+ -2, -2, -2, -2, -2, -2, -2, -2, \
+ -2, -2, -2, -2, -2, -2, -2, -2, \
+ -2, -2, -2, -2, -2, -2, -2, -2, \
+ -2, -2, -2, -2, -2, -2, -2, -2, \
+ -2, -2, -2, -2, -2, -2, -2, -2
+
+#define MYMATRIX2 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1
+
+
+#define MYMATRIX10 MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX, \
+ MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX,
MYMATRIX
+
+#define MYMATRIX100 MYMATRIX10, MYMATRIX10, MYMATRIX10, MYMATRIX10,
MYMATRIX10, \
+ MYMATRIX10, MYMATRIX10, MYMATRIX10,
MYMATRIX10, MYMATRIX10
+
+
+#define MYMATRIX210 MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2, \
+ MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2
+
+#define MYMATRIX2100 MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210,
MYMATRIX210, \
+ MYMATRIX210, MYMATRIX210, MYMATRIX210,
MYMATRIX210, MYMATRIX210
+
+#define TESTMATRIX 1, 2, 3, 4, -5, 6, -7, -8, 9, -10, -11.11, -12.22,
13.33, 14.44, 15.55, 16.66
+//#define TESTMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 234, 234, 234, 234, 234, 234
+#define TESTTAPS 2.32, -23.6563, -432.5434, -34.323, 0, 0, 0, 0, 0, 0, 0, 0,0,
0, 0, 0
+
+/*
+ * Test vectors.  The SPE program DMAs `size` (= SIZE / NUM_SPE) floats from
+ * each input array, so both inputs are SIZE floats long and 16-byte aligned,
+ * the same as `out`.  Elements beyond the listed initialisers are zero.
+ */
+float in1[SIZE] __attribute__((aligned(16))) = {
+    // MYMATRIX10
+    TESTMATRIX
+};
+float in2[SIZE] __attribute__((aligned(16))) = {
+    // MYMATRIX210
+    TESTTAPS
+};
+
+float out[SIZE] __attribute__((aligned(16)));
+
+/* One parameter block per SPE, 16-byte aligned; main() fills each entry and
+ * its address is handed to spe_context_run() as the SPE program's argp. */
+spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
+
+/* Argument bundle passed to run_abs_spe() through pthread_create(). */
+typedef struct {
+ spe_context_ptr_t spe; /* context to run */
+ spe_fir_fff_params_t *spe_fir_fff_params; /* parameter block for that context */
+} thread_arg_t;
+
+/*
+ * pthread start routine.  thread_arg points at a thread_arg_t; the SPE
+ * context it names is run from SPE_DEFAULT_ENTRY with the parameter block as
+ * argp and no envp.  A failing spe_context_run() is reported with perror().
+ * Always returns NULL.
+ */
+void *run_abs_spe(void *thread_arg)
+{
+    thread_arg_t *ctx = (thread_arg_t *) thread_arg;
+    unsigned int entry_point = SPE_DEFAULT_ENTRY;
+    spe_stop_info_t stop_info;
+
+    if (spe_context_run(ctx->spe, &entry_point, 0, ctx->spe_fir_fff_params,
+                        NULL, &stop_info) < 0)
+        perror("spe_context_run");
+
+    return NULL;
+}
+
+/*
+ * PPE-side test driver for the complex FIR SPE program.
+ *
+ * Opens fir_ccc_spe.elf, loads it into NUM_SPE SPE contexts, runs each
+ * context in its own pthread with a per-SPE parameter block, waits for every
+ * thread to finish, tears the contexts down and prints the first 16 complex
+ * output values.  argc and argv are not used.
+ *
+ * Returns 0 on success; exits with status 1 if a setup or teardown call made
+ * from this function fails.
+ */
+int main(int argc, char **argv)
+{
+    int i;
+    int ret;
+
+    spe_program_handle_t *prog;
+    spe_context_ptr_t spe[NUM_SPE];
+    pthread_t thread[NUM_SPE];
+    thread_arg_t arg[NUM_SPE];
+
+    /* sizeof yields a size_t; cast so the argument matches the %d conversion. */
+    printf("size: spe_fir_ccc_params_t: %02d\n", (int) sizeof(spe_fir_fff_params_t));
+
+    prog = spe_image_open("fir_ccc_spe.elf");
+    if (!prog) {
+        perror("spe_image_open");
+        exit(1);
+    }
+
+    for (i = 0; i < NUM_SPE; ++i) {
+        spe[i] = spe_context_create(0, NULL);
+        if (!spe[i]) {
+            perror("spe_context_create");
+            exit(1);
+        }
+
+        ret = spe_program_load(spe[i], prog);
+        if (ret) {
+            perror("spe_program_load");
+            exit(1);
+        }
+    }
+
+    unsigned int j;
+    int size = SIZE/NUM_SPE;
+
+    for (j = 0; j < 1; ++j) {
+
+        /* Give every SPE its slice of the buffers and start it. */
+        for (i = 0; i < NUM_SPE; ++i) {
+            spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
+            spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
+            spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
+            spe_fir_fff_params[i].ntaps = 16;
+            spe_fir_fff_params[i].nsamples = 7;
+            spe_fir_fff_params[i].offset = 0;
+            spe_fir_fff_params[i].size = size;
+
+            arg[i].spe = spe[i];
+            arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
+
+            ret = pthread_create(&thread[i], NULL, run_abs_spe, &arg[i]);
+            if (ret) {
+                /* NOTE(review): pthread_create reports its error through the
+                 * return value rather than errno, so this message may not
+                 * name the real cause. */
+                perror("pthread_create");
+                exit(1);
+            }
+        }
+
+        /* Wait for every thread started above.  The previous single join
+         * used the loop variable after the loop had finished, i.e. index
+         * NUM_SPE, which is one past the end of thread[]. */
+        for (i = 0; i < NUM_SPE; ++i) {
+            pthread_join(thread[i], NULL);
+        }
+    }
+
+
+    for (i = 0; i < NUM_SPE; ++i) {
+        ret = spe_context_destroy(spe[i]);
+        if (ret) {
+            perror("spe_context_destroy");
+            exit(1);
+        }
+    }
+
+    ret = spe_image_close(prog);
+    if (ret) {
+        perror("spe_image_close");
+        exit(1);
+    }
+
+    /* out[] holds interleaved (real, imaginary) pairs. */
+    for (i = 0; i < 32; i +=2) {
+        printf("out[%02d]=%f, %fi\n", i>>1, out[i], out[i+1]);
+    }
+
+    return 0;
+}
Property changes on:
gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
___________________________________________________________________
Name: svn:executable
+ *
Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S
2008-03-05 17:04:16 UTC (rev 7934)
@@ -0,0 +1,196 @@
+ .file "fir_ccc_spe.S"
+.text
+ .align 3
+ .global spe_fir_ccc
+ .type spe_fir_ccc, @function
+# spe_fir_ccc -- interleaved complex-float FIR kernel (hand-written SPU code).
+#
+# Register use, as established by the code below (argument names follow the
+# prototype in spe_fir_ccc_as.h):
+#   $3  input base address            $4  taps base address
+#   $5  output address on entry       $6  zeroed before it is ever read
+#   $7  starting byte offset          $8  number of output samples
+#   $9  tap count, stepped by -4 per inner-loop pass
+#   $5-$8   after entry: the four fma accumulators
+#   $30 output quadword being built   $31 samples still to produce
+#   $32 output write pointer          $33 current byte offset into input
+#   $34 rotate count for a result     $35 input read pointer
+#   $36 inner-loop counter            $38 mask-table index, then tap pointer
+#   $39/$40 input shuffle masks       $41-$44 constant masks
+#
+# The inner loop exits only when $36 reaches exactly zero, so the tap count
+# in $9 has to be a non-zero multiple of 4.
+spe_fir_ccc:
+ ori $32,$5,0 # 0-2 $32 = output write pointer
+ lqa $41, spe_fir_ccc_hightapmask # 1-6
expand masks for taps
+ ori $33,$7,0 # 0-2 $33 = byte offset into input
+ lqa $42, spe_fir_ccc_lowtapmask # 1-6 expand
masks for taps
+ ori $31,$8,0 # 0-2 $31 = samples left to produce
+ lqa $43, spe_fir_ccc_xormask # 1-6 mask for
inverse of bd
+ lqa $44, spe_fir_ccc_leftexpand # 1-6 mask to
shift bd bc to prefered slot
+ .spe_fir_ccc_start2:
+ xor $30,$30,$30 # 0-2 initialize the
current output vector
+ il $34, 16 # 0-2 shift mask for
output insertion
+
+ .spe_fir_ccc_start1:
+ andi $37,$33,0x0c # 0-2 find index into
masks [0-4]
+ xor $5,$5,$5 # 0-2 clear accumulator
+ xor $6,$6,$6 # 0-2 clear accumulator
+ shlqbii $38,$37,2 # 1-4 mult by 4
+ xor $7,$7,$7 # 0-2 clear accumulator
+ ori $36,$9,0 # 0-2 $36 = inner-loop counter
+ lqd $39,spe_fir_ccc_highshiftmasks($38)
# 1-6 load the right shift mask
+ xor $8,$8,$8 # 0-2 clear accumulator
+ a $35, $33, $3 # 0-2 $35 = input base + offset
+ lqd $40,spe_fir_ccc_lowshiftmasks($38)
# 1-6 load the right shift mask
+ ori $38,$4,0 # 0-2 $38 now walks the taps
+
+ hbra .spe_fir_ccc_inner_loop_branch,
.spe_fir_ccc_inner_loop # inner-loop hint
+ .spe_fir_ccc_inner_loop:
+ lqd $10,0($35) # 1-6
+ lqd $11,16($35) # 1-6
+ lqd $12,32($35) # 1-6
+ ai $36,$36,-4 # 0-2 counter steps by 4 per pass
+ lqd $14,0($38) # 1-6
+ ai $35,$35,32 # 0-2 two input quadwords consumed
+ lqd $15,16($38) # 1-6
+ ai $38,$38,32 # 0-2 two tap quadwords consumed
+
+ # expensive, but needed
+ shufb $16,$14,$14,$42 # 1-4
+ shufb $14,$14,$14,$41 # 1-4
+ shufb $17,$15,$15,$42 # 1-4
+ shufb $15,$15,$15,$41 # 1-4
+
+ shufb $13,$10,$11,$40 # 1-4
+ fma $6, $13, $16, $6 # 0-6
+ shufb $10,$10,$11,$39 # 1-4
+ fma $5, $10, $14, $5 # 0-6
+ shufb $18,$11,$12,$40 # 1-4
+ fma $8, $18, $17, $8 # 0-6
+ shufb $11,$11,$12,$39 # 1-4
+ fma $7, $11, $15, $7 # 0-6
+
+ .spe_fir_ccc_inner_loop_branch:
+ brnz $36,.spe_fir_ccc_inner_loop
+
+ fsmbi $10,0xFF00 # 1-4 byte mask: bytes 0-7 set
+ fa $18,$5,$6 # 0-6
+ hbra .outter_loop_branch,
.spe_fir_ccc_start1 # 1-
+ fa $19,$7,$8 # 0-6
+ hbra .spe_fir_ccc_finish_branch,
.spe_fir_ccc_finish_branch_targ # 1-
+ fa $5,$18,$19 # 0-6 $5 = sum of the four accumulators
+
+ shufb $6, $5, $5, $44 # 1-4 expand 5 to 6
+ xor $6,$6,$43 # 0-2 flip the sign bit of word 0
+ fa $11, $5, $6 # 0-6 word0 = w0 - w2, word1 = w1 + w3
+
+ and $12,$10,$11 # 0-2 keep bytes 0-7 only
+
+ rotqby $11, $12, $34 # 1-4 move the result to its slot
+ or $30,$11,$30 # 0-2 merge into the output vector
+ ai $31,$31,-1 # 0-2 one sample produced
+
+ .spe_fir_ccc_finish_branch:
+ brz $31,.spe_fir_ccc_finish4
+ .spe_fir_ccc_finish_branch_targ:
+
+
+ ai $33,$33,8 # 0-2 input offset moves 8 bytes per sample
+ ai $34,$34,-8 # 0-2 next result goes 8 bytes further on
+
+ .outter_loop_branch:
+ brnz $34, .spe_fir_ccc_start1
+ hbra .spe_fir_ccc_outter_outter_loop_branch,
.spe_fir_ccc_start2
+# Stores $30 (the assembled output vector) in output
+ .spe_fir_ccc_finish4:
+ stqd $30,0($32)
+ ai $32,$32,16
# increment output pointer by 1 new vector.
+
+ .spe_fir_ccc_outter_outter_loop_branch:
+ brnz $31,.spe_fir_ccc_start2
# start another output vector if needed
+
+ bi $lr
+ .size spe_fir_ccc, .-spe_fir_ccc
+
+# Constant shufb control patterns and masks used by spe_fir_ccc.
+.text
+ .global spe_fir_ccc_highshiftmasks
+ .align 4
+ .type spe_fir_ccc_highshiftmasks, @object
+ .size spe_fir_ccc_highshiftmasks, 64
+
+# Four 16-byte shufb patterns; spe_fir_ccc picks one with the byte index
+# (offset & 0x0c) * 4.  Each pattern selects one source word twice followed
+# by the next source word twice.
+spe_fir_ccc_highshiftmasks:
+ .long 0x00010203
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x04050607
+
+ .long 0x04050607
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x08090a0b
+
+ .long 0x08090a0b
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x10111213
+
+ .global spe_fir_ccc_lowshiftmasks
+ .align 4
+ .type spe_fir_ccc_lowshiftmasks, @object
+ .size spe_fir_ccc_lowshiftmasks, 64
+
+# Same layout and indexing as the table above, with every pattern starting
+# 8 bytes (two words) further into the source register pair.
+spe_fir_ccc_lowshiftmasks:
+ .long 0x08090a0b
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x10111213
+
+ .long 0x10111213
+ .long 0x10111213
+ .long 0x14151617
+ .long 0x14151617
+
+ .long 0x14151617
+ .long 0x14151617
+ .long 0x18191a1b
+ .long 0x18191a1b
+
+ .global spe_fir_ccc_hightapmask
+ .align 4
+ .type spe_fir_ccc_hightapmask, @object
+ .size spe_fir_ccc_hightapmask, 16
+
+# Applied to each tap quadword: selects source words 0, 1, 1, 0.
+spe_fir_ccc_hightapmask:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x04050607
+ .long 0x00010203
+
+ .global spe_fir_ccc_lowtapmask
+ .align 4
+ .type spe_fir_ccc_lowtapmask, @object
+ .size spe_fir_ccc_lowtapmask, 16
+
+# Applied to each tap quadword: selects source words 2, 3, 3, 2.
+spe_fir_ccc_lowtapmask:
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+ .long 0x08090a0b
+
+ .global spe_fir_ccc_xormask
+ .align 4
+ .type spe_fir_ccc_xormask, @object
+ .size spe_fir_ccc_xormask, 16
+
+# Only the top bit of word 0 is set; spe_fir_ccc XORs with this to negate
+# the float in word 0.
+spe_fir_ccc_xormask:
+ .long 0x80000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+
+ .global spe_fir_ccc_leftexpand
+ .align 4
+ .type spe_fir_ccc_leftexpand, @object
+ .size spe_fir_ccc_leftexpand, 16
+
+# Moves source words 2 and 3 into words 0 and 1.  The 0x80 control bytes
+# make shufb emit zero bytes for words 2 and 3.
+spe_fir_ccc_leftexpand:
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x80808080
+ .long 0x80808080
+
+ .ident "Hand coded Cell SPU assembly"
Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h
2008-03-05 17:04:16 UTC (rev 7934)
@@ -0,0 +1,22 @@
+#ifndef SPE_FIR_CCC_AS_H_
+#define SPE_FIR_CCC_AS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Interleaved complex-float FIR routine implemented in spe_fir_ccc_as.S.
+ *
+ * input     - input buffer, read starting `offset` bytes in.
+ * taps      - filter taps.
+ * output    - receives the results, written one full vector at a time.
+ * delayline - not read by the assembly in this revision; callers pass 0.
+ * offset    - byte offset into `input`.
+ * nsamples  - number of output samples to produce.
+ * ntaps     - tap count; the assembly's inner loop counts it down by 4 and
+ *             stops only at exactly zero.
+ *             NOTE(review): presumably counted in complex taps -- confirm.
+ */
+extern void spe_fir_ccc (
+ const __vector float *input,
+ const __vector float *taps,
+ __vector float *output,
+ const __vector float *delayline,
+ const unsigned int offset,
+ const unsigned int nsamples,
+ const unsigned int ntaps
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPE_FIR_CCC_AS_H_
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S
2008-03-05 16:49:51 UTC (rev 7933)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S
2008-03-05 17:04:16 UTC (rev 7934)
@@ -7,24 +7,24 @@
ori $32,$5,0 # 0-2
ori $33,$7,0 # 0-2
ori $31,$8,0 # 0-2
- .start2:
+ .spe_fir_fff64_start2:
xor $30,$30,$30 # 0-2 initilize the
current output vector
il $34, 16 # 0-2 shift mask for
output insertion
- .start1:
+ .spe_fir_fff64_start1:
andi $37,$33,0x0c # 0-2 find index into
masks [0-4] TODO!!!!!
xor $5,$5,$5 # 0-2
xor $6,$6,$6 # 0-2
shlqbii $38,$37,2 # 1-4 mult by 4
xor $7,$7,$7 # 0-2
ori $36,$9,0 # 0-2
- lqd $39,shiftmasks32($38) # 1-6 load the
right shift mask
+ lqd $39,spe_fir_fff64_shiftmasks32($38)
# 1-6 load the right shift mask
xor $8,$8,$8 # 0-2
a $35, $33, $3 # 0-2
ori $38,$4,0 # 0-2
- hbra .inner_loop_branch, .inner_loop
# inner-loop hint
- .inner_loop:
+ hbra .spe_fir_fff64_inner_loop_branch,
.spe_fir_fff64_inner_loop # inner-loop hint
+ .spe_fir_fff64_inner_loop:
lqd $10,0($35) # 1-6
lqd $11,16($35) # 1-6
lqd $12,32($35) # 1-6
@@ -47,49 +47,49 @@
shufb $13,$13,$18,$39 # 1-4
dfma $8, $13, $17 # 0-6
- .inner_loop_branch:
- brnz $36,.inner_loop
+ .spe_fir_fff64_inner_loop_branch:
+ fsmbi $10,0xFF00 # 1-4
+ brnz $36,.spe_fir_fff64_inner_loop
dfa $18,$5,$6 # 0-6
- hbra .outter_loop_branch, .start1 # 1-
+ hbra .spe_fir_fff64_outter_loop_branch,
.spe_fir_fff64_start1 # 1-
dfa $19,$7,$8 # 0-6
- hbra .finish_branch, .finish_branch_targ
# 1-
+ hbra .spe_fir_fff64_finish_branch,
.spe_fir_fff64_finish_branch_targ # 1-
dfa $5,$18,$19 # 0-6
# accumulate word elements in r5 into first element in r5
ori $6,$5,0 # 0-2
shlqbyi $6,$6,8 # 1-4
dfa $5,$6,$5 # 0-2
- fsmbi $10,0xFF00 # 1-4
and $11,$10,$5 # 0-2
rotqby $12, $11, $34 # 1-4
or $30,$12,$30 # 0-2
ai $31,$31,-1 # 0-2 produce 1 sample
each inner-loop
- .finish_branch:
- brz $31,.finish2
- .finish_branch_targ:
+ .spe_fir_fff64_finish_branch:
+ brz $31,.spe_fir_fff64_finish2
+ .spe_fir_fff64_finish_branch_targ:
ai $33,$33,8 # 0-2 offset into
input data moves 8 bytes each loop
ai $34,$34,-8 # 0-2 shiftmask moves
twice each loop
- .outter_loop_branch:
- brnz $34, .start1
- hbra .outter_outter_loop_branch, .start2
+ .spe_fir_fff64_outter_loop_branch:
+ brnz $34, .spe_fir_fff64_start1
+ hbra
.spe_fir_fff64_outter_outter_loop_branch, .spe_fir_fff64_start2
# Stores r5 in output
- .finish2:
+ .spe_fir_fff64_finish2:
stqd $30,0($32)
ai $32,$32,16
# increment output pointer by 1 new vector.
- .outter_outter_loop_branch:
- brnz $31,.start2 # start
another output vector if needed
+ .spe_fir_fff64_outter_outter_loop_branch:
+ brnz $31,.spe_fir_fff64_start2
# start another output vector if needed
- bi $lr
+ bi $lr
.size spe_fir_fff64, .-spe_fir_fff64
.text
- .global shiftmasks32
+ .global spe_fir_fff64_shiftmasks32
.align 4
- .type shiftmasks32, @object
- .size shiftmasks32, 64
-shiftmasks32:
+ .type spe_fir_fff64_shiftmasks32, @object
+ .size spe_fir_fff64_shiftmasks32, 64
+spe_fir_fff64_shiftmasks32:
.long 0x00010203
.long 0x04050607
.long 0x08090a0b
@@ -110,51 +110,4 @@
.long 0x14151617
.long 0x18191a1b
-# .long 66051
-# .long 67438087
-# .long 134810123
-# .long 202182159
-# .long 67438087
-# .long 134810123
-# .long 202182159
-# .long 269554195
-# .long 134810123
-# .long 202182159
-# .long 269554195
-# .long 336926231
-# .long 202182159
-# .long 269554195
-# .long 336926231
-# .long 404298267
-
-# old code to build stack for above routine
-# stqd $sp,-128($sp)
-# ai $sp,$sp,-128
-# lqd $3,32($sp)
-# hbrp # 1
-# cwd $9,0($sp)
-# shufb $3,$4,$3,$9
-# stqd $3,32($sp)
-# lqd $3,48($sp)
-# cwd $4,0($sp)
-# shufb $3,$4,$3,$4
-# stqd $3,48($sp)
-# lqd $3,64($sp)
-# cwd $4,0($sp)
-# shufb $3,$5,$3,$4
-# stqd $3,64($sp)
-# lqd $3,80($sp)
-# hbrp # 2
-# cwd $4,0($sp)
-# shufb $3,$6,$3,$4
-# stqd $3,80($sp)
-# lqd $3,96($sp)
-# cwd $4,0($sp)
-# shufb $3,$7,$3,$4
-# stqd $3,96($sp)
-# lqd $3,112($sp)
-# cwd $4,0($sp)
-# shufb $3,$8,$3,$4
-# stqd $3,112($sp)
-
.ident "Hand coded Cell SPU assembly"
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
2008-03-05 16:49:51 UTC (rev 7933)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
2008-03-05 17:04:16 UTC (rev 7934)
@@ -7,24 +7,24 @@
ori $32,$5,0 # 0-2
ori $33,$7,0 # 0-2
ori $31,$8,0 # 0-2
- .start4:
+ .spe_fir_fff_start4:
xor $30,$30,$30 # 0-2 initilize the
current output vector
il $34, 16 # 0-2 shift mask for
output insertion
- .start1:
+ .spe_fir_fff_start1:
andi $37,$33,0x0c # 0-2 find index into
masks [0-4]
xor $5,$5,$5 # 0-2
xor $6,$6,$6 # 0-2
shlqbii $38,$37,2 # 1-4 mult by 4
xor $7,$7,$7 # 0-2
ori $36,$9,0 # 0-2
- lqd $39,shiftmasks($38) # 1-6 load the
right shift mask
+ lqd $39,spe_fir_fff_shiftmasks($38) # 1-6
load the right shift mask
xor $8,$8,$8 # 0-2
a $35, $33, $3 # 0-2
ori $38,$4,0 # 0-2
- hbra .inner_loop_branch, .inner_loop
# inner-loop hint
- .inner_loop:
+ hbra .spe_fir_fff_inner_loop_branch,
.spe_fir_fff_inner_loop # inner-loop hint
+ .spe_fir_fff_inner_loop:
lqd $10,0($35) # 1-6
lqd $11,16($35) # 1-6
lqd $12,32($35) # 1-6
@@ -47,12 +47,13 @@
shufb $13,$13,$18,$39 # 1-4
fma $8, $13, $17, $8 # 0-6
- .inner_loop_branch:
- brnz $36,.inner_loop
+ .spe_fir_fff_inner_loop_branch:
+ brnz $36,.spe_fir_fff_inner_loop
+ fsmbi $10,0xF000 # 1-4
fa $18,$5,$6 # 0-6
- hbra .outter_loop_branch, .start1 # 1-
+ hbra .spe_fir_fff_outter_loop_branch,
.spe_fir_fff_start1 # 1-
fa $19,$7,$8 # 0-6
- hbra .finish_branch, .finish_branch_targ
# 1-
+ hbra .spe_fir_fff_finish_branch,
.spe_fir_fff_finish_branch_targ # 1-
fa $5,$18,$19 # 0-6
# accumulate word elements in r5 into first element in r5
@@ -63,37 +64,36 @@
fa $5,$6,$5 # 0-2
shlqbyi $6,$6,4 # 1-4
fa $5,$6,$5 # 0-2
- fsmbi $10,0xF000 # 1-4
and $11,$10,$5 # 0-2
rotqby $12, $11, $34 # 1-4
or $30,$12,$30 # 0-2
ai $31,$31,-1 # 0-2
- .finish_branch:
- brz $31,.finish4
- .finish_branch_targ:
+ .spe_fir_fff_finish_branch:
+ brz $31,.spe_fir_fff_finish4
+ .spe_fir_fff_finish_branch_targ:
ai $33,$33,4 # 0-2
ai $34,$34,-4 # 0-2
- .outter_loop_branch:
- brnz $34, .start1
- hbra .outter_outter_loop_branch, .start4
+ .spe_fir_fff_outter_loop_branch:
+ brnz $34, .spe_fir_fff_start1
+ hbra .spe_fir_fff_outter_outter_loop_branch,
.spe_fir_fff_start4
# Stores r5 in output
- .finish4:
+ .spe_fir_fff_finish4:
stqd $30,0($32)
ai $32,$32,16
# increment output pointer by 1 new vector.
- .outter_outter_loop_branch:
- brnz $31,.start4 # start
another output vector if needed
+ .spe_fir_fff_outter_outter_loop_branch:
+ brnz $31,.spe_fir_fff_start4
# start another output vector if needed
bi $lr
.size spe_fir_fff, .-spe_fir_fff
.text
- .global shiftmasks
+ .global spe_fir_fff_shiftmasks
.align 4
- .type shiftmasks, @object
- .size shiftmasks, 64
-shiftmasks:
+ .type spe_fir_fff_shiftmasks, @object
+ .size spe_fir_fff_shiftmasks, 64
+spe_fir_fff_shiftmasks:
.long 0x00010203
.long 0x04050607
.long 0x08090a0b
@@ -114,51 +114,4 @@
.long 0x14151617
.long 0x18191a1b
-# .long 66051
-# .long 67438087
-# .long 134810123
-# .long 202182159
-# .long 67438087
-# .long 134810123
-# .long 202182159
-# .long 269554195
-# .long 134810123
-# .long 202182159
-# .long 269554195
-# .long 336926231
-# .long 202182159
-# .long 269554195
-# .long 336926231
-# .long 404298267
-
-# old code to build stack for above routine
-# stqd $sp,-128($sp)
-# ai $sp,$sp,-128
-# lqd $3,32($sp)
-# hbrp # 1
-# cwd $9,0($sp)
-# shufb $3,$4,$3,$9
-# stqd $3,32($sp)
-# lqd $3,48($sp)
-# cwd $4,0($sp)
-# shufb $3,$4,$3,$4
-# stqd $3,48($sp)
-# lqd $3,64($sp)
-# cwd $4,0($sp)
-# shufb $3,$5,$3,$4
-# stqd $3,64($sp)
-# lqd $3,80($sp)
-# hbrp # 2
-# cwd $4,0($sp)
-# shufb $3,$6,$3,$4
-# stqd $3,80($sp)
-# lqd $3,96($sp)
-# cwd $4,0($sp)
-# shufb $3,$7,$3,$4
-# stqd $3,96($sp)
-# lqd $3,112($sp)
-# cwd $4,0($sp)
-# shufb $3,$8,$3,$4
-# stqd $3,112($sp)
-
.ident "Hand coded Cell SPU assembly"
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r7934 - gnuradio/branches/developers/ngoergen/spe_fir_fff,
ngoergen <=