[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r8118 - gnuradio/branches/developers/ngoergen/spe_fir_fff
From: ngoergen
Subject: [Commit-gnuradio] r8118 - gnuradio/branches/developers/ngoergen/spe_fir_fff
Date: Thu, 27 Mar 2008 15:45:07 -0600 (MDT)
Author: ngoergen
Date: 2008-03-27 15:45:06 -0600 (Thu, 27 Mar 2008)
New Revision: 8118
Added:
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc
gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h
Removed:
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h
gnuradio/branches/developers/ngoergen/spe_fir_fff/src/
Modified:
gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c
gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
Log:
spe_fir_xxx: New scheduling after static timing analysis of _ccc and _fff.
Maximum dual issue with least stalls. Decrementer time analysis. Rename of
files.
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-03-27 21:45:06 UTC (rev 8118)
@@ -4,19 +4,23 @@
CFLAGS = -Wall
SPU_CC = spu-gcc
-SPU_CINCS = -I.
+SPU_CINCS = -I. -I /usr/lib/gcc/spu/4.1.1/include
SPU_CLIBS =
SPU_CFLAGS = -Wall
SPU_AS = spu-as
SPU_CPP = spu-g++
-all: multi_fir_fff_ppe multi_fir_fff64_ppe multi_fir_ccc_ppe fir_fff_spe.elf fir_fff64_spe.elf fir_ccc_spe.elf
+OUTPUTS = multi_fir_fff_ppe \
+ multi_fir_fff64_ppe \
+ multi_fir_ccc_ppe \
+ fir_fff_spu.elf \
+ fir_fff64_spu.elf \
+ fir_ccc_spu.elf
-asm: fir_fff_spe.s
+all: $(OUTPUTS)
-fir_fff_spe.s: fir_fff_spe.cpp
- $(SPU_CC) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) -S $^
+time: spu_fir_fff_as.time spu_fir_fff64_as.time spu_fir_ccc_as.time
multi_fir_fff_ppe: multi_fir_fff_ppe.c
$(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
@@ -27,20 +31,21 @@
multi_fir_ccc_ppe: multi_fir_ccc_ppe.c
$(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
-%.o: %.cpp
+%.o: %.cc
$(SPU_CC) -c $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
%.o: %.S
$(SPU_AS) -o $@ $^
-fir_fff_spe.elf: fir_fff_spe.o spe_fir_fff_as.o
+%.time: %.S
+ /opt/cell/sdk/usr/bin/spu_timing -o $@ $^
+
+fir_fff_spu.elf: fir_fff_spu.o spu_fir_fff_as.o
$(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
-
-fir_fff64_spe.elf: fir_fff64_spe.o spe_fir_fff64_as.o
+fir_fff64_spu.elf: fir_fff64_spu.o spu_fir_fff64_as.o
$(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
-
-fir_ccc_spe.elf: fir_ccc_spe.o spe_fir_ccc_as.o
+fir_ccc_spu.elf: fir_ccc_spu.o spu_fir_ccc_as.o
$(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
clean:
- rm -f multi_fir_fff_ppe fir_fff_spe.elf fir_fff_spe.s multi_fir_fff64_ppe multi_fir_ccc_ppe fir_fff64_spe.elf fir_ccc_spe.elf *.o
+ rm -f $(OUTPUTS) *.o *.time
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spe.cpp)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_ccc_spu.cc 2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,76 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_ccc_as.h"
+#include <spu_mfcio.h>
+
+#define MAX_BUFSIZE (128*100)
+
+//float inputs[MAX_BUFSIZE] __attribute__((aligned(16))) = {1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2};
+//float taps[MAX_BUFSIZE] __attribute__((aligned(16))) = {2.32, -23.6563, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0};
+float inputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+float taps[MAX_BUFSIZE] __attribute__((aligned(16)));
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+
+int main(unsigned long long spu, unsigned long long argp,
+ unsigned long long envp)
+{
+ int tag = 1;
+ spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+ {
+ // NOTE: spu_fir_fff_params_t can be an _IN only param,
+ // it is sent back now for the time value only.
+ gr_spu_dma_lock_inout<spu_fir_fff_params_t> argp_lock(argp,
+ &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+ gr_spu_dma_lock_in<__vector float> inputs_lock(spu_fir_fff_params.ea_in1,
+ reinterpret_cast<__vector float *>(&inputs), spu_fir_fff_params.size
+ * sizeof(float), tag);
+ gr_spu_dma_lock_in<__vector float> taps_lock(spu_fir_fff_params.ea_in2,
+ reinterpret_cast<__vector float *>(&taps), spu_fir_fff_params.size
+ * sizeof(float), tag);
+ gr_spu_dma_lock_out<__vector float> outputs_lock(spu_fir_fff_params.ea_out,
+ reinterpret_cast<__vector float *>(&outputs), spu_fir_fff_params.size
+ * sizeof(float), tag);
+ //
+ // spu_fir_fff_params.offset = 0;
+ // spu_fir_fff_params.nsamples = 7;
+ // spu_fir_fff_params.ntaps = 16;
+
+ spu_write_decrementer(~0);
+ int start(spu_read_decrementer());
+
+ spu_fir_ccc( reinterpret_cast<__vector float *>(&inputs),
+ reinterpret_cast<__vector float *>(&taps),
+ reinterpret_cast<__vector float *>(&outputs), 0,
+ spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+ spu_fir_fff_params.ntaps);
+
+ spu_fir_fff_params.pad[0] = start - spu_read_decrementer();
+
+ }
+
+ return 0;
+}
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spe.cpp)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff64_spu.cc 2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_fff64_as.h"
+
+#define MAX_BUFSIZE (64*100)
+
+double inputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+double taps[MAX_BUFSIZE] __attribute__((aligned(16)));
+double outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+
+int main(unsigned long long spu, unsigned long long argp,
+ unsigned long long envp)
+{
+ int tag = 1;
+ spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+ {
+ gr_spu_dma_lock_in<spu_fir_fff_params_t> argp_lock(argp,
+ &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+ gr_spu_dma_lock_in<__vector double> inputs_lock(spu_fir_fff_params.ea_in1,
+ reinterpret_cast<__vector double *>(&inputs), spu_fir_fff_params.size
+ * sizeof(double), tag);
+ gr_spu_dma_lock_in<__vector double> taps_lock(spu_fir_fff_params.ea_in2,
+ reinterpret_cast<__vector double *>(&taps), spu_fir_fff_params.size
+ * sizeof(double), tag);
+ gr_spu_dma_lock_out<__vector double> outputs_lock(
+ spu_fir_fff_params.ea_out,
+ reinterpret_cast<__vector double *>(&outputs), spu_fir_fff_params.size
+ * sizeof(double), tag);
+
+ spu_fir_fff64( reinterpret_cast<__vector double *>(&inputs),
+ reinterpret_cast<__vector double *>(&taps),
+ reinterpret_cast<__vector double *>(&outputs), 0,
+ spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+ spu_fir_fff_params.ntaps);
+
+ }
+
+ return 0;
+}
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spu.cc 2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,70 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#include <stdio.h>
+#include "gr_spu_dma_lock.h"
+#include "spu_fir_fff_params.h"
+#include "spu_fir_fff_as.h"
+#include <spu_mfcio.h>
+
+#define MAX_BUFSIZE (128*100)
+
+float inputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+float taps[MAX_BUFSIZE] __attribute__((aligned(16)));
+float outputs[MAX_BUFSIZE] __attribute__((aligned(16)));
+
+int main(unsigned long long spu, unsigned long long argp,
+ unsigned long long envp)
+{
+ int tag = 1;
+ spu_fir_fff_params_t spu_fir_fff_params __attribute__((aligned(16)));
+
+ {
+ // NOTE: spu_fir_fff_params_t can be an _IN only param,
+ // it is sent back now for the time value only.
+ gr_spu_dma_lock_inout<spu_fir_fff_params_t> argp_lock(argp,
+ &spu_fir_fff_params, sizeof(spu_fir_fff_params_t), tag);
+ gr_spu_dma_lock_in<__vector float> inputs_lock(spu_fir_fff_params.ea_in1,
+ reinterpret_cast<__vector float *>(&inputs), spu_fir_fff_params.size
+ * sizeof(float), tag);
+ gr_spu_dma_lock_in<__vector float> taps_lock(spu_fir_fff_params.ea_in2,
+ reinterpret_cast<__vector float *>(&taps), spu_fir_fff_params.size
+ * sizeof(float), tag);
+ gr_spu_dma_lock_out<__vector float> outputs_lock(spu_fir_fff_params.ea_out,
+ reinterpret_cast<__vector float *>(&outputs), spu_fir_fff_params.size
+ * sizeof(float), tag);
+
+ spu_write_decrementer(~0);
+ int start(spu_read_decrementer());
+
+ spu_fir_fff( reinterpret_cast<__vector float *>(&inputs),
+ reinterpret_cast<__vector float *>(&taps),
+ reinterpret_cast<__vector float *>(&outputs), 0,
+ spu_fir_fff_params.offset, spu_fir_fff_params.nsamples,
+ spu_fir_fff_params.ntaps);
+
+ spu_fir_fff_params.pad[0] = start - spu_read_decrementer();
+
+ }
+
+ return 0;
+}
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h (from rev 8117, gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spe_dma_lock.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h (rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/gr_spu_dma_lock.h 2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,97 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef GR_SPU_DMA_LOCK_H_
+#define GR_SPU_DMA_LOCK_H_
+
+#include <spu_mfcio.h>
+
+template <class Tout>
+class gr_spu_dma_lock {
+protected:
+ gr_spu_dma_lock(unsigned long long in, Tout* target, unsigned int size, int tag) :
+ d_in(in), d_target(target), d_size(size), d_tag(tag) {
+ }
+
+ ~gr_spu_dma_lock() {};
+
+ void pull() {
+ spu_mfcdma64(d_target, mfc_ea2h(d_in), mfc_ea2l(d_in),
+ d_size, d_tag, MFC_GET_CMD);
+ spu_writech(MFC_WrTagMask, 1 << d_tag);
+ spu_mfcstat(MFC_TAG_UPDATE_ALL);
+ }
+
+ void push() {
+ spu_mfcdma64(d_target, mfc_ea2h(d_in), mfc_ea2l(d_in), d_size, d_tag, MFC_PUT_CMD);
+ spu_writech(MFC_WrTagMask, 1 << d_tag);
+ spu_mfcstat(MFC_TAG_UPDATE_ALL);
+ }
+
+private:
+ unsigned long long d_in;
+ Tout* d_target;
+ unsigned int d_size;
+ int d_tag;
+};
+
+template <class Tout>
+class gr_spu_dma_lock_in : gr_spu_dma_lock<Tout> {
+public:
+ gr_spu_dma_lock_in(unsigned long long in, Tout* target, unsigned int size, int tag) :
+ gr_spu_dma_lock<Tout>(in, target, size, tag) {
+ pull();
+ }
+
+ void pull() { gr_spu_dma_lock<Tout>::pull(); }
+
+ ~gr_spu_dma_lock_in() {}
+};
+
+template <class Tout>
+class gr_spu_dma_lock_out : gr_spu_dma_lock<Tout> {
+public:
+ gr_spu_dma_lock_out(unsigned long long in, Tout* target, unsigned int size, int tag) :
+ gr_spu_dma_lock<Tout>(in, target, size, tag) {
+ }
+
+ void push() { gr_spu_dma_lock<Tout>::push(); }
+
+ ~gr_spu_dma_lock_out() { push(); }
+};
+
+template <class Tout>
+class gr_spu_dma_lock_inout : gr_spu_dma_lock<Tout> {
+public:
+ gr_spu_dma_lock_inout(unsigned long long in, Tout* target, unsigned int size, int tag) :
+ gr_spu_dma_lock<Tout>(in, target, size, tag) {
+ pull();
+ }
+
+ void push() { gr_spu_dma_lock<Tout>::push(); }
+
+ void pull() { gr_spu_dma_lock<Tout>::pull(); }
+
+ ~gr_spu_dma_lock_inout() { push(); }
+};
+
+#endif /*GR_SPU_DMA_LOCK_H_*/
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c 2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_ccc_ppe.c 2008-03-27 21:45:06 UTC (rev 8118)
@@ -24,29 +24,11 @@
#include <stdlib.h>
#include <libspe2.h>
#include <pthread.h>
-#include <spe_fir_fff_params.h>
+#include <spu_fir_fff_params.h>
-#define NUM_SPE 1
+#define NUM_SPU 1
#define SIZE (64*1 )
-//#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
- 9, 10, 11, 12, 13, 14, 15, 16, \
- 17, 18, 19, 20, 21, 22, 23, 24, \
- 25, 26, 27, 28, 29, 30, 31, 32, \
- 33, 34, 35, 36, 37, 38, 39, 40, \
- 41, 42, 43, 44, 45, 46, 47, 48, \
- 49, 50, 51, 52, 53, 54, 55, 56, \
- 57, 58, 59, 60, 61, 62, 63, 64
-
-//#define MYMATRIX 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,1,2,3,4
-
#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
9, 10, 1, -1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
@@ -56,24 +38,6 @@
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1
-//#define MYMATRIX2 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23
-
-//#define MYMATRIX2 -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2
-
#define MYMATRIX2 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
@@ -96,8 +60,9 @@
MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210
#define TESTMATRIX 1, 2, 3, 4, -5, 6, -7, -8, 9, -10, -11.11, -12.22, 13.33, 14.44, 15.55, 16.66
-//#define TESTMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
- 9, 10, 234, 234, 234, 234, 234, 234
+
+//#define TESTMATRIX 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 234, 234, 234, 234, 234, 234
+
#define TESTTAPS 2.32, -23.6563, -432.5434, -34.323, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0
float in1[16] =
@@ -113,15 +78,15 @@
float out[SIZE] __attribute__((aligned(16)));
-spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
+spu_fir_fff_params_t spu_fir_fff_params[NUM_SPU] __attribute__((aligned(16)));
typedef struct
{
- spe_context_ptr_t spe;
- spe_fir_fff_params_t *spe_fir_fff_params;
+ spe_context_ptr_t spu;
+ spu_fir_fff_params_t *spu_fir_fff_params;
} thread_arg_t;
-void *run_abs_spe(void *thread_arg)
+void *run_abs_spu(void *thread_arg)
{
int ret;
thread_arg_t *arg = (thread_arg_t *) thread_arg;
@@ -129,10 +94,10 @@
spe_stop_info_t stop_info;
entry = SPE_DEFAULT_ENTRY;
- ret = spe_context_run(arg->spe, &entry, 0, arg->spe_fir_fff_params, NULL,
+ ret = spe_context_run(arg->spu, &entry, 0, arg->spu_fir_fff_params, NULL,
&stop_info);
if (ret < 0) {
- perror("spe_context_run");
+ perror("spu_context_run");
return NULL;
}
@@ -145,50 +110,50 @@
int ret;
spe_program_handle_t *prog;
- spe_context_ptr_t spe[NUM_SPE];
- pthread_t thread[NUM_SPE];
- thread_arg_t arg[NUM_SPE];
+ spe_context_ptr_t spu[NUM_SPU];
+ pthread_t thread[NUM_SPU];
+ thread_arg_t arg[NUM_SPU];
- printf("size: spe_fir_ccc_params_t: %02d\n", sizeof(spe_fir_fff_params_t));
+ printf("size: spu_fir_ccc_params_t: %02d\n", sizeof(spu_fir_fff_params_t));
- prog = spe_image_open("fir_ccc_spe.elf");
+ prog = spe_image_open("fir_ccc_spu.elf");
if (!prog) {
- perror("spe_image_open");
+ perror("spu_image_open");
exit(1);
}
- for (i = 0; i < NUM_SPE; ++i) {
- spe[i] = spe_context_create(0, NULL);
- if (!spe[i]) {
- perror("spe_context_create");
+ for (i = 0; i < NUM_SPU; ++i) {
+ spu[i] = spe_context_create(0, NULL);
+ if (!spu[i]) {
+ perror("spu_context_create");
exit(1);
}
- ret = spe_program_load(spe[i], prog);
+ ret = spe_program_load(spu[i], prog);
if (ret) {
- perror("spe_program_load");
+ perror("spu_program_load");
exit(1);
}
}
unsigned int j;
- int size= SIZE/NUM_SPE;
+ int size= SIZE/NUM_SPU;
for (j = 0; j < 1; ++j) {
- for (i = 0; i < NUM_SPE; ++i) {
- spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
- spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
- spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
- spe_fir_fff_params[i].ntaps = 16;
- spe_fir_fff_params[i].nsamples = 7;
- spe_fir_fff_params[i].offset = 0;
- spe_fir_fff_params[i].size = size;
+ for (i = 0; i < NUM_SPU; ++i) {
+ spu_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
+ spu_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
+ spu_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
+ spu_fir_fff_params[i].ntaps = 16;
+ spu_fir_fff_params[i].nsamples = 7;
+ spu_fir_fff_params[i].offset = 0;
+ spu_fir_fff_params[i].size = size;
- arg[i].spe = spe[i];
- arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
+ arg[i].spu = spu[i];
+ arg[i].spu_fir_fff_params = &spu_fir_fff_params[i];
- ret = pthread_create(&thread[i], NULL, run_abs_spe, &arg[i]);
+ ret = pthread_create(&thread[i], NULL, run_abs_spu, &arg[i]);
if (ret) {
perror("pthread_create");
exit(1);
@@ -197,24 +162,25 @@
pthread_join(thread[i], NULL);
}
- for (i = 0; i < NUM_SPE; ++i) {
+ for (i = 0; i < NUM_SPU; ++i) {
//pthread_join(thread[i], NULL);
- ret = spe_context_destroy(spe[i]);
+ ret = spe_context_destroy(spu[i]);
if (ret) {
- perror("spe_context_destroy");
+ perror("spu_context_destroy");
exit(1);
}
}
ret = spe_image_close(prog);
if (ret) {
- perror("spe_image_close");
+ perror("spu_image_close");
exit(1);
}
for (i = 0; i < 32; i +=2) {
printf("out[%02d]=%f, %fi\n", i>>1, out[i], out[i+1]);
}
+ printf("time result=%d\n", spu_fir_fff_params[0].pad[0]);
return 0;
}
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c 2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff64_ppe.c 2008-03-27 21:45:06 UTC (rev 8118)
@@ -24,29 +24,11 @@
#include <stdlib.h>
#include <libspe2.h>
#include <pthread.h>
-#include <spe_fir_fff_params.h>
+#include <spu_fir_fff_params.h>
-#define NUM_SPE 1
+#define NUM_SPU 1
#define SIZE (64*1 )
-//#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
- 9, 10, 11, 12, 13, 14, 15, 16, \
- 17, 18, 19, 20, 21, 22, 23, 24, \
- 25, 26, 27, 28, 29, 30, 31, 32, \
- 33, 34, 35, 36, 37, 38, 39, 40, \
- 41, 42, 43, 44, 45, 46, 47, 48, \
- 49, 50, 51, 52, 53, 54, 55, 56, \
- 57, 58, 59, 60, 61, 62, 63, 64
-
-//#define MYMATRIX 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,1,2,3,4
-
#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
9, 10, 1, -1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
@@ -56,24 +38,6 @@
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1
-//#define MYMATRIX2 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23
-
-//#define MYMATRIX2 -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2
-
#define MYMATRIX2 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
@@ -94,9 +58,11 @@
#define MYMATRIX2100 MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, \
MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210
+
#define TESTMATRIX 234, -4, 23, -56, 45, 98, -23, -7, 0, 0, 0, 0, 0, 0, 0, 0
-//#define TESTMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
- 9, 10, 234, 234, 234, 234, 234, 234
+
+//#define TESTMATRIX 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 234, 234, 234, 234, 234, 234
+
#define TESTTAPS 5, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
double in1[16] =
@@ -111,15 +77,15 @@
double out[SIZE] __attribute__((aligned(16)));
-spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
+spu_fir_fff_params_t spu_fir_fff_params[NUM_SPU] __attribute__((aligned(16)));
typedef struct
{
- spe_context_ptr_t spe;
- spe_fir_fff_params_t *spe_fir_fff_params;
+ spe_context_ptr_t spu;
+ spu_fir_fff_params_t *spu_fir_fff_params;
} thread_arg_t;
-void *run_abs_spe(void *thread_arg)
+void *run_abs_spu(void *thread_arg)
{
int ret;
thread_arg_t *arg = (thread_arg_t *) thread_arg;
@@ -127,10 +93,10 @@
spe_stop_info_t stop_info;
entry = SPE_DEFAULT_ENTRY;
- ret = spe_context_run(arg->spe, &entry, 0, arg->spe_fir_fff_params, NULL,
+ ret = spe_context_run(arg->spu, &entry, 0, arg->spu_fir_fff_params, NULL,
&stop_info);
if (ret < 0) {
- perror("spe_context_run");
+ perror("spu_context_run");
return NULL;
}
@@ -143,50 +109,50 @@
int ret;
spe_program_handle_t *prog;
- spe_context_ptr_t spe[NUM_SPE];
- pthread_t thread[NUM_SPE];
- thread_arg_t arg[NUM_SPE];
+ spe_context_ptr_t spu[NUM_SPU];
+ pthread_t thread[NUM_SPU];
+ thread_arg_t arg[NUM_SPU];
- printf("size: spe_fir_fff_params_t: %02d\n", sizeof(spe_fir_fff_params_t));
+ printf("size: spu_fir_fff_params_t: %02d\n", sizeof(spu_fir_fff_params_t));
- prog = spe_image_open("fir_fff64_spe.elf");
+ prog = spe_image_open("fir_fff64_spu.elf");
if (!prog) {
- perror("spe_image_open");
+ perror("spu_image_open");
exit(1);
}
- for (i = 0; i < NUM_SPE; ++i) {
- spe[i] = spe_context_create(0, NULL);
- if (!spe[i]) {
- perror("spe_context_create");
+ for (i = 0; i < NUM_SPU; ++i) {
+ spu[i] = spe_context_create(0, NULL);
+ if (!spu[i]) {
+ perror("spu_context_create");
exit(1);
}
- ret = spe_program_load(spe[i], prog);
+ ret = spe_program_load(spu[i], prog);
if (ret) {
- perror("spe_program_load");
+ perror("spu_program_load");
exit(1);
}
}
unsigned int j;
- int size= SIZE/NUM_SPE;
+ int size= SIZE/NUM_SPU;
for (j = 0; j < 1; ++j) {
- for (i = 0; i < NUM_SPE; ++i) {
- spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
- spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
- spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
- spe_fir_fff_params[i].ntaps = 16;
- spe_fir_fff_params[i].nsamples = 7;
- spe_fir_fff_params[i].offset = 0;
- spe_fir_fff_params[i].size = size;
+ for (i = 0; i < NUM_SPU; ++i) {
+ spu_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
+ spu_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
+ spu_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
+ spu_fir_fff_params[i].ntaps = 16;
+ spu_fir_fff_params[i].nsamples = 7;
+ spu_fir_fff_params[i].offset = 0;
+ spu_fir_fff_params[i].size = size;
- arg[i].spe = spe[i];
- arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
+ arg[i].spu = spu[i];
+ arg[i].spu_fir_fff_params = &spu_fir_fff_params[i];
- ret = pthread_create(&thread[i], NULL, run_abs_spe, &arg[i]);
+ ret = pthread_create(&thread[i], NULL, run_abs_spu, &arg[i]);
if (ret) {
perror("pthread_create");
exit(1);
@@ -195,18 +161,18 @@
pthread_join(thread[i], NULL);
}
- for (i = 0; i < NUM_SPE; ++i) {
+ for (i = 0; i < NUM_SPU; ++i) {
//pthread_join(thread[i], NULL);
- ret = spe_context_destroy(spe[i]);
+ ret = spe_context_destroy(spu[i]);
if (ret) {
- perror("spe_context_destroy");
+ perror("spu_context_destroy");
exit(1);
}
}
ret = spe_image_close(prog);
if (ret) {
- perror("spe_image_close");
+ perror("spu_image_close");
exit(1);
}
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c 2008-03-26 21:21:52 UTC (rev 8117)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c 2008-03-27 21:45:06 UTC (rev 8118)
@@ -24,29 +24,11 @@
#include <stdlib.h>
#include <libspe2.h>
#include <pthread.h>
-#include <spe_fir_fff_params.h>
+#include <spu_fir_fff_params.h>
-#define NUM_SPE 1
+#define NUM_SPU 1
#define SIZE (64*1 )
-//#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
- 9, 10, 11, 12, 13, 14, 15, 16, \
- 17, 18, 19, 20, 21, 22, 23, 24, \
- 25, 26, 27, 28, 29, 30, 31, 32, \
- 33, 34, 35, 36, 37, 38, 39, 40, \
- 41, 42, 43, 44, 45, 46, 47, 48, \
- 49, 50, 51, 52, 53, 54, 55, 56, \
- 57, 58, 59, 60, 61, 62, 63, 64
-
-//#define MYMATRIX 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,0,0,0,0, \
- 0,0,0,0,1,2,3,4
-
#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
9, 10, 1, -1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
@@ -56,24 +38,6 @@
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1
-//#define MYMATRIX2 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, \
- 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23
-
-//#define MYMATRIX2 -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2, \
- -2, -2, -2, -2, -2, -2, -2, -2
-
#define MYMATRIX2 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
@@ -94,12 +58,18 @@
#define MYMATRIX2100 MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, \
MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210
-#define TESTMATRIX 234, -4, 23, -56, 45, 98, -23, -7, 0, 0, 0, 0, 0, 0, 0, 0
-//#define TESTMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
- 9, 10, 234, 234, 234, 234, 234, 234
-#define TESTTAPS 5, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-float in1[16] =
+//#define TESTMATRIX 234, -4, 23, -56, 45, 98, -23, -7, 0, 0, 0, 0, 0, 0, 0, 0
+
+#define TESTMATRIX -43.34, -23.4, 3.23, 5.66, 754.564, 345.23, -23.34, -65.45, \
+ -34.65, 32.3, 23.454, 3456.334, 34.234, 34.65, -765.56, 23.87, \
+ -23.54, 3.65, -234.2, -223.234, -3.0, -12.34, -23.53, 211.231, \
+ -23.214, 645.45, 23.34, -2.45, -345.23, 12.453, -23.45, 234.645, \
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+
+#define TESTTAPS 234, -4, 23, -56, 45, 98, -23, -7, 0, 0, 0, 0, 0, 0, 0, 0
+
+float in1[48] =
{
// MYMATRIX10
TESTMATRIX};
@@ -112,15 +82,15 @@
float out[SIZE] __attribute__((aligned(16)));
-spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
+spu_fir_fff_params_t spu_fir_fff_params[NUM_SPU] __attribute__((aligned(16)));
typedef struct
{
- spe_context_ptr_t spe;
- spe_fir_fff_params_t *spe_fir_fff_params;
+ spe_context_ptr_t spu;
+ spu_fir_fff_params_t *spu_fir_fff_params;
} thread_arg_t;
-void *run_abs_spe(void *thread_arg)
+void *run_abs_spu(void *thread_arg)
{
int ret;
thread_arg_t *arg = (thread_arg_t *) thread_arg;
@@ -128,10 +98,10 @@
spe_stop_info_t stop_info;
entry = SPE_DEFAULT_ENTRY;
- ret = spe_context_run(arg->spe, &entry, 0, arg->spe_fir_fff_params, NULL,
+ ret = spe_context_run(arg->spu, &entry, 0, arg->spu_fir_fff_params, NULL,
&stop_info);
if (ret < 0) {
- perror("spe_context_run");
+ perror("spu_context_run");
return NULL;
}
@@ -144,50 +114,50 @@
int ret;
spe_program_handle_t *prog;
- spe_context_ptr_t spe[NUM_SPE];
- pthread_t thread[NUM_SPE];
- thread_arg_t arg[NUM_SPE];
+ spe_context_ptr_t spu[NUM_SPU];
+ pthread_t thread[NUM_SPU];
+ thread_arg_t arg[NUM_SPU];
- printf("size: spe_fir_fff_params_t: %02d\n", sizeof(spe_fir_fff_params_t));
+ printf("size: spu_fir_fff_params_t: %02d\n", sizeof(spu_fir_fff_params_t));
- prog = spe_image_open("fir_fff_spe.elf");
+ prog = spe_image_open("fir_fff_spu.elf");
if (!prog) {
- perror("spe_image_open");
+ perror("spu_image_open");
exit(1);
}
- for (i = 0; i < NUM_SPE; ++i) {
- spe[i] = spe_context_create(0, NULL);
- if (!spe[i]) {
- perror("spe_context_create");
+ for (i = 0; i < NUM_SPU; ++i) {
+ spu[i] = spe_context_create(0, NULL);
+ if (!spu[i]) {
+ perror("spu_context_create");
exit(1);
}
- ret = spe_program_load(spe[i], prog);
+ ret = spe_program_load(spu[i], prog);
if (ret) {
- perror("spe_program_load");
+ perror("spu_program_load");
exit(1);
}
}
unsigned int j;
- int size= SIZE/NUM_SPE;
+ int size= SIZE/NUM_SPU;
for (j = 0; j < 1; ++j) {
- for (i = 0; i < NUM_SPE; ++i) {
- spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
- spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
- spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
- spe_fir_fff_params[i].ntaps = 16;
- spe_fir_fff_params[i].nsamples = 7;
- spe_fir_fff_params[i].offset = 0;
- spe_fir_fff_params[i].size = size;
+ for (i = 0; i < NUM_SPU; ++i) {
+ spu_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
+ spu_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
+ spu_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
+ spu_fir_fff_params[i].ntaps = 16;
+ spu_fir_fff_params[i].nsamples = 32;
+ spu_fir_fff_params[i].offset = 0;
+ spu_fir_fff_params[i].size = size;
- arg[i].spe = spe[i];
- arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
+ arg[i].spu = spu[i];
+ arg[i].spu_fir_fff_params = &spu_fir_fff_params[i];
- ret = pthread_create(&thread[i], NULL, run_abs_spe, &arg[i]);
+ ret = pthread_create(&thread[i], NULL, run_abs_spu, &arg[i]);
if (ret) {
perror("pthread_create");
exit(1);
@@ -196,25 +166,25 @@
pthread_join(thread[i], NULL);
}
- for (i = 0; i < NUM_SPE; ++i) {
+ for (i = 0; i < NUM_SPU; ++i) {
//pthread_join(thread[i], NULL);
- ret = spe_context_destroy(spe[i]);
+ ret = spe_context_destroy(spu[i]);
if (ret) {
- perror("spe_context_destroy");
+ perror("spu_context_destroy");
exit(1);
}
}
ret = spe_image_close(prog);
if (ret) {
- perror("spe_image_close");
+ perror("spu_image_close");
exit(1);
}
for (i = 0; i < 32; ++i) {
printf("out[%02d]=%f\n", i, out[i]);
}
- printf("size %0d result=%f\n", size, out[0]);
+ printf("time result=%d\n", spu_fir_fff_params[0].pad[0]);
return 0;
}
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.h
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h
Deleted: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S
(from rev 8117,
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.S
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,277 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+
+# taps are guaranteed to be 16 byte aligned.
+# ntaps != 0
+
+# void spu_fir_ccc (
+# const float *input,
+# const float *taps,
+# float *output,
+# const float *delayline,
+# const unsigned int offset,
+# const unsigned int nsamples,
+# const unsigned int ntaps
+# )
+# {
+# unsigned int o_index = 0;
+#
+# do {
+# float sum0 = 0;
+# float sum1 = 0;
+# float sum2 = 0;
+# float sum3 = 0;
+#
+# unsigned int n_2_complex_blocks = ntaps / 2;
+#
+# unsigned int cur_tap = 0;
+# unsigned int cur_input = input + offset + o_index;
+#
+# do {
+#
+# sum0 += cur_input[0] * cur_tap[0] - cur_input[1] * cur_tap[1];
+# sum1 += cur_input[0] * cur_tap[1] + cur_input[1] * cur_tap[0];
+# sum2 += cur_input[2] * cur_tap[2] - cur_input[3] * cur_tap[3];
+# sum3 += cur_input[2] * cur_tap[3] + cur_input[3] * cur_tap[2];
+#
+# cur_input += 4;
+# cur_tap += 4;
+#
+# } while ((n_2_complex_blocks -= 2) != 0);
+#
+# output[o_index] = sum0 + sum2;
+# output[o_index+1] = sum1 + sum3;
+#
+# } while ((o_index += 2) != nsamples * 2);
+#
+# }
+#
+
+ .file "fir_ccc_spu.S"
+.text
+ .align 3
+ .global spu_fir_ccc
+ .type spu_fir_ccc, @function
+spu_fir_ccc:
+ ori $32,$5,0 # 0-2
+ lqa $41, spu_fir_ccc_hightapmask # 1-6
expand masks for taps
+ ori $33,$7,0 # 0-2
+ lqa $42, spu_fir_ccc_lowtapmask # 1-6 expand
masks for taps
+ ori $31,$8,0 # 0-2
+ lqa $43, spu_fir_ccc_xormask # 1-6 mask for
inverse of bd
+ nop
+ lqa $44, spu_fir_ccc_leftexpand # 1-6 mask to
shift bd bc to preferred slot
+ .spu_fir_ccc_start2:
+ xor $30,$30,$30 # 0-2 initialize the
current output vector
+ il $34, 16 # 0-2 shift mask for
output insertion
+
+ .spu_fir_ccc_start1:
+ andi $37,$33,0x0c # 0-2 find index into
masks [0-4]
+ xor $5,$5,$5 # 0-2
+ shlqbii $38,$37,2 # 1-4 mult by 4
+ xor $6,$6,$6 # 0-2
+ xor $7,$7,$7 # 0-2
+ ori $36,$9,0 # 0-2
+ xor $8,$8,$8 # 0-2
+ lqd $39,spu_fir_ccc_highshiftmasks($38)
# 1-6 load the right shift mask
+ a $35, $33, $3 # 0-2
+ lqd $40,spu_fir_ccc_lowshiftmasks($38)
# 1-6 load the right shift mask
+ ori $38,$4,0 # 0-2
+
+ hbra .spu_fir_ccc_inner_loop_branch,
.spu_fir_ccc_inner_loop # inner-loop hint
+
+ lqd $10,0($35) # 1-6
+ lqd $11,16($35) # 1-6
+ nop
+ lqd $12,32($35) # 1-6
+
+ .spu_fir_ccc_inner_loop:
+ ai $36,$36,-4 # 0-2
+ lqd $14,0($38) # 1-6
+ ai $35,$35,32 # 0-2
+ lqd $15,16($38) # 1-6
+ shufb $13,$10,$11,$40 # 1-4
+ shufb $16,$14,$14,$42 # 1-4
+ ai $38,$38,32 # 0-2
+
+
+ shufb $10,$10,$11,$39 # 1-4
+ nop
+ # expensive, but needed
+ shufb $14,$14,$14,$41 # 1-4
+ fma $6, $13, $16, $6 # 0-6
+ shufb $18,$11,$12,$40 # 1-4
+ fma $5, $10, $14, $5 # 0-6
+
+ shufb $17,$15,$15,$42 # 1-4
+ lqd $10,0($35) # 1-6
+ shufb $15,$15,$15,$41 # 1-4
+ nop
+ shufb $11,$11,$12,$39 # 1-4
+ fma $8, $18, $17, $8 # 0-6
+
+ lqd $12,32($35) # 1-6
+ fma $7, $11, $15, $7 # 0-6
+
+ lqd $11,16($35) # 1-6
+
+
+ .spu_fir_ccc_inner_loop_branch:
+ brnz $36,.spu_fir_ccc_inner_loop
+
+ fsmbi $10,0xFF00 # 1-4
+ fa $18,$5,$6 # 0-6
+ hbra .outter_loop_branch,
.spu_fir_ccc_start1 # 1-
+ fa $19,$7,$8 # 0-6
+ hbra .spu_fir_ccc_finish_branch,
.spu_fir_ccc_finish_branch_targ # 1-
+ fa $5,$18,$19 # 0-6
+
+ shufb $6, $5, $5, $44 # 1-4 expand 5 to 6
+ xor $6,$6,$43 # 0-2
+ fa $11, $5, $6 # 0-6
+
+ and $12,$10,$11 # 0-2
+
+ rotqby $11, $12, $34 # 1-4
+ or $30,$11,$30 # 0-2
+ ai $31,$31,-1 # 0-2
+
+ .spu_fir_ccc_finish_branch:
+ brz $31,.spu_fir_ccc_finish4
+ .spu_fir_ccc_finish_branch_targ:
+
+
+ ai $33,$33,8 # 0-2
+ ai $34,$34,-8 # 0-2
+
+ .outter_loop_branch:
+ brnz $34, .spu_fir_ccc_start1
+ hbra .spu_fir_ccc_outter_outter_loop_branch,
.spu_fir_ccc_start2
+# Stores r30 (the assembled output vector) in output
+ .spu_fir_ccc_finish4:
+ stqd $30,0($32)
+ ai $32,$32,16
# increment output pointer by 1 new vector.
+
+ .spu_fir_ccc_outter_outter_loop_branch:
+ brnz $31,.spu_fir_ccc_start2
# start another output vector if needed
+
+ bi $lr
+ .size spu_fir_ccc, .-spu_fir_ccc
+
+.text
+ .global spu_fir_ccc_highshiftmasks
+ .align 4
+ .type spu_fir_ccc_highshiftmasks, @object
+ .size spu_fir_ccc_highshiftmasks, 64
+
+spu_fir_ccc_highshiftmasks:
+ .long 0x00010203
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x04050607
+
+ .long 0x04050607
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x08090a0b
+
+ .long 0x08090a0b
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x10111213
+
+ .global spu_fir_ccc_lowshiftmasks
+ .align 4
+ .type spu_fir_ccc_lowshiftmasks, @object
+ .size spu_fir_ccc_lowshiftmasks, 64
+
+spu_fir_ccc_lowshiftmasks:
+ .long 0x08090a0b
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x10111213
+
+ .long 0x10111213
+ .long 0x10111213
+ .long 0x14151617
+ .long 0x14151617
+
+ .long 0x14151617
+ .long 0x14151617
+ .long 0x18191a1b
+ .long 0x18191a1b
+
+ .global spu_fir_ccc_hightapmask
+ .align 4
+ .type spu_fir_ccc_hightapmask, @object
+ .size spu_fir_ccc_hightapmask, 16
+
+spu_fir_ccc_hightapmask:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x04050607
+ .long 0x00010203
+
+ .global spu_fir_ccc_lowtapmask
+ .align 4
+ .type spu_fir_ccc_lowtapmask, @object
+ .size spu_fir_ccc_lowtapmask, 16
+
+spu_fir_ccc_lowtapmask:
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x0c0d0e0f
+ .long 0x08090a0b
+
+ .global spu_fir_ccc_xormask
+ .align 4
+ .type spu_fir_ccc_xormask, @object
+ .size spu_fir_ccc_xormask, 16
+
+spu_fir_ccc_xormask:
+ .long 0x80000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+
+ .global spu_fir_ccc_leftexpand
+ .align 4
+ .type spu_fir_ccc_leftexpand, @object
+ .size spu_fir_ccc_leftexpand, 16
+
+spu_fir_ccc_leftexpand:
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x80808080
+ .long 0x80808080
+
+ .ident "Hand coded Cell SPU assembly"
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h
(from rev 8117,
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_ccc_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_ccc_as.h
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,64 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_CCC_AS_H_
+#define SPU_FIR_CCC_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief compute an array of N interleaved complex output values from
+ * interleaved complex inputs and taps.
+ *
+ * \param input must have (nsamples - 1 + ntaps) valid entries. input does
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps - 1] are
+ * referenced to compute the output values.
+ *
+ * \param taps a pointer to the pre-reversed and aligned set of taps. The
+ * number of taps must be a multiple of 4, padded accordingly.
+ *
+ * \param output a pointer to the filter output buffer
+ *
+ * \param delayline NOTE: currently not used
+ *
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero. If input is not aligned, this is
+ * the difference in alignment, in bytes (it is added to the input address).
+ *
+ * \param nsamples number of (complex) samples to produce for output
+ *
+ * \param ntaps length of the tap vector. Must be a multiple of 4.
+ */
+
+extern void spu_fir_ccc(const __vector float *input,
+ const __vector float *taps, __vector float *output,
+ const __vector float *delayline, const unsigned int offset,
+ const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_CCC_AS_H_
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S
(from rev 8117,
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.S
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,180 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+
+# taps are guaranteed to be 16 byte aligned.
+# ntaps != 0
+
+# void spu_fir_fff64 (
+# const __vector double *input,
+# const __vector double *taps,
+# __vector double *output,
+# const __vector double *delayline,
+# const unsigned int offset,
+# const unsigned int nsamples,
+# const unsigned int ntaps
+# )
+# {
+# unsigned int o_index = 0;
+#
+# do {
+# double sum0 = 0;
+# double sum1 = 0;
+# double sum2 = 0;
+# double sum3 = 0;
+#
+# unsigned int n_2_float_blocks = ntaps / 2;
+#
+# unsigned int cur_tap = 0;
+# unsigned int cur_input = input + offset + o_index;
+#
+# do {
+#
+# sum0 += cur_input[0] * cur_tap[0];
+# sum1 += cur_input[1] * cur_tap[1];
+# sum2 += cur_input[2] * cur_tap[2];
+# sum3 += cur_input[3] * cur_tap[3];
+#
+# cur_input += 4;
+# cur_tap += 4;
+#
+# } while ((n_2_float_blocks -= 4) != 0);
+#
+# output[o_index] = sum0 + sum1 + sum2 + sum3;
+#
+# } while (++o_index != nsamples);
+#
+# }
+#
+
+ .file "fir_fff64_spu.S"
+.text
+ .align 3
+ .global spu_fir_fff64
+ .type spu_fir_fff64, @function
+spu_fir_fff64:
+ ori $32,$5,0 # 0-2
+ ori $33,$7,0 # 0-2
+ ori $31,$8,0 # 0-2
+ .spu_fir_fff64_start2:
+ xor $30,$30,$30 # 0-2 initialize the
current output vector
+ il $34, 16 # 0-2 shift mask for
output insertion
+
+ .spu_fir_fff64_start1:
+ andi $37,$33,0x0c # 0-2 find index into
masks [0-4] TODO!!!!!
+ xor $5,$5,$5 # 0-2
+ xor $6,$6,$6 # 0-2
+ shlqbii $38,$37,2 # 1-4 mult by 4
+ xor $7,$7,$7 # 0-2
+ ori $36,$9,0 # 0-2
+ lqd $39,spu_fir_fff64_shiftmasks32($38)
# 1-6 load the right shift mask
+ xor $8,$8,$8 # 0-2
+ a $35, $33, $3 # 0-2
+ ori $38,$4,0 # 0-2
+
+ hbra .spu_fir_fff64_inner_loop_branch,
.spu_fir_fff64_inner_loop # inner-loop hint
+ .spu_fir_fff64_inner_loop:
+ lqd $10,0($35) # 1-6
+ lqd $11,16($35) # 1-6
+ lqd $12,32($35) # 1-6
+ lqd $13,48($35) # 1-6
+ lqd $18,64($35) # 1-6
+ lqd $14,0($38) # 1-6
+ lqd $15,16($38) # 1-6
+ ai $36,$36,-8 # 0-2 consume 8 taps
every time
+ lqd $16,32($38) # 1-6
+ ai $35,$35,64 # 0-2 consume 64 bytes
of input data each loop
+ lqd $17,48($38) # 1-6
+ ai $38,$38,64 # 0-2 consume 64 bytes
of input data each loop
+
+ shufb $10,$10,$11,$39 # 1-4
+ dfma $5, $10, $14 # 0-6
+ shufb $11,$11,$12,$39 # 1-4
+ dfma $6, $11, $15 # 0-6
+ shufb $12,$12,$13,$39 # 1-4
+ dfma $7, $12, $16 # 0-6
+ shufb $13,$13,$18,$39 # 1-4
+ dfma $8, $13, $17 # 0-6
+
+ fsmbi $10,0xFF00 # 1-4 byte mask keeping the first doubleword (used by the 'and' after the loop)
+ .spu_fir_fff64_inner_loop_branch: # label must sit on the brnz itself: the hbra hint names this address as the branch
+ brnz $36,.spu_fir_fff64_inner_loop
+ dfa $18,$5,$6 # 0-6
+ hbra .spu_fir_fff64_outter_loop_branch,
.spu_fir_fff64_start1 # 1-
+ dfa $19,$7,$8 # 0-6
+ hbra .spu_fir_fff64_finish_branch,
.spu_fir_fff64_finish_branch_targ # 1-
+ dfa $5,$18,$19 # 0-6
+
+# accumulate the two doubleword elements in r5 into the first element of r5
+ ori $6,$5,0 # 0-2
+ shlqbyi $6,$6,8 # 1-4
+ dfa $5,$6,$5 # 0-2
+ and $11,$10,$5 # 0-2
+ rotqby $12, $11, $34 # 1-4
+ or $30,$12,$30 # 0-2
+
+ ai $31,$31,-1 # 0-2 produce 1 sample
each inner-loop
+ .spu_fir_fff64_finish_branch:
+ brz $31,.spu_fir_fff64_finish2
+ .spu_fir_fff64_finish_branch_targ:
+ ai $33,$33,8 # 0-2 offset into
input data moves 8 bytes each loop
+ ai $34,$34,-8 # 0-2 shiftmask moves
twice each loop
+
+ .spu_fir_fff64_outter_loop_branch:
+ brnz $34, .spu_fir_fff64_start1
+ hbra
.spu_fir_fff64_outter_outter_loop_branch, .spu_fir_fff64_start2
+# Stores r30 (the assembled output vector) in output
+ .spu_fir_fff64_finish2:
+ stqd $30,0($32)
+ ai $32,$32,16
# increment output pointer by 1 new vector.
+
+ .spu_fir_fff64_outter_outter_loop_branch:
+ brnz $31,.spu_fir_fff64_start2
# start another output vector if needed
+
+ bi $lr
+ .size spu_fir_fff64, .-spu_fir_fff64
+.text
+ .global spu_fir_fff64_shiftmasks32
+ .align 4
+ .type spu_fir_fff64_shiftmasks32, @object
+ .size spu_fir_fff64_shiftmasks32, 64
+spu_fir_fff64_shiftmasks32:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x10111213
+
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x14151617
+
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x14151617
+ .long 0x18191a1b
+
+ .ident "Hand coded Cell SPU assembly"
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h
(from rev 8117,
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff64_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff64_as.h
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_FFF64_AS_H_
+#define SPU_FIR_FFF64_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief compute an array of N double-precision output values.
+ *
+ * \param input must have (nsamples - 1 + ntaps) valid entries. input does
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps - 1] are
+ * referenced to compute the output values.
+ *
+ * \param taps a pointer to the pre-reversed and aligned set of taps. The
+ * number of taps must be a multiple of 8, padded accordingly.
+ *
+ * \param output a pointer to the filter output buffer
+ *
+ * \param delayline NOTE: currently not used
+ *
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero. If input is not aligned, this is
+ * the difference in alignment, in bytes (it is added to the input address).
+ *
+ * \param nsamples number of samples to produce for output
+ *
+ * \param ntaps length of the tap vector. Must be a multiple of 8.
+ */
+
+extern void spu_fir_fff64(const __vector double *input,
+ const __vector double *taps, __vector double *output,
+ const __vector double *delayline, const unsigned int offset,
+ const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_FFF64_AS_H_
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S
(from rev 8117,
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.S
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,197 @@
+#
+# Copyright 2008 Free Software Foundation, Inc.
+#
+# This file is part of GNU Radio
+#
+# GNU Radio is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GNU Radio is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Radio; see the file COPYING. If not, write to
+# the Free Software Foundation, Inc., 51 Franklin Street,
+# Boston, MA 02110-1301, USA.
+#
+
+
+# taps are guaranteed to be 16 byte aligned.
+# ntaps != 0
+
+# void spu_fir_fff (
+# const __vector float *input,
+# const __vector float *taps,
+# __vector float *output,
+# const __vector float *delayline,
+# const unsigned int offset,
+# const unsigned int nsamples,
+# const unsigned int ntaps
+# )
+# {
+# unsigned int o_index = 0;
+#
+# do {
+# float sum0 = 0;
+# float sum1 = 0;
+# float sum2 = 0;
+# float sum3 = 0;
+#
+# unsigned int n_4_float_blocks = ntaps / 4;
+#
+# unsigned int cur_tap = 0;
+# unsigned int cur_input = input + offset + o_index;
+#
+# do {
+#
+# sum0 += cur_input[0] * cur_tap[0];
+# sum1 += cur_input[1] * cur_tap[1];
+# sum2 += cur_input[2] * cur_tap[2];
+# sum3 += cur_input[3] * cur_tap[3];
+#
+# cur_input += 4;
+# cur_tap += 4;
+#
+# } while ((n_4_float_blocks -= 4) != 0);
+#
+# output[o_index] = sum0 + sum1 + sum2 + sum3;
+#
+# } while (++o_index != nsamples);
+#
+# }
+#
+
+ .file "fir_fff_spu.S"
+.text
+ .align 3
+ .global spu_fir_fff
+ .type spu_fir_fff, @function
+spu_fir_fff:
+ ori $32,$5,0 # 0-2
+ ori $33,$7,0 # 0-2
+ ori $31,$8,0 # 0-2
+ .spu_fir_fff_start4:
+ xor $30,$30,$30 # 0-2 initialize the
current output vector
+ il $34, 16 # 0-2 shift mask for
output insertion
+
+ .spu_fir_fff_start1:
+ andi $37,$33,0x0c # 0-2 find index into
masks [0-4]
+ xor $5,$5,$5 # 0-2
+ xor $6,$6,$6 # 0-2
+ shlqbii $38,$37,2 # 1-4 mult by 4
+ xor $7,$7,$7 # 0-2
+ ori $36,$9,0 # 0-2
+
+ xor $8,$8,$8 # 0-2
+ lqd $39,spu_fir_fff_shiftmasks($38)
# 1-6 load the right shift mask
+ a $35, $33, $3 # 0-2
+ ori $38,$4,0 # 0-2
+
+ hbra .spu_fir_fff_inner_loop_branch,
.spu_fir_fff_inner_loop # inner-loop hint
+
+ lqd $10,0($35) # 1-6
+ lqd $11,16($35) # 1-6
+ lqd $12,32($35) # 1-6
+ .spu_fir_fff_inner_loop:
+
+ lqd $13,48($35) # 1-6
+ lqd $18,64($35) # 1-6
+ lqd $14,0($38) # 1-6
+ nop
+ lqd $15,16($38) # 1-6
+
+ ai $36,$36,-16 # 0-2
+ shufb $10,$10,$11,$39 # 1-4
+ lqd $16,32($38) # 1-6
+ nop
+ ai $35,$35,64 # 0-2
+ shufb $11,$11,$12,$39 # 1-4
+ lqd $17,48($38) # 1-6
+ shufb $12,$12,$13,$39 # 1-4
+ ai $38,$38,64 # 0-2
+
+ shufb $13,$13,$18,$39 # 1-4
+ fma $5, $10, $14, $5 # 0-6
+ nop
+ fma $6, $11, $15, $6 # 0-6
+ lqd $10,0($35) # 1-6
+ fma $7, $12, $16, $7 # 0-6
+ lqd $11,16($35) # 1-6
+ fma $8, $13, $17, $8 # 0-6
+ lqd $12,32($35) # 1-6
+
+
+
+ .spu_fir_fff_inner_loop_branch:
+ brnz $36,.spu_fir_fff_inner_loop
+ fsmbi $10,0xF000 # 1-4
+ fa $18,$5,$6 # 0-6
+ hbra
.spu_fir_fff_outter_loop_branch, .spu_fir_fff_start1 # 1-
+ fa $19,$7,$8 # 0-6
+ hbra .spu_fir_fff_finish_branch,
.spu_fir_fff_finish_branch_targ # 1-
+ fa $5,$18,$19 # 0-6
+
+# accumulate word elements in r5 into first element in r5
+ ori $6,$5,0 # 0-2
+ shlqbyi $6,$6,4 # 1-4
+ ai $31,$31,-1 # 0-2
+ fa $5,$6,$5 # 0-2
+ shlqbyi $6,$6,4 # 1-4
+ fa $5,$6,$5 # 0-2
+ shlqbyi $6,$6,4 # 1-4
+ fa $5,$6,$5 # 0-2
+ and $11,$10,$5 # 0-2
+ rotqby $12, $11, $34 # 1-4
+ or $30,$12,$30 # 0-2
+
+ ai $34,$34,-4 # 0-2
+ .spu_fir_fff_finish_branch:
+ brz $31,.spu_fir_fff_finish4
+ .spu_fir_fff_finish_branch_targ:
+
+ ai $33,$33,4 # 0-2
+
+ .spu_fir_fff_outter_loop_branch:
+ brnz $34, .spu_fir_fff_start1
+ hbra .spu_fir_fff_outter_outter_loop_branch,
.spu_fir_fff_start4
+# Stores r30 (the assembled output vector) in output
+ .spu_fir_fff_finish4:
+ stqd $30,0($32)
+ ai $32,$32,16
# increment output pointer by 1 new vector.
+
+ .spu_fir_fff_outter_outter_loop_branch:
+ brnz $31,.spu_fir_fff_start4
# start another output vector if needed
+
+ bi $lr
+ .size spu_fir_fff, .-spu_fir_fff
+.text
+ .global spu_fir_fff_shiftmasks
+ .align 4
+ .type spu_fir_fff_shiftmasks, @object
+ .size spu_fir_fff_shiftmasks, 64
+spu_fir_fff_shiftmasks:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x10111213
+
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x14151617
+
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x14151617
+ .long 0x18191a1b
+
+ .ident "Hand coded Cell SPU assembly"
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h
(from rev 8117,
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_as.h
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,63 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_FFF_AS_H_
+#define SPU_FIR_FFF_AS_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*!
+ * \brief compute an array of N single-precision output values.
+ *
+ * \param input must have (nsamples - 1 + ntaps) valid entries. input does
+ * NOT need to be aligned. input[0] .. input[nsamples - 1 + ntaps - 1] are
+ * referenced to compute the output values.
+ *
+ * \param taps a pointer to the pre-reversed and aligned set of taps. The
+ * number of taps must be a multiple of 16, padded accordingly.
+ *
+ * \param output a pointer to the filter output buffer
+ *
+ * \param delayline NOTE: currently not used
+ *
+ * \param offset the initial offset into input buffer to start the filter. If
+ * input is aligned, this should be zero. If input is not aligned, this is
+ * the difference in alignment, in bytes (it is added to the input address).
+ *
+ * \param nsamples number of samples to produce for output
+ *
+ * \param ntaps length of the tap vector. Must be a multiple of 16.
+ */
+
+extern void spu_fir_fff(const __vector float *input,
+ const __vector float *taps, __vector float *output,
+ const __vector float *delayline, const unsigned int offset,
+ const unsigned int nsamples, const unsigned int ntaps);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPU_FIR_FFF_AS_H_
Copied: gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h
(from rev 8117,
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h)
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spu_fir_fff_params.h
2008-03-27 21:45:06 UTC (rev 8118)
@@ -0,0 +1,38 @@
+/* -*- c++ -*- */
+/*
+ * Copyright 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of GNU Radio
+ *
+ * GNU Radio is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GNU Radio is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Radio; see the file COPYING. If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef SPU_FIR_FFF_PARAMS_H_
+#define SPU_FIR_FFF_PARAMS_H_
+
+typedef struct /* per-SPU parameter block; the test driver passes a pointer to one as argp of spe_context_run() */
+{
+ unsigned long long ea_in1; /* address of the first input buffer, held as a 64-bit integer (driver: &in1[i*size]) */
+ unsigned long long ea_in2; /* address of the second input buffer (driver: &in2[i*size]); presumably the taps -- TODO confirm */
+ unsigned long long ea_out; /* address of the output buffer (driver: &out[i*size]) */
+ unsigned int nsamples; /* number of output samples requested (driver uses 32) */
+ unsigned int ntaps; /* number of filter taps (driver uses 16) */
+ unsigned int offset; /* driver sets 0; presumably forwarded as the filter's offset argument -- confirm */
+ unsigned int size; /* per-SPU share of the buffers (driver: SIZE/NUM_SPU) */
+ int pad[6]; /* padding; the driver prints pad[0] as "time result". NOTE(review): presumably rounds the struct to 64 bytes -- confirm */
+} spu_fir_fff_params_t;
+
+#endif /*SPU_FIR_FFF_PARAMS_H_*/
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r8118 - gnuradio/branches/developers/ngoergen/spe_fir_fff,
ngoergen <=