Summary: This is an implementation of an autocorrelation-based power spectral density (PSD) estimator. This implementation estimates the PSD of an IIR-filtered pseudo-noise generator.
We provide for you in Appendix D
and E a complete C implementation
of a PSD estimator. The input is an IIR-filtered pseudo-noise (PN)
sequence generator and the PSD estimate is based on windowing the
autocorrelation with a rectangular window.
The code consists of the files lab4b.c,
lab4b.h, intrinsics.h, pn.c,
iirfilt.c, autocorr.c,
c_fft_given_iirc.asm, and the previously-given TI FFT
routine. The assembly file c_fft_given_iirc.asm differs
from c_fft_given.asm in that the window array has been
removed and variables and arrays associated with IIR filtering have
been added. Note that the multiply functions in the functions are
actually compiler directives contained in intrinsics.h.
Make sure you know which ones are used and why; note that
VPO is not defined by the TI compiler, therefore the
corresponding section of the #ifdef statement is not used.
Open up the lab4b.pjt project and Rebuild All.
Load lab4b.out onto the DSP and
run the code. Make sure that an
IIR-filtered PN sequence appears on channel 1 and its PSD estimate
appears on channel 2.
Does the output match your expectations based on the theory? Does this application illustrate any limitations of the FFT implementation? (Hint: note that most of the values in the FFT input are zero.) The previously-given C implementation uses a similar algorithm as the TI FFT; take a look at the C code for help. What are the limitation(s) of the FFT that show up in this application?
In lab4b.h
M sets the number of
autocorrelation points that are calculated. What is the maximum value
of M that allows the reference routines to run in real
time? In determining this value you may find it useful to connect a
wave-function generator to the input and copy input on that channel
into channel 1 of output. You may limit M to
powers of 2 minus 1.
1 #define N 1024 /* Length of output buffers */
2 #define L N /* Length of input data */
3 #define logL 10 /* log base 2 of L */
4 #define M 31 /* Compute 2*M+1 autocorrelation points */
5
6 /* #define M (L/2-1) */ /* Be sure to use ()'s in this case */
7 /* or algebraic substitution bugs */
8 /* can be introduced */
/* Compiler intrinsics for the TI compiler */
/* and the Very Portable Optimizer (VPO) port */
/* to TMS320C54X series DSPs */
/* */
/* Use compile option -DVPO when using VPO */
/* */
/* Copyright September 2005 by Matt Kleffner */
/* under the Creative Commons Attribution License */
/* Works with TMS320C55X series */
#ifndef INTRINSICS_H
#define INTRINSICS_H
#ifdef VPO
long int vpo_l_mul_ii(int w0, int w1);
/* fractional multiply without fractional mode (long result) */
#define _l_mul_fract_fb0_ii(w0,w1) \
(vpo_l_mul_ii(w0,w1) << 1)
/* fractional multiply with fractional mode already on (long result) */
#define _l_mul_fract_fb1_ii(w0,w1) \
(vpo_l_mul_ii(w0,w1))
/* fractional multiply without fractional mode (int result) */
#define _i_mul_fract_fb0_ii(w0,w1) \
(vpo_l_mul_ii(w0,w1) >> 15)
/* fractional multiply with fractional mode already on (int result) */
#define _i_mul_fract_fb1_ii(w0,w1) \
(vpo_l_mul_ii(w0,w1) >> 16)
#define _set_fract_bit() vpo_set_fract()
#define _reset_fract_bit() vpo_reset_fract()
#define _set_ovm_bit() vpo_set_ovm()
#define _reset_ovm_bit() vpo_reset_ovm()
#define _l_add_shiftl_li(w0,w1) (((int32)(w0))+(((int32)(int16)(w1))<<16) )
#define _l_sub_shiftl_li(w0,w1) (((int32)(w0))-(((int32)(int16)(w1))<<16) )
#else
/* fractional multiply without fractional mode (long result) */
#define _l_mul_fract_fb0_ii(w0,w1) \
(((long int)w0 * (long int)w1) << 1)
/* fractional multiply with fractional mode already on (long result) */
#define _l_mul_fract_fb1_ii(w0,w1) \
(((long int)w0 * (long int)w1))
/* fractional multiply without fractional mode (int result) */
#define _i_mul_fract_fb0_ii(w0,w1) \
(((long int)w0 * (long int)w1) >> 15)
/* fractional multiply with fractional mode already on (int result) */
#define _i_mul_fract_fb1_ii(w0,w1) \
(((long int)w0 * (long int)w1) >> 16)
#define _set_fract_bit() asm(" ssbx frct")
#define _reset_fract_bit() asm(" rsbx frct")
#define _set_ovm_bit() asm(" ssbx ovm")
#define _reset_ovm_bit() asm(" rsbx ovm")
#endif /* VPO */
#endif /* INTRINSICS_H */
// lab4b.c
// Uses PN generation, IIR filtering, and autocorrelation
// code by Matt Kleffner -9/2004
// Based on swi_process.c by Educational DSP
#include "dsk5510_dual3006cfg.h"
#include "dsk5510.h"
#include "swi_process.h"
#include "dsplib.h"
#include "lab4b.h" /* Define N here in header file */
/* function defined in pn.c */
void rand_fillbuffer(void);
/* IIR values and buffers (declared in c_fft_given_iirc.asm) */
#define IIR_order 4
extern int scale;
extern int coef[IIR_order];
extern int state[IIR_order];
/* Pointer to state buffer location */
int iirptr;
extern unsigned int *iseed; /* seed for rand_fillbuffer() and randbit() */
/* function defined in iirfilt.c */
void iirfilt(void);
/* function defined in autocorr.c */
void autocorr(void);
/* Function defined by c_fft_given_iirc.asm */
//void bit_rev_fft(void);
/* FFT data buffers (declared in c_fft_given_iirc.asm) */
extern int bit_rev_data[N*2]; /* Data output for bit-reverse function */
extern int fft_data[N*2]; /* In-place FFT input & Output array */
/* Our input/output buffers */
int autocorr_in[N];
int autocorr_out[N];
// all data processing should be done in SWI_ProcessBuffer
void SWI_ProcessBuffer()
{
static unsigned int mbox_value = 0;
short *psrc, *pdest;
unsigned int i;
mbox_value |= SWI_getmbox();
// buffers are only processed when both transmit and receive are ready
if((mbox_value & DMA_RECEIVE_DONE) && (mbox_value & DMA_TRANSMIT_DONE)) {
mbox_value = 0;
// get buffer pointers
psrc = receive_buffer[receive_buffer_to_process_index];
pdest = transmit_buffer[transmit_buffer_to_fill_index];
/* First, transfer inputs and outputs */
for (i = 0; i < N; i++)
{
pdest[4*i] = autocorr_in[i];
pdest[4*i+1] = bit_rev_data[i*2] << 8;
/* Some statements useful in debugging */
/* pdest[4*i] = psrc[4*i+2]; */
/* Be sure to comment out PN-sequence generation */
/* when using the next two lines */
//autocorr_in[i] = psrc[4i+2];
}
/* Last, set the DC coefficient to -1 for a trigger pulse */
pdest[0] = -32768;
/* Generate PN input */
rand_fillbuffer();
/* Filter input */
iirfilt();
/* Calculate autocorrelation */
autocorr();
/* Transfer autocorr output to FFT input buffer */
for (i = 0; i < N; i++) {
fft_data[i*2] = autocorr_out[i];
fft_data[i*2+1] = 0;
}
// Bit-reverse and compute FFT
cfft((DATA *)fft_data,N, SCALE);
cbrev((DATA *)fft_data,(DATA *)bit_rev_data,N);
/* Done, wait for next time around! */
receive_buffer_processed = 1; // flag receive buffer as processed
transmit_buffer_filled = 1; // flag output buffer as full
}
}
// lab4bmain.c
// based on main.c by Educational DSP
// Initializes autocorr_out, state, and iseed.
// Can be modified for optimization.
#include "dsk5510_dual3006cfg.h"
#include "dsk5510.h"
#include <csl.h>
#include "lab4b.h"
#define IIR_order 4
extern int iirptr;
unsigned int *iseed;
extern int autocorr_out[N];
extern int state[IIR_order];
void init_DMA();
void main()
{
int i;
DSK5510_init(); // init BSL
DSK5510_rset(DSK5510_MISC, 0x03); // route McBSP0/1 to J3
// Initialize autocorr_out to zero since some values will remain zero
for (i = 0; i < N; ++i)
{
autocorr_out[i] = 0;
}
for ( i = 0; i < IIR_order; ++i)
state[i] = 0;
// Start McBSP0 I2S slave
MCBSP_start(hMcbsp0, MCBSP_XMIT_START | MCBSP_RCV_START |
MCBSP_SRGR_START | MCBSP_SRGR_FRAMESYNC, 220);
// Start McBSP1 I2S master
MCBSP_start(hMcbsp1, MCBSP_XMIT_START | MCBSP_RCV_START |
MCBSP_SRGR_START | MCBSP_SRGR_FRAMESYNC, 220);
init_DMA(); // configure DMA and interrupts
*iseed = 1;
iirptr = 0;
return; // let DSP/BIOS scheduler take over
}
/* ECE420, Lab 4, Reference PN Generator Implementation (Non-Optimized) */
/* Matt Kleffner 08/04 */
/* Original by Michael Frutiger 02/24/04 */
/* Use governed by the Creative Commons Attribution License */
#include "lab4b.h"
extern unsigned int *iseed;
extern int autocorr_in[N];
/* Returns as an integer a random bit, based on the 15 lowest significant
bits in iseed (which is modified for the next call). */
int randbit()
{
int newbit;
/* XOR bits 15, 1 and 0 of iseed */
newbit = (*iseed >> 15) & 1 ^ (*iseed >> 1) & 1 ^ *iseed & 1;
/* Leftshift the seed and put the result of the XOR's in bit 1. */
*iseed=(*iseed << 1) | newbit;
return(newbit);
}
void rand_fillbuffer(void)
{
int i;
for (i = 0; i < N; ++i)
{
if (randbit()) autocorr_in[i] = 32767;
else autocorr_in[i] = -32767;
}
}
/* Simple, unoptimized IIR filter (feedback only) */
/* for TMS320C54X series DSPs */
/* Copyright September 2005 by Matt Kleffner */
/* under the Creative Commons Attribution License */
/* Works for TMS320C55X series as well */
#include "lab4b.h"
#include "intrinsics.h"
/* IIR values and buffers (declared in c_fft_given_iirc.asm) */
#define IIR_order 4
extern int scale;
extern int coef[IIR_order];
extern int state[IIR_order];
/* Arrays declared in main routine */
extern int autocorr_in[N];
extern int autocorr_out[N];
/* Pointer to state buffer location */
extern int iirptr;
void iirfilt()
{
int i, j;
_set_fract_bit();
/* Filter PN input */
for (i = 0; i < N; ++i)
{
int sum = 0;
/* Calculate and sum all feedback terms except the "oldest" one */
for (j = 0; j < (IIR_order-1); ++j)
{
sum += _i_mul_fract_fb1_ii(coef[j],state[iirptr]);
/* Avoid usage of "modulo" routine */
iirptr++;
if (iirptr == IIR_order) iirptr = 0;
}
/* Calculate and sum oldest feedback term without incrementing iirptr */
sum += _i_mul_fract_fb1_ii(coef[IIR_order-1],state[iirptr]);
/* Calculate direct input contribution */
sum += _i_mul_fract_fb1_ii(scale,autocorr_in[i]);
autocorr_in[i] = sum;
state[iirptr] = autocorr_in[i];
}
_reset_fract_bit();
}
/***********************************************************/
/* autocorr.c */
/* Copyright August 2004 by Matt Kleffner */
/* under the Creative Commons Attribution License */
/* */
/* Simple, unoptimized autocorrelation function */
/* for ECE 420 (TMS320C54X series) */
/* */
/* #defines expected in lab4b.h */
/* L: length of data in autocorr_in buffer */
/* N: length of data in autocorr_out buffer */
/* logL: log base 2 of L (used for scaling output) */
/* M: Largest positive lag of autocorrelation desired */
/* (must be < L and < N/2) */
/* */
/* 16-bit-limited input/output (must be defined elsewhere) */
/* autocorr_in: buffer for input data (L pts) */
/* autocorr_out: buffer for output data (N pts) */
/* N must be >= 2*M+1 */
/* assumed to be full of zeros at start */
/* output in zero-phase form */
/***********************************************************/
/* Works for TMS320C55X series */
#include "lab4b.h"
#include "intrinsics.h"
extern int autocorr_in[L];
extern int autocorr_out[N];
void autocorr(void)
{
int i,j,temp;
_set_fract_bit();
for(i=0;i<=M;++i)
{
long int sum=0;
for(j=0;j<(L-i);++j)
{
temp = _i_mul_fract_fb1_ii(autocorr_in[j],autocorr_in[j+i]);
sum += temp;
}
autocorr_out[i]=(int)(sum >> logL);
}
_reset_fract_bit();
/* Copy values for negative indeces at end of buffer */
for(i=1,j=N-1;i<=M;++i,--j)
{ autocorr_out[j]=autocorr_out[i]; }
}
; c_fft_given_iirc.asm
; Designed for use in lab4b for ECE420
.ARMS_off ;enable assembler for ARMS=0
.CPL_on ;enable assembler for CPL=1
.mmregs ;enable mem mapped register names
.global _bit_rev_data
.global _fft_data
.global _state
.global _scale
.global _coef
.copy "macro.asm"
.sect ".data"
N .set 1024
.align 4*N
_bit_rev_data .space 16*2*N
.align 4*N
_fft_data .space 16*2*N
; IIR filter
.align 4
_coef
.word 0
.word 0
.word 0
.word -13421
_state
.space 16*4
_scale
.word 19345
.sect ".text"
"Real-Time DSP with MATLAB"