• 微信公众号：美女很有趣。 工作之余，放松一下，关注即送10G+美女照片！

# FEC前向纠错，卷积编码之维特比译码

3小时前 1次浏览

FEC前向纠错译码用的viterbi算法代码摘抄自libfec，里面的实现很全面，还有mmx，sse加速版。

``````UINT CALLBACK ProcessIQDataThread(void *p_param)
{
Convolution2Viterbi_t *p_sys = (Convolution2Viterbi_t *)p_param;
vector<double> *p_data = NULL;

int framebits = 262;
struct v29 *vp;
vector<float> uninter1;
vector<float> uninter2;
vector<float> frameL;
vector<float> frameR;
vector<uint8_t> viterbi_in;
uint8_t viterbi_out[33];

uninter2.resize((framebits + 8));
uninter1.resize((framebits + 8) * 2);
viterbi_in.resize((framebits + 8) * 2);

if ((vp = (struct v29 *)create_viterbi29_port(framebits)) == NULL) {
return -1;
}

{
if (!p_data)
{
WaitForSingleObject(p_sys->iq_data_event, 200);

p_sys->iq_data_list.Lock();
p_data = p_sys->iq_data_list.GetUse();
p_sys->iq_data_list.UnLock();
}
if (!p_data)
continue;

// 读取2个浮点数
for (int i = 0; i < p_data->size() / 2; i++)
{
frameL.push_back(p_data->at(0));
frameR.push_back(p_data->at(1));
}

// 一帧译码长度
if (frameL.size() >= (framebits + 8))
{
// 解交织inter2
deInterleavingNP(30, inter2Perm, frameL, uninter2);

deInterleavingNP(30, inter2Perm, frameR, uninter2);

if (radioFrame.size() >= (framebits + 8) * 2)
{
// 解交织inter1

// 使用Viterbi算法做FEC译码
for (int i = 0; i < (framebits + 8) * 2; i++)
{
viterbi_in.push_back( ((int)uninter1[i] >> 24) );
}

init_viterbi29_port(vp, 0);
update_viterbi29_blk_port(vp, viterbi_in.data(), framebits + 8);
chainback_viterbi29_port(vp, viterbi_out, framebits, 0);

#ifdef _DEBUG
// 数据验证
for (int i = 0; i < 33 - 4; i++) {
if (viterbi_out[i] == 0x98 && viterbi_out[i + 1] == 0x54 && viterbi_out[i + 2] == 0xA0 && viterbi_out[i + 3] == 0x00)
int bbb = 0;
if (viterbi_out[i] == 0x05 && viterbi_out[i + 1] == 0x33 && viterbi_out[i + 2] == 0x00 && viterbi_out[i + 3] == 0x0c)
int bbb = 0;
if (viterbi_out[i] == 0x00 && viterbi_out[i + 1] == 0x03 && viterbi_out[i + 2] == 0xf4 && viterbi_out[i + 3] == 0x40)
int bbb = 0;
if (viterbi_out[i] == 0xc5 && viterbi_out[i + 1] == 0x80 && viterbi_out[i + 2] == 0x05 && viterbi_out[i + 3] == 0x5a)
int bbb = 0;
if (viterbi_out[i] == 0xe4 && viterbi_out[i + 1] == 0x00 && viterbi_out[i + 2] == 0x33 && viterbi_out[i + 3] == 0xe6)
int bbb = 0;
if (viterbi_out[i] == 0x72 && viterbi_out[i + 1] == 0x08 && viterbi_out[i + 2] == 0x38)
int bbb = 0;
if (viterbi_out[i] == 0xe2 && viterbi_out[i + 1] == 0x00 && viterbi_out[i + 2] == 0x38)
int bbb = 0;
}
#endif
}

frameL.clear();
frameR.clear();
}

p_sys->iq_data_list.Lock();
p_sys->iq_data_list.PushEmpty(p_data);
p_data = p_sys->iq_data_list.GetUse();
p_sys->iq_data_list.UnLock();
}

return 0;
}
``````

### 解交织 Interleaving.h

``````#pragma once

#include <vector>
#include <assert.h>

const int inter1Columns[4] = {
1, // TTI = 10ms
2, // TTI = 20ms
4, // TTI = 40ms
8 // TTI = 80ms
};

// 25.212 4.2.5.2 table 4: Inter-Column permutation pattern for 1st interleaving:
const char inter1Perm[4][8] = {
{0}, // TTI = 10ms
{0, 1}, // TTI = 20ms
{0, 2, 1, 3}, // TTI = 40ms
{0, 4, 2, 6, 1, 5, 3, 7} // TTI = 80ms
};

// 25.212 4.2.11 table 7: Inter-Column permutation pattern for 2nd interleaving:
const char inter2Perm[30] = { 0,20,10,5,15,25,3,13,23,8,18,28,1,11,21,
6,16,26,4,14,24,19,9,29,12,2,7,22,27,17 };

/* FIRST steps two pointers through a mapping, one pointer into the interleaved
* data and the other through the uninterleaved data.  The fifth argument, COPY,
* determines whether the copy is from interleaved to uninterleaved, or back.
* FIRST assumes no padding is necessary.
* The reason for the define is to minimize the cost of parameterization and
* function calls, as this is meant for L1 code, while also minimizing the
* duplication of code.
*/

#define FIRST(UNINTERLEAVED,UNINTERLEAVEDP,INTERLEAVED,INTERLEAVEDP,COPY)
assert(UNINTERLEAVED.size() == INTERLEAVED.size());
unsigned int rows = UNINTERLEAVED.size() / c++olumns;
assert(rows * c++olumns == UNINTERLEAVED.size());
const char *colp = permutation;
float *INTERLEAVEDP = &INTERLEAVED[0];
for (unsigned i = 0; i < c++olumns; i++) {
float *UNINTERLEAVEDP = &UNINTERLEAVED[*colp++];
for (unsigned int j = 0; j < rows; j++) {
COPY;
UNINTERLEAVEDP += c++olumns;
}
}

/** interleaving with No Padding */
void interleavingNP(const unsigned int c++olumns, const char *permutation, vector<float> &in, vector<float> &out)
{
FIRST(in, inp, out, outp, *outp++ = *inp)
}

/** de-interleaving with No Padding */
void deInterleavingNP(const unsigned int c++olumns, const char *permutation, vector<float> &in, vector<float> &out)
{
FIRST(out, outp, in, inp, *outp = *inp++)
}

/* SECOND steps two pointers through a mapping, one pointer into the interleaved
* data and the other through the uninterleaved data.  The fifth argument, COPY,
* determines whether the copy is from interleaved to uninterleaved, or back.
* The reason for the define is to minimize the cost of parameterization and
* function calls, as this is meant for L1 code, while also minimizing the
* duplication of code.
*/

#define SECOND(UNINTERLEAVED,UNINTERLEAVEDP,INTERLEAVED,INTERLEAVEDP,COPY)
assert(UNINTERLEAVED.size() == INTERLEAVED.size());
int R2 = (UNINTERLEAVED.size() + columns - 1) / columns;
int padding = columns * R2 - UNINTERLEAVED.size();
int rows = R2;
const char *colp = permutation;
float *UNINTERLEAVEDP = &UNINTERLEAVED[0];
for (int i = 0; i < columns; i++) {
int trows = rows - (*colp >= firstPaddedColumn);
float *INTERLEAVEDP = &INTERLEAVED[*colp++];
for (int j = 0; j < trows; j++) {
COPY;
INTERLEAVEDP += columns;
}
}

void interleavingWP(const int columns, const char *permutation, vector<float> &in, vector<float> &out)
{
SECOND(in, inp, out, outp, *outp = *inp++)
}

void deInterleavingWP(const int columns, const char *permutation, vector<float> &in, vector<float> &out)
{
SECOND(out, outp, in, inp, *outp++ = *inp)
}

/*
Determining the constants.

From the standard we know:
* All frame sizes for the BCH.
* transport block is 246 bits
* there are two radio frames, 270 bits each
* TTI is 20 ms
* SF is 256
* parity word Li is 16 bits
* For all downlink TrCH except BCH, the radio frame size is  2*38400/SF = 76800/SF.
* For SF=256 that's 300.
* For SF=4 that's 19200.
* The maximum code block size for convulutional coding is 540 bits (25.212 4.2.2.2).
* That corresponds to a radio frame size of 1080 bits, or a spreading factor of 71,
meaning that the smallest spreading factor that can be used is 128.
* 76800/128 = 600 coded bits -> roughly 300 data bits.
* That corresponds to an input rate of roughly 30 kb/s at.
* The maximum code block size for turbo coding is 5114 bits (25.212 4.2.2.2).
* That corresponds to a radio frame size of 15342 bits, or a spreading factor of 5,
meaning that the smallest spreading factor that can be used is 8.
* 76800/8 = 9600 coded bits -> roughly 3200 data bits.
* That corresponds to an input rate of roughly 320 kb/s.

OK - SO HOW DO YOU GET HIGHER RATES?? HOW CAN YOU USE SF=4??
A: Use the full 5114-but code block and then expand it with rate-matching.
You still can't get the full ~640 kb/s implied by SF=4, but you get to ~500 kb/s.

(pat) A: They considered this problem.  See 25.212 4.2.2.2 Code block segmentation.
In Layer1, after transport block concatenation, you then simply chop the result up
into the largest pieces that can go through the encoder, then put them back together after.

From "higher layers" we are given:
* SF: 4, 8, 16, 32, 64, 128, 256.
* P: 24, 16, 8, 0 bits
* TTI: 10, 20, 40 ms.

To simplify things, we set:
* TTI 10 ms always on DCH and FACH, 20 ms on PCH and BCH
* BCH and PCH are always rate-1/2 convolutional code
* DCH and FACH are always rate-1/3 turbo code
* no rate-matching, no puncturing
* parity word is always 16 bits
* So the only parameter than changes is spreading factor.

* We will only support 15-slot (non-compressed) slot formats.

From our simplifications we also know:
* For non-BCH/PCH TrCH there is one radio frame,
76800/SF channel (coded) bits, per transport block.
* DCH and FACH always use rate-1/3 turbo code,
which has 12 terminating bits in the output.
* For DCH and FACH, the transport block size is
((76800/SF - 12)/3) - P = (25600/SF) - 4 - P data bits,
where P is the parity word size.
* Fix P=16 for simplicity and transport block size is (25600/SF) - 20.
* for SF=256, that's 80 bits.
* for SF=16, that's 1580 bits.
* for SF=8, that's 3180 bits.

* For PCH there is one radio frame,
76800/SF channel (coded) bits, per transport block.
* SF=64, for that's 1200 channel bits.
* It's a rate-1/2 conv code, so that's 1200/2 - 8 - P data bits.
* P=16 so that's 1200/2 - 24 = 576 transport bits.  Really?

*/
#if 0

const int inter1Columns[] = { 1, 2, 4, 8 };

const char inter1Perm[4][8] = {
{0},
{0, 1},
{0, 2, 1, 3},
{0, 4, 2, 6, 1, 5, 3, 7}
};

const char inter2Perm[] = {
0, 20, 10, 5, 15, 25, 3, 13, 23, 8, 18, 28, 1, 11, 21,
6, 16, 26, 4, 14, 24, 19, 9, 29, 12, 2, 7, 22, 27, 17
};

vector<char> randomBitVector(int n)
{
vector<char> t(n);
for (int i = 0; i < n; i++) t[i] = random() % 2;
return t;
}

void testInterleavings()
{
int lth1 = 48;
int C2 = 30;
for (int i = 0; i < 4; i++) {
vector<char> v1 = randomBitVector(lth1);
vector<char> v2(lth1);
vector<char> v3(lth1);
v1.interleavingNP(inter1Columns[i], inter1Perm[i], v2);
v2.deInterleavingNP(inter1Columns[i], inter1Perm[i], v3);
cout << "first " << i << " " << (veq(v1, v3) ? "ok" : "fail") << endl;
}
for (int lth2 = 90; lth2 < 120; lth2++) {
vector<char> v1 = randomBitVector(lth2);
vector<char> v2(lth2);
vector<char> v3(lth2);
v1.interleavingWP(C2, inter2Perm, v2);
v2.deInterleavingWP(C2, inter2Perm, v3);
cout << "second " << lth2 << " " << (veq(v1, v3) ? "ok" : "fail") << endl;
}
for (int lth = 48; lth <= 4800; lth *= 10) {
TurboInterleaver er(lth);
cout << "Turbo Interleaver permutation(" << lth << ") " << (permutationCheck(er.permutation()) ? "ok" : "fail") << endl;
vector<char> er1 = randomBitVector(lth);
vector<char> er2(lth);
er.interleave(er1, er2);
vector<char> er3(lth);
er.unInterleave(er2, er3);
cout << "Turbo Interleaver(" << lth << ") " << (veq(er1.sliced(), er3) ? "ok" : "fail") << endl;
}
}

#endif
``````

## viterbi译码

### partab实现 partab.c++

``````/* Utility routines for FEC support
* Copyright 2004, Phil Karn, KA9Q
*/

#include <stdio.h>
#include "fec.h"

unsigned char Partab[256] = {0};
int P_init = 0;

/* Create 256-entry odd-parity lookup table
* Needed only on non-ia32 mac++hines
*/
void partab_init(void) {
int i, cnt, ti;

/* Initialize parity lookup table */
for (i = 0; i < 256; i++) {
cnt = 0;
ti = i;
while (ti) {
if (ti & 1)
cnt++;
ti >>= 1;
}
Partab[i] = cnt & 1;
}
P_init = 1;
}

/* Lookup table giving count of 1 bits for integers 0-255 */
int Bitcnt[] = {
0, 1, 1, 2, 1, 2, 2, 3,
1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4,
2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5,
3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6,
4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7,
5, 6, 6, 7, 6, 7, 7, 8,
};
``````

### fec++.h

``````/* User include file for libfec
* Copyright 2004, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/

#ifndef _FEC_H_
#define _FEC_H_

#ifdef __cplusplus
extern "C" {
#endif

/* r=1/2 k=9 convolutional encoder polynomials */
#define	V29POLYA	0x1af
#define	V29POLYB	0x11d

void *create_viterbi29_port(int len);
void set_viterbi29_polynomial_port(int polys[2]);
int init_viterbi29_port(void *p, int starting_state);
int chainback_viterbi29_port(void *p, unsigned char *data, unsigned int nbits, unsigned int endstate);
void delete_viterbi29_port(void *p);
int update_viterbi29_blk_port(void *p, unsigned char *syms, int nbits);

void partab_init();

static inline int parityb(unsigned char x) {
extern unsigned char Partab[256];
extern int P_init;
if (!P_init) {
partab_init();
}
return Partab[x];
}

static inline int parity(int x) {
/* Fold down to one byte */
x ^= (x >> 16);
x ^= (x >> 8);
return parityb(x);
}

#ifdef __cplusplus
}
#endif

#endif /* _FEC_H_ */
``````

### viterbi29_port.c++

``````/* K=9 r=1/2 Viterbi decoder in portable C
* Copyright Feb 2004, Phil Karn, KA9Q
* May be used under the terms of the GNU Lesser General Public License (LGPL)
*/
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include "fec.h"

typedef union { unsigned int w[256]; } metric_t;
typedef union { unsigned long w[8]; } decision_t;

static union { unsigned char c[128]; } Branchtab29[2];
static int Init = 0;

/* State info for instance of Viterbi decoder */
struct v29 {
metric_t metrics1; /* path metric buffer 1 */
metric_t metrics2; /* path metric buffer 2 */
decision_t* dp;          /* Pointer to current decision */
metric_t* old_metrics, * new_metrics; /* Pointers to path metrics, swapped on every bit */
decision_t* decisions;   /* Beginning of decisions for block */
};

/* Initialize Viterbi decoder for start of new frame */
int init_viterbi29_port(void* p, int starting_state) {
struct v29* vp = p;
int i;

if (p == NULL)
return -1;
for (i = 0; i < 256; i++)
vp->metrics1.w[i] = 63;

vp->old_metrics = &vp->metrics1;
vp->new_metrics = &vp->metrics2;
vp->dp = vp->decisions;
vp->old_metrics->w[starting_state & 255] = 0; /* Bias known start state */
return 0;
}

void set_viterbi29_polynomial_port(int polys[2]) {
int state;

for (state = 0; state < 128; state++) {
Branchtab29[0].c[state] = (polys[0] < 0) ^ parity((2 * state) & abs(polys[0])) ? 255 : 0;
Branchtab29[1].c[state] = (polys[1] < 0) ^ parity((2 * state) & abs(polys[1])) ? 255 : 0;
}
Init++;
}

/* Create a new instance of a Viterbi decoder */
void* create_viterbi29_port(int len) {
struct v29* vp;

if (!Init) {
int polys[2] = { V29POLYA,V29POLYB };
set_viterbi29_polynomial_port(polys);
}
if ((vp = (struct v29*)malloc(sizeof(struct v29))) == NULL)
return NULL;

if ((vp->decisions = (decision_t*)malloc((len + 8) * sizeof(decision_t))) == NULL) {
free(vp);
return NULL;
}
init_viterbi29_port(vp, 0);

return vp;
}

/* Viterbi chainback */
int chainback_viterbi29_port(
void* p,
unsigned char* data,	/* Decoded output data */
unsigned int nbits,		/* Number of data bits */
unsigned int endstate)  /* Terminal encoder state */
{
struct v29* vp = p;
decision_t* d;

if (p == NULL)
return -1;

d = vp->decisions;
/* Make room beyond the end of the encoder register so we can
* accumulate a full byte of decoded data
*/
endstate %= 256;

/* The store into data[] only needs to be done every 8 bits.
* But this avoids a conditional branch, and the writes will
* combine in the cache anyway
*/
d += 8; /* Look past tail */
while (nbits-- != 0) {
int k;

k = (d[nbits].w[(endstate) / 32] >> (endstate % 32)) & 1;
data[nbits >> 3] = endstate = (endstate >> 1) | (k << 7);
}
return 0;
}

/* Delete instance of a Viterbi decoder */
void delete_viterbi29_port(void* p) {
struct v29* vp = p;

if (vp != NULL) {
free(vp->decisions);
free(vp);
}
}

/* C-language butterfly */
#define BFLY(i) {
unsigned int metric,m0,m1,decision;
metric = (Branchtab29[0].c[i] ^ sym0) + (Branchtab29[1].c[i] ^ sym1);
m0 = vp->old_metrics->w[i] + metric;
m1 = vp->old_metrics->w[i+128] + (510 - metric);
decision = (signed int)(m0-m1) > 0;
vp->new_metrics->w[2*i] = decision ? m1 : m0;
d->w[i/16] |= decision << ((2*i)&31);
m0 -= (metric+metric-510);
m1 += (metric+metric-510);
decision = (signed int)(m0-m1) > 0;
vp->new_metrics->w[2*i+1] = decision ? m1 : m0;
d->w[i/16] |= decision << ((2*i+1)&31);
}

/* Update decoder with a block of demodulated symbols
* Note that nbits is the number of decoded data bits, not the number
* of symbols!
*/
int update_viterbi29_blk_port(void* p, unsigned char* syms, int nbits) {
struct v29* vp = p;
decision_t* d;

if (p == NULL)
return -1;

d = (decision_t*)vp->dp;
while (nbits--) {
void* tmp;
unsigned char sym0, sym1;
int i;

for (i = 0; i < 8; i++)
d->w[i] = 0;
sym0 = *syms++;
sym1 = *syms++;

for (i = 0; i < 128; i++)
BFLY(i);

d++;
tmp = vp->old_metrics;
vp->old_metrics = vp->new_metrics;
vp->new_metrics = tmp;
}
vp->dp = d;
return 0;
}
``````