Making codebase C90-compliant
This commit is contained in:
parent
0f2b8d4a09
commit
60a009b457
10 changed files with 76 additions and 86 deletions
|
@ -182,8 +182,8 @@ int main(int argc, char **argv) {
|
||||||
int ret;
|
int ret;
|
||||||
unsigned char buf[8];
|
unsigned char buf[8];
|
||||||
float features[4][NB_TOTAL_FEATURES];
|
float features[4][NB_TOTAL_FEATURES];
|
||||||
//int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;
|
/*int c0_id, main_pitch, modulation, corr_id, vq_end[3], vq_mid, interp_id;*/
|
||||||
//ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);
|
/*ret = fscanf(f1, "%d %d %d %d %d %d %d %d %d\n", &c0_id, &main_pitch, &modulation, &corr_id, &vq_end[0], &vq_end[1], &vq_end[2], &vq_mid, &interp_id);*/
|
||||||
ret = fread(buf, 1, 8, f1);
|
ret = fread(buf, 1, 8, f1);
|
||||||
if (ret != 8) break;
|
if (ret != 8) break;
|
||||||
decode_packet(features, vq_mem, buf);
|
decode_packet(features, vq_mem, buf);
|
||||||
|
@ -279,7 +279,7 @@ int main(int argc, char **argv) {
|
||||||
}
|
}
|
||||||
st->pcount = 0;
|
st->pcount = 0;
|
||||||
}
|
}
|
||||||
//if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);
|
/*if (fpcm) fwrite(pcm, sizeof(short), FRAME_SIZE, fpcm);*/
|
||||||
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
|
for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
|
||||||
old_speech_gain = speech_gain;
|
old_speech_gain = speech_gain;
|
||||||
count++;
|
count++;
|
||||||
|
|
|
@ -56,21 +56,22 @@ static void print_vector(float *x, int N)
|
||||||
#ifdef END2END
|
#ifdef END2END
|
||||||
void rc2lpc(float *lpc, const float *rc)
|
void rc2lpc(float *lpc, const float *rc)
|
||||||
{
|
{
|
||||||
|
int i, j, k;
|
||||||
float tmp[LPC_ORDER];
|
float tmp[LPC_ORDER];
|
||||||
float ntmp[LPC_ORDER] = {0.0};
|
float ntmp[LPC_ORDER] = {0.0};
|
||||||
RNN_COPY(tmp, rc, LPC_ORDER);
|
RNN_COPY(tmp, rc, LPC_ORDER);
|
||||||
for(int i = 0; i < LPC_ORDER ; i++)
|
for(i = 0; i < LPC_ORDER ; i++)
|
||||||
{
|
{
|
||||||
for(int j = 0; j <= i-1; j++)
|
for(j = 0; j <= i-1; j++)
|
||||||
{
|
{
|
||||||
ntmp[j] = tmp[j] + tmp[i]*tmp[i - j - 1];
|
ntmp[j] = tmp[j] + tmp[i]*tmp[i - j - 1];
|
||||||
}
|
}
|
||||||
for(int k = 0; k <= i-1; k++)
|
for(k = 0; k <= i-1; k++)
|
||||||
{
|
{
|
||||||
tmp[k] = ntmp[k];
|
tmp[k] = ntmp[k];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for(int i = 0; i < LPC_ORDER ; i++)
|
for(i = 0; i < LPC_ORDER ; i++)
|
||||||
{
|
{
|
||||||
lpc[i] = tmp[i];
|
lpc[i] = tmp[i];
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,7 +92,7 @@ void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const un
|
||||||
int sub;
|
int sub;
|
||||||
int voiced = 1;
|
int voiced = 1;
|
||||||
float frame_corr;
|
float frame_corr;
|
||||||
;
|
float sign;
|
||||||
unpacker bits;
|
unpacker bits;
|
||||||
|
|
||||||
bits_unpacker_init(&bits, buf, 8);
|
bits_unpacker_init(&bits, buf, 8);
|
||||||
|
@ -105,7 +105,7 @@ void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const un
|
||||||
vq_end[2] = bits_unpack(&bits, 10);
|
vq_end[2] = bits_unpack(&bits, 10);
|
||||||
vq_mid = bits_unpack(&bits, 13);
|
vq_mid = bits_unpack(&bits, 13);
|
||||||
interp_id = bits_unpack(&bits, 3);
|
interp_id = bits_unpack(&bits, 3);
|
||||||
//fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
|
/*fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id, main_pitch, modulation, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);*/
|
||||||
|
|
||||||
|
|
||||||
for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
|
for (i=0;i<4;i++) RNN_CLEAR(&features[i][0], NB_TOTAL_FEATURES);
|
||||||
|
@ -133,7 +133,7 @@ void decode_packet(float features[4][NB_TOTAL_FEATURES], float *vq_mem, const un
|
||||||
features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
|
features[3][i+1] = ceps_codebook1[vq_end[0]*NB_BANDS_1 + i] + ceps_codebook2[vq_end[1]*NB_BANDS_1 + i] + ceps_codebook3[vq_end[2]*NB_BANDS_1 + i];
|
||||||
}
|
}
|
||||||
|
|
||||||
float sign = 1;
|
sign = 1;
|
||||||
if (vq_mid >= 4096) {
|
if (vq_mid >= 4096) {
|
||||||
vq_mid -= 4096;
|
vq_mid -= 4096;
|
||||||
sign = -1;
|
sign = -1;
|
||||||
|
|
|
@ -43,9 +43,6 @@
|
||||||
#include "lpcnet.h"
|
#include "lpcnet.h"
|
||||||
|
|
||||||
|
|
||||||
//#define NB_FEATURES (NB_BANDS+2+LPC_ORDER)
|
|
||||||
|
|
||||||
|
|
||||||
#define SURVIVORS 5
|
#define SURVIVORS 5
|
||||||
|
|
||||||
|
|
||||||
|
@ -158,10 +155,10 @@ int quantize_3stage_mbest(float *x, int entry[3])
|
||||||
index2[m][1] = curr_index[m];
|
index2[m][1] = curr_index[m];
|
||||||
glob_dist[m] = curr_dist[m];
|
glob_dist[m] = curr_dist[m];
|
||||||
}
|
}
|
||||||
//printf("%f ", glob_dist[0]);
|
/*printf("%f ", glob_dist[0]);*/
|
||||||
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
|
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
|
||||||
m=0;
|
|
||||||
int pos;
|
int pos;
|
||||||
|
m=0;
|
||||||
for (pos=0;pos<SURVIVORS;pos++) {
|
for (pos=0;pos<SURVIVORS;pos++) {
|
||||||
if (curr_dist[m] < glob_dist[pos]) {
|
if (curr_dist[m] < glob_dist[pos]) {
|
||||||
int j;
|
int j;
|
||||||
|
@ -192,10 +189,10 @@ int quantize_3stage_mbest(float *x, int entry[3])
|
||||||
index3[m][2] = curr_index[m];
|
index3[m][2] = curr_index[m];
|
||||||
glob_dist[m] = curr_dist[m];
|
glob_dist[m] = curr_dist[m];
|
||||||
}
|
}
|
||||||
//printf("%f ", glob_dist[0]);
|
/*printf("%f ", glob_dist[0]);*/
|
||||||
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
|
} else if (curr_dist[0] < glob_dist[SURVIVORS-1]) {
|
||||||
m=0;
|
|
||||||
int pos;
|
int pos;
|
||||||
|
m=0;
|
||||||
for (pos=0;pos<SURVIVORS;pos++) {
|
for (pos=0;pos<SURVIVORS;pos++) {
|
||||||
if (curr_dist[m] < glob_dist[pos]) {
|
if (curr_dist[m] < glob_dist[pos]) {
|
||||||
int j;
|
int j;
|
||||||
|
@ -217,14 +214,14 @@ int quantize_3stage_mbest(float *x, int entry[3])
|
||||||
entry[0] = id = index3[0][0];
|
entry[0] = id = index3[0][0];
|
||||||
entry[1] = id2 = index3[0][1];
|
entry[1] = id2 = index3[0][1];
|
||||||
entry[2] = id3 = index3[0][2];
|
entry[2] = id3 = index3[0][2];
|
||||||
//printf("%f ", glob_dist[0]);
|
/*printf("%f ", glob_dist[0]);*/
|
||||||
for (i=0;i<NB_BANDS_1;i++) {
|
for (i=0;i<NB_BANDS_1;i++) {
|
||||||
x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
|
x[i] -= ceps_codebook1[id*NB_BANDS_1 + i];
|
||||||
}
|
}
|
||||||
for (i=0;i<NB_BANDS_1;i++) {
|
for (i=0;i<NB_BANDS_1;i++) {
|
||||||
x[i] -= ceps_codebook2[id2*NB_BANDS_1 + i];
|
x[i] -= ceps_codebook2[id2*NB_BANDS_1 + i];
|
||||||
}
|
}
|
||||||
//id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);
|
/*id3 = vq_quantize(ceps_codebook3, 1024, x, NB_BANDS_1, NULL);*/
|
||||||
for (i=0;i<NB_BANDS_1;i++) {
|
for (i=0;i<NB_BANDS_1;i++) {
|
||||||
x[i] = ceps_codebook1[id*NB_BANDS_1 + i] + ceps_codebook2[id2*NB_BANDS_1 + i] + ceps_codebook3[id3*NB_BANDS_1 + i];
|
x[i] = ceps_codebook1[id*NB_BANDS_1 + i] + ceps_codebook2[id2*NB_BANDS_1 + i] + ceps_codebook3[id3*NB_BANDS_1 + i];
|
||||||
}
|
}
|
||||||
|
@ -304,7 +301,7 @@ int quantize_diff(float *x, float *left, float *right, float *codebook, int bits
|
||||||
for (i=0;i<NB_BANDS;i++) {
|
for (i=0;i<NB_BANDS;i++) {
|
||||||
x[i] = pred[(id&MULTI_MASK)*NB_BANDS + i] + s*codebook[id*NB_BANDS + i];
|
x[i] = pred[(id&MULTI_MASK)*NB_BANDS + i] + s*codebook[id*NB_BANDS + i];
|
||||||
}
|
}
|
||||||
//printf("%d %f ", id&MULTI_MASK, s);
|
/*printf("%d %f ", id&MULTI_MASK, s);*/
|
||||||
if (0) {
|
if (0) {
|
||||||
float err = 0;
|
float err = 0;
|
||||||
for (i=0;i<NB_BANDS;i++) {
|
for (i=0;i<NB_BANDS;i++) {
|
||||||
|
@ -362,7 +359,7 @@ void interp_diff(float *x, float *left, float *right, float *codebook, int bits,
|
||||||
best_pred = k;
|
best_pred = k;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//printf("%d ", best_pred);
|
/*printf("%d ", best_pred);*/
|
||||||
for (i=0;i<NB_BANDS;i++) {
|
for (i=0;i<NB_BANDS;i++) {
|
||||||
x[i] = pred[best_pred*NB_BANDS + i];
|
x[i] = pred[best_pred*NB_BANDS + i];
|
||||||
}
|
}
|
||||||
|
@ -394,7 +391,7 @@ int double_interp_search(float features[4][NB_TOTAL_FEATURES], const float *mem)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//printf("%d %d %f %d %f\n", id0, id1, dist[0][id0] + dist[1][id1], best_id, min_dist);
|
/*printf("%d %d %f %d %f\n", id0, id1, dist[0][id0] + dist[1][id1], best_id, min_dist);*/
|
||||||
return best_id - (best_id >= FORBIDDEN_INTERP);
|
return best_id - (best_id >= FORBIDDEN_INTERP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -531,7 +528,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
||||||
st->pitch_mem[0] = aligned_in[i];
|
st->pitch_mem[0] = aligned_in[i];
|
||||||
st->exc_buf[PITCH_MAX_PERIOD+i] = sum + .7*st->pitch_filt;
|
st->exc_buf[PITCH_MAX_PERIOD+i] = sum + .7*st->pitch_filt;
|
||||||
st->pitch_filt = sum;
|
st->pitch_filt = sum;
|
||||||
//printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);
|
/*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
|
||||||
}
|
}
|
||||||
/* Cross-correlation on half-frames. */
|
/* Cross-correlation on half-frames. */
|
||||||
for (sub=0;sub<2;sub++) {
|
for (sub=0;sub<2;sub++) {
|
||||||
|
@ -539,7 +536,7 @@ void compute_frame_features(LPCNetEncState *st, const float *in) {
|
||||||
celt_pitch_xcorr(&st->exc_buf[PITCH_MAX_PERIOD+off], st->exc_buf+off, xcorr, FRAME_SIZE/2, PITCH_MAX_PERIOD);
|
celt_pitch_xcorr(&st->exc_buf[PITCH_MAX_PERIOD+off], st->exc_buf+off, xcorr, FRAME_SIZE/2, PITCH_MAX_PERIOD);
|
||||||
ener0 = celt_inner_prod(&st->exc_buf[PITCH_MAX_PERIOD+off], &st->exc_buf[PITCH_MAX_PERIOD+off], FRAME_SIZE/2);
|
ener0 = celt_inner_prod(&st->exc_buf[PITCH_MAX_PERIOD+off], &st->exc_buf[PITCH_MAX_PERIOD+off], FRAME_SIZE/2);
|
||||||
st->frame_weight[2+2*st->pcount+sub] = ener0;
|
st->frame_weight[2+2*st->pcount+sub] = ener0;
|
||||||
//printf("%f\n", st->frame_weight[2+2*st->pcount+sub]);
|
/*printf("%f\n", st->frame_weight[2+2*st->pcount+sub]);*/
|
||||||
for (i=0;i<PITCH_MAX_PERIOD;i++) {
|
for (i=0;i<PITCH_MAX_PERIOD;i++) {
|
||||||
ener = (1 + ener0 + celt_inner_prod(&st->exc_buf[i+off], &st->exc_buf[i+off], FRAME_SIZE/2));
|
ener = (1 + ener0 + celt_inner_prod(&st->exc_buf[i+off], &st->exc_buf[i+off], FRAME_SIZE/2));
|
||||||
st->xc[2+2*st->pcount+sub][i] = 2*xcorr[i] / ener;
|
st->xc[2+2*st->pcount+sub][i] = 2*xcorr[i] / ener;
|
||||||
|
@ -619,8 +616,8 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
}
|
}
|
||||||
/* Renormalize. */
|
/* Renormalize. */
|
||||||
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
|
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
|
||||||
//for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
|
/*for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
|
||||||
//printf("\n");
|
printf("\n");*/
|
||||||
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
|
||||||
st->pitch_max_path_all = max_path_all;
|
st->pitch_max_path_all = max_path_all;
|
||||||
st->best_i = best_i;
|
st->best_i = best_i;
|
||||||
|
@ -636,9 +633,9 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
frame_corr /= 8;
|
frame_corr /= 8;
|
||||||
if (quantize && frame_corr < 0) frame_corr = 0;
|
if (quantize && frame_corr < 0) frame_corr = 0;
|
||||||
for (sub=0;sub<8;sub++) {
|
for (sub=0;sub<8;sub++) {
|
||||||
//printf("%d %f\n", best[2+sub], frame_corr);
|
/*printf("%d %f\n", best[2+sub], frame_corr);*/
|
||||||
}
|
}
|
||||||
//printf("\n");
|
/*printf("\n");*/
|
||||||
for (sub=2;sub<10;sub++) {
|
for (sub=2;sub<10;sub++) {
|
||||||
w = st->frame_weight[sub];
|
w = st->frame_weight[sub];
|
||||||
sw += w;
|
sw += w;
|
||||||
|
@ -663,7 +660,7 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
corr_id = (int)floor(frame_corr/.075f);
|
corr_id = (int)floor(frame_corr/.075f);
|
||||||
if (quantize) frame_corr = 0.0375f + .075f*corr_id;
|
if (quantize) frame_corr = 0.0375f + .075f*corr_id;
|
||||||
}
|
}
|
||||||
//best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);
|
/*best_b = (sxx*sy - sx*sxy)/(sw*sxx - sx*sx);*/
|
||||||
best_b = (sy - best_a*sx)/sw;
|
best_b = (sy - best_a*sx)/sw;
|
||||||
/* Quantizing the pitch as "main" pitch + slope. */
|
/* Quantizing the pitch as "main" pitch + slope. */
|
||||||
center_pitch = best_b+5.5*best_a;
|
center_pitch = best_b+5.5*best_a;
|
||||||
|
@ -671,9 +668,9 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
main_pitch = IMAX(0, IMIN(63, main_pitch));
|
main_pitch = IMAX(0, IMIN(63, main_pitch));
|
||||||
modulation = (int)floor(.5 + 16*7*best_a/center_pitch);
|
modulation = (int)floor(.5 + 16*7*best_a/center_pitch);
|
||||||
modulation = IMAX(-3, IMIN(3, modulation));
|
modulation = IMAX(-3, IMIN(3, modulation));
|
||||||
//printf("%d %d\n", main_pitch, modulation);
|
/*printf("%d %d\n", main_pitch, modulation);*/
|
||||||
//printf("%f %f\n", best_a/center_pitch, best_corr);
|
/*printf("%f %f\n", best_a/center_pitch, best_corr);*/
|
||||||
//for (sub=2;sub<10;sub++) printf("%f %d %f\n", best_b + sub*best_a, best[sub], best_corr);
|
/*for (sub=2;sub<10;sub++) printf("%f %d %f\n", best_b + sub*best_a, best[sub], best_corr);*/
|
||||||
for (sub=0;sub<4;sub++) {
|
for (sub=0;sub<4;sub++) {
|
||||||
if (quantize) {
|
if (quantize) {
|
||||||
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
float p = pow(2.f, main_pitch/21.)*PITCH_MIN_PERIOD;
|
||||||
|
@ -685,13 +682,13 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
|
st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
|
||||||
st->features[sub][NB_BANDS + 1] = frame_corr-.5;
|
st->features[sub][NB_BANDS + 1] = frame_corr-.5;
|
||||||
}
|
}
|
||||||
//printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);
|
/*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
|
||||||
}
|
}
|
||||||
//printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
|
/*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/
|
||||||
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
|
||||||
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
|
||||||
if (quantize) {
|
if (quantize) {
|
||||||
//printf("%f\n", st->features[3][0]);
|
/*printf("%f\n", st->features[3][0]);*/
|
||||||
c0_id = (int)floor(.5 + st->features[3][0]*4);
|
c0_id = (int)floor(.5 + st->features[3][0]*4);
|
||||||
c0_id = IMAX(-64, IMIN(63, c0_id));
|
c0_id = IMAX(-64, IMIN(63, c0_id));
|
||||||
st->features[3][0] = c0_id/4.;
|
st->features[3][0] = c0_id/4.;
|
||||||
|
@ -705,11 +702,11 @@ void process_superframe(LPCNetEncState *st, unsigned char *buf, FILE *ffeat, int
|
||||||
lpc_from_cepstrum(st->lpc, st->features[sub]);
|
lpc_from_cepstrum(st->lpc, st->features[sub]);
|
||||||
for (i=0;i<LPC_ORDER;i++) st->features[sub][NB_BANDS+2+i] = st->lpc[i];
|
for (i=0;i<LPC_ORDER;i++) st->features[sub][NB_BANDS+2+i] = st->lpc[i];
|
||||||
}
|
}
|
||||||
//printf("\n");
|
/*printf("\n");*/
|
||||||
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
|
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
|
||||||
if (encode) {
|
if (encode) {
|
||||||
packer bits;
|
packer bits;
|
||||||
//fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id+64, main_pitch, voiced ? modulation+4 : 0, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);
|
/*fprintf(stdout, "%d %d %d %d %d %d %d %d %d\n", c0_id+64, main_pitch, voiced ? modulation+4 : 0, corr_id, vq_end[0], vq_end[1], vq_end[2], vq_mid, interp_id);*/
|
||||||
bits_packer_init(&bits, buf, 8);
|
bits_packer_init(&bits, buf, 8);
|
||||||
bits_pack(&bits, c0_id+64, 7);
|
bits_pack(&bits, c0_id+64, 7);
|
||||||
bits_pack(&bits, main_pitch, 6);
|
bits_pack(&bits, main_pitch, 6);
|
||||||
|
@ -765,8 +762,8 @@ void process_multi_frame(LPCNetEncState *st, FILE *ffeat) {
|
||||||
}
|
}
|
||||||
/* Renormalize. */
|
/* Renormalize. */
|
||||||
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
|
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
|
||||||
//for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
|
/*for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
|
||||||
//printf("\n");
|
printf("\n");*/
|
||||||
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
|
||||||
st->pitch_max_path_all = max_path_all;
|
st->pitch_max_path_all = max_path_all;
|
||||||
st->best_i = best_i;
|
st->best_i = best_i;
|
||||||
|
@ -783,12 +780,12 @@ void process_multi_frame(LPCNetEncState *st, FILE *ffeat) {
|
||||||
for (sub=0;sub<4;sub++) {
|
for (sub=0;sub<4;sub++) {
|
||||||
st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
|
st->features[sub][NB_BANDS] = .01*(IMAX(66, IMIN(510, best[2+2*sub]+best[2+2*sub+1]))-200);
|
||||||
st->features[sub][NB_BANDS + 1] = frame_corr-.5;
|
st->features[sub][NB_BANDS + 1] = frame_corr-.5;
|
||||||
//printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);
|
/*printf("%f %d %f\n", st->features[sub][NB_BANDS], best[2+2*sub], frame_corr);*/
|
||||||
}
|
}
|
||||||
//printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);
|
/*printf("%d %f %f %f\n", best_period, best_a, best_b, best_corr);*/
|
||||||
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->xc[0][0], &st->xc[8][0], PITCH_MAX_PERIOD);
|
||||||
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->xc[1][0], &st->xc[9][0], PITCH_MAX_PERIOD);
|
||||||
//printf("\n");
|
/*printf("\n");*/
|
||||||
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
|
RNN_COPY(st->vq_mem, &st->features[3][0], NB_BANDS);
|
||||||
if (ffeat) {
|
if (ffeat) {
|
||||||
for (i=0;i<4;i++) {
|
for (i=0;i<4;i++) {
|
||||||
|
@ -833,8 +830,8 @@ void process_single_frame(LPCNetEncState *st, FILE *ffeat) {
|
||||||
}
|
}
|
||||||
/* Renormalize. */
|
/* Renormalize. */
|
||||||
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
|
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) st->pitch_max_path[1][i] -= max_path_all;
|
||||||
//for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
|
/*for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) printf("%f ", st->pitch_max_path[1][i]);
|
||||||
//printf("\n");
|
printf("\n");*/
|
||||||
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
|
RNN_COPY(&st->pitch_max_path[0][0], &st->pitch_max_path[1][0], PITCH_MAX_PERIOD);
|
||||||
st->pitch_max_path_all = max_path_all;
|
st->pitch_max_path_all = max_path_all;
|
||||||
st->best_i = best_i;
|
st->best_i = best_i;
|
||||||
|
|
|
@ -57,7 +57,7 @@ LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
|
||||||
short output[FRAME_SIZE];
|
short output[FRAME_SIZE];
|
||||||
st->enc.pcount = 0;
|
st->enc.pcount = 0;
|
||||||
if (st->skip_analysis) {
|
if (st->skip_analysis) {
|
||||||
//fprintf(stderr, "skip update\n");
|
/*fprintf(stderr, "skip update\n");*/
|
||||||
if (st->blend) {
|
if (st->blend) {
|
||||||
short tmp[FRAME_SIZE-TRAINING_OFFSET];
|
short tmp[FRAME_SIZE-TRAINING_OFFSET];
|
||||||
lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
|
lpcnet_synthesize_tail_impl(&st->lpcnet, tmp, FRAME_SIZE-TRAINING_OFFSET, 0);
|
||||||
|
@ -73,10 +73,10 @@ LPCNET_EXPORT int lpcnet_plc_update(LPCNetPLCState *st, short *pcm) {
|
||||||
RNN_COPY(&st->pcm[st->pcm_fill], pcm, FRAME_SIZE);
|
RNN_COPY(&st->pcm[st->pcm_fill], pcm, FRAME_SIZE);
|
||||||
st->pcm_fill += FRAME_SIZE;
|
st->pcm_fill += FRAME_SIZE;
|
||||||
}
|
}
|
||||||
//fprintf(stderr, "fill at %d\n", st->pcm_fill);
|
/*fprintf(stderr, "fill at %d\n", st->pcm_fill);*/
|
||||||
}
|
}
|
||||||
/* Update state. */
|
/* Update state. */
|
||||||
//fprintf(stderr, "update state\n");
|
/*fprintf(stderr, "update state\n");*/
|
||||||
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
|
for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
|
||||||
preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
|
preemphasis(x, &st->enc.mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
|
||||||
compute_frame_features(&st->enc, x);
|
compute_frame_features(&st->enc, x);
|
||||||
|
@ -105,7 +105,7 @@ LPCNET_EXPORT int lpcnet_plc_conceal(LPCNetPLCState *st, short *pcm) {
|
||||||
/* If we concealed the previous frame, finish synthesizing the rest of the samples. */
|
/* If we concealed the previous frame, finish synthesizing the rest of the samples. */
|
||||||
/* FIXME: Copy/predict features. */
|
/* FIXME: Copy/predict features. */
|
||||||
while (st->pcm_fill > 0) {
|
while (st->pcm_fill > 0) {
|
||||||
//fprintf(stderr, "update state for PLC %d\n", st->pcm_fill);
|
/*fprintf(stderr, "update state for PLC %d\n", st->pcm_fill);*/
|
||||||
int update_count;
|
int update_count;
|
||||||
update_count = IMIN(st->pcm_fill, FRAME_SIZE);
|
update_count = IMIN(st->pcm_fill, FRAME_SIZE);
|
||||||
RNN_COPY(output, &st->pcm[0], update_count);
|
RNN_COPY(output, &st->pcm[0], update_count);
|
||||||
|
|
|
@ -144,6 +144,8 @@ void compute_mdense(const MDenseLayer *layer, float *output, const float *input)
|
||||||
int sample_mdense(const MDenseLayer *layer, const float *input, const float *sampling_logit_table, kiss99_ctx *rng)
|
int sample_mdense(const MDenseLayer *layer, const float *input, const float *sampling_logit_table, kiss99_ctx *rng)
|
||||||
{
|
{
|
||||||
int b, j, N, M, C, stride;
|
int b, j, N, M, C, stride;
|
||||||
|
int val=0;
|
||||||
|
float thresholds[8];
|
||||||
M = layer->nb_inputs;
|
M = layer->nb_inputs;
|
||||||
N = layer->nb_neurons;
|
N = layer->nb_neurons;
|
||||||
C = layer->nb_channels;
|
C = layer->nb_channels;
|
||||||
|
@ -151,8 +153,6 @@ int sample_mdense(const MDenseLayer *layer, const float *input, const float *sam
|
||||||
stride = M*C;
|
stride = M*C;
|
||||||
|
|
||||||
celt_assert(N <= DUAL_FC_OUT_SIZE);
|
celt_assert(N <= DUAL_FC_OUT_SIZE);
|
||||||
int val=0;
|
|
||||||
float thresholds[8];
|
|
||||||
|
|
||||||
/* Computing all the random thresholds in advance. These thresholds are directly
|
/* Computing all the random thresholds in advance. These thresholds are directly
|
||||||
based on the logit to avoid computing the sigmoid.*/
|
based on the logit to avoid computing the sigmoid.*/
|
||||||
|
@ -181,7 +181,7 @@ int sample_mdense(const MDenseLayer *layer, const float *input, const float *sam
|
||||||
sum1 = layer->factor[i]*tanh_approx(sum1);
|
sum1 = layer->factor[i]*tanh_approx(sum1);
|
||||||
sum2 = layer->factor[N + i]*tanh_approx(sum2);
|
sum2 = layer->factor[N + i]*tanh_approx(sum2);
|
||||||
sum1 += sum2;
|
sum1 += sum2;
|
||||||
//sum1 = 1.f/(1 + exp(-sum1));
|
/*sum1 = 1.f/(1 + exp(-sum1));*/
|
||||||
#if 1 /* Sample the decision based on the logit. */
|
#if 1 /* Sample the decision based on the logit. */
|
||||||
bit = thresholds[b] < sum1;
|
bit = thresholds[b] < sum1;
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -37,9 +37,6 @@
|
||||||
|
|
||||||
#include "pitch.h"
|
#include "pitch.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
//#include "modes.h"
|
|
||||||
//#include "stack_alloc.h"
|
|
||||||
//#include "mathops.h"
|
|
||||||
#include "celt_lpc.h"
|
#include "celt_lpc.h"
|
||||||
#include "math.h"
|
#include "math.h"
|
||||||
|
|
||||||
|
|
|
@ -34,8 +34,6 @@
|
||||||
#ifndef PITCH_H
|
#ifndef PITCH_H
|
||||||
#define PITCH_H
|
#define PITCH_H
|
||||||
|
|
||||||
//#include "modes.h"
|
|
||||||
//#include "cpu_support.h"
|
|
||||||
#include "arch.h"
|
#include "arch.h"
|
||||||
|
|
||||||
/* OPT: This is the kernel you really want to optimize. It gets used a lot
|
/* OPT: This is the kernel you really want to optimize. It gets used a lot
|
||||||
|
|
|
@ -47,7 +47,7 @@
|
||||||
|
|
||||||
#ifndef DISABLE_DOT_PROD
|
#ifndef DISABLE_DOT_PROD
|
||||||
#define DOT_PROD
|
#define DOT_PROD
|
||||||
//#define USE_SU_BIAS
|
/*#define USE_SU_BIAS*/
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef DOT_PROD
|
#ifdef DOT_PROD
|
||||||
|
|
|
@ -320,15 +320,12 @@ static inline void vector_ps_to_epi8(unsigned char *x, const float *_x, int len)
|
||||||
__m256 xf;
|
__m256 xf;
|
||||||
__m256i xi;
|
__m256i xi;
|
||||||
xf = _mm256_loadu_ps(&_x[i]);
|
xf = _mm256_loadu_ps(&_x[i]);
|
||||||
//xf = _mm256_mul_ps(xf, const127);
|
|
||||||
//xf = _mm256_add_ps(xf, const127);
|
|
||||||
xf = _mm256_fmadd_ps(xf, const127, const127);
|
xf = _mm256_fmadd_ps(xf, const127, const127);
|
||||||
xi = _mm256_cvtps_epi32(xf);
|
xi = _mm256_cvtps_epi32(xf);
|
||||||
xi = _mm256_packus_epi32(xi, _mm256_setzero_si256());
|
xi = _mm256_packus_epi32(xi, _mm256_setzero_si256());
|
||||||
xi = _mm256_permute4x64_epi64(xi, 0xD8);
|
xi = _mm256_permute4x64_epi64(xi, 0xD8);
|
||||||
xi = _mm256_packus_epi16(xi, _mm256_setzero_si256());
|
xi = _mm256_packus_epi16(xi, _mm256_setzero_si256());
|
||||||
xi = _mm256_permutevar8x32_epi32(xi, _mm256_setr_epi32(0,1, 0,0, 0,0, 0,0));
|
xi = _mm256_permutevar8x32_epi32(xi, _mm256_setr_epi32(0,1, 0,0, 0,0, 0,0));
|
||||||
//xi = _mm256_permute4x64_epi64(xi, 0x);
|
|
||||||
_mm256_storeu_si256 ((__m256i *)&x[i], xi);
|
_mm256_storeu_si256 ((__m256i *)&x[i], xi);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -618,7 +615,7 @@ static inline void sgemv_accum16(float *out, const float *weights, int rows, int
|
||||||
int i, j;
|
int i, j;
|
||||||
for (i=0;i<rows;i+=16)
|
for (i=0;i<rows;i+=16)
|
||||||
{
|
{
|
||||||
float * restrict y;
|
float *y;
|
||||||
__m256 vy0, vy8;
|
__m256 vy0, vy8;
|
||||||
y = &out[i];
|
y = &out[i];
|
||||||
vy0 = _mm256_loadu_ps(&y[0]);
|
vy0 = _mm256_loadu_ps(&y[0]);
|
||||||
|
@ -644,7 +641,7 @@ static inline void sparse_sgemv_accum16(float *out, const float *weights, int ro
|
||||||
int i, j;
|
int i, j;
|
||||||
for (i=0;i<rows;i+=16)
|
for (i=0;i<rows;i+=16)
|
||||||
{
|
{
|
||||||
float * restrict y;
|
float *y;
|
||||||
int cols;
|
int cols;
|
||||||
__m256 vy0, vy8;
|
__m256 vy0, vy8;
|
||||||
y = &out[i];
|
y = &out[i];
|
||||||
|
@ -692,7 +689,7 @@ static inline void sgemv_accum8x4(float *_out, const qweight *w, int rows, int c
|
||||||
unsigned char x[MAX_INPUTS];
|
unsigned char x[MAX_INPUTS];
|
||||||
(void)col_stride;
|
(void)col_stride;
|
||||||
ones = _mm256_set1_epi16(1);
|
ones = _mm256_set1_epi16(1);
|
||||||
//for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);
|
/*for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);*/
|
||||||
vector_ps_to_epi8(x, _x, cols);
|
vector_ps_to_epi8(x, _x, cols);
|
||||||
for (i=0;i<rows;i+=8)
|
for (i=0;i<rows;i+=8)
|
||||||
{
|
{
|
||||||
|
@ -709,26 +706,26 @@ static inline void sgemv_accum8x4(float *_out, const qweight *w, int rows, int c
|
||||||
__m256i vxj;
|
__m256i vxj;
|
||||||
__m256i vw;
|
__m256i vw;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[j]);
|
vxj = _mm256_set1_epi32(*(int*)&x[j]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[j+4]);
|
vxj = _mm256_set1_epi32(*(int*)&x[j+4]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[j+8]);
|
vxj = _mm256_set1_epi32(*(int*)&x[j+8]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[j+12]);
|
vxj = _mm256_set1_epi32(*(int*)&x[j+12]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
|
@ -740,8 +737,8 @@ static inline void sgemv_accum8x4(float *_out, const qweight *w, int rows, int c
|
||||||
__m256i vxj;
|
__m256i vxj;
|
||||||
__m256i vw;
|
__m256i vw;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[j]);
|
vxj = _mm256_set1_epi32(*(int*)&x[j]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
|
@ -763,7 +760,7 @@ static inline void sgemv_accum8x4(float *out, const qweight *w, int rows, int co
|
||||||
{
|
{
|
||||||
for (j=0;j<cols;j+=4)
|
for (j=0;j<cols;j+=4)
|
||||||
{
|
{
|
||||||
float * restrict y;
|
float *y;
|
||||||
float xj0, xj1, xj2, xj3;
|
float xj0, xj1, xj2, xj3;
|
||||||
xj0 = x[j+0];
|
xj0 = x[j+0];
|
||||||
xj1 = x[j+1];
|
xj1 = x[j+1];
|
||||||
|
@ -791,7 +788,7 @@ static inline void sparse_sgemv_accum8x4(float *_out, const qweight *w, int rows
|
||||||
int i, j;
|
int i, j;
|
||||||
unsigned char x[MAX_INPUTS];
|
unsigned char x[MAX_INPUTS];
|
||||||
ones = _mm256_set1_epi16(1);
|
ones = _mm256_set1_epi16(1);
|
||||||
//for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);
|
/*for (i=0;i<cols;i++) x[i] = 127+floor(.5+127*_x[i]);*/
|
||||||
vector_ps_to_epi8(x, _x, cols);
|
vector_ps_to_epi8(x, _x, cols);
|
||||||
for (i=0;i<rows;i+=8)
|
for (i=0;i<rows;i+=8)
|
||||||
{
|
{
|
||||||
|
@ -810,26 +807,26 @@ static inline void sparse_sgemv_accum8x4(float *_out, const qweight *w, int rows
|
||||||
__m256i vxj;
|
__m256i vxj;
|
||||||
__m256i vw;
|
__m256i vw;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
vxj = _mm256_set1_epi32(*(int*)&x[*idx++]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
|
@ -843,8 +840,8 @@ static inline void sparse_sgemv_accum8x4(float *_out, const qweight *w, int rows
|
||||||
int pos;
|
int pos;
|
||||||
pos = (*idx++);
|
pos = (*idx++);
|
||||||
vxj = _mm256_set1_epi32(*(int*)&x[pos]);
|
vxj = _mm256_set1_epi32(*(int*)&x[pos]);
|
||||||
vw = _mm256_loadu_si256((const __m256i *)w); //_mm256_lddqu_si256?
|
vw = _mm256_loadu_si256((const __m256i *)w);
|
||||||
tmp = _mm256_maddubs_epi16(vxj, vw); //swap?
|
tmp = _mm256_maddubs_epi16(vxj, vw);
|
||||||
tmp = _mm256_madd_epi16(tmp, ones);
|
tmp = _mm256_madd_epi16(tmp, ones);
|
||||||
vy0 = _mm256_add_epi32(vy0, tmp);
|
vy0 = _mm256_add_epi32(vy0, tmp);
|
||||||
w += 32;
|
w += 32;
|
||||||
|
@ -866,7 +863,7 @@ static inline void sparse_sgemv_accum8x4(float *out, const qweight *weights, int
|
||||||
(void)ignore;
|
(void)ignore;
|
||||||
for (i=0;i<rows;i+=8)
|
for (i=0;i<rows;i+=8)
|
||||||
{
|
{
|
||||||
float * restrict y;
|
float *y;
|
||||||
int cols;
|
int cols;
|
||||||
__m256 vy0;
|
__m256 vy0;
|
||||||
y = &out[i];
|
y = &out[i];
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue