added NFEC decoder C implementation

This commit is contained in:
Jan Buethe 2022-10-20 17:27:39 +00:00
parent ea4d8f54c3
commit d1646a680a
8 changed files with 327 additions and 13 deletions

118
dnn/nfec_dec.c Normal file
View file

@ -0,0 +1,118 @@
#include "nfec_dec.h"
//#define DEBUG
#ifdef DEBUG
#include <stdio.h>
#endif
/*
 * Seed the decoder's recurrent state.
 *
 * The initial state vector is pushed through the three dense layers
 * state1, state2 and state3; each result is written straight into the
 * matching GRU state buffer of the handle.
 */
void nfec_dec_init_states(
    NFECDecState *h,            /* io: state buffer handle */
    const float *initial_state  /* i: initial state */
    )
{
    float *gru2_state = h->dense2_state;
    float *gru4_state = h->dense4_state;
    float *gru6_state = h->dense6_state;

    /* one dense projection per GRU layer, all fed by the same vector */
    compute_dense(&state1, gru2_state, initial_state);
    compute_dense(&state2, gru4_state, initial_state);
    compute_dense(&state3, gru6_state, initial_state);
}
/*
 * Map a quantized latent vector back to floating point.
 *
 * Every integer entry is converted to float and divided by the scale
 * stored for that latent at the requested quantization level. The scale
 * table is laid out level by level, NFEC_STATS_NUM_LATENTS entries each.
 */
void nfec_dec_unquantize_latent_vector(
    float *z,          /* o: unquantized latent vector */
    const int *zq,     /* i: quantized latent vector */
    int quant_level    /* i: quantization level */
    )
{
    /* first scale belonging to the selected quantization level */
    const float *level_scales = &nfec_stats_quant_scales[quant_level * NFEC_STATS_NUM_LATENTS];
    int k = 0;

    while (k < NFEC_STATS_NUM_LATENTS)
    {
        z[k] = (float) zq[k] / level_scales[k];
        k++;
    }
}
/*
 * Decode one latent vector into a quadruple feature frame.
 *
 * The decoder is a chain of eight layers (dense1, GRU2, dense3, GRU4,
 * dense5, GRU6, dense7, dense8). Each layer reads only the output of the
 * layer directly before it, and every layer's output is appended to a
 * shared scratch buffer. The final layer (dec_final) is handed the start
 * of that buffer and writes the result to qframe.
 *
 * The statement order matters: input_index must be set to the previous
 * output position before output_index is advanced.
 */
void nfec_decode_qframe(
NFECDecState *dec_state, /* io: state buffer handle */
float *qframe, /* o: quadruple feature frame (four concatenated frames) */
const float *input /* i: latent vector */
)
{
/* scratch space for the outputs of layers 1..8, stored back to back */
float buffer[DEC_DENSE1_OUT_SIZE + DEC_DENSE2_OUT_SIZE + DEC_DENSE3_OUT_SIZE + DEC_DENSE4_OUT_SIZE + DEC_DENSE5_OUT_SIZE + DEC_DENSE6_OUT_SIZE + DEC_DENSE7_OUT_SIZE + DEC_DENSE8_OUT_SIZE];
/* output_index: where the current layer writes; input_index: where it reads */
int output_index = 0;
int input_index = 0;
#ifdef DEBUG
/* debug only: one dump file per layer (y1.f32 .. y8.f32), opened on the
   first call and kept open in static storage; they are never closed here.
   NOTE(review): the fopen result is not checked before the fwrite calls below. */
static FILE *fids[8] = {NULL};
int i;
char filename[256];
for (i=0; i < 8; i ++)
{
if (fids[i] == NULL)
{
sprintf(filename, "y%d.f32", i + 1);
fids[i] = fopen(filename, "wb");
}
}
#endif
/* run decoder stack and concatenate output in buffer */
/* layer 1 (dense): latent vector -> buffer[0 ..] */
compute_dense(&dec_dense1, &buffer[output_index], input);
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE1_OUT_SIZE, fids[0]);
#endif
input_index = output_index;
output_index += DEC_DENSE1_OUT_SIZE;
/* layer 2 (GRU): presumably compute_gru2 updates dense2_state in place
   (TODO confirm against nnet); the state is then copied out as this layer's output */
compute_gru2(&dec_dense2, dec_state->dense2_state, &buffer[input_index]);
memcpy(&buffer[output_index], dec_state->dense2_state, DEC_DENSE2_OUT_SIZE * sizeof(float));
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE2_OUT_SIZE, fids[1]);
#endif
input_index = output_index;
output_index += DEC_DENSE2_OUT_SIZE;
/* layer 3 (dense) */
compute_dense(&dec_dense3, &buffer[output_index], &buffer[input_index]);
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE3_OUT_SIZE, fids[2]);
#endif
input_index = output_index;
output_index += DEC_DENSE3_OUT_SIZE;
/* layer 4 (GRU): same state-then-copy pattern as layer 2 */
compute_gru2(&dec_dense4, dec_state->dense4_state, &buffer[input_index]);
memcpy(&buffer[output_index], dec_state->dense4_state, DEC_DENSE4_OUT_SIZE * sizeof(float));
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE4_OUT_SIZE, fids[3]);
#endif
input_index = output_index;
output_index += DEC_DENSE4_OUT_SIZE;
/* layer 5 (dense) */
compute_dense(&dec_dense5, &buffer[output_index], &buffer[input_index]);
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE5_OUT_SIZE, fids[4]);
#endif
input_index = output_index;
output_index += DEC_DENSE5_OUT_SIZE;
/* layer 6 (GRU): same state-then-copy pattern as layer 2 */
compute_gru2(&dec_dense6, dec_state->dense6_state, &buffer[input_index]);
memcpy(&buffer[output_index], dec_state->dense6_state, DEC_DENSE6_OUT_SIZE * sizeof(float));
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE6_OUT_SIZE, fids[5]);
#endif
input_index = output_index;
output_index += DEC_DENSE6_OUT_SIZE;
/* layer 7 (dense) */
compute_dense(&dec_dense7, &buffer[output_index], &buffer[input_index]);
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE7_OUT_SIZE, fids[6]);
#endif
input_index = output_index;
output_index += DEC_DENSE7_OUT_SIZE;
/* layer 8 (dense) */
compute_dense(&dec_dense8, &buffer[output_index], &buffer[input_index]);
#ifdef DEBUG
fwrite(&buffer[output_index], sizeof(buffer[0]), DEC_DENSE8_OUT_SIZE, fids[7]);
#endif
/* output_index now equals the total buffer length; it is not read again */
output_index += DEC_DENSE8_OUT_SIZE;
/* final layer reads from the start of the buffer, i.e. presumably the whole
   concatenation of all eight layer outputs (its input size is not visible here) */
compute_dense(&dec_final, qframe, buffer);
}

17
dnn/nfec_dec.h Normal file
View file

@ -0,0 +1,17 @@
#ifndef _NFEC_DEC_H
#define _NFEC_DEC_H
#include "nfec_dec_data.h"
#include "nfec_stats_data.h"
typedef struct {
float dense2_state[DEC_DENSE2_STATE_SIZE];
float dense4_state[DEC_DENSE2_STATE_SIZE];
float dense6_state[DEC_DENSE2_STATE_SIZE];
} NFECDecState;
/* Initializes the GRU states in h from the given initial state vector. */
void nfec_dec_init_states(NFECDecState *h, const float * initial_state);
/* Converts the quantized latent vector zq to float in z by undoing the
   per-latent scaling of quantization level quant_level. */
void nfec_dec_unquantize_latent_vector(float *z, const int *zq, int quant_level);
/* Decodes latent vector z into qframe (four concatenated feature frames),
   updating the state in h. */
void nfec_decode_qframe(NFECDecState *h, float *qframe, const float * z);
#endif

68
dnn/nfec_dec_demo.c Normal file
View file

@ -0,0 +1,68 @@
#include <stdlib.h>
#include <stdio.h>
#include "nfec_dec.h"
#include "nfec_enc.h"
/* Print the command-line synopsis to stdout and terminate with exit code 1. */
void usage()
{
    fputs("nfec_dec_demo <input> <output>\n", stdout);
    exit(1);
}
int main(int argc, char **argv)
{
NFECDecState dec_state;
float feature_buffer[36];
float qframe[4 * NFEC_DEC_NUM_FEATURES];
float latents[80];
float initial_state[24];
int quantized_latents[80];
int index = 0;
FILE *in_fid, *out_fid;
int qlevel = 0;
memset(&dec_state, 0, sizeof(dec_state));
if (argc < 3) usage();
in_fid = fopen(argv[1], "rb");
if (in_fid == NULL)
{
perror("Could not open input file");
usage();
}
out_fid = fopen(argv[2], "wb");
if (out_fid == NULL)
{
perror("Could not open output file");
usage();
}
/* read initial state from input stream */
if (fread(initial_state, sizeof(float), 24, in_fid) != 24)
{
perror("error while reading initial state");
return 1;
}
/* initialize GRU states */
nfec_dec_init_states(&dec_state, initial_state);
/* start decoding */
while (fread(latents, sizeof(float), 80, in_fid) == 80)
{
nfec_decode_qframe(&dec_state, qframe, latents);
fwrite(qframe, sizeof(float), 4*20, out_fid);
}
fclose(in_fid);
fclose(out_fid);
return 0;
}
/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_dec_demo.c nfec_dec.c nnet.c nfec_dec_data.c nfec_stats_data.c kiss99.c -g -o nfec_dec_demo */

View file

@ -1,6 +1,9 @@
#include <limits.h>
#include <math.h>
#include "nfec_enc.h"
#include "nnet.h"
#include "nfec_enc_data.h"
#include "nfec_stats_data.h"
//#define DEBUG
@ -8,7 +11,12 @@
#include <stdio.h>
#endif
void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input)
void nfec_encode_dframe(
struct NFECEncState *enc_state, /* io: encoder state */
float *latents, /* o: latent vector */
float *initial_state, /* o: initial state */
const float *input /* i: double feature frame (concatenated) */
)
{
float buffer[ENC_DENSE1_OUT_SIZE + ENC_DENSE2_OUT_SIZE + ENC_DENSE3_OUT_SIZE + ENC_DENSE4_OUT_SIZE + ENC_DENSE5_OUT_SIZE + ENC_DENSE6_OUT_SIZE + ENC_DENSE7_OUT_SIZE + ENC_DENSE8_OUT_SIZE + GDENSE1_OUT_SIZE];
int output_index = 0;
@ -105,4 +113,28 @@ void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *i
input_index = output_index;
compute_dense(&gdense2, initial_state, &buffer[input_index]);
}
/*
 * Round x to the nearest integer and convert to int with saturation.
 *
 * Converting a float that is out of int range (or NaN) to int is undefined
 * behavior, so those cases are handled explicitly: values beyond the range
 * clamp to INT_MAX / INT_MIN and NaN maps to 0.
 */
static int nfec_round_to_int_sat(float x)
{
    /* INT_MAX itself is not exactly representable as float, but
       -(float) INT_MIN (== INT_MAX + 1) is, so compare against that */
    const float upper_excl = -(float) INT_MIN;
    const float lower_incl = (float) INT_MIN;
    float rounded = roundf(x);

    if (isnan(rounded)) return 0;
    if (rounded >= upper_excl) return INT_MAX;
    if (rounded <= lower_incl) return INT_MIN;
    return (int) rounded;
}

/*
 * Quantize a latent vector at the given quantization level.
 *
 * For every latent: apply the dead-zone transform, scale by the
 * per-latent quantization scale, then round to the nearest integer
 * (saturating at the int range). Both statistics tables are laid out
 * level by level with NFEC_LATENT_DIM entries per level.
 */
void nfec_quantize_latent_vector(
    int *z_q,          /* o: quantized latent vector */
    const float *z,    /* i: unquantized latent vector */
    int quant_level    /* i: quantization level */
    )
{
    const float *theta = &nfec_stats_dead_zone_theta[quant_level * NFEC_LATENT_DIM];
    const float *scale = &nfec_stats_quant_scales[quant_level * NFEC_LATENT_DIM];
    int i;

    for (i = 0; i < NFEC_LATENT_DIM; i ++)
    {
        float delta;
        float value;

        /* dead-zone transform */
        delta = theta[i] - .5f;
        value = z[i] - delta * tanhf(z[i] / (delta + 0.1f));

        /* scaling */
        value *= scale[i];

        /* quantization by rounding, with overflow check */
        z_q[i] = nfec_round_to_int_sat(value);
    }
}

View file

@ -11,5 +11,6 @@ struct NFECEncState{
};
void nfec_encode_dframe(struct NFECEncState *enc_state, float *latents, float *initial_state, const float *input);
void nfec_quantize_latent_vector(int *z_q, const float *z, int quant_level);
#endif

View file

@ -16,8 +16,9 @@ int main(int argc, char **argv)
float dframe[2 * NFEC_NUM_FEATURES];
float latents[80];
float initial_state[24];
int quantized_latents[NFEC_LATENT_DIM];
int index = 0;
FILE *fid, *latents_fid, *states_fid;
FILE *fid, *latents_fid, *quantized_latents_fid, *states_fid;
memset(&enc_state, 0, sizeof(enc_state));
@ -40,6 +41,16 @@ int main(int argc, char **argv)
usage();
}
char filename[256];
strcpy(filename, argv[2]);
strcat(filename, ".quantized.f32");
quantized_latents_fid = fopen(filename, "wb");
if (latents_fid == NULL)
{
fprintf(stderr, "could not open latents file %s\n", filename);
usage();
}
states_fid = fopen(argv[3], "wb");
if (states_fid == NULL)
{
@ -55,8 +66,10 @@ int main(int argc, char **argv)
if (index == 2)
{
nfec_encode_dframe(&enc_state, latents, initial_state, dframe);
nfec_quantize_latent_vector(quantized_latents, latents, 0);
index = 0;
fwrite(latents, sizeof(float), NFEC_LATENT_DIM, latents_fid);
fwrite(quantized_latents, sizeof(int), NFEC_LATENT_DIM, quantized_latents_fid);
fwrite(initial_state, sizeof(float), GDENSE2_OUT_SIZE, states_fid);
}
}
@ -64,6 +77,9 @@ int main(int argc, char **argv)
fclose(fid);
fclose(states_fid);
fclose(latents_fid);
fclose(quantized_latents_fid);
return 0;
}
/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c kiss99.c -g -o nfec_enc_demo */
/* gcc -DDISABLE_DOT_PROD -DDISABLE_NEON nfec_enc_demo.c nfec_enc.c nnet.c nfec_enc_data.c nfec_stats_data.c kiss99.c -g -o nfec_enc_demo */

View file

@ -1,6 +1,7 @@
import argparse
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ""
parser = argparse.ArgumentParser()
@ -59,17 +60,17 @@ def dump_statistical_model(qembedding, f, fh):
r = 0.5 + 0.5 * tf.math.sigmoid(w[:, 4 * N : 5 * N]).numpy()
theta = tf.math.sigmoid(w[:, 5 * N : 6 * N]).numpy()
printVector(f, quant_scales[:], 'nfec_stats_quant_scales')
printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta')
printVector(f, r, 'nfec_stats_r')
printVector(f, theta, 'nfec_stats_theta')
printVector(f, quant_scales[:], 'nfec_stats_quant_scales', static=False)
printVector(f, dead_zone_theta[:], 'nfec_stats_dead_zone_theta', static=False)
printVector(f, r, 'nfec_stats_r', static=False)
printVector(f, theta, 'nfec_stats_theta', static=False)
fh.write(
f"""
extern float nfec_stats_quant_scales;
extern float nfec_stats_dead_zone_theta;
extern float nfec_stats_r;
extern float nfec_stats_theta;
extern const float nfec_stats_quant_scales[{levels * N}];
extern const float nfec_stats_dead_zone_theta[{levels * N}];
extern const float nfec_stats_r[{levels * N}];
extern const float nfec_stats_theta[{levels * N}];
"""
)
@ -159,6 +160,7 @@ f"""
header_fid.write(
f"""
#define NFEC_STATS_NUM_LEVELS {num_levels}
#define NFEC_STATS_NUM_LATENTS {args.latent_dim}
"""
)
@ -171,3 +173,60 @@ f"""
header_fid.close()
source_fid.close()
# decoder
# decoder: write the decoder weights to nfec_dec_data.c / nfec_dec_data.h
decoder_dense_names = [
    'state1',
    'state2',
    'state3',
    'dec_dense1',
    'dec_dense3',
    'dec_dense5',
    'dec_dense7',
    'dec_dense8',
    'dec_final'
]
decoder_gru_names = [
    'dec_dense2',
    'dec_dense4',
    'dec_dense6'
]
source_fid = open("nfec_dec_data.c", 'w')
header_fid = open("nfec_dec_data.h", 'w')
start_header(header_fid, "nfec_dec_data.h")
start_source(source_fid, "nfec_dec_data.h", os.path.basename(args.weights))
# dump GRUs first: NFEC_DEC_MAX_RNN_NEURONS below has to describe the decoder's
# own GRU layers, so the maximum must be known before the constant is written
# (dump_gru_layer is used here as returning the layer's neuron count)
max_rnn_neurons = max(
    [
        dump_gru_layer(decoder.get_layer(name), source_fid, header_fid)
        for name in decoder_gru_names
    ]
)
# some global constants
header_fid.write(
f"""
#define NFEC_DEC_NUM_FEATURES 20
#define NFEC_DEC_LATENT_DIM {args.latent_dim}
#define NFEC_DEC_MAX_RNN_NEURONS {max_rnn_neurons}
"""
)
# dump Dense layers
for name in decoder_dense_names:
    layer = decoder.get_layer(name)
    dump_dense_layer(layer, source_fid, header_fid)
finish_header(header_fid)
finish_source(source_fid)
header_fid.close()
source_fid.close()

View file

@ -3,13 +3,16 @@
import numpy as np
def printVector(f, vector, name, dtype='float', dotp=False):
def printVector(f, vector, name, dtype='float', dotp=False, static=True):
""" prints vector as one-dimensional C array """
if dotp:
vector = vector.reshape((vector.shape[0]//4, 4, vector.shape[1]//8, 8))
vector = vector.transpose((2, 0, 3, 1))
v = np.reshape(vector, (-1))
f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
if static:
f.write('static const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
else:
f.write('const {} {}[{}] = {{\n '.format(dtype, name, len(v)))
for i in range(0, len(v)):
f.write('{}'.format(v[i]))
if (i!=len(v)-1):