diff --git a/celt/celt.h b/celt/celt.h
index 70175301..f73f29dd 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -59,7 +59,8 @@ typedef struct {
    float noisiness;
    float activity;
    float music_prob;
-   float vad_prob;
+   float music_prob_min;
+   float music_prob_max;
    int bandwidth;
    float activity_probability;
    /* Store as Q6 char to save space. */
diff --git a/scripts/dump_rnn.py b/scripts/dump_rnn.py
new file mode 100755
index 00000000..dd66403b
--- /dev/null
+++ b/scripts/dump_rnn.py
@@ -0,0 +1,57 @@
+#!/usr/bin/python
+
+from __future__ import print_function
+
+from keras.models import Sequential
+from keras.layers import Dense
+from keras.layers import LSTM
+from keras.layers import GRU
+from keras.models import load_model
+from keras import backend as K
+
+import numpy as np
+
+def printVector(f, vector, name):
+    v = np.reshape(vector, (-1))
+    #print('static const float ', name, '[', len(v), '] = \n', file=f)
+    f.write('static const opus_int16 {}[{}] = {{\n   '.format(name, len(v)))
+    for i in range(0, len(v)):
+        f.write('{}'.format(int(round(8192*v[i]))))
+        if (i!=len(v)-1):
+            f.write(',')
+        else:
+            break
+        if (i%8==7):
+            f.write("\n   ")
+        else:
+            f.write(" ")
+    #print(v, file=f)
+    f.write('\n};\n\n')
+    return
+
+def binary_crossentrop2(y_true, y_pred):
+    return K.mean(2*K.abs(y_true-0.5) * K.binary_crossentropy(y_pred, y_true), axis=-1)
+
+
+model = load_model("weights.hdf5", custom_objects={'binary_crossentrop2': binary_crossentrop2})
+
+weights = model.get_weights()
+
+f = open('rnn_weights.c', 'w')
+
+f.write('/*This file is automatically generated from a Keras model*/\n\n')
+f.write('#ifdef HAVE_CONFIG_H\n#include "config.h"\n#endif\n\n#include "mlp.h"\n\n')
+
+printVector(f, weights[0], 'layer0_weights')
+printVector(f, weights[1], 'layer0_bias')
+printVector(f, weights[2], 'layer1_weights')
+printVector(f, weights[3], 'layer1_recur_weights')
+printVector(f, weights[4], 'layer1_bias')
+printVector(f, weights[5], 'layer2_weights')
+printVector(f, weights[6], 'layer2_bias')
+
+f.write('const DenseLayer layer0 = {\n   layer0_bias,\n   layer0_weights,\n   25, 16, 0\n};\n\n')
+f.write('const GRULayer layer1 = {\n   layer1_bias,\n   layer1_weights,\n   layer1_recur_weights,\n   16, 12\n};\n\n')
+f.write('const DenseLayer layer2 = {\n   layer2_bias,\n   layer2_weights,\n   12, 2, 1\n};\n\n')
+
+f.close()
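A note on the quantization above: printVector() stores each weight as a Q13 opus_int16 (scale factor 8192 = 2^13), and the C inference code has to undo that scale; compute_dense()/compute_gru() below multiply accumulated sums by WEIGHTS_SCALE, assumed here to be 1.f/8192 from mlp.h (mlp.h is not part of this excerpt). A minimal sketch of the round trip:

    import numpy as np

    w = np.random.uniform(-2, 2, 100).astype(np.float32)  # stand-in for Keras weights
    q = np.round(8192*w).astype(np.int16)   # what printVector() writes to rnn_weights.c
    back = q/8192.0                         # what the C side effectively computes
    assert np.max(np.abs(back - w)) <= 0.5/8192 + 1e-7    # error <= half a Q13 step

printVector() does not clip, so it quietly assumes the trained weights stay inside [-4, 4); anything outside would wrap around in int16.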
diff --git a/scripts/rnn_train.py b/scripts/rnn_train.py
new file mode 100755
index 00000000..ffdaa1e7
--- /dev/null
+++ b/scripts/rnn_train.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python
+
+from __future__ import print_function
+
+from keras.models import Sequential
+from keras.models import Model
+from keras.layers import Input
+from keras.layers import Dense
+from keras.layers import LSTM
+from keras.layers import GRU
+from keras.layers import SimpleRNN
+from keras.layers import Dropout
+from keras import losses
+import h5py
+
+from keras import backend as K
+import numpy as np
+
+def binary_crossentrop2(y_true, y_pred):
+    return K.mean(2*K.abs(y_true-0.5) * K.binary_crossentropy(y_pred, y_true), axis=-1)
+
+print('Build model...')
+#model = Sequential()
+#model.add(Dense(16, activation='tanh', input_shape=(None, 25)))
+#model.add(GRU(12, dropout=0.0, recurrent_dropout=0.0, activation='tanh', recurrent_activation='sigmoid', return_sequences=True))
+#model.add(Dense(2, activation='sigmoid'))
+
+main_input = Input(shape=(None, 25), name='main_input')
+x = Dense(16, activation='tanh')(main_input)
+x = GRU(12, dropout=0.1, recurrent_dropout=0.1, activation='tanh', recurrent_activation='sigmoid', return_sequences=True)(x)
+x = Dense(2, activation='sigmoid')(x)
+model = Model(inputs=main_input, outputs=x)
+
+batch_size = 64
+
+print('Loading data...')
+with h5py.File('features.h5', 'r') as hf:
+    all_data = hf['features'][:]
+print('done.')
+
+window_size = 1500
+
+nb_sequences = len(all_data)//window_size
+print(nb_sequences, ' sequences')
+x_train = all_data[:nb_sequences*window_size, :-2]
+x_train = np.reshape(x_train, (nb_sequences, window_size, 25))
+
+y_train = np.copy(all_data[:nb_sequences*window_size, -2:])
+y_train = np.reshape(y_train, (nb_sequences, window_size, 2))
+
+all_data = 0
+x_train = x_train.astype('float32')
+y_train = y_train.astype('float32')
+
+print(len(x_train), 'train sequences. x shape =', x_train.shape, 'y shape = ', y_train.shape)
+
+# try using different optimizers and different optimizer configs
+model.compile(loss=binary_crossentrop2,
+              optimizer='adam',
+              metrics=['binary_accuracy'])
+
+print('Train...')
+model.fit(x_train, y_train,
+          batch_size=batch_size,
+          epochs=200,
+          validation_data=(x_train, y_train))
+model.save("newweights.hdf5")
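The custom loss is worth a second look: scaling binary cross-entropy by 2*|y_true-0.5| means a training label of exactly 0.5 contributes nothing to the gradient, so ambiguous frames can be labeled 0.5 and effectively excluded, while hard 0/1 labels keep full weight. The same weighting in NumPy, with hypothetical label/prediction values:

    import numpy as np

    def weighted_bce(y_true, y_pred, eps=1e-7):
        p = np.clip(y_pred, eps, 1 - eps)
        bce = -(y_true*np.log(p) + (1 - y_true)*np.log(1 - p))
        return 2*np.abs(y_true - 0.5)*bce

    print(weighted_bce(1.0, 0.7))  # confident label: full-weight loss (~0.357)
    print(weighted_bce(0.5, 0.7))  # "don't know" label: weight 0, loss 0

Also note the script validates on its own training data, so the reported val_loss tracks fit rather than generalization.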
diff --git a/src/analysis.c b/src/analysis.c
index f4160e4b..1d6dd829 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -50,6 +50,8 @@
 
 #ifndef DISABLE_FLOAT_API
 
+#define TRANSITION_PENALTY 10
+
 static const float dct_table[128] = {
        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
        0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
@@ -224,19 +226,22 @@ void tonality_analysis_reset(TonalityAnalysisState *tonal)
    /* Clear non-reusable fields. */
    char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
    OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
-   tonal->music_confidence = .9f;
-   tonal->speech_confidence = .1f;
 }
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
 {
    int pos;
    int curr_lookahead;
-   float psum;
    float tonality_max;
    float tonality_avg;
    int tonality_count;
    int i;
+   int pos0;
+   float prob_avg;
+   float prob_count;
+   float prob_min, prob_max;
+   float vad_prob;
+   int mpos, vpos;
 
    pos = tonal->read_pos;
    curr_lookahead = tonal->write_pos-tonal->read_pos;
@@ -254,6 +259,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
       pos--;
    if (pos<0)
       pos = DETECT_SIZE-1;
+   pos0 = pos;
    OPUS_COPY(info_out, &tonal->info[pos], 1);
    tonality_max = tonality_avg = info_out->tonality;
    tonality_count = 1;
@@ -270,6 +276,107 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
       tonality_count++;
    }
    info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
+
+   mpos = vpos = pos0;
+   /* If we have enough look-ahead, compensate for the ~5-frame delay in the music prob and
+      ~1 frame delay in the VAD prob. */
+   if (curr_lookahead > 15)
+   {
+      mpos += 5;
+      if (mpos>=DETECT_SIZE)
+         mpos -= DETECT_SIZE;
+      vpos += 1;
+      if (vpos>=DETECT_SIZE)
+         vpos -= DETECT_SIZE;
+   }
+
+   /* The following calculations attempt to minimize a "badness function"
+      for the transition. When switching from speech to music, the badness
+      of switching at frame k is
+      b_k = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+      where
+      v_i is the activity probability (VAD) at frame i,
+      p_i is the music probability at frame i,
+      T is the probability threshold for switching,
+      S is the penalty for switching during active audio rather than silence,
+      and the current frame has index i=0.
+
+      Rather than apply badness to directly decide when to switch, what we compute
+      instead is the threshold for which the optimal switching point is now. When
+      considering whether to switch now (frame 0) or at frame k, we have:
+      S*v_0 = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+      which gives us:
+      T = ( \sum_{i=0}^{k-1} v_i*p_i + S*(v_k-v_0) ) / ( \sum_{i=0}^{k-1} v_i )
+      We take the min threshold across all positive values of k (up to the maximum
+      amount of lookahead we have) to give us the threshold for which the current
+      frame is the optimal switch point.
+
+      The last step is that we need to consider whether we want to switch at all.
+      For that we use the average of the music probability over the entire window.
+      If the threshold is higher than that average we're not going to switch, so
+      we compute a min with the average as well. The result of all these min
+      operations is music_prob_min, which gives the threshold for switching to
+      music if we're currently encoding for speech.
+
+      We do the exact opposite to compute music_prob_max which is used for
+      switching from music to speech. */
+   prob_min = 1.f;
+   prob_max = 0.f;
+   vad_prob = tonal->info[vpos].activity_probability;
+   prob_count = MAX16(.1f, vad_prob);
+   prob_avg = MAX16(.1f, vad_prob)*tonal->info[mpos].music_prob;
+   while (1)
+   {
+      float pos_vad;
+      mpos++;
+      if (mpos==DETECT_SIZE)
+         mpos = 0;
+      if (mpos == tonal->write_pos)
+         break;
+      vpos++;
+      if (vpos==DETECT_SIZE)
+         vpos = 0;
+      if (vpos == tonal->write_pos)
+         break;
+      pos_vad = tonal->info[vpos].activity_probability;
+      prob_min = MIN16((prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_min);
+      prob_max = MAX16((prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_max);
+      prob_count += MAX16(.1f, pos_vad);
+      prob_avg += MAX16(.1f, pos_vad)*tonal->info[mpos].music_prob;
+   }
+   info_out->music_prob = prob_avg/prob_count;
+   prob_min = MIN16(prob_avg/prob_count, prob_min);
+   prob_max = MAX16(prob_avg/prob_count, prob_max);
+   prob_min = MAX16(prob_min, 0.f);
+   prob_max = MIN16(prob_max, 1.f);
+
+   /* If we don't have enough look-ahead, do our best to make a decent decision. */
+   if (curr_lookahead < 10)
+   {
+      float pmin, pmax;
+      pmin = prob_min;
+      pmax = prob_max;
+      pos = pos0;
+      /* Look for min/max in the past. */
+      for (i=0;i<IMIN(tonal->count-1, 15);i++)
+      {
+         pos--;
+         if (pos < 0)
+            pos = DETECT_SIZE-1;
+         pmin = MIN16(pmin, tonal->info[pos].music_prob);
+         pmax = MAX16(pmax, tonal->info[pos].music_prob);
+      }
+      /* Bias against switching on active audio. */
+      pmin = MAX16(0.f, pmin - .1f*vad_prob);
+      pmax = MIN16(1.f, pmax + .1f*vad_prob);
+      prob_min += (1.f-.1f*curr_lookahead)*(pmin - prob_min);
+      prob_max += (1.f-.1f*curr_lookahead)*(pmax - prob_max);
+   }
+   info_out->music_prob_min = prob_min;
+   info_out->music_prob_max = prob_max;
+
+   /* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */
    tonal->read_subframe += len/(tonal->Fs/400);
    while (tonal->read_subframe>=8)
    {
@@ -278,21 +385,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
    }
    if (tonal->read_pos>=DETECT_SIZE)
       tonal->read_pos-=DETECT_SIZE;
-
-   /* The -1 is to compensate for the delay in the features themselves. */
-   curr_lookahead = IMAX(curr_lookahead-1, 0);
-
-   psum=0;
-   /* Summing the probability of transition patterns that involve music at
-      time (DETECT_SIZE-curr_lookahead-1) */
-   for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
-      psum += tonal->pmusic[i];
-   for (;i<DETECT_SIZE;i++)
-      psum += tonal->pspeech[i];
-   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
-   /*printf("%f %f %f %f %f\n", psum, info_out->music_prob, info_out->vad_prob, info_out->activity_probability, info_out->tonality);*/
-
-   info_out->music_prob = psum;
 }
 
 static const float std_feature_bias[9] = {
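To make the threshold recursion above concrete, here is the same computation in Python with hypothetical per-frame values (the MAX16(.1f, ...) floors, the mpos/vpos delay offsets and the final min against the window average are left out for brevity):

    # Frame 0 is "now"; S is TRANSITION_PENALTY.
    S = 10
    vad   = [0.9, 0.9, 0.2, 0.1]   # v_i: activity probability
    music = [0.6, 0.7, 0.8, 0.9]   # p_i: music probability

    num, den = vad[0]*music[0], vad[0]   # running sums of v_i*p_i and v_i
    T_min = 1.0
    for k in range(1, len(vad)):
        # Threshold T at which switching now and at frame k cost the same:
        T = (num + S*(vad[k] - vad[0]))/den
        T_min = min(T_min, T)
        num += vad[k]*music[k]
        den += vad[k]
    print(max(0.0, min(T_min, num/den)))   # -> 0.0 here

With a near-silent frame coming up (v_2 = 0.2), T_min goes strongly negative and clamps to 0: the encoder sees a low music_prob_min while still in speech mode, so the speech-to-music switch is deferred to the cheap, silent frame.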
@@ -352,6 +444,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
    float band_log2[NB_TBANDS+1];
    float leakage_from[NB_TBANDS+1];
    float leakage_to[NB_TBANDS+1];
+   float layer_out[MAX_NEURONS];
    SAVE_STACK;
 
    alpha = 1.f/IMIN(10, 1+tonal->count);
@@ -368,12 +461,6 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
       offset = 3*offset/2;
    }
 
-   if (tonal->count<4) {
-      if (tonal->application == OPUS_APPLICATION_VOIP)
-         tonal->music_prob = .1f;
-      else
-         tonal->music_prob = .625f;
-   }
    kfft = celt_mode->mdct.kfft[0];
    if (tonal->count==0)
       tonal->mem_fill = 240;
@@ -761,139 +848,17 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
    features[23] = info->tonality_slope + 0.069216f;
    features[24] = tonal->lowECount - 0.067930f;
 
-   mlp_process(&net, features, frame_probs);
-   frame_probs[0] = .5f*(frame_probs[0]+1);
-   /* Curve fitting between the MLP probability and the actual probability */
-   /*frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);*/
-   /* Probability of active audio (as opposed to silence) */
-   frame_probs[1] = .5f*frame_probs[1]+.5f;
-   frame_probs[1] *= frame_probs[1];
+   compute_dense(&layer0, layer_out, features);
+   compute_gru(&layer1, tonal->rnn_state, layer_out);
+   compute_dense(&layer2, frame_probs, tonal->rnn_state);
 
    /* Probability of speech or music vs noise */
    info->activity_probability = frame_probs[1];
+   /* It seems like the RNN tends to have a bias towards speech and this
+      warping of the probabilities compensates for it. */
+   info->music_prob = frame_probs[0] * (2 - frame_probs[0]);
 
-   /*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
-   {
-      /* Probability of state transition */
-      float tau;
-      /* Represents independence of the MLP probabilities, where
-         beta=1 means fully independent. */
-      float beta;
-      /* Denormalized probability of speech (p0) and music (p1) after update */
-      float p0, p1;
-      /* Probabilities for "all speech" and "all music" */
-      float s0, m0;
-      /* Probability sum for renormalisation */
-      float psum;
-      /* Instantaneous probability of speech and music, with beta pre-applied. */
-      float speech0;
-      float music0;
-      float p, q;
-
-      /* More silence transitions for speech than for music. */
-      tau = .001f*tonal->music_prob + .01f*(1-tonal->music_prob);
-      p = MAX16(.05f,MIN16(.95f,frame_probs[1]));
-      q = MAX16(.05f,MIN16(.95f,tonal->vad_prob));
-      beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-      /* p0 and p1 are the probabilities of speech and music at this frame
-         using only information from previous frame and applying the
-         state transition model */
-      p0 = (1-tonal->vad_prob)*(1-tau) +    tonal->vad_prob *tau;
-      p1 =    tonal->vad_prob *(1-tau) + (1-tonal->vad_prob)*tau;
-      /* We apply the current probability with exponent beta to work around
-         the fact that the probability estimates aren't independent. */
-      p0 *= (float)pow(1-frame_probs[1], beta);
-      p1 *= (float)pow(frame_probs[1], beta);
-      /* Normalise the probabilities to get the Marokv probability of music. */
-      tonal->vad_prob = p1/(p0+p1);
-      info->vad_prob = tonal->vad_prob;
-      /* Consider that silence has a 50-50 probability of being speech or music. */
-      frame_probs[0] = tonal->vad_prob*frame_probs[0] + (1-tonal->vad_prob)*.5f;
-
-      /* One transition every 3 minutes of active audio */
-      tau = .0001f;
-      /* Adapt beta based on how "unexpected" the new prob is */
-      p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
-      q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
-      beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-      /* p0 and p1 are the probabilities of speech and music at this frame
-         using only information from previous frame and applying the
-         state transition model */
-      p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
-      p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
-      /* We apply the current probability with exponent beta to work around
-         the fact that the probability estimates aren't independent. */
-      p0 *= (float)pow(1-frame_probs[0], beta);
-      p1 *= (float)pow(frame_probs[0], beta);
-      /* Normalise the probabilities to get the Marokv probability of music. */
-      tonal->music_prob = p1/(p0+p1);
-      info->music_prob = tonal->music_prob;
-
-      /*printf("%f %f %f %f\n", frame_probs[0], frame_probs[1], tonal->music_prob, tonal->vad_prob);*/
-      /* This chunk of code deals with delayed decision. */
-      psum=1e-20f;
-      /* Instantaneous probability of speech and music, with beta pre-applied. */
-      speech0 = (float)pow(1-frame_probs[0], beta);
-      music0  = (float)pow(frame_probs[0], beta);
-      if (tonal->count==1)
-      {
-         if (tonal->application == OPUS_APPLICATION_VOIP)
-            tonal->pmusic[0] = .1f;
-         else
-            tonal->pmusic[0] = .625f;
-         tonal->pspeech[0] = 1-tonal->pmusic[0];
-      }
-      /* Updated probability of having only speech (s0) or only music (m0),
-         before considering the new observation. */
-      s0 = tonal->pspeech[0] + tonal->pspeech[1];
-      m0 = tonal->pmusic [0] + tonal->pmusic [1];
-      /* Updates s0 and m0 with instantaneous probability. */
-      tonal->pspeech[0] = s0*(1-tau)*speech0;
-      tonal->pmusic [0] = m0*(1-tau)*music0;
-      /* Propagate the transition probabilities */
-      for (i=1;i<DETECT_SIZE-1;i++)
-      {
-         tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
-         tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
-      }
-      /* Probability that the latest frame is speech, when all the previous ones were music. */
-      tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
-      /* Probability that the latest frame is music, when all the previous ones were speech. */
-      tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
-
-      /* Renormalise probabilities to 1 */
-      for (i=0;i<DETECT_SIZE;i++)
-         psum += tonal->pspeech[i] + tonal->pmusic[i];
-      psum = 1.f/psum;
-      for (i=0;i<DETECT_SIZE;i++)
-      {
-         tonal->pspeech[i] *= psum;
-         tonal->pmusic [i] *= psum;
-      }
-      psum = tonal->pmusic[0];
-      for (i=1;i<DETECT_SIZE;i++)
-         psum += tonal->pspeech[i];
-
-      /* Estimate our confidence in the speech/music decisions */
-      if (frame_probs[1]>.75)
-      {
-         if (tonal->music_prob>.9)
-         {
-            float adapt;
-            adapt = 1.f/(++tonal->music_confidence_count);
-            tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
-            tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
-         }
-         if (tonal->music_prob<.1)
-         {
-            float adapt;
-            adapt = 1.f/(++tonal->speech_confidence_count);
-            tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
-            tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
-         }
-      }
-   }
-   tonal->last_music = tonal->music_prob>.5f;
+   /*printf("%f %f %f\n", frame_probs[0], frame_probs[1], info->music_prob);*/
 #ifdef MLP_TRAINING
    for (i=0;i<25;i++)
       printf("%f ", features[i]);
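The warping applied to the RNN's music output above is the map p -> p*(2-p) = 1-(1-p)^2: it is the identity at 0 and 1 but lifts everything in between, compensating the speech bias the comment mentions. A one-liner to see the curve:

    for p in [0.0, 0.25, 0.5, 0.75, 1.0]:
        print(p, '->', round(p*(2 - p), 4))   # 0.0, 0.4375, 0.75, 0.9375, 1.0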
diff --git a/src/analysis.h b/src/analysis.h
index cac51dfa..289c845e 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -30,6 +30,7 @@
 
 #include "celt.h"
 #include "opus_private.h"
+#include "mlp.h"
 
 #define NB_FRAMES 8
 #define NB_TBANDS 18
@@ -64,28 +65,16 @@ typedef struct {
    float mem[32];
    float cmean[8];
    float std[9];
-   float music_prob;
-   float vad_prob;
    float Etracker;
    float lowECount;
    int E_count;
-   int last_music;
    int count;
    int analysis_offset;
-   /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
-       pspeech[0] is the probability that all frames in the window are speech. */
-   float pspeech[DETECT_SIZE];
-   /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
-       pmusic[0] is the probability that all frames in the window are music. */
-   float pmusic[DETECT_SIZE];
-   float speech_confidence;
-   float music_confidence;
-   int speech_confidence_count;
-   int music_confidence_count;
    int write_pos;
    int read_pos;
    int read_subframe;
    float hp_ener_accum;
+   float rnn_state[MAX_NEURONS];
    opus_val32 downmix_state[3];
    AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;
diff --git a/src/mlp.c b/src/mlp.c
index ff9e50df..0e5ef16b 100644
--- a/src/mlp.c
+++ b/src/mlp.c
@@ -1,5 +1,5 @@
 /* Copyright (c) 2008-2011 Octasic Inc.
-   Written by Jean-Marc Valin */
+                 2012-2017 Jean-Marc Valin */
 /*
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
@@ -29,42 +29,13 @@
 #include "config.h"
 #endif
 
+#include <math.h>
 #include "opus_types.h"
 #include "opus_defines.h"
-
-#include <math.h>
-#include "mlp.h"
 #include "arch.h"
 #include "tansig_table.h"
-#define MAX_NEURONS 100
+#include "mlp.h"
 
-#if 0
-static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
-{
-   int i;
-   opus_val16 xx; /* Q11 */
-   /*double x, y;*/
-   opus_val16 dy, yy; /* Q14 */
-   /*x = 1.9073e-06*_x;*/
-   if (_x>=QCONST32(8,19))
-      return QCONST32(1.,14);
-   if (_x<=-QCONST32(8,19))
-      return -QCONST32(1.,14);
-   xx = EXTRACT16(SHR32(_x, 8));
-   /*i = lrint(25*x);*/
-   i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
-   /*x -= .04*i;*/
-   xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
-   /*x = xx*(1./2048);*/
-   /*y = tansig_table[250+i];*/
-   yy = tansig_table[250+i];
-   /*y = yy*(1./16384);*/
-   dy = 16384-MULT16_16_Q14(yy,yy);
-   yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
-   return yy;
-}
-#else
-/*extern const float tansig_table[501];*/
 static OPUS_INLINE float tansig_approx(float x)
 {
    int i;
@@ -92,54 +63,79 @@ static OPUS_INLINE float tansig_approx(float x)
    y = y + x*dy*(1 - y*x);
    return sign*y;
 }
-#endif
 
-#if 0
-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+static OPUS_INLINE float sigmoid_approx(float x)
 {
-   int j;
-   opus_val16 hidden[MAX_NEURONS];
-   const opus_val16 *W = m->weights;
-   /* Copy to tmp_in */
-   for (j=0;j<m->topo[1];j++)
-   {
-      int k;
-      opus_val32 sum = SHL32(EXTEND32(*W++),8);
-      for (k=0;k<m->topo[0];k++)
-         sum = MAC16_16(sum, in[k],*W++);
-      hidden[j] = tansig_approx(sum);
-   }
-   for (j=0;j<m->topo[2];j++)
-   {
-      int k;
-      opus_val32 sum = SHL32(EXTEND32(*W++),14);
-      for (k=0;k<m->topo[1];k++)
-         sum = MAC16_16(sum, hidden[k], *W++);
-      out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
-   }
+   return .5 + .5*tansig_approx(.5*x);
 }
-#else
-void mlp_process(const MLP *m, const float *in, float *out)
+
+void compute_dense(const DenseLayer *layer, float *output, const float *input)
 {
-   int j;
-   float hidden[MAX_NEURONS];
-   const float *W = m->weights;
-   /* Copy to tmp_in */
-   for (j=0;j<m->topo[1];j++)
-   {
-      int k;
-      float sum = *W++;
-      for (k=0;k<m->topo[0];k++)
-         sum = sum + in[k]**W++;
-      hidden[j] = tansig_approx(sum);
-   }
-   for (j=0;j<m->topo[2];j++)
-   {
-      int k;
-      float sum = *W++;
-      for (k=0;k<m->topo[1];k++)
-         sum = sum + hidden[k]**W++;
-      out[j] = tansig_approx(sum);
-   }
+   int i, j;
+   int N, M;
+   int stride;
+   M = layer->nb_inputs;
+   N = layer->nb_neurons;
+   stride = N;
+   for (i=0;i<N;i++)
+   {
+      float sum = layer->bias[i];
+      for (j=0;j<M;j++)
+         sum += layer->input_weights[j*stride + i]*input[j];
+      output[i] = WEIGHTS_SCALE*sum;
+   }
+   if (layer->sigmoid) {
+      for (i=0;i<N;i++)
+         output[i] = sigmoid_approx(output[i]);
+   } else {
+      for (i=0;i<N;i++)
+         output[i] = tansig_approx(output[i]);
+   }
+}
+
+void compute_gru(const GRULayer *gru, float *state, const float *input)
+{
+   int i, j;
+   int N, M;
+   int stride;
+   float z[MAX_NEURONS];
+   float r[MAX_NEURONS];
+   float h[MAX_NEURONS];
+   M = gru->nb_inputs;
+   N = gru->nb_neurons;
+   stride = 3*N;
+   for (i=0;i<N;i++)
+   {
+      /* Compute update gate. */
+      float sum = gru->bias[i];
+      for (j=0;j<M;j++)
+         sum += gru->input_weights[j*stride + i]*input[j];
+      for (j=0;j<N;j++)
+         sum += gru->recurrent_weights[j*stride + i]*state[j];
+      z[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
+   }
+   for (i=0;i<N;i++)
+   {
+      /* Compute reset gate. */
+      float sum = gru->bias[N + i];
+      for (j=0;j<M;j++)
+         sum += gru->input_weights[N + j*stride + i]*input[j];
+      for (j=0;j<N;j++)
+         sum += gru->recurrent_weights[N + j*stride + i]*state[j];
+      r[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
+   }
+   for (i=0;i<N;i++)
+   {
+      /* Compute output. */
+      float sum = gru->bias[2*N + i];
+      for (j=0;j<M;j++)
+         sum += gru->input_weights[2*N + j*stride + i]*input[j];
+      for (j=0;j<N;j++)
+         sum += gru->recurrent_weights[2*N + j*stride + i]*state[j]*r[j];
+      h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*sum);
+   }
+   for (i=0;i<N;i++)
+      state[i] = h[i];
 }
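For porting or debugging, compute_dense() and compute_gru() are straightforward to mirror in NumPy. The sketch below is not part of the patch; it assumes the conventions visible above: Q13 weights (WEIGHTS_SCALE taken to be 1/8192, matching the dump script's 8192 factor), dense weights stored as (inputs x neurons) with column stride N, GRU weights with stride 3*N and gates packed [update | reset | candidate], and the exact identity sigmoid(x) = .5 + .5*tanh(.5*x) used by sigmoid_approx():

    import numpy as np

    def sigmoid(x):
        return 1/(1 + np.exp(-x))   # == .5 + .5*np.tanh(.5*x)

    def dense(b, w, x, use_sigmoid):
        # b: (N,) Q13 bias, w: (M, N) Q13 weights, x: (M,) float input
        act = sigmoid if use_sigmoid else np.tanh
        return act((b + x @ w)/8192.0)

    def gru(b, w, r, state, x):
        # b: (3N,), w: (M, 3N), r: (N, 3N); gate columns packed [z | rg | h]
        N = len(state)
        z  = sigmoid((b[:N]     + x @ w[:, :N]     + state @ r[:, :N])/8192.0)
        rg = sigmoid((b[N:2*N]  + x @ w[:, N:2*N]  + state @ r[:, N:2*N])/8192.0)
        # Candidate: reset gate multiplies the state before the recurrent weights,
        # as in the state[j]*r[j] term of the C code.
        h = np.tanh((b[2*N:] + x @ w[:, 2*N:] + (rg*state) @ r[:, 2*N:])/8192.0)
        return z*state + (1 - z)*h   # new state, as in the final copy loop

Run once per analysis frame with the 25 features, carrying `state` across calls; the output should match the C code up to the interpolation error of tansig_approx()'s table lookup.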