From ed8e1b49370e39b776930918e5d5f7d6408c1a3c Mon Sep 17 00:00:00 2001
From: Jean-Marc Valin <jean-marc.valin@octasic.com>
Date: Thu, 8 Jul 2010 13:10:28 -0400
Subject: [PATCH] Support for 10ms frame size

---
 doc/draft-valin-codec-prototype.xml | 27 +++++++++++++++++----------
 src/hybrid_decoder.c                |  4 ++--
 src/hybrid_encoder.c                |  7 ++++---
 3 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/doc/draft-valin-codec-prototype.xml b/doc/draft-valin-codec-prototype.xml
index 99b64a2a..8ebda971 100644
--- a/doc/draft-valin-codec-prototype.xml
+++ b/doc/draft-valin-codec-prototype.xml
@@ -115,11 +115,11 @@ which references two other
 repositories (for SILK and CELT). Some snapshots are provided for 
 convenience at <eref target='http://people.xiph.org/~jm/ietfcodec/'/> along
 with sample files.
-Although the build system is very primitive
-(and inconsistent), some instructions are provided in the toplevel README file.
+Although the build system is very primitive, some instructions are provided 
+in the toplevel README file.
 This is very early development so both the quality and feature set should
-greatly improve over time. In the current version, only 48 kHz audio with 20 ms
-frames are supported, but support for all configurations listed in 
+greatly improve over time. In the current version, only 48 kHz audio is 
+supported, but support for all configurations listed in 
 <xref target="modes"></xref> is planned. 
 </t>
 </section>
@@ -176,11 +176,13 @@ There is thus a total of 30 configurations, so 5 bits are necessary (with 2 code
 indicate the mode, frame size and sampling rate (MFS). This leaves 3 bits for the number of frames per packets (codes 0 to 7):
 <list style="symbols">
 <t>0-2:  1-3 frames in the packet, each with equal compressed size</t>
-<t>3:    arbitrary number of frames in the packet, each with equal compressed size (size needs to be signalled)</t>
+<t>3:    arbitrary number of frames in the packet, each with equal compressed size (one size needs to be encoded)</t>
 <t>4-5:  2-3 frames in the packet, with different compressed sizes, which need to be encoded (except the last one)</t>
 <t>6:    arbitrary number of frames in the packet, with different compressed sizes, each of which needs to be encoded</t>
-<t>7:    The first frame has this MFS, but others have different MFS. Compressed sizes need to be encoded.</t>
+<t>7:    The first frame has this MFS, but others have different MFS. Each compressed size needs to be encoded.</t>
 </list>
+When code 7 is used and the last frames of a packet have the same MFS, it is 
+allowed to switch to another code for them.
 </t>
 
 <t>
@@ -248,7 +250,7 @@ Two frames of different compressed size:
 </t>
 
 <t>
-Three frames of different *durations*:
+Three frames of different <spanx style="emph">durations</spanx>:
 
 </t>
 
@@ -273,9 +275,14 @@ Three frames of different *durations*:
 <section anchor="security" title="Security Considerations">
 
 <t>
-The codec needs to take appropriate
-security considerations into account, for example as outlined in 
-<xref target="DOS"/> and <xref target="SECGUIDE"/>.
+The codec needs to take appropriate security considerations 
+into account, as outlined in <xref target="DOS"/> and <xref target="SECGUIDE"/>.
+It is extremely important for the decoder to be robust against malicious
+payloads. Malicious payloads must not cause the decoder to overrun its
+allocated memory or to take many more resources to decode. Although problems
+in encoders are typically rarer, the same applies to the encoder. Malicious
+audio streams must not cause the encoder to misbehave because this would
+allow an attacker to attack transcoding gateways.
 </t>
 
 </section> 
diff --git a/src/hybrid_decoder.c b/src/hybrid_decoder.c
index 9fdaf3ea..2d258a44 100644
--- a/src/hybrid_decoder.c
+++ b/src/hybrid_decoder.c
@@ -94,7 +94,7 @@ int hybrid_decode(HybridDecoder *st, const unsigned char *data,
             /* Handle error */
         }
     } else {
-        for (i=0;i<960;i++)
+        for (i=0;i<frame_size;i++)
             pcm[i] = 0;
     }
 
@@ -110,7 +110,7 @@ int hybrid_decode(HybridDecoder *st, const unsigned char *data,
     {
         /* Encode high band with CELT */
         celt_ret = celt_decode_with_ec(st->celt_dec, data, len, pcm_celt, frame_size, &dec);
-        for (i=0;i<960;i++)
+        for (i=0;i<frame_size;i++)
             pcm[i] += pcm_celt[i];
     }
 	return celt_ret;
diff --git a/src/hybrid_encoder.c b/src/hybrid_encoder.c
index 6c41a083..b2222c16 100644
--- a/src/hybrid_encoder.c
+++ b/src/hybrid_encoder.c
@@ -97,9 +97,10 @@ int hybrid_encode(HybridEncoder *st, const short *pcm, int frame_size,
 	if (st->mode != MODE_CELT_ONLY)
 	{
 	    st->encControl.bitRate = (bytes_per_packet*50*8+6000)/2;
+	    st->encControl.packetSize = frame_size;
 	    /* Call SILK encoder for the low band */
 	    nBytes = bytes_per_packet;
-	    ret = SKP_Silk_SDK_Encode( st->silk_enc, &st->encControl, pcm, 960, &enc, &nBytes );
+	    ret = SKP_Silk_SDK_Encode( st->silk_enc, &st->encControl, pcm, frame_size, &enc, &nBytes );
 	    if( ret ) {
 	        fprintf (stderr, "SILK encode error\n");
 	        /* Handle error */
@@ -121,7 +122,7 @@ int hybrid_encode(HybridEncoder *st, const short *pcm, int frame_size,
 
 	    for (i=0;i<ENCODER_DELAY_COMPENSATION;i++)
 	        buf[i] = st->delay_buffer[i];
-        for (;i<960;i++)
+        for (;i<frame_size;i++)
             buf[i] = pcm[i-ENCODER_DELAY_COMPENSATION];
 
         celt_encoder_ctl(st->celt_enc, CELT_SET_PREDICTION(1));
@@ -129,7 +130,7 @@ int hybrid_encode(HybridEncoder *st, const short *pcm, int frame_size,
 	    /* FIXME: Do some delay compensation here */
 	    ret = celt_encode_with_ec(st->celt_enc, buf, NULL, frame_size, data, bytes_per_packet, &enc);
 	    for (i=0;i<ENCODER_DELAY_COMPENSATION;i++)
-	        st->delay_buffer[i] = pcm[960-ENCODER_DELAY_COMPENSATION+i];
+	        st->delay_buffer[i] = pcm[frame_size-ENCODER_DELAY_COMPENSATION+i];
 	} else {
 	    ec_enc_done(&enc);
 	}