| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873 | 
							- <?xml version="1.0" encoding="utf-8"?>
 
- <!--
 
-    Copyright (c) 2012-2016 Xiph.Org Foundation and contributors
 
-    Redistribution and use in source and binary forms, with or without
 
-    modification, are permitted provided that the following conditions
 
-    are met:
 
-    - Redistributions of source code must retain the above copyright
 
-    notice, this list of conditions and the following disclaimer.
 
-    - Redistributions in binary form must reproduce the above copyright
 
-    notice, this list of conditions and the following disclaimer in the
 
-    documentation and/or other materials provided with the distribution.
 
-    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 
-    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 
-    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 
-    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 
-    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 
-    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 
-    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 
-    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 
-    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 
-    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 
-    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-    Special permission is granted to remove the above copyright notice, list of
 
-    conditions, and disclaimer when submitting this document, with or without
 
-    modification, to the IETF.
 
- -->
 
- <!DOCTYPE rfc SYSTEM 'rfc2629.dtd' [
 
- <!ENTITY rfc2119 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.2119.xml'>
 
- <!ENTITY rfc3533 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3533.xml'>
 
- <!ENTITY rfc3629 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.3629.xml'>
 
- <!ENTITY rfc4732 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.4732.xml'>
 
- <!ENTITY rfc5226 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.5226.xml'>
 
- <!ENTITY rfc5334 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.5334.xml'>
 
- <!ENTITY rfc6381 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.6381.xml'>
 
- <!ENTITY rfc6716 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.6716.xml'>
 
- <!ENTITY rfc6982 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.6982.xml'>
 
- <!ENTITY rfc7587 PUBLIC '' 'http://xml.resource.org/public/rfc/bibxml/reference.RFC.7587.xml'>
 
- ]>
 
- <?rfc toc="yes" symrefs="yes" ?>
 
- <rfc ipr="trust200902" category="std" docName="draft-ietf-codec-oggopus-14"
 
-  updates="5334">
 
- <front>
 
- <title abbrev="Ogg Opus">Ogg Encapsulation for the Opus Audio Codec</title>
 
- <author initials="T.B." surname="Terriberry" fullname="Timothy B. Terriberry">
 
- <organization>Mozilla Corporation</organization>
 
- <address>
 
- <postal>
 
- <street>650 Castro Street</street>
 
- <city>Mountain View</city>
 
- <region>CA</region>
 
- <code>94041</code>
 
- <country>USA</country>
 
- </postal>
 
- <phone>+1 650 903-0800</phone>
 
- <email>tterribe@xiph.org</email>
 
- </address>
 
- </author>
 
- <author initials="R." surname="Lee" fullname="Ron Lee">
 
- <organization>Voicetronix</organization>
 
- <address>
 
- <postal>
 
- <street>246 Pulteney Street, Level 1</street>
 
- <city>Adelaide</city>
 
- <region>SA</region>
 
- <code>5000</code>
 
- <country>Australia</country>
 
- </postal>
 
- <phone>+61 8 8232 9112</phone>
 
- <email>ron@debian.org</email>
 
- </address>
 
- </author>
 
- <author initials="R." surname="Giles" fullname="Ralph Giles">
 
- <organization>Mozilla Corporation</organization>
 
- <address>
 
- <postal>
 
- <street>163 West Hastings Street</street>
 
- <city>Vancouver</city>
 
- <region>BC</region>
 
- <code>V6B 1H5</code>
 
- <country>Canada</country>
 
- </postal>
 
- <phone>+1 778 785 1540</phone>
 
- <email>giles@xiph.org</email>
 
- </address>
 
- </author>
 
- <date day="22" month="February" year="2016"/>
 
- <area>RAI</area>
 
- <workgroup>codec</workgroup>
 
- <abstract>
 
- <t>
 
- This document defines the Ogg encapsulation for the Opus interactive speech and
 
-  audio codec.
 
- This allows data encoded in the Opus format to be stored in an Ogg logical
 
-  bitstream.
 
- </t>
 
- </abstract>
 
- </front>
 
- <middle>
 
- <section anchor="intro" title="Introduction">
 
- <t>
 
- The IETF Opus codec is a low-latency audio codec optimized for both voice and
 
-  general-purpose audio.
 
- See <xref target="RFC6716"/> for technical details.
 
- This document defines the encapsulation of Opus in a continuous, logical Ogg
 
-  bitstream <xref target="RFC3533"/>.
 
- Ogg encapsulation provides Opus with a long-term storage format supporting
 
-  all of the essential features, including metadata, fast and accurate seeking,
 
-  corruption detection, recapture after errors, low overhead, and the ability to
 
-  multiplex Opus with other codecs (including video) with minimal buffering.
 
- It also provides a live streamable format, capable of delivery over a reliable
 
-  stream-oriented transport, without requiring all the data, or even the total
 
-  length of the data, up-front, in a form that is identical to the on-disk
 
-  storage format.
 
- </t>
 
- <t>
 
- Ogg bitstreams are made up of a series of 'pages', each of which contains data
 
-  from one or more 'packets'.
 
- Pages are the fundamental unit of multiplexing in an Ogg stream.
 
- Each page is associated with a particular logical stream and contains a capture
 
-  pattern and checksum, flags to mark the beginning and end of the logical
 
-  stream, and a 'granule position' that represents an absolute position in the
 
-  stream, to aid seeking.
 
- A single page can contain up to 65,025 octets of packet data from up to 255
 
-  different packets.
 
- Packets can be split arbitrarily across pages, and continued from one page to
 
-  the next (allowing packets much larger than would fit on a single page).
 
- Each page contains 'lacing values' that indicate how the data is partitioned
 
-  into packets, allowing a demultiplexer (demuxer) to recover the packet
 
-  boundaries without examining the encoded data.
 
- A packet is said to 'complete' on a page when the page contains the final
 
-  lacing value corresponding to that packet.
 
- </t>
 
- <t>
 
- This encapsulation defines the contents of the packet data, including
 
-  the necessary headers, the organization of those packets into a logical
 
-  stream, and the interpretation of the codec-specific granule position field.
 
- It does not attempt to describe or specify the existing Ogg container format.
 
- Readers unfamiliar with the basic concepts mentioned above are encouraged to
 
-  review the details in <xref target="RFC3533"/>.
 
- </t>
 
- </section>
 
- <section anchor="terminology" title="Terminology">
 
- <t>
 
- The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD",
 
-  "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONAL" in this
 
-  document are to be interpreted as described in <xref target="RFC2119"/>.
 
- </t>
 
- </section>
 
- <section anchor="packet_organization" title="Packet Organization">
 
- <t>
 
- An Ogg Opus stream is organized as follows (see
 
-  <xref target="packet-org-example"/> for an example).
 
- </t>
 
- <figure anchor="packet-org-example"
 
-  title="Example packet organization for a logical Ogg Opus stream"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
-     Page 0         Pages 1 ... n        Pages (n+1) ...
 
-  +------------+ +---+ +---+ ... +---+ +-----------+ +---------+ +--
 
-  |            | |   | |   |     |   | |           | |         | |
 
-  |+----------+| |+-----------------+| |+-------------------+ +-----
 
-  ||ID Header|| ||  Comment Header || ||Audio Data Packet 1| | ...
 
-  |+----------+| |+-----------------+| |+-------------------+ +-----
 
-  |            | |   | |   |     |   | |           | |         | |
 
-  +------------+ +---+ +---+ ... +---+ +-----------+ +---------+ +--
 
-  ^      ^                           ^
 
-  |      |                           |
 
-  |      |                           Mandatory Page Break
 
-  |      |
 
-  |      ID header is contained on a single page
 
-  |
 
-  'Beginning Of Stream'
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- There are two mandatory header packets.
 
- The first packet in the logical Ogg bitstream MUST contain the identification
 
-  (ID) header, which uniquely identifies a stream as Opus audio.
 
- The format of this header is defined in <xref target="id_header"/>.
 
- It is placed alone (without any other packet data) on the first page of
 
-  the logical Ogg bitstream, and completes on that page.
 
- This page has its 'beginning of stream' flag set.
 
- </t>
 
- <t>
 
- The second packet in the logical Ogg bitstream MUST contain the comment header,
 
-  which contains user-supplied metadata.
 
- The format of this header is defined in <xref target="comment_header"/>.
 
- It MAY span multiple pages, beginning on the second page of the logical
 
-  stream.
 
- However many pages it spans, the comment header packet MUST finish the page on
 
-  which it completes.
 
- </t>
 
- <t>
 
- All subsequent pages are audio data pages, and the Ogg packets they contain are
 
-  audio data packets.
 
- Each audio data packet contains one Opus packet for each of N different
 
-  streams, where N is typically one for mono or stereo, but MAY be greater than
 
-  one for multichannel audio.
 
- The value N is specified in the ID header (see
 
-  <xref target="channel_mapping"/>), and is fixed over the entire length of the
 
-  logical Ogg bitstream.
 
- </t>
 
- <t>
 
- The first (N - 1) Opus packets, if any, are packed one after another
 
-  into the Ogg packet, using the self-delimiting framing from Appendix B of
 
-  <xref target="RFC6716"/>.
 
- The remaining Opus packet is packed at the end of the Ogg packet using the
 
-  regular, undelimited framing from Section 3 of <xref target="RFC6716"/>.
 
- All of the Opus packets in a single Ogg packet MUST be constrained to have the
 
-  same duration.
 
- An implementation of this specification SHOULD treat any Opus packet whose
 
-  duration is different from that of the first Opus packet in an Ogg packet as
 
-  if it were a malformed Opus packet with an invalid Table Of Contents (TOC)
 
-  sequence.
 
- </t>
 
- <t>
 
- The TOC sequence at the beginning of each Opus packet indicates the coding
 
-  mode, audio bandwidth, channel count, duration (frame size), and number of
 
-  frames per packet, as described in Section 3.1
 
-  of <xref target="RFC6716"/>.
 
- The coding mode is one of SILK, Hybrid, or Constrained Energy Lapped Transform
 
-  (CELT).
 
- The combination of coding mode, audio bandwidth, and frame size is referred to
 
-  as the configuration of an Opus packet.
 
- </t>
 
- <t>
 
- Packets are placed into Ogg pages in order until the end of stream.
 
- Audio data packets might span page boundaries.
 
- The first audio data page could have the 'continued packet' flag set
 
-  (indicating the first audio data packet is continued from a previous page) if,
 
-  for example, it was a live stream joined mid-broadcast, with the headers
 
-  pasted on the front.
 
- If a page has the 'continued packet' flag set and one of the following
 
-  conditions is also true:
 
- <list style="symbols">
 
- <t>the previous page with packet data does not end in a continued packet (does
 
-  not end with a lacing value of 255) OR</t>
 
- <t>the page sequence numbers are not consecutive,</t>
 
- </list>
 
-  then a demuxer MUST NOT attempt to decode the data for the first packet on the
 
-  page unless the demuxer has some special knowledge that would allow it to
 
-  interpret this data despite the missing pieces.
 
- An implementation MUST treat a zero-octet audio data packet as if it were a
 
-  malformed Opus packet as described in
 
-  Section 3.4 of <xref target="RFC6716"/>.
 
- </t>
 
- <t>
 
- A logical stream ends with a page with the 'end of stream' flag set, but
 
-  implementations need to be prepared to deal with truncated streams that do not
 
-  have a page marked 'end of stream'.
 
- There is no reason for the final packet on the last page to be a continued
 
-  packet, i.e., for the final lacing value to be 255.
 
- However, demuxers might encounter such streams, possibly as the result of a
 
-  transfer that did not complete or of corruption.
 
- If a packet continues onto a subsequent page (i.e., when the page ends with a
 
-  lacing value of 255) and one of the following conditions is also true:
 
- <list style="symbols">
 
- <t>the next page with packet data does not have the 'continued packet' flag
 
-  set OR</t>
 
- <t>there is no next page with packet data OR</t>
 
- <t>the page sequence numbers are not consecutive,</t>
 
- </list>
 
-  then a demuxer MUST NOT attempt to decode the data from that packet unless the
 
-  demuxer has some special knowledge that would allow it to interpret this data
 
-  despite the missing pieces.
 
- There MUST NOT be any more pages in an Opus logical bitstream after a page
 
-  marked 'end of stream'.
 
- </t>
 
- </section>
 
- <section anchor="granpos" title="Granule Position">
 
- <t>
 
- The granule position MUST be zero for the ID header page and the
 
-  page where the comment header completes.
 
- That is, the first page in the logical stream, and the last header
 
-  page before the first audio data page both have a granule position of zero.
 
- </t>
 
- <t>
 
- The granule position of an audio data page encodes the total number of PCM
 
-  samples in the stream up to and including the last fully-decodable sample from
 
-  the last packet completed on that page.
 
- The granule position of the first audio data page will usually be larger than
 
-  zero, as described in <xref target="start_granpos_restrictions"/>.
 
- </t>
 
- <t>
 
- A page that is entirely spanned by a single packet (that completes on a
 
-  subsequent page) has no granule position, and the granule position field is
 
-  set to the special value '-1' in two's complement.
 
- </t>
 
- <t>
 
- The granule position of an audio data page is in units of PCM audio samples at
 
-  a fixed rate of 48 kHz (per channel; a stereo stream's granule position
 
-  does not increment at twice the speed of a mono stream).
 
- It is possible to run an Opus decoder at other sampling rates,
 
-  but all Opus packets encode samples at a sampling rate that evenly divides
 
-  48 kHz.
 
- Therefore, the value in the granule position field always counts samples
 
-  assuming a 48 kHz decoding rate, and the rest of this specification makes
 
-  the same assumption.
 
- </t>
 
- <t>
 
- The duration of an Opus packet as defined in <xref target="RFC6716"/> can be
 
-  any multiple of 2.5 ms, up to a maximum of 120 ms.
 
- This duration is encoded in the TOC sequence at the beginning of each packet.
 
- The number of samples returned by a decoder corresponds to this duration
 
-  exactly, even for the first few packets.
 
- For example, a 20 ms packet fed to a decoder running at 48 kHz will
 
-  always return 960 samples.
 
- A demuxer can parse the TOC sequence at the beginning of each Ogg packet to
 
-  work backwards or forwards from a packet with a known granule position (i.e.,
 
-  the last packet completed on some page) in order to assign granule positions
 
-  to every packet, or even every individual sample.
 
- The one exception is the last page in the stream, as described below.
 
- </t>
 
- <t>
 
- All other pages with completed packets after the first MUST have a granule
 
-  position equal to the number of samples contained in packets that complete on
 
-  that page plus the granule position of the most recent page with completed
 
-  packets.
 
- This guarantees that a demuxer can assign individual packets the same granule
 
-  position when working forwards as when working backwards.
 
- For this to work, there cannot be any gaps.
 
- </t>
 
- <section anchor="gap-repair" title="Repairing Gaps in Real-time Streams">
 
- <t>
 
- In order to support capturing a real-time stream that has lost or not
 
-  transmitted packets, a multiplexer (muxer) SHOULD emit packets that explicitly
 
-  request the use of Packet Loss Concealment (PLC) in place of the missing
 
-  packets.
 
- Implementations that fail to do so still MUST NOT increment the granule
 
-  position for a page by anything other than the number of samples contained in
 
-  packets that actually complete on that page.
 
- </t>
 
- <t>
 
- Only gaps that are a multiple of 2.5 ms are repairable, as these are the
 
-  only durations that can be created by packet loss or discontinuous
 
-  transmission.
 
- Muxers need not handle other gap sizes.
 
- Creating the necessary packets involves synthesizing a TOC byte (defined in
 
- Section 3.1 of <xref target="RFC6716"/>)—and whatever
 
-  additional internal framing is needed—to indicate the packet duration
 
-  for each stream.
 
- The actual length of each missing Opus frame inside the packet is zero bytes,
 
-  as defined in Section 3.2.1 of <xref target="RFC6716"/>.
 
- </t>
 
- <t>
 
- Zero-byte frames MAY be packed into packets using any of codes 0, 1,
 
-  2, or 3.
 
- When successive frames have the same configuration, the higher code packings
 
-  reduce overhead.
 
- Likewise, if the TOC configuration matches, the muxer MAY further combine the
 
-  empty frames with previous or subsequent non-zero-length frames (using
 
-  code 2 or VBR code 3).
 
- </t>
 
- <t>
 
- <xref target="RFC6716"/> does not impose any requirements on the PLC, but this
 
-  section outlines choices that are expected to have a positive influence on
 
-  most PLC implementations, including the reference implementation.
 
- Synthesized TOC sequences SHOULD maintain the same mode, audio bandwidth,
 
-  channel count, and frame size as the previous packet (if any).
 
- This is the simplest and usually the most well-tested case for the PLC to
 
-  handle and it covers all losses that do not include a configuration switch,
 
-  as defined in Section 4.5 of <xref target="RFC6716"/>.
 
- </t>
 
- <t>
 
- When a previous packet is available, keeping the audio bandwidth and channel
 
-  count the same allows the PLC to provide maximum continuity in the concealment
 
-  data it generates.
 
- However, if the size of the gap is not a multiple of the most recent frame
 
-  size, then the frame size will have to change for at least some frames.
 
- Such changes SHOULD be delayed as long as possible to simplify
 
-  things for PLC implementations.
 
- </t>
 
- <t>
 
- As an example, a 95 ms gap could be encoded as nineteen 5 ms frames
 
-  in two bytes with a single CBR code 3 packet.
 
- If the previous frame size was 20 ms, using four 20 ms frames
 
-  followed by three 5 ms frames requires 4 bytes (plus an extra byte
 
-  of Ogg lacing overhead), but allows the PLC to use its well-tested steady
 
-  state behavior for as long as possible.
 
- The total bitrate of the latter approach, including Ogg overhead, is about
 
-  0.4 kbps, so the impact on file size is minimal.
 
- </t>
 
- <t>
 
- Changing modes is discouraged, since this causes some decoder implementations
 
-  to reset their PLC state.
 
- However, SILK and Hybrid mode frames cannot fill gaps that are not a multiple
 
-  of 10 ms.
 
- If switching to CELT mode is needed to match the gap size, a muxer SHOULD do
 
-  so at the end of the gap to allow the PLC to function for as long as possible.
 
- </t>
 
- <t>
 
- In the example above, if the previous frame was a 20 ms SILK mode frame,
 
-  the better solution is to synthesize a packet describing four 20 ms SILK
 
-  frames, followed by a packet with a single 10 ms SILK
 
-  frame, and finally a packet with a 5 ms CELT frame, to fill the 95 ms
 
-  gap.
 
- This also requires four bytes to describe the synthesized packet data (two
 
-  bytes for a CBR code 3 and one byte each for two code 0 packets) but three
 
-  bytes of Ogg lacing overhead are needed to mark the packet boundaries.
 
- At 0.6 kbps, this is still a minimal bitrate impact over a naive, low quality
 
-  solution.
 
- </t>
 
- <t>
 
- Since medium-band audio is an option only in the SILK mode, wideband frames
 
-  SHOULD be generated if switching from that configuration to CELT mode, to
 
-  ensure that any PLC implementation which does try to migrate state between
 
-  the modes will be able to preserve all of the available audio bandwidth.
 
- </t>
 
- </section>
 
- <section anchor="preskip" title="Pre-skip">
 
- <t>
 
- There is some amount of latency introduced during the decoding process, to
 
-  allow for overlap in the CELT mode, stereo mixing in the SILK mode, and
 
-  resampling.
 
- The encoder might have introduced additional latency through its own resampling
 
-  and analysis (though the exact amount is not specified).
 
- Therefore, the first few samples produced by the decoder do not correspond to
 
-  real input audio, but are instead composed of padding inserted by the encoder
 
-  to compensate for this latency.
 
- These samples need to be stored and decoded, as Opus is an asymptotically
 
-  convergent predictive codec, meaning the decoded contents of each frame depend
 
-  on the recent history of decoder inputs.
 
- However, a player will want to skip these samples after decoding them.
 
- </t>
 
- <t>
 
- A 'pre-skip' field in the ID header (see <xref target="id_header"/>) signals
 
-  the number of samples that SHOULD be skipped (decoded but discarded) at the
 
-  beginning of the stream, though some specific applications might have a reason
 
-  for looking at that data.
 
- This amount need not be a multiple of 2.5 ms, MAY be smaller than a single
 
-  packet, or MAY span the contents of several packets.
 
- These samples are not valid audio.
 
- </t>
 
- <t>
 
- For example, if the first Opus frame uses the CELT mode, it will always
 
-  produce 120 samples of windowed overlap-add data.
 
- However, the overlap data is initially all zeros (since there is no prior
 
-  frame), meaning this cannot, in general, accurately represent the original
 
-  audio.
 
- The SILK mode requires additional delay to account for its analysis and
 
-  resampling latency.
 
- The encoder delays the original audio to avoid this problem.
 
- </t>
 
- <t>
 
- The pre-skip field MAY also be used to perform sample-accurate cropping of
 
-  already encoded streams.
 
- In this case, a value of at least 3840 samples (80 ms) provides
 
-  sufficient history to the decoder that it will have converged
 
-  before the stream's output begins.
 
- </t>
 
- </section>
 
- <section anchor="pcm_sample_position" title="PCM Sample Position">
 
- <t>
 
- The PCM sample position is determined from the granule position using the
 
-  formula
 
- </t>
 
- <figure align="center">
 
- <artwork align="center"><![CDATA[
 
- 'PCM sample position' = 'granule position' - 'pre-skip' .
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- For example, if the granule position of the first audio data page is 59,971,
 
-  and the pre-skip is 11,971, then the PCM sample position of the last decoded
 
-  sample from that page is 48,000.
 
- </t>
 
- <t>
 
- This can be converted into a playback time using the formula
 
- </t>
 
- <figure align="center">
 
- <artwork align="center"><![CDATA[
 
-                   'PCM sample position'
 
- 'playback time' = --------------------- .
 
-                          48000.0
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- The initial PCM sample position before any samples are played is normally '0'.
 
- In this case, the PCM sample position of the first audio sample to be played
 
-  starts at '1', because it marks the time on the clock
 
-  <spanx style="emph">after</spanx> that sample has been played, and a stream
 
-  that is exactly one second long has a final PCM sample position of '48000',
 
-  as in the example here.
 
- </t>
 
- <t>
 
- Vorbis streams use a granule position smaller than the number of audio samples
 
-  contained in the first audio data page to indicate that some of those samples
 
-  are trimmed from the output (see <xref target="vorbis-trim"/>).
 
- However, to do so, Vorbis requires that the first audio data page contains
 
-  exactly two packets, in order to allow the decoder to perform PCM position
 
-  adjustments before needing to return any PCM data.
 
- Opus uses the pre-skip mechanism for this purpose instead, since the encoder
 
-  might introduce more than a single packet's worth of latency, and since very
 
-  large packets in streams with a very large number of channels might not fit
 
-  on a single page.
 
- </t>
 
- </section>
 
- <section anchor="end_trimming" title="End Trimming">
 
- <t>
 
- The page with the 'end of stream' flag set MAY have a granule position that
 
-  indicates the page contains less audio data than would normally be returned by
 
-  decoding up through the final packet.
 
- This is used to end the stream somewhere other than an even frame boundary.
 
- The granule position of the most recent audio data page with completed packets
 
-  is used to make this determination, or '0' is used if there were no previous
 
-  audio data pages with a completed packet.
 
- The difference between these granule positions indicates how many samples to
 
-  keep after decoding the packets that completed on the final page.
 
- The remaining samples are discarded.
 
- The number of discarded samples SHOULD be no larger than the number decoded
 
-  from the last packet.
 
- </t>
 
- </section>
 
- <section anchor="start_granpos_restrictions"
 
-  title="Restrictions on the Initial Granule Position">
 
- <t>
 
- The granule position of the first audio data page with a completed packet MAY
 
-  be larger than the number of samples contained in packets that complete on
 
-  that page; however, it MUST NOT be smaller, unless that page has the 'end of
 
-  stream' flag set.
 
- Allowing a granule position larger than the number of samples allows the
 
-  beginning of a stream to be cropped or a live stream to be joined without
 
-  rewriting the granule position of all the remaining pages.
 
- This means that the PCM sample position just before the first sample to be
 
-  played MAY be larger than '0'.
 
- Synchronization when multiplexing with other logical streams still uses the PCM
 
-  sample position relative to '0' to compute sample times.
 
- This does not affect the behavior of pre-skip: exactly 'pre-skip' samples
 
-  SHOULD be skipped from the beginning of the decoded output, even if the
 
-  initial PCM sample position is greater than zero.
 
- </t>
 
- <t>
 
- On the other hand, a granule position that is smaller than the number of
 
-  decoded samples prevents a demuxer from working backwards to assign each
 
-  packet or each individual sample a valid granule position, since granule
 
-  positions are non-negative.
 
- An implementation MUST treat any stream as invalid if the granule position
 
-  is smaller than the number of samples contained in packets that complete on
 
-  the first audio data page with a completed packet, unless that page has the
 
-  'end of stream' flag set.
 
- It MAY defer this action until it decodes the last packet completed on that
 
-  page.
 
- </t>
 
- <t>
 
- If that page has the 'end of stream' flag set, a demuxer MUST treat any stream
 
-  as invalid if its granule position is smaller than the 'pre-skip' amount.
 
- This would indicate that there are more samples to be skipped from the initial
 
-  decoded output than exist in the stream.
 
- If the granule position is smaller than the number of decoded samples produced
 
-  by the packets that complete on that page, then a demuxer MUST use an initial
 
-  granule position of '0', and can work forwards from '0' to timestamp
 
-  individual packets.
 
- If the granule position is larger than the number of decoded samples available,
 
-  then the demuxer MUST still work backwards as described above, even if the
 
-  'end of stream' flag is set, to determine the initial granule position, and
 
-  thus the initial PCM sample position.
 
- Both of these will be greater than '0' in this case.
 
- </t>
 
- </section>
 
- <section anchor="seeking_and_preroll" title="Seeking and Pre-roll">
 
- <t>
 
- Seeking in Ogg files is best performed using a bisection search for a page
 
-  whose granule position corresponds to a PCM position at or before the seek
 
-  target.
 
- With appropriately weighted bisection, accurate seeking can be performed in
 
-  just one or two bisections on average, even in multi-gigabyte files.
 
- See <xref target="seeking"/> for an example of general implementation guidance.
 
- </t>
 
- <t>
 
- When seeking within an Ogg Opus stream, an implementation SHOULD start decoding
 
-  (and discarding the output) at least 3840 samples (80 ms) prior to
 
-  the seek target in order to ensure that the output audio is correct by the
 
-  time it reaches the seek target.
 
- This 'pre-roll' is separate from, and unrelated to, the 'pre-skip' used at the
 
-  beginning of the stream.
 
- If the point 80 ms prior to the seek target comes before the initial PCM
 
-  sample position, an implementation SHOULD start decoding from the beginning of
 
-  the stream, applying pre-skip as normal, regardless of whether the pre-skip is
 
-  larger or smaller than 80 ms, and then continue to discard samples
 
-  to reach the seek target (if any).
 
- </t>
 
- </section>
 
- </section>
 
- <section anchor="headers" title="Header Packets">
 
- <t>
 
- An Ogg Opus logical stream contains exactly two mandatory header packets:
 
-  an identification header and a comment header.
 
- </t>
 
- <section anchor="id_header" title="Identification Header">
 
- <figure anchor="id_header_packet" title="ID Header Packet" align="center">
 
- <artwork align="center"><![CDATA[
 
-  0                   1                   2                   3
 
-  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |      'O'      |      'p'      |      'u'      |      's'      |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |      'H'      |      'e'      |      'a'      |      'd'      |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |  Version = 1  | Channel Count |           Pre-skip            |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |                     Input Sample Rate (Hz)                    |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |   Output Gain (Q7.8 in dB)    | Mapping Family|               |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+               :
 
- |                                                               |
 
- :               Optional Channel Mapping Table...               :
 
- |                                                               |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- The fields in the identification (ID) header have the following meaning:
 
- <list style="numbers">
 
- <t>Magic Signature:
 
- <vspace blankLines="1"/>
 
- This is an 8-octet (64-bit) field that allows codec identification and is
 
-  human-readable.
 
- It contains, in order, the magic numbers:
 
- <list style="empty">
 
- <t>0x4F 'O'</t>
 
- <t>0x70 'p'</t>
 
- <t>0x75 'u'</t>
 
- <t>0x73 's'</t>
 
- <t>0x48 'H'</t>
 
- <t>0x65 'e'</t>
 
- <t>0x61 'a'</t>
 
- <t>0x64 'd'</t>
 
- </list>
 
- Starting with "Op" helps distinguish it from audio data packets, as this is an
 
-  invalid TOC sequence.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Version (8 bits, unsigned):
 
- <vspace blankLines="1"/>
 
- The version number MUST always be '1' for this version of the encapsulation
 
-  specification.
 
- Implementations SHOULD treat streams where the upper four bits of the version
 
-  number match that of a recognized specification as backwards-compatible with
 
-  that specification.
 
- That is, the version number can be split into "major" and "minor" version
 
-  sub-fields, with changes to the "minor" sub-field (in the lower four bits)
 
-  signaling compatible changes.
 
- For example, an implementation of this specification SHOULD accept any stream
 
-  with a version number of '15' or less, and SHOULD assume any stream with a
 
-  version number '16' or greater is incompatible.
 
- The initial version '1' was chosen to keep implementations from relying on this
 
-  octet as a null terminator for the "OpusHead" string.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Output Channel Count 'C' (8 bits, unsigned):
 
- <vspace blankLines="1"/>
 
- This is the number of output channels.
 
- This might be different than the number of encoded channels, which can change
 
-  on a packet-by-packet basis.
 
- This value MUST NOT be zero.
 
- The maximum allowable value depends on the channel mapping family, and might be
 
-  as large as 255.
 
- See <xref target="channel_mapping"/> for details.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Pre-skip (16 bits, unsigned, little
 
-  endian):
 
- <vspace blankLines="1"/>
 
- This is the number of samples (at 48 kHz) to discard from the decoder
 
-  output when starting playback, and also the number to subtract from a page's
 
-  granule position to calculate its PCM sample position.
 
- When cropping the beginning of existing Ogg Opus streams, a pre-skip of at
 
-  least 3,840 samples (80 ms) is RECOMMENDED to ensure complete
 
-  convergence in the decoder.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Input Sample Rate (32 bits, unsigned, little
 
-  endian):
 
- <vspace blankLines="1"/>
 
- This is the sample rate of the original input (before encoding), in Hz.
 
- This field is <spanx style="emph">not</spanx> the sample rate to use for
 
-  playback of the encoded data.
 
- <vspace blankLines="1"/>
 
- Opus can switch between internal audio bandwidths of 4, 6, 8, 12, and
 
-  20 kHz.
 
- Each packet in the stream can have a different audio bandwidth.
 
- Regardless of the audio bandwidth, the reference decoder supports decoding any
 
-  stream at a sample rate of 8, 12, 16, 24, or 48 kHz.
 
- The original sample rate of the audio passed to the encoder is not preserved
 
-  by the lossy compression.
 
- <vspace blankLines="1"/>
 
- An Ogg Opus player SHOULD select the playback sample rate according to the
 
-  following procedure:
 
- <list style="numbers">
 
- <t>If the hardware supports 48 kHz playback, decode at 48 kHz.</t>
 
- <t>Otherwise, if the hardware's highest available sample rate is a supported
 
-  rate, decode at this sample rate.</t>
 
- <t>Otherwise, if the hardware's highest available sample rate is less than
 
-  48 kHz, decode at the next higher Opus supported rate above the highest
 
-  available hardware rate and resample.</t>
 
- <t>Otherwise, decode at 48 kHz and resample.</t>
 
- </list>
 
- However, the 'Input Sample Rate' field allows the muxer to pass the sample
 
-  rate of the original input stream as metadata.
 
- This is useful when the user requires the output sample rate to match the
 
-  input sample rate.
 
- For example, when not playing the output, an implementation writing PCM format
 
-  samples to disk might choose to resample the audio back to the original input
 
-  sample rate to reduce surprise to the user, who might reasonably expect to get
 
-  back a file with the same sample rate.
 
- <vspace blankLines="1"/>
 
- A value of zero indicates 'unspecified'.
 
- Muxers SHOULD write the actual input sample rate or zero, but implementations
 
-  which do something with this field SHOULD take care to behave sanely if given
 
-  crazy values (e.g., do not actually upsample the output to 10 MHz if
 
-  requested).
 
- Implementations SHOULD support input sample rates between 8 kHz and
 
-  192 kHz (inclusive).
 
- Rates outside this range MAY be ignored by falling back to the default rate of
 
-  48 kHz instead.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Output Gain (16 bits, signed, little endian):
 
- <vspace blankLines="1"/>
 
- This is a gain to be applied when decoding.
 
- It is 20*log10 of the factor by which to scale the decoder output to achieve
 
-  the desired playback volume, stored in a 16-bit, signed, two's complement
 
-  fixed-point value with 8 fractional bits (i.e.,
 
-  Q7.8 <xref target="q-notation"/>).
 
- <vspace blankLines="1"/>
 
- To apply the gain, an implementation could use
 
- <figure align="center">
 
- <artwork align="center"><![CDATA[
 
- sample *= pow(10, output_gain/(20.0*256)) ,
 
- ]]></artwork>
 
- </figure>
 
-  where output_gain is the raw 16-bit value from the header.
 
- <vspace blankLines="1"/>
 
- Players and media frameworks SHOULD apply it by default.
 
- If a player chooses to apply any volume adjustment or gain modification, such
 
-  as the R128_TRACK_GAIN (see <xref target="comment_header"/>), the adjustment
 
-  MUST be applied in addition to this output gain in order to achieve playback
 
-  at the normalized volume.
 
- <vspace blankLines="1"/>
 
- A muxer SHOULD set this field to zero, and instead apply any gain prior to
 
-  encoding, when this is possible and does not conflict with the user's wishes.
 
- A nonzero output gain indicates the gain was adjusted after encoding, or that
 
-  a user wished to adjust the gain for playback while preserving the ability
 
-  to recover the original signal amplitude.
 
- <vspace blankLines="1"/>
 
- Although the output gain has enormous range (+/- 128 dB, enough to amplify
 
-  inaudible sounds to the threshold of physical pain), most applications can
 
-  only reasonably use a small portion of this range around zero.
 
- The large range serves in part to ensure that gain can always be losslessly
 
-  transferred between OpusHead and R128 gain tags (see below) without
 
-  saturating.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Channel Mapping Family (8 bits, unsigned):
 
- <vspace blankLines="1"/>
 
- This octet indicates the order and semantic meaning of the output channels.
 
- <vspace blankLines="1"/>
 
- Each currently specified value of this octet indicates a mapping family, which
 
-  defines a set of allowed channel counts, and the ordered set of channel names
 
-  for each allowed channel count.
 
- The details are described in <xref target="channel_mapping"/>.
 
- </t>
 
- <t>Channel Mapping Table:
 
- This table defines the mapping from encoded streams to output channels.
 
- Its contents are specified in <xref target="channel_mapping"/>.
 
- </t>
 
- </list>
 
- </t>
 
- <t>
 
- All fields in the ID headers are REQUIRED, except for the channel mapping
 
-  table, which MUST be omitted when the channel mapping family is 0, but
 
-  is REQUIRED otherwise.
 
- Implementations SHOULD treat a stream as invalid if it contains an ID header
 
-  that does not have enough data for these fields, even if it contains a valid
 
-  Magic Signature.
 
- Future versions of this specification, even backwards-compatible versions,
 
-  might include additional fields in the ID header.
 
- If an ID header has a compatible major version, but a larger minor version,
 
-  an implementation MUST NOT treat it as invalid for containing additional data
 
-  not specified here, provided it still completes on the first page.
 
- </t>
 
- <section anchor="channel_mapping" title="Channel Mapping">
 
- <t>
 
- An Ogg Opus stream allows mapping one number of Opus streams (N) to a possibly
 
-  larger number of decoded channels (M + N) to yet another number of
 
-  output channels (C), which might be larger or smaller than the number of
 
-  decoded channels.
 
- The order and meaning of these channels are defined by a channel mapping,
 
-  which consists of the 'channel mapping family' octet and, for channel mapping
 
-  families other than family 0, a channel mapping table, as illustrated in
 
-  <xref target="channel_mapping_table"/>.
 
- </t>
 
- <figure anchor="channel_mapping_table" title="Channel Mapping Table"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
-  0                   1                   2                   3
 
-  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 
-                                                 +-+-+-+-+-+-+-+-+
 
-                                                 | Stream Count  |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- | Coupled Count |              Channel Mapping...               :
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- The fields in the channel mapping table have the following meaning:
 
- <list style="numbers" counter="8">
 
- <t>Stream Count 'N' (8 bits, unsigned):
 
- <vspace blankLines="1"/>
 
- This is the total number of streams encoded in each Ogg packet.
 
- This value is necessary to correctly parse the packed Opus packets inside an
 
-  Ogg packet, as described in <xref target="packet_organization"/>.
 
- This value MUST NOT be zero, as without at least one Opus packet with a valid
 
-  TOC sequence, a demuxer cannot recover the duration of an Ogg packet.
 
- <vspace blankLines="1"/>
 
- For channel mapping family 0, this value defaults to 1, and is not coded.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Coupled Stream Count 'M' (8 bits, unsigned):
 
- This is the number of streams whose decoders are to be configured to produce
 
-  two channels (stereo).
 
- This MUST be no larger than the total number of streams, N.
 
- <vspace blankLines="1"/>
 
- Each packet in an Opus stream has an internal channel count of 1 or 2, which
 
-  can change from packet to packet.
 
- This is selected by the encoder depending on the bitrate and the audio being
 
-  encoded.
 
- The original channel count of the audio passed to the encoder is not
 
-  necessarily preserved by the lossy compression.
 
- <vspace blankLines="1"/>
 
- Regardless of the internal channel count, any Opus stream can be decoded as
 
-  mono (a single channel) or stereo (two channels) by appropriate initialization
 
-  of the decoder.
 
- The 'coupled stream count' field indicates that the decoders for the first M
 
-  Opus streams are to be initialized for stereo (two-channel) output, and the
 
-  remaining (N - M) decoders are to be initialized for mono (a single
 
-  channel) only.
 
- The total number of decoded channels, (M + N), MUST be no larger than
 
-  255, as there is no way to index more channels than that in the channel
 
-  mapping.
 
- <vspace blankLines="1"/>
 
- For channel mapping family 0, this value defaults to (C - 1)
 
-  (i.e., 0 for mono and 1 for stereo), and is not coded.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Channel Mapping (8*C bits):
 
- This contains one octet per output channel, indicating which decoded channel
 
-  is to be used for each one.
 
- Let 'index' be the value of this octet for a particular output channel.
 
- This value MUST either be smaller than (M + N), or be the special
 
-  value 255.
 
- If 'index' is less than 2*M, the output MUST be taken from decoding stream
 
-  ('index'/2) as stereo and selecting the left channel if 'index' is even, and
 
-  the right channel if 'index' is odd.
 
- If 'index' is 2*M or larger, but less than 255, the output MUST be taken from
 
-  decoding stream ('index' - M) as mono.
 
- If 'index' is 255, the corresponding output channel MUST contain pure silence.
 
- <vspace blankLines="1"/>
 
- The number of output channels, C, is not constrained to match the number of
 
-  decoded channels (M + N).
 
- A single index value MAY appear multiple times, i.e., the same decoded channel
 
-  might be mapped to multiple output channels.
 
- Some decoded channels might not be assigned to any output channel, as well.
 
- <vspace blankLines="1"/>
 
- For channel mapping family 0, the first index defaults to 0, and if
 
-  C == 2, the second index defaults to 1.
 
- Neither index is coded.
 
- </t>
 
- </list>
 
- </t>
 
- <t>
 
- After producing the output channels, the channel mapping family determines the
 
-  semantic meaning of each one.
 
- There are three defined mapping families in this specification.
 
- </t>
 
- <section anchor="channel_mapping_0" title="Channel Mapping Family 0">
 
- <t>
 
- Allowed numbers of channels: 1 or 2.
 
- RTP mapping.
 
- This is the same channel interpretation as <xref target="RFC7587"/>.
 
- </t>
 
- <t>
 
- <list style="symbols">
 
- <t>1 channel: monophonic (mono).</t>
 
- <t>2 channels: stereo (left, right).</t>
 
- </list>
 
- Special mapping: This channel mapping value also
 
-  indicates that the content consists of a single Opus stream that is stereo if
 
-  and only if C == 2, with stream index 0 mapped to output
 
-  channel 0 (mono, or left channel) and stream index 1 mapped to
 
-  output channel 1 (right channel) if stereo.
 
- When the 'channel mapping family' octet has this value, the channel mapping
 
-  table MUST be omitted from the ID header packet.
 
- </t>
 
- </section>
 
- <section anchor="channel_mapping_1" title="Channel Mapping Family 1">
 
- <t>
 
- Allowed numbers of channels: 1...8.
 
- Vorbis channel order (see below).
 
- </t>
 
- <t>
 
- Each channel is assigned to a speaker location in a conventional surround
 
-  arrangement.
 
- Specific locations depend on the number of channels, and are given below
 
-  in order of the corresponding channel indices.
 
- <list style="symbols">
 
-   <t>1 channel: monophonic (mono).</t>
 
-   <t>2 channels: stereo (left, right).</t>
 
-   <t>3 channels: linear surround (left, center, right).</t>
 
-   <t>4 channels: quadraphonic (front left, front right, rear left, rear right).</t>
 
-   <t>5 channels: 5.0 surround (front left, front center, front right, rear left, rear right).</t>
 
-   <t>6 channels: 5.1 surround (front left, front center, front right, rear left, rear right, LFE).</t>
 
-   <t>7 channels: 6.1 surround (front left, front center, front right, side left, side right, rear center, LFE).</t>
 
-   <t>8 channels: 7.1 surround (front left, front center, front right, side left, side right, rear left, rear right, LFE).</t>
 
- </list>
 
- </t>
 
- <t>
 
- This set of surround options and speaker location orderings is the same
 
-  as those used by the Vorbis codec <xref target="vorbis-mapping"/>.
 
- The ordering is different from the one used by the
 
-  WAVE <xref target="wave-multichannel"/> and
 
-  Free Lossless Audio Codec (FLAC) <xref target="flac"/> formats,
 
-  so correct ordering requires permutation of the output channels when decoding
 
-  to or encoding from those formats.
 
- 'LFE' here refers to a Low Frequency Effects channel, often mapped to a
 
-   subwoofer with no particular spatial position.
 
- Implementations SHOULD identify 'side' or 'rear' speaker locations with
 
-  'surround' and 'back' as appropriate when interfacing with audio formats
 
-  or systems which prefer that terminology.
 
- </t>
 
- </section>
 
- <section anchor="channel_mapping_255"
 
-  title="Channel Mapping Family 255">
 
- <t>
 
- Allowed numbers of channels: 1...255.
 
- No defined channel meaning.
 
- </t>
 
- <t>
 
- Channels are unidentified.
 
- General-purpose players SHOULD NOT attempt to play these streams.
 
- Offline implementations MAY deinterleave the output into separate PCM files,
 
-  one per channel.
 
- Implementations SHOULD NOT produce output for channels mapped to stream index
 
-  255 (pure silence) unless they have no other way to indicate the index of
 
-  non-silent channels.
 
- </t>
 
- </section>
 
- <section anchor="channel_mapping_undefined"
 
-  title="Undefined Channel Mappings">
 
- <t>
 
- The remaining channel mapping families (2...254) are reserved.
 
- A demuxer implementation encountering a reserved channel mapping family value
 
-  SHOULD act as though the value is 255.
 
- </t>
 
- </section>
 
- <section anchor="downmix" title="Downmixing">
 
- <t>
 
- An Ogg Opus player MUST support any valid channel mapping with a channel
 
-  mapping family of 0 or 1, even if the number of channels does not match the
 
-  physically connected audio hardware.
 
- Players SHOULD perform channel mixing to increase or reduce the number of
 
-  channels as needed.
 
- </t>
 
- <t>
 
- Implementations MAY use the matrices in
 
-  Figures <xref target="downmix-matrix-3" format="counter"/>
 
-  through <xref target="downmix-matrix-8" format="counter"/> to implement
 
-  downmixing from multichannel files using
 
-  <xref target="channel_mapping_1">Channel Mapping Family 1</xref>, which are
 
-  known to give acceptable results for stereo.
 
- Matrices for 3 and 4 channels are normalized so each coefficient row sums
 
-  to 1 to avoid clipping.
 
- For 5 or more channels they are normalized to 2 as a compromise between
 
-  clipping and dynamic range reduction.
 
- </t>
 
- <t>
 
- In these matrices the front left and front right channels are generally
 
- passed through directly.
 
- When a surround channel is split between both the left and right stereo
 
-  channels, coefficients are chosen so their squares sum to 1, which
 
-  helps preserve the perceived intensity.
 
- Rear channels are mixed more diffusely or attenuated to maintain focus
 
-  on the front channels.
 
- </t>
 
- <figure anchor="downmix-matrix-3"
 
-  title="Stereo downmix matrix for the linear surround channel mapping"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
- L output = ( 0.585786 * left + 0.414214 * center                    )
 
- R output = (                   0.414214 * center + 0.585786 * right )
 
- ]]></artwork>
 
- <postamble>
 
- Exact coefficient values are 1 and 1/sqrt(2), multiplied by
 
-  1/(1 + 1/sqrt(2)) for normalization.
 
- </postamble>
 
- </figure>
 
- <figure anchor="downmix-matrix-4"
 
-  title="Stereo downmix matrix for the quadraphonic channel mapping"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
- /          \   /                                     \ / FL \
 
- | L output |   | 0.422650 0.000000 0.366025 0.211325 | | FR |
 
- | R output | = | 0.000000 0.422650 0.211325 0.366025 | | RL |
 
- \          /   \                                     / \ RR /
 
- ]]></artwork>
 
- <postamble>
 
- Exact coefficient values are 1, sqrt(3)/2 and 1/2, multiplied by
 
-  1/(1 + sqrt(3)/2 + 1/2) for normalization.
 
- </postamble>
 
- </figure>
 
- <figure anchor="downmix-matrix-5"
 
-  title="Stereo downmix matrix for the 5.0 surround mapping"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
-                                                          / FL \
 
- /   \   /                                              \ | FC |
 
- | L |   | 0.650802 0.460186 0.000000 0.563611 0.325401 | | FR |
 
- | R | = | 0.000000 0.460186 0.650802 0.325401 0.563611 | | RL |
 
- \   /   \                                              / | RR |
 
-                                                          \    /
 
- ]]></artwork>
 
- <postamble>
 
- Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2 and 1/2, multiplied by
 
-  2/(1 + 1/sqrt(2) + sqrt(3)/2 + 1/2)
 
-  for normalization.
 
- </postamble>
 
- </figure>
 
- <figure anchor="downmix-matrix-6"
 
-  title="Stereo downmix matrix for the 5.1 surround mapping"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
-                                                                 /FL \
 
- / \   /                                                       \ |FC |
 
- |L|   | 0.529067 0.374107 0.000000 0.458186 0.264534 0.374107 | |FR |
 
- |R| = | 0.000000 0.374107 0.529067 0.264534 0.458186 0.374107 | |RL |
 
- \ /   \                                                       / |RR |
 
-                                                                 \LFE/
 
- ]]></artwork>
 
- <postamble>
 
- Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2 and 1/2, multiplied by
 
- 2/(1 + 1/sqrt(2) + sqrt(3)/2 + 1/2 + 1/sqrt(2))
 
-  for normalization.
 
- </postamble>
 
- </figure>
 
- <figure anchor="downmix-matrix-7"
 
-  title="Stereo downmix matrix for the 6.1 surround mapping"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
-  /                                                                \
 
-  | 0.455310 0.321953 0.000000 0.394310 0.227655 0.278819 0.321953 |
 
-  | 0.000000 0.321953 0.455310 0.227655 0.394310 0.278819 0.321953 |
 
-  \                                                                /
 
- ]]></artwork>
 
- <postamble>
 
- Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2, 1/2 and
 
-  sqrt(3)/2/sqrt(2), multiplied by
 
-  2/(1 + 1/sqrt(2) + sqrt(3)/2 + 1/2 +
 
-  sqrt(3)/2/sqrt(2) + 1/sqrt(2)) for normalization.
 
- The coefficients are in the same order as in <xref target="channel_mapping_1" />,
 
-  and the matrices above.
 
- </postamble>
 
- </figure>
 
- <figure anchor="downmix-matrix-8"
 
-  title="Stereo downmix matrix for the 7.1 surround mapping"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
- /                                                                 \
 
- | .388631 .274804 .000000 .336565 .194316 .336565 .194316 .274804 |
 
- | .000000 .274804 .388631 .194316 .336565 .194316 .336565 .274804 |
 
- \                                                                 /
 
- ]]></artwork>
 
- <postamble>
 
- Exact coefficient values are 1, 1/sqrt(2), sqrt(3)/2 and 1/2, multiplied by
 
-  2/(2 + 2/sqrt(2) + sqrt(3)) for normalization.
 
- The coefficients are in the same order as in <xref target="channel_mapping_1" />,
 
-  and the matrices above.
 
- </postamble>
 
- </figure>
 
- </section>
 
- </section> <!-- end channel_mapping -->
 
- </section> <!-- end id_header -->
 
- <section anchor="comment_header" title="Comment Header">
 
- <figure anchor="comment_header_packet" title="Comment Header Packet"
 
-  align="center">
 
- <artwork align="center"><![CDATA[
 
-  0                   1                   2                   3
 
-  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |      'O'      |      'p'      |      'u'      |      's'      |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |      'T'      |      'a'      |      'g'      |      's'      |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |                     Vendor String Length                      |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |                                                               |
 
- :                        Vendor String...                       :
 
- |                                                               |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |                   User Comment List Length                    |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |                 User Comment #0 String Length                 |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |                                                               |
 
- :                   User Comment #0 String...                   :
 
- |                                                               |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- |                 User Comment #1 String Length                 |
 
- +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 
- :                                                               :
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- The comment header consists of a 64-bit magic signature, followed by data in
 
-  the same format as the <xref target="vorbis-comment"/> header used in Ogg
 
-  Vorbis, except (like Ogg Theora and Speex) the final "framing bit" specified
 
-  in the Vorbis spec is not present.
 
- <list style="numbers">
 
- <t>Magic Signature:
 
- <vspace blankLines="1"/>
 
- This is an 8-octet (64-bit) field that allows codec identification and is
 
-  human-readable.
 
- It contains, in order, the magic numbers:
 
- <list style="empty">
 
- <t>0x4F 'O'</t>
 
- <t>0x70 'p'</t>
 
- <t>0x75 'u'</t>
 
- <t>0x73 's'</t>
 
- <t>0x54 'T'</t>
 
- <t>0x61 'a'</t>
 
- <t>0x67 'g'</t>
 
- <t>0x73 's'</t>
 
- </list>
 
- Starting with "Op" helps distinguish it from audio data packets, as this is an
 
-  invalid TOC sequence.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Vendor String Length (32 bits, unsigned, little endian):
 
- <vspace blankLines="1"/>
 
- This field gives the length of the following vendor string, in octets.
 
- It MUST NOT indicate that the vendor string is longer than the rest of the
 
-  packet.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>Vendor String (variable length, UTF-8 vector):
 
- <vspace blankLines="1"/>
 
- This is a simple human-readable tag for vendor information, encoded as a UTF-8
 
-  string <xref target="RFC3629"/>.
 
- No terminating null octet is necessary.
 
- <vspace blankLines="1"/>
 
- This tag is intended to identify the codec encoder and encapsulation
 
-  implementations, for tracing differences in technical behavior.
 
- User-facing applications can use the 'ENCODER' user comment tag to identify
 
-  themselves.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>User Comment List Length (32 bits, unsigned, little endian):
 
- <vspace blankLines="1"/>
 
- This field indicates the number of user-supplied comments.
 
- It MAY indicate there are zero user-supplied comments, in which case there are
 
-  no additional fields in the packet.
 
- It MUST NOT indicate that there are so many comments that the comment string
 
-  lengths would require more data than is available in the rest of the packet.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>User Comment #i String Length (32 bits, unsigned, little endian):
 
- <vspace blankLines="1"/>
 
- This field gives the length of the following user comment string, in octets.
 
- There is one for each user comment indicated by the 'user comment list length'
 
-  field.
 
- It MUST NOT indicate that the string is longer than the rest of the packet.
 
- <vspace blankLines="1"/>
 
- </t>
 
- <t>User Comment #i String (variable length, UTF-8 vector):
 
- <vspace blankLines="1"/>
 
- This field contains a single user comment encoded as a UTF-8
 
-  string <xref target="RFC3629"/>.
 
- There is one for each user comment indicated by the 'user comment list length'
 
-  field.
 
- </t>
 
- </list>
 
- </t>
 
- <t>
 
- The vendor string length and user comment list length are REQUIRED, and
 
-  implementations SHOULD treat a stream as invalid if it contains a comment
 
-  header that does not have enough data for these fields, or that does not
 
-  contain enough data for the corresponding vendor string or user comments they
 
-  describe.
 
- Making this check before allocating the associated memory to contain the data
 
-  helps prevent a possible Denial-of-Service (DoS) attack from small comment
 
-  headers that claim to contain strings longer than the entire packet or more
 
-  user comments than could possibly fit in the packet.
 
- </t>
 
- <t>
 
- Immediately following the user comment list, the comment header MAY
 
-  contain zero-padding or other binary data which is not specified here.
 
- If the least-significant bit of the first byte of this data is 1, then editors
 
-  SHOULD preserve the contents of this data when updating the tags, but if this
 
-  bit is 0, all such data MAY be treated as padding, and truncated or discarded
 
-  as desired.
 
- This allows informal experimentation with the format of this binary data until
 
-  it can be specified later.
 
- </t>
 
- <t>
 
- The comment header can be arbitrarily large and might be spread over a large
 
-  number of Ogg pages.
 
- Implementations MUST avoid attempting to allocate excessive amounts of memory
 
-  when presented with a very large comment header.
 
- To accomplish this, implementations MAY treat a stream as invalid if it has a
 
-  comment header larger than 125,829,120 octets (120 MB), and MAY
 
-  ignore individual comments that are not fully contained within the first
 
-  61,440 octets of the comment header.
 
- </t>
 
- <section anchor="comment_format" title="Tag Definitions">
 
- <t>
 
- The user comment strings follow the NAME=value format described by
 
-  <xref target="vorbis-comment"/> with the same recommended tag names:
 
-  ARTIST, TITLE, DATE, ALBUM, and so on.
 
- </t>
 
- <t>
 
- Two new comment tags are introduced here:
 
- </t>
 
- <t>First, an optional gain for track normalization:</t>
 
- <figure align="center">
 
- <artwork align="left"><![CDATA[
 
- R128_TRACK_GAIN=-573
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
-  representing the volume shift needed to normalize the track's volume
 
-  during isolated playback, in random shuffle, and so on.
 
- The gain is a Q7.8 fixed point number in dB, as in the ID header's 'output
 
-  gain' field.
 
- This tag is similar to the REPLAYGAIN_TRACK_GAIN tag in
 
-  Vorbis <xref target="replay-gain"/>, except that the normal volume
 
-  reference is the <xref target="EBU-R128"/> standard.
 
- </t>
 
- <t>Second, an optional gain for album normalization:</t>
 
- <figure align="center">
 
- <artwork align="left"><![CDATA[
 
- R128_ALBUM_GAIN=111
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
-  representing the volume shift needed to normalize the overall volume when
 
-  played as part of a particular collection of tracks.
 
- The gain is also a Q7.8 fixed point number in dB, as in the ID header's
 
-  'output gain' field.
 
- The values '-573' and '111' given here are just examples.
 
- </t>
 
- <t>
 
- An Ogg Opus stream MUST NOT have more than one of each of these tags, and if
 
-  present their values MUST be an integer from -32768 to 32767, inclusive,
 
-  represented in ASCII as a base 10 number with no whitespace.
 
- A leading '+' or '-' character is valid.
 
- Leading zeros are also permitted, but the value MUST be represented by
 
-  no more than 6 characters.
 
- Other non-digit characters MUST NOT be present.
 
- </t>
 
- <t>
 
- If present, R128_TRACK_GAIN and R128_ALBUM_GAIN MUST correctly represent
 
-  the R128 normalization gain relative to the 'output gain' field specified
 
-  in the ID header.
 
- If a player chooses to make use of the R128_TRACK_GAIN tag or the
 
-  R128_ALBUM_GAIN tag, it MUST apply those gains
 
-  <spanx style="emph">in addition</spanx> to the 'output gain' value.
 
- If a tool modifies the ID header's 'output gain' field, it MUST also update or
 
-  remove the R128_TRACK_GAIN and R128_ALBUM_GAIN comment tags if present.
 
- A muxer SHOULD place the gain it wants other tools to use by default into the
 
-  'output gain' field, and not the comment tag.
 
- </t>
 
- <t>
 
- To avoid confusion with multiple normalization schemes, an Opus comment header
 
-  SHOULD NOT contain any of the REPLAYGAIN_TRACK_GAIN, REPLAYGAIN_TRACK_PEAK,
 
-  REPLAYGAIN_ALBUM_GAIN, or REPLAYGAIN_ALBUM_PEAK tags, unless they are only
 
-  to be used in some context where there is guaranteed to be no such confusion.
 
- <xref target="EBU-R128"/> normalization is preferred to the earlier
 
-  REPLAYGAIN schemes because of its clear definition and adoption by industry.
 
- Peak normalizations are difficult to calculate reliably for lossy codecs
 
-  because of variation in excursion heights due to decoder differences.
 
- In the authors' investigations they were not applied consistently or broadly
 
-  enough to merit inclusion here.
 
- </t>
 
- </section> <!-- end comment_format -->
 
- </section> <!-- end comment_header -->
 
- </section> <!-- end headers -->
 
- <section anchor="packet_size_limits" title="Packet Size Limits">
 
- <t>
 
- Technically, valid Opus packets can be arbitrarily large due to the padding
 
-  format, although the amount of non-padding data they can contain is bounded.
 
- These packets might be spread over a similarly enormous number of Ogg pages.
 
- When encoding, implementations SHOULD limit the use of padding in audio data
 
-  packets to no more than is necessary to make a variable bitrate (VBR) stream
 
-  constant bitrate (CBR), unless they have no reasonable way to determine what
 
-  is necessary.
 
- Demuxers SHOULD treat audio data packets as invalid (treat them as if they were
 
-  malformed Opus packets with an invalid TOC sequence) if they are larger than
 
-  61,440 octets per Opus stream, unless they have a specific reason for
 
-  allowing extra padding.
 
- Such packets necessarily contain more padding than needed to make a stream CBR.
 
- Demuxers MUST avoid attempting to allocate excessive amounts of memory when
 
-  presented with a very large packet.
 
- Demuxers MAY treat audio data packets as invalid or partially process them if
 
-  they are larger than 61,440 octets in an Ogg Opus stream with channel
 
-  mapping families 0 or 1.
 
- Demuxers MAY treat audio data packets as invalid or partially process them in
 
-  any Ogg Opus stream if the packet is larger than 61,440 octets and also
 
-  larger than 7,680 octets per Opus stream.
 
- The presence of an extremely large packet in the stream could indicate a
 
-  memory exhaustion attack or stream corruption.
 
- </t>
 
- <t>
 
- In an Ogg Opus stream, the largest possible valid packet that does not use
 
-  padding has a size of (61,298*N - 2) octets.
 
- With 255 streams, this is 15,630,988 octets and can
 
-  span up to 61,298 Ogg pages, all but one of which will have a granule
 
-  position of -1.
 
- This is of course a very extreme packet, consisting of 255 streams, each
 
-  containing 120 ms of audio encoded as 2.5 ms frames, each frame
 
-  using the maximum possible number of octets (1275) and stored in the least
 
-  efficient manner allowed (a VBR code 3 Opus packet).
 
- Even in such a packet, most of the data will be zeros as 2.5 ms frames
 
-  cannot actually use all 1275 octets.
 
- </t>
 
- <t>
 
- The largest packet consisting of entirely useful data is
 
-  (15,326*N - 2) octets.
 
- This corresponds to 120 ms of audio encoded as 10 ms frames in either
 
-  SILK or Hybrid mode, but at a data rate of over 1 Mbps, which makes little
 
-  sense for the quality achieved.
 
- </t>
 
- <t>
 
- A more reasonable limit is (7,664*N - 2) octets.
 
- This corresponds to 120 ms of audio encoded as 20 ms stereo CELT mode
 
-  frames, with a total bitrate just under 511 kbps (not counting the Ogg
 
-  encapsulation overhead).
 
- For channel mapping family 1, N=8 provides a reasonable upper bound, as it
 
-  allows for each of the 8 possible output channels to be decoded from a
 
-  separate stereo Opus stream.
 
- This gives a size of 61,310 octets, which is rounded up to a multiple of
 
-  1,024 octets to yield the audio data packet size of 61,440 octets
 
-  that any implementation is expected to be able to process successfully.
 
- </t>
 
- </section>
 
- <section anchor="encoder" title="Encoder Guidelines">
 
- <t>
 
- When encoding Opus streams, Ogg muxers SHOULD take into account the
 
-  algorithmic delay of the Opus encoder.
 
- </t>
 
- <t>
 
- In encoders derived from the reference
 
-  implementation <xref target="RFC6716"/>, the number of samples can be
 
-  queried with:
 
- </t>
 
- <figure align="center">
 
- <artwork align="center"><![CDATA[
 
-  opus_encoder_ctl(encoder_state, OPUS_GET_LOOKAHEAD(&delay_samples));
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- To achieve good quality in the very first samples of a stream, implementations
 
-  MAY use linear predictive coding (LPC) extrapolation to generate at least 120
 
-  extra samples at the beginning to avoid the Opus encoder having to encode a
 
-  discontinuous signal.
 
- For more information on linear prediction, see
 
-  <xref target="linear-prediction"/>.
 
- For an input file containing 'length' samples, the implementation SHOULD set
 
-  the pre-skip header value to (delay_samples + extra_samples), encode
 
-  at least (length + delay_samples + extra_samples)
 
-  samples, and set the granule position of the last page to
 
-  (length + delay_samples + extra_samples).
 
- This ensures that the encoded file has the same duration as the original, with
 
-  no time offset. The best way to pad the end of the stream is to also use LPC
 
-  extrapolation, but zero-padding is also acceptable.
 
- </t>
 
- <section anchor="lpc" title="LPC Extrapolation">
 
- <t>
 
- The first step in LPC extrapolation is to compute linear prediction
 
-  coefficients <xref target="lpc-sample"/>.
 
- When extending the end of the signal, order-N (typically with N ranging from 8
 
-  to 40) LPC analysis is performed on a window near the end of the signal.
 
- The last N samples are used as memory to an infinite impulse response (IIR)
 
-  filter.
 
- </t>
 
- <t>
 
- The filter is then applied on a zero input to extrapolate the end of the signal.
 
- Let a(k) be the kth LPC coefficient and x(n) be the nth sample of the signal.
 
- Each new sample past the end of the signal is computed as:
 
- </t>
 
- <figure align="center">
 
- <artwork align="center"><![CDATA[
 
-         N
 
-        ---
 
- x(n) = \   a(k)*x(n-k)
 
-        /
 
-        ---
 
-        k=1
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- The process is repeated independently for each channel.
 
- It is possible to extend the beginning of the signal by applying the same
 
-  process backward in time.
 
- When extending the beginning of the signal, it is best to apply a "fade in" to
 
-  the extrapolated signal, e.g. by multiplying it by a half-Hanning window
 
-  <xref target="hanning"/>.
 
- </t>
 
- </section>
 
- <section anchor="continuous_chaining" title="Continuous Chaining">
 
- <t>
 
- In some applications, such as Internet radio, it is desirable to cut a long
 
-  stream into smaller chains, e.g. so the comment header can be updated.
 
- This can be done simply by separating the input streams into segments and
 
-  encoding each segment independently.
 
- The drawback of this approach is that it creates a small discontinuity
 
-  at the boundary due to the lossy nature of Opus.
 
- A muxer MAY avoid this discontinuity by using the following procedure:
 
- <list style="numbers">
 
- <t>Encode the last frame of the first segment as an independent frame by
 
-  turning off all forms of inter-frame prediction.
 
- De-emphasis is allowed.</t>
 
- <t>Set the granule position of the last page to a point near the end of the
 
-  last frame.</t>
 
- <t>Begin the second segment with a copy of the last frame of the first
 
-  segment.</t>
 
- <t>Set the pre-skip value of the second stream in such a way as to properly
 
-  join the two streams.</t>
 
- <t>Continue the encoding process normally from there, without any reset to
 
-  the encoder.</t>
 
- </list>
 
- </t>
 
- <t>
 
- In encoders derived from the reference implementation, inter-frame prediction
 
-  can be turned off by calling:
 
- </t>
 
- <figure align="center">
 
- <artwork align="center"><![CDATA[
 
-  opus_encoder_ctl(encoder_state, OPUS_SET_PREDICTION_DISABLED(1));
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
- For best results, this implementation requires that prediction be explicitly
 
-  enabled again before resuming normal encoding, even after a reset.
 
- </t>
 
- </section>
 
- </section>
 
- <section anchor="implementation" title="Implementation Status">
 
- <t>
 
- A brief summary of major implementations of this draft is available
 
-  at <eref target="https://wiki.xiph.org/OggOpusImplementation"/>,
 
-  along with their status.
 
- </t>
 
- <t>
 
- [Note to RFC Editor: please remove this entire section before
 
-  final publication per <xref target="RFC6982"/>, along with
 
-  its references.]
 
- </t>
 
- </section>
 
- <section anchor="security" title="Security Considerations">
 
- <t>
 
- Implementations of the Opus codec need to take appropriate security
 
-  considerations into account, as outlined in <xref target="RFC4732"/>.
 
- This is just as much a problem for the container as it is for the codec itself.
 
- Malicious payloads and/or input streams can be used to attack codec
 
-  implementations.
 
- Implementations MUST NOT overrun their allocated memory nor consume excessive
 
-  resources when decoding payloads or processing input streams.
 
- Although problems in encoding applications are typically rarer, this still
 
-  applies to a muxer, as vulnerabilities would allow an attacker to attack
 
-  transcoding gateways.
 
- </t>
 
- <t>
 
- Header parsing code contains the most likely area for potential overruns.
 
- It is important for implementations to ensure their buffers contain enough
 
-  data for all of the required fields before attempting to read it (for example,
 
-  for all of the channel map data in the ID header).
 
- Implementations would do well to validate the indices of the channel map, also,
 
-  to ensure they meet all of the restrictions outlined in
 
-  <xref target="channel_mapping"/>, in order to avoid attempting to read data
 
-  from channels that do not exist.
 
- </t>
 
- <t>
 
- To avoid excessive resource usage, we advise implementations to be especially
 
-  wary of streams that might cause them to process far more data than was
 
-  actually transmitted.
 
- For example, a relatively small comment header may contain values for the
 
-  string lengths or user comment list length that imply that it is many
 
-  gigabytes in size.
 
- Even computing the size of the required buffer could overflow a 32-bit integer,
 
-  and actually attempting to allocate such a buffer before verifying it would be
 
-  a reasonable size is a bad idea.
 
- After reading the user comment list length, implementations might wish to
 
-  verify that the header contains at least the minimum amount of data for that
 
-  many comments (4 additional octets per comment, to indicate each has a
 
-  length of zero) before proceeding any further, again taking care to avoid
 
-  overflow in these calculations.
 
- If allocating an array of pointers to point at these strings, the size of the
 
-  pointers may be larger than 4 octets, potentially requiring a separate
 
-  overflow check.
 
- </t>
 
- <t>
 
- Another bug in this class we have observed more than once involves the handling
 
-  of invalid data at the end of a stream.
 
- Often, implementations will seek to the end of a stream to locate the last
 
-  timestamp in order to compute its total duration.
 
- If they do not find a valid capture pattern and Ogg page from the desired
 
-  logical stream, they will back up and try again.
 
- If care is not taken to avoid re-scanning data that was already scanned, this
 
-  search can quickly devolve into something with a complexity that is quadratic
 
-  in the amount of invalid data.
 
- </t>
 
- <t>
 
- In general, when seeking, implementations will wish to be cautious about the
 
-  effects of invalid granule position values, and ensure all algorithms will
 
-  continue to make progress and eventually terminate, even if these are missing
 
-  or out-of-order.
 
- </t>
 
- <t>
 
- Like most other container formats, Ogg Opus streams SHOULD NOT be used with
 
-  insecure ciphers or cipher modes that are vulnerable to known-plaintext
 
-  attacks.
 
- Elements such as the Ogg page capture pattern and the magic signatures in the
 
-  ID header and the comment header all have easily predictable values, in
 
-  addition to various elements of the codec data itself.
 
- </t>
 
- </section>
 
- <section anchor="content_type" title="Content Type">
 
- <t>
 
- An "Ogg Opus file" consists of one or more sequentially multiplexed segments,
 
-  each containing exactly one Ogg Opus stream.
 
- The RECOMMENDED mime-type for Ogg Opus files is "audio/ogg".
 
- </t>
 
- <t>
 
- If more specificity is desired, one MAY indicate the presence of Opus streams
 
-  using the codecs parameter defined in <xref target="RFC6381"/> and
 
-  <xref target="RFC5334"/>, e.g.,
 
- </t>
 
- <figure>
 
- <artwork align="center"><![CDATA[
 
-     audio/ogg; codecs=opus
 
- ]]></artwork>
 
- </figure>
 
- <t>
 
-  for an Ogg Opus file.
 
- </t>
 
- <t>
 
- The RECOMMENDED filename extension for Ogg Opus files is '.opus'.
 
- </t>
 
- <t>
 
- When Opus is concurrently multiplexed with other streams in an Ogg container,
 
-  one SHOULD use one of the "audio/ogg", "video/ogg", or "application/ogg"
 
-  mime-types, as defined in <xref target="RFC5334"/>.
 
- Such streams are not strictly "Ogg Opus files" as described above,
 
-  since they contain more than a single Opus stream per sequentially
 
-  multiplexed segment, e.g. video or multiple audio tracks.
 
- In such cases the '.opus' filename extension is NOT RECOMMENDED.
 
- </t>
 
- <t>
 
- In either case, this document updates <xref target="RFC5334"/>
 
-  to add 'opus' as a codecs parameter value with char[8]: 'OpusHead'
 
-  as Codec Identifier.
 
- </t>
 
- </section>
 
- <section anchor="iana" title="IANA Considerations">
 
- <t>
 
- This document updates the IANA Media Types registry to add .opus
 
-  as a file extension for "audio/ogg", and to add itself as a reference
 
-  alongside <xref target="RFC5334"/> for "audio/ogg", "video/ogg", and
 
-  "application/ogg" Media Types.
 
- </t>
 
- <t>
 
- This document defines a new registry "Opus Channel Mapping Families" to
 
-  indicate how the semantic meanings of the channels in a multi-channel Opus
 
-  stream are described.
 
- IANA is requested to create a new name space of "Opus Channel Mapping
 
-  Families".
 
- This will be a new registry on the IANA Matrix, and not a subregistry of an
 
-  existing registry.
 
- Modifications to this registry follow the "Specification Required" registration
 
-  policy as defined in <xref target="RFC5226"/>.
 
- Each registry entry consists of a Channel Mapping Family Number, which is
 
-  specified in decimal in the range 0 to 255, inclusive, and a Reference (or
 
-  list of references).
 
- Each Reference must point to sufficient documentation to describe what
 
-  information is coded in the Opus identification header for this channel
 
-  mapping family, how a demuxer determines the Stream Count ('N') and Coupled
 
-  Stream Count ('M') from this information, and how it determines the proper
 
-  interpretation of each of the decoded channels.
 
- </t>
 
- <t>
 
- This document defines three initial assignments for this registry.
 
- </t>
 
- <texttable>
 
- <ttcol>Value</ttcol><ttcol>Reference</ttcol>
 
- <c>0</c><c>[RFCXXXX] <xref target="channel_mapping_0"/></c>
 
- <c>1</c><c>[RFCXXXX] <xref target="channel_mapping_1"/></c>
 
- <c>255</c><c>[RFCXXXX] <xref target="channel_mapping_255"/></c>
 
- </texttable>
 
- <t>
 
- The designated expert will determine if the Reference points to a specification
 
-  that meets the requirements for permanence and ready availability laid out
 
-  in <xref target="RFC5226"/> and that it specifies the information
 
-  described above with sufficient clarity to allow interoperable
 
-  implementations.
 
- </t>
 
- </section>
 
- <section anchor="Acknowledgments" title="Acknowledgments">
 
- <t>
 
- Thanks to Ben Campbell, Joel M. Halpern, Mark Harris, Greg Maxwell,
 
-  Christopher "Monty" Montgomery, Jean-Marc Valin, Stephan Wenger, and Mo Zanaty
 
-  for their valuable contributions to this document.
 
- Additional thanks to Andrew D'Addesio, Greg Maxwell, and Vincent Penquerc'h for
 
-  their feedback based on early implementations.
 
- </t>
 
- </section>
 
- <section title="RFC Editor Notes">
 
- <t>
 
- In <xref target="iana"/>, "RFCXXXX" is to be replaced with the RFC number
 
-  assigned to this draft.
 
- </t>
 
- </section>
 
- </middle>
 
- <back>
 
- <references title="Normative References">
 
-  &rfc2119;
 
-  &rfc3533;
 
-  &rfc3629;
 
-  &rfc5226;
 
-  &rfc5334;
 
-  &rfc6381;
 
-  &rfc6716;
 
- <reference anchor="EBU-R128" target="https://tech.ebu.ch/loudness">
 
- <front>
 
-   <title>Loudness Recommendation EBU R128</title>
 
-   <author>
 
-     <organization>EBU Technical Committee</organization>
 
-   </author>
 
-   <date month="August" year="2011"/>
 
- </front>
 
- </reference>
 
- <reference anchor="vorbis-comment"
 
-  target="https://www.xiph.org/vorbis/doc/v-comment.html">
 
- <front>
 
- <title>Ogg Vorbis I Format Specification: Comment Field and Header
 
-  Specification</title>
 
- <author initials="C." surname="Montgomery"
 
-  fullname="Christopher &quot;Monty&quot; Montgomery"/>
 
- <date month="July" year="2002"/>
 
- </front>
 
- </reference>
 
- </references>
 
- <references title="Informative References">
 
- <!--?rfc include="http://xml.resource.org/public/rfc/bibxml/reference.RFC.3550.xml"?-->
 
-  &rfc4732;
 
-  &rfc6982;
 
-  &rfc7587;
 
- <reference anchor="flac"
 
-  target="https://xiph.org/flac/format.html">
 
-   <front>
 
-     <title>FLAC - Free Lossless Audio Codec Format Description</title>
 
-     <author initials="J." surname="Coalson" fullname="Josh Coalson"/>
 
-     <date month="January" year="2008"/>
 
-   </front>
 
- </reference>
 
- <reference anchor="hanning"
 
-  target="https://en.wikipedia.org/w/index.php?title=Window_function&amp;oldid=703074467#Hann_.28Hanning.29_window">
 
-   <front>
 
-     <title>Hann window</title>
 
-     <author>
 
-       <organization>Wikipedia</organization>
 
-     </author>
 
-     <date month="February" year="2016"/>
 
-   </front>
 
- </reference>
 
- <reference anchor="linear-prediction"
 
-  target="https://en.wikipedia.org/w/index.php?title=Linear_predictive_coding&amp;oldid=687498962">
 
-   <front>
 
-     <title>Linear Predictive Coding</title>
 
-     <author>
 
-       <organization>Wikipedia</organization>
 
-     </author>
 
-     <date month="October" year="2015"/>
 
-   </front>
 
- </reference>
 
- <reference anchor="lpc-sample"
 
-   target="https://svn.xiph.org/trunk/vorbis/lib/lpc.c">
 
- <front>
 
-   <title>Autocorrelation LPC coeff generation algorithm
 
-     (Vorbis source code)</title>
 
- <author initials="J." surname="Degener" fullname="Jutta Degener"/>
 
- <author initials="C." surname="Bormann" fullname="Carsten Bormann"/>
 
- <date month="November" year="1994"/>
 
- </front>
 
- </reference>
 
- <reference anchor="q-notation"
 
-  target="https://en.wikipedia.org/w/index.php?title=Q_%28number_format%29&amp;oldid=697252615">
 
- <front>
 
- <title>Q (number format)</title>
 
- <author><organization>Wikipedia</organization></author>
 
- <date month="December" year="2015"/>
 
- </front>
 
- </reference>
 
- <reference anchor="replay-gain"
 
-  target="https://wiki.xiph.org/VorbisComment#Replay_Gain">
 
- <front>
 
- <title>VorbisComment: Replay Gain</title>
 
- <author initials="C." surname="Parker" fullname="Conrad Parker"/>
 
- <author initials="M." surname="Leese" fullname="Martin Leese"/>
 
- <date month="June" year="2009"/>
 
- </front>
 
- </reference>
 
- <reference anchor="seeking"
 
-  target="https://wiki.xiph.org/Seeking">
 
- <front>
 
- <title>Granulepos Encoding and How Seeking Really Works</title>
 
- <author initials="S." surname="Pfeiffer" fullname="Silvia Pfeiffer"/>
 
- <author initials="C." surname="Parker" fullname="Conrad Parker"/>
 
- <author initials="G." surname="Maxwell" fullname="Greg Maxwell"/>
 
- <date month="May" year="2012"/>
 
- </front>
 
- </reference>
 
- <reference anchor="vorbis-mapping"
 
-  target="https://www.xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-810004.3.9">
 
- <front>
 
- <title>The Vorbis I Specification, Section 4.3.9 Output Channel Order</title>
 
- <author initials="C." surname="Montgomery"
 
-  fullname="Christopher &quot;Monty&quot; Montgomery"/>
 
- <date month="January" year="2010"/>
 
- </front>
 
- </reference>
 
- <reference anchor="vorbis-trim"
 
-  target="https://xiph.org/vorbis/doc/Vorbis_I_spec.html#x1-132000A.2">
 
-   <front>
 
-     <title>The Vorbis I Specification, Appendix A: Embedding Vorbis
 
-       into an Ogg stream</title>
 
-     <author initials="C." surname="Montgomery"
 
-      fullname="Christopher &quot;Monty&quot; Montgomery"/>
 
-     <date month="November" year="2008"/>
 
-   </front>
 
- </reference>
 
- <reference anchor="wave-multichannel"
 
-  target="http://msdn.microsoft.com/en-us/windows/hardware/gg463006.aspx">
 
-   <front>
 
-     <title>Multiple Channel Audio Data and WAVE Files</title>
 
-     <author>
 
-       <organization>Microsoft Corporation</organization>
 
-     </author>
 
-     <date month="March" year="2007"/>
 
-   </front>
 
- </reference>
 
- </references>
 
- </back>
 
- </rfc>
 
 
  |