You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
325 lines
8.8 KiB
Go
325 lines
8.8 KiB
Go
// Package av defines basic interfaces and data structures of container demux/mux and audio encode/decode.
|
|
package av
|
|
|
|
import (
|
|
"fmt"
|
|
"time"
|
|
)
|
|
|
|
// SampleFormat identifies how raw audio samples are stored: the numeric type
// of each sample and whether the data is laid out in planar form.
type SampleFormat uint8

// Known sample formats. Numbering starts at 1, so the zero SampleFormat does
// not match any of them.
const (
	U8   SampleFormat = iota + 1 // 8-bit unsigned integer
	S16                          // signed 16-bit integer
	S32                          // signed 32-bit integer
	FLT                          // 32-bit float
	DBL                          // 64-bit float
	U8P                          // 8-bit unsigned integer in planar
	S16P                         // signed 16-bit integer in planar
	S32P                         // signed 32-bit integer in planar
	FLTP                         // 32-bit float in planar
	DBLP                         // 64-bit float in planar
	U32                          // unsigned 32-bit integer
)
|
|
|
|
func (self SampleFormat) BytesPerSample() int {
|
|
switch self {
|
|
case U8, U8P:
|
|
return 1
|
|
case S16, S16P:
|
|
return 2
|
|
case FLT, FLTP, S32, S32P, U32:
|
|
return 4
|
|
case DBL, DBLP:
|
|
return 8
|
|
default:
|
|
return 0
|
|
}
|
|
}
|
|
|
|
func (self SampleFormat) String() string {
|
|
switch self {
|
|
case U8:
|
|
return "U8"
|
|
case S16:
|
|
return "S16"
|
|
case S32:
|
|
return "S32"
|
|
case FLT:
|
|
return "FLT"
|
|
case DBL:
|
|
return "DBL"
|
|
case U8P:
|
|
return "U8P"
|
|
case S16P:
|
|
return "S16P"
|
|
case FLTP:
|
|
return "FLTP"
|
|
case DBLP:
|
|
return "DBLP"
|
|
case U32:
|
|
return "U32"
|
|
default:
|
|
return "?"
|
|
}
|
|
}
|
|
|
|
// Check if this sample format is in planar.
|
|
func (self SampleFormat) IsPlanar() bool {
|
|
switch self {
|
|
case S16P, S32P, FLTP, DBLP:
|
|
return true
|
|
default:
|
|
return false
|
|
}
|
|
}
|
|
|
|
// ChannelLayout describes an audio channel layout as a set of bit flags,
// one bit per channel position (see the CH_* constants).
type ChannelLayout uint16
|
|
|
|
func (self ChannelLayout) String() string {
|
|
return fmt.Sprintf("%dch", self.Count())
|
|
}
|
|
|
|
const (
|
|
CH_FRONT_CENTER = ChannelLayout(1 << iota)
|
|
CH_FRONT_LEFT
|
|
CH_FRONT_RIGHT
|
|
CH_BACK_CENTER
|
|
CH_BACK_LEFT
|
|
CH_BACK_RIGHT
|
|
CH_SIDE_LEFT
|
|
CH_SIDE_RIGHT
|
|
CH_LOW_FREQ
|
|
CH_NR
|
|
|
|
CH_MONO = ChannelLayout(CH_FRONT_CENTER)
|
|
CH_STEREO = ChannelLayout(CH_FRONT_LEFT | CH_FRONT_RIGHT)
|
|
CH_2_1 = ChannelLayout(CH_STEREO | CH_BACK_CENTER)
|
|
CH_2POINT1 = ChannelLayout(CH_STEREO | CH_LOW_FREQ)
|
|
CH_SURROUND = ChannelLayout(CH_STEREO | CH_FRONT_CENTER)
|
|
CH_3POINT1 = ChannelLayout(CH_SURROUND | CH_LOW_FREQ)
|
|
// TODO: add all channel_layout in ffmpeg
|
|
)
|
|
|
|
func (self ChannelLayout) Count() (n int) {
|
|
for self != 0 {
|
|
n++
|
|
self = (self - 1) & self
|
|
}
|
|
return
|
|
}
|
|
|
|
// Video/Audio codec type. can be H264/AAC/SPEEX/...
|
|
type CodecType uint32
|
|
|
|
var (
|
|
H264 = MakeVideoCodecType(avCodecTypeMagic + 1)
|
|
AAC = MakeAudioCodecType(avCodecTypeMagic + 1)
|
|
PCM_MULAW = MakeAudioCodecType(avCodecTypeMagic + 2)
|
|
PCM_ALAW = MakeAudioCodecType(avCodecTypeMagic + 3)
|
|
SPEEX = MakeAudioCodecType(avCodecTypeMagic + 4)
|
|
NELLYMOSER = MakeAudioCodecType(avCodecTypeMagic + 5)
|
|
G729= MakeAudioCodecType(avCodecTypeMagic + 6)
|
|
)
|
|
|
|
const (
	// codecTypeAudioBit is the flag bit that MakeAudioCodecType sets and
	// that IsAudio/IsVideo test.
	codecTypeAudioBit = 0x1
	// codecTypeOtherBits is the left-shift applied to a codec's base number
	// by MakeAudioCodecType and MakeVideoCodecType.
	codecTypeOtherBits = 1
)
|
|
|
|
func (self CodecType) String() string {
|
|
switch self {
|
|
case H264:
|
|
return "H264"
|
|
case AAC:
|
|
return "AAC"
|
|
case PCM_MULAW:
|
|
return "PCM_MULAW"
|
|
case PCM_ALAW:
|
|
return "PCM_ALAW"
|
|
case G729:
|
|
return "G729"
|
|
case SPEEX:
|
|
return "SPEEX"
|
|
case NELLYMOSER:
|
|
return "NELLYMOSER"
|
|
}
|
|
return ""
|
|
}
|
|
|
|
func (self CodecType) IsAudio() bool {
|
|
return self&codecTypeAudioBit != 0
|
|
}
|
|
|
|
func (self CodecType) IsVideo() bool {
|
|
return self&codecTypeAudioBit == 0
|
|
}
|
|
|
|
// Make a new audio codec type.
|
|
func MakeAudioCodecType(base uint32) (c CodecType) {
|
|
c = CodecType(base)<<codecTypeOtherBits | CodecType(codecTypeAudioBit)
|
|
return
|
|
}
|
|
|
|
// Make a new video codec type.
|
|
func MakeVideoCodecType(base uint32) (c CodecType) {
|
|
c = CodecType(base) << codecTypeOtherBits
|
|
return
|
|
}
|
|
|
|
// avCodecTypeMagic is the offset added to each built-in codec's index before
// the sum is passed to MakeVideoCodecType or MakeAudioCodecType.
const avCodecTypeMagic = 233333
|
|
|
|
// CodecData is some important bytes for initializing audio/video decoder,
|
|
// can be converted to VideoCodecData or AudioCodecData using:
|
|
//
|
|
// codecdata.(AudioCodecData) or codecdata.(VideoCodecData)
|
|
//
|
|
// for H264, CodecData is AVCDecoderConfigure bytes, includes SPS/PPS.
|
|
type CodecData interface {
|
|
Type() CodecType // Video/Audio codec type
|
|
}
|
|
|
|
type VideoCodecData interface {
|
|
CodecData
|
|
Width() int // Video width
|
|
Height() int // Video height
|
|
}
|
|
|
|
type AudioCodecData interface {
|
|
CodecData
|
|
SampleFormat() SampleFormat // audio sample format
|
|
SampleRate() int // audio sample rate
|
|
ChannelLayout() ChannelLayout // audio channel layout
|
|
PacketDuration([]byte) (time.Duration, error) // get audio compressed packet duration
|
|
}
|
|
|
|
type PacketWriter interface {
|
|
WritePacket(Packet) error
|
|
}
|
|
|
|
type PacketReader interface {
|
|
ReadPacket() (Packet, error)
|
|
}
|
|
|
|
// Muxer describes the steps of writing compressed audio/video packets into container formats like MP4/FLV/MPEG-TS.
|
|
//
|
|
// Container formats, rtmp.Conn, and transcode.Muxer implements Muxer interface.
|
|
type Muxer interface {
|
|
WriteHeader([]CodecData) error // write the file header
|
|
PacketWriter // write compressed audio/video packets
|
|
WriteTrailer() error // finish writing file, this func can be called only once
|
|
}
|
|
|
|
// Muxer with Close() method
|
|
type MuxCloser interface {
|
|
Muxer
|
|
Close() error
|
|
}
|
|
|
|
// Demuxer can read compressed audio/video packets from container formats like MP4/FLV/MPEG-TS.
|
|
type Demuxer interface {
|
|
PacketReader // read compressed audio/video packets
|
|
Streams() ([]CodecData, error) // reads the file header, contains video/audio meta infomations
|
|
}
|
|
|
|
// Demuxer with Close() method
|
|
type DemuxCloser interface {
|
|
Demuxer
|
|
Close() error
|
|
}
|
|
|
|
// Frame type tags, typed as byte.
// NOTE(review): presumably the values stored in Packet.FrameType — confirm.
const (
	I_FRAME byte = 0
	P_FRAME byte = 100
	B_FRAME byte = 101
)
|
|
|
|
// Packet stores compressed audio/video data.
type Packet struct {
	// IsKeyFrame marks a video packet as a key frame.
	IsKeyFrame bool
	// FrameType is the frame type of a video packet.
	// NOTE(review): presumably one of I_FRAME/P_FRAME/B_FRAME — confirm.
	FrameType byte
	// Idx is the stream index in the container format.
	Idx int8
	// CompositionTime is the packet presentation time minus decode time,
	// used for H264 B-frames.
	CompositionTime time.Duration
	// Time is the packet decode time.
	Time time.Duration
	// Data is the packet data.
	Data []byte
}
|
|
|
|
// Raw audio frame.
|
|
type AudioFrame struct {
|
|
SampleFormat SampleFormat // audio sample format, e.g: S16,FLTP,...
|
|
ChannelLayout ChannelLayout // audio channel layout, e.g: CH_MONO,CH_STEREO,...
|
|
SampleCount int // sample count in this frame
|
|
SampleRate int // sample rate
|
|
Data [][]byte // data array for planar format len(Data) > 1
|
|
}
|
|
|
|
func (self AudioFrame) Duration() time.Duration {
|
|
return time.Second * time.Duration(self.SampleCount) / time.Duration(self.SampleRate)
|
|
}
|
|
|
|
// Check this audio frame has same format as other audio frame.
|
|
func (self AudioFrame) HasSameFormat(other AudioFrame) bool {
|
|
if self.SampleRate != other.SampleRate {
|
|
return false
|
|
}
|
|
if self.ChannelLayout != other.ChannelLayout {
|
|
return false
|
|
}
|
|
if self.SampleFormat != other.SampleFormat {
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
// Split sample audio sample from this frame.
|
|
func (self AudioFrame) Slice(start int, end int) (out AudioFrame) {
|
|
if start > end {
|
|
panic(fmt.Sprintf("av: AudioFrame split failed start=%d end=%d invalid", start, end))
|
|
}
|
|
out = self
|
|
out.Data = append([][]byte(nil), out.Data...)
|
|
out.SampleCount = end - start
|
|
size := self.SampleFormat.BytesPerSample()
|
|
for i := range out.Data {
|
|
out.Data[i] = out.Data[i][start*size : end*size]
|
|
}
|
|
return
|
|
}
|
|
|
|
// Concat two audio frames.
|
|
func (self AudioFrame) Concat(in AudioFrame) (out AudioFrame) {
|
|
out = self
|
|
out.Data = append([][]byte(nil), out.Data...)
|
|
out.SampleCount += in.SampleCount
|
|
for i := range out.Data {
|
|
out.Data[i] = append(out.Data[i], in.Data[i]...)
|
|
}
|
|
return
|
|
}
|
|
|
|
// AudioEncoder can encode raw audio frame into compressed audio packets.
|
|
// cgo/ffmpeg inplements AudioEncoder, using ffmpeg.NewAudioEncoder to create it.
|
|
type AudioEncoder interface {
|
|
CodecData() (AudioCodecData, error) // encoder's codec data can put into container
|
|
Encode(AudioFrame) ([][]byte, error) // encode raw audio frame into compressed pakcet(s)
|
|
Close() // close encoder, free cgo contexts
|
|
SetSampleRate(int) error // set encoder sample rate
|
|
SetChannelLayout(ChannelLayout) error // set encoder channel layout
|
|
SetSampleFormat(SampleFormat) error // set encoder sample format
|
|
SetBitrate(int) error // set encoder bitrate
|
|
SetOption(string, interface{}) error // encoder setopt, in ffmpeg is av_opt_set_dict()
|
|
GetOption(string, interface{}) error // encoder getopt
|
|
}
|
|
|
|
// AudioDecoder can decode compressed audio packets into raw audio frame.
|
|
// use ffmpeg.NewAudioDecoder to create it.
|
|
type AudioDecoder interface {
|
|
Decode([]byte) (bool, AudioFrame, error) // decode one compressed audio packet
|
|
Close() // close decode, free cgo contexts
|
|
}
|
|
|
|
// AudioResampler can convert raw audio frames in different sample rate/format/channel layout.
|
|
type AudioResampler interface {
|
|
Resample(AudioFrame) (AudioFrame, error) // convert raw audio frames
|
|
}
|