diff options
Diffstat (limited to 'vendor/github.com/abema/go-mp4/probe.go')
-rw-r--r-- | vendor/github.com/abema/go-mp4/probe.go | 673 |
1 files changed, 673 insertions, 0 deletions
diff --git a/vendor/github.com/abema/go-mp4/probe.go b/vendor/github.com/abema/go-mp4/probe.go new file mode 100644 index 000000000..812be5bb6 --- /dev/null +++ b/vendor/github.com/abema/go-mp4/probe.go @@ -0,0 +1,673 @@ +package mp4 + +import ( + "bytes" + "errors" + "io" + + "github.com/abema/go-mp4/bitio" +) + +type ProbeInfo struct { + MajorBrand [4]byte + MinorVersion uint32 + CompatibleBrands [][4]byte + FastStart bool + Timescale uint32 + Duration uint64 + Tracks Tracks + Segments Segments +} + +// Deprecated: replace with ProbeInfo +type FraProbeInfo = ProbeInfo + +type Tracks []*Track + +// Deprecated: replace with Track +type TrackInfo = Track + +type Track struct { + TrackID uint32 + Timescale uint32 + Duration uint64 + Codec Codec + Encrypted bool + EditList EditList + Samples Samples + Chunks Chunks + AVC *AVCDecConfigInfo + MP4A *MP4AInfo +} + +type Codec int + +const ( + CodecUnknown Codec = iota + CodecAVC1 + CodecMP4A +) + +type EditList []*EditListEntry + +type EditListEntry struct { + MediaTime int64 + SegmentDuration uint64 +} + +type Samples []*Sample + +type Sample struct { + Size uint32 + TimeDelta uint32 + CompositionTimeOffset int64 +} + +type Chunks []*Chunk + +type Chunk struct { + DataOffset uint32 + SamplesPerChunk uint32 +} + +type AVCDecConfigInfo struct { + ConfigurationVersion uint8 + Profile uint8 + ProfileCompatibility uint8 + Level uint8 + LengthSize uint16 + Width uint16 + Height uint16 +} + +type MP4AInfo struct { + OTI uint8 + AudOTI uint8 + ChannelCount uint16 +} + +type Segments []*Segment + +// Deprecated: replace with Segment +type SegmentInfo = Segment + +type Segment struct { + TrackID uint32 + MoofOffset uint64 + BaseMediaDecodeTime uint64 + DefaultSampleDuration uint32 + SampleCount uint32 + Duration uint32 + CompositionTimeOffset int32 + Size uint32 +} + +// Probe probes MP4 file +func Probe(r io.ReadSeeker) (*ProbeInfo, error) { + probeInfo := &ProbeInfo{ + Tracks: make([]*Track, 0, 8), + Segments: make([]*Segment, 0, 8), + } + bis, err := ExtractBoxes(r, nil, []BoxPath{ + {BoxTypeFtyp()}, + {BoxTypeMoov()}, + {BoxTypeMoov(), BoxTypeMvhd()}, + {BoxTypeMoov(), BoxTypeTrak()}, + {BoxTypeMoof()}, + {BoxTypeMdat()}, + }) + if err != nil { + return nil, err + } + var mdatAppeared bool + for _, bi := range bis { + switch bi.Type { + case BoxTypeFtyp(): + var ftyp Ftyp + if _, err := bi.SeekToPayload(r); err != nil { + return nil, err + } + if _, err := Unmarshal(r, bi.Size-bi.HeaderSize, &ftyp, bi.Context); err != nil { + return nil, err + } + probeInfo.MajorBrand = ftyp.MajorBrand + probeInfo.MinorVersion = ftyp.MinorVersion + probeInfo.CompatibleBrands = make([][4]byte, 0, len(ftyp.CompatibleBrands)) + for _, entry := range ftyp.CompatibleBrands { + probeInfo.CompatibleBrands = append(probeInfo.CompatibleBrands, entry.CompatibleBrand) + } + case BoxTypeMoov(): + probeInfo.FastStart = !mdatAppeared + case BoxTypeMvhd(): + var mvhd Mvhd + if _, err := bi.SeekToPayload(r); err != nil { + return nil, err + } + if _, err := Unmarshal(r, bi.Size-bi.HeaderSize, &mvhd, bi.Context); err != nil { + return nil, err + } + probeInfo.Timescale = mvhd.Timescale + if mvhd.GetVersion() == 0 { + probeInfo.Duration = uint64(mvhd.DurationV0) + } else { + probeInfo.Duration = mvhd.DurationV1 + } + case BoxTypeTrak(): + track, err := probeTrak(r, bi) + if err != nil { + return nil, err + } + probeInfo.Tracks = append(probeInfo.Tracks, track) + case BoxTypeMoof(): + segment, err := probeMoof(r, bi) + if err != nil { + return nil, err + } + probeInfo.Segments = append(probeInfo.Segments, segment) + case BoxTypeMdat(): + mdatAppeared = true + } + } + return probeInfo, nil +} + +// ProbeFra probes fragmented MP4 file +// Deprecated: replace with Probe +func ProbeFra(r io.ReadSeeker) (*FraProbeInfo, error) { + probeInfo, err := Probe(r) + return (*FraProbeInfo)(probeInfo), err +} + +func probeTrak(r io.ReadSeeker, bi *BoxInfo) (*Track, error) { + track := new(Track) + + bips, err := ExtractBoxesWithPayload(r, bi, []BoxPath{ + {BoxTypeTkhd()}, + {BoxTypeEdts(), BoxTypeElst()}, + {BoxTypeMdia(), BoxTypeMdhd()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeAvc1()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeAvc1(), BoxTypeAvcC()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEncv()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEncv(), BoxTypeAvcC()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeMp4a()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeMp4a(), BoxTypeEsds()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeMp4a(), BoxTypeWave(), BoxTypeEsds()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEnca()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsd(), BoxTypeEnca(), BoxTypeEsds()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStco()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStts()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeCtts()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsc()}, + {BoxTypeMdia(), BoxTypeMinf(), BoxTypeStbl(), BoxTypeStsz()}, + }) + if err != nil { + return nil, err + } + var tkhd *Tkhd + var elst *Elst + var mdhd *Mdhd + var avc1 *VisualSampleEntry + var avcC *AVCDecoderConfiguration + var audioSampleEntry *AudioSampleEntry + var esds *Esds + var stco *Stco + var stts *Stts + var stsc *Stsc + var ctts *Ctts + var stsz *Stsz + for _, bip := range bips { + switch bip.Info.Type { + case BoxTypeTkhd(): + tkhd = bip.Payload.(*Tkhd) + case BoxTypeElst(): + elst = bip.Payload.(*Elst) + case BoxTypeMdhd(): + mdhd = bip.Payload.(*Mdhd) + case BoxTypeAvc1(): + track.Codec = CodecAVC1 + avc1 = bip.Payload.(*VisualSampleEntry) + case BoxTypeAvcC(): + avcC = bip.Payload.(*AVCDecoderConfiguration) + case BoxTypeEncv(): + track.Codec = CodecAVC1 + track.Encrypted = true + case BoxTypeMp4a(): + track.Codec = CodecMP4A + audioSampleEntry = bip.Payload.(*AudioSampleEntry) + case BoxTypeEnca(): + track.Codec = CodecMP4A + track.Encrypted = true + audioSampleEntry = bip.Payload.(*AudioSampleEntry) + case BoxTypeEsds(): + esds = bip.Payload.(*Esds) + case BoxTypeStco(): + stco = bip.Payload.(*Stco) + case BoxTypeStts(): + stts = bip.Payload.(*Stts) + case BoxTypeStsc(): + stsc = bip.Payload.(*Stsc) + case BoxTypeCtts(): + ctts = bip.Payload.(*Ctts) + case BoxTypeStsz(): + stsz = bip.Payload.(*Stsz) + } + } + + if tkhd == nil { + return nil, errors.New("tkhd box not found") + } + track.TrackID = tkhd.TrackID + + if elst != nil { + editList := make([]*EditListEntry, 0, len(elst.Entries)) + for i := range elst.Entries { + editList = append(editList, &EditListEntry{ + MediaTime: elst.GetMediaTime(i), + SegmentDuration: elst.GetSegmentDuration(i), + }) + } + track.EditList = editList + } + + if mdhd == nil { + return nil, errors.New("mdhd box not found") + } + track.Timescale = mdhd.Timescale + track.Duration = mdhd.GetDuration() + + if avc1 != nil && avcC != nil { + track.AVC = &AVCDecConfigInfo{ + ConfigurationVersion: avcC.ConfigurationVersion, + Profile: avcC.Profile, + ProfileCompatibility: avcC.ProfileCompatibility, + Level: avcC.Level, + LengthSize: uint16(avcC.LengthSizeMinusOne) + 1, + Width: avc1.Width, + Height: avc1.Height, + } + } + + if audioSampleEntry != nil && esds != nil { + oti, audOTI, err := detectAACProfile(esds) + if err != nil { + return nil, err + } + track.MP4A = &MP4AInfo{ + OTI: oti, + AudOTI: audOTI, + ChannelCount: audioSampleEntry.ChannelCount, + } + } + + if stco == nil { + return nil, errors.New("stco box not found") + } + track.Chunks = make([]*Chunk, 0) + for _, offset := range stco.ChunkOffset { + track.Chunks = append(track.Chunks, &Chunk{ + DataOffset: offset, + }) + } + + if stts == nil { + return nil, errors.New("stts box not found") + } + track.Samples = make([]*Sample, 0) + for _, entry := range stts.Entries { + for i := uint32(0); i < entry.SampleCount; i++ { + track.Samples = append(track.Samples, &Sample{ + TimeDelta: entry.SampleDelta, + }) + } + } + + if stsc == nil { + return nil, errors.New("stsc box not found") + } + for si, entry := range stsc.Entries { + end := uint32(len(track.Chunks)) + if si != len(stsc.Entries)-1 && stsc.Entries[si+1].FirstChunk-1 < end { + end = stsc.Entries[si+1].FirstChunk - 1 + } + for ci := entry.FirstChunk - 1; ci < end; ci++ { + track.Chunks[ci].SamplesPerChunk = entry.SamplesPerChunk + } + } + + if ctts != nil { + var si uint32 + for ci, entry := range ctts.Entries { + for i := uint32(0); i < entry.SampleCount; i++ { + if si >= uint32(len(track.Samples)) { + break + } + track.Samples[si].CompositionTimeOffset = ctts.GetSampleOffset(ci) + si++ + } + } + } + + if stsz != nil { + for i := 0; i < len(stsz.EntrySize) && i < len(track.Samples); i++ { + track.Samples[i].Size = stsz.EntrySize[i] + } + } + + return track, nil +} + +func detectAACProfile(esds *Esds) (oti, audOTI uint8, err error) { + configDscr := findDescriptorByTag(esds.Descriptors, DecoderConfigDescrTag) + if configDscr == nil || configDscr.DecoderConfigDescriptor == nil { + return 0, 0, nil + } + if configDscr.DecoderConfigDescriptor.ObjectTypeIndication != 0x40 { + return configDscr.DecoderConfigDescriptor.ObjectTypeIndication, 0, nil + } + + specificDscr := findDescriptorByTag(esds.Descriptors, DecSpecificInfoTag) + if specificDscr == nil { + return 0, 0, errors.New("DecoderSpecificationInfoDescriptor not found") + } + + r := bitio.NewReader(bytes.NewReader(specificDscr.Data)) + remaining := len(specificDscr.Data) * 8 + + // audio object type + audioObjectType, read, err := getAudioObjectType(r) + if err != nil { + return 0, 0, err + } + remaining -= read + + // sampling frequency index + samplingFrequencyIndex, err := r.ReadBits(4) + if err != nil { + return 0, 0, err + } + remaining -= 4 + if samplingFrequencyIndex[0] == 0x0f { + if _, err = r.ReadBits(24); err != nil { + return 0, 0, err + } + remaining -= 24 + } + + if audioObjectType == 2 && remaining >= 20 { + if _, err = r.ReadBits(4); err != nil { + return 0, 0, err + } + remaining -= 4 + syncExtensionType, err := r.ReadBits(11) + if err != nil { + return 0, 0, err + } + remaining -= 11 + if syncExtensionType[0] == 0x2 && syncExtensionType[1] == 0xb7 { + extAudioObjectType, _, err := getAudioObjectType(r) + if err != nil { + return 0, 0, err + } + if extAudioObjectType == 5 || extAudioObjectType == 22 { + sbr, err := r.ReadBits(1) + if err != nil { + return 0, 0, err + } + remaining-- + if sbr[0] != 0 { + if extAudioObjectType == 5 { + sfi, err := r.ReadBits(4) + if err != nil { + return 0, 0, err + } + remaining -= 4 + if sfi[0] == 0xf { + if _, err := r.ReadBits(24); err != nil { + return 0, 0, err + } + remaining -= 24 + } + if remaining >= 12 { + syncExtensionType, err := r.ReadBits(11) + if err != nil { + return 0, 0, err + } + if syncExtensionType[0] == 0x5 && syncExtensionType[1] == 0x48 { + ps, err := r.ReadBits(1) + if err != nil { + return 0, 0, err + } + if ps[0] != 0 { + return 0x40, 29, nil + } + } + } + } + return 0x40, 5, nil + } + } + } + } + return 0x40, audioObjectType, nil +} + +func findDescriptorByTag(dscrs []Descriptor, tag int8) *Descriptor { + for _, dscr := range dscrs { + if dscr.Tag == tag { + return &dscr + } + } + return nil +} + +func getAudioObjectType(r bitio.Reader) (byte, int, error) { + audioObjectType, err := r.ReadBits(5) + if err != nil { + return 0, 0, err + } + if audioObjectType[0] != 0x1f { + return audioObjectType[0], 5, nil + } + audioObjectType, err = r.ReadBits(6) + if err != nil { + return 0, 0, err + } + return audioObjectType[0] + 32, 11, nil +} + +func probeMoof(r io.ReadSeeker, bi *BoxInfo) (*Segment, error) { + bips, err := ExtractBoxesWithPayload(r, bi, []BoxPath{ + {BoxTypeTraf(), BoxTypeTfhd()}, + {BoxTypeTraf(), BoxTypeTfdt()}, + {BoxTypeTraf(), BoxTypeTrun()}, + }) + if err != nil { + return nil, err + } + + var tfhd *Tfhd + var tfdt *Tfdt + var trun *Trun + + segment := &Segment{ + MoofOffset: bi.Offset, + } + for _, bip := range bips { + switch bip.Info.Type { + case BoxTypeTfhd(): + tfhd = bip.Payload.(*Tfhd) + case BoxTypeTfdt(): + tfdt = bip.Payload.(*Tfdt) + case BoxTypeTrun(): + trun = bip.Payload.(*Trun) + } + } + + if tfhd == nil { + return nil, errors.New("tfhd not found") + } + segment.TrackID = tfhd.TrackID + segment.DefaultSampleDuration = tfhd.DefaultSampleDuration + + if tfdt != nil { + if tfdt.Version == 0 { + segment.BaseMediaDecodeTime = uint64(tfdt.BaseMediaDecodeTimeV0) + } else { + segment.BaseMediaDecodeTime = tfdt.BaseMediaDecodeTimeV1 + } + } + + if trun != nil { + segment.SampleCount = trun.SampleCount + + if trun.CheckFlag(0x000100) { + segment.Duration = 0 + for ei := range trun.Entries { + segment.Duration += trun.Entries[ei].SampleDuration + } + } else { + segment.Duration = tfhd.DefaultSampleDuration * segment.SampleCount + } + + if trun.CheckFlag(0x000200) { + segment.Size = 0 + for ei := range trun.Entries { + segment.Size += trun.Entries[ei].SampleSize + } + } else { + segment.Size = tfhd.DefaultSampleSize * segment.SampleCount + } + + var duration uint32 + for ei := range trun.Entries { + offset := int32(duration) + int32(trun.GetSampleCompositionTimeOffset(ei)) + if ei == 0 || offset < segment.CompositionTimeOffset { + segment.CompositionTimeOffset = offset + } + if trun.CheckFlag(0x000100) { + duration += trun.Entries[ei].SampleDuration + } else { + duration += tfhd.DefaultSampleDuration + } + } + } + + return segment, nil +} + +func FindIDRFrames(r io.ReadSeeker, trackInfo *TrackInfo) ([]int, error) { + if trackInfo.AVC == nil { + return nil, nil + } + lengthSize := uint32(trackInfo.AVC.LengthSize) + + var si int + idxs := make([]int, 0, 8) + for _, chunk := range trackInfo.Chunks { + end := si + int(chunk.SamplesPerChunk) + dataOffset := chunk.DataOffset + for ; si < end && si < len(trackInfo.Samples); si++ { + sample := trackInfo.Samples[si] + if sample.Size == 0 { + continue + } + for nalOffset := uint32(0); nalOffset+lengthSize+1 <= sample.Size; { + if _, err := r.Seek(int64(dataOffset+nalOffset), io.SeekStart); err != nil { + return nil, err + } + data := make([]byte, lengthSize+1) + if _, err := io.ReadFull(r, data); err != nil { + return nil, err + } + var length uint32 + for i := 0; i < int(lengthSize); i++ { + length = (length << 8) + uint32(data[i]) + } + nalHeader := data[lengthSize] + nalType := nalHeader & 0x1f + if nalType == 5 { + idxs = append(idxs, si) + break + } + nalOffset += lengthSize + length + } + dataOffset += sample.Size + } + } + return idxs, nil +} + +func (samples Samples) GetBitrate(timescale uint32) uint64 { + var totalSize uint64 + var totalDuration uint64 + for _, sample := range samples { + totalSize += uint64(sample.Size) + totalDuration += uint64(sample.TimeDelta) + } + if totalDuration == 0 { + return 0 + } + return 8 * totalSize * uint64(timescale) / totalDuration +} + +func (samples Samples) GetMaxBitrate(timescale uint32, timeDelta uint64) uint64 { + if timeDelta == 0 { + return 0 + } + var maxBitrate uint64 + var size uint64 + var duration uint64 + var begin int + var end int + for end < len(samples) { + for { + size += uint64(samples[end].Size) + duration += uint64(samples[end].TimeDelta) + end++ + if duration >= timeDelta || end == len(samples) { + break + } + } + bitrate := 8 * size * uint64(timescale) / duration + if bitrate > maxBitrate { + maxBitrate = bitrate + } + for { + size -= uint64(samples[begin].Size) + duration -= uint64(samples[begin].TimeDelta) + begin++ + if duration < timeDelta { + break + } + } + } + return maxBitrate +} + +func (segments Segments) GetBitrate(trackID uint32, timescale uint32) uint64 { + var totalSize uint64 + var totalDuration uint64 + for _, segment := range segments { + if segment.TrackID == trackID { + totalSize += uint64(segment.Size) + totalDuration += uint64(segment.Duration) + } + } + if totalDuration == 0 { + return 0 + } + return 8 * totalSize * uint64(timescale) / totalDuration +} + +func (segments Segments) GetMaxBitrate(trackID uint32, timescale uint32) uint64 { + var maxBitrate uint64 + for _, segment := range segments { + if segment.TrackID == trackID && segment.Duration != 0 { + bitrate := 8 * uint64(segment.Size) * uint64(timescale) / uint64(segment.Duration) + if bitrate > maxBitrate { + maxBitrate = bitrate + } + } + } + return maxBitrate +} |