package storage

import (
	"bytes"
	"context"
	"errors"
	"io"
	"strconv"
	"sync"

	api "github.com/qiniu/go-sdk/v7"
)

// ErrUnmatchedChecksum is an error that may occur during a resumable upload.
var ErrUnmatchedChecksum = errors.New("unmatched checksum")

const (
	// ErrNextReader means fetching the reader for the next chunk failed.
	ErrNextReader = "ErrNextReader"
	// ErrMaxUpRetry means the maximum number of upload retries was exceeded.
	ErrMaxUpRetry = "ErrMaxUpRetry"
)

const (
	blockBits = 22
	blockMask = (1 << blockBits) - 1
	blockSize = 1 << blockBits

	uploadRecordVersion = "1.0.0"
)

// Settings contains the settings for resumable uploads.
type Settings struct {
	TaskQsize int   // Optional. Size of the task queue. 0 means Workers * 4.
	Workers   int   // Number of concurrent goroutines.
	ChunkSize int   // Default chunk size; 4 MB if unset (only used by resumable upload v1).
	PartSize  int64 // Default part size; 4 MB if unset (only used by resumable upload v2).
	TryTimes  int   // Default number of attempts; 3 if unset.
}

// Default parameters for resumable uploads.
const (
	defaultWorkers   = 4               // default number of blocks uploaded concurrently
	defaultChunkSize = 4 * 1024 * 1024 // default chunk size, 4 MB (only used by resumable upload v1)
	defaultPartSize  = 4 * 1024 * 1024 // default part size, 4 MB (only used by resumable upload v2)
	defaultTryTimes  = 3               // number of retries after mkblk / bput / uploadParts failures
)

// Default settings for resumable uploads.
var settings = Settings{
	TaskQsize: defaultWorkers * 4,
	Workers:   defaultWorkers,
	ChunkSize: defaultChunkSize,
	PartSize:  defaultPartSize,
	TryTimes:  defaultTryTimes,
}

// SetSettings configures the resumable upload parameters.
func SetSettings(v *Settings) {
	settings = *v
	if settings.Workers == 0 {
		settings.Workers = defaultWorkers
	}
	if settings.TaskQsize == 0 {
		settings.TaskQsize = settings.Workers * 4
	}
	if settings.ChunkSize == 0 {
		settings.ChunkSize = defaultChunkSize
	}
	if settings.PartSize == 0 {
		settings.PartSize = defaultPartSize
	}
	if settings.TryTimes == 0 {
		settings.TryTimes = defaultTryTimes
	}
}

var (
	tasks chan func()
	once  sync.Once
)

// worker consumes and runs tasks from the shared task queue.
func worker(tasks chan func()) {
	for task := range tasks {
		task()
	}
}

func _initWorkers() {
	tasks = make(chan func(), settings.TaskQsize)
	for i := 0; i < settings.Workers; i++ {
		go worker(tasks)
	}
}

// initWorkers lazily starts the shared worker pool.
func initWorkers() {
	once.Do(_initWorkers)
}

// chunk holds the basic information of a single chunk.
type (
	chunk struct {
		id     int64
		offset int64
		size   int64
		reader io.ReaderAt
	}

	// chunkError describes the upload error of a single chunk.
	chunkError struct {
		chunk
		err error
	}

	// resumeUploaderBase is the common resumable upload interface, shared by the v1 and v2 upload APIs.
	resumeUploaderBase interface {
		// Called once before the upload starts, for initialization. In v1 it does nothing; in v2 it corresponds to initParts.
		initUploader(context.Context) ([]int64, error)
		// Uploads the actual chunk data; may be called concurrently. In v1 it corresponds to the mkblk and bput combination; in v2 it corresponds to uploadParts.
		uploadChunk(context.Context, chunk) error
		// Called once after all chunks have been uploaded, to finish the upload. In v1 it corresponds to mkfile; in v2 it corresponds to completeParts.
		final(context.Context) error
	}

	// chunkReader abstracts reading a data stream of known or unknown total size behind a single interface.
	chunkReader interface {
		readChunks([]int64, func(chunkID, off, size int64, reader io.ReaderAt) error) error
	}

	// unsizedChunkReader reads a data stream whose total size is unknown.
	unsizedChunkReader struct {
		body      io.Reader
		blockSize int64
	}

	// sizedChunkReader reads a data stream whose total size is known.
	sizedChunkReader struct {
		body      io.ReaderAt
		totalSize int64
		blockSize int64
	}
)

// uploadByWorkers uploads the data using concurrent goroutines.
func uploadByWorkers(uploader resumeUploaderBase, ctx context.Context, body chunkReader) error {
	initWorkers()
	recovered, iErr := uploader.initUploader(ctx)
	if iErr != nil {
		return iErr
	}

	// Read chunks and create upload tasks.
	var uErr error
	var locker = sync.Mutex{}
	var waiter = sync.WaitGroup{}

	rErr := body.readChunks(recovered, func(chunkID, off, size int64, reader io.ReaderAt) error {
		locker.Lock()
		if uErr != nil {
			locker.Unlock()
			return uErr
		}
		locker.Unlock()
		waiter.Add(1)
		tasks <- func() {
			pErr := uploader.uploadChunk(ctx, chunk{
				id:     chunkID,
				offset: off,
				size:   size,
				reader: reader,
			})
			locker.Lock()
			if uErr == nil {
				uErr = pErr
			}
			locker.Unlock()
			waiter.Done()
		}
		return nil
	})
	waiter.Wait()
	if rErr != nil {
		return rErr
	}
	if uErr != nil {
		return uErr
	}
	return uploader.final(ctx)
}
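// The sketch below is illustrative only and is not part of the package. Under the
// assumption of a trivial stub uploader (stubUploader and uploadStub are hypothetical
// names), it shows how uploadByWorkers drives a resumeUploaderBase: initUploader runs
// once, uploadChunk runs concurrently on the shared worker pool, and final runs once
// after every chunk has finished.
//
//	type stubUploader struct{}
//
//	func (stubUploader) initUploader(ctx context.Context) ([]int64, error) { return nil, nil }
//
//	func (stubUploader) uploadChunk(ctx context.Context, c chunk) error {
//		buf := make([]byte, c.size)
//		_, err := c.reader.ReadAt(buf, 0) // each chunk's reader starts at that chunk's own offset
//		return err
//	}
//
//	func (stubUploader) final(ctx context.Context) error { return nil }
//
//	func uploadStub(ctx context.Context, data []byte) error {
//		reader := newSizedChunkReader(bytes.NewReader(data), int64(len(data)), blockSize)
//		return uploadByWorkers(stubUploader{}, ctx, reader)
//	}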
func newUnsizedChunkReader(body io.Reader, blockSize int64) *unsizedChunkReader {
	return &unsizedChunkReader{body: body, blockSize: blockSize}
}

func (r *unsizedChunkReader) readChunks(recovered []int64, f func(chunkID, off, size int64, reader io.ReaderAt) error) error {
	var (
		lastChunk          = false
		chunkID      int64 = 0
		off          int64 = 0
		chunkSize    int
		err          error
		recoveredMap = make(map[int64]struct{}, len(recovered))
	)

	for _, roff := range recovered {
		recoveredMap[roff] = struct{}{}
	}

	for !lastChunk {
		buf := make([]byte, r.blockSize)
		if chunkSize, err = io.ReadFull(r.body, buf); err != nil {
			switch err {
			case io.EOF:
				lastChunk = true
				return nil
			case io.ErrUnexpectedEOF:
				buf = buf[:chunkSize]
				lastChunk = true
			default:
				return api.NewError(ErrNextReader, err.Error())
			}
		}
		if _, ok := recoveredMap[off]; !ok {
			if err = f(chunkID, off, int64(len(buf)), bytes.NewReader(buf)); err != nil {
				return err
			}
		}
		chunkID += 1
		off += int64(chunkSize)
	}
	return nil
}

func newSizedChunkReader(body io.ReaderAt, totalSize, blockSize int64) *sizedChunkReader {
	return &sizedChunkReader{body: body, totalSize: totalSize, blockSize: blockSize}
}

func (r *sizedChunkReader) readChunks(recovered []int64, f func(chunkID, off, size int64, reader io.ReaderAt) error) error {
	var (
		chunkID      int64 = 0
		off          int64 = 0
		err          error
		recoveredMap = make(map[int64]struct{}, len(recovered))
	)

	for _, roff := range recovered {
		recoveredMap[roff] = struct{}{}
	}

	for off < r.totalSize {
		shouldRead := r.totalSize - off
		if shouldRead > r.blockSize {
			shouldRead = r.blockSize
		}
		if _, ok := recoveredMap[off]; !ok {
			if err = f(chunkID, off, shouldRead, io.NewSectionReader(r.body, off, shouldRead)); err != nil {
				return err
			}
		}
		off += shouldRead
		chunkID += 1
	}
	return nil
}

// getRecorderKey builds the key under which the Recorder persists upload progress.
// It returns an empty string when progress recording is not possible.
func getRecorderKey(recorder Recorder, upToken string, key string, resumeVersion string,
	partSize int64, fileDetails *fileDetailsInfo) string {
	if recorder == nil || fileDetails == nil || len(upToken) == 0 {
		return ""
	}

	accessKey, bucket, err := getAkBucketFromUploadToken(upToken)
	if err != nil {
		return ""
	}
	return recorder.GenerateRecorderKey(
		[]string{accessKey, bucket, key, resumeVersion, fileDetails.fileFullPath, strconv.FormatInt(partSize, 10)},
		fileDetails.fileInfo)
}
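// Illustrative sketch only (listPendingChunks is a hypothetical name, not part of the
// package): it shows how the recovered offsets returned by initUploader interact with
// readChunks. With a 10 MB source, the default 4 MB block size, and recovered =
// []int64{0}, the callback runs only for the chunks at offsets 4 MB and 8 MB; the chunk
// at offset 0 is treated as already uploaded and skipped, which is what makes resuming work.
//
//	func listPendingChunks(data []byte, recovered []int64) ([]int64, error) {
//		var pending []int64
//		r := newSizedChunkReader(bytes.NewReader(data), int64(len(data)), blockSize)
//		err := r.readChunks(recovered, func(chunkID, off, size int64, reader io.ReaderAt) error {
//			pending = append(pending, off)
//			return nil
//		})
//		return pending, err
//	}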