go, amazon-s3

How to perform AWS S3 Multipart Copy with golang


I was looking at the AWS golang documentation for the S3 Copy Object function; it contains the following details about handling large files:

> However, to copy an object greater than 5 GB, you must use the multipart upload Upload Part - Copy API. For more information, see Copy Object Using the REST Multipart Upload API (https://docs.aws.amazon.com/AmazonS3/latest/dev/CopyingObjctsUsingRESTMPUapi.html).

When I follow that link, it only contains code examples for Java and .NET.

Am I missing some documentation/example somewhere showing how to copy an existing large file in S3 using the golang client?
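
For context, a plain single-request copy with the Go SDK looks roughly like the sketch below (the function name and bucket/key handling are just illustrative), and it works fine until the source object crosses the 5 GB limit:

    import (
        "github.com/aws/aws-sdk-go/aws"
        "github.com/aws/aws-sdk-go/aws/session"
        "github.com/aws/aws-sdk-go/service/s3"
    )
    
    //simpleCopy copies an object with a single CopyObject request (only valid up to 5 GB)
    func simpleCopy(sess *session.Session, sourceBucket, sourceKey, destBucket, destKey string) error {
        svc := s3.New(sess)
        _, err := svc.CopyObject(&s3.CopyObjectInput{
            Bucket:     aws.String(destBucket),
            CopySource: aws.String(sourceBucket + "/" + sourceKey),
            Key:        aws.String(destKey),
        })
        return err
    }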


Solution

  • So it took some experimenting, but I finally got the multipart copy working:

    //imports
    import (
        "context"
        "errors"
        "fmt"
        "strconv"
        "strings"
        "time"
    
        "github.com/aws/aws-sdk-go/aws/session"
        "github.com/aws/aws-sdk-go/service/s3"
        log "github.com/sirupsen/logrus"
    )
    
    //size in bytes of each part to copy: 5 MiB, the minimum part size S3 allows
    const max_part_size = 5 * 1024 * 1024
    
    //helper function to build the value for the range of bytes to copy
    func buildCopySourceRange(start int64, objectSize int64) string {
        end := start + max_part_size - 1
        if end > objectSize-1 {
            end = objectSize - 1
        }
        startRange := strconv.FormatInt(start, 10)
        stopRange := strconv.FormatInt(end, 10)
        return "bytes=" + startRange + "-" + stopRange
    }
    
    //function that starts the multipart upload, performs each part copy, and completes the copy
    func MultiPartCopy(sess *session.Session, sourceBucket string, sourceKey string, destBucket string, destKey string) error {
        svc := s3.New(sess)
    
        ctx, cancelFn := context.WithTimeout(context.TODO(), 10*time.Minute)
        defer cancelFn()
    
        //look up the size of the source object, since the part ranges are computed from it
        headResp, err := svc.HeadObjectWithContext(ctx, &s3.HeadObjectInput{
            Bucket: &sourceBucket,
            Key:    &sourceKey,
        })
        if err != nil {
            return fmt.Errorf("Error getting source object metadata: %w", err)
        }
        fileSize := *headResp.ContentLength
    
        //struct for starting a multipart upload
        startInput := s3.CreateMultipartUploadInput{
            Bucket: &destBucket,
            Key:    &destKey,
        }
    
        //send command to start copy and get the upload id as it is needed later
        var uploadId string
        createOutput, err := svc.CreateMultipartUploadWithContext(ctx, &startInput)
        if err != nil {
            return err
        }
        if createOutput != nil {
            if createOutput.UploadId != nil {
                uploadId = *createOutput.UploadId
            }
        }
        if uploadId == "" {
            return errors.New("No upload id found in start upload request")
        }
    
        var i int64
        var partNumber int64 = 1
        copySource := "/" + sourceBucket + "/" + sourceKey
        parts := make([]*s3.CompletedPart, 0)
        //round up so a trailing partial chunk still counts as a part
        numUploads := (fileSize + max_part_size - 1) / max_part_size
        log.Infof("Will attempt upload in %d parts to %s", numUploads, destKey)
        for i = 0; i < fileSize; i += max_part_size {
            copyRange := buildCopySourceRange(i, fileSize)
            partInput := s3.UploadPartCopyInput{
                Bucket:          &destBucket,
                CopySource:      &copySource,
                CopySourceRange: &copyRange,
                Key:             &destKey,
                PartNumber:      &partNumber,
                UploadId:        &uploadId,
            }
            log.Debugf("Attempting to upload part %d range: %s", partNumber, copyRange)
            partResp, err := svc.UploadPartCopy(&partInput)
    
            if err != nil {
                log.Error("Attempting to abort upload")
                abortIn := s3.AbortMultipartUploadInput{
                    Bucket:   &destBucket,
                    Key:      &destKey,
                    UploadId: &uploadId,
                }
                //ignoring any errors with aborting the copy
                svc.AbortMultipartUpload(&abortIn)
                return fmt.Errorf("Error uploading part %d : %w", partNumber, err)
            }
    
            //copy etag and part number from response as it is needed for completion
            if partResp != nil {
                partNum := partNumber
                etag := strings.Trim(*partResp.CopyPartResult.ETag, "\"")
                cPart := s3.CompletedPart{
                    ETag:       &etag,
                    PartNumber: &partNum,
                }
                parts = append(parts, &cPart)
                log.Debugf("Successfully upload part %d of %s", partNumber, uploadId)
            }
            partNumber++
            if partNumber%50 == 0 {
                log.Infof("Completed part %d of %d to %s", partNumber, numUploads, destKey)
            }
        }
    
        //create struct for completing the upload
        mpu := s3.CompletedMultipartUpload{
            Parts: parts,
        }
    
        //complete the upload
        //S3 does not finalize the copied object until it receives the complete command
        complete := s3.CompleteMultipartUploadInput{
            Bucket:          &destBucket,
            Key:             &destKey,
            UploadId:        &uploadId,
            MultipartUpload: &mpu,
        }
        compOutput, err := svc.CompleteMultipartUpload(&complete)
        if err != nil {
            return fmt.Errorf("Error completing upload: %w", err)
        }
        if compOutput != nil {
            log.Infof("Successfully copied Bucket: %s Key: %s to Bucket: %s Key: %s", sourceBucket, sourceKey, destBucket, destKey)
        }
        return nil
    }
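
To use it, create a session and call the function; the region and the bucket/key names below are placeholders:

    package main
    
    import (
        "github.com/aws/aws-sdk-go/aws"
        "github.com/aws/aws-sdk-go/aws/session"
        log "github.com/sirupsen/logrus"
    )
    
    func main() {
        //region and bucket/key names here are placeholders
        sess, err := session.NewSession(&aws.Config{Region: aws.String("us-east-1")})
        if err != nil {
            log.Fatal(err)
        }
        err = MultiPartCopy(sess, "source-bucket", "path/to/big-object", "dest-bucket", "path/to/copy")
        if err != nil {
            log.Fatal(err)
        }
    }

One thing to keep in mind: S3 allows at most 10,000 parts per multipart upload, so with 5 MiB parts this tops out around 48 GB. For larger objects you would need to raise max_part_size (UploadPartCopy accepts parts up to 5 GiB).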