Download a file in chunks concurrently

Use Transfer Manager to download a single large file in chunks, with concurrency.

Explore further

For detailed documentation that includes this code sample, see the following:

Sliced object downloads

Code sample

Go

For more information, see the Cloud Storage Go API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  package 
  
 transfermanager 
 import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "io" 
  
 "os" 
  
 "cloud.google.com/go/storage" 
  
 "cloud.google.com/go/storage/transfermanager" 
 ) 
 // downloadChunksConcurrently downloads a single file in chunks, concurrently in a process pool. 
 func 
  
 downloadChunksConcurrently 
 ( 
 w 
  
 io 
 . 
  Writer 
 
 , 
  
 bucketName 
 , 
  
 blobName 
 , 
  
 filename 
  
 string 
 ) 
  
 error 
  
 { 
  
 // bucketName := "your-bucket-name" 
  
 // blobName := "target-file" 
  
 // filename := "path/to/your/local/file.txt" 
  
 // The chunkSize is the size of each chunk to be downloaded. 
  
 // The performance impact of this value depends on the use case. 
  
 // For example, for a slow network, using a smaller chunkSize may be better. 
  
 // Providing this parameter is optional and the default value is 32 MiB. 
  
 chunkSize 
  
 := 
  
 16 
  
 * 
  
 1024 
  
 * 
  
 1024 
  
 // 16 MiB 
  
 // The maximum number of workers to use for the operation. 
  
 // Please note, providing this parameter is optional. 
  
 // The performance impact of this value depends on the use case. 
  
 // To download one large file, the default value: NumCPU / 2 is usually fine. 
  
 workers 
  
 := 
  
 8 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 client 
 , 
  
 err 
  
 := 
  
 storage 
 . 
 NewClient 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "storage.NewClient: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 client 
 . 
 Close 
 () 
  
 d 
 , 
  
 err 
  
 := 
  
 transfermanager 
 . 
 NewDownloader 
 ( 
 client 
 , 
  
 transfermanager 
 . 
 WithPartSize 
 ( 
 int64 
 ( 
 chunkSize 
 )), 
  
 transfermanager 
 . 
 WithWorkers 
 ( 
 workers 
 )) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "transfermanager.NewDownloader: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 f 
 , 
  
 err 
  
 := 
  
 os 
 . 
  Create 
 
 ( 
 filename 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "os.Create: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 f 
 . 
 Close 
 () 
  
 in 
  
 := 
  
& transfermanager 
 . 
 DownloadObjectInput 
 { 
  
 Bucket 
 : 
  
 bucketName 
 , 
  
 Object 
 : 
  
 blobName 
 , 
  
 Destination 
 : 
  
 f 
 , 
  
 } 
  
 if 
  
 err 
  
 := 
  
 d 
 . 
 DownloadObject 
 ( 
 ctx 
 , 
  
 in 
 ); 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "d.DownloadObject: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 // Wait for all downloads to complete and close the downloader. 
  
 // This allows to synchronize the download processes. 
  
 results 
 , 
  
 err 
  
 := 
  
 d 
 . 
 WaitAndClose 
 () 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "d.WaitAndClose: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 // Process the downloader result. 
  
 if 
  
 len 
 ( 
 results 
 ) 
  
 != 
  
 1 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "expected 1 result, got %d" 
 , 
  
 len 
 ( 
 results 
 )) 
  
 } 
  
 result 
  
 := 
  
 results 
 [ 
 0 
 ] 
  
 if 
  
 result 
 . 
 Err 
  
 != 
  
 nil 
  
 { 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "download of %v failed with error %v\n" 
 , 
  
 result 
 . 
  Object 
 
 , 
  
 result 
 . 
 Err 
 ) 
  
 return 
  
 result 
 . 
 Err 
  
 } 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "Downloaded %v to %v.\n" 
 , 
  
 blobName 
 , 
  
 filename 
 ) 
  
 return 
  
 nil 
 }

Java

For more information, see the Cloud Storage Java API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  import 
  
 com.google.cloud.storage. BlobInfo 
 
 ; 
 import 
  
 com.google.cloud.storage.transfermanager. DownloadResult 
 
 ; 
 import 
  
 com.google.cloud.storage.transfermanager. ParallelDownloadConfig 
 
 ; 
 import 
  
 com.google.cloud.storage.transfermanager. TransferManager 
 
 ; 
 import 
  
 com.google.cloud.storage.transfermanager. TransferManagerConfig 
 
 ; 
 import 
  
 java.nio.file.Path 
 ; 
 import 
  
 java.util.List 
 ; 
 class AllowDivideAndConquerDownload {

  /**
   * Downloads the given blobs with a transfer manager that has divide-and-conquer downloads
   * enabled, then prints the status reported for each download to standard output.
   *
   * @param blobs the blobs to download
   * @param bucketName the bucket name set on the parallel download configuration
   * @param destinationDirectory the directory set as the download directory
   */
  public static void divideAndConquerDownloadAllowed(
      List<BlobInfo> blobs, String bucketName, Path destinationDirectory) {
    // Build the manager configuration first, then obtain the service from it.
    TransferManagerConfig managerConfig =
        TransferManagerConfig.newBuilder().setAllowDivideAndConquerDownload(true).build();
    TransferManager manager = managerConfig.getService();

    // Describe where the blobs come from and where they are written.
    ParallelDownloadConfig downloadConfig =
        ParallelDownloadConfig.newBuilder()
            .setBucketName(bucketName)
            .setDownloadDirectory(destinationDirectory)
            .build();

    List<DownloadResult> outcomes =
        manager.downloadBlobs(blobs, downloadConfig).getDownloadResults();

    // Report one line per blob.
    for (DownloadResult outcome : outcomes) {
      String blobName = outcome.getInput().getName();
      String message =
          "Download of " + blobName + " completed with status " + outcome.getStatus();
      System.out.println(message);
    }
  }
}

Node.js

For more information, see the Cloud Storage Node.js API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  /**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// The ID of your GCS bucket
// const bucketName = 'your-unique-bucket-name';

// The ID of the GCS file to download
// const fileName = 'your-file-name';

// The path to which the file should be downloaded
// const destFileName = '/local/path/to/file.txt';

// The size of each chunk to be downloaded
// const chunkSize = 1024;

// Pull the client classes out of the Google Cloud Storage library.
const {Storage, TransferManager} = require('@google-cloud/storage');

// Storage client used to look up the bucket.
const storage = new Storage();

// Transfer manager bound to the bucket that holds the file.
const bucket = storage.bucket(bucketName);
const transferManager = new TransferManager(bucket);

/**
 * Downloads `fileName` from the bucket to `destFileName` in chunks of
 * `chunkSize` bytes, then logs the source and destination.
 */
async function downloadFileInChunksWithTransferManager() {
  await transferManager.downloadFileInChunks(fileName, {
    destination: destFileName,
    chunkSizeBytes: chunkSize,
  });

  const message = `gs://${bucketName}/${fileName} downloaded to ${destFileName}.`;
  console.log(message);
}

// Run the sample; any rejection is printed to stderr.
downloadFileInChunksWithTransferManager().catch(err => console.error(err));

Python

For more information, see the Cloud Storage Python API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

  def download_chunks_concurrently(
      bucket_name, blob_name, filename, chunk_size=32 * 1024 * 1024, workers=8
  ):
      """Download a single file in chunks, concurrently in a process pool.

      Args:
          bucket_name: The ID of your GCS bucket, e.g. "your-bucket-name".
          blob_name: The file to be downloaded, e.g. "target-file".
          filename: The destination filename or path.
          chunk_size: The size of each chunk, in bytes. Defaults to
              32 * 1024 * 1024 (32 MiB). The performance impact of this value
              depends on the use case. The remote service has a minimum of
              5 MiB and a maximum of 5 GiB.
          workers: The maximum number of processes to use for the operation.
              Defaults to 8. The performance impact of this value depends on
              the use case, but smaller files usually benefit from a higher
              number of processes. Each additional process occupies some CPU
              and memory resources until finished. Threads can be used instead
              of processes by passing `worker_type=transfer_manager.THREAD`.
      """
      from google.cloud.storage import Client, transfer_manager

      # Resolve the blob handle in one chain: client -> bucket -> blob.
      blob = Client().bucket(bucket_name).blob(blob_name)

      transfer_manager.download_chunks_concurrently(
          blob,
          filename,
          chunk_size=chunk_size,
          max_workers=workers,
      )

      message = "Downloaded {} to {}.".format(blob_name, filename)
      print(message)

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.