Upload all files in a directory

Use Transfer Manager to upload all of the files in a directory with concurrency.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

Java

For more information, see the Cloud Storage Java API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

import com.google.cloud.storage.transfermanager.ParallelUploadConfig;
import com.google.cloud.storage.transfermanager.TransferManager;
import com.google.cloud.storage.transfermanager.TransferManagerConfig;
import com.google.cloud.storage.transfermanager.UploadResult;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;

class UploadDirectory {

  public static void uploadDirectoryContents(String bucketName, Path sourceDirectory)
      throws IOException {
    TransferManager transferManager = TransferManagerConfig.newBuilder().build().getService();
    ParallelUploadConfig parallelUploadConfig =
        ParallelUploadConfig.newBuilder().setBucketName(bucketName).build();

    // Create a list to store the file paths
    List<Path> filePaths = new ArrayList<>();
    // Get all files in the directory
    // try-with-resource to ensure pathStream is closed
    try (Stream<Path> pathStream = Files.walk(sourceDirectory)) {
      pathStream.filter(Files::isRegularFile).forEach(filePaths::add);
    }
    List<UploadResult> results =
        transferManager.uploadFiles(filePaths, parallelUploadConfig).getUploadResults();
    for (UploadResult result : results) {
      System.out.println(
          "Upload for "
              + result.getInput().getName()
              + " completed with status "
              + result.getStatus());
    }
  }
}

Node.js

For more information, see the Cloud Storage Node.js API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

/**
 * TODO(developer): Uncomment the following lines before running the sample.
 */
// The ID of your GCS bucket
// const bucketName = 'your-unique-bucket-name';

// The local directory to upload
// const directoryName = 'your-directory';

// Imports the Google Cloud client library
const {Storage, TransferManager} = require('@google-cloud/storage');

// Creates a client
const storage = new Storage();

// Creates a transfer manager client
const transferManager = new TransferManager(storage.bucket(bucketName));

async function uploadDirectoryWithTransferManager() {
  // Uploads the directory
  await transferManager.uploadManyFiles(directoryName);

  console.log(`${directoryName} uploaded to ${bucketName}.`);
}

uploadDirectoryWithTransferManager().catch(console.error);

Python

For more information, see the Cloud Storage Python API reference documentation.

To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.

def upload_directory_with_transfer_manager(bucket_name, source_directory, workers=8):
    """Upload every file in a directory, including all files in subdirectories.

    Each blob name is derived from the filename, not including the `directory`
    parameter itself. For complete control of the blob name for each file (and
    other aspects of individual blob metadata), use
    transfer_manager.upload_many() instead.
    """

    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"

    # The directory on your computer to upload. Files in the directory and its
    # subdirectories will be uploaded. An empty string means "the current
    # working directory".
    # source_directory=""

    # The maximum number of processes to use for the operation. The performance
    # impact of this value depends on the use case, but smaller files usually
    # benefit from a higher number of processes. Each additional process occupies
    # some CPU and memory resources until finished. Threads can be used instead
    # of processes by passing `worker_type=transfer_manager.THREAD`.
    # workers=8

    from pathlib import Path

    from google.cloud.storage import Client, transfer_manager

    storage_client = Client()
    bucket = storage_client.bucket(bucket_name)

    # Generate a list of paths (in string form) relative to the `directory`.
    # This can be done in a single list comprehension, but is expanded into
    # multiple lines here for clarity.

    # First, recursively get all files in `directory` as Path objects.
    directory_as_path_obj = Path(source_directory)
    paths = directory_as_path_obj.rglob("*")

    # Filter so the list only includes files, not directories themselves.
    file_paths = [path for path in paths if path.is_file()]

    # These paths are relative to the current working directory. Next, make them
    # relative to `directory`
    relative_paths = [path.relative_to(source_directory) for path in file_paths]

    # Finally, convert them all to strings.
    string_paths = [str(path) for path in relative_paths]

    print("Found {} files.".format(len(string_paths)))

    # Start the upload.
    results = transfer_manager.upload_many_from_filenames(
        bucket, string_paths, source_directory=source_directory, max_workers=workers
    )

    for name, result in zip(string_paths, results):
        # The results list is either `None` or an exception for each filename in
        # the input list, in order.

        if isinstance(result, Exception):
            print("Failed to upload {} due to exception: {}".format(name, result))
        else:
            print("Uploaded {} to {}.".format(name, bucket.name))

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.