AWS – Bulk-update metadata on multiple S3 objects using boto (Python)

A simple script that lets you bulk-update the Content-Type of files in an S3 bucket.
This script is able to:

  • Recursively browse a bucket
  • Act only on files matching a specific prefix
  • Auto-detect the type of each file from its extension

You can extend the function update_md to handle more extensions if needed.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, re, sys
import boto
from boto.s3.connection import S3Connection
from boto.s3.key import Key

# Variables
# Each setting is read from the environment variable of the same name when it
# is set, so real credentials never have to be written into this file. The
# placeholder literals are kept as fallbacks: editing them in place still
# works as long as the matching environment variable is unset.
AWS_ACCESS_KEY_ID     = os.environ.get('AWS_ACCESS_KEY_ID', 'YOUR_AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY', 'YOUR_AWS_SECRET_ACCESS_KEY')
AWS_BUCKET_NAME       = os.environ.get('AWS_BUCKET_NAME', 'YOUR_BUCKET_NAME')

# Content-Type to apply for each supported file extension (lower-case keys).
# Add entries here to handle more file types.
_CONTENT_TYPES = {
    'bmp': 'image/bmp',
    'jpg': 'image/jpeg',
    'jpeg': 'image/jpeg',
    'gif': 'image/gif',
    'png': 'image/png',
    'pdf': 'application/pdf',
    'txt': 'text/plain',
    'zip': 'application/zip',
}


# Function to update MetaData
def update_md(k):
    """
    Set the Content-Type of an existing S3 object from its file extension.

    The extension is the text after the last dot of the key name, compared
    case-insensitively against ``_CONTENT_TYPES``. When the stored
    Content-Type differs from the expected one, the object is copied onto
    itself with the new metadata (ACL preserved).

    :param k: key object exposing ``name`` and ``bucket`` (the bucket must
        provide ``name`` and ``get_key``).
    :return: ``k`` when its extension is supported, whether or not a copy
        was needed; ``None`` when the extension is not supported or the
        object can no longer be fetched from the bucket.
    """
    # A name without any dot has no extension at all; without this check a
    # key literally named "pdf" would be treated as a PDF.
    _, dot, ext = k.name.rpartition('.')
    content_type = _CONTENT_TYPES.get(ext.lower()) if dot else None
    if content_type is None:
        return None

    # Fetch the key individually and read content_type from that result
    # rather than from `k` or from a global set by the caller.
    # NOTE(review): presumably keys coming from a bucket listing do not carry
    # the stored Content-Type -- confirm against the boto documentation.
    akey = k.bucket.get_key(k.name)
    if akey is None:
        # Object disappeared between listing and lookup: nothing to update.
        return None

    # If not same type -- update. Source and destination are the same bucket
    # and key name, so the copy rewrites the object's metadata in place.
    if akey.content_type != content_type:
        akey.copy(k.bucket.name, k.name, {'Content-Type': content_type},
                  preserve_acl=True)
    return k

# Main function
# Walks every object of the bucket, prints its Content-Type, lets update_md
# fix it when the extension is supported, then prints the resulting type.
# print() is always called with a single pre-formatted string so the output
# is identical under Python 2 and Python 3.
if __name__ == '__main__':
    # Connect to S3
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    b = conn.get_bucket(AWS_BUCKET_NAME)

    # Select files to parse (prefix can be specified)
    rs = b.list(prefix="")

    # Browse files
    for k in rs:
        print(k.name)
        # `akey` is intentionally kept as a module-level name holding the
        # individually fetched key for the object being processed.
        akey = b.get_key(k.name)
        # Print type before
        print("Before: %s" % akey.content_type)
        try:
            updated = update_md(k)
            if updated is None:
                # update_md signals an unsupported extension with None.
                print("Content-Type not handled by this script")
                continue
            akey = b.get_key(updated.name)
            print("After:  %s" % akey.content_type)
        except Exception as e:
            # Best-effort batch run: report the real failure instead of
            # mislabelling it as an unsupported type, then keep going.
            print("Error while updating %s: %s" % (k.name, e))

    print("Script finished!")