A simple script to bulk-update the Content-Type of files in an S3 bucket.
This script is able to:
- Recursively browse a bucket
- Act only on files whose keys match a specific prefix (see the sketch just after this list)
- Auto-detect a file's content type from its extension
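
For example, limiting a run to keys under a given prefix is just a matter of passing that prefix to boto's bucket listing. This is a minimal sketch; the `images/` prefix and the credential placeholders are assumptions for illustration:

```python
import boto

# Placeholder credentials and bucket name -- substitute your own
conn = boto.connect_s3('YOUR_AWS_ACCESS_KEY_ID', 'YOUR_AWS_SECRET_ACCESS_KEY')
bucket = conn.get_bucket('YOUR_BUCKET_NAME')

# bucket.list() iterates over every key under the prefix,
# descending into nested "folders" automatically
for key in bucket.list(prefix='images/'):
    print key.name
```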
You can add any extension you need to the mapping in update_md to handle more types; a sketch of an auto-detection alternative follows the script.
```python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import boto

# Variables
AWS_ACCESS_KEY_ID = 'YOUR_AWS_ACCESS_KEY_ID'
AWS_SECRET_ACCESS_KEY = 'YOUR_AWS_SECRET_ACCESS_KEY'
AWS_BUCKET_NAME = 'YOUR_BUCKET_NAME'


def update_md(bucket, key):
    """Update the Content-Type metadata of an existing object."""
    # Map extensions to content types -- add entries here to handle more
    content_types = {
        'bmp': 'image/bmp',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'png': 'image/png',
        'pdf': 'application/pdf',
        'txt': 'text/plain',
        'zip': 'application/zip',
    }
    ext = key.name.split('.')[-1].lower()
    if ext not in content_types:
        return None
    metadata = {'Content-Type': content_types[ext]}
    # Copy the object onto itself only if the type actually changes
    if metadata['Content-Type'] != key.content_type:
        key.copy(bucket.name, key.name, metadata, preserve_acl=True)
    return key


# Main function
if __name__ == '__main__':
    # Connect to S3 and open the bucket
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    bucket = conn.get_bucket(AWS_BUCKET_NAME)

    # Select the files to parse (a prefix can be specified)
    for k in bucket.list(prefix=""):
        print k.name
        # bucket.list() does not populate metadata, so fetch the full key
        akey = bucket.get_key(k.name)
        print "Before:", akey.content_type
        if update_md(bucket, akey) is None:
            print "Content-Type not handled by this script"
            continue
        # Re-fetch the key to show the updated type
        print "After: ", bucket.get_key(k.name).content_type

    print "Script finished!"
```
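
If maintaining the extension mapping by hand becomes tedious, Python's standard mimetypes module can guess a content type from a filename. This is a sketch of that alternative, not part of the original script; `guess_content_type` is a hypothetical helper name:

```python
import mimetypes

def guess_content_type(key_name):
    """Guess a Content-Type from a key name, or None if unknown."""
    content_type, _ = mimetypes.guess_type(key_name)
    return content_type

print guess_content_type('photos/cat.jpg')  # -> image/jpeg
print guess_content_type('archive.zip')     # -> application/zip
```

A None result can be treated the same way as an unhandled extension in update_md.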