A simple script for bulk-updating the Content-Type of files in an S3 bucket.
This script can:
- Recursively browse a bucket
- Act only on files matching a specific prefix
- Auto-detect a file's type from its extension
You can add any extensions you need to the update_md function.
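If you'd rather not maintain the extension mapping by hand, Python's standard mimetypes module can guess the Content-Type from the filename. A minimal sketch of that alternative (guess_content_type is an illustrative name, not part of the script below):

import mimetypes

def guess_content_type(key_name):
    # guess_type() returns a (type, encoding) tuple; the type is
    # None for extensions it does not recognize
    content_type, _ = mimetypes.guess_type(key_name)
    return content_type  # e.g. 'image/png' for 'photo.png'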
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import boto
# Variables
AWS_ACCESS_KEY_ID = 'YOUR_AWS_ACCESS_KEY_ID'
AWS_SECRET_ACCESS_KEY = 'YOUR_AWS_SECRET_ACCESS_KEY'
AWS_BUCKET_NAME = 'YOUR_BUCKET_NAME'
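# Note: instead of hardcoding credentials, boto can also pick them up
# from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment
# variables or a ~/.boto config file; in that case boto.connect_s3()
# can be called with no arguments.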
# Function to update metadata
def update_md(k):
    """
    Update the Content-Type of an existing object.
    S3 metadata cannot be edited in place, so the object is
    copied onto itself with the new metadata attached.
    """
    # Get the extension (case-insensitive)
    ext = k.name.split('.')[-1].lower()
    if ext == 'bmp':
        metadata = {'Content-Type': 'image/bmp'}
    elif ext in ('jpg', 'jpeg'):
        metadata = {'Content-Type': 'image/jpeg'}
    elif ext == 'gif':
        metadata = {'Content-Type': 'image/gif'}
    elif ext == 'png':
        metadata = {'Content-Type': 'image/png'}
    elif ext == 'pdf':
        metadata = {'Content-Type': 'application/pdf'}
    elif ext == 'txt':
        metadata = {'Content-Type': 'text/plain'}
    elif ext == 'zip':
        metadata = {'Content-Type': 'application/zip'}
    else:
        # Extension not handled
        return None
    # If the stored type differs, copy the object onto itself
    # with the corrected metadata
    if metadata['Content-Type'] != k.content_type:
        k.copy(AWS_BUCKET_NAME, k.name, metadata, preserve_acl=True)
    return k
# Main entry point
if __name__ == '__main__':
    # Connect to S3
    conn = boto.connect_s3(AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
    b = conn.get_bucket(AWS_BUCKET_NAME)
    # Select the files to parse (a prefix can be specified)
    rs = b.list(prefix="")
    # Browse the files
    for k in rs:
        print k.name
        # get_key() issues a HEAD request, so content_type is populated
        akey = b.get_key(k.name)
        print "Before:", akey.content_type
        if update_md(akey) is None:
            print "Content-Type not handled by this script"
        else:
            print "After: ", b.get_key(k.name).content_type
    print "Script finished!"