# Copyright 2012 Google Inc. All Rights Reserved.
# Author: mrdmnd@ (Matt Redmond)
"""A client to pull data from Bartlett.
Inspired by //depot/google3/experimental/mobile_gwp/database/app_engine_pull.py
The server houses perf.data.gz, the board, and the Chrome version for each
upload. This script first authenticates with a proper @google.com account,
then downloads a sample (if it's not already cached) and unzips perf.data.
Authenticate(): Gets login info and returns an auth token
DownloadSamples(): Download and unzip samples.
_GetServePage(): Pulls /serve page from the app engine server
_DownloadSampleFromServer(): Downloads a local compressed copy of a sample
_UncompressSample(): Decompresses a sample, deleting the compressed version.
"""
import cookielib
import getpass
import gzip
import optparse
import os
import sys
import urllib
import urllib2
# Base URL of the App Engine server; main() passes it to Authenticate() and
# DownloadSamples().
# NOTE(review): this is plain http, and the auth token is sent to this host in
# the query string -- confirm whether https should be used instead.
SERVER_NAME = 'http://chromeoswideprofiling.appspot.com'
# Sent as the 'source' field of the ClientLogin request in Authenticate().
APP_NAME = 'chromeoswideprofiling'
# Separates the fields of each sample entry on the /serve page, and joins the
# same fields when building local filenames.
DELIMITER = '~'
def Authenticate(server_name):
  """Gets credentials from user and attempts to retrieve auth token.

  Prompts on stdin for an email and password, installs a cookie-aware urllib2
  opener as the process-wide default (so later urllib2.urlopen() calls share
  the same cookie jar), and posts the credentials to the ClientLogin endpoint.

  TODO: Accept OAuth2 instead of password.

  Args:
    server_name: (string) URL that the app engine code is living on. Not used
      by the login request itself; kept so existing callers keep working.

  Returns:
    authtoken: (string) The authorization token that can be used
      to grab other pages, or None if the login request failed or the
      response did not contain an Auth field.
  """
  # Grab username and password from user through stdin.
  username = raw_input('Email (must be @google.com account): ')
  password = getpass.getpass('Password: ')

  # Use a cookie to authenticate with GAE. install_opener() makes this opener
  # the one used by every subsequent urllib2.urlopen() call.
  cookiejar = cookielib.LWPCookieJar()
  opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))
  urllib2.install_opener(opener)

  # Get an AuthToken from Google accounts service.
  auth_uri = 'https://www.google.com/accounts/ClientLogin'
  authreq_data = urllib.urlencode({'Email': username,
                                   'Passwd': password,
                                   'service': 'ah',
                                   'source': APP_NAME,
                                   'accountType': 'HOSTED_OR_GOOGLE'})
  auth_req = urllib2.Request(auth_uri, data=authreq_data)
  try:
    auth_resp = urllib2.urlopen(auth_req)
  except urllib2.URLError:
    print('Error logging in to Google accounts service.')
    return None
  try:
    body = auth_resp.read()
  finally:
    auth_resp.close()

  # Auth response contains several KEY=value lines; we care about Auth=.
  # Split on the first '=' only, since a value may itself contain '=', and
  # ignore any line that has no '=' at all.
  auth_resp_dict = dict(line.split('=', 1)
                        for line in body.splitlines() if '=' in line)
  authtoken = auth_resp_dict.get('Auth')
  if not authtoken:
    print('Login response did not contain an auth token.')
    return None
  return authtoken
def DownloadSamples(server_name, authtoken, output_dir, start, stop):
  """Downloads the selected samples and writes unzipped copies to output_dir.

  Fetches the /serve listing, splits it into sample entries, prints the
  entries in [start, stop), then downloads and decompresses each of them.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    output_dir: (string) Filepath to write output to.
    start: (int, or string holding an int) Index to start downloading from,
      starting at top.
    stop: (int, or string holding an int) Index to stop downloading,
      non-inclusive. -1 for end.

  Returns:
    None
  """
  server_name = server_name.rstrip('/')
  # Values coming straight from the command line may be strings; slicing
  # requires real integers.
  start = int(start)
  stop = int(stop)

  serve_page_string = _GetServePage(server_name, authtoken)
  if serve_page_string is None:
    print('Error getting /serve page.')
    return

  # Entries are separated by '</br>'. Drop blank pieces (such as whatever
  # follows a trailing separator) so that indices only count real samples.
  sample_list = [entry for entry in serve_page_string.split('</br>')
                 if entry.strip()]
  # -1 is the documented "up to the end" sentinel. Used literally as a slice
  # bound it would exclude the last entry, so map it to an open-ended slice.
  end = None if stop == -1 else stop
  sample_list_subset = sample_list[start:end]

  print('Will download:')
  for sample in sample_list_subset:
    print(sample)

  for sample in sample_list_subset:
    sample_info = [field.strip() for field in sample.split(DELIMITER)]
    # Expected fields: key, upload time, md5, board, version.
    if len(sample_info) < 5:
      print('Skipping malformed sample entry: %s' % sample)
      continue
    key = sample_info[0]
    upload_time = sample_info[1].replace(' ', '_')  # No space in filenames.
    board = sample_info[3]
    version = sample_info[4]

    # Put a compressed copy of the sample in the output directory, then
    # replace it with the decompressed data.
    _DownloadSampleFromServer(server_name, authtoken, key, upload_time, board,
                              version, output_dir)
    _UncompressSample(key, upload_time, board, version, output_dir)
def _BuildFilenameFromParams(key, time, board, version):
  """Builds the local filename used for a sample.

  The name is the four fields joined with DELIMITER, in the order
  key, time, board, version.

  Args:
    key: (string) Key indexing our sample in the datastore.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release

  Returns:
    (string) The filename, with no directory part and no extension.
  """
  name_parts = (key, time, board, version)
  return DELIMITER.join(name_parts)
def _DownloadSampleFromServer(server_name, authtoken, key, time, board, version,
                              output_dir):
  """Downloads the compressed sample to <output_dir>/<filename>.gz.

  Does nothing if the decompressed file <output_dir>/<filename> already
  exists, where <filename> comes from _BuildFilenameFromParams().

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key indexing our sample in the datastore
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to write to output to.

  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  if os.path.exists(os.path.join(output_dir, filename)):
    print('Already downloaded %s, skipping.' % filename)
    return
  compressed_path = os.path.join(output_dir, filename + '.gz')

  # The request goes to /_ah/login with the real target passed as 'continue'.
  # NOTE(review): presumably the login handler sets the auth cookie and then
  # redirects to the target -- confirm against the server.
  serv_uri = server_name + '/serve/' + key
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  serv_resp = urllib2.urlopen(urllib2.Request(full_serv_uri))

  chunk_size = 64 * 1024
  try:
    # Binary mode: the payload is gzip data and must be written untranslated.
    with open(compressed_path, 'wb') as out_file:
      # Copy in pieces so a large sample is never held fully in memory.
      while True:
        chunk = serv_resp.read(chunk_size)
        if not chunk:
          break
        out_file.write(chunk)
  finally:
    serv_resp.close()
def _UncompressSample(key, time, board, version, output_dir):
  """Uncompresses a given sample.gz file and deletes the compressed version.

  Does nothing if the decompressed file already exists. The data is first
  written to a temporary '<filename>.partial' file and only renamed to its
  final name once decompression succeeds, so a failed run can never leave
  behind a truncated file that later runs would mistake for a cached sample.

  Args:
    key: (string) Sample key to uncompress.
    time: (string) Date that the sample was uploaded.
    board: (string) Board that the sample was taken on.
    version: (string) Version string from /etc/lsb-release
    output_dir: (string) Filepath to find sample key in.

  Returns:
    None
  """
  filename = _BuildFilenameFromParams(key, time, board, version)
  out_path = os.path.join(output_dir, filename)
  if os.path.exists(out_path):
    print('Already decompressed %s, skipping.' % filename)
    return
  compressed_path = out_path + '.gz'
  partial_path = out_path + '.partial'

  # Open the input first: if the .gz is missing, nothing gets created.
  in_file = gzip.open(compressed_path, 'rb')
  chunk_size = 64 * 1024
  completed = False
  try:
    with open(partial_path, 'wb') as out_file:
      while True:
        chunk = in_file.read(chunk_size)
        if not chunk:
          break
        out_file.write(chunk)
    completed = True
  finally:
    in_file.close()
    # On any failure, discard the incomplete output before propagating.
    if not completed and os.path.exists(partial_path):
      os.remove(partial_path)

  os.rename(partial_path, out_path)
  os.remove(compressed_path)
def _DeleteSampleFromServer(server_name, authtoken, key):
  """Asks the server to delete one sample from the datastore.

  Opens <server_name>/del/<key> by way of the /_ah/login endpoint, passing
  the target as 'continue' and the token as 'auth'. The response body is
  not read.

  Args:
    server_name: (string) URL that the app engine code is living on.
    authtoken: (string) Authorization token.
    key: (string) Key to delete.

  Returns:
    None
  """
  delete_target = ''.join((server_name, '/del/', key))
  query = urllib.urlencode({'continue': delete_target, 'auth': authtoken})
  login_url = server_name + '/_ah/login?' + query
  urllib2.urlopen(urllib2.Request(login_url))
def _GetServePage(server_name, authtoken):
  """Opens the /serve page and lists all keys.

  The request goes to /_ah/login with the /serve URL passed as 'continue'
  and the token as 'auth'.

  Args:
    server_name: (string) URL the app engine code is living on.
    authtoken: (string) Authorization token.

  Returns:
    The text of the /serve page (including HTML tags), or None if the page
    could not be opened.
  """
  serv_uri = server_name + '/serve'
  serv_args = {'continue': serv_uri, 'auth': authtoken}
  full_serv_uri = server_name + '/_ah/login?%s' % urllib.urlencode(serv_args)
  serv_req = urllib2.Request(full_serv_uri)
  try:
    serv_resp = urllib2.urlopen(serv_req)
  except urllib2.URLError:
    # The caller tests for None to report a failed fetch, so signal failure
    # that way instead of letting the exception escape.
    return None
  try:
    return serv_resp.read()
  finally:
    serv_resp.close()
def main():
  """Parses the command line, authenticates, and downloads samples.

  Flags:
    --output_dir: existing directory that receives the perf data files.
    --start: index of the first sample to download (default 0).
    --stop: index to stop at, non-inclusive (default -1).

  Returns:
    (int) 0 on success; 1 if --output_dir is missing or is not a directory,
    or if no auth token could be obtained.
  """
  parser = optparse.OptionParser()
  parser.add_option('--output_dir',
                    dest='output_dir',
                    action='store',
                    help='Path to output perf data files.')
  # type='int' makes optparse convert (and validate) the values; without it
  # user-supplied indices arrive as strings and cannot be used for slicing.
  parser.add_option('--start',
                    dest='start_ind',
                    action='store',
                    type='int',
                    default=0,
                    help='Start index.')
  parser.add_option('--stop',
                    dest='stop_ind',
                    action='store',
                    type='int',
                    default=-1,
                    help='Stop index.')
  options = parser.parse_args()[0]

  if not options.output_dir:
    print('Must specify --output_dir.')
    return 1
  # A regular file at this path would pass an exists() check but cannot
  # receive downloads, so require a directory.
  if not os.path.isdir(options.output_dir):
    print('Specified output_dir does not exist or is not a directory.')
    return 1

  authtoken = Authenticate(SERVER_NAME)
  if not authtoken:
    print('Could not obtain authtoken, exiting.')
    return 1

  DownloadSamples(SERVER_NAME, authtoken, options.output_dir,
                  options.start_ind, options.stop_ind)
  print('Downloaded samples.')
  return 0
if __name__ == '__main__':
  # Use sys.exit() rather than the bare exit() helper: the latter is added by
  # the site module for interactive use and is absent when site is not loaded.
  sys.exit(main())