# Your task is to read the input DATAFILE line by line, and for the first 10 lines (not including the header)
# split each line on "," and then for each line, create a dictionary
# where the key is the header title of the field, and the value is the value of that field in the row.
# The function parse_file should return a list of dictionaries,
# each data line in the file being a single list entry.
# Field names and values should not contain extra whitespace, like spaces or newline characters.
# You can use the Python string method strip() to remove the extra whitespace.
# You have to parse only the first 10 data lines in this exercise,
# so the returned list should have 10 entries!
import os
# Directory that holds the input data for this exercise.
DATADIR = "./data/"
# File name inside DATADIR; test() joins the two with os.path.join.
DATAFILE = "beatles-diskography.csv"
def parse_file(datafile, max_rows=10):
    """Parse the leading data rows of a comma-separated file into dictionaries.

    The first line of the file is treated as the header.  Every following
    line is split on "," and paired with the header titles, so each row
    becomes a ``{header title: field value}`` dictionary.  Surrounding
    whitespace (including the trailing newline) is stripped from every
    field name and every value.

    Args:
        datafile: Path of the CSV file to read.
        max_rows: Maximum number of data rows (header excluded) to parse.

    Returns:
        A list with one dictionary per parsed data row, in file order.
    """
    data = []
    with open(datafile, "r") as f:
        # The header row supplies the dictionary keys.
        keys = [key.strip() for key in f.readline().split(',')]
        for count, line in enumerate(f):
            if count >= max_rows:
                break
            values = [value.strip() for value in line.split(',')]
            data.append(dict(zip(keys, values)))
    return data
def test():
    """Simple check of the implementation against the first and tenth data rows."""
    parsed = parse_file(os.path.join(DATADIR, DATAFILE))
    # (row index, expected dictionary) pairs, checked in this order.
    expected_rows = (
        (0, {'Title': 'Please Please Me', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)',
             'Released': '22 March 1963', 'US Chart Position': '-', 'RIAA Certification': 'Platinum',
             'BPI Certification': 'Gold'}),
        (9, {'Title': '', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)', 'Released': '10 July 1964',
             'US Chart Position': '-', 'RIAA Certification': '', 'BPI Certification': 'Gold'}),
    )
    for position, expected in expected_rows:
        assert parsed[position] == expected
# Your task is as follows:
# - read the provided Excel file
# - find and return the min, max and average values for the COAST region
# - find and return the time value for the min and max entries
# - the time values should be returned as Python tuples
# Please see the test function for the expected return format
import xlrd
from zipfile import ZipFile
import numpy as np
# Path of the Excel workbook that test() passes to parse_file().
datafile = "./data/2013_ERCOT_Hourly_Load_Data.xls"
def open_zip(datafile):
    """Unpack ``<datafile>.zip`` into the directory that contains the archive.

    Does nothing when no such archive exists, so calling it is harmless when
    the data file has already been extracted.

    Args:
        datafile: Path of the data file; the archive is looked up at this
            path with ".zip" appended.
    """
    archive = '{0}.zip'.format(datafile)
    if not os.path.isfile(archive):
        return
    with ZipFile(archive, 'r') as myzip:
        # Extract beside the archive (current directory when the path has no
        # directory part).  Presumably the archive holds a member named like
        # ``datafile`` -- verify against the actual archive.
        myzip.extractall(os.path.dirname(archive) or None)
def parse_file(datafile):
    """Summarize the COAST column of the first sheet of an Excel workbook.

    Row 0 is skipped as the header row.  Column 0 is read as Excel date
    serials and column 1 as the COAST values.

    Args:
        datafile: Path of the Excel file, opened with xlrd.

    Returns:
        A dict with the keys 'maxtime', 'maxvalue', 'mintime', 'minvalue'
        and 'avgcoast'.  The two time entries are tuples as produced by
        ``xlrd.xldate_as_tuple``; 'avgcoast' is the mean of the column.
    """
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)

    # Other handy xlrd accessors: sheet.nrows, sheet.ncols,
    # sheet.cell_type(row, col), sheet.cell_value(row, col).
    times = sheet.col_values(0, start_rowx=1, end_rowx=None)
    coast = sheet.col_values(1, start_rowx=1, end_rowx=None)

    i_max = np.argmax(coast)
    i_min = np.argmin(coast)

    # Use the workbook's own date system (1900- or 1904-based) instead of
    # assuming 1900-based serials.
    datemode = workbook.datemode

    data = {
        'maxtime': xlrd.xldate_as_tuple(times[i_max], datemode),
        'maxvalue': coast[i_max],
        'mintime': xlrd.xldate_as_tuple(times[i_min], datemode),
        'minvalue': coast[i_min],
        'avgcoast': np.mean(coast),
    }
    return data
def test():
    """Check the peak timestamp and peak value that parse_file reports for ``datafile``."""
    summary = parse_file(datafile)
    expected_peak_time = (2013, 8, 13, 17, 0, 0)
    expected_peak_value = 18779.02551
    assert summary['maxtime'] == expected_peak_time
    # Compare at 10 decimal places to sidestep float representation noise.
    assert round(summary['maxvalue'], 10) == round(expected_peak_value, 10)
# To experiment with this code freely you will have to run this code locally.
# Take a look at the main() function for an example of how to use the code.
# We have provided example json output in the other code editor tabs for you to
# look at, but you will not be able to run any queries through our UI.
import json
import requests
# Root URL of the web service queried by this script.
BASE_URL = "http://musicbrainz.org/ws/2/"
# Endpoint for artist requests; query_site() appends an optional id to it.
ARTIST_URL = BASE_URL + "artist/"
# query parameters are given to the requests.get function as a dictionary; this
# variable contains some starter parameters.
# Each entry is a params dictionary selected by name, e.g. query_type["simple"].
query_type = { "simple": {},
"atr": {"inc": "aliases+tags+ratings"},
"aliases": {"inc": "aliases"},
"releases": {"inc": "releases"}}
def query_site(url, params, uid="", fmt="json"):
    """Query the musicbrainz API and return the decoded JSON document.

    This is the main function for making queries to the musicbrainz API.

    Args:
        url: Endpoint URL; ``uid`` is appended to it.
        params: Dictionary of query parameters.  It is not modified.
        uid: Optional identifier appended to ``url``.
        fmt: Value sent as the "fmt" query parameter.

    Returns:
        The parsed JSON document when the response status is OK,
        otherwise None.
    """
    # Work on a copy: callers usually pass a shared ``query_type`` entry, and
    # writing "fmt" into it would leak into every later request.
    request_params = dict(params)
    request_params["fmt"] = fmt
    r = requests.get(url + uid, params=request_params)
    print("requesting " + r.url)
    if r.status_code == requests.codes.ok:
        return r.json()
    return None
def query_by_name(url, params, name):
    """Search for an artist by name through query_site().

    Args:
        url: Endpoint URL handed on to query_site().
        params: Dictionary of query parameters.  It is not modified.
        name: Artist name; sent as the query "artist:<name>".

    Returns:
        Whatever query_site() returns for the search request.
    """
    # Copy first so the search term does not stick to the caller's dictionary
    # (typically a shared ``query_type`` entry reused by later requests).
    search_params = dict(params)
    search_params["query"] = "artist:" + name
    return query_site(url, search_params)
def pretty_print(data, indent=4):
    """Print ``data`` in a readable form.

    Dictionaries are printed once, as JSON with sorted keys and the given
    indentation; any other value is printed unchanged.

    Args:
        data: The value to print.
        indent: Number of spaces per JSON nesting level.
    """
    if isinstance(data, dict):
        print(json.dumps(data, indent=indent, sort_keys=True))
    else:
        print(data)
def main():
    """Search for an artist by name, then print one of its releases and all titles.

    Modify the function calls and indexing below to answer the questions on
    the next quiz. HINT: Note how the output we get from the site is a
    multi-level JSON document, so try making print statements to step through
    the structure one level at a time or copy the output to a separate output
    file.
    """
    results = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
    print(results.keys())
    print(results['artists'][0]['name'])
    print(len(results))

    # The second search hit is the artist whose releases are fetched below.
    artist_id = results["artists"][1]["id"]
    print("\nARTIST:")

    artist_data = query_site(ARTIST_URL, query_type["releases"], artist_id)
    releases = artist_data["releases"]
    print("\nONE RELEASE:")
    pretty_print(releases[0], indent=2)

    release_titles = [r["title"] for r in releases]
    print("\nALL TITLES:")
    for t in release_titles:
        print(t)
if __name__ == '__main__':
    # Quiz answers.  Each section searches for an artist name and keeps only
    # the hits whose name matches exactly.  Every print call takes a single
    # pre-formatted string so the text is the same on Python 2 and Python 3.
    import pprint
    pp = pprint.PrettyPrinter(indent=2)

    # How many bands are there named "First Aid Kit"?
    results = query_by_name(ARTIST_URL, query_type["simple"], "First Aid Kit")
    names = [artist['name'] for artist in results['artists'] if artist['name'] == 'First Aid Kit']
    print('\n***There are %s bands named "First Aid Kit"\n' % (len(names),))

    # begin-area name for Queen?
    results = query_by_name(ARTIST_URL, query_type["simple"], "Queen")
    queen = [artist for artist in results['artists'] if artist['name'] == 'Queen']
    print('\n***The begin-area name for Queen is %s \n' % (queen[0]['begin-area']['name'],))

    # Spanish alias for the Beatles?
    results = query_by_name(ARTIST_URL, query_type["simple"], "Beatles")
    beatles = [artist for artist in results['artists']
               if artist['name'] in ('Beatles', 'The Beatles')]
    pp.pprint([alias['name'] for alias in beatles[0]['aliases']])
    print('\n')

    # Nirvana disambiguation
    results = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
    nirvana = [artist for artist in results['artists'] if artist['name'] == 'Nirvana']
    print('\n***The disambiguation for Nirvana is: %s \n' % (nirvana[0]['disambiguation'],))

    # when was one direction formed?
    results = query_by_name(ARTIST_URL, query_type["simple"], "One Direction")
    onedirection = [artist for artist in results['artists'] if artist['name'] == 'One Direction']
    print('\n***One Direction began %s' % (onedirection[0]['life-span']['begin'],))