# Your task is to read the input DATAFILE line by line, and for the first 10 lines (not including the header)
# split each line on "," and then for each line, create a dictionary
# where the key is the header title of the field, and the value is the value of that field in the row.
# The function parse_file should return a list of dictionaries,
# with each data line in the file becoming a single entry in that list.
# Field names and values should not contain extra whitespace, like spaces or newline characters.
# You can use the Python string method strip() to remove the extra whitespace.
# You have to parse only the first 10 data lines in this exercise,
# so the returned list should have 10 entries!
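# For example, zipping a (trimmed) header row with a matching data row gives
# the per-row dictionary this exercise asks for; the values below are taken
# from the assertions in test():
#
#   keys = "Title,Released,Label".split(',')
#   values = "Please Please Me,22 March 1963,Parlophone(UK)".split(',')
#   dict(zip(keys, values))
#   # -> {'Title': 'Please Please Me', 'Released': '22 March 1963',
#   #     'Label': 'Parlophone(UK)'}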
import os
DATADIR = "./data/"
DATAFILE = "beatles-diskography.csv"
datafile = DATADIR+DATAFILE
def parse_file(datafile):
data = []
count = 0
with open(datafile, "r") as f:
for line in f:
# the header row ==> get the keys
if count == 0:
keys = line.strip().split(',')
elif count <= 10:
values = line.strip().split(',')
data.append(dict(zip(keys,values)))
else:
break
count += 1
return data
def test():
    # a simple test of your implementation
datafile = os.path.join(DATADIR, DATAFILE)
d = parse_file(datafile)
firstline = {'Title': 'Please Please Me', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)',
'Released': '22 March 1963', 'US Chart Position': '-', 'RIAA Certification': 'Platinum',
'BPI Certification': 'Gold'}
tenthline = {'Title': '', 'UK Chart Position': '1', 'Label': 'Parlophone(UK)', 'Released': '10 July 1964',
'US Chart Position': '-', 'RIAA Certification': '', 'BPI Certification': 'Gold'}
assert d[0] == firstline
assert d[9] == tenthline
test()
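# A minimal alternative sketch (not part of the original exercise): the same
# first-10-rows parse built on the standard-library csv module, which also
# copes with quoted fields that a bare split(',') would break apart. The name
# parse_file_csv is hypothetical.
import csv
from itertools import islice

def parse_file_csv(datafile):
    with open(datafile, "r") as f:
        reader = csv.DictReader(f)  # the header row supplies the keys
        # islice stops after the first 10 data rows; strip() mirrors the
        # whitespace cleanup asked for in the task description above
        return [{k.strip(): v.strip() for k, v in row.items()}
                for row in islice(reader, 10)]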
"""
Your task is as follows:
- read the provided Excel file
- find and return the min, max and average values for the COAST region
- find and return the time value for the min and max entries
- the time values should be returned as Python tuples
Please see the test function for the expected return format
"""
import xlrd
from zipfile import ZipFile
import numpy as np
datafile = "./data/2013_ERCOT_Hourly_Load_Data.xls"
def open_zip(datafile):
pass
    # with ZipFile('{0}.zip'.format(datafile), 'r') as myzip:
    #     myzip.extractall()
def parse_file(datafile):
workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)
    ### an example of how you can get the data
#sheet_data = [[sheet.cell_value(r, col) for col in range(sheet.ncols)] for r in range(sheet.nrows)]
### other useful methods:
# print "\nROWS, COLUMNS, and CELLS:"
# print "Number of rows in the sheet:",
# print sheet.nrows
# print "Type of data in cell (row 3, col 2):",
# print sheet.cell_type(3, 2)
# print "Value in cell (row 3, col 2):",
# print sheet.cell_value(3, 2)
# print "Get a slice of values in column 3, from rows 1-3:"
# print sheet.col_values(3, start_rowx=1, end_rowx=4)
# print "\nDATES:"
# print "Type of data in cell (row 1, col 0):",
# print sheet.cell_type(1, 0)
# exceltime = sheet.cell_value(1, 0)
# print "Time in Excel format:",
# print exceltime
# print "Convert time to a Python datetime tuple, from the Excel float:",
# print xlrd.xldate_as_tuple(exceltime, 0)
times = sheet.col_values(0, start_rowx=1, end_rowx=None)
coast = sheet.col_values(1, start_rowx=1, end_rowx=None)
i_max = np.argmax(coast)
i_min = np.argmin(coast)
maxtime = xlrd.xldate_as_tuple(times[i_max], 0)
mintime = xlrd.xldate_as_tuple(times[i_min], 0)
maxvalue = coast[i_max]
minvalue = coast[i_min]
avgcoast = np.mean(coast)
data = {
'maxtime': maxtime,
'maxvalue': maxvalue,
'mintime': mintime,
'minvalue': minvalue,
'avgcoast': avgcoast
}
return data
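# A numpy-free sketch of the same computation (the helper name coast_stats is
# hypothetical, not part of the exercise): max()/min() with a key function and
# sum()/len() stand in for argmax/argmin/mean.
def coast_stats(datafile):
    sheet = xlrd.open_workbook(datafile).sheet_by_index(0)
    times = sheet.col_values(0, start_rowx=1, end_rowx=None)
    coast = sheet.col_values(1, start_rowx=1, end_rowx=None)
    i_max = max(range(len(coast)), key=lambda i: coast[i])
    i_min = min(range(len(coast)), key=lambda i: coast[i])
    return {
        'maxtime': xlrd.xldate_as_tuple(times[i_max], 0),
        'maxvalue': coast[i_max],
        'mintime': xlrd.xldate_as_tuple(times[i_min], 0),
        'minvalue': coast[i_min],
        'avgcoast': sum(coast) / float(len(coast)),
    }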
def test():
open_zip(datafile)
data = parse_file(datafile)
assert data['maxtime'] == (2013, 8, 13, 17, 0, 0)
assert round(data['maxvalue'], 10) == round(18779.02551, 10)
test()
# To experiment with this code freely, you will have to run it locally.
# Take a look at the main() function for an example of how to use the code.
# We have provided example JSON output in the other code editor tabs for you to
# look at, but you will not be able to run any queries through our UI.
import json
import requests
BASE_URL = "http://musicbrainz.org/ws/2/"
ARTIST_URL = BASE_URL + "artist/"
# query parameters are given to the requests.get function as a dictionary; this
# variable contains some starter parameters.
query_type = { "simple": {},
"atr": {"inc": "aliases+tags+ratings"},
"aliases": {"inc": "aliases"},
"releases": {"inc": "releases"}}
def query_site(url, params, uid="", fmt="json"):
    # This is the main function for making queries to the MusicBrainz API.
    # The query should return a JSON document.
params["fmt"] = fmt
r = requests.get(url + uid, params=params)
print "requesting", r.url
if r.status_code == requests.codes.ok:
return r.json()
else:
r.raise_for_status()
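# A usage sketch of query_site: fetching a single artist record by MusicBrainz
# id through the uid argument ("<artist-id>" is a placeholder, not a real id):
#
#   artist = query_site(ARTIST_URL, dict(query_type["releases"]), uid="<artist-id>")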
def query_by_name(url, params, name):
# This adds an artist name to the query parameters before making
# an API call to the function above.
params["query"] = "artist:" + name
return query_site(url, params)
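# For example, the call below sends the Lucene query string "artist:Nirvana";
# matches come back under the "artists" key, as used in main() below.
#
#   results = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
#   nirvana_matches = results["artists"]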
def pretty_print(data, indent=4):
# After we get our output, we can format it to be more readable
# by using this function.
if type(data) == dict:
print json.dumps(data, indent=indent, sort_keys=True)
else:
print data
def main():
'''
Modify the function calls and indexing below to answer the questions on
the next quiz. HINT: Note how the output we get from the site is a
multi-level JSON document, so try making print statements to step through
the structure one level at a time or copy the output to a separate output
file.
'''
results = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
#pretty_print(results)
print results.keys()
print results['artists'][0]['name']
print len(results)
artist_id = results["artists"][1]["id"]
print "\nARTIST:"
pretty_print(results["artists"][1])
artist_data = query_site(ARTIST_URL, query_type["releases"], artist_id)
releases = artist_data["releases"]
print "\nONE RELEASE:"
pretty_print(releases[0], indent=2)
release_titles = [r["title"] for r in releases]
print "\nALL TITLES:"
for t in release_titles:
print t
if __name__ == '__main__':
main()
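# Following the hint in main()'s docstring, dumping a response to a file makes
# the nested JSON easier to step through; a sketch (the filename is only
# illustrative):
#
#   results = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
#   with open("nirvana_results.json", "w") as f:
#       json.dump(results, f, indent=2, sort_keys=True)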
import pprint
pp = pprint.PrettyPrinter(indent=2)
# How many bands are there named "First Aid Kit"?
results = query_by_name(ARTIST_URL, query_type["simple"], "First Aid Kit")
names = [artist['name'] for artist in results['artists'] if artist['name']=='First Aid Kit']
print '\n***There are', len(names), 'bands named "First Aid Kit"\n'
# begin-area name for Queen?
results = query_by_name(ARTIST_URL, query_type["simple"], "Queen")
queen = [artist for artist in results['artists'] if artist['name']=='Queen']
print '\n***The begin-area name for Queen is', queen[0]['begin-area']['name'],'\n'
# Spanish alias for the Beatles?
results = query_by_name(ARTIST_URL, query_type["simple"], "Beatles")
beatles = [artist for artist in results['artists'] if (artist['name']=='Beatles' or artist['name']=='The Beatles')]
#pp.pprint(beatles)
#print beatles[0]['aliases']
pp.pprint([alias['name'] for alias in beatles[0]['aliases']])
print '\n'
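# To single out the Spanish alias rather than listing all of them, the alias
# entries can be filtered on their locale; a sketch, assuming the response tags
# the Spanish alias with locale 'es':
#
#   spanish = [a['name'] for a in beatles[0]['aliases'] if a.get('locale') == 'es']
#   print spanish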
# Nirvana disambiguation
results = query_by_name(ARTIST_URL, query_type["simple"], "Nirvana")
nirvana = [artist for artist in results['artists'] if artist['name']=='Nirvana']
print '\n***The disambiguation for Nirvana is:',nirvana[0]['disambiguation'],'\n'
# When was One Direction formed?
results = query_by_name(ARTIST_URL, query_type["simple"], "One Direction")
onedirection = [artist for artist in results['artists'] if artist['name']=='One Direction']
print '\n***One Direction began', onedirection[0]['life-span']['begin']