#!/usr/bin/env python
# Your task here is to extract data from xml on authors of an article
# and add it to a list, one item for an author.
# See the provided data structure for the expected format.
# The tags for first name, surname and email should map directly
# to the dictionary keys
import xml.etree.ElementTree as ET
article_file = "./data/exampleResearchArticle.xml"


def get_root(fname):
    """Parse the XML document at *fname* and return its root element."""
    return ET.parse(fname).getroot()
def get_authors(root):
    """Collect one record per author (<au>) element in the article tree.

    Each record is a dict with keys "fnm", "snm" and "email" holding the
    text of the matching child tag, or None when that tag is absent.
    """
    def _child_text(au, tag):
        node = au.find(tag)
        return None if node is None else node.text

    records = []
    for au in root.findall('./fm/bibl/aug/au'):
        records.append({
            "fnm": _child_text(au, 'fnm'),
            "snm": _child_text(au, 'snm'),
            "email": _child_text(au, 'email'),
        })
    return records
def test():
    """Smoke-test get_authors() against the known example article."""
    expected = [
        {'fnm': 'Omer', 'snm': 'Mei-Dan', 'email': 'omer@extremegate.com'},
        {'fnm': 'Mike', 'snm': 'Carmont', 'email': 'mcarmont@hotmail.com'},
        {'fnm': 'Lior', 'snm': 'Laver', 'email': 'laver17@gmail.com'},
        {'fnm': 'Meir', 'snm': 'Nyska', 'email': 'nyska@internet-zahav.net'},
        {'fnm': 'Hagay', 'snm': 'Kammar', 'email': 'kammarh@gmail.com'},
        {'fnm': 'Gideon', 'snm': 'Mann', 'email': 'gideon.mann.md@gmail.com'},
        {'fnm': 'Barnaby', 'snm': 'Clarck', 'email': 'barns.nz@gmail.com'},
        {'fnm': 'Eugene', 'snm': 'Kots', 'email': 'eukots@gmail.com'},
    ]
    authors = get_authors(get_root(article_file))
    assert authors[0] == expected[0]
    assert authors[1]["fnm"] == expected[1]["fnm"]


test()
#!/usr/bin/env python
# Your task here is to extract data from xml on authors of an article
# and add it to a list, one item for an author.
# See the provided data structure for the expected format.
# The tags for first name, surname and email should map directly
# to the dictionary keys, but you have to extract the attributes from the "insr" tag
# and add them to the list for the dictionary key "insr"
import xml.etree.ElementTree as ET
article_file = "./data/exampleResearchArticle.xml"


def get_root(fname):
    """Open the XML file *fname* and hand back the root of its tree."""
    parsed = ET.parse(fname)
    return parsed.getroot()
def get_authors(root):
    """Build one dict per <au> author element under fm/bibl/aug.

    Keys "fnm", "snm" and "email" carry the child tag's text (None when
    the tag is absent); "insr" is the list of iid attribute values taken
    from every <insr> child of the author.
    """
    def _child_text(au, tag):
        # Guard against a missing child tag instead of raising
        # AttributeError on `.find(tag).text`; this matches the
        # None-safe behavior of the earlier version of this function.
        node = au.find(tag)
        return node.text if node is not None else None

    authors = []
    for author in root.findall('./fm/bibl/aug/au'):
        data = {
            "fnm": _child_text(author, 'fnm'),
            "snm": _child_text(author, 'snm'),
            "email": _child_text(author, 'email'),
            "insr": [insr.attrib['iid'] for insr in author.findall('insr')],
        }
        authors.append(data)
    return authors
def test():
    """Smoke-test get_authors() (with insr ids) against the example article."""
    expected = [
        {'insr': ['I1'], 'fnm': 'Omer', 'snm': 'Mei-Dan',
         'email': 'omer@extremegate.com'},
        {'insr': ['I2'], 'fnm': 'Mike', 'snm': 'Carmont',
         'email': 'mcarmont@hotmail.com'},
        {'insr': ['I3', 'I4'], 'fnm': 'Lior', 'snm': 'Laver',
         'email': 'laver17@gmail.com'},
        {'insr': ['I3'], 'fnm': 'Meir', 'snm': 'Nyska',
         'email': 'nyska@internet-zahav.net'},
        {'insr': ['I8'], 'fnm': 'Hagay', 'snm': 'Kammar',
         'email': 'kammarh@gmail.com'},
        {'insr': ['I3', 'I5'], 'fnm': 'Gideon', 'snm': 'Mann',
         'email': 'gideon.mann.md@gmail.com'},
        {'insr': ['I6'], 'fnm': 'Barnaby', 'snm': 'Clarck',
         'email': 'barns.nz@gmail.com'},
        {'insr': ['I7'], 'fnm': 'Eugene', 'snm': 'Kots',
         'email': 'eukots@gmail.com'},
    ]
    authors = get_authors(get_root(article_file))
    assert authors[0] == expected[0]
    assert authors[1]["insr"] == expected[1]["insr"]


test()
from bs4 import BeautifulSoup
def options(soup, id):
    """Return the 'value' attribute of every <option> element found
    under the document element whose id attribute equals *id*."""
    container = soup.find(id=id)
    return [opt['value'] for opt in container.find_all('option')]
def print_list(label, codes):
    """Print *label* as a heading followed by each code on its own line."""
    # Parenthesized single-argument print behaves identically on
    # Python 2 and 3; the original print-statement syntax is a
    # SyntaxError under Python 3.
    print('\n%s:' % label)
    for c in codes:
        print(c)
def main():
    """Scrape the airport and carrier <select> option codes from the
    previously saved HTML page and print both lists."""
    # Close the file handle deterministically instead of leaking it
    # (the original passed a bare open() result to BeautifulSoup).
    with open('./data/virgin_and_logan_airport.html') as page:
        soup = BeautifulSoup(page)
    airports = options(soup, 'AirportList')
    carriers = options(soup, 'CarrierList')
    print_list('Airports', airports)
    print_list('Carriers', carriers)


main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Please note that the function 'make_request' is provided for your reference only.
# You will not be able to to actually use it from within the Udacity web UI.
# Your task is to process the HTML using BeautifulSoup, extract the hidden
# form field values for "__EVENTVALIDATION" and "__VIEWSTATE" and set the appropriate
# values in the data dictionary.
# All your changes should be in the 'extract_data' function
from bs4 import BeautifulSoup
import requests
import json
html_page = "./data/page_source.html"


def extract_data(page):
    """Extract the hidden ASP.NET form fields from the HTML file *page*.

    Returns a dict whose "eventvalidation" and "viewstate" entries hold
    the values of the __EVENTVALIDATION and __VIEWSTATE inputs.
    """
    with open(page, "r") as html:
        soup = BeautifulSoup(html, 'lxml')
    return {
        "eventvalidation": soup.find(id='__EVENTVALIDATION')['value'],
        "viewstate": soup.find(id='__VIEWSTATE')['value'],
    }
def make_request(data):
    """POST the Data_Elements form back to the BTS site using the hidden
    field values in *data*; returns the response body as text.

    Reference only — performs a live HTTP request.
    """
    payload = {
        'AirportList': "BOS",
        'CarrierList': "VX",
        'Submit': 'Submit',
        "__EVENTTARGET": "",
        "__EVENTARGUMENT": "",
        "__EVENTVALIDATION": data["eventvalidation"],
        "__VIEWSTATE": data["viewstate"],
    }
    response = requests.post(
        "http://www.transtats.bts.gov/Data_Elements.aspx?Data=2",
        data=payload)
    return response.text
def test():
    """Check that extract_data() finds the expected hidden form values."""
    fields = extract_data(html_page)
    assert fields["eventvalidation"] != ""
    assert fields["eventvalidation"].startswith("/wEWjAkCoIj1ng0")
    assert fields["viewstate"].startswith("/wEPDwUKLTI")


test()
from bs4 import BeautifulSoup
# Fetch the BOS/VX data page in two steps and save it for offline parsing.
s = requests.Session()

# GET once to pick up the ASP.NET hidden state tokens the server expects.
r = s.get('http://www.transtats.bts.gov/Data_Elements.aspx?Data=2')
soup = BeautifulSoup(r.text)
viewstate = soup.find(id='__VIEWSTATE')['value']
eventvalidation = soup.find(id='__EVENTVALIDATION')['value']

# POST the form back with the state tokens to obtain the data page.
r = s.post('http://www.transtats.bts.gov/Data_Elements.aspx?Data=2',
           data={'AirportList': 'BOS',
                 'CarrierList': 'VX',
                 'Submit': 'Submit',
                 '__EVENTTARGET': '',
                 '__EVENTARGUMENT': '',
                 '__EVENTVALIDATION': eventvalidation,
                 '__VIEWSTATE': viewstate})

# Use a context manager so the output file is flushed and closed even on
# error — the original leaked the handle and risked losing buffered data.
with open('./data/virgin_and_logan_airport.html', 'w') as f:
    f.write(r.text)