# Environment setup for the terrain-data SVM demo.
# NOTE(review): the paths below are absolute locations on one developer's
# machine; they must be adjusted before this runs anywhere else.
import sys
# Extend the module search path so the course helper modules imported below
# (class_vis, prep_terrain_data) can be found.
sys.path.append("C:/Users/Jeff/udacity/Intro_to_Machine_Learning/ud120-projects/tools/")
sys.path.append('C:/Users/Jeff/udacity/Intro_to_Machine_Learning/ud120-projects/choose_your_own')
sys.path.append('C:/Users/Jeff/udacity/Intro_to_Machine_Learning/ud120-projects/svm')
import os
# Switch the working directory to the svm project folder; any relative paths
# used from here on resolve against it.
os.chdir('C:/Users/Jeff/udacity/Intro_to_Machine_Learning/ud120-projects/svm')
from class_vis import prettyPicture
from prep_terrain_data import makeTerrainData
# Duplicate of the `import sys` at the top; harmless.
import sys
import matplotlib.pyplot as plt
import copy
import numpy as np
import pylab as pl
from sklearn.metrics import accuracy_score
# Load the terrain data set.
# NOTE(review): the unpacking order here (train features, train labels,
# test features, test labels) differs from the preprocess() call later in
# this file -- confirm it matches what makeTerrainData actually returns.
features_train, labels_train, features_test, labels_test = makeTerrainData()
########################## SVM #################################
from sklearn.svm import SVC
def submitAccuracy():
    """Return the accuracy of the module-level predictions.

    Reads the globals ``pred`` and ``labels_test`` at call time and passes
    them to ``accuracy_score``; both must be bound before this is called.
    """
    score = accuracy_score(pred, labels_test)
    return score
# Baseline: fit a linear-kernel SVM on the terrain training data and report
# its accuracy on the test split.
clf = SVC(kernel="linear")
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
# A parenthesised single-argument print produces the same output on
# Python 2 and Python 3. accuracy_score is documented as (y_true, y_pred);
# the score is the same either way, this just matches the signature.
print(accuracy_score(labels_test, pred))
# Refit the linear-kernel SVM and draw it over the test points.
# NOTE(review): per the scikit-learn SVC documentation, gamma is the
# coefficient for the rbf/poly/sigmoid kernels, so it should have no effect
# with kernel="linear" -- kept as written.
clf = SVC(kernel="linear", gamma=1.0)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
# `%matplotlib inline` is IPython magic syntax and a SyntaxError in a plain
# Python script. Issue the same magic through the IPython API when running
# under IPython/Jupyter, and skip it otherwise.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    # get_ipython only exists inside an IPython session.
    pass
prettyPicture(clf, features_test, labels_test)
# --- C parameter ---
# RBF-kernel SVM with a very large C (10**5), drawn over the test points.
clf = SVC(kernel="rbf", C=10**5)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
# The notebook had `%matplotlib inline` here. That is IPython magic syntax
# and a SyntaxError in a plain Python script, so it is kept only as this
# comment; run the magic in a notebook cell if inline figures are wanted.
prettyPicture(clf, features_test, labels_test)
# --- gamma parameter ---
# RBF-kernel SVM with gamma=10, drawn over the test points.
clf = SVC(kernel="rbf", gamma=10)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
# The notebook had `%matplotlib inline` here. That is IPython magic syntax
# and a SyntaxError in a plain Python script, so it is kept only as this
# comment; run the magic in a notebook cell if inline figures are wanted.
prettyPicture(clf, features_test, labels_test)
#!/usr/bin/python
"""
This is the code to accompany the Lesson 2 (SVM) mini-project.
Use an SVM to identify emails from the Enron corpus by their authors:
Sara has label 0
Chris has label 1
"""
from sklearn.metrics import accuracy_score
from time import time
from email_preprocess import preprocess
### Load the e-mail data set for the author-identification exercise.
### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
### NOTE: this rebinds the four names that held the terrain data earlier in
### this file, so everything below operates on the e-mail data only.
features_train, features_test, labels_train, labels_test = preprocess()
def my_svm(features_train, features_test, labels_train, labels_test, kernel='linear', C=1.0):
    """Train an SVC, time it, print its test accuracy and return predictions.

    Args:
        features_train: training features, passed to ``SVC.fit``.
        features_test: test features, passed to ``SVC.predict``.
        labels_train: labels corresponding to ``features_train``.
        labels_test: true labels for ``features_test``; used only for scoring.
        kernel: kernel name forwarded to ``SVC`` (default ``'linear'``).
        C: value forwarded to ``SVC``'s ``C`` argument (default ``1.0``).

    Returns:
        The predictions produced by ``clf.predict(features_test)``.

    Side effects:
        Prints the training time, the prediction time and the accuracy.
    """
    # the classifier
    clf = SVC(kernel=kernel, C=C)

    # train
    t0 = time()
    clf.fit(features_train, labels_train)
    # Each message is built as one string so the printed text is the same
    # under Python 2 (print statement) and Python 3 (print function).
    print("\ntraining time: {0} s".format(round(time() - t0, 3)))

    # predict
    t0 = time()
    pred = clf.predict(features_test)
    print("predicting time: {0} s".format(round(time() - t0, 3)))

    # accuracy_score is documented as (y_true, y_pred); the score is the
    # same in either order, this just matches the documented signature.
    accuracy = accuracy_score(labels_test, pred)
    print('\naccuracy = {0}'.format(accuracy))
    return pred
# Full training set with the default linear kernel.
pred = my_svm(features_train, features_test, labels_train, labels_test)

# Keep only the first 1% of the training data to make training faster.
# Floor division keeps the slice bound an integer on Python 3 as well
# (true division would give a float there and the slice would raise
# TypeError); on Python 2 the value is unchanged.
features_train2 = features_train[:len(features_train) // 100]
labels_train2 = labels_train[:len(labels_train) // 100]

# Same 1% subset, first with the linear kernel and then with RBF.
pred = my_svm(features_train2, features_test, labels_train2, labels_test)
pred = my_svm(features_train2, features_test, labels_train2, labels_test, 'rbf')
# --- C parameter ---
# Sweep C for the RBF kernel on the 1% training subset.
# NOTE(review): indentation was lost in the notebook export; the blank-line
# separator print is assumed to belong inside the loop (one per run).
for C in [10, 100, 1000, 10000]:
    # Writes "C = <value> " with no newline, i.e. the same bytes the
    # Python 2 statement `print 'C =',C,` followed by the next print
    # produced, but valid on Python 3 too.
    sys.stdout.write('C = {0} '.format(C))
    pred = my_svm(features_train2, features_test, labels_train2, labels_test, kernel='rbf', C=C)
    print('\n\n')
# Final model: RBF kernel with C=10000, trained on the full training set.
pred = my_svm(features_train, features_test, labels_train, labels_test, kernel='rbf', C=10000)

# Predicted author for three individual test e-mails (0 = Sara, 1 = Chris).
# Parenthesised single-argument prints behave the same on Python 2 and 3.
print(pred[10])
print(pred[26])
print(pred[50])

# Labels are 0/1, so the sum is the number of test e-mails predicted to be
# from Chris (label 1).
print(sum(pred))