Name: Suprit Darshan Shrestha Reg - no:19BCE2584: Lab DA1 Machine Learning Lab
Name: Suprit Darshan Shrestha Reg - no:19BCE2584: Lab DA1 Machine Learning Lab
Preprocessing: columns with close or similar values were combined to decrease the
number of columns.
Find-S algorithm (S_algo):
Code:
import csv

# Load every data record of the training CSV into `a` as a list of string lists.
a = []
# newline='' is what the csv module asks for, so that quoted fields containing
# line breaks are parsed correctly.
with open('Data1.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # Skip the header record; the None default keeps an empty file from
    # raising StopIteration.
    next(reader, None)
    a.extend(reader)
print(a)

# Number of columns before the last one (the last column is the one the code
# below indexes with `num_attribute`). An empty data set yields 0 instead of
# failing with IndexError on a[0].
num_attribute = len(a[0]) - 1 if a else 0
# NOTE(review): this is a fragment. The loop that defines `i`, the
# initialisation of `hypothesis`, and the body that belongs to the `if` below
# are not present in this listing -- confirm against the original script.
# Compares the value in column `num_attribute` of row `i` with the string "1".
if a[i][num_attribute] == "1":
# Reports row i+1 as a negative instance that is ignored.
print ("\nInstance ", i+1, "is", a[i], " and is Negative Instance Hence Ignored")
# Prints the hypothesis as it stands after row i+1.
print("The hypothesis for the training instance", i+1," is: " , hypothesis, "\n")
# Prints the final hypothesis.
print("\nThe Maximally specific hypothesis for the training instance is ", hypothesis)
Output:
Decision Tree:
Code:
import pandas as pd
from sklearn import metrics  # accuracy calculation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Load the data set: every column but the last is a feature, the last is the label.
dataset = pd.read_csv('Data1.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# Hold out 25% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Fit the scaler on the training rows only, then apply the same transform to
# the test rows so no test statistics leak into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# The listing called classifier.predict() without ever creating or fitting
# `classifier`, which raises NameError. Build and train the tree here.
# NOTE(review): hyperparameters are library defaults plus a fixed seed --
# confirm against the settings used for the reported output.
classifier = DecisionTreeClassifier(random_state=0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
Output:
ID3:
Code:
import pandas as pd
import math
import numpy as np
# Read the training data; the candidate split features are all column labels
# except the "Diagnosis" target column.
data = pd.read_csv('Data1.csv')
features = list(data.columns)
features.remove("Diagnosis")
class Node:
    """One node of the decision tree.

    Attributes set at construction:
        children: list of child nodes, initially empty.
        value: label stored on the node, initially "".
        isLeaf: leaf flag, initially False.
        pred: prediction stored on the node, initially "".
    """

    def __init__(self) -> None:
        # A fresh list per instance, so nodes never share children.
        self.children, self.value = [], ""
        self.isLeaf, self.pred = False, ""
def entropy(examples):
    """Return the two-class Shannon entropy, in bits, of the "Diagnosis" column.

    Rows whose "Diagnosis" equals 0 form one class; every other row forms the
    second class.

    Args:
        examples: pandas DataFrame that has a "Diagnosis" column.

    Returns:
        0.0 when either class is absent (this includes an empty frame),
        otherwise -(p*log2(p) + n*log2(n)), where p and n are the two class
        proportions.
    """
    total = len(examples)
    # Vectorised count instead of a Python-level iterrows() loop, which builds
    # a Series for every row.
    zeros = int((examples["Diagnosis"] == 0).sum())
    others = total - zeros
    if zeros == 0 or others == 0:
        # A pure (or empty) set carries no uncertainty; returning early also
        # avoids log2(0).
        return 0.0
    p = zeros / total
    n = others / total
    return -(p * math.log2(p) + n * math.log2(n))
def ID3(examples, attrs):
    """Recursively build a decision tree by splitting on the highest-gain attribute.

    The listing had lost this function's header and the creation of `root`;
    both are restored here from the recursive call `ID3(subdata, new_attrs)`
    and the names the body uses.

    Args:
        examples: pandas DataFrame of training rows; must contain a
            "Diagnosis" column plus every column named in `attrs`.
        attrs: list of column names still available for splitting. It is not
            modified.

    Returns:
        A Node whose `value` is the chosen attribute and whose children carry
        one attribute value each. A child is either a leaf (`isLeaf` True,
        `pred` holding the label array) or an inner node whose single child is
        the subtree for that value. If no attribute has positive information
        gain, the returned node itself is a leaf predicting the most frequent
        label.
    """
    root = Node()

    # Choose the attribute with the largest strictly positive information gain.
    max_gain = 0
    max_feat = ""
    for feature in attrs:
        gain = info_gain(examples, feature)
        if gain > max_gain:
            max_gain = gain
            max_feat = feature

    if max_feat == "":
        # No attribute is left, or none reduces entropy. Indexing
        # examples[""] would raise KeyError, so fall back to a leaf that
        # predicts the most frequent label (kept as a 1-element array to match
        # the shape of the other leaves' `pred`).
        labels, counts = np.unique(examples["Diagnosis"], return_counts=True)
        root.isLeaf = True
        root.pred = labels[[counts.argmax()]] if len(labels) else labels
        return root

    root.value = max_feat

    # The attribute list for the subtrees is the same for every branch, so it
    # is built once; recursive calls copy it before removing anything.
    new_attrs = attrs.copy()
    new_attrs.remove(max_feat)

    for u in np.unique(examples[max_feat]):
        subdata = examples[examples[max_feat] == u]
        branch = Node()
        branch.value = u
        if entropy(subdata) == 0.0:
            # Pure subset: stop here and record its label(s).
            branch.isLeaf = True
            branch.pred = np.unique(subdata["Diagnosis"])
        else:
            child = ID3(subdata, new_attrs)
            if child.isLeaf:
                # The subtree could not be split further; fold its majority
                # prediction into this branch so the tree keeps the usual
                # "value node -> leaf" shape.
                branch.isLeaf = True
                branch.pred = child.pred
            else:
                branch.children.append(child)
        root.children.append(branch)
    return root