unsplash-image-n6B49lTx7NM.jpg

Python - Machine Learning

Machine Learning - Python


This is a program written in Python that applies machine learning principles to evaluate unlabeled data and assign it a classification. The labeled data it relies on is entered by the user.

This program was built using the “Spyder” IDE. It prompts the user for validated input of a labeled data set and then, using machine learning principles, performs calculations for each defined class from the provided numerical attributes/features. With the results, it lets the user enter unclassified data and labels it for them. The user also has the option to grow the data set by entering more labeled data after defining it initially.

A video demonstrating the output and the code is shown below:

View the Source Code below, or view the full codebase on GitHub

Main
Setup
Calculations
Exported from Notepad++
import calculations
import setup

# Author: Mostapha Abdelaziz
# Program to predict an items classification based on predefined data


def _read_attributes(label, allnames):
    """Prompt for one validated value per attribute name.

    label is the text shown in each prompt (a class name or the overall
    classification name); the values are returned in the order of allnames.
    """
    values = []
    for att in allnames:
        values.append(setup.enter_attribute(label, att))
    return values


# ask for item classification name
classname = input("Enter name of the item classification (e.g vehicle type, player position)\n > ")

# Call setup functions to define data set being used

# how many attributes each label/class uses
numofat = setup.enter_attribute_count(classname)

# ask for attribute names
allnames = setup.enter_names(numofat)

# ask for how many classes there are
numofclass = setup.enter_class_count()

# ask for each class name
classes = setup.enter_class_names(classname, numofclass)

# populate dataset that calculations will be based on;
# attributes[c] holds every instance (a list of values) entered for classes[c]
attributes = []
for label in classes:
    instances = []
    addmore = True
    while addmore:
        print(f"\nEnter {label} {len(instances) + 1} attributes")
        instances.append(_read_attributes(label, allnames))

        # ask if they want to add another of the same class
        userinput = ''
        while userinput not in ("y", "Y", "n", "N"):
            userinput = input(f"Add another {label}? \"y\" for yes or \"n\" for no\n > ")
        addmore = userinput in ("y", "Y")

    # every class ends up with at least one instance
    attributes.append(instances)


# call functions to perform calculations
averages = calculations.calculate_averages(attributes, numofat)
weights = calculations.calculate_weight(averages, numofclass, numofat)


# loop through menu of program until they quit
while True:
    # validate their input
    menuoption = ''
    while menuoption not in ("1", "2", "3"):
        menuoption = input("1. Guess a classification\n2. Enter new data\n3. Quit program\n > ")

    if menuoption == '1':
        # guess based on data, take in attributes
        print(f"\nEnter {classname}'s attributes")
        singleatts = _read_attributes(classname, allnames)

        # get the scores
        scores = calculations.score_data(weights, averages, singleatts)

        # print the most likely one (first class wins a tie)
        print(f"This is a {classes[scores.index(max(scores))]}\n")

    elif menuoption == '2':
        # add more data, ask which class first; always prompt at least once
        # so no class name can be selected without the user typing it
        while True:
            userinput = input(f"Which {classname} would you like to add to?\n > ")
            if userinput in classes:
                break
            print(f"Enter one of your predefined classes {classes}. Try again.")

        # add more data for the selected class
        index = classes.index(userinput)
        print(f"\nEnter new {classes[index]} attributes")
        attributes[index].append(_read_attributes(classes[index], allnames))

        # perform calculations again so the new data is taken into account
        averages = calculations.calculate_averages(attributes, numofat)
        weights = calculations.calculate_weight(averages, numofclass, numofat)

    else:
        print ("Good Bye.\n")
        break
Exported from Notepad++
# Author: Mostapha Abdelaziz
# Functions for dealing with initial setup and entering labeled data


def _prompt_positive_int(prompt):
    """Ask with prompt until the user types a whole number greater than 0.

    Returns the validated integer.
    """
    while True:
        userinput = input(prompt)
        # try to convert to integer
        try:
            value = int(userinput)
        except ValueError:
            # if it was not a number print so
            print ("Must be a whole number. Try again.")
            continue

        if value > 0:
            return value
        print("Must be greater than 0. Try again.")


# Asks for and returns how many attributes each item will use
def enter_attribute_count(classname):
    """Return the validated number (> 0) of numerical attributes per item.

    classname is only used in the prompt text.
    """
    return _prompt_positive_int(
        f"How many numerical attributes will each {classname} have?\n > ")


# Asks for the names of each attribute and returns a list
def enter_names(num):
    """Prompt for num attribute names and return them as a list, in order."""
    names = []
    for i in range(1, num + 1):
        names.append(input(f"Enter attribute {i} name: "))
    return names


# Asks for and returns how many different labels or classifications to use
def enter_class_count():
    """Return the validated number (> 0) of classifications."""
    return _prompt_positive_int("How many classifications will there be?\n > ")


# Asks for and returns a list of the names of each label or class
def enter_class_names(classname, num):
    """Prompt for num class names and return them as a list, in order.

    classname is only used in the prompt text. Blank and repeated names are
    rejected and asked for again, so each name identifies exactly one class.
    """
    names = []
    while len(names) < num:
        userinput = input(f"Enter {classname} {len(names) + 1} type: ")
        if not userinput.strip():
            print("Name cannot be blank. Try again.")
        elif userinput in names:
            print(f"{userinput} has already been entered. Try again.")
        else:
            names.append(userinput)
    return names


# Asks for and returns a validated attribute value
def enter_attribute(classification, attribute):
    """Return a whole number >= 0 typed by the user for one attribute.

    classification and attribute are only used in the prompt text.
    """
    while True:
        userinput = input(f"Enter {classification} {attribute}\n > ")
        # try to convert to int
        try:
            num = int(userinput)
        except ValueError:
            # if it wasnt an integer
            print("Attribute must be a whole numerical value. Try again.")
            continue

        # check range (zero is accepted)
        if num < 0:
            print("Attribute must be a positive value. Try again.")
            continue
        return num
Exported from Notepad++
# Author: Mostapha Abdelaziz
# Functions that perform all machine learning data calculations


# calculates the average for each attribute in each class
def calculate_averages(attributes, numofattributes):
    """Return the per-class average of every attribute.

    attributes is a list with one entry per class; each entry is a list of
    instances, and each instance is a list of numofattributes numbers.
    The result has one list of numofattributes averages per class.

    Raises ZeroDivisionError if a class has no instances (and
    numofattributes is greater than 0).
    """
    allaverages = []
    # loop through each classification
    for instances in attributes:
        count = len(instances)
        averages = []
        # loop through each attribute and average it over every instance
        for i in range(numofattributes):
            total = sum(instance[i] for instance in instances)
            averages.append(total / count)
        allaverages.append(averages)

    return allaverages


# calculates how each attribute should be weighed
def calculate_weight(averages, numofclasses, numofattributes):
    """Return, for each class, the weight of each attribute.

    For a class, an attribute's "gap" is the smallest absolute difference
    between that class's average and any other class's average for the same
    attribute. The weight is the gap divided by the sum of the class's gaps.

    If all gaps of a class are 0 its weights are all 0. If there is only one
    class there is nothing to compare against, so every attribute gets the
    same weight (1 / numofattributes).
    """
    allweights = []
    # loop through each class
    for ownindex, ownaverages in enumerate(averages):
        # every class except the one being weighed
        others = [averages[k] for k in range(numofclasses) if k != ownindex]

        if not others:
            allweights.append([1 / numofattributes] * numofattributes)
            continue

        # smallest distance to another class, per attribute
        gaps = []
        for att in range(numofattributes):
            gaps.append(min(abs(ownaverages[att] - other[att])
                            for other in others))

        # take the differences and calculate the weights or percentages
        total = sum(gaps)
        if total == 0:
            allweights.append([0] * numofattributes)
        else:
            allweights.append([gap / total for gap in gaps])

    return allweights


# takes in attributes values and scores it based on our predefined info
def score_data(weights, averages, data):
    """Return one score per class for the attribute values in data.

    For each attribute the smaller of (class average, entered value) is
    divided by the larger to get a closeness percentage, which is multiplied
    by the attribute's weight for that class; the products are summed.
    """
    scores = []
    # loop through each class in averages
    for i, classaverages in enumerate(averages):
        score = 0
        # loop through each attribute
        for j, attribute in enumerate(classaverages):
            value = data[j]
            # get a percentage of how close the input is to the class average
            if attribute == value:
                # exact match; also avoids 0 / 0 when both are zero
                mark = 100.0
            elif attribute > value:
                mark = (value / attribute) * 100
            else:
                mark = (attribute / value) * 100

            # multiply this by the weight each attribute has
            score += mark * weights[i][j]

        scores.append(score)

    return scores