top of page

Binary Encoding In Machine Learning | Binary Encoding Project Help, Assignment Help, Homework Help

Project: Binary Encoding

In this project:

  • Use binary (bit-string) encoding, with 15 bits for x1, 20 bits for x2, and 25 bits for x3.

  • The algorithm parameters (population size, crossover rate, etc.) do not have to be the same as the ones used in Project #1 and may be taken from the user on the command line or provided at the beginning of execution, or may be stored in a file to be read in.

The output should include the best, mean and standard deviation of the best-of-run fitnesses from 30 independent runs (no generation-wise details needed). Also, show the vector (the three x values) of the best of the 30 best-of-run solutions. Please do not use an off-the-shelf implementation from packages. Please submit a single doc/docx/pdf/ipynb file (no other file type, please, with the following exception: if you are submitting an ipynb file, please ALSO submit the corresponding html) containing the source code and all output. Submission of multiple files is discouraged but may be resorted to only if you absolutely cannot manage to produce a single file. Physical files are to be uploaded – not a link to some web site. At the beginning of your report, you may add notes for any other special issues/techniques that you think might be important in your implementation.



Issues faced: I encountered an issue while encoding the numbers in binary. I was not able to specify the number of bits when converting floating-point numbers to binary: the conversion only produced strings of either 32 or 64 bits. Hence, the current code works only for integers.


Parameters:

Population size, N = 30

Number of independent runs, nRun = 30

Number of generations, nGen= 80

Crossover probability, p_c= 0.8

Mutation Probability, p_m=0.1


Function to minimize: X1^2 + X2^2 + X3^2

Range for values of X:

X_MIN= -1.0

X_MAX= 5.0


Output:

The average of the best-of-run fitness is 60.379620051557396

The corresponding standard deviation is 48.524933538699585


best-of-the-run vector: [0, 0, 0]



Code Implementation

import random
import math
# Population size constant.
# NOTE(review): every function below receives N as a parameter and the driver
# call at the bottom of the script passes 30, so this value looks unused.
N = 10  # size of population
# Bounds of the interval from which initialize() draws the starting x values.
X_MIN = -1.0
X_MAX = 5.0
# Crossover probability; run_ga() passes it to crossover().
p_c = 0.8
# Mutation probability; run_ga() passes it to mutate().
p_m = 0.1


def run_ga(N, n_gen, plot=False):
    """Run one independent execution of the genetic algorithm.

    N     - size of the population
    n_gen - number of generations to evolve
    plot  - when True, plot the per-generation statistics at the end

    Each generation is evaluated first and only then replaced by its
    offspring (crossover followed by mutation), so the history holds exactly
    n_gen entries and the very last offspring are not evaluated.

    Returns (best_of_run, history) where best_of_run is the
    (fitness, vector) pair picked by get_best_of_run() and history is the
    list of tuples produced by get_best_worst(), one per generation.
    """
    current = initialize(N)
    history = []
    for _ in range(n_gen):
        # record statistics of the generation before breeding the next one
        history.append(get_best_worst(current))
        current = mutate(crossover(current, p_c), p_m)

    if plot:
        show_results(history)

    return get_best_of_run(history), history
    
def initialize(N):
    """Build the initial population of N chromosomes.

    Every chromosome is a list of three genes.  Each gene starts as a value
    drawn uniformly from [X_MIN, X_MAX] (module-level globals), is rounded to
    an integer and is then formatted as a zero-padded binary string of width
    15, 20 and 25 respectively.

    NOTE: only integer values are encoded.  If the rounded value is negative
    (possible when X_MIN is negative) the formatted gene carries a leading
    '-' sign inside the fixed width.
    """
    widths = (15, 20, 25)
    population = []
    for _ in range(N):
        # draw the three integer values first, then encode them
        values = [round(random.uniform(X_MIN, X_MAX)) for _ in widths]
        genes = [format(value, '0{}b'.format(width))
                 for value, width in zip(values, widths)]
        population.append(genes)
    return population

def fit_fun(X):
    """Return the fitness of chromosome X (larger is better).

    X is an iterable of binary strings.  Each string is decoded with
    int(..., 2) and the squares of the decoded values are summed; the
    fitness is the reciprocal of that sum, with 0.01 added to the
    denominator so that the all-zero chromosome does not divide by zero.
    """
    squares_total = 0
    for bits in X:
        value = int(bits, 2)
        squares_total += value ** 2
    return 1 / (squares_total + 0.01)

def get_best_worst(population):
    """Summarise the fitness of a population.

    Returns ((best_f, best_x), (worst_f, worst_x), avrg_f): the highest
    fitness with its chromosome, the lowest fitness with its chromosome and
    the mean fitness.  The chromosomes are returned by reference, not copied.

    The search starts from the sentinels 0 (best) and 999999 (worst); a
    chromosome is only recorded when it is strictly better / strictly worse,
    so on ties the first one encountered wins.
    """
    best = (0, None)
    worst = (999999, None)
    running_total = 0

    for chromosome in population:
        fitness = fit_fun(chromosome)
        if fitness > best[0]:
            best = (fitness, chromosome)
        if fitness < worst[0]:
            worst = (fitness, chromosome)
        running_total += fitness

    return (best, worst, running_total / len(population))
    
def crossover(population, p_c):
    """Form a new population from an old one using crossover.

    population - list of chromosomes (each a list of gene strings)
    p_c        - probability that a selected pair is recombined

    Parents are selected in pairs with fitness-proportionate (roulette
    wheel) selection.  With probability p_c the pair is recombined by
    crossover_gen(); otherwise the parents pass through unchanged.

    The parents are copied BEFORE recombination.  crossover_gen() works in
    place, so recombining the selected objects directly would alter members
    of the old population: later selections in the same call would see
    modified parents with stale weights, and any reference a caller still
    holds to an old chromosome (e.g. a recorded best vector) would change.

    The returned population has the same size as the input; for an odd size
    one extra selected parent is copied over unchanged.
    """
    N = len(population)
    weights = [fit_fun(x) for x in population]

    new_pop = []
    for _ in range(N // 2):
        selected = random.choices(population, weights=weights, k=2)
        # work on copies so the old population is never modified
        pair = [parent[:] for parent in selected]
        if random.random() < p_c:
            crossover_gen(pair)
        new_pop += pair

    if N % 2:
        # keep the population size constant when N is odd
        extra = random.choices(population, weights=weights, k=1)[0]
        new_pop.append(extra[:])
    return new_pop

def crossover_gen(x):
    """Recombine the pair of chromosomes x[0] and x[1] in place.

    One gene position is chosen at random and the two chromosomes exchange
    the whole gene stored at that position; all other genes are untouched.
    Nothing is returned.
    """
    first, second = x[0], x[1]

    # pick the gene to exchange
    gene = random.randrange(len(first))

    # swap that gene between the two chromosomes
    first[gene], second[gene] = second[gene], first[gene]
        
def mutate(population, p_m):
    """Apply mutation to a population.

    Each chromosome is handed to mutate_gen() with probability p_m.
    Returns a new list holding the same chromosome objects in the same
    order as the input.
    """
    mutated = list(population)
    for chromosome in mutated:
        if random.random() < p_m:
            mutate_gen(chromosome)
    return mutated
    
def mutate_gen(x):
    """Mutate a single chromosome in place by flipping one random bit.

    x is a list of gene strings.  The genes are concatenated into one bit
    string, one position is chosen uniformly at random (every position,
    including the first, is eligible) and the character there is flipped:
    '0' becomes '1' and anything else becomes '0'.  The result is split
    back using the original gene lengths and written into x, so the caller's
    list really changes.  Nothing is returned.

    An empty chromosome is left untouched.
    """
    # remember the gene boundaries so any gene widths are supported
    lengths = [len(gene) for gene in x]

    # form the chromosome by concatenating the genes
    bits = list("".join(x))
    if not bits:
        return

    # select a random position and flip the bit
    j = random.randrange(len(bits))
    bits[j] = '1' if bits[j] == '0' else '0'

    # write the mutated genes back into the caller's list (in place)
    start = 0
    for k, length in enumerate(lengths):
        x[k] = "".join(bits[start:start + length])
        start += length
    
def get_best_of_run(results):
    """Pick the best fitness value and its vector over all generations.

    results is a list of tuples as returned by get_best_worst():
    ((best_f, best_x), (worst_f, worst_x), avrg_f)

    Returns (fitness, vector) of the first entry with the highest best_f.
    The search starts from fitness 0, so (0, None) is returned when results
    is empty or no best_f is greater than 0.
    """
    champion = (0, None)
    for (fitness, vector), _worst, _average in results:
        if fitness > champion[0]:
            champion = (fitness, vector)
    return champion
                
def show_results(results):
    """Plot best, worst and average fitness against the generation number.

    results is a list of tuples as returned by get_best_worst():
    ((best_f, best_x), (worst_f, worst_x), avrg_f)
    Only the fitness values are used; the vectors are ignored.
    """
    import matplotlib.pyplot as plt

    best = [entry[0][0] for entry in results]
    worst = [entry[1][0] for entry in results]
    avrg = [entry[2] for entry in results]

    generations = range(len(best))
    curves = ((best, "best"), (worst, "worst"), (avrg, "average"))
    for values, label in curves:
        plt.plot(generations, values, label=label)
    plt.legend()
    plt.xlabel('Generation')
    plt.ylabel('Fitness function')
    plt.show()
    
def do_ga_stats(nRuns, N, nGens, plot=False):
    """Do nRuns independent runs of the GA and collect statistics.

    nRuns - number of independent runs (must be positive)
    N     - size of the population
    nGens - number of generations per run
    plot  - when True, plot the per-generation statistics with show_stat()

    Returns a 5-tuple:
        avg     - list with the mean best-of-generation fitness per generation
        std     - list with the matching standard deviations
        avg_bof - mean of the best-of-run fitness over all runs
        std_bof - standard deviation of the best-of-run fitness
        vector  - chromosome of the best of all best-of-run solutions, i.e.
                  the best-of-run vector with the highest fitness over the
                  nRuns runs (None when no run produced a vector)
    """
    s = [0.0] * nGens       # sum of best-of-generation fitness
    s2 = [0.0] * nGens      # sum of squared best-of-generation fitness
    bof = 0.0               # sum of best-of-run fitness
    bof2 = 0.0              # sum of squared best-of-run fitness
    overall_best_f = None   # highest best-of-run fitness seen so far
    overall_best_x = None   # vector belonging to overall_best_f

    for _ in range(nRuns):
        (best_of_run, best_x), results = run_ga(N, nGens)
        bof += best_of_run
        bof2 += best_of_run ** 2

        if best_x is not None and (overall_best_f is None
                                   or best_of_run > overall_best_f):
            overall_best_f = best_of_run
            # keep a snapshot so later changes to the run's data cannot
            # alter the reported vector
            overall_best_x = list(best_x)

        for gen, ((b, _best_vec), _worst, _average) in enumerate(results):
            s[gen] += b
            s2[gen] += b ** 2

    avg = [total / nRuns for total in s]
    # E[x^2] - E[x]^2 can come out slightly negative through rounding when
    # the values are (nearly) identical; clamp so math.sqrt cannot raise.
    std = [math.sqrt(max(0.0, squares / nRuns - mean ** 2))
           for squares, mean in zip(s2, avg)]

    if plot:
        show_stat(avg, std)

    avg_bof = bof / nRuns
    std_bof = math.sqrt(max(0.0, bof2 / nRuns - avg_bof ** 2))

    return avg, std, avg_bof, std_bof, overall_best_x
        
def show_stat(avg, std):
    """Plot the mean best-of-generation fitness and its standard deviation.

    avg and std are sequences with one value per generation.  Two plots are
    shown one after the other: the averages first, then the standard
    deviations in a new figure.
    """
    import matplotlib.pyplot as plt

    generations = range(len(avg))
    panels = (
        (avg, 'Avarage of fitness function', False),
        (std, 'Standard deviation of fitness function', True),
    )
    for values, y_label, new_figure in panels:
        if new_figure:
            plt.figure()
        plt.plot(generations, values)
        plt.xlabel('Generation')
        plt.ylabel(y_label)
        plt.show()
# Driver: fix the random seed so the experiment is reproducible.
random.seed(0)
# 30 independent runs, population size 30, 80 generations, no plots.
avg, std, avg_bof, std_bof,vector = do_ga_stats(30, 30, 80, plot=False)
print(f'The average of the best-of-run fitness is {avg_bof}')
print(f'The corresponding standard deviation is {std_bof}')
# Decode each binary gene of the vector returned by do_ga_stats() to an integer.
print('\nbest-of-the-run vector:',[int(i,2) for i in vector])

Output:

The average of the best-of-run fitness is 60.379620051557396

The corresponding standard deviation is 48.524933538699585

best-of-the-run vector: [0, 0, 0]




For more details, you can contact us at:

realcode4you@gmail.com
bottom of page