// Package genetic provides functions to work with strings
// using genetic algorithm. https://en.wikipedia.org/wiki/Genetic_algorithm
//
// Author: D4rkia
package genetic
import (
"errors"
"fmt"
"math/rand"
"sort"
"strconv"
"time"
"unicode/utf8"
)
// Population item represent a single step in the evolution process.
// One can think of population item as a single species.
// Key stands for the actual data entity of the species, which is a string
// in current implementation. Key can be interpreted as species DNA.
// Value shows how close this species to the desired target, where 1 means,
// that species DNA equals to the targeted one, 0 for no matchings in the DNA.
//
// **Note** In the current implementation species DNA length is suppose to be
// equal to the target length for algorithm to work.
type PopulationItem struct {
Key string
Value float64
}
// Conf stands for cofigurations set provided to GeneticString function.
type Conf struct {
// Maximum size of the population.
// Bigger could be faster but more memory expensive.
PopulationNum int
// Number of elements selected in every generation for evolution
// the selection takes. Place from the best to the worst of that
// generation must be smaller than PopulationNum.
SelectionNum int
// Probability that an element of a generation can mutate changing one of
// its genes this guarantees that all genes will be used during evolution.
MutationProb float64
// Enables debugging output to the console.
Debug bool
}
// Result structure contains generation process statistics, as well as the
// best resulted population item.
type Result struct {
// Number of generations steps performed.
Generation int
// Number of generated population items.
Analyzed int
// Result of generation with the best Value.
Best PopulationItem
}
// GeneticString generates PopulationItem based on the imputed target
// string, and a set of possible runes to build a string with. In order
// to optimise string generation additional configurations can be provided
// with Conf instance. Empty instance of Conf (&Conf{}) can be provided,
// then default values would be set.
//
// Link to the same algorithm implemented in python:
// https://github.com/TheAlgorithms/Python/blob/master/genetic_algorithm/basic_string.py
func GeneticString(target string, charmap []rune, conf *Conf) (*Result, error) {
populationNum := conf.PopulationNum
if populationNum == 0 {
populationNum = 200
}
selectionNum := conf.SelectionNum
if selectionNum == 0 {
selectionNum = 50
}
// Verify if 'populationNum' s bigger than 'selectionNum'
if populationNum < selectionNum {
return nil, errors.New("populationNum must be bigger than selectionNum")
}
mutationProb := conf.MutationProb
if mutationProb == .0 {
mutationProb = .4
}
debug := conf.Debug
// Just a seed to improve randomness required by the algorithm
rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
// Verify that the target contains no genes besides the ones inside genes variable.
for position, r := range target {
invalid := true
for _, n := range charmap {
if n == r {
invalid = false
}
}
if invalid {
message := fmt.Sprintf("character not available in charmap at position: %v", position)
return nil, errors.New(message)
}
}
// Generate random starting population
pop := make([]PopulationItem, populationNum)
for i := 0; i < populationNum; i++ {
key := ""
for x := 0; x < utf8.RuneCountInString(target); x++ {
choice := rnd.Intn(len(charmap))
key += string(charmap[choice])
}
pop[i] = PopulationItem{key, 0}
}
// Just some logs to know what the algorithms is doing
gen, generatedPop := 0, 0
// This loop will end when we will find a perfect match for our target
for {
gen++
generatedPop += len(pop)
// Random population created now it's time to evaluate
for i, item := range pop {
pop[i].Value = 0
itemKey, targetRune := []rune(item.Key), []rune(target)
for x := 0; x < len(target); x++ {
if itemKey[x] == targetRune[x] {
pop[i].Value++
}
}
pop[i].Value = pop[i].Value / float64(len(targetRune))
}
sort.SliceStable(pop, func(i, j int) bool { return pop[i].Value > pop[j].Value })
// Check if there is a matching evolution
if pop[0].Key == target {
break
}
// Print the best resultPrint the Best result every 10 generations
// just to know that the algorithm is working
if debug && gen%10 == 0 {
fmt.Println("Generation:", strconv.Itoa(gen), "Analyzed:", generatedPop, "Best:", pop[0])
}
// Generate a new population vector keeping some of the best evolutions
// Keeping this avoid regression of evolution
var popChildren []PopulationItem
popChildren = append(popChildren, pop[0:int(selectionNum/3)]...)
// This is Selection
for i := 0; i < int(selectionNum); i++ {
parent1 := pop[i]
// Generate more child proportionally to the fitness score
nChild := (parent1.Value * 100) + 1
if nChild >= 10 {
nChild = 10
}
for x := 0.0; x < nChild; x++ {
parent2 := pop[rnd.Intn(selectionNum)]
// Crossover
split := rnd.Intn(utf8.RuneCountInString(target))
child1 := append([]rune(parent1.Key)[:split], []rune(parent2.Key)[split:]...)
child2 := append([]rune(parent2.Key)[:split], []rune(parent1.Key)[split:]...)
// Clean fitness value
// Mutate
if rnd.Float64() < mutationProb {
child1[rnd.Intn(len(child1))] = charmap[rnd.Intn(len(charmap))]
}
if rnd.Float64() < mutationProb {
child2[rnd.Intn(len(child2))] = charmap[rnd.Intn(len(charmap))]
}
// Push into 'popChildren'
popChildren = append(popChildren, PopulationItem{string(child1), 0})
popChildren = append(popChildren, PopulationItem{string(child2), 0})
// Check if the population has already reached the maximum value and if so,
// break the cycle. If this check is disabled the algorithm will take
// forever to compute large strings but will also calculate small string in
// a lot fewer generationsù
if len(popChildren) >= selectionNum {
break
}
}
}
pop = popChildren
}
return &Result{gen, generatedPop, pop[0]}, nil
}