TSP-Genetic Algorithm

来源:互联网 发布:易酷cms 编辑:程序博客网 时间:2024/06/03 02:21

以前从来没玩过遗传算法,这次是逼上梁山,因为RNNA实在太简单,没法写出上千字的report。实在是为了算法而算法,并不是为了得到什么更优的解。。。汗自己的学习态度一下。。。anyway,GA确实是可以得到一个比较优的解,由于时间关系,这次我没有认真研究它,只是囫囵吞枣,得了点皮毛中的皮毛而已。理解错了,请大侠指正。

先说无性繁殖,无性繁殖很简单拉,其中一种叫local search。就是把找到的路径取其中一段出来,交换其中点的位置,看看能不能得到更优的子路径。其实本质上就是暴力求解,只是因为500个城市太过庞大,500!比天文数字还天文数字。而取一段通常只有几个点,即使全排列也是可以接受的,但还是有极限,超过10个点就不太行了,10!已经是300w+,差不多到机器极限拉。

这里是一个local search的sample,在RNNA的结果上进行优化。结果差不多是184k+,比RNNA好一点了吧。。。

/* 500 cities Hamilton Path */
/* Repetitive Nearest Neighbour Algorithm (RNNA) */
/* with Local Search */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define CITYSIZE 500
/* 15000 > distance(0,0,10000,10000) */
#define MAXDIST  15000
/* 7500000 > 15000 * (500 - 1) */
#define MAXTOTAL 7500000
/* local search factor
 * don't be greedy, greater number causes deeper recursive
 */
#define STEP 11

/* cities informatin */
typedef struct S_c {
 int id;
 int x;
 int y;
 int visited;
} CITY;

/* a linked-list record local path generated by perm() */
struct S_path {
 int* path;
 struct S_path* next;
 struct S_path* prev;
} *head, *tail;

int tour[CITYSIZE],       // best path
 weight[CITYSIZE][CITYSIZE]={0},   // distances between cities
 total = MAXTOTAL;      // best path length


void showCity(CITY*,int);     // show city coordinate,for test
void showDist(int[][CITYSIZE],int);   // show distances,for test
void showPath(int*,int,int);    // show found path
double distance(int,int,int,int);   // distance formula
void perm(int*,int,int,int);    // generate a permutation
void swap(int*,int*);      // for perm()
void localSearch(int);
int findrepeat();       // check if result valid
int weightOf(int* array);

int main(){
 time_t start, done;
 time(&start);
 FILE *fin;
 int i, j;
 CITY city[CITYSIZE];
 int tmptour[CITYSIZE];

 fin = fopen("cities.dat", "r");
 
 /* read cities locations */
 char dump;  // useless information, used to omit ':'
 for(i = 0; i < CITYSIZE; ++i){
  fscanf(fin, "%d %c %d", &city[i].id, &dump, &city[i].x);
  fscanf(fin, "%c %d", &dump, &city[i].y);
 }
 
 /* calculate cities distance */
 for(i = 0; i < CITYSIZE; ++i){
     for(j = 0; j < CITYSIZE; ++j){
      if(i < j){
          weight[i][j] = (int)distance(city[i].x, city[i].y,
                                    city[j].x, city[j].y);
          weight[j][i] = weight[i][j];
      }//if end
     }//for end
 }// for end
 
 int tmptotal = MAXTOTAL, startCty = 0;
 for(; startCty < CITYSIZE; ++startCty){
  /* if found better path */
  if(tmptotal < total){
   total = tmptotal;
   memcpy(tour, tmptour, sizeof(tour));
   showPath(tour, CITYSIZE, total);
  }
  
  /* initialization */
  tmptotal = 0;
  for(i = 0; i < CITYSIZE; ++i) city[i].visited = 0;
  
  int nextCty, currCty = startCty;
  tmptour[0] = startCty;
  city[startCty].visited = 1;
  
  for(i = 1; i < CITYSIZE; ++i){
   int cost = MAXDIST;
   for(j = 0; j < CITYSIZE; ++j){
    if((cost > weight[currCty][j]) && // currCty~j is better and
       (!city[j].visited)  &&         // j hasn't been visited and
       (j != currCty)){               // j is not currCty
        cost = weight[currCty][j];
        nextCty = j;
    }// if end
   }// for end

   // step to next city
   currCty = nextCty;
   // add this city into path
   tmptour[i] = currCty;
   city[currCty].visited = 1;
   // accumulate path length
   tmptotal += cost;
  }// for end
 }// for end
 
 /* RNNA finished here, local search start */
 
 /* TODO: connect the start city with end city to form a circle path,
  * then break the longest one in the loop.
  */

 head = (struct S_path*)malloc(sizeof(struct S_path));
 tail = (struct S_path*)malloc(sizeof(struct S_path));
 head->next = tail;
 head->prev = NULL;
 tail->prev = head;
 tail->next = NULL;

 int step = 3;
 for(; step < STEP; step++){
  localSearch(step);
 }
 
 time(&done);
 printf("execution time = %d seconds/n",done - start);
}

void showDist(int array[][CITYSIZE], int size){
 int i, j;
 for(i = 0; i < size; ++i){
  for(j = 0; j < size; ++j){
   printf("%4d,", array[i][j]);
  }
  printf("/n");
 }
}

void showCity(CITY* array, int size){
 int i; 
 for(i = 0; i < size; ++i){
  printf("%d:%d:%d:%d/n", array[i].id,
        array[i].x,
        array[i].y,
        array[i].visited);
 }// for end
}

void showPath(int* array, int size, int total){
 int i;
 if(findrepeat()){printf("Wrong answer!/n");exit(0);}
 if(total!=weightOf(array)){
  printf("wrong answer!/n");exit(0);}
 printf("%d:", total);
 for(i = 0; i < size - 1; ++i) printf("%d,", array[i]);
 printf("%d/n", array[i]);
}

double distance(int x1, int y1, int x2, int y2){
 return sqrt((x1-x2)*(x1-x2)+(y1-y2)*(y1-y2));
}

void localSearch(int step){
 int i = 0;
 
 for(; (i + step) < CITYSIZE; i = i + step){
  int j = 0;
  int sublength = 0, newweight = 0, currBest;
  for(; j < step; ++j) sublength += weight[tour[i+j]][tour[i+j+1]];
  currBest = sublength;
  /* for sequence 01234, we need a permutation of p(123) only.
   * so, the lower and upper bound would be i + 1 and i + step - 1.
   */
  //printf("perm(%d~%d):/n", i+1,i+step-1);
  perm(tour, i + 1, i + step - 1, i + 1);
  // check all new path in permutation.
  struct S_path* onePath = tail->prev;
  while(onePath != head){
   onePath->prev->next = tail;
   tail->prev = onePath->prev;
   newweight = weight[tour[i]][onePath->path[0]] +
               weight[onePath->path[step-2]][tour[i+step]];
   for(j = 0; j < step - 2; ++j)
    newweight += weight[onePath->path[j]][onePath->path[j+1]];
   if(newweight < currBest){
    printf("better route found:%d, original:%d/n", newweight, currBest);
    currBest = newweight;
    // update tour
    printf("this path is: %d,", i);
    for(j = 0; j < step - 1; ++j)
     printf("%d,",onePath->path[j]);
    printf("%d/n", i+step);
    memcpy(tour+i+1,onePath->path,sizeof(int)*(step-1));
    if(findrepeat()){printf("bingo!/n");exit(0);}
   }
   free(onePath->path);
   free(onePath);
   onePath = tail->prev;
  }// while end
  // update total length
  total = total - sublength + currBest;
 }// for end
 showPath(tour,CITYSIZE, total);
}

/* variable 'original' record the start point(lower bound) in array,
 * used for output, when invoke perm(), original and k is same.
 */
void perm(int* list, int k, int m, int original){
 int i, j;
 if(k > m){// exist a permutation...
  struct S_path* myPath = (struct S_path*)malloc(sizeof(struct S_path));
  struct S_path* tmp = head->next;
  myPath->path = (int*)malloc(sizeof(int)*(m - original + 1));
  head->next = myPath;
  myPath->prev = head;
  myPath->next = tmp;
  tmp->prev = myPath;
  for(i = original; i <= m; ++i) {myPath->path[i-original] = list[i];}
 } else {
  for(j = k; j <= m; ++j){
   swap(&list[k], &list[j]);
   perm(list, k + 1, m, original);
   swap(&list[k], &list[j]);
  }
 }
}

void swap(int* a, int* b){
 int temp = *a;
 *a = *b;
 *b = temp;
}

int findrepeat(){
 int i, j;
 for(i = 0; i < CITYSIZE; ++i){
  int tmp = 0;
  for(j = 0; j < CITYSIZE; ++j) if(i == tour[j]) tmp++;
  if(tmp > 1){printf("error: repeat city!/n");return 1;}
 }
 return 0;
}

int weightOf(int* array){
 int w = 0, i = 0;
 for(; i < CITYSIZE - 1; ++i)
  w += weight[array[i]][array[i+1]];
 return w;
}

接下来,说说有性繁殖。这里谈的是最最简单的情况。有性繁殖包括crossover和mutation。 crossover就是取两条路径,交换他们的一部分,产生两条新路径看看有没有更好的结果。mutation则是在crossover的基础上,随机的产生一定概率的突变,说不定会得到一个更优解或者更优的parent,从而在下一代里得到更优解。本质上。。。就是撞大运拉。。。比较sophisticated的GA算法会在crossover和mutation时采用一些复杂的策略, 不过整体上都是一样的。

这里给出一个crossover的sample,连mutation都不考虑

void ga(int separator){
 time_t seed;
 time(&seed);
 srand(seed);

 int gen = 0;
 for(; gen < 1000000; ++gen){
  printf("epoch %d:/n", gen);
  rand(); // dump first number
  
  // randomly pick two path index
  int index1 = (int)((double)rand()*500/RAND_MAX);
  int index2 = (int)((double)rand()*500/RAND_MAX);

  // get the two paths copies
  int path1[CITYSIZE], path2[CITYSIZE];
  memcpy(path1, all500paths[index1], sizeof(path1));
  memcpy(path2, all500paths[index2], sizeof(path2));

  // exchange two fragments to get next epoch
  int* frag1 = (int*)malloc(sizeof(int)*separator);
  memcpy(frag1, path1, sizeof(int)*separator);
  memcpy(path1, path2, sizeof(int)*separator);
  memcpy(path2, frag1, sizeof(int)*separator);
  free(frag1);

  // re-generate path1 & path2
  int i, check1[CITYSIZE] = {0}, check2[CITYSIZE] = {0};
  // separator~CITYSIZE-1 is original path, doesn't changed
  for(i = separator; i < CITYSIZE; ++i){
   check1[path1[i]]++;
   check2[path2[i]]++;
  }

  // check new part in path
  // 0~seprator-1 is new part copied from the other one
  for(i = 0; i < separator; ++i){
   if(check1[path1[i]]){
    int j = 0;
    while(check1[j])j++; // step to first non-zero position
    check1[j]++;
    path1[i] = j;
   }else check1[path1[i]]++;
  
   if(check2[path2[i]]){
    int j = 0;
    while(check2[j])j++;
    check2[j]++;
    path2[i] = j;
   }else check2[path2[i]]++;
  }

  int mytotal1 = 0, mytotal2 = 0;
  // calculate new path length
  for(i = 0; i < CITYSIZE - 1; ++i){
   mytotal1 += weight[path1[i]][path1[i+1]];
   mytotal2 += weight[path2[i]][path2[i+1]];
  }
  // update if better route found
  if(mytotal1 < all500weights[path1[0]]){
   printf("better found in path%d: %d/n", path1[0], mytotal1);
   all500weights[path1[0]] = mytotal1;
   memcpy(all500paths[path1[0]], path1, sizeof(int)*CITYSIZE);
  }
  if(mytotal2 < all500weights[path2[0]]){
   printf("better found in path%d: %d/n", path2[0], mytotal2);
   all500weights[path2[0]] = mytotal2;
   memcpy(all500paths[path2[0]], path2, sizeof(int)*CITYSIZE);
  }
 }// for end, one epoch end
}


check[]数组用来再交换后检查重复的city,如果发现city重复了,就用check[]里第一个为零的city来替换,all500paths保存了用NNA产生的路径,all500weights时相应的路径长度。separator标明从哪里断开来产生下一代。

如果想得到更随机的结果,separator本身也是可以是一个随机数。另外,直接在500条路径用ga似乎并不是一个很好的population,在上面选几十条比较短的路径来做初始的population可能会比较好。我没有试过,不敢乱说拉。上面的代码跑1000w代也跑不进19w,汗。。。。我的ga好烂。。。。

还有一种思路完全不同的算法,Ant Colony Optimization,蚁群优化。想出来的人是通过观察蚂蚁觅食得到的灵感。原理是蚂蚁在经过的路上会留下一种叫信息素的东东,就当它是一种气味吧。蚂蚁会根据气味的浓淡来决定走哪条路线。某条路上味道越浓,蚂蚁挑这条路线的概率越大。听上去很简单,也很不可思议吧。实事证明,ACO的效果比GA不遑多让。可惜,算法和原理是两码事,我还没看明白。所以也给不出代码拉。。。hoho,有时间再说吧。

这里有关TSP,GA和ACO的算法都是来自internet,只有代码是原创。写给自己看,就不写reference了。