// Demonstrates Longest Common Prefix based manipulations for suffix arrays // 6/1/07 BPW #include #include #include char s[1000000]; int n, // length of s[] including \0 sa[1000000], // suffix array for s[] rank[1000000], // rank[i] gives the rank (subscript) of s[i] in sa[] lcp[1000000]; // lcp[i] indicates the number of identical prefix symbols // for s[sa[i-1]] and s[sa[i]] int suffixCompare(const void *xVoidPt,const void *yVoidPt) { // Used in qsort call to generate suffix array. int *xPt=(int*) xVoidPt,*yPt=(int*) yVoidPt; return strcmp(&s[*xPt],&s[*yPt]); } void computeRank() { // Computes rank as the inverse permutation of the suffix array int i; for (i=0;i=lcp[rank[i-1]]-1 for (i=0;i0) h--; // Decrease according to result } } int slowSearch(char* key) { // Finds string assisted by binary search on suffix array, // but without using lcp. int low,high,mid; //int cmpCount=0; int i,j; low=0; high=n-1; while (low<=high) { mid=(low+high)/2; j=sa[mid]; // Position in s i=0; // Position in key //cmpCount++; // Like strcmp while (s[j]==key[i] && key[i]) { j++; i++; //cmpCount++; } if (key[i]==0) { //printf("slowSearch used %d char comparisons\n",cmpCount); return mid; // key found } if (s[j]= key assisted by binary search on suffix array, // but without using lcp. int low,high,mid; //int cmpCount=0; int i,j; low=0; high=n-1; while (low<=high) { mid=(low+high)/2; j=sa[mid]; // Position in s i=0; // Position in key //cmpCount++; // Like strcmp while (s[j]==key[i] && key[i]) { j++; i++; //cmpCount++; } if (key[i]==0 || s[j]>key[i]) high=mid-1; else low=mid+1; } //printf("slowSearchFirst used %d char comparisons\n",cmpCount); return low; } int slowSearchLast(char* key) { // Finds last string <= key assisted by binary search on suffix array, // but without using lcp. int low,high,mid; //int cmpCount=0; int i,j; low=0; high=n-1; while (low<=high) { mid=(low+high)/2; j=sa[mid]; // Position in s i=0; // Position in key //cmpCount++; // Like strcmp while (s[j]==key[i] && key[i]) { j++; i++; //cmpCount++; } if (key[i]==0 || s[j]= key assisted by binary search on suffix array. // Uses lcp to limit char comparisons. int low,high,mid; //int cmpCount=0; int i,j; int keyLength,lowMatches,highMatches,midMatches; keyLength=strlen(key); low=0; lowMatches=0; high=n-1; highMatches=0; while (low<=high) { mid=(low+high)/2; // How many strcmp matches can be salvaged? midMatches=(lowMatcheskey[i]) { high=mid-1; highMatches=(lcp[mid]