/* Fundamental string processing from Gusfield, Algorithms on Strings, Trees, and Sequences: Computer Science and Computational Biology, pages 8-10 */ /* Note: C string conventions are used instead of Pascal as in Gusfield Note: '$' is used as a distinguished character and should not be used in input strings to application functions */ /* BPW, 5/2/98 */ #include #include #define ONE (1) char S[1001]; int sLength; int Z[1001]; /* Given a string S and a position i, Z[i] is the length of the longest substring of S that starts at i and matches a prefix of S. */ void fundamental() { int i,k,kprime,l,r,count,betaLength; char marker[1001]; printf("fundamental: %s\n",S); sLength=strlen(S); if (sLength==0) return; Z[0]=sLength; printf("S[0]='%c': Z[0]=%d\n",S[0],Z[0]); if (sLength==ONE) return; /* Find Z[1] */ for (i=ONE;S[i]==S[i-ONE];i++) ; Z[ONE]=i-ONE; printf("S[1]='%c': Z[1]=%d\n",S[ONE],Z[ONE]); if (Z[ONE]>0) { r=Z[ONE]; l=ONE; } else r=l=(-ONE); /* Find other Z[k] */ for (k=2;kr) { printf("S[%d]='%c': case 1 (search for mismatch)\n",k,S[k]); for (count=0;S[k+count]==S[count];count++) ; printf("S[%d]='%c': case 1 (Z[%d]=%d)\n",k,S[k],k,count); Z[k]=count; if (Z[k]>0) { r=k+Z[k]-ONE; l=k; printf("S[%d]='%c': case 1 (l=%d r=%d)\n",k,S[k],l,r); } else { printf("S[%d]='%c': case 1 (l=-1 r=-1)\n",k,S[k]); r=l=(-ONE); } } else /* k<=r */ { kprime=k-l; betaLength=r-k+ONE; printf("S[%d]='%c': case 2 (Z[kprime=%d]=%d ? betaLength=%d)\n", k,S[k],kprime,Z[kprime],betaLength); if (Z[kprime]=betaLength */ { for (count=ONE;S[r+count]==S[betaLength+count-ONE];count++) ; Z[k]=r+count-k; r=r+count-ONE; l=k; printf("S[%d]='%c': case 2b (? is >= scan %d chars Z[%d]=%d l=%d r=%d)\n", k,S[k],count,k,Z[k],l,r); } } } /* Print results */ for (i=0;i