Run Time: 0.310
Ranking (as of 2016-09-08): 148 out of 486
Language: C++
Note that suffix array construction is based on a sample code that can be downloaded from Competitive Programming support materials site, with a few modifications.
The rest of the code in the main function is a variation of Longest Common Substring calculation.
/* UVa 11107 - Life Forms To build using Visual Studio 2012: cl -EHsc -O2 UVa_11107_Life_Forms.cpp */ #include <algorithm> #include <map> #include <cstdio> #include <cstring> using namespace std; typedef pair<int, int> ii; #define MAX_N 101010 // second approach: O(n log n) char T[MAX_N]; // the input string, up to 100K characters int n; // the length of input string int RA[MAX_N], tempRA[MAX_N]; // rank array and temporary rank array int SA[MAX_N], tempSA[MAX_N]; // suffix array and temporary suffix array int c[MAX_N]; // for counting/radix sort char P[MAX_N]; // the pattern string (for string matching) int m; // the length of pattern string int Phi[MAX_N]; // for computing longest common prefix int PLCP[MAX_N]; int LCP[MAX_N]; // LCP[i] stores the LCP between previous suffix T+SA[i-1] // and current suffix T+SA[i] bool cmp(int a, int b) { return strcmp(T + a, T + b) < 0; } // compare void constructSA_slow() { // cannot go beyond 1000 characters for (int i = 0; i < n; i++) SA[i] = i; // initial SA: {0, 1, 2, ..., n-1} sort(SA, SA + n, cmp); // sort: O(n log n) * compare: O(n) = O(n^2 log n) } void countingSort(int k) { // O(n) int i, sum, maxi = max(300, n); // up to 255 ASCII chars or length of n memset(c, 0, sizeof c); // clear frequency table for (i = 0; i < n; i++) // count the frequency of each integer rank c[i + k < n ? RA[i + k] : 0]++; for (i = sum = 0; i < maxi; i++) { int t = c[i]; c[i] = sum; sum += t; } for (i = 0; i < n; i++) // shuffle the suffix array if necessary tempSA[c[SA[i]+k < n ? RA[SA[i]+k] : 0]++] = SA[i]; for (i = 0; i < n; i++) // update the suffix array SA SA[i] = tempSA[i]; } void constructSA() { // this version can go up to 100000 characters int i, k, r; for (i = 0; i < n; i++) RA[i] = T[i]; // initial rankings for (i = 0; i < n; i++) SA[i] = i; // initial SA: {0, 1, 2, ..., n-1} for (k = 1; k < n; k <<= 1) { // repeat sorting process log n times countingSort(k); // actually radix sort: sort based on the second item countingSort(0); // then (stable) sort based on the first item tempRA[SA[0]] = r = 0; // re-ranking; start from rank r = 0 for (i = 1; i < n; i++) // compare adjacent suffixes tempRA[SA[i]] = // if same pair => same rank r; otherwise, increase r (RA[SA[i]] == RA[SA[i-1]] && RA[SA[i]+k] == RA[SA[i-1]+k]) ? r : ++r; for (i = 0; i < n; i++) // update the rank array RA RA[i] = tempRA[i]; if (RA[SA[n-1]] == n-1) break; // nice optimization trick } } void computeLCP_slow() { LCP[0] = 0; // default value for (int i = 1; i < n; i++) { // compute LCP by definition int L = 0; // always reset L to 0 while (T[SA[i] + L] == T[SA[i-1] + L]) L++; // same L-th char, L++ LCP[i] = L; } } void computeLCP() { int i, L; Phi[SA[0]] = -1; // default value for (i = 1; i < n; i++) // compute Phi in O(n) Phi[SA[i]] = SA[i-1]; // remember which suffix is behind this suffix for (i = L = 0; i < n; i++) { // compute Permuted LCP in O(n) if (Phi[i] == -1) { PLCP[i] = 0; continue; } // special case while (T[i + L] != '.' && T[i + L] == T[Phi[i] + L]) L++; // L increased max n times PLCP[i] = L; L = max(L-1, 0); // L decreased max n times } for (i = 0; i < n; i++) // compute LCP in O(n) LCP[i] = PLCP[SA[i]]; // put the permuted LCP to the correct position } ii stringMatching() { // string matching in O(m log n) int lo = 0, hi = n-1, mid = lo; // valid matching = [0..n-1] while (lo < hi) { // find lower bound mid = (lo + hi) / 2; // this is round down int res = strncmp(T + SA[mid], P, m); // try to find P in suffix 'mid' if (res >= 0) hi = mid; // prune upper half (notice the >= sign) else lo = mid + 1; // prune lower half including mid } // observe `=' in "res >= 0" above if (strncmp(T + SA[lo], P, m) != 0) return ii(-1, -1); // if not found ii ans; ans.first = lo; lo = 0; hi = n - 1; mid = lo; while (lo < hi) { // if lower bound is found, find upper bound mid = (lo + hi) / 2; int res = strncmp(T + SA[mid], P, m); if (res > 0) hi = mid; // prune upper half else lo = mid + 1; // prune lower half including mid } // (notice the selected branch when res == 0) if (strncmp(T + SA[hi], P, m) != 0) hi--; // special case ans.second = hi; return ans; } // return lower/upperbound as first/second item of the pair, respectively ii LRS() { // returns a pair (the LRS length and its index) int i, idx = 0, maxLCP = -1; for (i = 1; i < n; i++) // O(n), start from i = 1 if (LCP[i] > maxLCP) maxLCP = LCP[i], idx = i; return ii(maxLCP, idx); } const int nr_life_forms_max = 100, nr_chars_max = 1000; int nr_life_forms, lengths[nr_life_forms_max], counters[nr_life_forms_max]; int Owners[MAX_N]; int owner(int idx) { for (int i = 0, length = lengths[i] + 1; ; i++, length += lengths[i] + 1) { if (idx < length - 1) return i; else if (idx == length - 1) return -1; } } /* int owner(int idx) { return (idx < n-m-1) ? 1 : 2; } */ ii LCS() { // returns a pair (the LCS length and its index) int i, idx = 0, maxLCP = -1; for (i = 1; i < n; i++) // O(n), start from i = 1 if (owner(SA[i]) != owner(SA[i-1]) && LCP[i] > maxLCP) maxLCP = LCP[i], idx = i; return ii(maxLCP, idx); } int main() { bool printed = false; while (true) { scanf("%d", &nr_life_forms); if (!nr_life_forms) break; if (printed) putchar('\n'); else printed = true; if (nr_life_forms == 1) { scanf("%s", T); puts(T); continue; } n = 0; for (int i = 0; i < nr_life_forms; i++) { scanf("%s", T + n); lengths[i] = strlen(T + n); n += lengths[i]; T[n++] = '.'; } T[n] = '\0'; constructSA(); // O(n log n) computeLCP(); // O(n) for (int i = 0; i < n; i++) Owners[i] = owner(SA[i]); #ifdef DEBUG printf("\nThe LCP information of '%s':\n", T); printf("i SA[i] LCP[i] Owner Suffix\n"); for (int k = 0; k < n; k++) printf("%4d %4d %4d %4d %s\n", k, SA[k], LCP[k], Owners[k], T + SA[k]); #endif int lcs_length = 0; memset(counters, 0, sizeof(counters)); for (int i = nr_life_forms, j = nr_life_forms, total = 0; j < n; ) { if (total <= nr_life_forms / 2) { if (!counters[Owners[j++]]++) ++total; } if (total > nr_life_forms / 2) { lcs_length = max(lcs_length, LCP[min_element(LCP + i + 1, LCP + j) - LCP]); #ifdef DEBUG printf("LCS [%d, %d): %d\n", i, j, lcs_length); #endif if (!--counters[Owners[i++]]) --total; } } if (!lcs_length) { puts("?"); continue; } int psa = -1; memset(counters, 0, sizeof(counters)); for (int i = nr_life_forms, j = nr_life_forms, total = 0; j < n; ) { if (total <= nr_life_forms / 2) { if (!counters[Owners[j++]]++) ++total; } if (total > nr_life_forms / 2) { int k = min_element(LCP + i + 1, LCP + j) - LCP; if (LCP[k] == lcs_length && (psa == -1 || strncmp(T + psa, T + SA[k], lcs_length))) { psa = SA[k]; char c = T[psa + lcs_length]; T[psa + lcs_length] = '\0'; #ifdef DEBUG printf("LCS [%d, %d): %s\n", i, j, T + psa); #else puts(T + psa); #endif T[psa + lcs_length] = c; } if (!--counters[Owners[i++]]) --total; } } } return 0; }
Very wise idea bro
ReplyDelete