Thursday, September 8, 2016

UVa 11107 - Life Forms

Accepted date: 2016-09-07
Run Time: 0.310
Ranking (as of 2016-09-08): 148 out of 486
Language: C++

Note that suffix array construction is based on a sample code that can be downloaded from Competitive Programming support materials site, with a few modifications.
The rest of the code in the main function is a variation of Longest Common Substring calculation.


/*
  UVa 11107 - Life Forms

  To build using Visual Studio 2012:
    cl -EHsc -O2 UVa_11107_Life_Forms.cpp
*/

#include <algorithm>
#include <map>
#include <cstdio>
#include <cstring>
using namespace std;

typedef pair<int, int> ii;

#define MAX_N 101010                         // second approach: O(n log n)
char T[MAX_N];                   // the input string, up to 100K characters
int n;                                        // the length of input string
int RA[MAX_N], tempRA[MAX_N];        // rank array and temporary rank array
int SA[MAX_N], tempSA[MAX_N];    // suffix array and temporary suffix array
int c[MAX_N];                                    // for counting/radix sort

char P[MAX_N];                  // the pattern string (for string matching)
int m;                                      // the length of pattern string

int Phi[MAX_N];                      // for computing longest common prefix
int PLCP[MAX_N];
int LCP[MAX_N];  // LCP[i] stores the LCP between previous suffix T+SA[i-1]
                                              // and current suffix T+SA[i]

bool cmp(int a, int b) { return strcmp(T + a, T + b) < 0; }      // compare

void constructSA_slow() {               // cannot go beyond 1000 characters
  for (int i = 0; i < n; i++) SA[i] = i; // initial SA: {0, 1, 2, ..., n-1}
  sort(SA, SA + n, cmp); // sort: O(n log n) * compare: O(n) = O(n^2 log n)
}

void countingSort(int k) {                                          // O(n)
  int i, sum, maxi = max(300, n);   // up to 255 ASCII chars or length of n
  memset(c, 0, sizeof c);                          // clear frequency table
  for (i = 0; i < n; i++)       // count the frequency of each integer rank
    c[i + k < n ? RA[i + k] : 0]++;
  for (i = sum = 0; i < maxi; i++) {
    int t = c[i]; c[i] = sum; sum += t;
  }
  for (i = 0; i < n; i++)          // shuffle the suffix array if necessary
    tempSA[c[SA[i]+k < n ? RA[SA[i]+k] : 0]++] = SA[i];
  for (i = 0; i < n; i++)                     // update the suffix array SA
    SA[i] = tempSA[i];
}

void constructSA() {         // this version can go up to 100000 characters
  int i, k, r;
  for (i = 0; i < n; i++) RA[i] = T[i];                 // initial rankings
  for (i = 0; i < n; i++) SA[i] = i;     // initial SA: {0, 1, 2, ..., n-1}
  for (k = 1; k < n; k <<= 1) {       // repeat sorting process log n times
    countingSort(k);  // actually radix sort: sort based on the second item
    countingSort(0);          // then (stable) sort based on the first item
    tempRA[SA[0]] = r = 0;             // re-ranking; start from rank r = 0
    for (i = 1; i < n; i++)                    // compare adjacent suffixes
      tempRA[SA[i]] = // if same pair => same rank r; otherwise, increase r
      (RA[SA[i]] == RA[SA[i-1]] && RA[SA[i]+k] == RA[SA[i-1]+k]) ? r : ++r;
    for (i = 0; i < n; i++)                     // update the rank array RA
      RA[i] = tempRA[i];
    if (RA[SA[n-1]] == n-1) break;               // nice optimization trick
} }

void computeLCP_slow() {
  LCP[0] = 0;                                              // default value
  for (int i = 1; i < n; i++) {                // compute LCP by definition
    int L = 0;                                       // always reset L to 0
    while (T[SA[i] + L] == T[SA[i-1] + L]) L++;      // same L-th char, L++
    LCP[i] = L;
} }

void computeLCP() {
  int i, L;
  Phi[SA[0]] = -1;                                         // default value
  for (i = 1; i < n; i++)                            // compute Phi in O(n)
    Phi[SA[i]] = SA[i-1];    // remember which suffix is behind this suffix
  for (i = L = 0; i < n; i++) {             // compute Permuted LCP in O(n)
    if (Phi[i] == -1) { PLCP[i] = 0; continue; }            // special case
    while (T[i + L] != '.' && T[i + L] == T[Phi[i] + L]) L++; // L increased max n times
    PLCP[i] = L;
    L = max(L-1, 0);                             // L decreased max n times
  }
  for (i = 0; i < n; i++)                            // compute LCP in O(n)
    LCP[i] = PLCP[SA[i]];   // put the permuted LCP to the correct position
}

ii stringMatching() {                      // string matching in O(m log n)
  int lo = 0, hi = n-1, mid = lo;              // valid matching = [0..n-1]
  while (lo < hi) {                                     // find lower bound
    mid = (lo + hi) / 2;                              // this is round down
    int res = strncmp(T + SA[mid], P, m);  // try to find P in suffix 'mid'
    if (res >= 0) hi = mid;        // prune upper half (notice the >= sign)
    else          lo = mid + 1;           // prune lower half including mid
  }                                      // observe `=' in "res >= 0" above
  if (strncmp(T + SA[lo], P, m) != 0) return ii(-1, -1);    // if not found
  ii ans; ans.first = lo;
  lo = 0; hi = n - 1; mid = lo;
  while (lo < hi) {            // if lower bound is found, find upper bound
    mid = (lo + hi) / 2;
    int res = strncmp(T + SA[mid], P, m);
    if (res > 0) hi = mid;                              // prune upper half
    else         lo = mid + 1;            // prune lower half including mid
  }                           // (notice the selected branch when res == 0)
  if (strncmp(T + SA[hi], P, m) != 0) hi--;                 // special case
  ans.second = hi;
  return ans;
} // return lower/upperbound as first/second item of the pair, respectively

ii LRS() {                 // returns a pair (the LRS length and its index)
  int i, idx = 0, maxLCP = -1;
  for (i = 1; i < n; i++)                         // O(n), start from i = 1
    if (LCP[i] > maxLCP)
      maxLCP = LCP[i], idx = i;
  return ii(maxLCP, idx);
}

const int nr_life_forms_max = 100, nr_chars_max = 1000;
int nr_life_forms, lengths[nr_life_forms_max], counters[nr_life_forms_max];
int Owners[MAX_N];

int owner(int idx)
{
  for (int i = 0, length = lengths[i] + 1; ; i++, length += lengths[i] + 1) {
    if (idx < length - 1)
      return i;
    else if (idx == length - 1)
      return -1;
  }
}

/*
int owner(int idx) { return (idx < n-m-1) ? 1 : 2; }
*/

ii LCS() {                 // returns a pair (the LCS length and its index)
  int i, idx = 0, maxLCP = -1;
  for (i = 1; i < n; i++)                         // O(n), start from i = 1
    if (owner(SA[i]) != owner(SA[i-1]) && LCP[i] > maxLCP)
      maxLCP = LCP[i], idx = i;
  return ii(maxLCP, idx);
}

int main()
{
  bool printed = false;
  while (true) {
    scanf("%d", &nr_life_forms);
    if (!nr_life_forms)
      break;
    if (printed)
      putchar('\n');
    else
      printed = true;
    if (nr_life_forms == 1) {
      scanf("%s", T);
      puts(T);
      continue;
    }
    n = 0;
    for (int i = 0; i < nr_life_forms; i++) {
      scanf("%s", T + n);
      lengths[i] = strlen(T + n);
      n += lengths[i];
      T[n++] = '.';
    }
    T[n] = '\0';
    constructSA(); // O(n log n)
    computeLCP(); // O(n)
    for (int i = 0; i < n; i++)
      Owners[i] = owner(SA[i]);
#ifdef DEBUG
    printf("\nThe LCP information of '%s':\n", T);
    printf("i     SA[i] LCP[i] Owner Suffix\n");
    for (int k = 0; k < n; k++)
      printf("%4d  %4d  %4d   %4d  %s\n", k, SA[k], LCP[k], Owners[k], T + SA[k]);
#endif
    int lcs_length = 0;
    memset(counters, 0, sizeof(counters));
    for (int i = nr_life_forms, j = nr_life_forms, total = 0; j < n; ) {
      if (total <= nr_life_forms / 2) {
        if (!counters[Owners[j++]]++)
          ++total;
      }
      if (total > nr_life_forms / 2) {
        lcs_length = max(lcs_length, LCP[min_element(LCP + i + 1, LCP + j) - LCP]);
#ifdef DEBUG
        printf("LCS [%d, %d): %d\n", i, j, lcs_length);
#endif
        if (!--counters[Owners[i++]])
          --total;
      }
    }
    if (!lcs_length) {
      puts("?");
      continue;
    }
    int psa = -1;
    memset(counters, 0, sizeof(counters));
    for (int i = nr_life_forms, j = nr_life_forms, total = 0; j < n; ) {
      if (total <= nr_life_forms / 2) {
        if (!counters[Owners[j++]]++)
          ++total;
      }
      if (total > nr_life_forms / 2) {
        int k = min_element(LCP + i + 1, LCP + j) - LCP;
        if (LCP[k] == lcs_length &&
          (psa == -1 || strncmp(T + psa, T + SA[k], lcs_length))) {
          psa = SA[k];
          char c = T[psa + lcs_length];
          T[psa + lcs_length] = '\0';
#ifdef DEBUG
          printf("LCS [%d, %d): %s\n", i, j, T + psa);
#else
          puts(T + psa);
#endif
          T[psa + lcs_length] = c;
        }
        if (!--counters[Owners[i++]])
          --total;
      }
    }
  }
  return 0;
}

1 comment: