后缀自动机(SAM)



前言

本来以为能20号发,太高估自己了 T^T,个人目前对于SAM的理解可能连60%都不到 QAQ
本文解题都在HihoCoder有,各位有需要自己到HihoCoder看解题提示,篇幅有限(主要是懒)就没放在里面

  1. SAM
    HihoCoder
    https://blog.csdn.net/wmdcstdio/article/details/44780707
    https://kyleyoung-ymj.github.io/Suffix-Automaton/
    https://wenku.baidu.com/view/90f22eec551810a6f4248606.html

hihoCoder 1445 : 后缀自动机二·重复旋律5

  #include <cstdio>
  #include <cstring>
  using namespace std;
  typedef long long ll;

  const int N = (int)1e6 + 15;
  char str[N];

  struct SuffixMaton{
      // SZ:    字符集大小
      // tot:   静态分配节点ID
      // lst:   上一个节点ID
      // slink: suffix link,后缀链接
      // len:  状态中的maxlen
      // trans: 状态转移
      const static int SZ = 26;
      int tot, lst;
      int slink[N << 1], len[N << 1], trans[N << 1][SZ];

      // NewNode函数:分配新节点,需要清空trans
      int NewNode(){
          int ret = ++tot;
          memset(trans[ret], 0, sizeof(trans[ret]));
          return ret;
      }

      void init(){
          tot = 0;
          lst = NewNode();            //分配S状态的节点
          slink[lst] = len[lst] = 0;  //把S状态的slink设为NULL,len设为0
      }

      void push(int val){
          int p = lst, np = NewNode();    //为新字符新建一个状态np
          len[np] = len[p] + 1;           //新状态maxlen为前一个状态maxlen + 1,这是显然的

          //对于lst到root(S状态)的路径中,无经val转移的,直接令其等于np
          //最终如果转移到NULL,那么状态np的suffix link应连接到S状态
          for(; p && trans[p][val] == 0; p = slink[p])    trans[p][val] = np;
          if(p == 0)  slink[np] = 1;
          else{
              //否则,则说明子串存在
              //记状态q为trans[p][val],即p经过val转移的状态
              //如果len[q] = len[p] + 1,则说明可以直接将slink连到q
              int q = trans[p][val];
              if(len[q] == len[p] + 1)    slink[np] = q;
              else{
                  //否则,不能直接将slink连接到那个里面
                  //新建状态q的分割状态q,将 <= len[p] + 1的部分以及q的其他属性复制到那个状态里面
                  //将q和np的slink连接到nq
                  int nq = ++tot;
                  slink[nq] = slink[q];
                  len[nq] = len[p] + 1;
                  memcpy(trans[nq], trans[q], sizeof(trans[q]));
                  slink[q] = slink[np] = nq;
                  //重定向原来指向q的
                  for(trans[p][val] = nq, p = slink[p]; p && trans[p][val] == q; p = slink[p])    trans[p][val] = nq;
              }
          }
          lst = np;
      }
  };

  SuffixMaton sam;

  int main(){
      while(~scanf("%s", str)){
          sam.init();
          for(int i = 0; str[i]; i++){
              sam.push(str[i] - 'a');
          }

          ll ans = 0;
          for(int i = 2; i <= sam.tot; i++){
              ans += (sam.len[i] - sam.len[sam.slink[i]]);
          }
          printf("%lld\n", ans);
      }
  }


hihoCoder 1449 : 后缀自动机三·重复旋律6

  #include <cstdio>
  #include <cstring>
  #include <algorithm>
  using namespace std;
  typedef long long ll;

  const int N = 1e6 + 15;
  char str[N];

  struct SuffixMaton{
      const static int SZ = 26;
      int tot, lst;
      int slink[N << 1], len[N << 1], trans[N << 1][SZ], endsz[N << 1];

      int NewNode(){
          int ret = ++tot;
          memset(trans[ret], 0, sizeof(trans[ret]));
          return ret;
      }

      void init(){
          tot = 0;
          lst = NewNode();
          slink[lst] = len[lst] = 0;
          endsz[0] = 0;             // 空节点不为绿色
          endsz[1] = 1;             // 非空节点为绿色
      }

      void push(int val){
          int p = lst, np = NewNode();
          len[np] = len[p] + 1;
          endsz[np] = 1;

          for(; p && trans[p][val] == 0; p = slink[p])    trans[p][val] = np;
          if(p == 0)  slink[np] = 1;
          else{
              int q = trans[p][val];
              if(len[q] == len[p] + 1)    slink[np] = q;
              else{
                  int nq = ++tot;
                  endsz[nq] = 0;
                  slink[nq] = slink[q];
                  len[nq] = len[p] + 1;
                  memcpy(trans[nq], trans[q], sizeof(trans[q]));
                  slink[q] = slink[np] = nq;

                  for(trans[p][val] = nq, p = slink[p]; p && trans[p][val] == q; p = slink[p])    trans[p][val] = nq;
              }
          }
          lst = np;
      }

      //利用parent树性质,endpos集合性质,求出 |endpos|  
      int que[N << 1], idg[N << 1], head, tail;
      void topoSort(){
          head = tail = 0;
          memset(idg, 0, sizeof(idg));
          for(int i = 2; i <= tot; i++)    idg[slink[i]]++;
          for(int i = 1; i <= tot; i++){
              if(idg[i] == 0)     que[tail++] = i;
          }
          while(head != tail){
              int u    = que[head++];
              int preu = slink[u];
              endsz[preu] += endsz[u];
              idg[preu]--;
              if(preu && idg[preu] == 0)     que[tail++] = preu;
          }
      }

      int ans[N];
      void solve(){
          memset(ans, 0, sizeof(ans));
          topoSort();
          int slen = strlen(str);
          for(int i = 1; i <= tot; i++){
              ans[len[i]] = max(ans[len[i]], endsz[i]);
          }
          for(int i = slen - 1; i >= 1; i--){
              ans[i] = max(ans[i], ans[i + 1]);
          }
          for(int i = 1; i <= slen; i++){
              printf("%d\n", ans[i]);
          }
      }
  };

  SuffixMaton sam;

  int main(){
      while(~scanf("%s", str)){
          sam.init();
          for(int i = 0; str[i]; i++){
              sam.push(str[i] - 'a');
          }
          sam.solve();
      }
  }


hihoCoder 1457 : 后缀自动机四·重复旋律7

  #include <cstdio>
  #include <cstring>
  #include <algorithm>
  using namespace std;
  typedef long long ll;

  const int N = 2e6 + 15;
  const int MOD = 1e9 + 7;
  char str[N];

  struct SuffixMaton{
      const static int SZ = 11;
      int tot, lst;
      int slink[N << 1], len[N << 1], trans[N << 1][SZ];

      int NewNode(){
          int ret = ++tot;
          memset(trans[ret], 0, sizeof(trans[ret]));
          return ret;
      }

      void init(){
          tot = 0;
          lst = NewNode();
          slink[lst] = len[lst] = 0;
      }

      void push(int val){
          int p = lst, np = NewNode();
          len[np] = len[p] + 1;

          for(; p && trans[p][val] == 0; p = slink[p])    trans[p][val] = np;
          if(p == 0)  slink[np] = 1;
          else{
              int q = trans[p][val];
              if(len[q] == len[p] + 1)    slink[np] = q;
              else{
                  int nq = ++tot;
                  slink[nq] = slink[q];
                  len[nq] = len[p] + 1;
                  memcpy(trans[nq], trans[q], sizeof(trans[q]));
                  slink[q] = slink[np] = nq;

                  for(trans[p][val] = nq, p = slink[p]; p && trans[p][val] == q; p = slink[p])    trans[p][val] = nq;
              }
          }
          lst = np;
      }

      int  que[N << 1], head, tail;
      int  cnt[N << 1], idg[N << 1];
      ll   sum[N << 1];
      void topoSort(){
          memset(cnt, 0, sizeof(cnt));
          memset(idg, 0, sizeof(idg));
          memset(sum, 0, sizeof(sum));

          //求出trans构成的DAG的入度
          for(int u = 1; u <= tot; u++){
              for(int val = 0; val < SZ; val++){
                  if(trans[u][val])   idg[trans[u][val]]++;
              }
          }

          //DAG一开始入度的点必定是S状态
          //cnt其实是S状态到该点且不经过val=10的路径数(因为从S状态到任意点的路径是任意一子串),故可递推求出
          //sum可根据公式 sum[v] = (sum[v] + sum[u] * 10 + val * cnt[u]) % MOD 递推
          //注意经过val=10的点已经跨字符串,不计入
          head = tail = 0;
          que[tail++] = 1;
          cnt[1] = 1;
          while(head != tail){
              int u = que[head++];
              for(int val = 0; val < SZ; val++){
                  int v = trans[u][val];
                  if(v == 0)  continue;
                  if(val != 10){
                      sum[v] = (sum[v] + sum[u] * 10 + val * cnt[u]) % MOD;
                      cnt[v] += cnt[u];
                  }
                  idg[v]--;
                  if(idg[v] == 0)     que[tail++] = v;
              }
          }
      }

      ll ans;
      void solve(){
          topoSort();
          ans = 0;
          for(int i = 2; i <= tot; i++){
              ans = (ans + sum[i]) % MOD;
          }
          printf("%lld\n", ans);
      }
  };

  SuffixMaton sam;

  int main(){
      int t;
      while(~scanf("%d", &t)){
          sam.init();
          while(t--){
              scanf("%s", str);
              for(int i = 0; str[i]; i++)     sam.push(str[i] - '0');
              if(t > 0)                       sam.push(10);
          }

          sam.solve();
      }
  }


hihoCoder 1465 : 后缀自动机五·重复旋律8

  #include <cstdio>
  #include <cstring>
  #include <algorithm>
  using namespace std;
  typedef long long ll;
  const int N = 2e5 + 15;

  char s[N], p[N];

  struct SuffixMaton{
      const static int SZ = 26;
      int tot, lst;
      int slink[N << 1], len[N << 1], trans[N << 1][SZ], endsz[N << 1];

      int NewNode(){
          int ret = ++tot;
          memset(trans[ret], 0, sizeof(trans[ret]));
          return ret;
      }

      void init(){
          tot = 0;
          lst = NewNode();
          slink[lst] = len[lst] = 0;
          endsz[0] = 0;
          endsz[1] = 1;
      }

      void push(int val){
          int p = lst, np = NewNode();
          len[np] = len[p] + 1;
          endsz[np] = 1;

          for(; p && trans[p][val] == 0; p = slink[p])    trans[p][val] = np;
          if(p == 0)  slink[np] = 1;
          else{
              int q = trans[p][val];
              if(len[q] == len[p] + 1)    slink[np] = q;
              else{
                  int nq = ++tot;
                  endsz[nq] = 0;
                  slink[nq] = slink[q];
                  len[nq] = len[p] + 1;
                  memcpy(trans[nq], trans[q], sizeof(trans[q]));
                  slink[q] = slink[np] = nq;

                  for(trans[p][val] = nq, p = slink[p]; p && trans[p][val] == q; p = slink[p])    trans[p][val] = nq;
              }
          }
          lst = np;
      }

      int que[N << 1], idg[N << 1], head, tail;
      void getEndSz(){
          head = tail = 0;
          memset(idg, 0, sizeof(idg));
          for(int i = 2; i <= tot; i++)    idg[slink[i]]++;
          for(int i = 1; i <= tot; i++){
              if(idg[i] == 0)     que[tail++] = i;
          }
          while(head != tail){
              int u    = que[head++];
              int preu = slink[u];
              if(preu == 0)   continue;
              endsz[preu] += endsz[u];
              idg[preu]--;
              if(preu && idg[preu] == 0)     que[tail++] = preu;
          }
      }

      bool used[N << 1];
      ll solve(){
          int n = strlen(p);
          for(int i = 0; i < n; i++)      p[i + n] = p[i];
          p[n << 1] = 0;
          memset(used, false, sizeof(used));

          ll ans = 0;
          int u = 1, l = 0;
          for(int i = 0; p[i]; i++){
              while(u != 1 && trans[u][p[i] - 'a'] == 0)       u = slink[u], l = len[u];
              if(trans[u][p[i] - 'a'])                         u = trans[u][p[i] - 'a'], l++;
              else                                             u = 1, l = 0;

              if(l > n){
                  while(len[slink[u]] >= n)                    u = slink[u], l = len[u];
              }
              if(l >= n && !used[u])                           ans += endsz[u], used[u] = true;
          }
          return ans;
      }
  };

  SuffixMaton sam;

  int main(){
      while(~scanf("%s", s)){
          sam.init();
          for(int i = 0; s[i]; i++)  sam.push(s[i] - 'a');
          sam.getEndSz();

          int t;
          scanf("%d", &t);
          while(t--){
              scanf("%s", p);
              printf("%lld\n", sam.solve());
          }
      }
  }