jacky860226 · October 11, 2025 05:24 · avis9ditiu · Apr 25, 2018
diff --git a/DC3.cpp b/DC3.cpp
 /**
 * DC3 (Difference Cover 3) 後綴陣列構造算法
 *
 * 算法原理：
 * 1. 將所有後綴分為三類：Type A (i%3==0), Type B1 (i%3==1), Type B2 (i%3==2)
 * 2. 遞歸處理 Type B 後綴（佔總數的 2/3）
 * 3. 利用 Type B 的排序結果來排序 Type A 後綴
 * 4. 歸併 Type A 和 Type B 得到完整的後綴陣列
 *
 * 時間複雜度分析：
 * T(n, σ) = T(⌊2n/3⌋, O(n)) + O(n + σ)
 * 其中：
 * - n = string_length（字符串長度）
 * - σ = alphabet_size（字母表大小）
 *
 * 每層遞歸的線性操作包括：
 * - 計數排序：O(n + σ)，需要處理 σ 個不同字符
 * - 三字符比較和分類：O(n)
 * - 歸併操作：O(n)
 *
 * 遞歸深度：O(log_{3/2} n)
 * 總時間複雜度：O(n + σ) 當 σ = O(n) 時為 O(n)
 *
 * 空間複雜度：O(n)，遞歸棧空間 O(log n)
 *
 * @param s 輸入字符串
 * @return 後綴陣列，sa[i] 表示字典序第 i 小的後綴在原串中的起始位置
 */
 vector<int> suffix_array_dc3(const string &s) {
  if (s.empty())
    return {};

  // 計算所需存儲空間
  // 遞歸關係
  // M(n) = (n + δ(n) + 2)                    , n ≤ 2
  //        (n + δ(n) + 2) + M(⌊2(n + δ(n))/3⌋), n > 2
  // 其中 δ(n) = 1 if n%3==1, else 0
  // 解得：M(n) <= 3n + 4log_{3/2}(n-2) + 1 ≈ 3n + O(log n)
  int storage_size = [](int n) {
    if (n <= 2)
      return n + 3;
    return 3 * n + int(4 * log(n - 2) / log(1.5)) + 1;
  }(s.size());

  // 工作數組分配
  vector<int> suffix_array_v(storage_size, 0), str_v(storage_size, 0);
  vector<int> bucket(max<int>(256, s.size() + 1), 0),
      temp_a(2 * (s.size() + 1) / 3, 0), temp_b(2 * (s.size() + 1) / 3, 0);
  /**
   * 計數排序 (Counting Sort) - DC3算法的核心排序組件
   *
   * 時間複雜度：O(length + max_val) = O(n + σ)
   * - length 通常為 O(n)，表示待排序的後綴數量
   * - max_val 為字母表大小 σ，影響計數排序的效率
   * - 當 σ = O(n) 時，整體複雜度為 O(n)
   * - 當 σ >> n 時，複雜度受 σ 主導
   *
   * 空間複雜度：O(σ) 用於 bucket 數組
   *
   * @param str 字符數組，用於獲取排序鍵值
   * @param input_idx 待排序的索引數組
   * @param sorted_idx 排序結果存放數組
   * @param length 待排序元素的數量
   * @param max_val 最大字符值（字母表大小-1）
   */
  auto counting_sort = [&](int *str, auto &input_idx, auto &sorted_idx,
                           int length, int max_val) {
    // 清空計數桶，時間複雜度 O(σ)
    for (int i = 0; i <= max_val; ++i)
      bucket[i] = 0;

    // 統計每個字符的出現次數，時間複雜度 O(n)
    for (int i = 0; i < length; ++i)
      bucket[str[input_idx[i]]]++;

    // 計算累積計數，確定排序位置，時間複雜度 O(σ)
    for (int i = 1; i <= max_val; ++i)
      bucket[i] += bucket[i - 1];

    // 從後向前穩定排序，時間複雜度 O(n)
    for (int i = length - 1; i >= 0; --i)
      sorted_idx[--bucket[str[input_idx[i]]]] = input_idx[i];
  };
  /**
   * 三字符比較函數
   * 用於判斷兩個位置的三字符前綴是否完全相同
   * 時間複雜度：O(1)
   */
  auto compare_three_chars = [&](int *str, int pos_x, int pos_y) {
    return str[pos_x] == str[pos_y] && str[pos_x + 1] == str[pos_y + 1] &&
           str[pos_x + 2] == str[pos_y + 2];
  };

  /**
   * DC3 主遞歸函數
   *
   * 時間複雜度詳細分析：
   * T(n, σ) = T(⌊2n/3⌋, O(n)) + O(n + σ)
   *
   * 線性部分 O(n + σ) 包括：
   * 1. Type B 後綴收集與三輪計數排序：3 × O(n + σ) = O(n + σ)
   * 2. 重新編號和去重檢查：O(n)
   * 3. Type A 後綴處理：O(n + σ)
   * 4. 最終歸併：O(n)
   *
   * 遞歸部分：問題規模縮減到 ⌊2n/3⌋，字母表大小變為 O(n)
   *
   * 總體複雜度：
   * - 當 σ = O(1)：T(n) = O(n)
   * - 當 σ = O(n)：T(n) = O(n)
   * - 當 σ > n：T(n) = O(σ)
   *
   * @param str 數值化的字符數組
   * @param sa 輸出的後綴數組
   * @param string_length 字符串長度
   * @param alphabet_size 字母表大小
   */
  std::function<void(int *, int *, int, int)> DC3 = [&](int *str, int *sa,
                                                        int string_length,
                                                        int alphabet_size) {
    // 長度模3等於1時需要填充，確保遞歸時 Type B1 和 B2 後綴間有隔離字符
    bool needs_padding = (string_length % 3 == 1);
    if (needs_padding)
      str[string_length++] = 0;
    // 設置工作指針，利用現有空間避免額外分配
    int *new_ranks = str + string_length + 2, // 新排名數組
        *new_sa = sa + string_length,         // 遞歸結果數組
        Num12 = 0, current_rank;              // Type B 後綴計數

    // === 第一階段：收集並排序 Type B 後綴 (i%3 != 0) ===
    // Type B 後綴占總數的 2/3，是遞歸處理的對象
    for (int i = 0; i < string_length; ++i)
      if (i % 3)
        temp_a[Num12++] = i;

    // 添加哨兵，確保邊界安全
    str[string_length] = str[string_length + 1] = 0;

    // 三輪計數排序：按三字符前綴 (str[i], str[i+1], str[i+2]) 排序
    // 每輪排序時間複雜度：O(n + σ)
    counting_sort(str + 2, temp_a, temp_b, Num12, alphabet_size); // 第三字符
    counting_sort(str + 1, temp_b, temp_a, Num12, alphabet_size); // 第二字符
    counting_sort(str, temp_a, temp_b, Num12, alphabet_size);     // 第一字符
    // 計算各類型後綴的數量
    int Num0 = 0, Num1 = (string_length + 1) / 3; // Type B1 後綴數量

    // 位置映射函數：將原始位置映射到遞歸數組中的位置
    // Type B1 (i%3==1) 映射到前半部分 [0, Num1)
    // Type B2 (i%3==2) 映射到後半部分 [Num1, Num12)
    auto POSITION_MAPPING = [&](int x) {
      return (x / 3) + (x % 3 == 1 ? 0 : Num1);
    };

    // === 第二階段：重新編號 Type B 後綴 ===
    // 為相同的三字符前綴分配相同的排名
    new_ranks[POSITION_MAPPING(temp_b[0])] = current_rank = 1;
    for (int i = 1; i < Num12; ++i) {
      // 如果三字符前綴不同，增加排名
      if (!compare_three_chars(str, temp_b[i], temp_b[i - 1]))
        current_rank++;
      new_ranks[POSITION_MAPPING(temp_b[i])] = current_rank;
    }

    // === 第三階段：遞歸處理或直接構造 SA ===
    if (current_rank < Num12) {
      // 仍有重複的三字符前綴，需要遞歸處理
      // 遞歸調用：T(⌊2n/3⌋, current_rank)
      DC3(new_ranks, new_sa, Num12, current_rank);
    } else {
      // 所有三字符前綴都不同，直接構造後綴數組
      for (int i = 0; i < Num12; ++i)
        if (new_ranks[i])
          new_sa[new_ranks[i] - 1] = i;
    }
    // === 第四階段：處理 Type A 後綴 (i%3 == 0) ===
    // 從 Type B 的排序結果中提取對應的 Type A 後綴
    for (int i = 0; i < Num12; ++i)
      if (new_sa[i] < Num1)
        temp_b[Num0++] = new_sa[i] * 3;

    // 對 Type A 後綴按首字符排序，時間複雜度 O(n + σ)
    counting_sort(str, temp_b, temp_a, Num0, alphabet_size);

    // 逆向位置映射：從遞歸數組位置映射回原始位置
    auto ORIGINAL_POSITION = [&](int x) {
      return x >= Num1 ? (x - Num1) * 3 + 2 : x * 3 + 1;
    };

    // 建立 Type B 後綴的排名查找表
    // new_ranks[position] = rank，用於常數時間比較
    new_ranks = bucket.data();
    for (int i = 0; i < Num12; ++i)
      new_ranks[temp_b[i] = ORIGINAL_POSITION(new_sa[i])] = i;
    new_ranks[string_length] = -1; // 哨兵

    // === 第五階段：歸併 Type A 和 Type B 的排序結果 ===
    /**
     * 後綴比較函數 - DC3算法的核心比較邏輯
     *
     * 比較策略：
     * 1. 保證參數順序：pos_x 為 Type B，pos_y 為 Type A
     * 2. 首字符不同：直接比較字典序
     * 3. Type B1 後綴 (pos % 3 == 1)：比較遞歸排名
     * 4. Type B2 後綴 (pos % 3 == 2)：遞歸比較下一位
     *
     * 性能優化：
     * - 計算 pos_x % 3 只執行一次，避免重複計算
     * - 利用已計算的排名信息，避免字符串比較
     * - 確保比較操作的常數時間複雜度
     *
     * 時間複雜度：O(1) 均攤，這是DC3算法達到線性時間的關鍵
     */
    std::function<bool(int, int)> suffix_comparator = [&](int pos_x,
                                                          int pos_y) {
      // 優化：計算模運算只一次，避免重複計算
      int mod3 = pos_x % 3;

      // 保證 pos_x 是 Type B 後綴，pos_y 是 Type A 後綴
      if (mod3 == 0)
        return !suffix_comparator(pos_y, pos_x);

      // 首字符不同，直接比較
      if (str[pos_x] != str[pos_y])
        return str[pos_x] < str[pos_y];

      if (mod3 == 1)
        // Type B1: 比較下一位的遞歸排名
        return new_ranks[pos_x + 1] < new_ranks[pos_y + 1];
      else
        // Type B2: 遞歸比較，交換參數順序實現正確的字典序
        return !suffix_comparator(pos_y + 1, pos_x + 1);
    };

    // 使用 std::merge 歸併兩個已排序的後綴類型
    // 注意：needs_padding 用於跳過隔離字符
    // 當 needs_padding=true 時，隔離字符 \0 必然排在 temp_b 的第一位
    // 因為 \0 是最小值，所以歸併時從 temp_b[1] 開始，跳過這個隔離字符
    merge(temp_a.begin(), temp_a.begin() + Num0,                  // Type A 範圍
          temp_b.begin() + needs_padding, temp_b.begin() + Num12, // Type B 範圍
          sa,                                                     // 輸出位置
          suffix_comparator);                                     // 比較函數
  };

  // === 算法執行階段 ===
  // 將字符串轉換為數值數組，便於處理
  for (size_t i = 0; i < s.size(); ++i)
    str_v[i] = s[i];

  // 調用主算法：DC3(字符數組, 後綴數組, 長度, 字母表大小)
  // 字母表大小設為 255，支持所有 ASCII 字符
  DC3(str_v.data(), suffix_array_v.data(), s.size(), 255);

  // 調整結果數組大小並返回
  suffix_array_v.resize(s.size());
  return suffix_array_v;
 }
diff --git a/doubling algorithm NlogN.cpp b/doubling algorithm NlogN.cpp
 #include<algorithm>
 #define radix_sort(x,y){\
 	for(i=0;i<A;++i)c[i]=0;\
 	for(i=0;i<len;++i)c[x[y[i]]]++;\
 	for(i=1;i<A;++i)c[i]+=c[i-1];\
 	for(i=len-1;i>=0;--i)sa[--c[x[y[i]]]]=y[i];\
 }
 #define sac(r,a,b) r[a]!=r[b]||a+k>=len||r[a+k]!=r[b+k]
 inline void suffix_array(const char *s,int len,int *sa,int *rank,int *tmp,int *c){
 	int A='z'+1,i,k,id=0;
 	for(i=0;i<len;++i)rank[tmp[i]=i]=s[i];
 	radix_sort(rank,tmp);
 	for(k=1;id<len-1;k<<=1){
 		for(id=0,i=len-k;i<len;++i)tmp[id++]=i;
 		for(i=0;i<len;++i)if(sa[i]>=k)tmp[id++]=sa[i]-k;
 		radix_sort(rank,tmp);
 		std::swap(rank,tmp);
 		for(rank[sa[0]]=id=0,i=1;i<len;++i)
 			rank[sa[i]]=id+=sac(tmp,sa[i-1],sa[i]);
 		A=id+1;
 	}
 }
 #undef radix_sort
 #undef sac
diff --git a/doubling algorithm NlogNlogN.cpp b/doubling algorithm NlogNlogN.cpp
 #include<algorithm>
 struct CMP{
 	int len,k,*rank,a,b;
 	inline bool operator()(int i,int j){
 		if(rank[i]!=rank[j])return rank[i]<rank[j];
 		a=(i+=k)<len?rank[i]:-1;
 		b=(j+=k)<len?rank[j]:-1;
 		return a<b;
 	}
 };
 inline void suffix_array(const char *s,int len,int *sa,int *rank,int *tmp){
 	for(int i=0;i<len;++i){
 		sa[i]=i;
 		rank[i]=s[i];
 	}
 	CMP cmp={len,1};
 	for(;;cmp.k<<=1){
 		cmp.rank=rank;
 		std::sort(sa,sa+len,cmp);
 		tmp[sa[0]]=0;
 		for(int i=1;i<len;++i)tmp[sa[i]]=tmp[sa[i-1]]+cmp(sa[i-1],sa[i]);
 		if(tmp[sa[len-1]]==len-1)break;
 		std::swap(rank,tmp);
 	}
 }
diff --git a/LCP.cpp b/LCP.cpp
 /*
 h:高度數組
 sa:後綴數組
 rank:排名
 */
 inline void suffix_array_lcp(const char *s,int len,int *h,int *sa,int *rank){
 	for(int i=0;i<len;++i)rank[sa[i]]=i;
 	for(int i=0,k=0;i<len;++i){
 		if(rank[i]==0)continue;
 		if(k)--k;
 		while(s[i+k]==s[sa[rank[i]-1]+k])++k;
 		h[rank[i]]=k;
 	}
 	h[0]=0;
 }
diff --git a/SA-IS black magic.cpp b/SA-IS black magic.cpp
 #include<string.h>
 #define MAGIC(XD){\
 	memset(sa,0,sizeof(int)*n);\
 	memcpy(x,c,sizeof(int)*z);\
 	XD\
 	memcpy(x+1,c,sizeof(int)*(z-1));\
 	for(i=0;i<n;++i){\
 		if(sa[i]&&!t[sa[i]-1])sa[x[s[sa[i]-1]]++]=sa[i]-1;\
 	}\
 	memcpy(x,c,sizeof(int)*z);\
 	for(i=n-1;i>=0;--i){\
 		if(sa[i]&&t[sa[i]-1])sa[--x[s[sa[i]-1]]]=sa[i]-1;\
 	}\
 }
 void sais(int *s,int *sa,int *p,bool *t,int *c,int n,int z){
 	bool neq=t[n-1]=1;
 	int nn=0,nmxz=-1,*q=sa+n,*ns=s+n,*x=c+z,lst=-1,i,j;
 	memset(c,0,sizeof(int)*z);
 	for(i=0;i<n;++i)++c[s[i]];
 	for(i=0;i<z-1;++i)c[i+1]+=c[i];
 	for(i=n-2;i>=0;i--)t[i]=(s[i]==s[i+1]?t[i+1]:s[i]<s[i+1]);
 	MAGIC(
 		for(i=1;i<n;++i){
 			if(t[i]&&!t[i-1])sa[--x[s[i]]]=p[q[i]=nn++]=i;
 		}
 	);
 	for(i=0;i<n;++i)if((j=sa[i])&&t[j]&&!t[j-1]){
 		neq=lst<0||memcmp(s+j,s+lst,(p[q[j]+1]-j)*sizeof(int));
 		ns[q[lst=j]]=nmxz+=neq;
 	}
 	if(nmxz==nn-1)for(i=0;i<nn;++i)q[ns[i]]=i;
 	else sais(ns,q,p+nn,t+n,c+z,nn,nmxz+1);
 	MAGIC(
 		for(i=nn-1;i>=0;--i)sa[--x[s[p[q[i]]]]]=p[q[i]];
 	);
 }
 #undef MAGIC
 /*****************這些是需要用到的陣列大小**************/
 static const int MXN=10000000;
 int s[MXN*2+5],sa[MXN*2+5],p[MXN+5],c[MXN*2+5];
 bool t[MXN*2+5];
diff --git a/SA-IS paper code.c b/SA-IS paper code.c
 /*SA-IS Algorithm*/
 const unsigned char mask[]={0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01};
 #define tget(i) ( (t[(i)/8]&mask[(i)%8])?1:0)
 #define tset(i, b) t[(i)/8]=(b)?(mask[(i)%8]|t[(i)/8]):((~mask[(i)%8])&t[(i)/8])
 #define chr(i) (cs==sizeof(int)?((int*)s)[i]:((unsigned char *)s)[i])
 #define isLMS(i) (i>0&&tget(i)&&!tget(i-1))
 // find the start or end of each bucket
 inline void getBuckets(unsigned char *s,int *bkt,int n,int K,int cs,bool end){
 	int i,sum=0;
 	// clear all buckets
 	for(i=0;i<=K;++i)bkt[i]=0;
 	// compute the size of each bucket
 	for(i=0;i<n;++i)++bkt[chr(i)];
 	for(i=0;i<=K;++i){
 		sum+=bkt[i];
 		bkt[i]=end?sum:sum-bkt[i];
 	}
 }
 // compute SAl
 inline void induceSAl(unsigned char *t,int *SA,unsigned char *s,int *bkt,int n,int K,int cs,bool end){
 	int i,j;
 	// find starts of buckets
 	getBuckets(s,bkt,n,K,cs,end);
 	for(i=0;i<n;++i){
 		j=SA[i]-1;
 		if(j>=0&&!tget(j))SA[bkt[chr(j)]++]=j;
 	}
 }
 // compute SAs
 inline void induceSAs(unsigned char *t,int *SA,unsigned char *s,int *bkt,int n,int K,int cs,bool end){
 	int i,j;
 	// find ends of buckets
 	getBuckets(s,bkt,n,K,cs,end);
 	for(i=n-1;i>=0;--i){
 		j=SA[i]-1;
 		if(j>=0&&tget(j))SA[--bkt[chr(j)]]=j;
 	}
 }
 // find the suffix array SA of s[0..n-1] in {1..K}?n
 // require s[n-1]=0 (the sentinel!), n>=2
 // use a working space (excluding s and SA) of
 // at most 2.25n+O(1) for a constant alphabet
 void SA_IS(unsigned char *s,int *SA,int n,int K,short cs){
 	// LS-type array in bits
 	unsigned char *t=(unsigned char *)malloc(n/8+1);
 	int i,j;
 	// classify the type of each character
 	// the sentinel must be in s1, important!!!
 	tset(n-2,0);tset(n-1,1);
 	for(i=n-3;i>=0;--i)
 	tset(i,(chr(i)<chr(i+1)||(chr(i)==chr(i+1)&&tget(i+1)==1))?1:0);
 	// stage 1: reduce the problem by at least 1/2
 	// sort all the S-substrings
 	// bucket array
 	int *bkt=(int *)malloc(sizeof(int)*(K+1));
 	// find ends of buckets
 	getBuckets(s,bkt,n,K,cs,true);
 	for(i=0;i<n;++i)SA[i]=-1;
 	for(i=1;i<n;++i)if(isLMS(i))SA[--bkt[chr(i)]]=i;
 	induceSAl(t,SA,s,bkt,n,K,cs,false);
 	induceSAs(t,SA,s,bkt,n,K,cs,true);
 	free(bkt);
 	// compact all the sorted substrings into
 	// the first n1 items of SA
 	// 2*n1 must be not larger than n (proveable)
 	int n1=0;
 	for(i=0;i<n;++i)if(isLMS(SA[i]))SA[n1++]=SA[i];
 	// find the lexicographic names of substrings
 	// init the name array buffer
 	for(i=n1;i<n;++i)SA[i]=-1;
 	int name=0,prev=-1;
 	for(i=0;i<n1;++i){
 		int pos=SA[i];bool diff=false;
 		for(int d=0;d<n;++d)
 		if(prev==-1||chr(pos+d)!=chr(prev+d)||tget(pos+d)!=tget(prev+d)){
 			diff=true;break;
 		}else if(d>0&&(isLMS(pos+d)||isLMS(prev+d)))break;
 		if(diff){
 			++name;prev=pos;
 		}
 		pos=(pos%2==0)?pos/2:(pos-1)/2;
 		SA[n1+pos]=name-1;
 	}
 	for(i=n-1,j=n-1;i>=n1;--i)if(SA[i]>=0)SA[j--]=SA[i];
 	// stage 2: solve the reduced problem
 	// recurse if names are not yet unique
 	int *SA1=SA,*s1=SA+n-n1;
 	if(name<n1)SA_IS((unsigned char*)s1,SA1,n1,name-1,sizeof(int));
 	else // generate the suffix array of s1 directly
 	for(i=0;i<n1;++i)SA1[s1[i]]=i;
 	// stage 3: induce the result for
 	// the original problem
 	// bucket array
 	bkt=(int *)malloc(sizeof(int)*(K+1));
 	// put all the LMS characters into their buckets
 	// find ends of buckets
 	getBuckets(s,bkt,n,K,cs,true);
 	for(i=1,j=0;i<n;++i)if(isLMS(i))s1[j++]=i; // get p1
 	// get index in s
 	for(i=0;i<n1;++i)SA1[i]=s1[SA1[i]];
 	// init SA[n1..n-1]
 	for(i=n1;i<n;++i)SA[i]=-1;
 	for(i=n1-1;i>=0;--i){
 		j=SA[i];SA[i]=-1;
 		SA[--bkt[chr(j)]]=j;
 	}
 	induceSAl(t,SA,s,bkt,n,K,cs,false);
 	induceSAs(t,SA,s,bkt,n,K,cs,true);
 	free(bkt);free(t);
 }
diff --git a/SA-IS very fast.cpp b/SA-IS very fast.cpp
 #include <time.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>

 #ifndef UCHAR_SIZE
 # define UCHAR_SIZE 256
 #endif
 #ifndef MINBUCKETSIZE
 # define MINBUCKETSIZE 256
 #endif

 #define sais_index_type int
 #define sais_bool_type  int
 #define SAIS_LMSSORT2_LIMIT 0x3fffffff

 #define SAIS_MYMALLOC(_num, _type) ((_type *)malloc((_num) * sizeof(_type)))
 #define chr(_a) (cs == sizeof(sais_index_type) ? ((sais_index_type *)T)[(_a)] : ((unsigned char *)T)[(_a)])

 /* find the start or end of each bucket */
 inline void getCounts(const void *T,sais_index_type *C,sais_index_type n,sais_index_type k,int cs){
 	sais_index_type i;
 	for(i=0;i<k;++i)C[i]=0;
 	for(i=0;i<n;++i)++C[chr(i)];
 }
 inline void getBuckets(const sais_index_type *C,sais_index_type *B,sais_index_type k,sais_bool_type end){
 	sais_index_type i,sum=0;
 	if(end)for(i=0;i<k;++i){
 		sum+=C[i];
 		B[i]=sum;
 	}else for(i=0;i<k;++i){
 		sum+=C[i];
 		B[i]=sum-C[i];
 	}
 }

 /* sort all type LMS suffixes */
 inline void LMSsort1(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type n,sais_index_type k,int cs){
 	sais_index_type *b,i,j;
 	sais_index_type c0,c1;

 	/* compute SAl */
 	if(C==B)getCounts(T,C,n,k,cs);
 	getBuckets(C,B,k,0); /* find starts of buckets */
 	j=n-1;
 	b=SA+B[c1=chr(j)];
 	--j;
 	*b++=(chr(j)<c1)?~j:j;
 	for(i=0;i<n;++i){
 		if(0<(j=SA[i])){
 			if((c0=chr(j))!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			--j;
 			*b++=(chr(j)<c1)?~j:j;
 			SA[i]=0;
 		}else if(j<0)SA[i]=~j;
 	}
 	/* compute SAs */
 	if(C==B)getCounts(T,C,n,k,cs);
 	getBuckets(C,B,k,1); /* find ends of buckets */
 	for(i=n-1,b=SA+B[c1=0];0<=i;--i){
 		if(0<(j=SA[i])){
 			if((c0=chr(j))!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			--j;
 			*--b=(chr(j)>c1)?~(j+1):j;
 			SA[i]=0;
 		}
 	}
 }
 inline sais_index_type LMSpostproc1(const void *T,sais_index_type *SA,sais_index_type n,sais_index_type m,int cs){
 	sais_index_type i,j,p,q,plen,qlen,name;
 	sais_index_type c0,c1;
 	sais_bool_type diff;

 	/* compact all the sorted substrings into the first m items of SA
 	2*m must be not larger than n (proveable) */
 	for(i=0;(p=SA[i])<0;++i)SA[i]=~p;
 	if(i<m){
 		for(j=i,++i;;++i){
 			if((p=SA[i])<0){
 				SA[j++]=~p;SA[i]=0;
 				if(j==m)break;
 			}
 		}
 	}
 	
 	/* store the length of all substrings */
 	i=n-1;j=n-1;c0=chr(n-1);
 	do{
 		c1=c0;
 	}while((0<=--i)&&((c0=chr(i))>=c1));
 	for(;0<=i;){
 		do{
 			c1=c0;
 		}while((0<=--i)&&((c0=chr(i))<=c1));
 		if(0<=i){
 			SA[m+((i+1)>>1)]=j-i;
 			j=i+1;
 			do{
 				c1=c0;
 			}while((0<=--i)&&((c0=chr(i))>=c1));
 		}
 	}

 	/* find the lexicographic names of all substrings */
 	for(i=0,name=0,q=n,qlen=0;i<m;++i){
 		p=SA[i],plen=SA[m+(p>>1)],diff=1;
 		if((plen==qlen)&&((q+plen)<n)){
 			for(j=0;(j<plen)&&(chr(p+j)==chr(q+j));++j);
 			if(j==plen)diff=0;
 		}
 		if(diff!=0)++name,q=p,qlen=plen;
 		SA[m+(p>>1)]=name;
 	}
 	return name;
 }
 inline void LMSsort2(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type *D,sais_index_type n,sais_index_type k,int cs){
 	sais_index_type *b,i,j,t,d;
 	sais_index_type c0,c1;
 	
 	/* compute SAl */
 	getBuckets(C,B,k,0); /* find starts of buckets */
 	j=n-1;
 	b=SA+B[c1=chr(j)];
 	--j;
 	t=(chr(j)<c1);
 	j+=n;
 	*b++=(t&1)?~j:j;
 	for(i=0,d=0;i<n;++i){
 		if(0<(j=SA[i])){
 			if(n<=j)d+=1,j-=n;
 			if((c0=chr(j))!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			--j;
 			t=c0;t=(t<<1)|(chr(j)<c1);
 			if(D[t]!=d)j+=n,D[t]=d;
 			*b++=(t&1)?~j:j;
 			SA[i]=0;
 		}else if(j<0)SA[i]=~j;
 	}
 	for(i=n-1;0<=i;--i){
 		if(0<SA[i]){
 			if(SA[i]<n){
 				SA[i]+=n;
 				for(j=i-1;SA[j]<n;--j);
 				SA[j]-=n;
 				i=j;
 			}
 		}
 	}
 	
 	/* compute SAs */
 	getBuckets(C,B,k,1); /* find ends of buckets */
 	for(i=n-1,d+=1,b=SA+B[c1=0];0<=i;--i){
 		if(0<(j=SA[i])){
 			if(n<=j)d+=1,j-=n;
 			if((c0=chr(j))!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			--j;
 			t=c0;t=(t<<1)|(chr(j)>c1);
 			if(D[t]!=d)j+=n,D[t]=d;
 			*--b=(t&1)?~(j+1):j;
 			SA[i]=0;
 		}
 	}
 }
 inline sais_index_type LMSpostproc2(sais_index_type *SA,sais_index_type n,sais_index_type m){
 	sais_index_type i,j,d,name;

 	/* compact all the sorted LMS substrings into the first m items of SA */
 	for(i=0,name=0;(j=SA[i])<0;++i){
 		j=~j;
 		if(n<=j)name+=1;
 		SA[i]=j;
 	}
 	if(i<m){
 		for(d=i,++i;;++i){
 			if((j=SA[i])<0){
 				j=~j;
 				if(n<=j)name+=1;
 				SA[d++]=j;SA[i]=0;
 				if(d==m)break;
 			}
 		}
 	}
 	if(name<m){
 	/* store the lexicographic names */
 		for(i=m-1,d=name+1;0<=i;--i){
 			if(n<=(j=SA[i]))j-=n,--d;
 			SA[m+(j>>1)]=d;
 		}
 	}else{
 		/* unset flags */
 		for(i=0;i<m;++i){
 			if(n<=(j=SA[i]))j-=n,SA[i]=j;
 		}
 	}

 	return name;
 }

 /* compute SA and BWT */
 inline void induceSA(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type n,sais_index_type k,int cs){
 	sais_index_type *b,i,j;
 	sais_index_type c0,c1;
 	/* compute SAl */
 	if(C==B)getCounts(T, C, n, k, cs);
 	getBuckets(C,B,k,0); /* find starts of buckets */
 	j =n-1;
 	b=SA+B[c1=chr(j)];
 	*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
 	for(i=0;i<n;++i){
 		j=SA[i],SA[i]=~j;
 		if(0<j){
 			--j;
 			if((c0=chr(j))!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
 		}
 	}
 	/* compute SAs */
 	if(C==B)getCounts(T,C,n,k,cs);
 	getBuckets(C,B,k,1); /* find ends of buckets */
 	for(i=n-1,b=SA+B[c1=0];0<=i;--i){
 		if(0<(j=SA[i])){
 			--j;
 			if((c0=chr(j))!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			*--b=((j==0)||(chr(j-1)>c1))?~j:j;
 		}else SA[i]=~j;
 	}
 }
 inline sais_index_type computeBWT(const void *T,sais_index_type *SA,sais_index_type *C,sais_index_type *B,sais_index_type n,sais_index_type k,int cs){
 	sais_index_type *b,i,j,pidx=-1;
 	sais_index_type c0,c1;
 	/* compute SAl */
 	if(C==B)getCounts(T, C, n, k, cs);
 	getBuckets(C,B,k,0); /* find starts of buckets */
 	j=n-1;
 	b=SA+B[c1=chr(j)];
 	*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
 	for(i=0;i<n;++i){
 		if(0<(j=SA[i])){
 			--j;
 			SA[i]=~((sais_index_type)(c0 = chr(j)));
 			if(c0!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
 		}else if(j!=0)SA[i]=~j;
 	}
 	/* compute SAs */
 	if(C==B)getCounts(T,C,n,k,cs);
 	getBuckets(C,B,k,1); /* find ends of buckets */
 	for(i=n-1,b=SA+B[c1=0];0<=i;--i){
 		if(0<(j=SA[i])){
 			--j;
 			SA[i]=(c0=chr(j));
 			if(c0!=c1){
 				B[c1]=b-SA;
 				b=SA+B[c1=c0];
 			}
 			*--b=((0<j)&&(chr(j-1)>c1))?~((sais_index_type)chr(j-1)):j;
 		}else if(j!=0)SA[i]=~j;
 		else pidx=i;
 	}
 	return pidx;
 }

 /* find the suffix array SA of T[0..n-1] in {0..255}^n */
 sais_index_type sais_main(const void *T,sais_index_type *SA,sais_index_type fs,sais_index_type n,sais_index_type k,int cs,sais_bool_type isbwt){
 	sais_index_type *C,*B,*D,*RA,*b;
 	sais_index_type i,j,m,p,q,t,name,pidx=0,newfs;
 	sais_index_type c0,c1;
 	unsigned int flags;


 	if(k<=MINBUCKETSIZE){
 		if((C=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
 		if(k<=fs){
 			B=SA+(n+fs-k);
 			flags=1;
 		}else{
 			if((B=SAIS_MYMALLOC(k,sais_index_type))==NULL){
 				free(C);
 				return -2;
 			}
 			flags=3;
 		}
 	}else if(k<=fs){
 		C=SA+(n+fs-k);
 		if(k<=(fs-k)){
 			B=C-k;
 			flags=0;
 		}else if(k<=(MINBUCKETSIZE*4)){
 			if((B=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
 			flags=2;
 		}else{
 			B=C;
 			flags=8;
 		}
 	}else{
 		if((C=B=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
 		flags=4|8;
 	}
 	if((n<=SAIS_LMSSORT2_LIMIT)&&(2<=(n/k))){
 		if(flags&1)flags|=((k*2)<=(fs-k))?32:16;
 		else if((flags==0)&&((k*2)<=(fs-k*2)))flags|=32;
 	}

 	/* stage 1: reduce the problem by at least 1/2
 	sort all the LMS-substrings */
 	getCounts(T,C,n,k,cs);getBuckets(C,B,k,1); /* find ends of buckets */
 	for(i=0;i<n;++i)SA[i]=0;
 	b=&t;i=n-1;j=n;m=0;c0=chr(n-1);
 	do{
 		c1=c0;
 	}while((0<=--i)&&((c0=chr(i))>=c1));
 	for(;0<=i;){
 		do{
 			c1=c0;
 		}while((0<=--i)&&((c0=chr(i))<=c1));
 		if(0<=i){
 			*b=j;b=SA+--B[c1];j=i;++m;
 			do{
 				c1=c0;
 			}while((0<=--i)&&((c0=chr(i))>=c1));
 		}
 	}

 	if(1<m){
 		if(flags&(16|32)){
 			if(flags&16){
 				if((D=SAIS_MYMALLOC(k*2,sais_index_type))==NULL){
 					if(flags&(1|4))free(C);
 					if(flags&2)free(B);
 					return -2;
 				}
 			}else D=B-k*2;
 			++B[chr(j+1)];
 			for(i=0,j=0;i<k;++i){
 				j+=C[i];
 				if(B[i]!=j)SA[B[i]]+=n;
 				D[i]=D[i+k]=0;
 			}
 			LMSsort2(T,SA,C,B,D,n,k,cs);
 			name=LMSpostproc2(SA,n,m);
 			if(flags&16)free(D);
 		}else{
 			LMSsort1(T,SA,C,B,n,k,cs);
 			name=LMSpostproc1(T,SA,n,m,cs);
 		}
 	}else if(m==1){
 		*b=j+1;
 		name=1;
 	}else name = 0;
 	
 	/* stage 2: solve the reduced problem
 	recurse if names are not yet unique */
 	if(name<m){
 		if(flags&4)free(C);
 		if(flags&2)free(B);
 		newfs=(n+fs)-(m*2);
 		if((flags&(1|4|8))==0){
 			if((k+name)<=newfs)newfs-=k;
 			else flags|=8;
 		}
 		RA=SA+m+newfs;
 		for(i=m+(n>>1)-1,j=m-1;m<=i;--i){
 			if(SA[i]!=0)RA[j--]=SA[i]-1;
 		}
 		if(sais_main(RA,SA,newfs,m,name,sizeof(sais_index_type),0)!=0){
 			if(flags&1)free(C);
 			return -2;
 		}

 		i=n-1;j=m-1;c0=chr(n-1);
 		do{
 			c1=c0;
 		}while((0<=--i)&&((c0=chr(i))>=c1));
 		for(;0<=i;){
 			do{
 				c1=c0;
 			}while((0<=--i)&&((c0=chr(i))<=c1));
 			if(0<=i){
 				RA[j--]=i+1;
 				do{
 					c1=c0;
 				}while((0<=--i)&&((c0=chr(i))>=c1));
 			}
 		}
 		for(i=0;i<m;++i)SA[i]=RA[SA[i]];
 		if(flags&4){
 			if((C=B=SAIS_MYMALLOC(k,int))==NULL)return -2;
 		}
 		if(flags&2){
 			if((B=SAIS_MYMALLOC(k,int))==NULL){
 				if(flags&1)free(C);
 				return -2;
 			}
 		}
 	}

 	/* stage 3: induce the result for the original problem */
 	if(flags&8)getCounts(T,C,n,k,cs);
 	/* put all left-most S characters into their buckets */
 	if(1<m){
 		getBuckets(C,B,k,1); /* find ends of buckets */
 		i=m-1,j=n,p=SA[m-1],c1=chr(p);
 		do{
 			q=B[c0=c1];
 			while(q<j)SA[--j]=0;
 			do{
 				SA[--j]=p;
 				if(--i<0)break;
 				p=SA[i];
 			}while((c1=chr(p))==c0);
 		}while(0<=i);
 		while(0<j)SA[--j]=0;
 	}
 	if(isbwt==0)induceSA(T,SA,C,B,n,k,cs);
 	else pidx=computeBWT(T,SA,C,B,n,k,cs);
 	if(flags&(1|4))free(C);
 	if(flags&2)free(B);

 	return pidx;
 }

 /*---------------------------------------------------------------------------*/

 inline int sais(const unsigned char *T,int *SA,int n){
 	if((T==NULL)||(SA==NULL)||(n<0))return -1;
 	if(n<=1){
 		if(n==1)SA[0]=0;
 		return 0;
 	}
 	return sais_main(T,SA,0,n,UCHAR_SIZE,sizeof(unsigned char),0);
 }

 inline int sais_int(const int *T,int *SA,int n,int k){
 	if((T==NULL)||(SA==NULL)||(n<0)||(k<=0))return -1;
 	if(n<=1){
 		if(n==1)SA[0]=0;
 		return 0;
 	}
 	return sais_main(T,SA,0,n,k,sizeof(int),0);
 }

 inline int sais_bwt(const unsigned char *T,unsigned char *U,int *A,int n){
 	int i,pidx;
 	if((T==NULL)||(U==NULL)||(A==NULL)||(n<0))return -1;
 	if(n<=1){
 		if(n==1)U[0]=T[0];
 		return n;
 	}
 	pidx=sais_main(T,A,0,n,UCHAR_SIZE,sizeof(unsigned char),1);
 	if(pidx<0)return pidx;
 	U[0]=T[n-1];
 	for(i=0;i<pidx;++i)U[i+1]=(unsigned char)A[i];
 	for(i+=1;i<n;++i)U[i]=(unsigned char)A[i];
 	pidx+=1;
 	return pidx;
 }

 inline int sais_int_bwt(const int *T,int *U,int *A,int n,int k){
 	int i,pidx;
 	if((T==NULL)||(U==NULL)||(A==NULL)||(n<0)||(k<=0))return -1;
 	if(n<=1){
 		if(n==1)U[0]=T[0];
 		return n;
 	}
 	pidx=sais_main(T,A,0,n,k,sizeof(int),1);
 	if(pidx<0)return pidx;
 	U[0]=T[n-1];
 	for(i=0;i<pidx;++i)U[i+1]=A[i];
 	for(i+=1;i<n;++i)U[i]=A[i];
 	pidx+=1;
 	return pidx;
 }
 char s[10000005];
 int sa[10000005],len;
 int main(){
 	freopen("in.txt","r",stdin);
 	gets(s);
 	int st=clock();
 	sais((unsigned char*)s,sa,len=strlen(s));
 	printf("%f\n",(double)(clock()-st)/1000);
 	//for(int i=0;i<len;++i)printf("%d\n",sa[i]);
 	return 0;
 }
diff --git a/SA-IS.cpp b/SA-IS.cpp
 #define pushS(x) sa[--bkt[s[x]]] = x  
 #define pushL(x) sa[bkt[s[x]]++] = x
 #define induce_sort(v){\
 	fill_n(sa,n,0);\
 	copy_n(_bkt,A,bkt);\
 	for(i=n1-1;~i;--i)pushS(v[i]);\
 	copy_n(_bkt,A-1,bkt+1);\
 	for(i=0;i<n;++i)if(sa[i]&&!t[sa[i]-1])pushL(sa[i]-1);\
 	copy_n(_bkt,A,bkt);\
 	for(i=n-1;~i;--i)if(sa[i]&&t[sa[i]-1])pushS(sa[i]-1);\
 }
 template<typename T>
 void sais(const T s,int n,int *sa,int *_bkt,int *p,bool *t,int A){
 	int *rnk=p+n,*s1=p+n/2,*bkt=_bkt+A;
 	int n1=0,i,j,x=t[n-1]=1,y=rnk[0]=-1,cnt=-1;
 	for(i=n-2;~i;--i)t[i]=(s[i]==s[i+1]?t[i+1]:s[i]<s[i+1]);
 	for(i=1;i<n;++i)rnk[i]=t[i]&&!t[i-1]?(p[n1]=i,n1++):-1;
 	fill_n(_bkt,A,0);
 	for(i=0;i<n;++i)++_bkt[s[i]];
 	for(i=1;i<A;++i)_bkt[i]+=_bkt[i-1];
 	induce_sort(p);
 	for(i=0;i<n;++i)if(~(x=rnk[sa[i]]))
 		j=y<0||memcmp(s+p[x],s+p[y],(p[x+1]-p[x])*sizeof(s[0]))
 		,s1[y=x]=cnt+=j;
 	if(cnt+1<n1)sais(s1,n1,sa,bkt,rnk,t+n,cnt+1);
 	else for(i=0;i<n1;++i)sa[s1[i]]=i;
 	for(i=0;i<n1;++i)s1[i]=p[sa[i]];
 	induce_sort(s1);
 }
 /*****************這些是需要用到的陣列大小**************/
 const int MAXN=10000005,MAXA='z'+1;
 int sa[MAXN],bkt[MAXN+MAXA],p[MAXN*2];
 bool t[MAXN*2];
 char s[MAXN];
	/**
	* DC3 (Difference Cover 3) 後綴陣列構造算法
	*
	* 算法原理：
	* 1. 將所有後綴分為三類：Type A (i%3==0), Type B1 (i%3==1), Type B2 (i%3==2)
	* 2. 遞歸處理 Type B 後綴（佔總數的 2/3）
	* 3. 利用 Type B 的排序結果來排序 Type A 後綴
	* 4. 歸併 Type A 和 Type B 得到完整的後綴陣列
	*
	* 時間複雜度分析：
	* T(n, σ) = T(⌊2n/3⌋, O(n)) + O(n + σ)
	* 其中：
	* - n = string_length（字符串長度）
	* - σ = alphabet_size（字母表大小）
	*
	* 每層遞歸的線性操作包括：
	* - 計數排序：O(n + σ)，需要處理 σ 個不同字符
	* - 三字符比較和分類：O(n)
	* - 歸併操作：O(n)
	*
	* 遞歸深度：O(log_{3/2} n)
	* 總時間複雜度：O(n + σ) 當 σ = O(n) 時為 O(n)
	*
	* 空間複雜度：O(n)，遞歸棧空間 O(log n)
	*
	* @param s 輸入字符串
	* @return 後綴陣列，sa[i] 表示字典序第 i 小的後綴在原串中的起始位置
	*/
	vector<int> suffix_array_dc3(const string &s) {
	if (s.empty())
	return {};

	// 計算所需存儲空間
	// 遞歸關係
	// M(n) = (n + δ(n) + 2) , n ≤ 2
	// (n + δ(n) + 2) + M(⌊2(n + δ(n))/3⌋), n > 2
	// 其中 δ(n) = 1 if n%3==1, else 0
	// 解得：M(n) <= 3n + 4log_{3/2}(n-2) + 1 ≈ 3n + O(log n)
	int storage_size = [](int n) {
	if (n <= 2)
	return n + 3;
	return 3 * n + int(4 * log(n - 2) / log(1.5)) + 1;
	}(s.size());

	// 工作數組分配
	vector<int> suffix_array_v(storage_size, 0), str_v(storage_size, 0);
	vector<int> bucket(max<int>(256, s.size() + 1), 0),
	temp_a(2 * (s.size() + 1) / 3, 0), temp_b(2 * (s.size() + 1) / 3, 0);
	/**
	* 計數排序 (Counting Sort) - DC3算法的核心排序組件
	*
	* 時間複雜度：O(length + max_val) = O(n + σ)
	* - length 通常為 O(n)，表示待排序的後綴數量
	* - max_val 為字母表大小 σ，影響計數排序的效率
	* - 當 σ = O(n) 時，整體複雜度為 O(n)
	* - 當 σ >> n 時，複雜度受 σ 主導
	*
	* 空間複雜度：O(σ) 用於 bucket 數組
	*
	* @param str 字符數組，用於獲取排序鍵值
	* @param input_idx 待排序的索引數組
	* @param sorted_idx 排序結果存放數組
	* @param length 待排序元素的數量
	* @param max_val 最大字符值（字母表大小-1）
	*/
	auto counting_sort = [&](int *str, auto &input_idx, auto &sorted_idx,
	int length, int max_val) {
	// 清空計數桶，時間複雜度 O(σ)
	for (int i = 0; i <= max_val; ++i)
	bucket[i] = 0;

	// 統計每個字符的出現次數，時間複雜度 O(n)
	for (int i = 0; i < length; ++i)
	bucket[str[input_idx[i]]]++;

	// 計算累積計數，確定排序位置，時間複雜度 O(σ)
	for (int i = 1; i <= max_val; ++i)
	bucket[i] += bucket[i - 1];

	// 從後向前穩定排序，時間複雜度 O(n)
	for (int i = length - 1; i >= 0; --i)
	sorted_idx[--bucket[str[input_idx[i]]]] = input_idx[i];
	};
	/**
	* 三字符比較函數
	* 用於判斷兩個位置的三字符前綴是否完全相同
	* 時間複雜度：O(1)
	*/
	auto compare_three_chars = [&](int *str, int pos_x, int pos_y) {
	return str[pos_x] == str[pos_y] && str[pos_x + 1] == str[pos_y + 1] &&
	str[pos_x + 2] == str[pos_y + 2];
	};

	/**
	* DC3 主遞歸函數
	*
	* 時間複雜度詳細分析：
	* T(n, σ) = T(⌊2n/3⌋, O(n)) + O(n + σ)
	*
	* 線性部分 O(n + σ) 包括：
	* 1. Type B 後綴收集與三輪計數排序：3 × O(n + σ) = O(n + σ)
	* 2. 重新編號和去重檢查：O(n)
	* 3. Type A 後綴處理：O(n + σ)
	* 4. 最終歸併：O(n)
	*
	* 遞歸部分：問題規模縮減到 ⌊2n/3⌋，字母表大小變為 O(n)
	*
	* 總體複雜度：
	* - 當 σ = O(1)：T(n) = O(n)
	* - 當 σ = O(n)：T(n) = O(n)
	* - 當 σ > n：T(n) = O(σ)
	*
	* @param str 數值化的字符數組
	* @param sa 輸出的後綴數組
	* @param string_length 字符串長度
	* @param alphabet_size 字母表大小
	*/
	std::function<void(int , int , int, int)> DC3 = [&](int str, int sa,
	int string_length,
	int alphabet_size) {
	// 長度模3等於1時需要填充，確保遞歸時 Type B1 和 B2 後綴間有隔離字符
	bool needs_padding = (string_length % 3 == 1);
	if (needs_padding)
	str[string_length++] = 0;
	// 設置工作指針，利用現有空間避免額外分配
	int *new_ranks = str + string_length + 2, // 新排名數組
	*new_sa = sa + string_length, // 遞歸結果數組
	Num12 = 0, current_rank; // Type B 後綴計數

	// === 第一階段：收集並排序 Type B 後綴 (i%3 != 0) ===
	// Type B 後綴占總數的 2/3，是遞歸處理的對象
	for (int i = 0; i < string_length; ++i)
	if (i % 3)
	temp_a[Num12++] = i;

	// 添加哨兵，確保邊界安全
	str[string_length] = str[string_length + 1] = 0;

	// 三輪計數排序：按三字符前綴 (str[i], str[i+1], str[i+2]) 排序
	// 每輪排序時間複雜度：O(n + σ)
	counting_sort(str + 2, temp_a, temp_b, Num12, alphabet_size); // 第三字符
	counting_sort(str + 1, temp_b, temp_a, Num12, alphabet_size); // 第二字符
	counting_sort(str, temp_a, temp_b, Num12, alphabet_size); // 第一字符
	// 計算各類型後綴的數量
	int Num0 = 0, Num1 = (string_length + 1) / 3; // Type B1 後綴數量

	// 位置映射函數：將原始位置映射到遞歸數組中的位置
	// Type B1 (i%3==1) 映射到前半部分 [0, Num1)
	// Type B2 (i%3==2) 映射到後半部分 [Num1, Num12)
	auto POSITION_MAPPING = [&](int x) {
	return (x / 3) + (x % 3 == 1 ? 0 : Num1);
	};

	// === 第二階段：重新編號 Type B 後綴 ===
	// 為相同的三字符前綴分配相同的排名
	new_ranks[POSITION_MAPPING(temp_b[0])] = current_rank = 1;
	for (int i = 1; i < Num12; ++i) {
	// 如果三字符前綴不同，增加排名
	if (!compare_three_chars(str, temp_b[i], temp_b[i - 1]))
	current_rank++;
	new_ranks[POSITION_MAPPING(temp_b[i])] = current_rank;
	}

	// === 第三階段：遞歸處理或直接構造 SA ===
	if (current_rank < Num12) {
	// 仍有重複的三字符前綴，需要遞歸處理
	// 遞歸調用：T(⌊2n/3⌋, current_rank)
	DC3(new_ranks, new_sa, Num12, current_rank);
	} else {
	// 所有三字符前綴都不同，直接構造後綴數組
	for (int i = 0; i < Num12; ++i)
	if (new_ranks[i])
	new_sa[new_ranks[i] - 1] = i;
	}
	// === 第四階段：處理 Type A 後綴 (i%3 == 0) ===
	// 從 Type B 的排序結果中提取對應的 Type A 後綴
	for (int i = 0; i < Num12; ++i)
	if (new_sa[i] < Num1)
	temp_b[Num0++] = new_sa[i] * 3;

	// 對 Type A 後綴按首字符排序，時間複雜度 O(n + σ)
	counting_sort(str, temp_b, temp_a, Num0, alphabet_size);

	// 逆向位置映射：從遞歸數組位置映射回原始位置
	auto ORIGINAL_POSITION = [&](int x) {
	return x >= Num1 ? (x - Num1) * 3 + 2 : x * 3 + 1;
	};

	// 建立 Type B 後綴的排名查找表
	// new_ranks[position] = rank，用於常數時間比較
	new_ranks = bucket.data();
	for (int i = 0; i < Num12; ++i)
	new_ranks[temp_b[i] = ORIGINAL_POSITION(new_sa[i])] = i;
	new_ranks[string_length] = -1; // 哨兵

	// === 第五階段：歸併 Type A 和 Type B 的排序結果 ===
	/**
	* 後綴比較函數 - DC3算法的核心比較邏輯
	*
	* 比較策略：
	* 1. 保證參數順序：pos_x 為 Type B，pos_y 為 Type A
	* 2. 首字符不同：直接比較字典序
	* 3. Type B1 後綴 (pos % 3 == 1)：比較遞歸排名
	* 4. Type B2 後綴 (pos % 3 == 2)：遞歸比較下一位
	*
	* 性能優化：
	* - 計算 pos_x % 3 只執行一次，避免重複計算
	* - 利用已計算的排名信息，避免字符串比較
	* - 確保比較操作的常數時間複雜度
	*
	* 時間複雜度：O(1) 均攤，這是DC3算法達到線性時間的關鍵
	*/
	std::function<bool(int, int)> suffix_comparator = [&](int pos_x,
	int pos_y) {
	// 優化：計算模運算只一次，避免重複計算
	int mod3 = pos_x % 3;

	// 保證 pos_x 是 Type B 後綴，pos_y 是 Type A 後綴
	if (mod3 == 0)
	return !suffix_comparator(pos_y, pos_x);

	// 首字符不同，直接比較
	if (str[pos_x] != str[pos_y])
	return str[pos_x] < str[pos_y];

	if (mod3 == 1)
	// Type B1: 比較下一位的遞歸排名
	return new_ranks[pos_x + 1] < new_ranks[pos_y + 1];
	else
	// Type B2: 遞歸比較，交換參數順序實現正確的字典序
	return !suffix_comparator(pos_y + 1, pos_x + 1);
	};

	// 使用 std::merge 歸併兩個已排序的後綴類型
	// 注意：needs_padding 用於跳過隔離字符
	// 當 needs_padding=true 時，隔離字符 \0 必然排在 temp_b 的第一位
	// 因為 \0 是最小值，所以歸併時從 temp_b[1] 開始，跳過這個隔離字符
	merge(temp_a.begin(), temp_a.begin() + Num0, // Type A 範圍
	temp_b.begin() + needs_padding, temp_b.begin() + Num12, // Type B 範圍
	sa, // 輸出位置
	suffix_comparator); // 比較函數
	};

	// === 算法執行階段 ===
	// 將字符串轉換為數值數組，便於處理
	for (size_t i = 0; i < s.size(); ++i)
	str_v[i] = s[i];

	// 調用主算法：DC3(字符數組, 後綴數組, 長度, 字母表大小)
	// 字母表大小設為 255，支持所有 ASCII 字符
	DC3(str_v.data(), suffix_array_v.data(), s.size(), 255);

	// 調整結果數組大小並返回
	suffix_array_v.resize(s.size());
	return suffix_array_v;
	}
	#include<algorithm>
	#define radix_sort(x,y){\
	for(i=0;i<A;++i)c[i]=0;\
	for(i=0;i<len;++i)c[x[y[i]]]++;\
	for(i=1;i<A;++i)c[i]+=c[i-1];\
	for(i=len-1;i>=0;--i)sa[--c[x[y[i]]]]=y[i];\
	}
	#define sac(r,a,b) r[a]!=r[b]\|\|a+k>=len\|\|r[a+k]!=r[b+k]
	inline void suffix_array(const char s,int len,int sa,int rank,int tmp,int *c){
	int A='z'+1,i,k,id=0;
	for(i=0;i<len;++i)rank[tmp[i]=i]=s[i];
	radix_sort(rank,tmp);
	for(k=1;id<len-1;k<<=1){
	for(id=0,i=len-k;i<len;++i)tmp[id++]=i;
	for(i=0;i<len;++i)if(sa[i]>=k)tmp[id++]=sa[i]-k;
	radix_sort(rank,tmp);
	std::swap(rank,tmp);
	for(rank[sa[0]]=id=0,i=1;i<len;++i)
	rank[sa[i]]=id+=sac(tmp,sa[i-1],sa[i]);
	A=id+1;
	}
	}
	#undef radix_sort
	#undef sac
	#include<algorithm>
	struct CMP{
	int len,k,*rank,a,b;
	inline bool operator()(int i,int j){
	if(rank[i]!=rank[j])return rank[i]<rank[j];
	a=(i+=k)<len?rank[i]:-1;
	b=(j+=k)<len?rank[j]:-1;
	return a<b;
	}
	};
	inline void suffix_array(const char s,int len,int sa,int rank,int tmp){
	for(int i=0;i<len;++i){
	sa[i]=i;
	rank[i]=s[i];
	}
	CMP cmp={len,1};
	for(;;cmp.k<<=1){
	cmp.rank=rank;
	std::sort(sa,sa+len,cmp);
	tmp[sa[0]]=0;
	for(int i=1;i<len;++i)tmp[sa[i]]=tmp[sa[i-1]]+cmp(sa[i-1],sa[i]);
	if(tmp[sa[len-1]]==len-1)break;
	std::swap(rank,tmp);
	}
	}
	/*
	h:高度數組
	sa:後綴數組
	rank:排名
	*/
	inline void suffix_array_lcp(const char s,int len,int h,int sa,int rank){
	for(int i=0;i<len;++i)rank[sa[i]]=i;
	for(int i=0,k=0;i<len;++i){
	if(rank[i]==0)continue;
	if(k)--k;
	while(s[i+k]==s[sa[rank[i]-1]+k])++k;
	h[rank[i]]=k;
	}
	h[0]=0;
	}
	#include<string.h>
	#define MAGIC(XD){\
	memset(sa,0,sizeof(int)*n);\
	memcpy(x,c,sizeof(int)*z);\
	XD\
	memcpy(x+1,c,sizeof(int)*(z-1));\
	for(i=0;i<n;++i){\
	if(sa[i]&&!t[sa[i]-1])sa[x[s[sa[i]-1]]++]=sa[i]-1;\
	}\
	memcpy(x,c,sizeof(int)*z);\
	for(i=n-1;i>=0;--i){\
	if(sa[i]&&t[sa[i]-1])sa[--x[s[sa[i]-1]]]=sa[i]-1;\
	}\
	}
	void sais(int s,int sa,int p,bool t,int *c,int n,int z){
	bool neq=t[n-1]=1;
	int nn=0,nmxz=-1,q=sa+n,ns=s+n,*x=c+z,lst=-1,i,j;
	memset(c,0,sizeof(int)*z);
	for(i=0;i<n;++i)++c[s[i]];
	for(i=0;i<z-1;++i)c[i+1]+=c[i];
	for(i=n-2;i>=0;i--)t[i]=(s[i]==s[i+1]?t[i+1]:s[i]<s[i+1]);
	MAGIC(
	for(i=1;i<n;++i){
	if(t[i]&&!t[i-1])sa[--x[s[i]]]=p[q[i]=nn++]=i;
	}
	);
	for(i=0;i<n;++i)if((j=sa[i])&&t[j]&&!t[j-1]){
	neq=lst<0\|\|memcmp(s+j,s+lst,(p[q[j]+1]-j)*sizeof(int));
	ns[q[lst=j]]=nmxz+=neq;
	}
	if(nmxz==nn-1)for(i=0;i<nn;++i)q[ns[i]]=i;
	else sais(ns,q,p+nn,t+n,c+z,nn,nmxz+1);
	MAGIC(
	for(i=nn-1;i>=0;--i)sa[--x[s[p[q[i]]]]]=p[q[i]];
	);
	}
	#undef MAGIC
	/***************這些是需要用到的陣列大小************/
	static const int MXN=10000000;
	int s[MXN2+5],sa[MXN2+5],p[MXN+5],c[MXN*2+5];
	bool t[MXN*2+5];
	/SA-IS Algorithm/
	const unsigned char mask[]={0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01};
	#define tget(i) ( (t[(i)/8]&mask[(i)%8])?1:0)
	#define tset(i, b) t[(i)/8]=(b)?(mask[(i)%8]\|t[(i)/8]):((~mask[(i)%8])&t[(i)/8])
	#define chr(i) (cs==sizeof(int)?((int)s)[i]:((unsigned char )s)[i])
	#define isLMS(i) (i>0&&tget(i)&&!tget(i-1))
	// find the start or end of each bucket
	inline void getBuckets(unsigned char s,int bkt,int n,int K,int cs,bool end){
	int i,sum=0;
	// clear all buckets
	for(i=0;i<=K;++i)bkt[i]=0;
	// compute the size of each bucket
	for(i=0;i<n;++i)++bkt[chr(i)];
	for(i=0;i<=K;++i){
	sum+=bkt[i];
	bkt[i]=end?sum:sum-bkt[i];
	}
	}
	// compute SAl
	inline void induceSAl(unsigned char t,int SA,unsigned char s,int bkt,int n,int K,int cs,bool end){
	int i,j;
	// find starts of buckets
	getBuckets(s,bkt,n,K,cs,end);
	for(i=0;i<n;++i){
	j=SA[i]-1;
	if(j>=0&&!tget(j))SA[bkt[chr(j)]++]=j;
	}
	}
	// compute SAs
	inline void induceSAs(unsigned char t,int SA,unsigned char s,int bkt,int n,int K,int cs,bool end){
	int i,j;
	// find ends of buckets
	getBuckets(s,bkt,n,K,cs,end);
	for(i=n-1;i>=0;--i){
	j=SA[i]-1;
	if(j>=0&&tget(j))SA[--bkt[chr(j)]]=j;
	}
	}
	// find the suffix array SA of s[0..n-1] in {1..K}?n
	// require s[n-1]=0 (the sentinel!), n>=2
	// use a working space (excluding s and SA) of
	// at most 2.25n+O(1) for a constant alphabet
	void SA_IS(unsigned char s,int SA,int n,int K,short cs){
	// LS-type array in bits
	unsigned char t=(unsigned char )malloc(n/8+1);
	int i,j;
	// classify the type of each character
	// the sentinel must be in s1, important!!!
	tset(n-2,0);tset(n-1,1);
	for(i=n-3;i>=0;--i)
	tset(i,(chr(i)<chr(i+1)\|\|(chr(i)==chr(i+1)&&tget(i+1)==1))?1:0);
	// stage 1: reduce the problem by at least 1/2
	// sort all the S-substrings
	// bucket array
	int bkt=(int )malloc(sizeof(int)*(K+1));
	// find ends of buckets
	getBuckets(s,bkt,n,K,cs,true);
	for(i=0;i<n;++i)SA[i]=-1;
	for(i=1;i<n;++i)if(isLMS(i))SA[--bkt[chr(i)]]=i;
	induceSAl(t,SA,s,bkt,n,K,cs,false);
	induceSAs(t,SA,s,bkt,n,K,cs,true);
	free(bkt);
	// compact all the sorted substrings into
	// the first n1 items of SA
	// 2*n1 must be not larger than n (proveable)
	int n1=0;
	for(i=0;i<n;++i)if(isLMS(SA[i]))SA[n1++]=SA[i];
	// find the lexicographic names of substrings
	// init the name array buffer
	for(i=n1;i<n;++i)SA[i]=-1;
	int name=0,prev=-1;
	for(i=0;i<n1;++i){
	int pos=SA[i];bool diff=false;
	for(int d=0;d<n;++d)
	if(prev==-1\|\|chr(pos+d)!=chr(prev+d)\|\|tget(pos+d)!=tget(prev+d)){
	diff=true;break;
	}else if(d>0&&(isLMS(pos+d)\|\|isLMS(prev+d)))break;
	if(diff){
	++name;prev=pos;
	}
	pos=(pos%2==0)?pos/2:(pos-1)/2;
	SA[n1+pos]=name-1;
	}
	for(i=n-1,j=n-1;i>=n1;--i)if(SA[i]>=0)SA[j--]=SA[i];
	// stage 2: solve the reduced problem
	// recurse if names are not yet unique
	int SA1=SA,s1=SA+n-n1;
	if(name<n1)SA_IS((unsigned char*)s1,SA1,n1,name-1,sizeof(int));
	else // generate the suffix array of s1 directly
	for(i=0;i<n1;++i)SA1[s1[i]]=i;
	// stage 3: induce the result for
	// the original problem
	// bucket array
	bkt=(int )malloc(sizeof(int)(K+1));
	// put all the LMS characters into their buckets
	// find ends of buckets
	getBuckets(s,bkt,n,K,cs,true);
	for(i=1,j=0;i<n;++i)if(isLMS(i))s1[j++]=i; // get p1
	// get index in s
	for(i=0;i<n1;++i)SA1[i]=s1[SA1[i]];
	// init SA[n1..n-1]
	for(i=n1;i<n;++i)SA[i]=-1;
	for(i=n1-1;i>=0;--i){
	j=SA[i];SA[i]=-1;
	SA[--bkt[chr(j)]]=j;
	}
	induceSAl(t,SA,s,bkt,n,K,cs,false);
	induceSAs(t,SA,s,bkt,n,K,cs,true);
	free(bkt);free(t);
	}
	#include <time.h>
	#include <string.h>
	#include <stdio.h>
	#include <stdlib.h>

	#ifndef UCHAR_SIZE
	# define UCHAR_SIZE 256
	#endif
	#ifndef MINBUCKETSIZE
	# define MINBUCKETSIZE 256
	#endif

	#define sais_index_type int
	#define sais_bool_type int
	#define SAIS_LMSSORT2_LIMIT 0x3fffffff

	#define SAIS_MYMALLOC(_num, _type) ((_type )malloc((_num) sizeof(_type)))
	#define chr(_a) (cs == sizeof(sais_index_type) ? ((sais_index_type )T)[(_a)] : ((unsigned char )T)[(_a)])

	/* find the start or end of each bucket */
	inline void getCounts(const void T,sais_index_type C,sais_index_type n,sais_index_type k,int cs){
	sais_index_type i;
	for(i=0;i<k;++i)C[i]=0;
	for(i=0;i<n;++i)++C[chr(i)];
	}
	inline void getBuckets(const sais_index_type C,sais_index_type B,sais_index_type k,sais_bool_type end){
	sais_index_type i,sum=0;
	if(end)for(i=0;i<k;++i){
	sum+=C[i];
	B[i]=sum;
	}else for(i=0;i<k;++i){
	sum+=C[i];
	B[i]=sum-C[i];
	}
	}

	/* sort all type LMS suffixes */
	inline void LMSsort1(const void T,sais_index_type SA,sais_index_type C,sais_index_type B,sais_index_type n,sais_index_type k,int cs){
	sais_index_type *b,i,j;
	sais_index_type c0,c1;

	/* compute SAl */
	if(C==B)getCounts(T,C,n,k,cs);
	getBuckets(C,B,k,0); /* find starts of buckets */
	j=n-1;
	b=SA+B[c1=chr(j)];
	--j;
	*b++=(chr(j)<c1)?~j:j;
	for(i=0;i<n;++i){
	if(0<(j=SA[i])){
	if((c0=chr(j))!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	--j;
	*b++=(chr(j)<c1)?~j:j;
	SA[i]=0;
	}else if(j<0)SA[i]=~j;
	}
	/* compute SAs */
	if(C==B)getCounts(T,C,n,k,cs);
	getBuckets(C,B,k,1); /* find ends of buckets */
	for(i=n-1,b=SA+B[c1=0];0<=i;--i){
	if(0<(j=SA[i])){
	if((c0=chr(j))!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	--j;
	*--b=(chr(j)>c1)?~(j+1):j;
	SA[i]=0;
	}
	}
	}
	inline sais_index_type LMSpostproc1(const void T,sais_index_type SA,sais_index_type n,sais_index_type m,int cs){
	sais_index_type i,j,p,q,plen,qlen,name;
	sais_index_type c0,c1;
	sais_bool_type diff;

	/* compact all the sorted substrings into the first m items of SA
	2m must be not larger than n (proveable) /
	for(i=0;(p=SA[i])<0;++i)SA[i]=~p;
	if(i<m){
	for(j=i,++i;;++i){
	if((p=SA[i])<0){
	SA[j++]=~p;SA[i]=0;
	if(j==m)break;
	}
	}
	}

	/* store the length of all substrings */
	i=n-1;j=n-1;c0=chr(n-1);
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))>=c1));
	for(;0<=i;){
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))<=c1));
	if(0<=i){
	SA[m+((i+1)>>1)]=j-i;
	j=i+1;
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))>=c1));
	}
	}

	/* find the lexicographic names of all substrings */
	for(i=0,name=0,q=n,qlen=0;i<m;++i){
	p=SA[i],plen=SA[m+(p>>1)],diff=1;
	if((plen==qlen)&&((q+plen)<n)){
	for(j=0;(j<plen)&&(chr(p+j)==chr(q+j));++j);
	if(j==plen)diff=0;
	}
	if(diff!=0)++name,q=p,qlen=plen;
	SA[m+(p>>1)]=name;
	}
	return name;
	}
	inline void LMSsort2(const void T,sais_index_type SA,sais_index_type C,sais_index_type B,sais_index_type *D,sais_index_type n,sais_index_type k,int cs){
	sais_index_type *b,i,j,t,d;
	sais_index_type c0,c1;

	/* compute SAl */
	getBuckets(C,B,k,0); /* find starts of buckets */
	j=n-1;
	b=SA+B[c1=chr(j)];
	--j;
	t=(chr(j)<c1);
	j+=n;
	*b++=(t&1)?~j:j;
	for(i=0,d=0;i<n;++i){
	if(0<(j=SA[i])){
	if(n<=j)d+=1,j-=n;
	if((c0=chr(j))!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	--j;
	t=c0;t=(t<<1)\|(chr(j)<c1);
	if(D[t]!=d)j+=n,D[t]=d;
	*b++=(t&1)?~j:j;
	SA[i]=0;
	}else if(j<0)SA[i]=~j;
	}
	for(i=n-1;0<=i;--i){
	if(0<SA[i]){
	if(SA[i]<n){
	SA[i]+=n;
	for(j=i-1;SA[j]<n;--j);
	SA[j]-=n;
	i=j;
	}
	}
	}

	/* compute SAs */
	getBuckets(C,B,k,1); /* find ends of buckets */
	for(i=n-1,d+=1,b=SA+B[c1=0];0<=i;--i){
	if(0<(j=SA[i])){
	if(n<=j)d+=1,j-=n;
	if((c0=chr(j))!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	--j;
	t=c0;t=(t<<1)\|(chr(j)>c1);
	if(D[t]!=d)j+=n,D[t]=d;
	*--b=(t&1)?~(j+1):j;
	SA[i]=0;
	}
	}
	}
	inline sais_index_type LMSpostproc2(sais_index_type *SA,sais_index_type n,sais_index_type m){
	sais_index_type i,j,d,name;

	/* compact all the sorted LMS substrings into the first m items of SA */
	for(i=0,name=0;(j=SA[i])<0;++i){
	j=~j;
	if(n<=j)name+=1;
	SA[i]=j;
	}
	if(i<m){
	for(d=i,++i;;++i){
	if((j=SA[i])<0){
	j=~j;
	if(n<=j)name+=1;
	SA[d++]=j;SA[i]=0;
	if(d==m)break;
	}
	}
	}
	if(name<m){
	/* store the lexicographic names */
	for(i=m-1,d=name+1;0<=i;--i){
	if(n<=(j=SA[i]))j-=n,--d;
	SA[m+(j>>1)]=d;
	}
	}else{
	/* unset flags */
	for(i=0;i<m;++i){
	if(n<=(j=SA[i]))j-=n,SA[i]=j;
	}
	}

	return name;
	}

	/* compute SA and BWT */
	inline void induceSA(const void T,sais_index_type SA,sais_index_type C,sais_index_type B,sais_index_type n,sais_index_type k,int cs){
	sais_index_type *b,i,j;
	sais_index_type c0,c1;
	/* compute SAl */
	if(C==B)getCounts(T, C, n, k, cs);
	getBuckets(C,B,k,0); /* find starts of buckets */
	j =n-1;
	b=SA+B[c1=chr(j)];
	*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
	for(i=0;i<n;++i){
	j=SA[i],SA[i]=~j;
	if(0<j){
	--j;
	if((c0=chr(j))!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
	}
	}
	/* compute SAs */
	if(C==B)getCounts(T,C,n,k,cs);
	getBuckets(C,B,k,1); /* find ends of buckets */
	for(i=n-1,b=SA+B[c1=0];0<=i;--i){
	if(0<(j=SA[i])){
	--j;
	if((c0=chr(j))!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	*--b=((j==0)\|\|(chr(j-1)>c1))?~j:j;
	}else SA[i]=~j;
	}
	}
	inline sais_index_type computeBWT(const void T,sais_index_type SA,sais_index_type C,sais_index_type B,sais_index_type n,sais_index_type k,int cs){
	sais_index_type *b,i,j,pidx=-1;
	sais_index_type c0,c1;
	/* compute SAl */
	if(C==B)getCounts(T, C, n, k, cs);
	getBuckets(C,B,k,0); /* find starts of buckets */
	j=n-1;
	b=SA+B[c1=chr(j)];
	*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
	for(i=0;i<n;++i){
	if(0<(j=SA[i])){
	--j;
	SA[i]=~((sais_index_type)(c0 = chr(j)));
	if(c0!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	*b++=((0<j)&&(chr(j-1)<c1))?~j:j;
	}else if(j!=0)SA[i]=~j;
	}
	/* compute SAs */
	if(C==B)getCounts(T,C,n,k,cs);
	getBuckets(C,B,k,1); /* find ends of buckets */
	for(i=n-1,b=SA+B[c1=0];0<=i;--i){
	if(0<(j=SA[i])){
	--j;
	SA[i]=(c0=chr(j));
	if(c0!=c1){
	B[c1]=b-SA;
	b=SA+B[c1=c0];
	}
	*--b=((0<j)&&(chr(j-1)>c1))?~((sais_index_type)chr(j-1)):j;
	}else if(j!=0)SA[i]=~j;
	else pidx=i;
	}
	return pidx;
	}

	/* find the suffix array SA of T[0..n-1] in {0..255}^n */
	sais_index_type sais_main(const void T,sais_index_type SA,sais_index_type fs,sais_index_type n,sais_index_type k,int cs,sais_bool_type isbwt){
	sais_index_type C,B,D,RA,*b;
	sais_index_type i,j,m,p,q,t,name,pidx=0,newfs;
	sais_index_type c0,c1;
	unsigned int flags;


	if(k<=MINBUCKETSIZE){
	if((C=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
	if(k<=fs){
	B=SA+(n+fs-k);
	flags=1;
	}else{
	if((B=SAIS_MYMALLOC(k,sais_index_type))==NULL){
	free(C);
	return -2;
	}
	flags=3;
	}
	}else if(k<=fs){
	C=SA+(n+fs-k);
	if(k<=(fs-k)){
	B=C-k;
	flags=0;
	}else if(k<=(MINBUCKETSIZE*4)){
	if((B=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
	flags=2;
	}else{
	B=C;
	flags=8;
	}
	}else{
	if((C=B=SAIS_MYMALLOC(k,sais_index_type))==NULL)return -2;
	flags=4\|8;
	}
	if((n<=SAIS_LMSSORT2_LIMIT)&&(2<=(n/k))){
	if(flags&1)flags\|=((k*2)<=(fs-k))?32:16;
	else if((flags==0)&&((k2)<=(fs-k2)))flags\|=32;
	}

	/* stage 1: reduce the problem by at least 1/2
	sort all the LMS-substrings */
	getCounts(T,C,n,k,cs);getBuckets(C,B,k,1); /* find ends of buckets */
	for(i=0;i<n;++i)SA[i]=0;
	b=&t;i=n-1;j=n;m=0;c0=chr(n-1);
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))>=c1));
	for(;0<=i;){
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))<=c1));
	if(0<=i){
	*b=j;b=SA+--B[c1];j=i;++m;
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))>=c1));
	}
	}

	if(1<m){
	if(flags&(16\|32)){
	if(flags&16){
	if((D=SAIS_MYMALLOC(k*2,sais_index_type))==NULL){
	if(flags&(1\|4))free(C);
	if(flags&2)free(B);
	return -2;
	}
	}else D=B-k*2;
	++B[chr(j+1)];
	for(i=0,j=0;i<k;++i){
	j+=C[i];
	if(B[i]!=j)SA[B[i]]+=n;
	D[i]=D[i+k]=0;
	}
	LMSsort2(T,SA,C,B,D,n,k,cs);
	name=LMSpostproc2(SA,n,m);
	if(flags&16)free(D);
	}else{
	LMSsort1(T,SA,C,B,n,k,cs);
	name=LMSpostproc1(T,SA,n,m,cs);
	}
	}else if(m==1){
	*b=j+1;
	name=1;
	}else name = 0;

	/* stage 2: solve the reduced problem
	recurse if names are not yet unique */
	if(name<m){
	if(flags&4)free(C);
	if(flags&2)free(B);
	newfs=(n+fs)-(m*2);
	if((flags&(1\|4\|8))==0){
	if((k+name)<=newfs)newfs-=k;
	else flags\|=8;
	}
	RA=SA+m+newfs;
	for(i=m+(n>>1)-1,j=m-1;m<=i;--i){
	if(SA[i]!=0)RA[j--]=SA[i]-1;
	}
	if(sais_main(RA,SA,newfs,m,name,sizeof(sais_index_type),0)!=0){
	if(flags&1)free(C);
	return -2;
	}

	i=n-1;j=m-1;c0=chr(n-1);
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))>=c1));
	for(;0<=i;){
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))<=c1));
	if(0<=i){
	RA[j--]=i+1;
	do{
	c1=c0;
	}while((0<=--i)&&((c0=chr(i))>=c1));
	}
	}
	for(i=0;i<m;++i)SA[i]=RA[SA[i]];
	if(flags&4){
	if((C=B=SAIS_MYMALLOC(k,int))==NULL)return -2;
	}
	if(flags&2){
	if((B=SAIS_MYMALLOC(k,int))==NULL){
	if(flags&1)free(C);
	return -2;
	}
	}
	}

	/* stage 3: induce the result for the original problem */
	if(flags&8)getCounts(T,C,n,k,cs);
	/* put all left-most S characters into their buckets */
	if(1<m){
	getBuckets(C,B,k,1); /* find ends of buckets */
	i=m-1,j=n,p=SA[m-1],c1=chr(p);
	do{
	q=B[c0=c1];
	while(q<j)SA[--j]=0;
	do{
	SA[--j]=p;
	if(--i<0)break;
	p=SA[i];
	}while((c1=chr(p))==c0);
	}while(0<=i);
	while(0<j)SA[--j]=0;
	}
	if(isbwt==0)induceSA(T,SA,C,B,n,k,cs);
	else pidx=computeBWT(T,SA,C,B,n,k,cs);
	if(flags&(1\|4))free(C);
	if(flags&2)free(B);

	return pidx;
	}

	/---------------------------------------------------------------------------/

	inline int sais(const unsigned char T,int SA,int n){
	if((T==NULL)\|\|(SA==NULL)\|\|(n<0))return -1;
	if(n<=1){
	if(n==1)SA[0]=0;
	return 0;
	}
	return sais_main(T,SA,0,n,UCHAR_SIZE,sizeof(unsigned char),0);
	}

	inline int sais_int(const int T,int SA,int n,int k){
	if((T==NULL)\|\|(SA==NULL)\|\|(n<0)\|\|(k<=0))return -1;
	if(n<=1){
	if(n==1)SA[0]=0;
	return 0;
	}
	return sais_main(T,SA,0,n,k,sizeof(int),0);
	}

	inline int sais_bwt(const unsigned char T,unsigned char U,int *A,int n){
	int i,pidx;
	if((T==NULL)\|\|(U==NULL)\|\|(A==NULL)\|\|(n<0))return -1;
	if(n<=1){
	if(n==1)U[0]=T[0];
	return n;
	}
	pidx=sais_main(T,A,0,n,UCHAR_SIZE,sizeof(unsigned char),1);
	if(pidx<0)return pidx;
	U[0]=T[n-1];
	for(i=0;i<pidx;++i)U[i+1]=(unsigned char)A[i];
	for(i+=1;i<n;++i)U[i]=(unsigned char)A[i];
	pidx+=1;
	return pidx;
	}

	inline int sais_int_bwt(const int T,int U,int *A,int n,int k){
	int i,pidx;
	if((T==NULL)\|\|(U==NULL)\|\|(A==NULL)\|\|(n<0)\|\|(k<=0))return -1;
	if(n<=1){
	if(n==1)U[0]=T[0];
	return n;
	}
	pidx=sais_main(T,A,0,n,k,sizeof(int),1);
	if(pidx<0)return pidx;
	U[0]=T[n-1];
	for(i=0;i<pidx;++i)U[i+1]=A[i];
	for(i+=1;i<n;++i)U[i]=A[i];
	pidx+=1;
	return pidx;
	}
	char s[10000005];
	int sa[10000005],len;
	int main(){
	freopen("in.txt","r",stdin);
	gets(s);
	int st=clock();
	sais((unsigned char*)s,sa,len=strlen(s));
	printf("%f\n",(double)(clock()-st)/1000);
	//for(int i=0;i<len;++i)printf("%d\n",sa[i]);
	return 0;
	}
	#define pushS(x) sa[--bkt[s[x]]] = x
	#define pushL(x) sa[bkt[s[x]]++] = x
	#define induce_sort(v){\
	fill_n(sa,n,0);\
	copy_n(_bkt,A,bkt);\
	for(i=n1-1;~i;--i)pushS(v[i]);\
	copy_n(_bkt,A-1,bkt+1);\
	for(i=0;i<n;++i)if(sa[i]&&!t[sa[i]-1])pushL(sa[i]-1);\
	copy_n(_bkt,A,bkt);\
	for(i=n-1;~i;--i)if(sa[i]&&t[sa[i]-1])pushS(sa[i]-1);\
	}
	template<typename T>
	void sais(const T s,int n,int sa,int _bkt,int p,bool t,int A){
	int rnk=p+n,s1=p+n/2,*bkt=_bkt+A;
	int n1=0,i,j,x=t[n-1]=1,y=rnk[0]=-1,cnt=-1;
	for(i=n-2;~i;--i)t[i]=(s[i]==s[i+1]?t[i+1]:s[i]<s[i+1]);
	for(i=1;i<n;++i)rnk[i]=t[i]&&!t[i-1]?(p[n1]=i,n1++):-1;
	fill_n(_bkt,A,0);
	for(i=0;i<n;++i)++_bkt[s[i]];
	for(i=1;i<A;++i)_bkt[i]+=_bkt[i-1];
	induce_sort(p);
	for(i=0;i<n;++i)if(~(x=rnk[sa[i]]))
	j=y<0\|\|memcmp(s+p[x],s+p[y],(p[x+1]-p[x])*sizeof(s[0]))
	,s1[y=x]=cnt+=j;
	if(cnt+1<n1)sais(s1,n1,sa,bkt,rnk,t+n,cnt+1);
	else for(i=0;i<n1;++i)sa[s1[i]]=i;
	for(i=0;i<n1;++i)s1[i]=p[sa[i]];
	induce_sort(s1);
	}
	/***************這些是需要用到的陣列大小************/
	const int MAXN=10000005,MAXA='z'+1;
	int sa[MAXN],bkt[MAXN+MAXA],p[MAXN*2];
	bool t[MAXN*2];
	char s[MAXN];