缺点:多重哈希容易卡常,单哈希容易被hack
#define ll long long
const ll MOD = 998244353, BASE = 171;
int n = s.size();
vector<ll> H(n+1, 0), B(n+1, 1);
// string s[] index from 0 to n-1
// B[i] = BASE^i
// s[i...j] = s[0...j] - s[0...i-1]
// hash s[0...i-1] = H[i] = s[0]*B[i-1]+s[1]*B[i-2]+...+s[i-1]*B[0]
// hash s[0...j] = H[j+1] = s[0]*B[j]+s[1]*B[j-1]+...+s[j]*B[0]
// hash s[i...j] = H[j+1] - H[i]*B[j-i+1]
// hash s[i...j-1] = H[j] - H[i]*B[j-i]
for (int i=1; i<=n; i++) {
H[i] = (H[i-1]*BASE%MOD + s[i-1])%MOD;
}
for (int i=1; i<=n; i++) {
B[i] = B[i-1]*BASE%MOD;
}
auto hash = [&](int l, int r) { // hash of s[l...r-1]
return (H[r] - H[l] * B[r - l] % MOD + MOD) % MOD;
};
对于长度为n的字符串s,下标从0开始。
令$B[i] = BASE^i$
长度为i的前缀子串s[0…i-1]
的哈希值, $H[i] = \sum \limits_{j=0}^{i-1} s[j]*B[i-1-j]$
长度为len的子串s[i…j]
的哈希值H[j+1] - H[i]*B[j-i+1]
注意取模。
毒瘤写法,自然溢出
#define ull unsigned long long
#define BASE 131
int main() {
string s = "abcdabcdab";
int n = s.size();//n=10
vector<ull> H(n+1, 0), B(n+1, 1);
for (int i=1; i<=n; i++) {
H[i] = H[i-1]*BASE+s[i-1];
B[i] = B[i-1]*BASE;
}
// string s[] index from 0 to n-1
// H[i] 前i个字符的前缀串哈希值, H[0] = 0。
// B[i] = BASE^i
// H[r]-H[l]*B[r-l] = s[l...r-1]
// s[l...r] = H[r+1] - H[l]*B[r-l+1]
auto hash = [&](int l, int r) { // hash of s[l...r-1]
return H[r] - H[l] * B[r - l];
};
cout << H[3] - H[0]*B[3] << " " << H[7] - H[4]*B[3] << endl; // abc abc
cout << H[4] - H[0]*B[4] << " " << H[8] - H[4]*B[4] << endl; // abcd abcd
cout << H[2] - H[0]*B[2] << " " << H[6] - H[4]*B[2] << " " << H[10]-H[8]*B[2] << endl; // abcd abcd
return 0;
}
/*
1677554 1677554
219759674 219759674
12805 12805 12805
*/
// 单哈希容易被卡,封装用多重哈希
// 两个串的哈希相等,说明串大概率相等。
struct SHash {
#define ll long long
// string s[] index from 0 to n-1
// B[i] = BASE^i
// s[i...j] = s[0...j] - s[0...i-1]
// hash s[0...i-1] = H[i] = s[0]*B[i-1]+s[1]*B[i-2]+...+s[i-1]*B[0]
// hash s[0...j] = H[j+1] = s[0]*B[j]+s[1]*B[j-1]+...+s[j]*B[0]
// hash s[i...j] = H[j+1] - H[i]*B[j-i+1]
// hash s[i...j-1] = H[j] - H[i]*B[j-i]
vector<ll> H, B;
ll len, base, mod;
SHash(string& s, ll base = 171, ll mod = 998244353)
: H(s.size() + 1, 0),
B(s.size() + 1, 1),
len(s.size()),
base(base),
mod(mod) {
for (int i = 1; i <= len; i++) {
H[i] = (H[i - 1] * base % mod + s[i - 1]) % mod;
B[i] = B[i - 1] * base % mod;
}
}
ll hash(int l, int r) { // hash of s[l...r-1]
return (H[r] - H[l] * B[r - l] % mod + mod) % mod;
};
};
// 167772161,469762049,754974721,998244353,1045430273,1051721729,1053818881,1000000007
struct MSHash{
vector<SHash> h;
MSHash(string& s) {
// MOD 太大会溢出
vector<pair<ll, ll>> param ={
{137, 167772161},
{139, 469762049},
{149, 754974721},
{167, 998244353},
{151, 1045430273},
{157, 1051721729},
{163, 1053818881},
{173, 1000000007}
};
for (auto [i, j] : param) {
h.emplace_back(s, i, j);
}
}
// is s[l1...r1-1] == s[l2...r2-1] ? s[] index from 0
bool check(int l1, int r1, int l2, int r2) {
for (auto& i : h) {
if (i.hash(l1, r1) != i.hash(l2, r2))
return false;
}
return true;
}
// is s[l1...r1-1] == o[l2...r2-1] ? s[] index from 0
bool check(MSHash& o, int l1, int r1, int l2, int r2) {
int sz = h.size();
for (int i=0; i<sz; i++) {
if (h[i].hash(l1, r1) != o.h[i].hash(l2, r2))
return false;
}
return true;
}
void print(int l, int r) {
cout<<'['; int s = 1;
for(auto& e:h) { if (s) s = 0; else cout << ", "; cout << e.hash(l, r); }
cout<<"]\\n";
}
};
实际解题时封装太多层还是容易超时,建议用两个哈希
SHash h1(s), h2(s, 173, 1000000007);
// 检测 s[a,b) == s[c,d)
h1.hash(a, b) == h1.hash(c, d) && h2.hash(a, b) == h2.hash(c, d)
实战还是别用结构体封装,太容易卡常了
再写一个常数较小的双重哈希
#define N 500005
#define ll long long
const ll MOD1 = 998244353, BASE1 = 251;
const ll MOD2 = 1073676287, BASE2 = 211;
ll H1[N], H2[N], B1[N], B2[N];
void init(string& s) {
int n = s.size();
H1[0] = H2[0] = 0;
B1[0] = B2[0] = 1;
for (int i=1; i<=n; i++) {
H1[i] = (H1[i-1]*BASE1%MOD1 + s[i-1])%MOD1;
B1[i] = B1[i-1]*BASE1%MOD1;
H2[i] = (H2[i-1]*BASE2%MOD2 + s[i-1])%MOD2;
B2[i] = B2[i-1]*BASE2%MOD2;
}
}
ll hash1(int l, int r) { // hash of s[l...r-1]
return (H1[r] - H1[l] * B1[r - l] % MOD1 + MOD1) % MOD1;
};
ll hash2(int l, int r) { // hash of s[l...r-1]
return (H2[r] - H2[l] * B2[r - l] % MOD2 + MOD2) % MOD2;
};
bool check(int l1, int r1, int l2, int r2) {
return hash1(l1, r1) == hash1(l2, r2) && hash2(l1, r1) == hash2(l2, r2);
}