缺点:多重哈希容易卡常,单哈希容易被hack

#define ll long long
const ll MOD = 998244353, BASE = 171;
int n = s.size();
vector<ll> H(n+1, 0), B(n+1, 1);
// string s[] index from 0 to n-1
// B[i] = BASE^i
// s[i...j] = s[0...j] - s[0...i-1]
// hash s[0...i-1] = H[i] = s[0]*B[i-1]+s[1]*B[i-2]+...+s[i-1]*B[0]
// hash s[0...j] = H[j+1] = s[0]*B[j]+s[1]*B[j-1]+...+s[j]*B[0]
// hash s[i...j] = H[j+1] - H[i]*B[j-i+1]
// hash s[i...j-1] = H[j] - H[i]*B[j-i]
for (int i=1; i<=n; i++) {
    H[i] = (H[i-1]*BASE%MOD + s[i-1])%MOD;
}
for (int i=1; i<=n; i++) {
    B[i] = B[i-1]*BASE%MOD;
}
auto hash = [&](int l, int r) {  // hash of s[l...r-1]
    return (H[r] - H[l] * B[r - l] % MOD + MOD) % MOD;
};

对于长度为n的字符串s,下标从0开始

令$B[i] = BASE^i$

长度为i的前缀子串s[0…i-1]的哈希值, $H[i] = \sum \limits_{j=0}^{i-1} s[j]*B[i-1-j]$

长度为len的子串s[i…j]的哈希值H[j+1] - H[i]*B[j-i+1] 注意取模。


毒瘤写法,自然溢出

#define ull unsigned long long
#define BASE 131
int main() {
    string s = "abcdabcdab";
    int n = s.size();//n=10
    vector<ull> H(n+1, 0), B(n+1, 1);
    for (int i=1; i<=n; i++) {
        H[i] = H[i-1]*BASE+s[i-1];
        B[i] = B[i-1]*BASE;
    }
		// string s[] index from 0 to n-1
    // H[i] 前i个字符的前缀串哈希值, H[0] = 0。
		// B[i] = BASE^i
    // H[r]-H[l]*B[r-l] = s[l...r-1]
    // s[l...r] = H[r+1] - H[l]*B[r-l+1]
    auto hash = [&](int l, int r) {  // hash of s[l...r-1]
        return H[r] - H[l] * B[r - l];
    };
		cout << H[3] - H[0]*B[3] << " " << H[7] - H[4]*B[3] << endl; // abc abc
    cout << H[4] - H[0]*B[4] << " " << H[8] - H[4]*B[4] << endl; // abcd abcd
    cout << H[2] - H[0]*B[2] << " " << H[6] - H[4]*B[2] << " " << H[10]-H[8]*B[2] << endl; // abcd abcd
    return 0;
}
/*
1677554 1677554
219759674 219759674
12805 12805 12805
*/

应用

多重哈希

// 单哈希容易被卡,封装用多重哈希
// 两个串的哈希相等,说明串大概率相等。
struct SHash {
#define ll long long
    // string s[] index from 0 to n-1
    // B[i] = BASE^i
    // s[i...j] = s[0...j] - s[0...i-1]
    // hash s[0...i-1] = H[i] = s[0]*B[i-1]+s[1]*B[i-2]+...+s[i-1]*B[0]
    // hash s[0...j] = H[j+1] = s[0]*B[j]+s[1]*B[j-1]+...+s[j]*B[0]
    // hash s[i...j] = H[j+1] - H[i]*B[j-i+1]
    // hash s[i...j-1] = H[j] - H[i]*B[j-i]
    vector<ll> H, B;
    ll len, base, mod;
    SHash(string& s, ll base = 171, ll mod = 998244353)
        : H(s.size() + 1, 0),
          B(s.size() + 1, 1),
          len(s.size()),
          base(base),
          mod(mod) {
        for (int i = 1; i <= len; i++) {
            H[i] = (H[i - 1] * base % mod + s[i - 1]) % mod;
            B[i] = B[i - 1] * base % mod;
        }
    }
    ll hash(int l, int r) {  // hash of s[l...r-1]
        return (H[r] - H[l] * B[r - l] % mod + mod) % mod;
    };
};
// 167772161,469762049,754974721,998244353,1045430273,1051721729,1053818881,1000000007
struct MSHash{
    vector<SHash> h;
    MSHash(string& s) {
        // MOD 太大会溢出
        vector<pair<ll, ll>> param ={
            {137, 167772161},
            {139, 469762049},
            {149, 754974721},
            {167, 998244353},
            {151, 1045430273},
            {157, 1051721729},
            {163, 1053818881},
            {173, 1000000007}
        };
        for (auto [i, j] : param) {
            h.emplace_back(s, i, j);
        }
    }
    // is s[l1...r1-1] == s[l2...r2-1] ? s[] index from 0
    bool check(int l1, int r1, int l2, int r2) {
        for (auto& i : h) {
            if (i.hash(l1, r1) != i.hash(l2, r2))
                return false;
        }
        return true;
    }

    // is s[l1...r1-1] == o[l2...r2-1] ? s[] index from 0
    bool check(MSHash& o, int l1, int r1, int l2, int r2) {
        int sz = h.size();
        for (int i=0; i<sz; i++) {
            if (h[i].hash(l1, r1) != o.h[i].hash(l2, r2))
                return false;
        }
        return true;
    }
    void print(int l, int r) {
        cout<<'['; int s = 1;
        for(auto& e:h) { if (s) s = 0; else cout << ", "; cout << e.hash(l, r); }
        cout<<"]\\n";
    }
};

实际解题时封装太多层还是容易超时,建议用两个哈希

SHash h1(s), h2(s, 173, 1000000007);
// 检测 s[a,b) == s[c,d)
h1.hash(a, b) == h1.hash(c, d) && h2.hash(a, b) == h2.hash(c, d)

实战还是别用结构体封装,太容易卡常了

再写一个常数较小的双重哈希

#define N 500005
#define ll long long
const ll MOD1 = 998244353, BASE1 = 251;
const ll MOD2 = 1073676287, BASE2 = 211;
ll H1[N], H2[N], B1[N], B2[N];

void init(string& s) {
    int n = s.size();
    H1[0] = H2[0] = 0;
    B1[0] = B2[0] = 1;
    for (int i=1; i<=n; i++) {
        H1[i] = (H1[i-1]*BASE1%MOD1 + s[i-1])%MOD1;
        B1[i] = B1[i-1]*BASE1%MOD1;
        H2[i] = (H2[i-1]*BASE2%MOD2 + s[i-1])%MOD2;
        B2[i] = B2[i-1]*BASE2%MOD2;
    }
}
ll hash1(int l, int r) {  // hash of s[l...r-1]
    return (H1[r] - H1[l] * B1[r - l] % MOD1 + MOD1) % MOD1;
};
ll hash2(int l, int r) {  // hash of s[l...r-1]
    return (H2[r] - H2[l] * B2[r - l] % MOD2 + MOD2) % MOD2;
};
bool check(int l1, int r1, int l2, int r2) {
    return hash1(l1, r1) == hash1(l2, r2) && hash2(l1, r1) == hash2(l2, r2);
}

滚动哈希