AtCoder Grand Contest 002 - D Stamp Rally

勉強になるとTLで見かけたので

問題

agc002.contest.atcoder.jp

キーワード

並列二分探索

前提

unionfind の結合(unite)・判定(same)に掛かる時間計算量はO(1)とする．

editorialを読む

二分探索の臭いすらしないので読んだ．

すべての兄弟のスコアをO(Q(N+M) logM)時間で求められ…，高速化の必要があります．

分かる

そこで，Q組の兄弟について並列に二分探索を行うことを考えます

🤔

「グラフに辺1, 2, . . . , Mを追加していく」というパスをO(logM)回だけ繰り返し…

😒

実装

まずは計算量O(QMlogM)の解を作成してみる．明らかにO(logM)は取り除けそうだが，あえて残す．

typedef ll bsearch_t;
bsearch_t bin_search(bsearch_t low, bsearch_t high, function<bool(bsearch_t)> func) {
    ++high; // --l;
    bsearch_t c;
    while (low + 1 < high) {
        c = (low + high) / 2;
        (func(c) ? high : low) = c;
    }
    return high; // return l;
}

ll m, n, n_q;
GraphE graph;
vector<array<int, 3>> queries;

// 辺番号idx未満まで接続したとき，queries[qid]が到達できる頂点の数
bool judge(int idx, int qid) {
    Unionfind uf(n);
    repeat(i, idx) {
        auto e = graph.edges[i];
        uf.connect(e.u, e.v);
    }
    int a = queries[qid][0];
    int b = queries[qid][1];
    int cnt = uf.size(a);
    if (!uf.same(a,b))
        cnt += uf.size(b);
    return queries[qid][2] <= cnt;
}

int main() {
    scanner >> n >> m;

    graph.resize(n);

    repeat(i, m) {
        int a, b;
        scanner >> a >> b;
        --a; --b;
        graph.connect(a, b);
    }

    queries.reserve(n_q);
    scanner >> n_q;
    repeat(i, n_q) {
        int a, b, c;
        scanner >> a >> b >> c;
        --a; --b;
        queries.push_back({ a, b, c });
    }

    repeat(i, n_q) {
        int a = bin_search(0ll, m - 1, [&i](long long x) {return judge(x, i); });
        printer << a << '\n';
    }
    return 0;
}

並列二分探索に少しずつ近づけていこう．
二分探索はDFSの形でも書ける．BITやセグメント木でDFSを用いた二分探索を書いた人は少なくないはず．

ll m, n, kei;
GraphE graph;
vector<array<int, 3>> queries;

// 辺番号idx未満まで接続したとき，queries[qid]が到達できる頂点の数
bool judge(int idx, int qid) {
    /* 略 */
}

int dfs_bsearch(int begin, int end, function<bool(int)> func, int depth = 0) {
    if (begin + 1 >= end) return end;
    int c = (begin + end) / 2;
    if (func(c)) return dfs_bsearch(begin, c, func, depth + 1);
    else         return dfs_bsearch(c, end, func, depth + 1);
}


int main() {
    
    /* 略 */
    
    repeat(i, n_q) {
        int a = dfs_bsearch(0ll, m, [&i](long long x) {return judge(x, i); });
        printer << a << '\n';
    }
    return 0;
}

judgeとbin_searchを纏める

int dfs_bsearch(int begin, int end, int idx_query, int depth = 0) {
    if (begin + 1 >= end) return end;
    int c = (begin + end) / 2;
    int cnt;
    {
        Unionfind uf(n);
        repeat(i, c) {
            auto e = graph.edges[i];
            uf.connect(e.u, e.v);
        }
        int a = queries[idx_query][0];
        int b = queries[idx_query][1];
        cnt = uf.size(a);
        if (!uf.same(a, b))
            cnt += uf.size(b);
    }

    if (queries[idx_query][2] <= cnt) return dfs_bsearch(begin, c, idx_query, depth + 1);
    else                              return dfs_bsearch(c, end, idx_query, depth + 1);
}

ところで，dfs_bsearchは未だにある1つのクエリに対して答えるのみである．
『並列』二分探索を実装したいのだから，並列的に全てのクエリを求めるコードを書くべき．

// i番目のクエリの答えはans[i]以下
vector<int> ans;
// 並行二分探索
void dfs_bsearch(int begin, int end, int depth = 0) {
    if (begin + 1 >= end) return;
    int c = (begin + end) / 2;

    Unionfind uf(n);
    repeat(i, c) {
        auto e = graph.edges[i];
        uf.connect(e.u, e.v);
    }

    repeat(idx_query, n_q) {
        // このクエリの答えは[begin,end)外に存在することが分かっている
        if (ans[idx_query] < begin) continue;

        int a = queries[idx_query][0];
        int b = queries[idx_query][1];
        int cnt = uf.size(a);
        if (!uf.same(a, b))
            cnt += uf.size(b);

        if (queries[idx_query][2] <= cnt) {
            // idx_query番目のクエリの答えはc-1以下であることが分かった
            ans[idx_query] = c-1;
        }
    }
    dfs_bsearch(begin, c, depth + 1);
    dfs_bsearch(c, end,   depth + 1);
}

int main() {
    
    /* 略 */

    ans.resize(n_q, m-1);
    dfs_bsearch(0, m);
    for (int a : ans) printer << a + 1 << '\n';

    return 0;
}

この時点の書き換えで，unionfindの部分が複数のクエリで共有された．
二分探索で取りうる値を全て走査する．よって，dfs_bsearchが呼び出される回数はO(MlogM)．
unionfindを毎回構築する計算量はO(M)．unionfind構築後に全てのクエリを確認する計算量はO(Q)
よって全てのクエリの答えを求めるのに必要な計算量はO((M+Q)MlogM)
🤔
まずはunionfindの計算量を落とすことを考える．
unionfindは，bin_searchの再帰の深さが同じもの同士で共有出来る．
- pekempeyさんのhttps://pekempey.github.io/parallel_binary_search/simulate.html の動画がとても分かりやすい．

// i番目のクエリの答えはans[i]以下
vector<int> ans;
// 並行二分探索
void dfs_bsearch(int begin, int end, int depth = 0) {
    static Unionfind uf_data[20];
    static int uf_progress[20];
    if (begin == 0) uf_data[depth] = Unionfind(n); // initialize

    if (begin + 1 >= end) return;
    int c = (begin + end) / 2;

    Unionfind& uf = uf_data[depth];
    if (begin == 0) uf = Unionfind(n); // initialize
    
    iterate(i, uf_progress[depth], c) {
        auto e = graph.edges[i];
        uf.connect(e.u, e.v);
    }
    uf_progress[depth] = c;

    repeat(idx_query, n_q) {
        /* 略 */
    }
    dfs_bsearch(begin, c, depth + 1);
    dfs_bsearch(c, end,   depth + 1);
}

unionfindの計算量を見積もる．O(logM)個のunionfindがM回connectを呼び出すので，計算量はO(MlogM)．
よって，全てのクエリの答えを求めるのに必要な計算量はO(QMlogM + MlogM)
残りはO(Q)をどうにかするだけ．
Q個のクエリ全部を見る必要は無い．必要最小限のクエリだけをチェックするようにする．
チェック対象のクエリの番号を格納したコンテナindices_qを引数に持たせる．
- コンテナindices_q_left, indices_q_rightを確保する．
- それぞれのクエリの番号idx_queryについて，区間[begin,c)に解が存在することが分かったならば，indices_q_left に入れる．indices_q_right も同様．
- dfs_bsearch(begin, c, indices_q_left, depth + 1);

// i番目のクエリの答えはans[i]以下
vector<int> ans;
// 並行二分探索
void dfs_bsearch(int begin, int end, vector<int> indices_q, int depth = 0) {
    /* 略 */

    vector<int> indices_q_left, indices_q_right;

    for (int idx_query : indices_q) {
        // このクエリの答えは[begin,end)外に存在することが分かっている
        if (ans[idx_query] < begin) continue;

        int a = queries[idx_query][0];
        int b = queries[idx_query][1];
        int cnt = uf.size(a);
        if (!uf.same(a, b))
            cnt += uf.size(b);

        if (queries[idx_query][2] <= cnt) {
            // idx_query番目のクエリの答えはc-1以下であることが分かった
            ans[idx_query] = c-1;
            indices_q_left.push_back(idx_query);
        }
        else {
            // idx_query番目のクエリの答えはc以上であることが分かった
            indices_q_right.push_back(idx_query);
        }
    }
    dfs_bsearch(begin, c, indices_q_left, depth + 1);
    dfs_bsearch(c, end,  indices_q_right, depth + 1);
}

Submission #1840657 - AtCoder Grand Contest 002 | AtCoder
AC
それぞれのクエリは必ずO(logM)回判定される．よって，全てのクエリの判定にはO(QlogM)の計算量が掛かる．
以前の実装より，unionfindで O(MlogM) ，dfsの探索で O(MlogM)
全てのクエリの答えを求めるのに必要な計算量はO(MlogM + QlogM)

最終的なコード

class GraphE {
public:
    typedef int W_T;
    struct Edge {
        int u, v;
        W_T value;
        Edge(int from = 0, int to = 0, W_T value = 0) :u(from), v(to), value(value) {}
        inline int to(int _v) const { return _v == v ? u : v; }
    };
    size_t n;
    vector<vector<int>> vertex_to;
    vector<Edge> edges;

    GraphE(int n = 1) :n(n), vertex_to(n) { }

    void resize(size_t _n) {
        n = _n;
        vertex_to.resize(_n);
    }
    void connect(int from, int to, W_T val = 0) {
        vertex_to[(size_t)from].push_back((int)edges.size());
        vertex_to[(size_t)to].push_back((int)edges.size());
        edges.emplace_back(from, to, val);
    }
};


class Unionfind {
public:
    vector<int> data;
    Unionfind(size_t size = 1) : data(size, -1) { }
    bool connect(size_t x, size_t y) {
        x = root(x); y = root(y);
        if (x != y) {
            if (data[y] < data[x]) swap(x, y);
            data[x] += data[y]; data[y] = (int)x;
        }
        return x != y;
    }
    inline bool same(size_t x, size_t y) {
        return root(x) == root(y);
    }
    inline size_t root(size_t x) {
        return (size_t)(data[x] < 0 ? x : data[x] = root(data[x]));
    }
    inline int size(size_t x) {
        return -data[root(x)];
    }
};


ll m, n, n_q;
GraphE graph;

vector<array<int, 3>> queries;

// i番目のクエリの答えはans[i]以下
vector<int> ans;
// 並行二分探索
void dfs_bsearch(int begin, int end, const vector<int>& indices_q, int depth = 0) {
    static Unionfind uf_data[20];
    static int uf_progress[20];
    if (begin == 0) uf_data[depth] = Unionfind(n); // initialize

    if (begin + 1 >= end) return;
    int c = (begin + end) / 2;

    Unionfind& uf = uf_data[depth];
    
    iterate(i, uf_progress[depth], c) {
        auto e = graph.edges[i];
        uf.connect(e.u, e.v);
    }
    uf_progress[depth] = c;

    vector<int> indices_q_left, indices_q_right;

    for (int idx_query : indices_q) {
        // このクエリの答えは[begin,end)外に存在することが分かっている
        if (ans[idx_query] < begin) continue;

        int a = queries[idx_query][0];
        int b = queries[idx_query][1];
        int cnt = uf.size(a);
        if (!uf.same(a, b))
            cnt += uf.size(b);

        if (queries[idx_query][2] <= cnt) {
            // idx_query番目のクエリの答えはc-1以下であることが分かった
            ans[idx_query] = c-1;
            indices_q_left.push_back(idx_query);
        }
        else {
            // idx_query番目のクエリの答えはc以上であることが分かった
            indices_q_right.push_back(idx_query);
        }
    }
    dfs_bsearch(begin, c, indices_q_left, depth + 1);
    dfs_bsearch(c, end,  indices_q_right, depth + 1);
}


int main() {
    scanner >> n >> m;
    graph.resize(n);

    repeat(i, m) {
        int a, b;
        scanner >> a >> b;
        --a; --b;
        graph.connect(a, b);
    }

    queries.reserve(n_q);
    scanner >> n_q;
    repeat(i, n_q) {
        int a, b, c;
        scanner >> a >> b >> c;
        --a; --b;
        queries.push_back({ a, b, c });
    }

    vector<int> indices_q(n_q);
    repeat(i, n_q) indices_q[i] = i;
    ans.resize(n_q, m-1);
    dfs_bsearch(0, m, indices_q);

    for (int a : ans) printer << a + 1 << '\n';

    return 0;
}

感想

並列二分探索が大好きになりました
一瞬平方分割とか思いついたけれど速度が間に合わず．
queries.reserve(n_q); が scanner >> n_q; の手前になっているスカポンタンに気づきました．直すの面倒なのでこのままにします．

buyoh.hateblo.jp

残念な競技プログラミング参加者による学習記録

AtCoder Grand Contest 002 - D Stamp Rally

問題

キーワード

前提

editorialを読む

実装

最終的なコード

感想