[tarantool-patches] [PATCH v2] replication: fix assertion with duplicated connect to same master

  • From: Olga Arkhangelskaia <arkholga@xxxxxxxxxxxxx>
  • To: tarantool-patches@xxxxxxxxxxxxx
  • Date: Tue, 11 Sep 2018 10:11:05 +0300

In case of duplicated connection to the same master we are not able
to check it at the very beginning due to quorum option. So we allow such
configuration to proceed till it is obvious. If it the initial
configuration - replica won't start and in case of configuration change
we will be notified about duplicated replication source.
---
https://github.com/tarantool/tarantool/issues/3610
https://github.com/tarantool/tarantool/tree/OKriw/gh-3610-assert_fail_when_connect_master_twice-1.10

v1:

Changes in v2:
- changed completely, now we let duplicated params to proceed
- stop the applier at the moment when replica has the same hash that other one
- changed test

 src/box/box.cc                                 |  4 ++-
 src/box/replication.cc                         | 21 +++++++++++++--
 test/replication/duplicate_connection.result   | 37 ++++++++++++++++++++++++++
 test/replication/duplicate_connection.test.lua | 16 +++++++++++
 4 files changed, 75 insertions(+), 3 deletions(-)
 create mode 100644 test/replication/duplicate_connection.result
 create mode 100644 test/replication/duplicate_connection.test.lua

diff --git a/src/box/box.cc b/src/box/box.cc
index f25146d01..d3aeb5de0 100644
--- a/src/box/box.cc
+++ b/src/box/box.cc
@@ -667,8 +667,10 @@ box_sync_replication(bool connect_quorum)
                diag_raise();
 
        auto guard = make_scoped_guard([=]{
-               for (int i = 0; i < count; i++)
+               for (int i = 0; i < count; i++) {
+                       applier_stop(appliers[i]);
                        applier_delete(appliers[i]); /* doesn't affect diag */
+               }
        });
 
        replicaset_connect(appliers, count, connect_quorum);
diff --git a/src/box/replication.cc b/src/box/replication.cc
index 5755ad45e..d6b65e0a1 100644
--- a/src/box/replication.cc
+++ b/src/box/replication.cc
@@ -261,6 +261,21 @@ replica_on_applier_sync(struct replica *replica)
        replicaset_check_quorum();
 }
 
+static void
+replica_on_applier_off(struct replica *replica)
+{
+       switch (replica->applier_sync_state) {
+       case APPLIER_CONNECTED:
+               replica_on_applier_sync(replica);
+               break;
+       case APPLIER_DISCONNECTED:
+               break;
+       default:
+               unreachable();
+       }
+
+}
+
 static void
 replica_on_applier_connect(struct replica *replica)
 {
@@ -396,9 +411,10 @@ replica_on_applier_state_f(struct trigger *trigger, void 
*event)
                /*
                 * Connection to self, duplicate connection
                 * to the same master, or the applier fiber
-                * has been cancelled. Assume synced.
+                * has been cancelled. In case of duplicated connection
+                * will be left in this state, otherwise assume synced.
                 */
-               replica_on_applier_sync(replica);
+               replica_on_applier_off(replica);
                break;
        case APPLIER_STOPPED:
                /* Unrecoverable error. */
@@ -427,6 +443,7 @@ replicaset_update(struct applier **appliers, int count)
        auto uniq_guard = make_scoped_guard([&]{
                replica_hash_foreach_safe(&uniq, replica, next) {
                        replica_hash_remove(&uniq, replica);
+                       replica_clear_applier(replica);
                        replica_delete(replica);
                }
        });
diff --git a/test/replication/duplicate_connection.result 
b/test/replication/duplicate_connection.result
new file mode 100644
index 000000000..5edcd4254
--- /dev/null
+++ b/test/replication/duplicate_connection.result
@@ -0,0 +1,37 @@
+test_run = require('test_run').new()
+---
+...
+engine = test_run:get_cfg('engine')
+---
+...
+box.schema.user.grant('guest', 'replication')
+---
+...
+-- Deploy a replica.
+test_run:cmd("create server replica with rpl_master=default, 
script='replication/replica.lua'")
+---
+- true
+...
+test_run:cmd("start server replica")
+---
+- true
+...
+test_run:cmd("switch replica")
+---
+- true
+...
+replication = box.cfg.replication
+---
+...
+box.cfg{replication = {replication, replication}}
+---
+- error: 'Incorrect value for option ''replication'': duplicate connection to 
the
+    same replica'
+...
+test_run:cmd("switch default")
+---
+- true
+...
+box.schema.user.revoke('guest', 'replication') 
+---
+...
diff --git a/test/replication/duplicate_connection.test.lua 
b/test/replication/duplicate_connection.test.lua
new file mode 100644
index 000000000..b9bc573ca
--- /dev/null
+++ b/test/replication/duplicate_connection.test.lua
@@ -0,0 +1,16 @@
+test_run = require('test_run').new()
+engine = test_run:get_cfg('engine')
+
+box.schema.user.grant('guest', 'replication')
+
+-- Deploy a replica.
+test_run:cmd("create server replica with rpl_master=default, 
script='replication/replica.lua'")
+test_run:cmd("start server replica")
+test_run:cmd("switch replica")
+
+replication = box.cfg.replication
+box.cfg{replication = {replication, replication}}
+
+
+test_run:cmd("switch default")
+box.schema.user.revoke('guest', 'replication') 
-- 
2.14.3 (Apple Git-98)


Other related posts: