Thanks for the patch! See 13 comments below.
On 18/07/2018 20:47, AKhatskevich wrote:
In case one need to upgrade vshard to a new version, this commit
improves reload mechanism to allow to do that for a wider variety of
possible changes (between two versions).
Changes:
* introduce cfg option `connection_outdate_delay`
* improve reload mechanism
* add `util.async_task` method, which runs a function after a
delay
* delete replicaset:rebind_connections method as it is replaced
with `rebind_replicasets` which updates all replicasets at once
Reload mechanism:
* reload all vshard modules
* create new `replicaset` and `replica` objects
* reuse old netbox connections in new replica objects if
possible
* update router/storage.internal table
* after a `connection_outdate_delay` disable old instances of
`replicaset` and `replica` objects
Reload works for modules:
* vshard.router
* vshard.storage
Here is a module reload algorithm:
* old vshard is working
* delete old vshard src
* install new vshard
* call: package.loaded['vshard.router'] = nil
* call: old_router = vshard.router -- Save working router copy.
* call: vshard.router = require('vshard.router')
* if require fails: continue using old_router
* if require succeeds: use vshard.router
In case reload process fails, old router/storage module, replicaset and
replica objects continue working properly. If reload succeeds, all old
objects would be deprecated.
Extra changes:
* introduce MODULE_INTERNALS which stores name of the module
internal data in the global namespace
Part of <shut git>112
---
test/router/reload.result | 159 +++++++++++++++++++++++++++++++++++++++++++
test/router/reload.test.lua | 48 +++++++++++++
test/router/router.result | 3 +-
test/storage/reload.result | 68 ++++++++++++++++++
test/storage/reload.test.lua | 23 +++++++
vshard/cfg.lua | 6 ++
vshard/error.lua | 5 ++
vshard/replicaset.lua | 100 ++++++++++++++++++++-------
vshard/router/init.lua | 44 ++++++++----
vshard/storage/init.lua | 45 ++++++++----
vshard/util.lua | 20 ++++++
11 files changed, 466 insertions(+), 55 deletions(-)
diff --git a/test/router/reload.result b/test/router/reload.result
index 47f3c2e..3fbbe6e 100644
--- a/test/router/reload.result
+++ b/test/router/reload.result
@@ -174,6 +174,165 @@ vshard.router.module_version()
check_reloaded()
---
...
+--
+-- Outdate old replicaset and replica objets.
+--
+rs = vshard.router.route(1)
+---
+...
+rs:callro('echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+package.loaded["vshard.router"] = nil
+---
+...
+_ = require('vshard.router')
+---
+...
+-- Make sure outdate async task has had cpu time.
+fiber.sleep(0.005)
+---
+...
+rs.callro(rs, 'echo', {'some_data'})
+---
+- null
+- type: ShardingError
+ name: OBJECT_IS_OUTDATED
+ message: Object is outdated after module reload/reconfigure. Use new
instance.
+ code: 20
+...
+vshard.router = require('vshard.router')
+---
+...
+rs = vshard.router.route(1)
+---
+...
+rs:callro('echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+-- Test `connection_outdate_delay`.
+old_connection_delay = cfg.connection_outdate_delay
+---
+...
+cfg.connection_outdate_delay = 0.3
+---
+...
+vshard.router.cfg(cfg)
+---
+...
+cfg.connection_outdate_delay = old_connection_delay
+---
+...
+vshard.router.internal.connection_outdate_delay = nil
+---
+...
+vshard.router = require('vshard.router')
+---
+...
+rs_new = vshard.router.route(1)
+---
+...
+rs_old = rs
+---
+...
+_, replica_old = next(rs_old.replicas)
+---
+...
+rs_new:callro('echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+-- Check old objets are still valid.
+rs_old:callro('echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+replica_old.conn ~= nil
+---
+- true
+...
+fiber.sleep(0.2)
+---
+...
+rs_old:callro('echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+replica_old.conn ~= nil
+---
+- true
+...
+replica_old.outdated == nil
+---
+- true
+...
+fiber.sleep(0.2)
+---
+...
+rs_old:callro('echo', {'some_data'})
+---
+- null
+- type: ShardingError
+ name: OBJECT_IS_OUTDATED
+ message: Object is outdated after module reload/reconfigure. Use new
instance.
+ code: 20
+...
+replica_old.conn == nil
+---
+- true
+...
+replica_old.outdated == true
+---
+- true
+...
+rs_new:callro('echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+-- Error during reconfigure process.
+_ = vshard.router.route(1):callro('echo', {'some_data'})
+---
+...
+vshard.router.internal.errinj.ERRINJ_CFG = true
+---
+...
+old_internal = table.copy(vshard.router.internal)
+---
+...
+package.loaded["vshard.router"] = nil
+---
+...
+_, err = pcall(require, 'vshard.router')
+---
+...
+err:match('Error injection:.*')
+---
+- 'Error injection: cfg'
+...
+vshard.router.internal.errinj.ERRINJ_CFG = false
+---
+...
+util.has_same_fields(old_internal, vshard.router.internal)
+---
+- true
+...
+_ = vshard.router.route(1):callro('echo', {'some_data'})
+---
+...
test_run:switch('default')
---
- true
diff --git a/test/storage/reload.result b/test/storage/reload.result
index 531d984..0281e27 100644
--- a/test/storage/reload.result
+++ b/test/storage/reload.result
@@ -174,6 +174,74 @@ vshard.storage.module_version()
check_reloaded()
---
...
+--
+-- Outdate old replicaset and replica objets.
+--
+_, rs = next(vshard.storage.internal.replicasets)
+---
+...
+package.loaded["vshard.storage"] = nil
+---
+...
+_ = require('vshard.storage')
+---
+...
+rs.callro(rs, 'echo', {'some_data'})
+---
+- null
+- type: ShardingError
+ name: OBJECT_IS_OUTDATED
+ message: Object is outdated after module reload/reconfigure. Use new
instance.
+ code: 20
+...
+_, rs = next(vshard.storage.internal.replicasets)
+---
+...
+rs.callro(rs, 'echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+-- Error during reload process.
+_, rs = next(vshard.storage.internal.replicasets)
+---
+...
+rs:callro('echo', {'some_data'})
+---
+- some_data
+- null
+- null
+...
+vshard.storage.internal.errinj.ERRINJ_CFG = true
+---
+...
+old_internal = table.copy(vshard.storage.internal)
+---
+...
+package.loaded["vshard.storage"] = nil
+---
+...
+_, err = pcall(require, 'vshard.storage')
+---
+...
+err:match('Error injection:.*')
+---
+- 'Error injection: cfg'
+...
+vshard.storage.internal.errinj.ERRINJ_CFG = false
+---
+...
+util.has_same_fields(old_internal, vshard.storage.internal)
+---
+- true
+...
+_, rs = next(vshard.storage.internal.replicasets)
+---
+...
+_ = rs:callro('echo', {'some_data'})
+---
+...
test_run:switch('default')
---
- true
diff --git a/vshard/cfg.lua b/vshard/cfg.lua
index d5429af..87d0fc8 100644
--- a/vshard/cfg.lua
+++ b/vshard/cfg.lua
@@ -217,6 +217,10 @@ local cfg_template = {
type = 'non-negative number', name = 'Sync timeout', is_optional =
true,
default = consts.DEFAULT_SYNC_TIMEOUT
}},
+ {'connection_outdate_delay', {
+ type = 'non-negative number', name = 'Object outdate timeout',
+ is_optional = true, default = nil
+ }},
}
--
@@ -264,6 +268,8 @@ local function remove_non_box_options(cfg)
cfg.collect_bucket_garbage_interval = nil
cfg.collect_lua_garbage = nil
cfg.sync_timeout = nil
+ cfg.sync_timeout = nil
+ cfg.connection_outdate_delay = nil
end
return {
diff --git a/vshard/replicaset.lua b/vshard/replicaset.lua
index 99f59aa..ec6e95b 100644
--- a/vshard/replicaset.lua
+++ b/vshard/replicaset.lua> @@ -412,6 +424,49 @@ for name, func in
pairs(replica_mt.__index) do
end
replica_mt.__index = index
+--
+-- Meta-methods of outdated objects
+-- They define only arrtibutes from corresponding metatables to
+-- make user able to access fields of old objects.
+--
+local function outdated_warning(...)
+ return nil, lerror.vshard(lerror.code.OBJECT_IS_OUTDATED)
+end
+
+local outdated_replicaset_mt = {
+ __index = {
+ outdated = true
+ }
+}
+for fname, func in pairs(replicaset_mt.__index) do
+ outdated_replicaset_mt.__index[fname] = outdated_warning
+end
+
+local outdated_replica_mt = {
+ __index = {
+ outdated = true
+ }
+}
+for fname, func in pairs(replica_mt.__index) do
+ outdated_replica_mt.__index[fname] = outdated_warning
+end
+
+--
+-- Outdate replicaset and replica objects:
+-- * Set outdated_metatables.
+-- * Remove connections.
+--
+outdate_replicasets = function(replicasets)
+ for _, replicaset in pairs(replicasets) do
+ setmetatable(replicaset, outdated_replicaset_mt)
+ for _, replica in pairs(replicaset.replicas) do
+ setmetatable(replica, outdated_replica_mt)
+ replica.conn = nil
+ end
+ end
+ log.info('Old replicaset and replica objects are outdated.')
+end
+
--
-- Calculate for each replicaset its etalon bucket count.
-- Iterative algorithm is used to learn the best balance in a
diff --git a/vshard/router/init.lua b/vshard/router/init.lua
index a143070..a8f6bbc 100644
--- a/vshard/router/init.lua
+++ b/vshard/router/init.lua
@@ -479,12 +493,13 @@ local function router_cfg(cfg)
else
log.info('Starting router reconfiguration')
end
- local new_replicasets = lreplicaset.buildall(cfg, M.replicasets)
+ local new_replicasets = lreplicaset.buildall(cfg)
local total_bucket_count = cfg.bucket_count
local collect_lua_garbage = cfg.collect_lua_garbage
- lcfg.remove_non_box_options(cfg)
+ local box_cfg = table.deepcopy(cfg)
diff --git a/vshard/storage/init.lua b/vshard/storage/init.lua
index 052e94f..bf560e6 100644
--- a/vshard/storage/init.lua
+++ b/vshard/storage/init.lua
@@ -2,20 +2,34 @@ local log = require('log')
local luri = require('uri')
local lfiber = require('fiber')
local netbox = require('net.box') -- for net.box:self()
+local trigger = require('internal.trigger')
+
+local MODULE_INTERNALS = '__module_vshard_storage'
+-- Reload requirements, in case this module is reloaded manually.
+if rawget(_G, MODULE_INTERNALS) then
+ local vshard_modules = {
+ 'vshard.consts', 'vshard.error', 'vshard.cfg',
+ 'vshard.replicaset', 'vshard.util',
+ }
+ for _, module in pairs(vshard_modules) do
+ package.loaded[module] = nil
+ end
+end
local consts = require('vshard.consts')
local lerror = require('vshard.error')
-local util = require('vshard.util')
local lcfg = require('vshard.cfg')
local lreplicaset = require('vshard.replicaset')
-local trigger = require('internal.trigger')
+local util = require('vshard.util')
-local M = rawget(_G, '__module_vshard_storage')
+local M = rawget(_G, MODULE_INTERNALS)
if not M then
--
-- The module is loaded for the first time.
--
M = {
---------------- Common module attributes ----------------
+ -- The last passed configuration.
+ current_cfg = nil,