[tarantool-patches] [PATCH v3 5/7] box: introduce tuple_parse_iterator class

  • From: Kirill Shcherbatov <kshcherbatov@xxxxxxxxxxxxx>
  • To: tarantool-patches@xxxxxxxxxxxxx, vdavydov.dev@xxxxxxxxx
  • Date: Tue, 2 Apr 2019 18:49:36 +0300

Similar code in tuple_field_map_create and
vy_stmt_new_surrogate_delete_raw that parses a tuple in depth
has been refactored into a reusable tuple_parse_iterator class.

Thus encapsulated, this code will be managed uniformly and
extended by further patches in the scope of multikey indexes.

Needed for #1257
---
 src/box/tuple_format.c | 222 +++++++++++++++++++++++------------------
 src/box/tuple_format.h |  65 ++++++++++++
 src/box/vy_stmt.c      |  93 ++++++-----------
 3 files changed, 218 insertions(+), 162 deletions(-)

diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c
index 1043707ad..070897ec2 100644
--- a/src/box/tuple_format.c
+++ b/src/box/tuple_format.c
@@ -831,109 +831,34 @@ tuple_field_map_create(struct tuple_format *format, 
const char *tuple,
         * which key fields are absent in tuple_field().
         */
        memset((char *)*field_map - *field_map_size, 0, *field_map_size);
-       /*
-        * Prepare mp stack of the size equal to the maximum depth
-        * of the indexed field in the format::fields tree
-        * (fields_depth) to carry out a simultaneous parsing of
-        * the tuple and tree traversal to process type
-        * validations and field map initialization.
-        */
-       uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
-       struct mp_frame *frames = region_alloc(region, frames_sz);
-       if (frames == NULL) {
-               diag_set(OutOfMemory, frames_sz, "region", "frames");
+       struct tuple_parse_iterator it;
+       if (tuple_parse_iterator_create(&it, format, pos, defined_field_count,
+                                       region) != 0)
                goto error;
-       }
-       struct mp_stack stack;
-       mp_stack_create(&stack, format->fields_depth, frames);
-       mp_stack_push(&stack, MP_ARRAY, defined_field_count);
+       const char *pos_end;
        struct tuple_field *field;
-       struct json_token *parent = &format->fields.root;
-       while (true) {
-               int idx;
-               while ((idx = mp_stack_advance(&stack)) == -1) {
-                       /*
-                        * If the elements of the current frame
-                        * are over, pop this frame out of stack
-                        * and climb one position in the
-                        * format::fields tree to match the
-                        * changed JSON path to the data in the
-                        * tuple.
-                        */
-                       mp_stack_pop(&stack);
-                       if (mp_stack_is_empty(&stack))
-                               goto finish;
-                       parent = parent->parent;
-               }
-               /*
-                * Use the top frame of the stack and the
-                * current data offset to prepare the JSON token
-                * for the subsequent format::fields lookup.
-                */
-               struct json_token token;
-               switch (mp_stack_type(&stack)) {
-               case MP_ARRAY:
-                       token.type = JSON_TOKEN_NUM;
-                       token.num = idx;
-                       break;
-               case MP_MAP:
-                       if (mp_typeof(*pos) != MP_STR) {
-                               /*
-                                * JSON path support only string
-                                * keys for map. Skip this entry.
-                                */
-                               mp_next(&pos);
-                               mp_next(&pos);
-                               continue;
-                       }
-                       token.type = JSON_TOKEN_STR;
-                       token.str = mp_decode_str(&pos, (uint32_t *)&token.len);
-                       break;
-               default:
-                       unreachable();
-               }
-               /*
-                * Perform lookup for a field in format::fields,
-                * that represents the field metadata by JSON path
-                * corresponding to the current position in the
-                * tuple.
-                */
-               enum mp_type type = mp_typeof(*pos);
-               assert(parent != NULL);
-               field = json_tree_lookup_entry(&format->fields, parent, &token,
-                                              struct tuple_field, token);
-               if (field != NULL) {
-                       bool is_nullable = tuple_field_is_nullable(field);
-                       if (validate &&
-                           !field_mp_type_is_compatible(field->type, type,
-                                                        is_nullable) != 0) {
-                               diag_set(ClientError, ER_FIELD_TYPE,
-                                        tuple_field_path(field),
-                                        field_type_strs[field->type]);
-                               goto error;
-                       }
-                       if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
-                               (*field_map)[field->offset_slot] = pos - tuple;
-                       if (required_fields != NULL)
-                               bit_clear(required_fields, field->id);
-               }
+       while (tuple_parse_iterator_advice(&it, &field, &pos, &pos_end) > 0) {
+               if (field == NULL)
+                       continue;
                /*
-                * If the current position of the data in tuple
-                * matches the container type (MP_MAP or MP_ARRAY)
-                * and the format::fields tree has such a record,
-                * prepare a new stack frame because it needs to
-                * be analyzed in the next iterations.
+                * Check if field mp_type is compatible with type
+                * defined in format.
                 */
-               if ((type == MP_ARRAY || type == MP_MAP) &&
-                   !mp_stack_is_full(&stack) && field != NULL) {
-                       uint32_t size = type == MP_ARRAY ?
-                                       mp_decode_array(&pos) :
-                                       mp_decode_map(&pos);
-                       mp_stack_push(&stack, type, size);
-                       parent = &field->token;
-               } else {
-                       mp_next(&pos);
+               bool is_nullable = tuple_field_is_nullable(field);
+               if (validate &&
+                   !field_mp_type_is_compatible(field->type, mp_typeof(*pos),
+                                                is_nullable) != 0) {
+                       diag_set(ClientError, ER_FIELD_TYPE,
+                               tuple_field_path(field),
+                               field_type_strs[field->type]);
+                       goto error;
                }
+               /* Initialize field_map with data offset. */
+               if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
+                       (*field_map)[field->offset_slot] = pos - tuple;
+               /* Mark this field as present in the tuple. */
+               if (required_fields != NULL)
+                       bit_clear(required_fields, field->id);
        }
 finish:
        /*
@@ -1029,3 +954,104 @@ box_tuple_format_unref(box_tuple_format_t *format)
        tuple_format_unref(format);
 }
 
+int
+tuple_parse_iterator_create(struct tuple_parse_iterator *it,
+                           struct tuple_format *format, const char *data,
+                           uint32_t field_count, struct region *region)
+{
+       /*
+        * Prepare mp stack of the size equal to the maximum depth
+        * of the indexed field in the format::fields tree
+        * (fields_depth) to carry out a simultaneous parsing of
+        * the tuple and tree traversal.
+        */
+       uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
+       struct mp_frame *frames = region_alloc(region, frames_sz);
+       if (frames == NULL) {
+               diag_set(OutOfMemory, frames_sz, "region", "frames");
+               return -1;
+       }
+       mp_stack_create(&it->stack, format->fields_depth, frames);
+       mp_stack_push(&it->stack, MP_ARRAY, field_count);
+       it->parent = &format->fields.root;
+       it->format = format;
+       it->pos = data;
+       return 0;
+}
+
+int
+tuple_parse_iterator_advice(struct tuple_parse_iterator *it,
+                           struct tuple_field **field, const char **data,
+                           const char **data_end)
+{
+       int idx, rc = 0;
+       while ((idx = mp_stack_advance(&it->stack)) == -1) {
+               /*
+                * If the elements of the current frame
+                * are over, pop this frame out of stack
+                * and climb one position in the format::fields
+                * tree to match the changed JSON path to the
+                * data in the tuple.
+                */
+               mp_stack_pop(&it->stack);
+               if (mp_stack_is_empty(&it->stack))
+                       return rc;
+               it->parent = it->parent->parent;
+       }
+       /*
+        * Use the top frame of the stack and the
+        * current data offset to prepare the JSON token
+        * for the subsequent format::fields lookup.
+        */
+       struct json_token token;
+       switch (mp_stack_type(&it->stack)) {
+       case MP_ARRAY:
+               rc = 1;
+               token.type = JSON_TOKEN_NUM;
+               token.num = idx;
+               break;
+       case MP_MAP:
+               rc = 2;
+               if (mp_typeof(*it->pos) != MP_STR) {
+                       mp_next(&it->pos);
+                       mp_next(&it->pos);
+                       *field = NULL;
+                       return rc;
+               }
+               token.type = JSON_TOKEN_STR;
+               token.str = mp_decode_str(&it->pos, (uint32_t *)&token.len);
+               break;
+       default:
+               unreachable();
+       }
+       /*
+        * Perform lookup for a field in format::fields,
+        * that represents the field metadata by JSON path
+        * corresponding to the current position in the
+        * tuple.
+        */
+       assert(it->parent != NULL);
+       *field = json_tree_lookup_entry(&it->format->fields, it->parent, &token,
+                                       struct tuple_field, token);
+       *data = it->pos;
+       /*
+        * If the current position of the data in tuple
+        * matches the container type (MP_MAP or MP_ARRAY)
+        * and the format::fields tree has such a record,
+        * prepare a new stack frame because it needs to
+        * be analyzed in the next iterations.
+        */
+       enum mp_type type = mp_typeof(*it->pos);
+       if ((type == MP_ARRAY || type == MP_MAP) &&
+           !mp_stack_is_full(&it->stack) && *field != NULL) {
+               uint32_t size = type == MP_ARRAY ?
+                               mp_decode_array(&it->pos) :
+                               mp_decode_map(&it->pos);
+               mp_stack_push(&it->stack, type, size);
+               it->parent = &(*field)->token;
+       } else {
+               mp_next(&it->pos);
+       }
+       *data_end = it->pos;
+       return rc;
+}
diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h
index 22a0fb232..bef1d0903 100644
--- a/src/box/tuple_format.h
+++ b/src/box/tuple_format.h
@@ -412,6 +412,71 @@ tuple_field_map_create(struct tuple_format *format, const 
char *tuple,
 int
 tuple_format_init();
 
+/**
+ * A tuple msgpack iterator that parses a tuple in depth and
+ * yields the tuple_format field matching each entry, if any.
+ */
+struct tuple_parse_iterator {
+       /**
+        * Tuple format is used to perform field lookups in
+        * format::fields JSON tree.
+        */
+       struct tuple_format *format;
+       /**
+        * The pointer to the parent node in the format::fields
+        * JSON tree. Is required for relative lookup for the
+        * next field.
+        */
+       struct json_token *parent;
+       /**
+        * Traversal stack of msgpack frames is used to determine
+        * when the parsing of the current composite mp structure
+        * (array or map) is completed, so that the parent pointer
+        * can be updated accordingly.
+        */
+       struct mp_stack stack;
+       /** The current read position in msgpack. */
+       const char *pos;
+};
+
+/**
+ * Initialize tuple parse iterator with tuple format, data pointer
+ * and the count of top-level msgpack fields to be processed.
+ *
+ * This function assumes that the msgpack header containing the
+ * number of top-level msgpack fields (field_count) has already
+ * been parsed and the data pointer has already been shifted
+ * correspondingly. This allows directly limiting the number of
+ * fields that must be parsed.
+ *
+ * Function uses the region for the traversal stack allocation.
+ *
+ * Returns 0 on success. In case of memory allocation error sets
+ * diag message and returns -1.
+ */
+int
+tuple_parse_iterator_create(struct tuple_parse_iterator *it,
+                           struct tuple_format *format, const char *data,
+                           uint32_t field_count, struct region *region);
+
+/**
+ * Parse tuple in depth and update iterator state.
+ *
+ * Returns the number of msgpack values consumed at the current
+ * nesting level (2 for a map key:value pair, 1 for an array
+ * item, 0 on stop) and initializes:
+ * field - the matching tuple_field from format::fields, or
+ *         NULL when there is none,
+ * data  - the pointer to the currently processed msgpack field,
+ * data_end - the pointer to the end of currently processed
+ *            msgpack field.
+ * data and data_end are not set for a non-string map key.
+ */
+int
+tuple_parse_iterator_advice(struct tuple_parse_iterator *it,
+                           struct tuple_field **field, const char **data,
+                           const char **data_end);
+
 #if defined(__cplusplus)
 } /* extern "C" */
 #endif /* defined(__cplusplus) */
diff --git a/src/box/vy_stmt.c b/src/box/vy_stmt.c
index add86622b..1e8bb7825 100644
--- a/src/box/vy_stmt.c
+++ b/src/box/vy_stmt.c
@@ -431,81 +431,46 @@ vy_stmt_new_surrogate_delete_raw(struct tuple_format 
*format,
        /*
         * Perform simultaneous parsing of the tuple and
         * format::fields tree traversal to copy indexed field
-        * data and initialize field map. In many details the code
-        * above works like tuple_field_map_create, read it's
-        * comments for more details.
+        * data and initialize field map.
         */
-       uint32_t frames_sz = format->fields_depth * sizeof(struct mp_frame);
-       struct mp_frame *frames = region_alloc(region, frames_sz);
-       if (frames == NULL) {
-               diag_set(OutOfMemory, frames_sz, "region", "frames");
+       struct tuple_parse_iterator it;
+       if (tuple_parse_iterator_create(&it, format, src_pos, field_count,
+                                       region) != 0)
                goto out;
-       }
-       struct mp_stack stack;
-       mp_stack_create(&stack, format->fields_depth, frames);
-       mp_stack_push(&stack, MP_ARRAY, field_count);
+       int rc;
+       const char *src_pos_end;
        struct tuple_field *field;
-       struct json_token *parent = &format->fields.root;
-       while (true) {
-               int idx;
-               while ((idx = mp_stack_advance(&stack)) == -1) {
-                       mp_stack_pop(&stack);
-                       if (mp_stack_is_empty(&stack))
-                               goto finish;
-                       parent = parent->parent;
-               }
-               struct json_token token;
-               switch (mp_stack_type(&stack)) {
-               case MP_ARRAY:
-                       token.type = JSON_TOKEN_NUM;
-                       token.num = idx;
-                       break;
-               case MP_MAP:
-                       if (mp_typeof(*src_pos) != MP_STR) {
-                               mp_next(&src_pos);
-                               mp_next(&src_pos);
-                               pos = mp_encode_nil(pos);
-                               pos = mp_encode_nil(pos);
-                               continue;
-                       }
-                       token.type = JSON_TOKEN_STR;
-                       token.str = mp_decode_str(&src_pos, (uint32_t 
*)&token.len);
-                       pos = mp_encode_str(pos, token.str, token.len);
-                       break;
-               default:
-                       unreachable();
-               }
-               assert(parent != NULL);
-               field = json_tree_lookup_entry(&format->fields, parent, &token,
-                                              struct tuple_field, token);
+       while ((rc = tuple_parse_iterator_advice(&it, &field, &src_pos,
+                                                &src_pos_end)) > 0) {
                if (field == NULL || !field->is_key_part) {
-                       mp_next(&src_pos);
-                       pos = mp_encode_nil(pos);
+                       /*
+                        * Instead of copying useless data having
+                        * no representation in tuple_format,
+                        * write nil.
+                        */
+                       while (rc-- > 0)
+                               pos = mp_encode_nil(pos);
                        continue;
                }
+               if (field->token.type == JSON_TOKEN_STR) {
+                       assert(rc-- == 2);
+                       pos = mp_encode_str(pos, field->token.str,
+                                           field->token.len);
+               }
+               assert(rc == 1);
+               /* Initialize field_map with data offset. */
                if (field->offset_slot != TUPLE_OFFSET_SLOT_NIL)
                        field_map[field->offset_slot] = pos - data;
-               enum mp_type type = mp_typeof(*src_pos);
-               if ((type == MP_ARRAY || type == MP_MAP) &&
-                   !mp_stack_is_full(&stack)) {
-                       uint32_t size;
-                       if (type == MP_ARRAY) {
-                               size = mp_decode_array(&src_pos);
-                               pos = mp_encode_array(pos, size);
-                       } else {
-                               size = mp_decode_map(&src_pos);
-                               pos = mp_encode_map(pos, size);
-                       }
-                       mp_stack_push(&stack, type, size);
-                       parent = &field->token;
+               /* Copy field data. */
+               if (field->type == FIELD_TYPE_ARRAY) {
+                       pos = mp_encode_array(pos, mp_decode_array(&src_pos));
+               } else if (field->type == FIELD_TYPE_MAP) {
+                       pos = mp_encode_map(pos, mp_decode_map(&src_pos));
                } else {
-                       const char *src_field = src_pos;
-                       mp_next(&src_pos);
-                       memcpy(pos, src_field, src_pos - src_field);
-                       pos += src_pos - src_field;
+                       memcpy(pos, src_pos, src_pos_end - src_pos);
+                       pos += src_pos_end - src_pos;
                }
        }
-finish:
        assert(pos <= data + src_size);
        uint32_t bsize = pos - data;
        stmt = vy_stmt_alloc(format, bsize);
-- 
2.21.0


Other related posts: