Upgrade: Data migration (dump-restore) script modularized #1864

Status: Open. Wants to merge 19 commits into master.
417 changes: 417 additions & 0 deletions gnowsys-ndf/gnowsys_ndf/ndf/management/commands/export_logic.py

Large diffs are not rendered by default.

443 changes: 78 additions & 365 deletions gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_export.py

Large diffs are not rendered by default.

196 changes: 75 additions & 121 deletions gnowsys-ndf/gnowsys_ndf/ndf/management/commands/group_import.py

Large diffs are not rendered by default.

908 changes: 908 additions & 0 deletions gnowsys-ndf/gnowsys_ndf/ndf/management/commands/import_logic.py

Large diffs are not rendered by default.
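The four modules above carry the bulk of the refactor, but their diffs are collapsed. Based on the imports visible in the two rendered files below, the relationship is roughly the following sketch; the matching imports in the reworked group_export/group_import are an assumption, since those diffs are not shown here:

# export side: the command delegates node serialization to the shared module
from export_logic import dump_node

# import side: the command pulls in the shared restore helpers
from gnowsys_ndf.ndf.management.commands.import_logic import *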

204 changes: 204 additions & 0 deletions gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_export.py
@@ -0,0 +1,204 @@
import os
import datetime
import subprocess
import re
# from threading import Thread
# import multiprocessing
# import math
from bs4 import BeautifulSoup

try:
from bson import ObjectId
except ImportError: # old pymongo
from pymongo.objectid import ObjectId
from django.template.defaultfilters import slugify
from django.core.management.base import BaseCommand, CommandError
from schema_mapping import create_factory_schema_mapper
from users_dump_restore import create_users_dump
from export_logic import dump_node
from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection
from gnowsys_ndf.ndf.models import HistoryManager
from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID
from gnowsys_ndf.ndf.views.methods import get_group_name_id
from gnowsys_ndf.ndf.templatetags.simple_filters import get_latest_git_hash, get_active_branch_name

# global variables declaration
GROUP_CONTRIBUTORS = []
DUMP_PATH = None
TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export')
node_id = None
DATA_EXPORT_PATH = None
MEDIA_EXPORT_PATH = None
RESTORE_USER_DATA = False
SCHEMA_MAP_PATH = None
log_file = None
log_file_path = None
historyMgr = HistoryManager()
DUMP_NODES_LIST = []
DUMPED_NODE_IDS = set()
ROOT_DUMP_NODE_ID = None
ROOT_DUMP_NODE_NAME = None
MULTI_DUMP = False

def create_log_file(dump_node_id):
'''
Creates a log file under GSTUDIO_LOGS_DIR_PATH,
named after the node being exported
'''
log_file_name = 'node_export_' + str(dump_node_id)+ '.log'
if not os.path.exists(GSTUDIO_LOGS_DIR_PATH):
os.makedirs(GSTUDIO_LOGS_DIR_PATH)
global log_file_path
log_file_path = os.path.join(GSTUDIO_LOGS_DIR_PATH, log_file_name)
# print log_file_path
global log_file
log_file = open(log_file_path, 'w+')
log_file.write("\n######### Script ran on : " + str(datetime.datetime.now()) + " #########\n\n")
return log_file_path

def setup_dump_path(node_id):
'''
Creates the dump directory structure under TOP_PATH
(dump/ and dump/media_files/) and returns the dump path
'''
global DUMP_PATH
global TOP_PATH
global DATA_EXPORT_PATH
global MEDIA_EXPORT_PATH
# datetimestamp = datetime.datetime.now().isoformat()
datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
DUMP_PATH = TOP_PATH
DATA_EXPORT_PATH = os.path.join(DUMP_PATH, 'dump')
MEDIA_EXPORT_PATH = os.path.join(DATA_EXPORT_PATH, 'media_files')
if not os.path.exists(DATA_EXPORT_PATH):
os.makedirs(DATA_EXPORT_PATH)
if not os.path.exists(MEDIA_EXPORT_PATH):
os.makedirs(MEDIA_EXPORT_PATH)
return DATA_EXPORT_PATH

def create_configs_file(node_id):
global RESTORE_USER_DATA
global DUMP_PATH
configs_file_path = os.path.join(DUMP_PATH, "migration_configs.py")
with open(configs_file_path, 'w+') as configs_file_out:
configs_file_out.write("\nRESTORE_USER_DATA=" + str(RESTORE_USER_DATA))
configs_file_out.write("\nGSTUDIO_INSTITUTE_ID='" + str(GSTUDIO_INSTITUTE_ID) + "'")
configs_file_out.write("\nNODE_ID='" + str(node_id) + "'")
configs_file_out.write("\nROOT_DUMP_NODE_NAME='" + str(ROOT_DUMP_NODE_NAME) + "'")
configs_file_out.write("\nGIT_COMMIT_HASH='" + str(get_latest_git_hash()) + "'")
configs_file_out.write("\nGIT_BRANCH_NAME='" + str(get_active_branch_name()) + "'")
configs_file_out.write('\nSYSTEM_DETAILS="' + str(os.uname()) + '"')
return configs_file_path

def write_md5_of_dump(group_dump_path, configs_file_path):
global DUMP_PATH
from checksumdir import dirhash
md5hash = dirhash(group_dump_path, 'md5')
with open(configs_file_path, 'a+') as configs_file_out:
configs_file_out.write("\nMD5='" + str(md5hash) + "'")



def call_exit():
print "\n Exiting..."
os._exit(0)

def update_globals():
global GLOBAL_DICT
global GROUP_CONTRIBUTORS
global DUMP_PATH
global TOP_PATH
global node_id
global DATA_EXPORT_PATH
global MEDIA_EXPORT_PATH
global RESTORE_USER_DATA
global SCHEMA_MAP_PATH
global log_file_path
global DUMP_NODES_LIST
global DUMPED_NODE_IDS
global ROOT_DUMP_NODE_ID
global ROOT_DUMP_NODE_NAME
global MULTI_DUMP

GLOBAL_DICT = {
"GROUP_CONTRIBUTORS": GROUP_CONTRIBUTORS,
"DUMP_PATH": DUMP_PATH,
"TOP_PATH": TOP_PATH,
"node_id": node_id,
"DATA_EXPORT_PATH": DATA_EXPORT_PATH,
"MEDIA_EXPORT_PATH": MEDIA_EXPORT_PATH,
"RESTORE_USER_DATA": RESTORE_USER_DATA,
"SCHEMA_MAP_PATH": SCHEMA_MAP_PATH,
"log_file_path": log_file_path,
"DUMP_NODES_LIST": DUMP_NODES_LIST,
"DUMPED_NODE_IDS": DUMPED_NODE_IDS,
"ROOT_DUMP_NODE_ID": ROOT_DUMP_NODE_ID,
"ROOT_DUMP_NODE_NAME": ROOT_DUMP_NODE_NAME,
"MULTI_DUMP": MULTI_DUMP
}


class Command(BaseCommand):
def handle(self, *args, **options):
global SCHEMA_MAP_PATH
global DUMP_PATH
global ROOT_DUMP_NODE_ID
global ROOT_DUMP_NODE_NAME
global MULTI_DUMP
global GLOBAL_DICT
input_name_or_id = None
if args:
input_name_or_id = args[0]
else:
input_name_or_id = raw_input("\n\tPlease enter ObjectID of the Node: ")

dump_node_obj = node_collection.one({'_id': ObjectId(input_name_or_id), '_type': 'GSystem'})

if dump_node_obj:
log_file_path = create_log_file(dump_node_obj._id)
ROOT_DUMP_NODE_ID = dump_node_obj._id
ROOT_DUMP_NODE_NAME = dump_node_obj.name

global TOP_PATH
global DUMP_NODES_LIST
datetimestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
TOP_PATH = os.path.join(GSTUDIO_DATA_ROOT, 'data_export', str(dump_node_obj._id) + "_"+ str(datetimestamp))
SCHEMA_MAP_PATH = TOP_PATH
print "\tRequest received for Export of : \n\t\tObjectId: ", dump_node_obj._id
try:
print "\t\tName : ", dump_node_obj.name
except Exception as e:
pass
global RESTORE_USER_DATA
user_data_dump = raw_input("\n\tDo you want to include Users in this export ? Enter y/Y to continue:\t ")
if user_data_dump in ['y', 'Y']:
RESTORE_USER_DATA = True
else:
RESTORE_USER_DATA = False

print "START : ", str(datetime.datetime.now())
group_dump_path = setup_dump_path(slugify(dump_node_obj._id))

global node_id
node_id = dump_node_obj._id
if RESTORE_USER_DATA:
create_users_dump(group_dump_path, dump_node_obj.contributors)

configs_file_path = create_configs_file(dump_node_obj._id)
global log_file
update_globals()
dump_node(node=dump_node_obj, collection_name=node_collection, variables_dict=GLOBAL_DICT)
create_factory_schema_mapper(SCHEMA_MAP_PATH)

log_file.write("\n*************************************************************")
log_file.write("\n######### Script Completed at : " + str(datetime.datetime.now()) + " #########\n\n")
print "END : ", str(datetime.datetime.now())

write_md5_of_dump(group_dump_path, configs_file_path)
print "*"*70
print "\n This will take few minutes. Please be patient.\n"
print "\n Log will be found at: ", log_file_path
print "\n Log will be found at: ", TOP_PATH
print "*"*70
log_file.close()
call_exit()
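For quick reference, the export command above reads a single positional argument (the ObjectId of the GSystem node) or prompts for it interactively. A minimal invocation sketch, assuming the legacy positional-args Command API used here; the ObjectId is a placeholder reused from the node_import docstring below:

#   python manage.py node_export 58dded48cc566201992f6e79
# or, programmatically, via Django's management API:
from django.core.management import call_command
call_command('node_export', '58dded48cc566201992f6e79')  # forwarded to handle(*args)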
103 changes: 103 additions & 0 deletions gnowsys-ndf/gnowsys_ndf/ndf/management/commands/node_import.py
@@ -0,0 +1,103 @@
'''
Import can also be invoked with command-line args as follows:
python manage.py node_import <dump_path> <md5-check> <group-availability> <user-objs-restoration>
e.g.:
python manage.py node_import <dump_path> y y y
'''
import os
import json
import imp
import subprocess
from bson import json_util
import pathlib2
try:
from bson import ObjectId
except ImportError: # old pymongo
from pymongo.objectid import ObjectId

import time
import datetime

# from bson.json_util import dumps,loads,object_hook
from django.core.management.base import BaseCommand, CommandError
from gnowsys_ndf.ndf.management.commands.import_logic import *
from gnowsys_ndf.ndf.models import node_collection, triple_collection, filehive_collection, counter_collection
from gnowsys_ndf.ndf.models import HistoryManager, RCS
from gnowsys_ndf.settings import GSTUDIO_DATA_ROOT, GSTUDIO_LOGS_DIR_PATH, MEDIA_ROOT, GSTUDIO_INSTITUTE_ID, RCS_REPO_DIR
from users_dump_restore import load_users_dump
from gnowsys_ndf.settings import RCS_REPO_DIR_HASH_LEVEL
from schema_mapping import update_factory_schema_mapper
from gnowsys_ndf.ndf.views.utils import replace_in_list, merge_lists_and_maintain_unique_ele

# global variables declaration
DATA_RESTORE_PATH = None
DATA_DUMP_PATH = None
DEFAULT_USER_ID = 1
DEFAULT_USER_SET = False
USER_ID_MAP = {}
SCHEMA_ID_MAP = {}
log_file = None
CONFIG_VARIABLES = None
history_manager = HistoryManager()
rcs = RCS()


'''
The following will be available:
CONFIG_VARIABLES.FORK=True
CONFIG_VARIABLES.CLONE=False
CONFIG_VARIABLES.RESTORE_USER_DATA=True
CONFIG_VARIABLES.GSTUDIO_INSTITUTE_ID='MZ-10'
CONFIG_VARIABLES.NODE_ID='58dded48cc566201992f6e79'
CONFIG_VARIABLES.MD5='aeba0e3629fb0443861c699ae327d962'
'''

def core_import(*args):
global DATA_RESTORE_PATH
global log_file
global log_file_path
global DATA_DUMP_PATH
global CONFIG_VARIABLES
datetimestamp = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
log_file_name = 'node_import_' + str(CONFIG_VARIABLES.NODE_ID) + '_' + str(datetimestamp)
log_file_path = create_log_file(log_file_name)
log_file = open(log_file_path, 'w+')
log_file.write('\n######### Script ran on : ' + str(datetime.datetime.now()) + ' #########\n\n')
log_file.write('\nUpdated CONFIG_VARIABLES: ' + str(CONFIG_VARIABLES))
print '\n Validating the data-dump'
log_file.write(validate_data_dump(DATA_DUMP_PATH, CONFIG_VARIABLES.MD5, *args))
print '\n Node Restoration.'
user_json_file_path = os.path.join(DATA_DUMP_PATH, 'users_dump.json')
log_stmt = user_objs_restoration(True, user_json_file_path, DATA_DUMP_PATH, *args)
log_file.write(log_stmt)
print '\n Factory Schema Restoration. Please wait..'
call_group_import(os.path.join(DATA_DUMP_PATH, 'data', 'rcs-repo'), log_file_path, DATA_RESTORE_PATH, None)
copy_media_data(os.path.join(DATA_DUMP_PATH, 'media_files', 'data', 'media'))
return


class Command(BaseCommand):

def handle(self, *args, **options):
global SCHEMA_ID_MAP
global DATA_RESTORE_PATH
global DATA_DUMP_PATH
global CONFIG_VARIABLES
if args:
DATA_RESTORE_PATH = args[0]
else:
DATA_RESTORE_PATH = raw_input('\n\tEnter absolute path of data-dump folder to restore:')
print '\nDATA_RESTORE_PATH: ', DATA_RESTORE_PATH
if os.path.exists(DATA_RESTORE_PATH):
if os.path.exists(os.path.join(DATA_RESTORE_PATH, 'dump')):
DATA_DUMP_PATH = os.path.join(DATA_RESTORE_PATH, 'dump')
SCHEMA_ID_MAP = update_factory_schema_mapper(DATA_RESTORE_PATH)
CONFIG_VARIABLES = read_config_file(DATA_RESTORE_PATH)
core_import(*args)
print '*' * 70
print '\n Log will be found at: ', log_file_path
print '*' * 70
else:
print '\n No dump found at entered path.'
call_exit()
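For reference, the migration_configs.py read back by read_config_file() is the file written by create_configs_file() and write_md5_of_dump() in node_export.py above. A hedged sketch of its generated contents, plus one way it could be loaded (read_config_file itself lives in import_logic, which is not rendered in this diff; the imp.load_source call is an assumption suggested by the imp import):

# migration_configs.py at the root of the dump folder (values are placeholders,
# except those quoted from the docstring above)
RESTORE_USER_DATA=True
GSTUDIO_INSTITUTE_ID='MZ-10'
NODE_ID='58dded48cc566201992f6e79'
ROOT_DUMP_NODE_NAME='<node name>'
GIT_COMMIT_HASH='<commit hash>'
GIT_BRANCH_NAME='<branch>'
SYSTEM_DETAILS="<os.uname() output>"
MD5='aeba0e3629fb0443861c699ae327d962'

# possible loading step (assumption, not the rendered implementation):
import imp, os
CONFIG_VARIABLES = imp.load_source('config_variables',
                                   os.path.join(DATA_RESTORE_PATH, 'migration_configs.py'))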
